Example #1
0
 def save_triples_to_file(triples, path):
     """Write ``triples`` to ``path`` as tab-separated lines.

     ``triples`` is first passed through ``utils.sorted_items``; each
     resulting ``(sec, xy)`` pair is rendered as ``sec<TAB>xy[0]<TAB>xy[1]``
     followed by a newline, and the lines are written via
     ``utils.filer.write_lines``.
     """
     # NOTE: the original wrapped this in ``try: ... except: raise``, which
     # is a no-op (a bare re-raise propagates the exception unchanged), so
     # the wrapper has been removed; exception behavior is identical.
     sorted_triples = utils.sorted_items(triples)
     lines = [
         '%s\t%s\t%s\n' % (sec, xy[0], xy[1])
         for sec, xy in sorted_triples
     ]
     utils.filer.write_lines(path, lines)
Example #2
0
# Resolve the requested verse ranges from the command line.
verses = parse_verse_ranges(args.verses)

# Optional word-set of lexemes to skip entirely.
exclusions = load_wordset(args.exclude) if args.exclude else set()

lexemes = load_yaml(args.lexemes)

# Start from any previously collected headwords, if a file was given.
headwords = load_yaml(args.headwords) if args.headwords else {}


# Collect a headword for every noun/adjective lexeme not yet covered.
for entry in get_morphgnt(verses, args.sblgnt_dir):
    if entry[0] != "WORD":
        continue
    lexeme = entry[8]
    if lexeme in exclusions or lexeme in headwords:
        continue
    if entry[2] in ["N-", "A-"]:
        record = lexemes[lexeme]
        # Prefer the full citation form; fall back to the Danker entry.
        if "full-citation-form" in record:
            headwords[lexeme] = record["full-citation-form"]
        else:
            headwords[lexeme] = record["danker-entry"]

for lexeme, headword in sorted_items(headwords):
    print("{}: {}".format(lexeme, headword))
Example #3
0
    help="path to lexemes file "
    "(defaults to lexemes.yaml)")

args = argparser.parse_args()

# Resolve the requested verse ranges from the command line.
verses = parse_verse_ranges(args.verses)

# Optional word-set of lemmas to skip entirely.
exclusions = load_wordset(args.exclude) if args.exclude else set()

lexemes = load_yaml(args.lexemes)

# Start from any previously collected headwords, if a file was given.
headwords = load_yaml(args.headwords) if args.headwords else {}


# Record a headword for each previously unseen noun/adjective lemma.
for entry in get_morphgnt(verses):
    if entry[0] != "WORD":
        continue
    lemma = entry[1]["lemma"]
    if lemma in exclusions or lemma in headwords:
        continue
    if entry[1]["ccat-pos"] in ["N-", "A-"]:
        headwords[lemma] = lexemes[lemma]["headword"]

for lemma, headword in sorted_items(headwords):
    print("{}: {}".format(lemma, headword))
Example #4
0
    help="path to morphological-lexicon lexemes.yaml file "
         "(defaults to ../morphological-lexicon/lexemes.yaml)")

args = argparser.parse_args()

# Resolve the requested verse ranges from the command line.
verses = parse_verse_ranges(args.verses)

# Optional word-set of lemmas to skip entirely.
exclusions = load_wordset(args.exclude) if args.exclude else set()

lexemes = load_yaml(args.lexemes)

# Start from any previously collected glosses, if a file was given.
glosses = load_yaml(args.glosses) if args.glosses else {}


# Give every previously unseen lemma a default gloss (quoted placeholder
# when the lexeme record has none).
for entry in get_morphgnt(verses):
    if entry[0] != "WORD":
        continue
    lemma = entry[1]["lemma"]
    if lemma not in exclusions and lemma not in glosses:
        glosses[lemma] = {"default": lexemes[lemma].get("gloss", "\"@@@\"")}

for lemma, gloss_entries in sorted_items(glosses):
    print("{}:".format(lemma))
    for k, v in sorted_items(gloss_entries):
        print("    {}: {}".format(k, v))
Example #5
0
#!/usr/bin/env python3
"""
One-off script that pulls the lexemes.yaml data in from the
morphological-lexicon project and reduces it for storage here.

Once the reduced lexemes.yaml lives in this repo, this script is kept only
for historical interest and reproducibility.
"""

from utils import load_yaml, sorted_items

LEXEMES_PATH = "../../morphgnt/morphological-lexicon/lexemes.yaml"

for key, value in sorted_items(load_yaml(LEXEMES_PATH)):
    print("{}:".format(key))
    # Prefer the full citation form, then the Danker entry, then the key.
    headword = value.get("full-citation-form", value.get("danker-entry", key))
    print("    headword: {}".format(headword))
    gloss = value.get("gloss")
    if gloss:
        print("    gloss: {}".format(gloss))
Example #6
0
                       "(defaults to lexemes.yaml)")

args = argparser.parse_args()

# Resolve the requested verse ranges from the command line.
verses = parse_verse_ranges(args.verses)

# Optional word-set of lemmas to skip entirely.
exclusions = load_wordset(args.exclude) if args.exclude else set()

lexemes = load_yaml(args.lexemes)

# Start from any previously collected glosses, if a file was given.
glosses = load_yaml(args.glosses) if args.glosses else {}

# Give every previously unseen lemma a default gloss (quoted placeholder
# when the lexeme record has none).
for entry in get_morphgnt(verses):
    if entry[0] != "WORD":
        continue
    lemma = entry[1]["lemma"]
    if lemma not in exclusions and lemma not in glosses:
        glosses[lemma] = {
            "default": lexemes[lemma].get("gloss", "\"@@@\"")
        }

for lemma, gloss_entries in sorted_items(glosses):
    print("{}:".format(lemma))
    for k, v in sorted_items(gloss_entries):
        print("    {}: {}".format(k, v))
Example #7
0
    dest="sblgnt_dir",
    default="../sblgnt",
    help="path to MorphGNT sblgnt directory (defaults to ../sblgnt)")

args = argparser.parse_args()

# Resolve the requested verse ranges from the command line.
verses = parse_verse_ranges(args.verses)

# Optional word-set of lexemes to skip entirely.
exclusions = load_wordset(args.exclude) if args.exclude else set()

lexemes = load_yaml(args.lexemes)

# Start from any previously collected glosses, if a file was given.
glosses = load_yaml(args.glosses) if args.glosses else {}

# Give every previously unseen lexeme a default gloss ("@@@" placeholder
# when the lexeme record has none).
for entry in get_morphgnt(verses, args.sblgnt_dir):
    if entry[0] != "WORD":
        continue
    lexeme = entry[8]
    if lexeme not in exclusions and lexeme not in glosses:
        glosses[lexeme] = {"default": lexemes[lexeme].get("gloss", "@@@")}

for lexeme, gloss_entries in sorted_items(glosses):
    print("{}:".format(lexeme))
    for k, v in sorted_items(gloss_entries):
        print("    {}: {}".format(k, v))
Example #8
0
#!/usr/bin/env python3

"""
One-off script that pulls the lexemes.yaml data in from the
morphological-lexicon project and reduces it for storage here.

Once the reduced lexemes.yaml lives in this repo, this script is kept only
for historical interest and reproducibility.
"""

from utils import load_yaml, sorted_items

LEXEMES_PATH = "../../morphgnt/morphological-lexicon/lexemes.yaml"

for key, value in sorted_items(load_yaml(LEXEMES_PATH)):
    print("{}:".format(key))
    # Prefer the full citation form, then the Danker entry, then the key.
    headword = value.get("full-citation-form", value.get("danker-entry", key))
    print("    headword: {}".format(headword))
    gloss = value.get("gloss")
    if gloss:
        print("    gloss: {}".format(gloss))