def script_find_where_character_occurs():
    """
    Record where each annotated person-name form occurs in the DLH books.

    Every sentence of every parsed book is tested against every known form
    with ``form in sentence``. Each hit is stored as a 1-based
    ``"book-paragraph-sentence"`` location. The result is written to
    ``dlh_clean_lemma_occurrences.txt`` in the DLH main directory, one line
    per form: ``<form>: (<number of hits>) <location> <location> ...``.

    NOTE(review): whether ``form in sentence`` is a token or a substring
    match depends on the sentence type returned by
    ``get_parsed_dlh_books()`` -- confirm.

    >>> script_find_where_character_occurs()

    :return: None
    """
    occurrences = defaultdict(list)
    # Use the shared loader so the names file is resolved inside the DLH main
    # directory, like the other loaders in this module, instead of relative
    # to the current working directory.
    annotated_proper_nouns = get_lemmatised_dlh_person_names()
    # The list of forms does not change while scanning: flatten it once.
    forms = [
        form
        for lemma_forms in annotated_proper_nouns.values()
        for form in lemma_forms
    ]
    for i, book in enumerate(get_parsed_dlh_books(), start=1):
        for j, para in enumerate(book, start=1):
            for k, sentence in enumerate(para, start=1):
                location = f"{i}-{j}-{k}"
                for form in forms:
                    if form in sentence:
                        occurrences[form].append(location)
    filename = os.path.join(
        PACKDIR,
        Work.DLH.get_main_directory(),
        "dlh_clean_lemma_occurrences.txt",
    )
    with codecs.open(filename, "w", encoding="utf-8") as f:
        f.write("\n".join(
            f"{form}: ({len(locations)}) " + " ".join(locations)
            for form, locations in occurrences.items()
        ))
def get_lemmatised_dlh_place_names():
    """
    Load the annotated place names of the DLH work.

    The file named by ``constants.LEMMATISED_DLH_CLEAN_PLACES`` is looked up
    in the DLH main directory under ``PACKDIR`` and parsed with
    ``utils.get_annotated_proper_nouns_txt``.

    >>> get_lemmatised_dlh_place_names()['Ligeris']
    ['Ligeris', 'Ligerem']

    :return: whatever ``utils.get_annotated_proper_nouns_txt`` yields for
        that file; indexing it by a name gives a list of forms, as in the
        example above.
    """
    dlh_directory = os.path.join(PACKDIR, Work.DLH.get_main_directory())
    places_path = os.path.join(
        dlh_directory, constants.LEMMATISED_DLH_CLEAN_PLACES)
    return utils.get_annotated_proper_nouns_txt(places_path)
def get_lemmatised_dlh_person_names():
    """
    Load the annotated person names of the DLH work.

    The file named by ``constants.LEMMATISED_DLH_CLEAN_PERSON_NAMES`` is
    looked up in the DLH main directory under ``PACKDIR`` and parsed with
    ``utils.get_annotated_proper_nouns_txt``.

    >>> get_lemmatised_dlh_person_names()['Chlodovechus']
    ['Chlodovechus', 'Chlodovechi', 'Chlodovechum', 'Chlodovecho']

    :return: whatever ``utils.get_annotated_proper_nouns_txt`` yields for
        that file; indexing it by a name gives a list of forms, as in the
        example above.
    """
    dlh_directory = os.path.join(PACKDIR, Work.DLH.get_main_directory())
    person_names_path = os.path.join(
        dlh_directory, constants.LEMMATISED_DLH_CLEAN_PERSON_NAMES)
    return utils.get_annotated_proper_nouns_txt(person_names_path)
def get_lemmatised_dlh_proper_nouns():
    """
    Load the annotated proper nouns of the DLH work.

    The file named by ``constants.LEMMATISED_DLH_PROPER_NOUNS`` is looked up
    in the DLH main directory under ``PACKDIR`` and parsed with
    ``utils.get_annotated_proper_nouns_txt``.

    >>> get_lemmatised_dlh_proper_nouns()['Burgundia']
    ['Burgundias', 'Burgundiam', 'Burgundiae', 'Burgundia', 'Burgundio']

    :return: whatever ``utils.get_annotated_proper_nouns_txt`` yields for
        that file; indexing it by a name gives a list of forms, as in the
        example above.
    """
    dlh_directory = os.path.join(PACKDIR, Work.DLH.get_main_directory())
    proper_nouns_path = os.path.join(
        dlh_directory, constants.LEMMATISED_DLH_PROPER_NOUNS)
    return utils.get_annotated_proper_nouns_txt(proper_nouns_path)
def get_volsunga_annotated_names():
    """
    Load the annotated proper nouns of the Volsunga work.

    The file named by ``constants.VOLSUNGA_CLEAN_NAMES_PROPER_NOUNS`` is
    looked up in the VOL main directory under ``PACKDIR`` and parsed with
    ``utils.get_annotated_proper_nouns_txt``.

    >>> annotated_names = get_volsunga_annotated_names()
    >>> annotated_names["Sigmundr"]
    ['Sigmundr', 'Sigmund', 'Sigmundi', 'Sigmundar']

    :return: whatever ``utils.get_annotated_proper_nouns_txt`` yields for
        that file; indexing it by a name gives a list of forms, as in the
        example above.
    """
    volsunga_directory = os.path.join(PACKDIR, Work.VOL.get_main_directory())
    names_path = os.path.join(
        volsunga_directory, constants.VOLSUNGA_CLEAN_NAMES_PROPER_NOUNS)
    return utils.get_annotated_proper_nouns_txt(names_path)