def find_alliteration(self):
    """
    Find alliterations in the complete verse.

    Scans the phonological transcription of each long line and delegates
    alliteration detection to the line objects: a long line parsed as two
    ``ShortLine``s is matched across its halves, while a single ``LongLine``
    is matched within itself.

    :return: tuple ``(verse_alliterations, n_alliterations_lines)`` — the
        alliterations found for each long line and their respective counts.
    :raises ValueError: if no phonological transcription is available
        (``to_phonetics`` has presumably not been called yet).
    """
    if len(self.phonological_features_text) == 0:
        logger.error("No phonological transcription found")
        raise ValueError("No phonological transcription found")
    # First sound of every word, mirroring the nesting of
    # phonological_features_text: [long line][short line][word].
    first_sounds = [
        [[word[0] for word in short_line] for short_line in line]
        for line in self.phonological_features_text
    ]
    verse_alliterations = []
    n_alliterations_lines = []
    # Iterate over the transcribed lines (not long_lines directly) so the
    # loop length matches the available transcriptions, as before.
    for i in range(len(first_sounds)):
        if isinstance(self.long_lines[i][0], ShortLine) and isinstance(
                self.long_lines[i][1], ShortLine):
            self.long_lines[i][0].get_first_sounds()
            self.long_lines[i][1].get_first_sounds()
            alli, counter = self.long_lines[i][0].find_alliterations(
                self.long_lines[i][1])
            verse_alliterations.append(alli)
            n_alliterations_lines.append(counter)
        elif isinstance(self.long_lines[i][0], LongLine):
            self.long_lines[i][0].get_first_sounds()
            alli, counter = self.long_lines[i][0].find_alliterations()
            verse_alliterations.append(alli)
            n_alliterations_lines.append(counter)
    return verse_alliterations, n_alliterations_lines
def open_pickle(path: str):
    """Open a pickle file and return the loaded object.

    :param path: file path to the pickle file to be opened
    :rtype: object
    :raises FileNotFoundError: if ``path`` does not exist
    :raises IOError: on other I/O failures
    :raises EOFError: if the file is truncated
    :raises pickle.UnpicklingError: if the file content is not a pickle

    All failures are logged once and then re-raised unchanged for the
    caller to handle. (The original nested try blocks logged each error
    twice — once in the inner broad handler and again in the outer
    specific one.)
    """
    try:
        with open(path, "rb") as opened_pickle:
            return pickle.load(opened_pickle)
    except Exception as err:
        # Log once at this boundary, then propagate the original exception.
        logger.error(err)
        raise
def to_phonetics(self):
    """
    Transcribe every short line of the verse to IPA.

    Populates ``self.transcribed_text`` and
    ``self.phonological_features_text`` (both nested per long line);
    transcription is what later makes alliteration detection possible.
    """
    if len(self.long_lines) == 0:
        logger.error("No text was imported")
        self.syllabified_text = []
        return
    transcriber = Transcriber(DIPHTHONGS_IPA, DIPHTHONGS_IPA_class,
                              IPA_class, old_norse_rules)
    transcribed_text = []
    phonological_features_text = []
    for long_line in self.long_lines:
        line_transcriptions = []
        line_features = []
        for short_line in long_line:
            assert isinstance(short_line, (ShortLine, LongLine))
            short_line.to_phonetics(transcriber)
            line_transcriptions.append(short_line.transcribed)
            line_features.append(short_line.phonological_features_text)
        transcribed_text.append(line_transcriptions)
        phonological_features_text.append(line_features)
    self.transcribed_text = transcribed_text
    self.phonological_features_text = phonological_features_text
def __init__(self, place=None, manner=None, voiced=None, ipar=None,
             geminate=None):
    """Phonological description of a consonant.

    :param place: articulation place — a ``Place`` member or None
    :param manner: articulation manner — a ``Manner`` member or None
    :param voiced: whether the consonant is voiced (bool or None)
    :param ipar: IPA representation of the sound (stored as-is)
    :param geminate: whether the consonant is geminate (bool or None)
    :raises ValueError: if ``place`` or ``manner`` has the wrong type
    :raises TypeError: if ``voiced`` or ``geminate`` is not a bool
    """
    if isinstance(place, Place) or place is None:
        self.place = place
    else:
        logger.error("Incorrect argument")
        # Fix: the original only logged here and fell through, leaving
        # ``self.place`` unset; raise like the ``manner`` branch does.
        raise ValueError
    if isinstance(manner, Manner) or manner is None:
        self.manner = manner
    else:
        logger.error("Incorrect argument")
        raise ValueError
    if isinstance(voiced, bool) or voiced is None:
        self.voiced = voiced
    else:
        logger.error("Incorrect argument")
        raise TypeError
    if isinstance(geminate, bool) or geminate is None:
        self.geminate = geminate
    else:
        logger.error("Incorrect argument")
        raise TypeError
    self.ipar = ipar
def __init__(self, height=None, backness=None, rounded=None, length=None,
             ipar=None):
    """Phonological description of a vowel.

    :param height: vowel height — a ``Height`` member or None
    :param backness: vowel backness — a ``Backness`` member or None
    :param rounded: whether the vowel is rounded (bool or None)
    :param length: vowel length — a ``Length`` member or None
    :param ipar: IPA representation of the sound (stored as-is)
    :raises ValueError: if ``height``, ``backness`` or ``length`` has the
        wrong type
    :raises TypeError: if ``rounded`` is not a bool
    """
    # Guard-clause style: reject each bad argument before storing it.
    if not (height is None or isinstance(height, Height)):
        logger.error("Incorrect argument")
        raise ValueError
    self.height = height
    if not (backness is None or isinstance(backness, Backness)):
        logger.error("Incorrect argument")
        raise ValueError
    self.backness = backness
    if not (rounded is None or isinstance(rounded, bool)):
        logger.error("Incorrect argument")
        raise TypeError
    self.rounded = rounded
    if not (length is None or isinstance(length, Length)):
        logger.error("Incorrect argument")
        raise ValueError
    self.length = length
    self.ipar = ipar
def from_regular_expression(re_rule, estimated_sound, ipa_class):
    """Build a ``Rule`` instance from an ``re.sub``-style rewrite rule.

    :param re_rule: pattern (first argument of ``re.sub``)
    :param estimated_sound: an IPA character (second argument of ``re.sub``)
    :param ipa_class: dict mapping IPA characters to Vowel/Consonant
        instances
    :return: corresponding Rule instance
    :raises ValueError: if no core sound can be extracted from the pattern
    """
    assert len(re_rule) > 0
    # Anchors decide where in the word the rule applies.
    if re_rule.startswith("^"):
        rank = Rank.first
    elif re_rule.endswith("$"):
        rank = Rank.last
    else:
        rank = Rank.inner
    # Pull apart the lookbehind context, the core sound and the lookahead
    # context of the encoded rule.
    before_match = re.search(r"(?<=\(\?\<\=\[)\w*", re_rule)
    core_match = re.search(
        r"(?<=\))\w(?=\(\?\=)|(?<=\^)\w(?=\(\?\=)|(?<=\))\w(?=\$)", re_rule)
    after_match = re.search(r"(?<=\(\?\=\[)\w*", re_rule)
    if core_match is None:
        logger.error("No core")
        raise ValueError
    core = ipa_class[core_match.group(0)]
    before = (None if before_match is None else
              [ipa_class[ipar].to_abstract()
               for ipar in before_match.group(0)])
    after = (None if after_match is None else
             [ipa_class[ipar].to_abstract()
              for ipar in after_match.group(0)])
    abstract_position = AbstractPosition(rank, before, after)
    return Rule(abstract_position, core, ipa_class[estimated_sound])
def syllabify(self, hierarchy):
    """Syllabify every short line of the verse.

    Syllables may play a role in verse classification. Populates
    ``self.syllabified_text`` (nested per long line).

    :param hierarchy: sonority hierarchy handed to the syllabifier
    """
    if len(self.long_lines) == 0:
        logger.error("No text was imported")
        self.syllabified_text = []
        return
    syllabifier = Syllabifier(language="old_norse", break_geminants=True)
    syllabifier.set_hierarchy(hierarchy)
    syllabified_text = []
    for long_line in self.long_lines:
        syllabified_line = []
        for short_line in long_line:
            assert isinstance(short_line, (ShortLine, LongLine))
            short_line.syllabify(syllabifier)
            syllabified_line.append(short_line.syllabified)
        syllabified_text.append(syllabified_line)
    self.syllabified_text = syllabified_text
def lemmatize(self, input_text, return_raw=False, return_string=False):
    """Lemmatize a string or a list of tokens.

    Lookup is done against ``self.lemmata``, a lemma/headword key-value
    mapping. A string input is tokenized with ``PunktLanguageVars``. A final
    period on a token is stripped for the lookup and re-added afterwards.

    :param input_text: text to lemmatize (str or list of tokens)
    :param return_raw: if True, emit ``token/headword`` pairs
    :param return_string: if True, join the results with spaces
    :raises TypeError: if ``input_text`` is neither a list nor a string
        (the original used ``assert``, which is stripped under ``-O``)

    TODO: rm check for final period, change PunktLanguageVars()
    """
    if not isinstance(input_text, (list, str)):
        logger.error("Input must be a list or string.")
        raise TypeError("Input must be a list or string.")
    if isinstance(input_text, str):
        punkt = PunktLanguageVars()
        tokens = punkt.word_tokenize(input_text)
    else:
        tokens = input_text
    lemmatized_tokens = []
    for token in tokens:
        # Strip a final period so the lookup works ("amat." -> "amat");
        # guard against empty tokens (the original crashed on "").
        final_period = bool(token) and token[-1] == "."
        if final_period:
            token = token[:-1]
        if token.lower() in self.lemmata:
            headword = self.lemmata[token.lower()]
            if final_period:
                headword += "."
            # NOTE: as in the original, the raw form keeps the period-less
            # token on the left of the slash for found tokens.
            lemmatized_tokens.append(
                token + "/" + headword if return_raw else headword)
        else:
            if final_period:
                token += "."
            lemmatized_tokens.append(
                token + "/" + token if return_raw else token)
    if return_string:
        return " ".join(lemmatized_tokens)
    return lemmatized_tokens
""" import re import unicodedata from nltk.tokenize import wordpunct_tokenize from cltkv1.core.cltk_logger import logger try: # James Tauber's greek_accentuation package from greek_accentuation import characters as chars except ImportError as import_error: message = ('Missing "greek_accentuation" package. Install with ' "`pip install greek-accentuation`.") logger.error(message) logger.error(import_error) raise __author__ = ["Jack Duff <*****@*****.**>"] __license__ = "MIT License. See LICENSE." # Dictionaries of phonological reconstructions for use in transcribing. # Probert, Philomen. 2010. Phonology, in E. Bakker, A Companion to the \ # Ancient Greek Language. # (Entries which are commented out are realized through diacritic analysis.) GREEK = { "Attic": { "Probert": { "correspondence": {