Example #1
    def find_alliteration(self):
        """
        Find alliterations in the complete verse.
        :return:
        """
        if len(self.phonological_features_text) == 0:
            logger.error("No phonological transcription found")
            raise ValueError("No phonological transcription found")
        else:
            first_sounds = []
            for i, line in enumerate(self.phonological_features_text):
                first_sounds.append([])
                for j, short_line in enumerate(line):
                    first_sounds[i].append([])
                    for word in short_line:
                        # keep only the first sound of each word
                        first_sounds[i][j].append(word[0])

            verse_alliterations = []
            n_alliterations_lines = []
            for i, first_sound_line in enumerate(first_sounds):
                if isinstance(self.long_lines[i][0], ShortLine) and isinstance(
                        self.long_lines[i][1], ShortLine):
                    self.long_lines[i][0].get_first_sounds()
                    self.long_lines[i][1].get_first_sounds()
                    alli, counter = self.long_lines[i][0].find_alliterations(
                        self.long_lines[i][1])
                    verse_alliterations.append(alli)
                    n_alliterations_lines.append(counter)
                elif isinstance(self.long_lines[i][0], LongLine):
                    self.long_lines[i][0].get_first_sounds()
                    alli, counter = self.long_lines[i][0].find_alliterations()
                    verse_alliterations.append(alli)
                    n_alliterations_lines.append(counter)
            return verse_alliterations, n_alliterations_lines
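
The search above reduces to comparing the first sounds of words across the two short lines of each long line. Below is a minimal standalone sketch of that idea; it is simplified (it only pairs words whose first sounds are equal, ignoring the special vowel rule of Old Norse alliteration), and the function name and sample words are illustrative only.

def first_sound_pairs(short_line_a, short_line_b):
    """Return (word_a, word_b) pairs whose first sounds match (simplified)."""
    pairs = []
    for word_a in short_line_a:
        for word_b in short_line_b:
            if word_a[0] == word_b[0]:
                pairs.append((word_a, word_b))
    return pairs


# Illustrative half-lines in plain orthography, not phonological transcriptions.
print(first_sound_pairs(["gattir", "allar"], ["ganga", "fram"]))
# -> [('gattir', 'ganga')]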
Example #2
def open_pickle(path: str):
    """Open a pickle and return loaded pickle object.
    :type path: str
    :param : path: File path to pickle file to be opened.
    :rtype : object
    """
    try:
        with open(path, "rb") as opened_pickle:
            try:
                return pickle.load(opened_pickle)
            except Exception as pickle_error:
                logger.error(pickle_error)
                raise
    except FileNotFoundError as fnf_error:
        logger.error(fnf_error)
        raise
    except IOError as io_err:
        logger.error(io_err)
        raise
    except EOFError as eof_error:
        logger.error(eof_error)
        raise
    except pickle.UnpicklingError as unp_error:
        logger.error(unp_error)
        raise
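
A call site might look like the sketch below; the path is purely illustrative, and the enclosing module is assumed to already have `import pickle` and a configured `logger` for open_pickle itself.

# Hypothetical usage; "models/lemmata.pickle" is an illustrative path only.
try:
    lemmata = open_pickle("models/lemmata.pickle")
except (FileNotFoundError, pickle.UnpicklingError):
    lemmata = {}  # fall back to an empty mapping if the model is unavailable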
Example #3
    def to_phonetics(self):
        """
        Transcribing words in verse helps find alliteration.
        """
        if len(self.long_lines) == 0:
            logger.error("No text was imported")
            self.syllabified_text = []
        else:
            transcriber = Transcriber(DIPHTHONGS_IPA, DIPHTHONGS_IPA_class,
                                      IPA_class, old_norse_rules)
            transcribed_text = []
            phonological_features_text = []
            for i, long_line in enumerate(self.long_lines):
                transcribed_text.append([])
                phonological_features_text.append([])
                for short_line in long_line:
                    assert isinstance(short_line, (ShortLine, LongLine))
                    short_line.to_phonetics(transcriber)
                    transcribed_text[i].append(short_line.transcribed)
                    phonological_features_text[i].append(
                        short_line.phonological_features_text)

            self.transcribed_text = transcribed_text
            self.phonological_features_text = phonological_features_text
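
For orientation, here is a placeholder sketch (invented values) of the nesting these loops build: transcribed_text[i][k] is the transcription of short line k within long line i, and phonological_features_text follows the same [long line][short line][word] layout that find_alliteration in Example #1 iterates over.

# Placeholder values only; real entries are IPA strings produced by Transcriber.
transcribed_text = [
    ["<short line 1a>", "<short line 1b>"],  # long line 0
    ["<short line 2a>", "<short line 2b>"],  # long line 1
]
assert transcribed_text[1][0] == "<short line 2a>"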
Example #4
 def __init__(self,
              place=None,
              manner=None,
              voiced=None,
              ipar=None,
              geminate=None):
     if isinstance(place, Place) or place is None:
         self.place = place
     else:
         logger.error("Incorrect argument: place must be a Place instance or None")
         raise ValueError
     if isinstance(manner, Manner) or manner is None:
         self.manner = manner
     else:
         logger.error("Incorrect argument: manner must be a Manner instance or None")
         raise ValueError
     if isinstance(voiced, bool) or voiced is None:
         self.voiced = voiced
     else:
         logger.error("Incorrect argument: voiced must be a bool or None")
         raise TypeError
     if isinstance(geminate, bool) or geminate is None:
         self.geminate = geminate
     else:
         logger.error("Incorrect argument: geminate must be a bool or None")
         raise TypeError
     self.ipar = ipar
Example #5
 def __init__(self,
              height=None,
              backness=None,
              rounded=None,
              length=None,
              ipar=None):
     if isinstance(height, Height) or height is None:
         self.height = height
     else:
         logger.error("Incorrect argument: height must be a Height instance or None")
         raise ValueError
     if isinstance(backness, Backness) or backness is None:
         self.backness = backness
     else:
         logger.error("Incorrect argument: backness must be a Backness instance or None")
         raise ValueError
     if isinstance(rounded, bool) or rounded is None:
         self.rounded = rounded
     else:
         logger.error("Incorrect argument: rounded must be a bool or None")
         raise TypeError
     if isinstance(length, Length) or length is None:
         self.length = length
     else:
         logger.error("Incorrect argument: length must be a Length instance or None")
         raise ValueError
     self.ipar = ipar
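
Examples #4 and #5 repeat the same check-or-raise pattern for every attribute. Below is a hedged sketch of one way to factor that out; the helper name _checked is invented, it reuses the module's logger, and it unifies the exception type to TypeError, unlike the originals, which mix ValueError and TypeError.

def _checked(value, expected_type, name):
    """Return value if it is None or an expected_type instance; otherwise log and raise."""
    if value is None or isinstance(value, expected_type):
        return value
    logger.error("Incorrect argument: %s must be %s or None", name, expected_type.__name__)
    raise TypeError(f"{name} must be {expected_type.__name__} or None")


# Inside Consonant.__init__ (Example #4) the four checks could then collapse to:
#     self.place = _checked(place, Place, "place")
#     self.manner = _checked(manner, Manner, "manner")
#     self.voiced = _checked(voiced, bool, "voiced")
#     self.geminate = _checked(geminate, bool, "geminate")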
Example #6
    def from_regular_expression(re_rule, estimated_sound, ipa_class):
        """

        :param re_rule: pattern (first argument of re.sub)
        :param estimated_sound: an IPA character (second argument of re.sub)
        :param ipa_class: dict whose keys are IPA characters and values are Vowel or Consonant instances
        :return: corresponding Rule instance
        """
        assert len(re_rule) > 0
        if re_rule[0] == "^":
            place = Rank.first
        elif re_rule[-1] == "$":
            place = Rank.last
        else:
            place = Rank.inner

        before_pattern = r"(?<=\(\?\<\=\[)\w*"
        core_pattern = r"(?<=\))\w(?=\(\?\=)|(?<=\^)\w(?=\(\?\=)|(?<=\))\w(?=\$)"
        after_pattern = r"(?<=\(\?\=\[)\w*"
        before_search = re.search(before_pattern, re_rule)
        core_search = re.search(core_pattern, re_rule)
        after_search = re.search(after_pattern, re_rule)
        if before_search is None:
            before = None
        else:
            before = [
                ipa_class[ipar].to_abstract()
                for ipar in before_search.group(0)
            ]
        if core_search is not None:
            core = ipa_class[core_search.group(0)]
        else:
            logger.error("No core")
            raise ValueError
        if after_search is None:
            after = None
        else:
            after = [
                ipa_class[ipar].to_abstract() for ipar in after_search.group(0)
            ]
        abstract_position = AbstractPosition(place, before, after)
        return Rule(abstract_position, core, ipa_class[estimated_sound])
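
To see what the three lookaround patterns extract, here is a small self-contained check on a rule string of the shape the method expects (the rule itself is invented):

import re

# Invented rule: "c" preceded by "a" or "b" and followed by "d" or "e".
re_rule = r"(?<=[ab])c(?=[de])"

before = re.search(r"(?<=\(\?\<\=\[)\w*", re_rule)
core = re.search(
    r"(?<=\))\w(?=\(\?\=)|(?<=\^)\w(?=\(\?\=)|(?<=\))\w(?=\$)", re_rule)
after = re.search(r"(?<=\(\?\=\[)\w*", re_rule)

print(before.group(0), core.group(0), after.group(0))  # -> ab c de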
Example #7
 def syllabify(self, hierarchy):
     """
     Syllables may play a role in verse classification.
     """
     if len(self.long_lines) == 0:
         logger.error("No text was imported")
         self.syllabified_text = []
     else:
         syllabifier = Syllabifier(language="old_norse",
                                   break_geminants=True)
         syllabifier.set_hierarchy(hierarchy)
         syllabified_text = []
         for i, long_line in enumerate(self.long_lines):
             syllabified_text.append([])
             for short_line in long_line:
                 assert isinstance(short_line, (ShortLine, LongLine))
                 short_line.syllabify(syllabifier)
                 syllabified_text[i].append(short_line.syllabified)
         self.syllabified_text = syllabified_text
Example #8
    def lemmatize(self, input_text, return_raw=False, return_string=False):
        """Take incoming string or list of tokens. Lookup done against a
        key-value list of lemmata-headword. If a string, tokenize with
        ``PunktLanguageVars()``. If a final period appears on a token, remove
        it, then re-add once replacement done.
        TODO: rm check for final period, change PunktLanguageVars()
        """
        assert isinstance(input_text, (list, str)), "Input must be a list or string."
        if isinstance(input_text, str):
            punkt = PunktLanguageVars()
            tokens = punkt.word_tokenize(input_text)
        else:
            tokens = input_text

        lemmatized_tokens = []
        for token in tokens:
            # check for final period
            final_period = False
            if token[-1] == ".":
                final_period = True
                token = token[:-1]

            # look for token in lemma dict keys
            if token.lower() in self.lemmata:
                headword = self.lemmata[token.lower()]

                # re-add final period if rm'd
                if final_period:
                    headword += "."

                # append to return list
                if not return_raw:
                    lemmatized_tokens.append(headword)
                else:
                    lemmatized_tokens.append(token + "/" + headword)
            # if token not found in lemma-headword list
            else:
                # re-add final period if rm'd
                if final_period:
                    token += "."

                if not return_raw:
                    lemmatized_tokens.append(token)
                else:
                    lemmatized_tokens.append(token + "/" + token)
        if return_string:
            return " ".join(lemmatized_tokens)
        return lemmatized_tokens
Example #9
"""

import re
import unicodedata

from nltk.tokenize import wordpunct_tokenize

from cltkv1.core.cltk_logger import logger

try:
    # James Tauber's greek_accentuation package
    from greek_accentuation import characters as chars
except ImportError as import_error:
    message = ('Missing "greek_accentuation" package. Install with '
               "`pip install greek-accentuation`.")
    logger.error(message)
    logger.error(import_error)
    raise

__author__ = ["Jack Duff <*****@*****.**>"]
__license__ = "MIT License. See LICENSE."

# Dictionaries of phonological reconstructions for use in transcribing.
# Probert, Philomen. 2010. Phonology, in E. Bakker, A Companion to the \
# Ancient Greek Language.
# (Entries which are commented out are realized through diacritic analysis.)

GREEK = {
    "Attic": {
        "Probert": {
            "correspondence": {