Example #1
0
def search_path(
        rootpath: str,
        include_pairs: bool = True) -> Dict[str, List[Tuple[str, str, str]]]:
    """
    Walk ``rootpath`` and collect the ``*.mode`` files whose names match one
    of the known mode-name patterns (analyzer, generator, pair, tagger).

    Args:
        rootpath (str): Directory to walk. Symlinked directories are
            followed; directories for which ``is_loop`` returns true are
            pruned and not descended into.
        include_pairs (bool): When False, files matching the ``pair``
            pattern are not recorded.

    Returns:
        Dict[str, List[Tuple[str, str, str]]]: One list per mode type.
        Entries under 'analyzer', 'generator' and 'tagger' are
        ``(parent of the directory holding the file, mode name,
        '-'-joined language codes after to_alpha3_code)``.
        Entries under 'pair' are ``(path of the .mode file,
        to_alpha3_code(source), to_alpha3_code(target))``.
    """
    lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?'
    # NOTE: the suffix class is [A-Za-z], not [A-z]; the latter also matches
    # the punctuation between 'Z' and 'a' in ASCII ([ \ ] ^ _ `).
    type_re = {
        'analyzer':
        re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)),
        'generator':
        re.compile(r'(({0}(-{0})?)-gener[A-Za-z]*)\.mode'.format(lang_code)),
        'pair':
        re.compile(r'({0})-({0})\.mode'.format(lang_code)),
        'tagger':
        re.compile(r'(({0}(-{0})?)-tagger[A-Za-z]*)\.mode'.format(lang_code)),
    }
    modes = {
        'analyzer': [],
        'generator': [],
        'pair': [],
        'tagger': [],
    }  # type: Dict[str, List[Tuple[str, str, str]]]

    real_root = os.path.abspath(os.path.realpath(rootpath))

    for dirpath, dirnames, files in os.walk(rootpath, followlinks=True):
        if is_loop(dirpath, rootpath, real_root):
            # Emptying dirnames in place stops os.walk from descending.
            dirnames[:] = []
            continue
        for filename in files:
            if not filename.endswith('.mode'):
                continue
            # Every pattern is tried, so one file name may be recorded
            # under more than one mode type.
            for mtype, regex in type_re.items():
                m = regex.match(filename)
                if not m:
                    continue
                if mtype != 'pair':
                    modename = m.group(1)  # e.g. en-es-anmorph
                    langlist = [
                        to_alpha3_code(code) for code in m.group(2).split('-')
                    ]
                    lang_pair = '-'.join(langlist)  # e.g. en-es
                    dir_of_modes = os.path.dirname(dirpath)
                    mode = (dir_of_modes, modename, lang_pair)
                    modes[mtype].append(mode)
                elif include_pairs:
                    lang_src, lang_trg = m.groups()
                    mode = (os.path.join(dirpath, filename),
                            to_alpha3_code(lang_src),
                            to_alpha3_code(lang_trg))
                    modes[mtype].append(mode)
    return modes
Example #2
0
 def __init__(self, lang: str) -> None:
     """
     Set up the instance for ``lang`` using the entry registered in
     ``apertium.analyzers``.

     Args:
         lang (str): Language code; passed through ``to_alpha3_code``
             before the lookup.

     Raises:
         apertium.ModeNotInstalled: If the converted code is not a key of
             ``apertium.analyzers``.
     """
     self.analyzer_cmds = {}  # type: Dict[str, List[List[str]]]
     self.lang = to_alpha3_code(lang)  # type: str

     # Bail out early when nothing is registered for this language.
     if self.lang not in apertium.analyzers:
         raise apertium.ModeNotInstalled(self.lang)

     # The registry entry unpacks into exactly two values.
     registered = apertium.analyzers[self.lang]
     self.path, self.mode = registered
Example #3
0
 def __init__(self, lang: str) -> None:
     """
     Set up the instance for ``lang`` using the entry registered in
     ``apertium.taggers``.

     Args:
         lang (str): Language code; passed through ``to_alpha3_code``
             before the lookup.

     Raises:
         apertium.ModeNotInstalled: If the converted code is not a key of
             ``apertium.taggers``.
     """
     self.tagger_cmds: Dict[str, List[List[str]]] = {}
     self.lang: str = to_alpha3_code(lang)

     # Bail out early when nothing is registered for this language.
     if self.lang not in apertium.taggers:
         raise apertium.ModeNotInstalled(self.lang)

     # The registry entry unpacks into exactly two values.
     registered = apertium.taggers[self.lang]
     self.path, self.mode = registered
Example #4
0
    def generate(self, in_text, formatting='none'):  # type: (Generator, str, str) -> Union[str, List[str]]
        """
        Feed ``in_text`` through the commands from ``self._get_commands()``
        and return the output with trailing NUL characters stripped.

        ``self.lang`` is re-assigned to ``to_alpha3_code(self.lang)`` on
        every call before the lookup.

        Args:
            in_text (str): Text handed to ``execute``.
            formatting (str): Accepted but not read by this method body.

        Returns:
            Union[str, List[str]]: The result of ``execute`` after
            ``rstrip('\\x00')``.

        Raises:
            apertium.ModeNotInstalled: If the converted language code is
                not present in ``apertium.generators``.
        """
        self.lang = to_alpha3_code(self.lang)

        # Guard clause: nothing to run when no generator is registered.
        if self.lang not in apertium.generators:
            raise apertium.ModeNotInstalled(self.lang)

        pipeline = list(self._get_commands())
        generated = execute(in_text, pipeline)
        return generated.rstrip('\x00')