Ejemplo n.º 1
0
def call_apertium(text, mode_path):
    """
    Run ``text`` through an Apertium mode and return the processed text.

    ``mode_path`` is the path to a mode file, as in
    '~/apertium-foo-bar/modes/foo-bar.mode'. The mode name is the file name
    up to its first dot ('foo-bar'); the data directory handed to Apertium
    is the path with its last two components removed ('~/apertium-foo-bar').
    """
    chunks = mode_path.split('/')
    mode = chunks[-1].split('.')[0]
    path = '/'.join(chunks[:-2])
    # Each option is its own argument: sh does not split arguments on
    # whitespace, so '-u -d <path>' would reach apertium as a single word.
    p = sh.apertium('-u', '-d', path, mode,
                    _in=text.encode('utf-8'), _encoding='utf-8')
    return p.stdout.decode('utf-8')
Ejemplo n.º 2
0
def call_apertium(text, mode_path):
    """
    Process ``text`` with the Apertium mode found at ``mode_path``.

    ``mode_path`` looks like '~/apertium-foo-bar/modes/foo-bar.mode': the
    part of the file name before the first dot is used as the mode name and
    everything except the last two path components is used as the data
    directory. Returns Apertium's standard output decoded as UTF-8.
    """
    chunks = mode_path.split('/')
    mode = chunks[-1].split('.')[0]
    path = '/'.join(chunks[:-2])
    # sh passes every positional argument as exactly one argv entry, so the
    # flags and the directory must be given separately instead of being
    # formatted into one space-separated string.
    p = sh.apertium('-u', '-d', path, mode,
                    _in=text.encode('utf-8'), _encoding='utf-8')
    return p.stdout.decode('utf-8')
Ejemplo n.º 3
0
 def translate_with_apertium(self, input, pair):
     """
     Translate ``input`` with Apertium for the language pair ``pair``.

     The text is written to a temporary file whose name is handed to the
     ``apertium`` command together with ``self.pack_dir``. Returns the
     command result converted to ``str``.

     Raises ``Exception`` when the command fails with ``ErrorReturnCode``;
     the original error is kept as the exception's cause.
     """
     try:
         # Works with a temporary file as buffer (opened in text mode)
         with NamedTemporaryFile(mode='w+t') as temp_input_file:
             temp_input_file.writelines(input)
             # NOTE(review): rewinding presumably also pushes the buffered
             # text to disk before apertium opens the file by name - confirm.
             temp_input_file.seek(0)
             input_translated = apertium('-ud', self.pack_dir, pair,
                                         temp_input_file.name)
     except ErrorReturnCode as e:
         # Chain the cause so the failing command's details stay visible
         # in the traceback.
         raise Exception('Unable to translate this string.') from e
     return str(input_translated)
Ejemplo n.º 4
0
 def launch_apertium(value, input):
     """
     Produce text either by running Apertium or by reading a file.

     * ``value`` ending in '.mode': ``input`` is piped through that Apertium
       mode (mode name = file name up to its first dot, data directory =
       path without its last two components) and the output is returned.
     * ``value`` ending in '.txt': the file's contents are returned as-is.

     Raises ``click.BadParameter`` for any other ``value``.
     """
     if value.endswith('.mode'):
         chunks = value.split('/')
         mode = chunks[-1].split('.')[0]
         path = '/'.join(chunks[:-2])
         # Options are passed one per argument; sh does not split a single
         # string on whitespace.
         p = sh.apertium('-u', '-d', path, mode,
                         _in=input.encode('utf-8'), _encoding='utf-8')
         output = p.stdout.decode('utf-8')
     elif value.endswith('.txt'):
         # Context manager so the file handle is closed deterministically.
         with open(value) as txt_file:
             output = txt_file.read()
     else:
         raise click.BadParameter('Invalid argument. Please specify either '
                                  'path to .txt file or path to '
                                  'Apertium translator / morhological analyzer for your language pair.')
     return output
Ejemplo n.º 5
0
    def generate_anmor(value, text):
        """
        Run ``text`` through the morphological analyzer guessed from ``value``.

        When ``value`` ends in '.mode', the analyzer mode name is derived
        from the mode file name: a trailing '-tagger' part is dropped and
        '-anmor' is appended. The data directory is the path without its
        last two components. Returns the analyzer output, or ``None`` when
        ``value`` is not a '.mode' path.
        """
        # from tagger or translation mode, try to guess morphological analyzer mode
        if value.endswith('.mode'):
            chunks = value.split('/')
            mode = chunks[-1].split('.')[0]
            path = '/'.join(chunks[:-2])
            if mode.endswith('tagger'):
                mode = '-'.join(mode.split('-')[:-1])
            mode += '-anmor'
            # One option per argument: sh does not split a single string on
            # whitespace, so '-u -d <path>' would arrive as one word.
            p = sh.apertium('-u', '-d', path, mode,
                            _in=text.encode('utf-8'), _encoding='utf-8')
            output = p.stdout.decode('utf-8')
        else:
            output = None  # well, should do without it
        return output
Ejemplo n.º 6
0
 def launch_apertium(value, input):
     """
     Produce text either by running Apertium or by reading a file.

     * ``value`` ending in '.mode': ``input`` is piped through that Apertium
       mode (mode name = file name up to its first dot, data directory =
       path without its last two components) and the output is returned.
     * ``value`` ending in '.txt': the file's contents are returned as-is.

     Raises ``click.BadParameter`` for any other ``value``.
     """
     if value.endswith('.mode'):
         chunks = value.split('/')
         mode = chunks[-1].split('.')[0]
         path = '/'.join(chunks[:-2])
         # '-d' and its directory are separate arguments; sh does not split
         # a single string on whitespace.
         p = sh.apertium('-d', path,
                         mode,
                         _in=input.encode('utf-8'),
                         _encoding='utf-8')
         output = p.stdout.decode('utf-8')
     elif value.endswith('.txt'):
         # Context manager so the file handle is closed deterministically.
         with open(value) as txt_file:
             output = txt_file.read()
     else:
         # Adjacent literals are concatenated verbatim, so each piece needs
         # its own trailing space.
         raise click.BadParameter(
             'Invalid argument. Please specify either '
             'path to .txt file or path to '
             'Apertium translator / POS tagger for your language pair.')
     return output
Ejemplo n.º 7
0
    def generate_anmor(value, text):
        """
        Analyze ``text`` with the morphological analyzer guessed from ``value``.

        For a ``value`` ending in '.mode', the mode file name (up to its
        first dot) is turned into an analyzer mode name by dropping a
        trailing '-tagger' part, if present, and appending '-anmor'. The
        data directory is the path minus its last two components. Any other
        ``value`` yields ``None``.
        """
        # from tagger or translation mode, try to guess morphological analyzer mode
        if value.endswith('.mode'):
            chunks = value.split('/')
            mode = chunks[-1].split('.')[0]
            path = '/'.join(chunks[:-2])
            if mode.endswith('tagger'):
                mode = '-'.join(mode.split('-')[:-1])
            mode += '-anmor'
            # Flags and directory are passed as separate arguments because
            # sh hands each positional argument over as one argv entry.
            p = sh.apertium('-u', '-d', path,
                            mode,
                            _in=text.encode('utf-8'),
                            _encoding='utf-8')
            output = p.stdout.decode('utf-8')
        else:
            output = None  # well, should do without it
        return output
Ejemplo n.º 8
0
    def launch_apertium(value, input):
        """
        Produce text by running Apertium or by reading pre-made files.

        * ``value`` ending in '.mode': ``input`` is piped through that
          Apertium mode (mode name = file name up to its first dot, data
          directory = path without its last two components).
        * ``value`` ending in '.txt': the file's contents, with trailing
          newlines removed.
        * ``value`` ending in '.lst': the first line of the list file is a
          directory offset and every following line is a file name under
          it. Each listed file contributes one output line in which its own
          newlines are replaced by "@@@".

        Raises ``click.BadParameter`` for any other ``value``.
        """
        if value.endswith('.mode'):
            chunks = value.split('/')
            mode = chunks[-1].split('.')[0]
            path = '/'.join(chunks[:-2])
            # One option per argument: sh does not split a single string on
            # whitespace.
            p = sh.apertium('-u', '-d', path,
                            mode,
                            _in=input.encode('utf-8'),
                            _encoding='utf-8')
            output = p.stdout.decode('utf-8')
        elif value.endswith('.txt'):
            with open(value) as txt_file:
                output = txt_file.read().rstrip('\n')

        elif value.endswith('.lst'):
            # get a list of machine-translated documents and insert their contents here
            # the first line is an offset, the rest are filenames
            # Newlines are replaced by "@@@" which will be processed later by a custom filter in the docgisting.html
            # template (ugly hack)
            documents = []
            with open(value) as listing:
                offset = listing.readline().rstrip('\n')
                if not offset.endswith('/'):
                    offset = offset + '/'
                for mtfilename in listing:
                    with open((offset + mtfilename).rstrip('\n')) as mtfile:
                        mtcontents = mtfile.read()
                    # not sure about this last \n
                    documents.append(mtcontents.replace("\n", "@@@") + "\n")
            output = "".join(documents)
        else:
            raise click.BadParameter(
                'Invalid argument. Please specify '
                'path to .txt file, '
                'path to .lst file, '
                'or path to '
                'Apertium translator / morhological analyzer for your language pair.'
            )
        return output
Ejemplo n.º 9
0
PATH_TO_APERTIUM = "/home/zhake/Source/apertium-eng-kaz"
TRANSLATION_DIRECTION = "eng-kaz"
# NOTE(review): the backslash continuation sits inside the string literal, so
# the leading indentation of the next line becomes part of the path - confirm
# that the directory name really contains those extra spaces.
PATH_TO_MORPH_LANG_NMT_MODEL = "/media/zhake/Data/Универ/NMT/Experiments/МЯ с и без \
    сегментации/С сегментацией без слов/dataset/nmt_attention_model"

# read the text from standard input
for source_text_line in sys.stdin:
    # ==========
    # run morphological analysis to obtain MorphLang_sl
    # ==========

    # apertium -d '/home/zhake/Source/apertium-eng-kaz' eng-kaz-tagger
    morph_lang_sl = str(
        sh.apertium(
            sh.echo(source_text_line),
            "-d",
            PATH_TO_APERTIUM,
            TRANSLATION_DIRECTION + "-tagger",
        ))

    # lowercase the whole text because the dictionary is lowercase;
    # this must not be done before morphological analysis, since case matters for it
    morph_lang_sl = morph_lang_sl.lower()

    # ==========
    # separate tags and words
    # ==========

    # "cleanup" so that words are separated only by "$^"
    # (raw string: "\$" and "\^" are not valid escapes in a normal literal)
    morph_lang_sl = re.sub(pattern=r"\$.+?\^", repl="$^", string=morph_lang_sl)

    # strip whitespace at the edges, the first "^" character and the last "$" character
Ejemplo n.º 10
0
 def local_pairs(self):
     """
     Return the lines printed by ``apertium -d <self.pack_dir> -l``.

     The command output is stripped, split on newlines, and every resulting
     entry is stripped of surrounding whitespace.
     """
     listing = apertium('-d', self.pack_dir, '-l').strip()
     pairs = []
     for entry in listing.split('\n'):
         pairs.append(entry.strip())
     return pairs