Esempio n. 1
0
def fetch_alignment(basename, langs, backend='hunalign'):
    """Build an Alignment with one column per entry of ``langs``.

    Only the first two characters of each entry are used to pick the
    underlying text, so several entries may map to the same text.

    * One distinct two-letter code: a straight alignment is created via
      ``Alignment.create_straight`` with the sentence count of that text
      and ``len(langs)`` columns.
    * Two distinct codes: the file ``<basename>/<x>-<y>.<backend>`` is
      loaded; if that raises IOError the two codes are swapped and the
      load is retried once (a failure of the retry propagates).
    * Otherwise: the three fixed files ``pl-cu``, ``cu-el`` and ``pl-el``
      are loaded and combined with ``merge_3_alignments``.

    For the last two cases the result holds the columns chosen for
    ``langs`` (in that order) followed by column index 2 of the loaded
    alignment.

    Raises AssertionError when ``langs`` is empty.
    """
    assert langs
    codes = list({entry[:2] for entry in langs})
    distinct = len(codes)

    if distinct == 1:
        # Every requested column refers to the same text.
        sentences = fetch_sentences(basename, codes[0])
        return Alignment.create_straight(len(sentences), len(langs))

    if distinct == 2:
        # Set ordering is arbitrary, so the file may be named with the
        # codes in either order; try one, then the other.
        try:
            path = "%s/%s-%s.%s" % (basename, codes[0], codes[1], backend)
            loaded = Alignment.from_file(path)
        except IOError:
            codes.reverse()
            path = "%s/%s-%s.%s" % (basename, codes[0], codes[1], backend)
            loaded = Alignment.from_file(path)
    else:
        # Fallthrough branch: treated as the pl/cu/el triple.
        pl_cu = Alignment.from_file('%s/pl-cu.%s' % (basename, backend)).as_ladder()
        cu_el = Alignment.from_file('%s/cu-el.%s' % (basename, backend)).as_ladder()
        pl_el = Alignment.from_file('%s/pl-el.%s' % (basename, backend)).as_ladder()
        # The third ladder is handed over with its pair members swapped.
        el_pl = [(second, first) for (first, second) in pl_el]
        loaded = merge_3_alignments(pl_cu, cu_el, el_pl)
        codes = ['pl', 'cu', 'el']  # column order used for the lookup below

    columns = _transpose(loaded.data)
    column_by_code = {code: columns[index]
                      for index, code in enumerate(codes)}

    # Shared tail for the two- and three-text cases.
    # NOTE(review): columns[2] is appended unconditionally; presumably the
    # extra (non-text) column of a pairwise alignment -- confirm what it is
    # for the merged three-way alignment.
    selected = [column_by_code[entry[:2]] for entry in langs] + [columns[2]]
    return Alignment(_transpose(selected))
Esempio n. 2
0
    def get_alignment(self, langs, backend=None):
        """Return an Alignment with one column per entry of ``langs``.

        Only the first two characters of each entry select the underlying
        text, so several entries may refer to the same text.

        * One distinct two-letter code: a straight alignment is built with
          ``Alignment.create_straight``.
        * Two distinct codes: the file ``<x>-<y>.<backend>`` (path resolved
          through ``self._p``) is loaded, trying both orders of the codes
          and, when ``backend`` is not given, every entry of
          ``possible_backends`` in turn.
        * Otherwise: the pairwise alignments pl-cu, cu-el and pl-el are
          obtained recursively and combined with ``merge_3_alignments``.

        For the last two cases the result holds the columns chosen for
        ``langs`` (in that order) followed by column index 2 of the loaded
        alignment.

        Raises:
            AssertionError: fewer than two entries in ``langs``, or a
                ``backend`` that is not in ``possible_backends``.
            IOError: no alignment file could be loaded for a language pair.
        """
        assert len(langs) >= 2
        assert not backend or backend in possible_backends

        real_langs = list(set(lang[:2]
                              for lang in langs))

        if len(real_langs) == 1:
            # NOTE(review): `basename` is neither a parameter nor a local of
            # this method; unless it exists at module level this branch
            # raises NameError -- confirm what should be passed here.
            text_len = len(fetch_sentences(basename, real_langs[0]))
            return Alignment.create_straight(text_len, len(langs))
        elif len(real_langs) == 2:
            backends = [backend] if backend else possible_backends
            a = None
            tried = []
            # Set ordering is arbitrary, so try both "x-y" and "y-x".
            for _ in range(2):
                langs_string = '-'.join(str(code) for code in real_langs)
                for b in backends:
                    candidate = langs_string + '.' + b
                    tried.append(candidate)
                    try:
                        a = Alignment.from_file(self._p(candidate))
                        break
                    except IOError:
                        continue
                # Compare against None explicitly: a successfully loaded
                # alignment must count as found even if it is falsy
                # (e.g. empty).
                if a is not None:
                    break
                real_langs.reverse()
            if a is None:
                raise IOError("no alignment file found (tried: %s)"
                              % ', '.join(tried))

        else: # len(real_langs) == 3 :(
            a1 = self.get_alignment(['pl', 'cu'], backend).as_ladder()
            a2 = self.get_alignment(['cu', 'el'], backend).as_ladder()
            a3 = self.get_alignment(['pl', 'el'], backend).as_ladder()
            # NOTE(review): a3 is passed on in the order requested above
            # (pl, el); a swap of its pair members used to be applied here
            # and was disabled -- confirm the order merge_3_alignments
            # expects.
            a = merge_3_alignments(a1, a2, a3)
            real_langs = ['pl', 'cu', 'el'] # needed later

        columns = _transpose(a.data)
        columns_map = {code: columns[i]
                       for i, code in enumerate(real_langs)}

        # common part for 2 and 3
        chosen_columns = [columns_map[lang[:2]] for lang in langs]
        # NOTE(review): columns[2] is appended unconditionally; presumably
        # the extra (non-text) column of a pairwise alignment -- confirm its
        # meaning for the merged three-way case.
        chosen_columns.append(columns[2])
        return Alignment(_transpose(chosen_columns))