コード例 #1
0
ファイル: PairManager.py プロジェクト: mik01aj/corthus
 def from_file(cls, file_path):
     """Build a PairManager from a pair file whose name ends in ``xx-yy``.

     The two two-character codes at the end of ``file_path`` are used as the
     keys under which each row's pair members are looked up.  The first row
     read carries only the hapax probability (the last whitespace-separated
     token of its ``_f`` field).  Every later row carries a count (first
     token of ``_f``) and a probability (last token of ``_f``).

     Args:
         file_path: Path of the file to load.  It must end in two word
             characters, a hyphen, and two more word characters.

     Returns:
         A new PairManager.  ``pairs`` maps ``(row[lang1], row[lang2])`` to
         ``(count, prob)`` and ``pairs_by_prob`` holds
         ``(prob, row[lang1], row[lang2])`` tuples sorted in descending order.

     Raises:
         ValueError: If ``file_path`` does not end in ``xx-yy``.
     """
     # Raw string: '\w' inside a plain literal is an invalid escape sequence.
     m = re.match(r'.*(\w\w)-(\w\w)$', file_path)
     if m is None:
         # Fail with a clear message instead of AttributeError on None.group.
         raise ValueError(
             "cannot determine language pair from file name: %r" % (file_path,))
     lang1, lang2 = m.groups()
     pm = PairManager()
     with codecs.open(file_path) as f:
         rows = iter(NewAlignment.read(f))
         # The first row is special: it only stores the hapax probability.
         for header in rows:
             pm.hapax_prob = float(header['_f'].split()[-1])
             break
         for row in rows:
             # Split once; the first token is the count, the last the probability.
             fields = row['_f'].split()
             count = int(fields[0])
             prob = float(fields[-1])
             pm.pairs[row[lang1], row[lang2]] = (count, prob)
             pm.pairs_by_prob.append((prob, row[lang1], row[lang2]))
     pm.pairs_by_prob.sort(reverse=True)
     return pm
コード例 #2
0
ファイル: evaluation.py プロジェクト: mik01aj/corthus
                    x_longest = x
                    y_longest = y
            else:
                M[x][y] = 0
    return (longest, len(s1) - x_longest, len(s2) - y_longest)


if __name__ == "__main__":
    # Ad-hoc evaluation run: scores the tested alignment (A) and a one-row
    # baseline against the alignment read from the "everything" file (B).
    # NOTE(review): Alignment is not referenced below; the import is kept in
    # case it is needed for its side effects.
    from Alignment import Alignment
    from NewAlignment import NewAlignment

    langs = ("pl", "cu")

    # A - tested alignment
    tf = TextFolder("texts/kanon_izr/")
    aA = NewAlignment.from_old_alignment(
        tf.get_alignment(langs, "my"), langs, [tf.get_sentences(lang) for lang in langs]
    )

    # B - correct alignment
    with open("texts/kanon_izr/everything") as f:
        aB = NewAlignment.read(f)

    # Baseline: a single row holding, per language, all sentences joined by
    # spaces.  Keyed from `langs` so the language pair is defined in one place.
    whole_texts = dict((lang, " ".join(tf.get_sentences(lang))) for lang in langs)
    baseline = NewAlignment()
    baseline.easy_append(**whole_texts)

    aB.pretty_print(*langs)

    # Parenthesised single-argument print is valid on both Python 2 and 3.
    print(evaluate_alignment(aA, aB))
    print(evaluate_alignment(baseline, aB))