Example #1
0
 def show_matrix(self, es, fs, t, a):
     '''
     Render the viterbi alignment of one sentence pair as a matrix.

     The alignment comes from ``self.viterbi_alignment(es, fs, t, a)``;
     its ``items()`` are handed to ``utility.matrix`` together with
     ``len(es)``, ``len(fs)`` and the two token lists themselves.

     es, fs -- the two token lists of the sentence pair
     t, a   -- forwarded unchanged to ``viterbi_alignment`` (in the
               example below they are the pair returned by ``train``)

     Returns whatever ``utility.matrix`` returns.

     Layout sketch:

           fs
      -------------
     e|           |
     s|           |
      |           |
      -------------

     Example (illustrative only, not an executable doctest):

         sentences = [("僕 は 男 です", "I am a man"),
                      ("私 は 女 です", "I am a girl"),
                      ("私 は 先生 です", "I am a teacher"),
                      ("彼女 は 先生 です", "She is a teacher"),
                      ("彼 は 先生 です", "He is a teacher"),
                      ]
         t, a = train(sentences, loop_count=1000)
         args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)

     Grid listed for ``args`` (NOTE(review): the exact rendering depends
     on ``utility.matrix`` and on the trained parameters -- confirm):

         |x| | | |
         | | |x| |
         | | | |x|
         | | |x| |
     '''
     # Compute the alignment first, then measure the sentences.
     alignment_pairs = self.viterbi_alignment(es, fs, t, a).items()
     return utility.matrix(len(es), len(fs), alignment_pairs, es, fs)
Example #2
0
def show_matrix(es, fs, t, a):
    '''
    Render the viterbi alignment of one sentence pair as a matrix.

    The alignment comes from ``viterbi_alignment(es, fs, t, a)``; its
    ``items()`` are handed to ``utility.matrix`` together with
    ``len(es)`` and ``len(fs)``.

    es, fs -- the two token lists of the sentence pair
    t, a   -- forwarded unchanged to ``viterbi_alignment`` (in the
              example below they are the pair returned by ``train``)

    Returns whatever ``utility.matrix`` returns.

    Layout sketch:

          fs
     -------------
    e|           |
    s|           |
     |           |
     -------------

    Example (illustrative only, not an executable doctest):

        sentences = [("僕 は 男 です", "I am a man"),
                     ("私 は 女 です", "I am a girl"),
                     ("私 は 先生 です", "I am a teacher"),
                     ("彼女 は 先生 です", "She is a teacher"),
                     ("彼 は 先生 です", "He is a teacher"),
                     ]
        t, a = train(sentences, loop_count=1000)
        args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
        print(show_matrix(*args))

    Grid listed for ``args`` (NOTE(review): the exact rendering depends
    on ``utility.matrix`` and on the trained parameters -- confirm):

        |x| | | |
        | | |x| |
        | | | |x|
        | | |x| |
    '''
    # Compute the alignment first, then measure the sentences.
    alignment_pairs = viterbi_alignment(es, fs, t, a).items()
    return utility.matrix(len(es), len(fs), alignment_pairs)
Example #3
0
    e2f_train = ibmmodel2._train(e2f_corpus, loop_count=10)
    e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()

    return alignment(es, fs, e2f, f2e)


if __name__ == '__main__':
    # Manual check, part 1: combine two hand-written alignments.
    # es has 9 tokens and fs has 10; every pair below stays within
    # (1..9, 1..10), so the pairs look like 1-based
    # (es position, fs position) links -- TODO confirm against
    # _alignment / matrix.
    es = "michael assumes that he will stay in the house".split()
    fs = "michael geht davon aus , dass er im haus bleibt".split()
    # Two alignments of the same sentence pair; judging by the names,
    # presumably one per translation direction -- verify.
    e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6),
           (4, 7), (7, 8), (9, 9), (6, 10)]
    f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8),
           (8, 8), (9, 9), (5, 10), (6, 10)]
    from smt.utils.utility import matrix
    # Print both input alignments, then the result of
    # _alignment(es, fs, e2f, f2e), for visual comparison. es and fs are
    # passed along to matrix as well (presumably as axis labels -- confirm).
    print(matrix(len(es), len(fs), e2f, es, fs))
    print(matrix(len(es), len(fs), f2e, es, fs))
    ali = _alignment(es, fs, e2f, f2e)
    print(matrix(len(es), len(fs), ali, es, fs))

    # Manual check, part 2: symmetrization on a toy Japanese-English corpus.
    from smt.utils.utility import mkcorpus
    # NOTE(review): `sentenses` is a misspelling of `sentences`; the name is
    # left untouched because code after this excerpt may refer to it.
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    # es / fs are rebound here to the sentence pair used by this second check.
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()
Example #4
0
                     ]
    >>> t, a = train(sentences, loop_count=1000)
    >>> args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
    |x| | | |
    | | |x| |
    | | | |x|
    | | |x| |
    '''
    max_a = viterbi_alignment(es, fs, t, a).items()
    m = len(es)
    n = len(fs)
    return utility.matrix(m, n, max_a)


if __name__ == '__main__':
    # Demo: print a small fixed matrix, then train on a toy corpus and
    # print the alignment matrix for one of its sentence pairs.
    #
    # An alternative English-German toy corpus that can be swapped in:
    #   ("the house", "das Haus"), ("the book", "das Buch"),
    #   ("a book", "ein Buch")

    # utility.matrix for m=2, n=3 and the pairs (1, 1) and (2, 3).
    demo_grid = utility.matrix(2, 3, [(1, 1), (2, 3)])
    print(demo_grid)

    sentences = [
        ("僕 は 男 です", "I am a man"),
        ("私 は 女 です", "I am a girl"),
        ("私 は 先生 です", "I am a teacher"),
        ("彼女 は 先生 です", "She is a teacher"),
        ("彼 は 先生 です", "He is a teacher"),
    ]
    t, a = train(sentences, loop_count=100)

    # Sentence pair to display, tokenised on whitespace.
    query_es = "私 は 先生 です".split()
    query_fs = "I am a teacher".split()
    print(show_matrix(query_es, query_fs, t, a))
Example #5
0
                     ("彼 は 先生 です", "He is a teacher"),
                     ]
    >>> t, a = train(sentences, loop_count=1000)
    >>> args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
    |x| | | |
    | | |x| |
    | | | |x|
    | | |x| |
    '''
    max_a = viterbi_alignment(es, fs, t, a).items()
    m = len(es)
    n = len(fs)
    return utility.matrix(m, n, max_a)


if __name__ == '__main__':
    # Demo: print a small fixed matrix, then train on a toy corpus and
    # print the alignment matrix for one of its sentence pairs.
    #
    # An alternative English-German toy corpus that can be swapped in:
    #   ("the house", "das Haus"), ("the book", "das Buch"),
    #   ("a book", "ein Buch")

    # utility.matrix for m=2, n=3 and the pairs (1, 1) and (2, 3).
    demo_grid = utility.matrix(2, 3, [(1, 1), (2, 3)])
    print(demo_grid)

    sentences = [
        ("僕 は 男 です", "I am a man"),
        ("私 は 女 です", "I am a girl"),
        ("私 は 先生 です", "I am a teacher"),
        ("彼女 は 先生 です", "She is a teacher"),
        ("彼 は 先生 です", "He is a teacher"),
    ]
    t, a = train(sentences, loop_count=100)

    # Sentence pair to display, tokenised on whitespace.
    query_es = "私 は 先生 です".split()
    query_fs = "I am a teacher".split()
    print(show_matrix(query_es, query_fs, t, a))
Example #6
0
    e2f_train = ibmmodel2._train(e2f_corpus, loop_count=1000)
    e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()

    return alignment(es, fs, e2f, f2e)


if __name__ == '__main__':
    # Manual check, part 1: combine two hand-written alignments.
    # es has 9 tokens and fs has 10; every pair below stays within
    # (1..9, 1..10), so the pairs look like 1-based
    # (es position, fs position) links -- TODO confirm against
    # _alignment / matrix.
    es = "michael assumes that he will stay in the house".split()
    fs = "michael geht davon aus , dass er im haus bleibt".split()
    # Two alignments of the same sentence pair; judging by the names,
    # presumably one per translation direction -- verify.
    e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6),
           (4, 7), (7, 8), (9, 9), (6, 10)]
    f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8),
           (8, 8), (9, 9), (5, 10), (6, 10)]
    from smt.utils.utility import matrix
    # Print both input alignments, then the result of
    # _alignment(es, fs, e2f, f2e), for visual comparison.
    print(matrix(len(es), len(fs), e2f))
    print(matrix(len(es), len(fs), f2e))
    ali = _alignment(es, fs, e2f, f2e)
    print(matrix(len(es), len(fs), ali))

    # Manual check, part 2: symmetrization on a toy Japanese-English corpus.
    from smt.utils.utility import mkcorpus
    # NOTE(review): `sentenses` is a misspelling of `sentences`; the name is
    # left untouched because code after this excerpt may refer to it.
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    # es / fs are rebound here to the sentence pair used by this second check.
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()