예제 #1
0
def test_new_model_order():
    """Check ``order()`` on an empty model and on the test ARPA file.

    A freshly constructed model reports ``None``; the first model loaded
    from ``TEST_ARPA`` must report order 5 with every parser in ``PARSERS``.
    """
    empty_model = ARPAModelSimple()
    assert empty_model.order() is None

    for parser_name in PARSERS:
        loaded_models = arpa.loadf(TEST_ARPA, parser=parser_name)
        first_model = loaded_models[0]
        assert first_model.order() == 5
예제 #2
0
def cal_A_adapted_arpa(B, f_B_star, B_hist_index, alpha, z_epsilon):
    """Build the adapted ARPA model ``A`` from the background model ``B``.

    Unigram probabilities are ``p(B, w) * alpha[w[0]] / z_epsilon``.  For every
    order ``n >= 2`` and every history ``h`` listed in ``B_hist_index[n - 1]``
    the back-off weight of ``B`` is rescaled by ``z(h[1:]) / z(h)`` and each
    n-gram ``h + (w,)`` receives the probability returned by ``cal_p_A_old``.
    All values are stored as logarithms in base ``B._base``.

    Args:
        B: Background model; read through ``_entries``, ``_base``, ``_bos``,
            ``order()`` and ``counts()``.
        f_B_star: Only forwarded to ``cal_z``.
        B_hist_index: Mapping from history length to a mapping
            ``history tuple -> iterable of following words``.
        alpha: Mapping from a word to its scaling factor.
        z_epsilon: Normalisation term of the empty history.

    Returns:
        A new ``ARPAModelSimple`` with the adapted entries, back-off weights
        and the n-gram counts copied from ``B``.

    Raises:
        AssertionError: If the adapted unigram probabilities do not sum to 1
            within an absolute tolerance of 1e-4.

    Side effects:
        Rebinds and mutates the module-level caches ``zh1`` and ``zh2``.
    """
    A_adapted = ARPAModelSimple()

    # zh1 holds the normalisation terms of the previous order, zh2 those of
    # the order currently being processed.
    # NOTE(review): presumably cal_z fills zh2 as a side effect -- confirm.
    global zh1, zh2

    check_sum = 0

    # unigram
    print("processing unigram...")
    for e in B._entries(1):
        w = e[1]
        p_A_w = float(p(B, w)) * alpha[w[0]] / z_epsilon
        A_adapted.add_entry(ngram=w, p=math.log(p_A_w, B._base))
        check_sum += p_A_w
    # The deviation must be checked in both directions: without abs() any
    # total below 1 (even 0) would silently pass the sanity check.
    assert abs(check_sum - 1) < 0.0001
    zh2[tuple()] = z_epsilon

    # ngram, n >= 2
    for n in range(2, B.order() + 1):
        print("processing %d-gram..." % n)
        progress_count = 0

        # Rotate the caches: what was computed for order n - 1 becomes the
        # lookup table for the shortened histories h[1:] of order n.
        zh1.clear()
        zh1 = zh2
        zh2 = dict()

        len_h = n - 1
        for h, w_list in B_hist_index[len_h].items():
            z_h = cal_z(h, B, f_B_star, B_hist_index, alpha, z_epsilon)
            # Reuse the cached value for the shortened history if available.
            z_h_prime = zh1.get(h[1:], None)
            if z_h_prime is None:
                z_h_prime = cal_z(h[1:], B, f_B_star, B_hist_index, alpha,
                                  z_epsilon)

            # B stores log back-off weights; convert to linear scale, rescale
            # and store the logarithm again.
            bow_A_h = (B._base**float(B._bos[h])) * z_h_prime / z_h
            A_adapted._bos[h] = math.log(bow_A_h, B._base)

            for w in w_list:
                hw = h + (w, )
                p_A_hw = cal_p_A_old(hw, alpha, B, z_h)
                A_adapted.add_entry(ngram=hw, p=math.log(p_A_hw, B._base))

                progress_count += 1
                if progress_count % 1000000 == 0:
                    print(progress_count)

    for order, count in B.counts():
        A_adapted.add_count(order, count)

    return A_adapted
예제 #3
0
def test_new_model_log_s():
    """An integer argument to ``log_p`` must raise ``ValueError``.

    NOTE(review): the test name mentions ``log_s`` but the body exercises
    ``log_p`` -- confirm which method was intended.
    """
    empty_model = ARPAModelSimple()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        empty_model.log_p(1)
예제 #4
0
def test_new_model_counts():
    """A freshly constructed model reports an empty list of counts."""
    reported_counts = ARPAModelSimple().counts()
    assert reported_counts == []
예제 #5
0
def test_new_model_contains():
    """A word added as a one-element n-gram is found with ``in``."""
    model = ARPAModelSimple()
    unigram = ["foo"]
    model.add_entry(unigram, 1.0)
    assert "foo" in model
예제 #6
0
def test_input_equality():
    """String and tuple spellings of an n-gram must behave the same.

    On an empty model every lookup is expected to raise ``KeyError``; on the
    model loaded from ``TEST_ARPA`` both spellings must yield equal
    probabilities.
    """
    # (space-separated string, equivalent tuple of words)
    queries = (
        ('foo', ('foo', )),
        ('xxx', ('xxx', )),
        ('a little', ('a', 'little')),
        ('xxx little', ('xxx', 'little')),
    )

    empty_model = ARPAModelSimple()
    for as_text, as_tuple in queries:
        with pytest.raises(KeyError):
            assert empty_model.p(as_text) == empty_model.p(as_tuple)

    loaded_model = arpa.loadf(TEST_ARPA)[0]
    for as_text, as_tuple in queries:
        assert loaded_model.p(as_text) == loaded_model.p(as_tuple)
예제 #7
0
def test_log_s_int():
    """``log_s`` must reject an integer argument with ``ValueError``."""
    empty_model = ARPAModelSimple()
    invalid_sentence = 1
    with pytest.raises(ValueError):
        empty_model.log_s(invalid_sentence)
예제 #8
0
def test_log_p_empty_string():
    """``log_p`` must reject the empty string with ``ValueError``."""
    empty_model = ARPAModelSimple()
    empty_ngram = ''
    with pytest.raises(ValueError):
        empty_model.log_p(empty_ngram)
예제 #9
0
def test_new_model_vocabulary():
    """A freshly constructed model has an empty vocabulary list."""
    reported_vocabulary = ARPAModelSimple().vocabulary()
    assert reported_vocabulary == []
예제 #10
0
def test_log_p_raw():
    """``log_p_raw`` on an empty model raises ``KeyError`` for any word."""
    empty_model = ARPAModelSimple()
    unknown_word = 'UnladenSwallow'
    with pytest.raises(KeyError):
        empty_model.log_p_raw(unknown_word)
def test_new_model_contains():
    """A word added as a one-element tuple n-gram is found with ``in``."""
    model = ARPAModelSimple()
    unigram = ('foo', )
    model.add_entry(unigram, 1.0)
    assert 'foo' in model
예제 #12
0
def test_log_s_int():
    """Passing an integer to ``log_s`` is expected to raise ``ValueError``."""
    model = ARPAModelSimple()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.log_s(1)
예제 #13
0
def test_log_p_empty_tuple():
    """``log_p`` must reject an empty tuple with ``ValueError``."""
    empty_model = ARPAModelSimple()
    empty_ngram = tuple()
    with pytest.raises(ValueError):
        empty_model.log_p(empty_ngram)
예제 #14
0
def test_new_model_len():
    """A freshly constructed model has length zero."""
    entry_count = len(ARPAModelSimple())
    assert entry_count == 0
예제 #15
0
def test_log_p_empty_string():
    """An empty string passed to ``log_p`` is expected to raise ``ValueError``."""
    model = ARPAModelSimple()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.log_p('')
예제 #16
0
def test_new_model_order():
    """A freshly constructed model reports ``None`` as its order."""
    reported_order = ARPAModelSimple().order()
    assert reported_order is None
예제 #17
0
def test_log_p_empty_tuple():
    """An empty tuple passed to ``log_p`` is expected to raise ``ValueError``."""
    model = ARPAModelSimple()
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        model.log_p(tuple())
예제 #18
0
def test_new_model_contains_not():
    """A freshly constructed model does not contain an arbitrary word."""
    empty_model = ARPAModelSimple()
    absent_word = "foo"
    assert absent_word not in empty_model
예제 #19
0
def test_log_p_raw():
    """Looking up an unseen word with ``log_p_raw`` must raise ``KeyError``."""
    model = ARPAModelSimple()
    expect_key_error = pytest.raises(KeyError)
    with expect_key_error:
        model.log_p_raw('UnladenSwallow')