コード例 #1
0
def load_adaptation_sample(filename):
    """Load the first language model stored in an ARPA file.

    Args:
        filename: Path of the ARPA file. A name ending in ``.gz`` is opened
            with ``gzip`` in text mode and handed to ``arpa.load``; any
            other name is passed directly to ``arpa.loadf``.

    Returns:
        The first element of the model sequence returned by the loader.
    """
    if filename.endswith(".gz"):
        # Close the gzip stream as soon as parsing has finished instead of
        # leaving the handle open until it is garbage collected.
        with gzip.open(filename, mode='rt') as fp:
            A_models = arpa.load(fp)
    else:
        A_models = arpa.loadf(filename)
    return A_models[0]
コード例 #2
0
ファイル: test_arpa.py プロジェクト: dipanjannag/python-arpa
def test_load_dump():
    """Round trip: dumping the loaded model must reproduce the file's text."""
    with open(TEST_ARPA, 'rt') as source:
        model = arpa.load(source)[0]
        # Rewind so the source text can be read again for the comparison.
        source.seek(0)
        with tempfile.TemporaryFile(mode='w+t') as scratch:
            arpa.dump(model, scratch)
            # Rewind the scratch file to read back what was just written.
            scratch.seek(0)
            expected_text = source.read()
            dumped_text = scratch.read()
            assert expected_text == dumped_text
コード例 #3
0
ファイル: test_arpa.py プロジェクト: oplatek/python-arpa
def test_load_dump():
    """Check that load followed by dump yields text equal to the input file."""
    with open(TEST_ARPA, "rt") as arpa_file:
        first_model = arpa.load(arpa_file)[0]
        # Go back to the start; the file content is compared further down.
        arpa_file.seek(0)
        with tempfile.TemporaryFile(mode="w+t") as dump_file:
            arpa.dump(first_model, dump_file)
            # Go back to the start of the freshly written dump.
            dump_file.seek(0)
            original = arpa_file.read()
            round_tripped = dump_file.read()
            assert original == round_tripped
コード例 #4
0
def load_background(filename):
    """Load a background ARPA model and derive two lookup tables from it.

    Args:
        filename: Path of the ARPA file. A name ending in ``.gz`` is opened
            with ``gzip`` in text mode and handed to ``arpa.load``; any
            other name is passed directly to ``arpa.loadf``.

    Returns:
        A tuple ``(B, f_B_star, B_hist_index)`` where

        * ``B`` is the first model found in the file,
        * ``f_B_star`` maps every n-gram key ``hw`` of order >= 2 to its
          discounted probability (see the derivation in the body),
        * ``B_hist_index`` is a list indexed by ``len(h)`` whose items map a
          history ``h`` to the list of words ``w`` for which ``hw`` is an
          entry of the model.

    Side effects:
        Prints ``"<n>-gram"`` once per order in each of the two passes.
    """
    if filename.endswith(".gz"):
        # Close the gzip stream as soon as parsing has finished instead of
        # leaving the handle open until it is garbage collected.
        with gzip.open(filename, mode='rt') as fp:
            B_models = arpa.load(fp)
    else:
        B_models = arpa.loadf(filename)
    B = B_models[0]  # ARPA files may contain several models.
    order = B.order()

    # We can recover f_B_star (i.e., discounted probabilities) from the
    # interpolated probabilities. B is an interpolated model, i.e.
    #
    #    p_B(w|h) = f_B_star(w|h) + bow_B(h) * p_B(w|h')
    #
    # and therefore
    #
    #    f_B_star(w|h) = p_B(w|h) - bow_B(h) * p_B(w|h')
    #
    # where h' = h[1:].
    f_B_star = dict()
    for n in range(2, order + 1):
        print("%d-gram" % n)
        for e in B._entries(n):  # entry format: (log10(prob), hw, log10(bow))
            hw = e[1]
            h = hw[:-1]
            h_prime_w = hw[1:]
            # The product bow_B(h) * p_B(w|h') is formed in the log domain
            # (a sum of logs) before exponentiating with the model's base.
            f_B_star[hw] = B._base**float(e[0]) - B._base**(
                float(B._bos[h]) + float(log_p(B, h_prime_w)))
            # NOTE(review): f_B_star[hw] >= 0 is not asserted here (the check
            # was left disabled) - TODO confirm whether it can be violated.

    # Index structure:
    # len(h) --> h --> {w | hw is seen in the corpus}, where len(h) >= 1
    B_hist_index = [defaultdict(list) for _ in range(order)]
    for n in range(2, order + 1):
        print("%d-gram" % n)
        for e in B._entries(n):
            hw = e[1]
            h = hw[:-1]
            w = hw[-1]
            B_hist_index[len(h)][h].append(w)

    return B, f_B_star, B_hist_index
コード例 #5
0
ファイル: test_arpa.py プロジェクト: dipanjannag/python-arpa
def test_load_option_parser():
    """``arpa.load`` is expected to raise ``ValueError`` for ``parser='foo'``."""
    unknown_parser = 'foo'
    with pytest.raises(ValueError):
        arpa.load(None, parser=unknown_parser)
コード例 #6
0
ファイル: test_arpa.py プロジェクト: dipanjannag/python-arpa
def test_load_option_model():
    """``arpa.load`` is expected to raise ``ValueError`` for ``model='foo'``."""
    unknown_model = 'foo'
    with pytest.raises(ValueError):
        arpa.load(None, model=unknown_model)
コード例 #7
0
ファイル: test_arpa.py プロジェクト: sfischer13/python-arpa
def test_load_option_parser():
    """Passing ``parser='foo'`` to ``arpa.load`` must end in a ``ValueError``."""
    bad_parser_name = 'foo'
    with pytest.raises(ValueError):
        arpa.load(None, parser=bad_parser_name)
コード例 #8
0
ファイル: test_arpa.py プロジェクト: sfischer13/python-arpa
def test_load_option_model():
    """Passing ``model='foo'`` to ``arpa.load`` must end in a ``ValueError``."""
    bad_model_name = 'foo'
    with pytest.raises(ValueError):
        arpa.load(None, model=bad_model_name)