Ejemplo n.º 1
0
def test_prune():
    """Check which candidates remain listed after pruning.

    After ``prune(5)`` the candidates reported by ``top()`` must be the
    candidates reported beforehand, in the same order, restricted to those
    made of at most five whitespace-separated tokens.
    """
    max_tokens = 5
    texts = [example_text1, example_text2, example_text3, example_text4]

    miner = AdeftMiner('INDRA')
    miner.process_texts(texts)

    before = [longform for longform, _, _ in miner.top()]
    miner.prune(max_tokens)
    after = [longform for longform, _, _ in miner.top()]

    # Build the expectation from the pre-prune ranking so order is preserved.
    expected = []
    for longform in before:
        if len(longform.split()) <= max_tokens:
            expected.append(longform)
    assert after == expected
Ejemplo n.º 2
0
def test_process_texts():
    """Test processing of texts.

    Spot-checks selected positions of the ranking returned by ``top()`` and
    verifies that ``limit`` yields a prefix of the unlimited ranking.
    """
    texts = [example_text1, example_text2, example_text3, example_text4]
    miner = AdeftMiner('INDRA')
    miner.process_texts(texts)

    # (position in ranking, expected entry)
    expected_entries = (
        (0, ('indonesian debt restructuring agency', 1.0)),
        (3, ('integrated network and dynamical reasoning assembler', 1.0)),
        (7, ('reasoning assembler', 0.0)),
    )
    for position, entry in expected_entries:
        assert miner.top()[position] == entry

    # top with a limit must agree with slicing the full ranking
    limit = 5
    assert miner.top(limit=limit) == miner.top()[0:limit]
Ejemplo n.º 3
0
def test_process_with_exclude():
    """Test processing of texts when the miner is given ``exclude='and'``.

    Checks the first two entries of the ranking returned by ``top()``.
    """
    texts = [example_text1, example_text2, example_text3, example_text4]
    miner = AdeftMiner('INDRA', exclude='and')
    miner.process_texts(texts)

    expected_leaders = (
        ('dynamical reasoning assembler', 2.0),
        ('indonesian debt restructuring agency', 1.0),
    )
    for position, entry in enumerate(expected_leaders):
        assert miner.top()[position] == entry
Ejemplo n.º 4
0
def test_serialize_adeft_miner():
    """Round-trip a miner through ``dump`` and ``load_adeft_miner``.

    The miner is written to a uniquely named file under ``SCRATCH_PATH``,
    read back, and the restored miner must report the same ``top()`` ranking
    and the same ``get_longforms()`` result as the original.
    """
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex)
    try:
        with open(temp_filename, 'w') as f:
            miner.dump(f)
        with open(temp_filename) as f:
            miner2 = load_adeft_miner(f)
    finally:
        # Each run uses a fresh random filename, so without cleanup the
        # scratch directory grows by one file per test run.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
    assert miner.top() == miner2.top()
    assert miner.get_longforms() == miner2.get_longforms()
Ejemplo n.º 5
0
def test_miner_to_dict():
    """Round-trip a miner through ``to_dict`` and back.

    The miner rebuilt by ``load_adeft_miner_from_dict`` must agree with the
    original on ``top()`` and on ``get_longforms`` both with alignment based
    scoring disabled and with the default arguments.
    """
    texts = [example_text1, example_text2, example_text3, example_text4]
    original = AdeftMiner('INDRA')
    original.process_texts(texts)

    restored = load_adeft_miner_from_dict(original.to_dict())
    assert original.top() == restored.top()

    no_alignment = {'use_alignment_based_scoring': False}
    longforms_original = original.get_longforms(**no_alignment)
    longforms_restored = restored.get_longforms(**no_alignment)
    assert longforms_original == longforms_restored

    # Alignment scores are computed explicitly on the original only, before
    # the default-argument comparison.
    original.compute_alignment_scores()
    assert original.get_longforms() == restored.get_longforms()
Ejemplo n.º 6
0
def test_compose_adeft_miners():
    """Check that composing two miners matches mining all texts at once.

    Two miners each process half of the example texts; the result of
    ``compose`` on them must report the same ``top()`` ranking as a single
    miner that processed all four texts.
    """
    first_half = [example_text1, example_text2]
    second_half = [example_text3, example_text4]

    miner_a = AdeftMiner('INDRA')
    miner_b = AdeftMiner('INDRA')
    reference = AdeftMiner('INDRA')

    miner_a.process_texts(first_half)
    miner_b.process_texts(second_half)
    reference.process_texts(first_half + second_half)

    combined = compose(miner_a, miner_b)
    print(combined)
    assert combined.top() == reference.top()
Ejemplo n.º 7
0
def test_get_longforms():
    """Test breadth first search algorithm to extract longforms.

    With ``cutoff=0.5`` exactly two longforms are expected from the four
    example texts.
    """
    miner = AdeftMiner('INDRA')
    # a freshly constructed miner must report an empty ranking
    assert miner.top() == []

    texts = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(texts)

    expected = (
        ('indonesian debt restructuring agency', 1.0),
        ('integrated network and dynamical reasoning assembler', 1.0),
    )
    longforms = miner.get_longforms(cutoff=0.5)
    assert len(longforms) == len(expected)
    for position, entry in enumerate(expected):
        assert longforms[position] == entry