Python AdeftMiner Examples

Programming Language: Python

Namespace/Package Name: adeft.discover

Class/Type: AdeftMiner

Examples at hotexamples.com: 8

Python AdeftMiner - 8 examples found. These are the top-rated real-world Python examples of adeft.discover.AdeftMiner extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

AdeftMiner(8)

process_texts(7)

top(7)

get_longforms(3)

_add(1)

compute_alignment_scores(1)

dump(1)

prune(1)

to_dict(1)

Example #1

Show file

File: test_discover.py Project: pagreene/adeft

def test_process_with_exclude():
    """Check the leading candidates when the miner is built with an exclusion.

    A miner for the shortform 'INDRA' is created with ``exclude='and'``,
    fed the four example texts, and the first two entries returned by
    ``top()`` are compared against the expected (text, score) pairs.
    """
    miner = AdeftMiner('INDRA', exclude='and')
    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(corpus)

    expected_leaders = (
        ('dynamical reasoning assembler', 2.0),
        ('indonesian debt restructuring agency', 1.0),
    )
    # top() is queried afresh for each rank being checked.
    for rank, expected in enumerate(expected_leaders):
        assert miner.top()[rank] == expected

Example #2

Show file

def test_miner_to_dict():
    """Round-trip a miner through ``to_dict`` and the dict loader.

    The miner rebuilt by ``load_adeft_miner_from_dict`` must agree with the
    original on ``top()``, on ``get_longforms`` with alignment-based scoring
    switched off, and on ``get_longforms`` with default arguments once
    ``compute_alignment_scores()`` has been called on the original.
    """
    original = AdeftMiner('INDRA')
    original.process_texts(
        [example_text1, example_text2, example_text3, example_text4])

    restored = load_adeft_miner_from_dict(original.to_dict())

    assert original.top() == restored.top()

    plain_original = original.get_longforms(use_alignment_based_scoring=False)
    plain_restored = restored.get_longforms(use_alignment_based_scoring=False)
    assert plain_original == plain_restored

    # Alignment scores are computed on the original miner only, after the
    # dict snapshot was taken; the default-argument results are then compared.
    original.compute_alignment_scores()
    assert original.get_longforms() == restored.get_longforms()

Example #3

Show file

File: test_discover.py Project: steppi/adeft

def test_prune():
    """Check that ``prune(5)`` leaves only candidates of at most five words.

    The candidate texts listed by ``top()`` before pruning, filtered down to
    those with five or fewer whitespace-separated words, must equal the
    candidate texts listed by ``top()`` after pruning (same order).
    """
    max_words = 5
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])

    # Each entry of top() is unpacked as a 3-tuple; only the text is used.
    before = [text for text, _second, _third in miner.top()]
    miner.prune(max_words)
    after = [text for text, _second, _third in miner.top()]

    expected = [text for text in before
                if not len(text.split()) > max_words]
    assert after == expected

Example #4

Show file

def test_serialize_adeft_miner():
    """Round-trip a miner through ``dump`` and ``load_adeft_miner`` on disk.

    The miner is written to a uniquely named file under ``SCRATCH_PATH``,
    read back, and the reloaded miner must agree with the original on both
    ``top()`` and ``get_longforms()``.

    The scratch file is removed afterwards, whether or not the test passes,
    so repeated runs do not accumulate files in the scratch directory.
    """
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])
    # A random hex name avoids collisions between runs.
    temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex)
    try:
        with open(temp_filename, 'w') as f:
            miner.dump(f)
        with open(temp_filename) as f:
            miner2 = load_adeft_miner(f)
    finally:
        # The file may not exist if open() itself failed.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
    assert miner.top() == miner2.top()
    assert miner.get_longforms() == miner2.get_longforms()

Example #5

Show file

File: test_discover.py Project: pagreene/adeft

def test_process_texts():
    """Check selected entries of ``top()`` after processing the example texts.

    Entries at positions 0, 3 and 7 of ``top()`` are compared against their
    expected (text, score) pairs, and ``top(limit=5)`` is checked against the
    first five entries of the unlimited result.
    """
    miner = AdeftMiner('INDRA')
    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(corpus)

    expected_by_position = (
        (0, ('indonesian debt restructuring agency', 1.0)),
        (3, ('integrated network and dynamical'
             ' reasoning assembler', 1.0)),
        (7, ('reasoning assembler', 0.0)),
    )
    for position, expected in expected_by_position:
        assert miner.top()[position] == expected

    # A limited query must match the head of the unlimited one.
    assert miner.top(limit=5) == miner.top()[:5]

Example #6

Show file

File: test_discover.py Project: pagreene/adeft

def test_add():
    """Test the addition of candidates to the trie

    First add one maximal candidate. All nested parent candidates will be
    added as well. Check that the candidates are contained in the trie and
    that likelihood calculations are correct. Then add the parent of the
    original maximal candidate and check that likelihood has been updated
    correctly.
    """
    miner = AdeftMiner('INDRA')
    candidate = [
        'the', 'integrated', 'network', 'and', 'dynamical', 'reasoning',
        'assembler'
    ]
    # Expected path through the trie: the candidate's tokens in reverse
    # order, in stemmed form.
    stemmed = ['assembl', 'reason', 'dynam', 'and', 'network', 'integr', 'the']

    def check_path(counts, penalties):
        """Walk the trie along ``stemmed``, checking each node's score.

        At every step the token must be a child of the current node and the
        child's score must equal the matching ``count - penalty``.
        """
        current = miner._internal_trie
        for count, penalty, token in zip(counts, penalties, stemmed):
            assert token in current.children
            child = current.children[token]
            assert child.score == count - penalty
            current = child

    miner._add(candidate)
    check_path(counts=[1] * 7, penalties=[1] * 6 + [0])

    # Adding the candidate without its first token changes the expected
    # counts and penalties along the same path.
    miner._add(candidate[1:])
    check_path(counts=[2] * 6 + [1], penalties=[2] * 5 + [1, 0])

Example #7

Show file

File: test_discover.py Project: pagreene/adeft

def test_get_longforms():
    """Check longform extraction with a score cutoff.

    A fresh miner must report an empty ``top()``. After the four example
    texts are processed, ``get_longforms(cutoff=0.5)`` must return exactly
    two (text, score) pairs, in the expected order.
    """
    miner = AdeftMiner('INDRA')
    # Nothing has been processed yet, so top() must be an empty list.
    assert miner.top() == []

    corpus = [example_text1, example_text2, example_text3, example_text4]
    miner.process_texts(corpus)

    longforms = miner.get_longforms(cutoff=0.5)
    expected = (
        ('indonesian debt restructuring agency', 1.0),
        ('integrated network and dynamical'
         ' reasoning assembler', 1.0),
    )
    assert len(longforms) == 2
    for index, pair in enumerate(expected):
        assert longforms[index] == pair

Example #8

Show file

def test_compose_adeft_miners():
    """Check that composing two miners matches one miner fed all the texts.

    Two miners each process half of the example texts and are combined with
    ``compose``; a third miner processes all four texts. The combined miner
    is printed and its ``top()`` must equal the third miner's ``top()``.
    """
    first_part, second_part, whole = (
        AdeftMiner('INDRA'), AdeftMiner('INDRA'), AdeftMiner('INDRA'))

    first_half = [example_text1, example_text2]
    second_half = [example_text3, example_text4]

    first_part.process_texts(first_half)
    second_part.process_texts(second_half)
    whole.process_texts(first_half + second_half)

    combined = compose(first_part, second_part)
    print(combined)
    assert combined.top() == whole.top()