def test_prune():
    """Check that ``prune(5)`` drops candidates longer than five tokens.

    The candidates reported by ``top()`` after pruning must equal the
    candidates reported before pruning, in the same order, restricted to
    those with at most five whitespace-separated tokens.
    """
    adeft_miner = AdeftMiner('INDRA')
    texts = [example_text1, example_text2, example_text3, example_text4]
    adeft_miner.process_texts(texts)

    # NOTE(review): each top() entry is unpacked as three fields here, while
    # other tests in this file compare top() entries to 2-tuples -- confirm
    # which shape top() actually returns.
    before = [longform for longform, _, _ in adeft_miner.top()]
    adeft_miner.prune(5)
    after = [longform for longform, _, _ in adeft_miner.top()]

    survivors = [
        longform for longform in before if len(longform.split()) <= 5
    ]
    assert after == survivors
def test_process_texts():
    """Check the candidate ranking after processing the example texts."""
    adeft_miner = AdeftMiner('INDRA')
    texts = [example_text1, example_text2, example_text3, example_text4]
    adeft_miner.process_texts(texts)

    # spot-check individual positions of the ranking
    best = ('indonesian debt restructuring agency', 1.0)
    assert adeft_miner.top()[0] == best
    fourth = ('integrated network and dynamical reasoning assembler', 1.0)
    assert adeft_miner.top()[3] == fourth
    eighth = ('reasoning assembler', 0.0)
    assert adeft_miner.top()[7] == eighth

    # a limit must give the same entries as slicing the full ranking
    assert adeft_miner.top(limit=5) == adeft_miner.top()[0:5]
def test_process_with_exclude():
    """Check the candidate ranking when 'and' is passed as ``exclude``."""
    adeft_miner = AdeftMiner('INDRA', exclude='and')
    texts = [example_text1, example_text2, example_text3, example_text4]
    adeft_miner.process_texts(texts)

    # expected entries at the first two positions of the ranking
    expected_leaders = [
        ('dynamical reasoning assembler', 2.0),
        ('indonesian debt restructuring agency', 1.0),
    ]
    for rank, expected in enumerate(expected_leaders):
        assert adeft_miner.top()[rank] == expected
def test_serialize_adeft_miner():
    """Check that a miner survives a dump/load round trip through a file.

    The miner is dumped to a uniquely named file under ``SCRATCH_PATH`` and
    loaded back with ``load_adeft_miner``; the reloaded miner must report the
    same ``top()`` candidates and the same longforms as the original.
    """
    miner = AdeftMiner('INDRA')
    miner.process_texts(
        [example_text1, example_text2, example_text3, example_text4])

    temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex)
    try:
        with open(temp_filename, 'w') as f:
            miner.dump(f)
        with open(temp_filename) as f:
            miner2 = load_adeft_miner(f)
    finally:
        # Do not leave the scratch file behind, whether or not the round
        # trip succeeded. The existence check avoids masking the original
        # error if the file could not be created in the first place.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

    assert miner.top() == miner2.top()
    assert miner.get_longforms() == miner2.get_longforms()
def test_miner_to_dict():
    """Check that a miner survives a round trip through ``to_dict``.

    A miner rebuilt with ``load_adeft_miner_from_dict`` must agree with the
    original on ``top()`` and on the longforms, both without alignment based
    scoring and with the default settings.
    """
    original = AdeftMiner('INDRA')
    texts = [example_text1, example_text2, example_text3, example_text4]
    original.process_texts(texts)

    as_dict = original.to_dict()
    restored = load_adeft_miner_from_dict(as_dict)

    assert original.top() == restored.top()

    plain_original = original.get_longforms(use_alignment_based_scoring=False)
    plain_restored = restored.get_longforms(use_alignment_based_scoring=False)
    assert plain_original == plain_restored

    # Alignment scores are computed explicitly on the original miner only;
    # the default longforms of both miners must still match afterwards.
    original.compute_alignment_scores()
    assert original.get_longforms() == restored.get_longforms()
def test_compose_adeft_miners():
    """Check that composing two miners matches one miner fed all texts.

    Two miners each process half of the example texts and are combined with
    ``compose``; the result must report the same ``top()`` candidates as a
    single miner that processed all four texts.
    """
    first_half = AdeftMiner('INDRA')
    second_half = AdeftMiner('INDRA')
    reference = AdeftMiner('INDRA')

    first_half.process_texts([example_text1, example_text2])
    second_half.process_texts([example_text3, example_text4])
    all_texts = [example_text1, example_text2, example_text3, example_text4]
    reference.process_texts(all_texts)

    combined = compose(first_half, second_half)
    print(combined)
    assert combined.top() == reference.top()
def test_get_longforms():
    """Test the breadth first search algorithm that extracts longforms.

    With a cutoff of 0.5, exactly two longforms are expected from the
    example texts.
    """
    adeft_miner = AdeftMiner('INDRA')

    # a miner that has not processed any text reports no candidates
    assert adeft_miner.top() == []

    texts = [example_text1, example_text2, example_text3, example_text4]
    adeft_miner.process_texts(texts)
    longforms = adeft_miner.get_longforms(cutoff=0.5)

    expected_longforms = [
        ('indonesian debt restructuring agency', 1.0),
        ('integrated network and dynamical reasoning assembler', 1.0),
    ]
    assert len(longforms) == 2
    for position, expected in enumerate(expected_longforms):
        assert longforms[position] == expected