def test_returned_fact_extractor_has_method_predict(self): # ie, can be used for scoring an evidence b = BootstrappedIEPipeline(mock.MagicMock(), []) kn = self.build_training_knowledge({'likes': 3}) result = b.learn_fact_extractors(kn) predictor = list(result.values())[0] self.assertTrue(hasattr(predictor, 'predict')) self.assertTrue(callable(predictor.predict)) self.assertTrue(hasattr(predictor, 'predict_proba')) self.assertTrue(callable(predictor.predict_proba))
def test_no_fact_extractor_is_built_when_not_enough_data(self): b = BootstrappedIEPipeline(mock.MagicMock(), []) kn = self.build_training_knowledge({ 'likes': 1, 'hates': 1, 'looks': 2 }) result = b.learn_fact_extractors(kn) self.assertEqual(len(result), 1) self.assertIn('looks', result)
def test_fact_extractor_is_created_with_FactExtractorFactory(self): b = BootstrappedIEPipeline(mock.MagicMock(), []) kn = self.build_training_knowledge({'likes': 3, 'hates': 2}) with mock.patch('iepy.core.FactExtractorFactory') as m_FEF: b.learn_fact_extractors(kn) self.assertEqual(m_FEF.call_count, 2) actual_calls = [args for args, kwargs in m_FEF.call_args_list] expected_calls = [(b.extractor_config, k) for k in kn.per_relation().values()] self.assertEqual(len(actual_calls), len(expected_calls)) for c in expected_calls: self.assertIn(c, actual_calls)
def test_relations_are_infered_from_seeds(self): f1 = FactFactory(e1__kind=u'person', e2__kind=u'location', relation=u'x') f2 = FactFactory(e1__kind=u'person', e2__kind=u'location', relation=u'y') f3 = FactFactory(e1__kind=u'person', e2__kind=u'person', relation=u'z') f4 = FactFactory(e1__kind=u'location', e2__kind=u'person', relation=u'w') b = BootstrappedIEPipeline(mock.MagicMock(), [f1, f2, f3, f4]) self.assertEqual( b.relations, { u'x': (u'person', u'location'), u'y': (u'person', u'location'), u'z': (u'person', u'person'), u'w': (u'location', u'person') })
class TestBootstrapAcceptingKnowledge(unittest.TestCase): """Step 6 of iepy takes the output of the classifier, and uses it for increasing the known things""" def setUp(self): self.threshold = 0.5 self.b = BootstrappedIEPipeline(mock.MagicMock(), [], fact_threshold=self.threshold) assert len(self.b.knowledge) == 0 def test_evidences_with_high_prediction_score_are_accepted(self): ev1 = EvidenceFactory() ev2 = EvidenceFactory() ev3 = EvidenceFactory() classifier_out = {ev1: 1, ev2: 0.5, ev3: 0.1} self.b.filter_facts(classifier_out) self.assertIn(ev1, self.b.knowledge) self.assertNotIn(ev2, self.b.knowledge) self.assertNotIn(ev3, self.b.knowledge) def test_no_accept_evidence_well_ranked_if_human_previously_rejected(self): ev1 = EvidenceFactory() classifier_out = {ev1: 1} self.b.answers[ev1] = 0 self.b.filter_facts(classifier_out) self.assertNotIn(ev1, self.b.knowledge) def test_dropping_mode_discards_previous_not_human_knowledge(self): self.b.drop_guesses_each_round = True ev1 = EvidenceFactory() ev2 = EvidenceFactory() self.b.knowledge[ev1] = 1.0 self.b.knowledge[ev2] = 1.0 self.b.answers[ev1] = 1.0 assert ev2 not in self.b.answers # Nothing new to add self.b.filter_facts({}) # ev1 shall be preserved cause was human provided self.assertIn(ev1, self.b.knowledge) # ev2 not self.assertNotIn(ev2, self.b.knowledge)
from iepy.human_validation import TerminalInterviewer from iepy.knowledge import Knowledge from iepy.utils import load_facts_from_csv if __name__ == u'__main__': opts = docopt(__doc__, version=0.1) connection = db.connect(opts[u'<dbname>']) seed_facts = load_facts_from_csv(opts[u'<seeds_file>']) output_file = opts[u'<output_file>'] gold_standard_file = opts[u'--gold'] if gold_standard_file: gold_standard = Knowledge.load_from_csv(gold_standard_file) else: gold_standard = None p = BootstrappedIEPipeline(connection, seed_facts, gold_standard) logging.basicConfig( level=logging.DEBUG, format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s") STOP = u'STOP' p.start() # blocking keep_looping = True while keep_looping: qs = list(p.questions_available()) if not qs: keep_looping = False term = TerminalInterviewer(qs, p.add_answer, [(STOP, u'Stop execution ASAP')])
def test_one_fact_extractor_built_per_relation_in_available_data(self): b = BootstrappedIEPipeline(mock.MagicMock(), []) kn = self.build_training_knowledge({'likes': 3, 'hates': 2}) result = b.learn_fact_extractors(kn) self.assertEqual(len(result), 2)
def setUp(self): self.threshold = 0.5 self.b = BootstrappedIEPipeline(mock.MagicMock(), [], fact_threshold=self.threshold) assert len(self.b.knowledge) == 0