def test_collect_matches2doc_ranker_driver_min_ranker():
    """Run the collect-matches rank driver with ``MinRanker`` on one document.

    Before the driver runs, the smallest raw score among matches whose
    ``parent_id`` is ``'30'`` must be lower than the smallest among those whose
    ``parent_id`` is ``'20'``.  After the driver runs, the document is expected
    to hold exactly two matches, with ids ``'30'`` then ``'20'``, each scored
    ``1 / (1 + smallest_raw_score)`` and referencing the document's own id.
    """
    import sys

    doc = create_document_to_score_same_depth_level()
    driver = SimpleCollectMatchesRankDriver()
    driver.attach(executor=MinRanker(), pea=None)

    # Running minimum of the raw match scores, keyed by parent id.
    lowest = {'30': sys.maxsize, '20': sys.maxsize}
    for candidate in doc.matches:
        parent = candidate.parent_id
        if parent in lowest:
            lowest[parent] = min(lowest[parent], candidate.score.value)

    assert lowest['30'] < lowest['20']

    driver._traverse_apply([doc])

    assert len(doc.matches) == 2
    for ranked, parent in zip(doc.matches, ('30', '20')):
        assert ranked.id == parent
        expected = 1. / (1. + lowest[parent])
        assert ranked.score.value == pytest.approx(expected, 0.0000001)

    # match score is computed w.r.t. doc.id
    for ranked in doc.matches:
        assert ranked.score.ref_id == doc.id
def test_chunk2doc_ranker_driver_traverse_apply_larger_range():
    """Run the chunk-to-doc rank driver over traversal paths ``('cc', 'c')``.

    For every document the test expects one match and one chunk; that chunk is
    expected to carry two matches scored ``1 / (1 + id * 2)`` with ``ref_id``
    101.  The document-level match is expected to have ``ref_id`` 100 and a
    score of ``1 / (1 + lowest_chunk_match_score)``.
    """
    docs = [create_chunk_chunk_matches_to_score()]
    driver = SimpleChunk2DocRankDriver(traversal_paths=('cc', 'c'))
    driver.attach(executor=MinRanker(), pea=None)
    driver._traverse_apply(docs)

    for doc in docs:
        assert len(doc.matches) == 1
        assert len(doc.chunks) == 1

        chunk = doc.chunks[0]
        assert len(chunk.matches) == 2

        # Track the smallest chunk-level score while validating each one.
        lowest = chunk.matches[0].score.value
        for chunk_match in chunk.matches:
            lowest = min(lowest, chunk_match.score.value)
            # the score should be 1 / (1 + id * 2)
            expected = 1. / (1 + float(chunk_match.id) * 2.)
            assert chunk_match.score.value == pytest.approx(expected, 0.0001)
            assert chunk_match.score.ref_id == 101

        doc_match = doc.matches[0]
        assert doc_match.score.ref_id == 100
        assert doc_match.score.value == pytest.approx(1. / (1 + lowest), 0.0001)
def test_chunk2doc_ranker_driver_traverse_apply():
    """Run the chunk-to-doc rank driver with ``recur_range=(0, 1)``.

    Each document is expected to end up with two matches whose score equals
    ``1 / (1 + id * 2)`` within a relative tolerance of ``0.0001``.
    """
    docs = [create_chunk_matches_to_score()]
    driver = SimpleChunk2DocRankDriver(recur_range=(0, 1))
    driver.attach(executor=MinRanker(), pea=None)
    driver._traverse_apply(docs)

    for doc in docs:
        assert len(doc.matches) == 2
        for ranked in doc.matches:
            # the score should be 1 / (1 + id * 2)
            expected = 1. / (1 + ranked.id * 2.)
            assert ranked.score.value == pytest.approx(expected, 0.0001)
def test_chunk2doc_ranker_driver_traverse_apply(self):
    """Run the chunk-to-doc rank driver with ``depth_range=(0, 1)``.

    Each document is expected to end up with two matches; every match must
    score approximately ``1 / (1 + id * 2)`` and have ``level_depth`` 0.
    """
    docs = [create_chunk_matches_to_score()]
    driver = SimpleChunk2DocRankDriver(depth_range=(0, 1))
    driver.attach(executor=MinRanker(), pea=None)
    driver._traverse_apply(docs)

    for doc in docs:
        assert len(doc.matches) == 2
        for ranked in doc.matches:
            # the score should be 1 / (1 + id * 2)
            expected = 1. / (1 + ranked.id * 2.)
            self.assertAlmostEqual(ranked.score.value, expected)
            assert ranked.level_depth == 0
def test_chunk2doc_ranker_driver_min_ranker():
    """Apply the chunk-to-doc rank driver with ``MinRanker`` to one document.

    The driver is invoked directly on the document's chunks.  The test expects
    four matches with ids 40, 50, 60 and 70 (in that order), scored
    ``1 / (1 + 4)`` through ``1 / (1 + 7)`` respectively, each referencing the
    document's own id.
    """
    doc = create_document_to_score()
    driver = SimpleChunk2DocRankDriver()
    driver.attach(executor=MinRanker(), pea=None)
    driver._apply_all(doc.chunks, doc)

    # (expected match id, value plugged into 1 / (1 + value)) per rank position
    expectations = ((40, 4), (50, 5), (60, 6), (70, 7))

    assert len(doc.matches) == 4
    for ranked, (match_id, raw) in zip(doc.matches, expectations):
        assert ranked.id == match_id
        assert ranked.score.value == pytest.approx(1 / (1 + raw), 0.0001)

    # match score is computed w.r.t. doc.id
    for ranked in doc.matches:
        assert ranked.score.ref_id == doc.id
def test_chunk2doc_ranker_driver_MinRanker(self):
    """Apply the chunk-to-doc rank driver with ``MinRanker`` to one document.

    The driver is invoked directly on the document's chunks.  The test expects
    four matches with ids 40, 50, 60 and 70 (in that order), scored
    approximately ``1 / (1 + 4)`` through ``1 / (1 + 7)`` respectively, each
    referencing the document's own id.
    """
    doc = create_document_to_score()
    driver = SimpleChunk2DocRankDriver()
    driver.attach(executor=MinRanker(), pea=None)
    driver._apply_all(doc.chunks, doc)

    # (expected match id, value plugged into 1 / (1 + value)) per rank position
    expectations = ((40, 4), (50, 5), (60, 6), (70, 7))

    assert len(doc.matches) == 4
    for ranked, (match_id, raw) in zip(doc.matches, expectations):
        assert ranked.id == match_id
        self.assertAlmostEqual(ranked.score.value, 1 / (1 + raw))

    # match score is computed w.r.t. doc.id
    for ranked in doc.matches:
        assert ranked.score.ref_id == doc.id