def test_subtopic_labels(kvl, store, label_store):
    '''Check the labels returned by ``infer_subtopic_labels``.

    Builds two folders of items, stores two explicit negative labels,
    then asks a ``PairwiseFeatureLearner`` anchored at ``('a', 'ax')``
    for its inferred labels and checks both the expected members and
    the total count.
    '''
    def lab(cid1, sid1, cid2, sid2, neg=False):
        # Positive unless the caller explicitly asks for a negative.
        if neg:
            value = CorefValue.Negative
        else:
            value = CorefValue.Positive
        return Label(cid1, cid2, 'unknown', value, sid1, sid2)

    def nlab(a, b, c, d):
        # Shorthand for a negative label.
        return lab(a, b, c, d, neg=True)

    def has_label(haystack, needle):
        # True when some label has the same subject and value as `needle`.
        for candidate in haystack:
            if (candidate.same_subject_as(needle)
                    and candidate.value == needle.value):
                return True
        return False

    # Each folder is created first, then its items are added in order.
    layout = (
        ('top', (
            ('foo', 'a', 'ax'),
            ('foo', 'b', 'bx'),
            ('bar', 'c', 'cx'),
            ('bar', 'd', 'dx'),
        )),
        ('other', (
            ('baz', 'e', 'ex'),
        )),
    )
    folders = Folders(kvl)
    for folder_name, entries in layout:
        folders.add_folder(folder_name)
        for group, cid, sid in entries:
            folders.add_item(folder_name, group, cid, sid)

    # Simulates a negative label from a search result.
    for cid, sid in (('a', 'ax'), ('b', 'bx')):
        label_store.put(nlab(cid, sid, 'r', None))

    learner = PairwiseFeatureLearner(store, label_store, 'a', 'ax')
    labels = learner.infer_subtopic_labels()
    # Dump the labels so a failing run shows what was produced.
    print('\n'.join(repr(item) for item in labels))

    expected = (
        nlab('a', 'ax', 'e', 'ex'),
        nlab('a', 'ax', 'c', 'cx'),
        nlab('a', 'ax', 'd', 'dx'),
        lab('a', 'ax', 'b', 'bx'),
        nlab('a', 'ax', 'r', None),
        nlab('b', 'bx', 'r', None),
    )
    for wanted in expected:
        assert has_label(labels, wanted)

    # 2 for folders, 2 for results, 4 inferred
    assert len(labels) == 8
def test_only_negative_labels(store, label_store):
    '''Make sure the learner can handle one class of labels.

    Only the negative labels from the training data are stored before
    the search engine is asked for recommendations.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)

    # Keep just the negative labels; positives are deliberately dropped.
    negatives = (item for item in labels
                 if item.value == CorefValue.Negative)
    for item in negatives:
        label_store.put(item)

    # Overwrite the query so we get some hits.
    query_features = counter_fc({'x': 5, 'y': 90})
    store.put([('q', query_features)])
    # And assign a label to the query.
    label_store.put(neg_label('q', 'a'))

    engine = mod_pairwise.similar(store, label_store)
    engine = engine.set_query_id('q')
    engine = engine.set_query_params({'limit': 100})
    results = engine.recommendations()

    # The search engine will fall back to a plain index scan if it has
    # insufficient training data.
    # NOTE: a length is never negative, so this check always holds; the
    # test effectively passes whenever the calls above do not raise.
    hits = results['results']
    assert len(hits) >= 0
def test_only_negative_labels(store, label_store):
    '''Make sure the learner can handle one class of labels.

    The label store receives only negative labels, and recommendations
    are then requested for query ``'q'``.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)

    # Positive labels are skipped so only one class is present.
    for candidate in labels:
        if candidate.value != CorefValue.Negative:
            continue
        label_store.put(candidate)

    # Overwrite the query so we get some hits.
    store.put([('q', counter_fc({'x': 5, 'y': 90}))])
    # And assign a label to the query.
    query_label = neg_label('q', 'a')
    label_store.put(query_label)

    search = mod_pairwise.similar(store, label_store)
    search = search.set_query_id('q')
    search = search.set_query_params({'limit': 100})
    results = search.recommendations()

    # The search engine will fall back to a plain index scan if it has
    # insufficient training data.
    # NOTE: this comparison cannot fail (len() is never negative), so
    # the test only verifies that no exception was raised above.
    assert len(results['results']) >= 0
def test_search_engine(store, label_store):
    '''Pretty much the same as classify, but for the search engine.

    Stores all training labels plus a few labels on query ``'q'``, then
    checks that the single returned result does not start with ``'a'``.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)
    for item in labels:
        label_store.put(item)

    # Overwrite the query so we get some hits.
    query_features = counter_fc({'x': 5, 'y': 90})
    store.put([('q', query_features)])

    # And assign some labels to the query.
    # (label expansion should fill in the rest)
    query_labels = (
        (pos_label, 'b'),
        (neg_label, 'a'),
        # These two can be removed if negative label inference
        # returns. ---AG
        (neg_label, 'b'),
        (neg_label, 'c'),
    )
    # Each label is built right before it is stored, in listed order.
    for make_label, other_id in query_labels:
        label_store.put(make_label('q', other_id))

    engine = mod_pairwise.similar(store, label_store)
    engine = engine.set_query_id('q')
    engine = engine.set_query_params({'limit': 1})
    results = engine.recommendations()

    # The first field of the top result must not be 'a'.
    top_hit = results['results'][0]
    assert top_hit[0] != 'a'
def test_search_engine(store, label_store):
    '''Pretty much the same as classify, but for the search engine.

    Loads the training data and labels, labels query ``'q'``, and
    asserts on the first entry of the one-result recommendation list.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)
    for training_label in labels:
        label_store.put(training_label)

    # Overwrite the query so we get some hits.
    store.put([('q', counter_fc({'x': 5, 'y': 90}))])

    # And assign some labels to the query.
    # (label expansion should fill in the rest)
    for build, target in ((pos_label, 'b'), (neg_label, 'a')):
        label_store.put(build('q', target))

    # These can be removed if negative label inference returns. ---AG
    for target in ('b', 'c'):
        label_store.put(neg_label('q', target))

    search = mod_pairwise.similar(store, label_store)
    search = search.set_query_id('q')
    search = search.set_query_params({'limit': 1})
    results = search.recommendations()

    # With limit 1 only the top result is inspected; its first field
    # must differ from 'a'.
    first_result = results['results'][0]
    assert first_result[0] != 'a'
def test_subtopic_labels(kvl, store, label_store):
    '''Verify the output of ``infer_subtopic_labels`` for ``('a', 'ax')``.

    Sets up folders ``'top'`` and ``'other'``, stores two negative
    labels against ``'r'``, and then checks that the learner returns
    the six expected labels and eight labels in total.
    '''
    def lab(cid1, sid1, cid2, sid2, neg=False):
        # Map the boolean flag onto the matching coref value.
        value = CorefValue.Positive
        if neg:
            value = CorefValue.Negative
        return Label(cid1, cid2, 'unknown', value, sid1, sid2)

    def nlab(a, b, c, d):
        # Negative-label shorthand.
        return lab(a, b, c, d, neg=True)

    def has_label(haystack, needle):
        # Search for a label with the same subject and value as `needle`.
        for entry in haystack:
            same_subject = entry.same_subject_as(needle)
            if same_subject and entry.value == needle.value:
                return True
        return False

    folders = Folders(kvl)

    folders.add_folder('top')
    for group, cid, sid in (('foo', 'a', 'ax'),
                            ('foo', 'b', 'bx'),
                            ('bar', 'c', 'cx'),
                            ('bar', 'd', 'dx')):
        folders.add_item('top', group, cid, sid)

    folders.add_folder('other')
    folders.add_item('other', 'baz', 'e', 'ex')

    # Simulates a negative label from a search result.
    label_store.put(nlab('a', 'ax', 'r', None))
    label_store.put(nlab('b', 'bx', 'r', None))

    learner = PairwiseFeatureLearner(store, label_store, 'a', 'ax')
    labels = learner.infer_subtopic_labels()
    # Print the labels so they are visible when the test fails.
    print('\n'.join([repr(entry) for entry in labels]))

    expected_negatives = (
        ('a', 'ax', 'e', 'ex'),
        ('a', 'ax', 'c', 'cx'),
        ('a', 'ax', 'd', 'dx'),
    )
    for args in expected_negatives:
        assert has_label(labels, nlab(*args))

    assert has_label(labels, lab('a', 'ax', 'b', 'bx'))

    for cid, sid in (('a', 'ax'), ('b', 'bx')):
        assert has_label(labels, nlab(cid, sid, 'r', None))

    # 2 for folders, 2 for results, 4 inferred
    assert len(labels) == 8