예제 #1
0
def test_subtopic_labels(kvl, store, label_store):
    '''Check subtopic label inference against a small folder layout.

    Two folders are populated, two negative labels against ``'r'`` are
    stored, and the labels returned by ``infer_subtopic_labels`` for the
    query ``('a', 'ax')`` are checked for content and for total count.
    '''
    def make_label(cid1, sid1, cid2, sid2, neg=False):
        # ``Label`` takes both content ids first and both subtopic ids
        # last, so the (cid, sid) pairs are split up here.
        if neg:
            value = CorefValue.Negative
        else:
            value = CorefValue.Positive
        return Label(cid1, cid2, 'unknown', value, sid1, sid2)

    def make_neg_label(cid1, sid1, cid2, sid2):
        return make_label(cid1, sid1, cid2, sid2, neg=True)

    def has_label(haystack, needle):
        # A match needs the same subject *and* the same coref value.
        for candidate in haystack:
            if (candidate.same_subject_as(needle)
                    and candidate.value == needle.value):
                return True
        return False

    # Folder name -> items added to it, in insertion order.
    layout = [
        ('top', [('foo', 'a', 'ax'),
                 ('foo', 'b', 'bx'),
                 ('bar', 'c', 'cx'),
                 ('bar', 'd', 'dx')]),
        ('other', [('baz', 'e', 'ex')]),
    ]
    folders = Folders(kvl)
    for folder_name, items in layout:
        folders.add_folder(folder_name)
        for item in items:
            folders.add_item(folder_name, *item)

    # Simulates a negative label from a search result.
    for cid, sid in (('a', 'ax'), ('b', 'bx')):
        label_store.put(make_neg_label(cid, sid, 'r', None))

    learner = PairwiseFeatureLearner(store, label_store, 'a', 'ax')
    labels = learner.infer_subtopic_labels()
    print('\n'.join(repr(label) for label in labels))

    assert has_label(labels, make_neg_label('a', 'ax', 'e', 'ex'))
    assert has_label(labels, make_neg_label('a', 'ax', 'c', 'cx'))
    assert has_label(labels, make_neg_label('a', 'ax', 'd', 'dx'))
    assert has_label(labels, make_label('a', 'ax', 'b', 'bx'))
    assert has_label(labels, make_neg_label('a', 'ax', 'r', None))
    assert has_label(labels, make_neg_label('b', 'bx', 'r', None))
    # 2 for folders, 2 for results, 4 inferred
    assert len(labels) == 8
예제 #2
0
def test_only_negative_labels(store, label_store):
    '''Make sure the learner can handle one class of labels.

    Only the negative labels from the training data are stored, then a
    recommendation query is issued for ``'q'``.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)

    # Keep the negative labels only; every other label is dropped.
    negatives = (label for label in labels
                 if label.value == CorefValue.Negative)
    for label in negatives:
        label_store.put(label)

    # Overwrite the query so we get some hits.
    query_fc = counter_fc({'x': 5, 'y': 90})
    store.put([('q', query_fc)])

    # And assign a label to the query.
    label_store.put(neg_label('q', 'a'))

    engine = mod_pairwise.similar(store, label_store)
    engine = engine.set_query_id('q')
    engine = engine.set_query_params({'limit': 100})
    results = engine.recommendations()

    # The search engine will fall back to a plain index scan if it has
    # insufficient training data.
    # NOTE(review): ``len(...) >= 0`` can never be false, so this only
    # verifies that the query runs and that ``results['results']`` is
    # sized.
    hits = results['results']
    assert len(hits) >= 0
예제 #3
0
def test_only_negative_labels(store, label_store):
    '''Make sure the learner can handle one class of labels.

    The label store receives nothing but negative labels before the
    recommendation query for ``'q'`` is run.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)
    for candidate in labels:
        is_negative = candidate.value == CorefValue.Negative
        if is_negative:
            label_store.put(candidate)

    # Overwrite the query so we get some hits.
    query_entry = ('q', counter_fc({'x': 5, 'y': 90}))
    store.put([query_entry])

    # And assign a label to the query.
    query_label = neg_label('q', 'a')
    label_store.put(query_label)

    search = mod_pairwise.similar(store, label_store)
    results = (search.set_query_id('q')
                     .set_query_params({'limit': 100})
                     .recommendations())

    # The search engine will fall back to a plain index scan if it has
    # insufficient training data.
    # NOTE(review): a length is never negative, so the assertion below
    # cannot fail; the test only shows that the call chain completes.
    result_count = len(results['results'])
    assert result_count >= 0
예제 #4
0
def test_search_engine(store, label_store):
    '''Pretty much the same as classify, but for the search engine.

    All training labels are stored, the query ``'q'`` is given a few
    labels of its own, and the single returned recommendation is
    checked so that its first element is not ``'a'``.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)
    for label in labels:
        label_store.put(label)

    # Overwrite the query so we get some hits.
    query_fc = counter_fc({'x': 5, 'y': 90})
    store.put([('q', query_fc)])

    # And assign some labels to the query.
    # (label expansion should fill in the rest)
    label_store.put(pos_label('q', 'b'))
    label_store.put(neg_label('q', 'a'))
    # The next two can be removed if/when negative label inference
    # returns. ---AG
    # NOTE(review): ('q', 'b') is labeled positive above and negative
    # here; confirm that is intended.
    for other in ('b', 'c'):
        label_store.put(neg_label('q', other))

    engine = mod_pairwise.similar(store, label_store)
    engine = engine.set_query_id('q')
    engine = engine.set_query_params({'limit': 1})
    results = engine.recommendations()

    # 'a' was labeled negative against the query, so it must not come
    # back as the first result.
    first_hit = results['results'][0]
    assert first_hit[0] != 'a'
예제 #5
0
def test_search_engine(store, label_store):
    '''Pretty much the same as classify, but for the search engine.

    Stores the full training set, labels the query ``'q'`` and asserts
    that the one recommendation requested does not start with ``'a'``.
    '''
    content_objs, labels = interesting_training_data()
    store.put(content_objs)
    for training_label in labels:
        label_store.put(training_label)

    # Overwrite the query so we get some hits.
    store.put([('q', counter_fc({'x': 5, 'y': 90}))])

    # And assign some labels to the query.
    # (label expansion should fill in the rest)
    # The last two entries can be removed if/when negative label
    # inference returns. ---AG
    # NOTE(review): 'b' appears with both a positive and a negative
    # label; confirm that is intended.
    query_labels = (
        (pos_label, 'b'),
        (neg_label, 'a'),
        (neg_label, 'b'),
        (neg_label, 'c'),
    )
    for make_label, other in query_labels:
        label_store.put(make_label('q', other))

    search = mod_pairwise.similar(store, label_store)
    results = (search.set_query_id('q')
                     .set_query_params({'limit': 1})
                     .recommendations())

    # The negatively labeled 'a' must not be the top recommendation.
    top = results['results'][0][0]
    assert top != 'a'
예제 #6
0
def test_subtopic_labels(kvl, store, label_store):
    '''Verify the labels produced by subtopic label inference.

    Builds two folders of items, stores two negative labels against
    ``'r'``, runs ``infer_subtopic_labels`` for ``('a', 'ax')`` and
    checks both the expected labels and the total number returned.
    '''
    def build(value, cid1, sid1, cid2, sid2):
        # Content ids go first and subtopic ids last in ``Label``.
        return Label(cid1, cid2, 'unknown', value, sid1, sid2)

    def pos(cid1, sid1, cid2, sid2):
        return build(CorefValue.Positive, cid1, sid1, cid2, sid2)

    def neg(cid1, sid1, cid2, sid2):
        return build(CorefValue.Negative, cid1, sid1, cid2, sid2)

    def matches(candidate, needle):
        # Same subject and same coref value.
        return (candidate.same_subject_as(needle)
                and candidate.value == needle.value)

    def has_label(haystack, needle):
        return any(matches(candidate, needle) for candidate in haystack)

    folders = Folders(kvl)
    folders.add_folder('top')
    for group, cid, sid in (('foo', 'a', 'ax'),
                            ('foo', 'b', 'bx'),
                            ('bar', 'c', 'cx'),
                            ('bar', 'd', 'dx')):
        folders.add_item('top', group, cid, sid)
    folders.add_folder('other')
    folders.add_item('other', 'baz', 'e', 'ex')

    # Simulates a negative label from a search result.
    label_store.put(neg('a', 'ax', 'r', None))
    label_store.put(neg('b', 'bx', 'r', None))

    learner = PairwiseFeatureLearner(store, label_store, 'a', 'ax')
    labels = learner.infer_subtopic_labels()

    dump = '\n'.join(map(repr, labels))
    print(dump)

    assert has_label(labels, neg('a', 'ax', 'e', 'ex'))
    assert has_label(labels, neg('a', 'ax', 'c', 'cx'))
    assert has_label(labels, neg('a', 'ax', 'd', 'dx'))
    assert has_label(labels, pos('a', 'ax', 'b', 'bx'))
    assert has_label(labels, neg('a', 'ax', 'r', None))
    assert has_label(labels, neg('b', 'bx', 'r', None))

    # 2 for folders, 2 for results, 4 inferred
    expected_total = 8
    assert len(labels) == expected_total