コード例 #1
0
ファイル: test_vectors.py プロジェクト: terU3760/conceptnet5
def test_vector_space_wrapper_filter():
    """
    Check the `filter` argument of `similar_terms` on the test frame.

    Each query asks for a single result (limit=1) and asserts that the
    expected term appears in the index of what comes back.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    polish_matches = wrapper.similar_terms(
        '/c/en/nordic_combined', filter='/c/pl', limit=1
    )
    ok_('/c/pl/kombinacja' in polish_matches.index)

    present_matches = wrapper.similar_terms(
        '/c/en/gift', filter='/c/en/present', limit=1
    )
    ok_('/c/en/present' in present_matches.index)
コード例 #2
0
def test_missing_language():
    """
    Querying a term in a language absent from the frame yields no results.

    The test frame has no Esperanto entries, so the out-of-vocabulary
    mechanism has nothing to work with. The expected outcome is an empty
    result rather than a crash.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    esperanto_results = wrapper.similar_terms('/c/eo/ekzemplo')
    result_count = len(esperanto_results)
    eq_(result_count, 0)
コード例 #3
0
def test_similar_terms_filter():
    """
    Check `similar_terms` with a `filter` argument on the test frame.

    Every case requests one result (limit=1) and expects a specific term
    in the index of the returned object.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    # (query term, filter, term expected in the result index)
    cases = (
        ('/c/en/nordic_combined', '/c/pl', '/c/pl/kombinacja'),
        ('/c/en/gift', '/c/en/present', '/c/en/present'),
    )
    for query, term_filter, expected_term in cases:
        matches = wrapper.similar_terms(query, filter=term_filter, limit=1)
        ok_(expected_term in matches.index)
コード例 #4
0
def test_missing_language():
    """
    A term from a language the frame does not contain gives an empty result.

    There is no Esperanto in the test frame, so the out-of-vocabulary
    mechanism cannot succeed; the call should return zero results instead
    of raising.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    found = space.similar_terms('/c/eo/ekzemplo')
    number_found = len(found)
    eq_(number_found, 0)
コード例 #5
0
def test_match_prefix():
    """
    Check the output of `_match_prefix` for '/c/en/figure_skate'.

    With prefix_weight=0.01 the call is expected to return three
    (term, weight) pairs, all carrying the same weight.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    query_term = '/c/en/figure_skate'
    shared_weight = 0.0033333333333333335
    expected = [
        (matched_term, shared_weight)
        for matched_term in (
            '/c/en/figure',
            '/c/en/figure skater',
            '/c/en/figure skating',
        )
    ]
    actual = wrapper._match_prefix(term=query_term, prefix_weight=0.01)
    eq_(expected, actual)
コード例 #6
0
def test_cache_with_oov():
    """
    Check `get_vector` for '/c/en/test' with and without `oov_vector`.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    # With oov_vector=False the term is expected to be absent, so the
    # returned vector must have no nonzero component.
    plain_vector = wrapper.get_vector('/c/en/test', oov_vector=False)
    ok_(not plain_vector.any())

    # With oov_vector=True the returned vector must have at least one
    # nonzero component. Presumably the ConceptNet neighbor of 'test'
    # ('trial') is used to approximate it -- confirm against get_vector.
    approximated_vector = wrapper.get_vector('/c/en/test', oov_vector=True)
    ok_(approximated_vector.any())
コード例 #7
0
def test_cache_with_oov():
    """
    Compare `get_vector('/c/en/test')` with `oov_vector` off and on.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    # oov_vector=False: the term is expected to be missing, and the vector
    # that comes back must be all zeros.
    without_oov = space.get_vector('/c/en/test', oov_vector=False)
    ok_(not without_oov.any())

    # oov_vector=True: the vector must not be all zeros. Presumably the
    # neighbor of 'test' in ConceptNet ('trial') stands in for it --
    # confirm against get_vector.
    with_oov = space.get_vector('/c/en/test', oov_vector=True)
    ok_(with_oov.any())
コード例 #8
0
ファイル: bias.py プロジェクト: akiratu/conceptnet5
def measure_bias(frame):
    """
    Return a DataFrame that measures biases in a semantic space.

    The result has one row per measurement:

    - 'gender': mean projection of the GENDER_BIAS_PAIRS difference
      vectors onto a female-minus-male axis, stored as 'bias', with 'low'
      and 'high' two standard errors below and above that mean
    - 'ethnicity-fine': PEOPLE_BY_ETHNICITY vs. ETHNIC_STEREOTYPE_TERMS
    - 'ethnicity-coarse': COARSE_ETHNICITY_TERMS vs.
      ETHNIC_STEREOTYPE_TERMS
    - 'ethnicity-names': the category axes of ETHNIC_NAME_SETS vs.
      ETHNIC_STEREOTYPE_TERMS
    - 'beliefs': PEOPLE_BY_BELIEF vs. BELIEF_STEREOTYPE_TERMS

    Every row except 'gender' holds whatever `correlation_bias` returns
    for the two sets of vectors.
    """
    vsw = VectorSpaceWrapper(frame=frame)
    vsw.load()

    # Gender: a unit axis pointing from the male words to the female words.
    female_axis = get_category_axis(frame, FEMALE_WORDS)
    male_axis = get_category_axis(frame, MALE_WORDS)
    gender_binary_axis = normalize_vec(female_axis - male_axis)

    # Lazily standardize each word pair, then project the normalized
    # difference of their vectors onto the gender axis.
    uri_pairs = (
        (standardized_uri('en', female_word), standardized_uri('en', male_word))
        for female_word, male_word in GENDER_BIAS_PAIRS
    )
    gender_bias_numbers = [
        normalize_vec(
            vsw.get_vector(female_uri) - vsw.get_vector(male_uri)
        ).dot(gender_binary_axis)
        for female_uri, male_uri in uri_pairs
    ]

    mean = np.mean(gender_bias_numbers)
    sem = scipy.stats.sem(gender_bias_numbers)
    gender_bias = pd.Series(
        [mean, mean - sem * 2, mean + sem * 2],
        index=['bias', 'low', 'high'],
    )

    # Fine-grained ethnicity.
    fine_people = get_vocabulary_vectors(frame, PEOPLE_BY_ETHNICITY)
    fine_stereotypes = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    fine_ethnic_bias = correlation_bias(fine_people, fine_stereotypes)

    # Coarse-grained ethnicity.
    coarse_people = get_vocabulary_vectors(frame, COARSE_ETHNICITY_TERMS)
    coarse_stereotypes = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    coarse_ethnic_bias = correlation_bias(coarse_people, coarse_stereotypes)

    # Ethnicity by name: one category axis per name set, stacked row-wise.
    name_axes = np.vstack(
        [get_category_axis(frame, names) for names in ETHNIC_NAME_SETS]
    )
    name_people = pd.DataFrame(name_axes)
    name_stereotypes = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    name_ethnic_bias = correlation_bias(name_people, name_stereotypes)

    # Religious beliefs.
    belief_people = get_vocabulary_vectors(frame, PEOPLE_BY_BELIEF)
    belief_stereotypes = get_vocabulary_vectors(frame, BELIEF_STEREOTYPE_TERMS)
    belief_bias = correlation_bias(belief_people, belief_stereotypes)

    # The dict keys become columns; transposing turns each measurement
    # into a row.
    by_measurement = pd.DataFrame({
        'gender': gender_bias,
        'ethnicity-fine': fine_ethnic_bias,
        'ethnicity-coarse': coarse_ethnic_bias,
        'ethnicity-names': name_ethnic_bias,
        'beliefs': belief_bias
    })
    return by_measurement.T
コード例 #9
0
def test_expand_terms(multi_ling_frame):
    """
    Check `expand_terms` for a single seed term with `oov_vector=True`.

    The expected output is the seed term followed by one additional term,
    each paired with its weight.
    """
    wrapper = VectorSpaceWrapper(frame=multi_ling_frame)
    wrapper.load()

    seed_terms = [('/c/en/ski_jumper', 1.0)]
    actual = wrapper.expand_terms(terms=seed_terms, oov_vector=True)

    expected = [
        ('/c/en/ski_jumper', 0.9900990099009901),
        ('/c/en/ski_jumping', 0.009900990099009901),
    ]
    assert expected == actual
コード例 #10
0
def test_match_prefix():
    """
    Check what `_match_prefix` returns for '/c/en/figure_skate'.

    Called with prefix_weight=0.01, it should produce three
    (term, weight) pairs that all share one weight.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    weight_each = 0.0033333333333333335
    matched_terms = (
        '/c/en/figure',
        '/c/en/figure skater',
        '/c/en/figure skating',
    )
    expected = [(matched, weight_each) for matched in matched_terms]

    actual = space._match_prefix(term='/c/en/figure_skate', prefix_weight=0.01)
    eq_(expected, actual)
コード例 #11
0
def test_similar_terms_filter(multi_ling_frame):
    """
    Check `similar_terms` with a `filter` argument on the multilingual frame.

    Every case requests one result (limit=1) and expects a specific term
    in the index of the returned object.
    """
    wrapper = VectorSpaceWrapper(frame=multi_ling_frame)
    wrapper.load()

    # (query term, filter, term expected in the result index)
    cases = (
        ('/c/en/nordic_combined', '/c/pl', '/c/pl/kombinacja'),
        ('/c/en/gift', '/c/en/present', '/c/en/present'),
    )
    for query, term_filter, expected_term in cases:
        matches = wrapper.similar_terms(query, filter=term_filter, limit=1)
        assert expected_term in matches.index
コード例 #12
0
def test_match_prefix(simple_frame):
    """
    Check the output of `_match_prefix` for '/c/en/figure_skate'.

    With prefix_weight=0.01 the call is expected to return three
    (term, weight) pairs, all carrying the same weight.
    """
    wrapper = VectorSpaceWrapper(frame=simple_frame)
    wrapper.load()

    query_term = '/c/en/figure_skate'
    shared_weight = 0.0033333333333333335
    expected = [
        (matched_term, shared_weight)
        for matched_term in (
            '/c/en/figure',
            '/c/en/figure skater',
            '/c/en/figure skating',
        )
    ]
    actual = wrapper._match_prefix(term=query_term, prefix_weight=0.01)
    assert expected == actual
コード例 #13
0
def test_similar_terms():
    """
    Check the terms returned by `similar_terms` for '/c/en/figure skating'.

    With limit=3, the index of the result is expected to contain the query
    term itself, '/c/en/figure skater' and '/c/en/figure'.
    """
    vectors = VectorSpaceWrapper(frame=TEST_FRAME)
    vectors.load()

    # All three checks inspect the same query, so run it only once.
    similar_index = vectors.similar_terms('/c/en/figure skating',
                                          limit=3).index
    for expected_term in ('/c/en/figure skating',
                          '/c/en/figure skater',
                          '/c/en/figure'):
        ok_(expected_term in similar_index)
コード例 #14
0
def test_lookup_neighbors():
    """
    Check `_find_neighbors` for the Polish term '/c/pl/skoki_narciarskie'.

    The result is compared, order included, against a fixed list of
    (neighbor, weight) pairs.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    actual = wrapper._find_neighbors(
        term='/c/pl/skoki_narciarskie', limit_per_term=10, weight=1.0
    )
    # '/c/en/ski_jumping' is expected three times, with different weights.
    expected = [
        ('/c/en/ski_jumping', 0.02),
        ('http://pl.dbpedia.org/resource/Skoki_narciarskie', 0.01),
        ('/c/en/ski_jumping', 0.01),
        ('/c/en/ski_jumping', 0.005),
    ]
    eq_(expected, actual)
コード例 #15
0
def test_similar_terms_filter():
    """
    Check the `filter` argument of `similar_terms` on the test frame.

    Both queries ask for a single result (limit=1); the expected term must
    be in the index of what is returned.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    top_polish = space.similar_terms(
        '/c/en/nordic_combined', filter='/c/pl', limit=1
    )
    ok_('/c/pl/kombinacja' in top_polish.index)

    top_present = space.similar_terms(
        '/c/en/gift', filter='/c/en/present', limit=1
    )
    ok_('/c/en/present' in top_present.index)
コード例 #16
0
def test_similar_terms(simple_frame):
    """
    Check the terms returned by `similar_terms` for '/c/en/figure skating'.

    With limit=3, the index of the result is expected to contain the query
    term itself, '/c/en/figure skater' and '/c/en/figure'.
    """
    vectors = VectorSpaceWrapper(frame=simple_frame)
    vectors.load()

    # All three checks inspect the same query, so run it only once.
    similar_index = vectors.similar_terms('/c/en/figure skating',
                                          limit=3).index
    for expected_term in ('/c/en/figure skating',
                          '/c/en/figure skater',
                          '/c/en/figure'):
        # Name the missing term so a failure is easy to diagnose.
        assert expected_term in similar_index, expected_term
コード例 #17
0
def test_lookup_neighbors():
    """
    Check `_find_neighbors` for the Polish term '/c/pl/skoki_narciarskie'.

    The comparison is made on sets, so neither the order of the returned
    pairs nor exact duplicates affect the outcome.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    found = wrapper._find_neighbors(
        term='/c/pl/skoki_narciarskie', limit_per_term=10, weight=1.0
    )
    found_pairs = set(found)

    expected_pairs = {
        ('/c/en/ski_jumping', 0.02),
        ('/c/en/ski_jumping', 0.01),
        ('http://pl.dbpedia.org/resource/Skoki_narciarskie', 0.01),
        ('/c/de/skispringen', 0.01),
        ('/c/en/ski_jumping', 0.005),
    }
    eq_(expected_pairs, found_pairs)
コード例 #18
0
def test_expand_terms():
    """
    Check `expand_terms` for one seed term with `limit_per_term=2`.

    The expected output is the seed term followed by three further terms,
    each paired with its weight, in this exact order.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    seed_terms = [('/c/en/ski_jumper', 1.0)]
    actual = wrapper.expand_terms(
        terms=seed_terms, limit_per_term=2, oov_vector=True
    )

    expected = [
        ('/c/en/ski_jumper', 0.9523809523809523),
        ('/c/pt/saltadores_de_esqui', 0.019047619047619046),
        ('/c/pl/skoczek_narciarski', 0.019047619047619046),
        ('/c/en/ski_jumping', 0.009523809523809523),
    ]
    eq_(expected, actual)
コード例 #19
0
def test_load():
    """
    Check the state of a VectorSpaceWrapper after `load()` on the test frame.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    ok_(wrapper.frame is not None)
    ok_(wrapper.small_frame is not None)

    labels = wrapper.frame.index
    all_english = all(label.startswith('/c/en/') for label in labels)
    ok_(all_english)
    ok_(labels.is_monotonic_increasing)

    small_frame_columns = wrapper.small_frame.shape[1]
    ok_(small_frame_columns <= 100)
    ok_(wrapper._trie is not None)

    # Raw terms must be left untouched apart from gaining the English tag:
    # the space is not turned into an underscore, and case is kept.
    ok_('/c/en/figure skater' in labels)
    ok_('/c/en/Island' in labels)
コード例 #20
0
def test_load():
    """
    Verify what `load()` leaves on a VectorSpaceWrapper built from TEST_FRAME.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    # Both frames must exist after loading.
    ok_(space.frame is not None)
    ok_(space.small_frame is not None)

    # Every label carries the English prefix and the index is sorted.
    frame_index = space.frame.index
    ok_(all(label.startswith('/c/en/') for label in frame_index))
    ok_(frame_index.is_monotonic_increasing)

    # The small frame has at most 100 columns, and the trie was built.
    column_count = space.small_frame.shape[1]
    ok_(column_count <= 100)
    ok_(space._trie is not None)

    # No transformation of raw terms other than adding the English tag.
    ok_('/c/en/figure skater' in frame_index)  # space kept, no underscore
    ok_('/c/en/Island' in frame_index)  # capital letter kept
コード例 #21
0
def test_load(simple_frame):
    """
    Check the state of a VectorSpaceWrapper after `load()` on `simple_frame`.
    """
    wrapper = VectorSpaceWrapper(frame=simple_frame)
    wrapper.load()

    assert wrapper.frame is not None
    assert wrapper.small_frame is not None

    labels = wrapper.frame.index
    all_english = all(label.startswith('/c/en/') for label in labels)
    assert all_english
    assert labels.is_monotonic_increasing

    small_frame_columns = wrapper.small_frame.shape[1]
    assert small_frame_columns <= 100
    assert wrapper._trie is not None

    # Raw terms must be left untouched apart from gaining the English tag:
    # the space is not turned into an underscore, and case is kept.
    assert '/c/en/figure skater' in labels
    assert '/c/en/Island' in labels
コード例 #22
0
def test_expand_terms():
    """
    Check `expand_terms` for one seed term with `limit_per_term=2`.

    The expected output is the seed term followed by three further terms,
    each paired with its weight, in this exact order.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    seed_terms = [('/c/en/ski_jumper', 1.0)]
    actual = wrapper.expand_terms(
        terms=seed_terms, limit_per_term=2, oov_vector=True
    )

    expected = [
        ('/c/en/ski_jumper', 0.9523809523809523),
        # found by the neighbor search
        ('/c/en/bounder', 0.019047619047619046),
        ('/c/en/skier', 0.019047619047619046),
        # found by the prefix match
        ('/c/en/ski_jumping', 0.009523809523809523),
    ]
    eq_(expected, actual)
コード例 #23
0
def test_expand_terms():
    """
    Expand a single seed term and compare against a fixed expectation.

    `expand_terms` is called with `limit_per_term=2` and `oov_vector=True`;
    the result must equal the four (term, weight) pairs below, in order.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    seed = [('/c/en/ski_jumper', 1.0)]
    expansion = space.expand_terms(terms=seed, limit_per_term=2, oov_vector=True)

    wanted = [
        ('/c/en/ski_jumper', 0.9523809523809523),
        ('/c/pt/saltadores_de_esqui', 0.019047619047619046),
        ('/c/pl/skoczek_narciarski', 0.019047619047619046),
        ('/c/en/ski_jumping', 0.009523809523809523),
    ]
    eq_(wanted, expansion)
コード例 #24
0
def test_similar_terms():
    """
    Check the terms returned by `similar_terms` for '/c/en/figure skating'.

    With limit=3, the index of the result is expected to contain the query
    term itself, '/c/en/figure skater' and '/c/en/figure'.
    """
    vectors = VectorSpaceWrapper(frame=TEST_FRAME)
    vectors.load()

    # All three checks inspect the same query, so run it only once.
    similar_index = vectors.similar_terms('/c/en/figure skating', limit=3).index
    expected_terms = (
        '/c/en/figure skating',
        '/c/en/figure skater',
        '/c/en/figure',
    )
    for expected_term in expected_terms:
        ok_(expected_term in similar_index)
コード例 #25
0
ファイル: test_vectors.py プロジェクト: terU3760/conceptnet5
def test_vector_space_wrapper():
    """
    Exercise a VectorSpaceWrapper built from the test frame.

    Covers the loaded index (sorted, made of terms, raw labels preserved),
    `index_prefix_range`, and `similar_terms`.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    every_label_is_term = all(is_term(label) for label in wrapper.frame.index)
    ok_(every_label_is_term)
    ok_(wrapper.frame.index.is_monotonic_increasing)

    # Raw terms are unchanged apart from the added English tag: the space
    # is not turned into an underscore, and case is not folded.
    for raw_label in ('/c/en/figure skater', '/c/en/Island'):
        ok_(raw_label in wrapper.frame.index)

    # index_prefix_range
    figure_range = wrapper.index_prefix_range('/c/en/figure')
    ok_(figure_range == (3, 6))
    skating_range = wrapper.index_prefix_range('/c/en/skating')
    ok_(skating_range == (0, 0))

    # similar_terms: each expected term gets its own query
    for expected_term in ('/c/en/figure skating',
                          '/c/en/figure skater',
                          '/c/en/figure'):
        similar = wrapper.similar_terms('/c/en/figure skating', limit=3)
        ok_(expected_term in similar.index)
0
def test_vector_space_wrapper(frame=None):
    """
    Check that a loaded VectorSpaceWrapper has a sorted index of '/c' labels.

    The same two checks run for a wrapper built from a user-supplied
    `frame` (only when one is given), from a vector filename, and from a
    frame loaded out of that same file.

    `frame` is passed to `load_any_embeddings` before use, so it is
    presumably a path or similar loadable reference rather than a
    DataFrame -- confirm against callers.
    """

    def _load_and_check(wrapper):
        # The first label is skipped by the prefix check; the reason is
        # not visible from this test.
        wrapper.load()
        ok_(all(label.startswith('/c') for label in wrapper.frame.index[1:]))
        ok_(wrapper.frame.index.is_monotonic_increasing)

    # A wrapper from a user-supplied frame
    if frame:
        user_frame = load_any_embeddings(frame)
        _load_and_check(VectorSpaceWrapper(frame=user_frame))

    # A wrapper from a filename
    vector_filename = DATA + '/vectors/glove12-840B.h5'
    _load_and_check(VectorSpaceWrapper(vector_filename=vector_filename))

    # A wrapper from a frame loaded out of the same file
    file_frame = load_any_embeddings(vector_filename)
    _load_and_check(VectorSpaceWrapper(frame=file_frame))
コード例 #27
0
def test_index_prefix_range():
    """
    Check `_index_prefix_range` for two prefixes on the test frame.
    """
    wrapper = VectorSpaceWrapper(frame=TEST_FRAME)
    wrapper.load()

    # (prefix, expected range)
    cases = (
        ('/c/en/figure', (3, 6)),
        ('/c/en/skating', (0, 0)),
    )
    for prefix, expected_range in cases:
        eq_(wrapper._index_prefix_range(prefix), expected_range)
コード例 #28
0
def test_index_prefix_range():
    """
    Verify the ranges `_index_prefix_range` reports on the test frame.
    """
    space = VectorSpaceWrapper(frame=TEST_FRAME)
    space.load()

    figure_range = space._index_prefix_range('/c/en/figure')
    eq_(figure_range, (3, 6))

    skating_range = space._index_prefix_range('/c/en/skating')
    eq_(skating_range, (0, 0))
コード例 #29
0
def test_cache_with_oov(multi_ling_frame):
    """
    Check `get_vector` for '/c/en/test' with `oov_vector=False`.
    """
    wrapper = VectorSpaceWrapper(frame=multi_ling_frame)
    wrapper.load()

    # The term is expected to be absent, so the returned vector must have
    # no nonzero component.
    plain_vector = wrapper.get_vector('/c/en/test', oov_vector=False)
    assert not plain_vector.any()
コード例 #30
0
def test_index_prefix_range(simple_frame):
    """
    Check `_index_prefix_range` for two prefixes on `simple_frame`.
    """
    wrapper = VectorSpaceWrapper(frame=simple_frame)
    wrapper.load()

    # (prefix, expected range)
    cases = (
        ('/c/en/figure', (3, 6)),
        ('/c/en/skating', (0, 0)),
    )
    for prefix, expected_range in cases:
        assert wrapper._index_prefix_range(prefix) == expected_range