def test_middle_bigram_pos_tag_featurizer(corpus):
    """Check `middle_bigram_pos_tag_featurizer` on the Randall_Munroe/xkcd triple.

    The counter handed to the featurizer already holds a count of 5 for
    '<s> VBZ'. The expected total of 6 for that key can therefore only be
    reached if the featurizer adds to the counter it is given instead of
    starting from a fresh one.

    `corpus` is passed straight through to the featurizer.
    """
    from collections import defaultdict

    triple = rel_ext.KBTriple(
        rel='worked_at', sbj='Randall_Munroe', obj='xkcd')

    # Pre-seed one feature so that a reinitialized counter would be detected.
    seeded_counts = defaultdict(int)
    seeded_counts['<s> VBZ'] += 5

    observed = middle_bigram_pos_tag_featurizer(triple, corpus, seeded_counts)

    expected = defaultdict(int)
    expected.update({
        '<s> VBZ': 6,
        'VBZ DT': 1,
        'DT JJ': 1,
        'JJ VBN': 1,
        'VBN IN': 1,
        'IN </s>': 1,
    })

    failure_template = "Expected:\n{}\nGot:\n{}"
    assert observed == expected, failure_template.format(expected, observed)
def test_directional_bag_of_words_featurizer(corpus):
    """Check `directional_bag_of_words_featurizer` on the Randall_Munroe/xkcd triple.

    The counter handed to the featurizer already holds a count of 5 for
    'is_OS'. The expected total of 6 for that key can therefore only be
    reached if the featurizer adds to the counter it is given instead of
    starting from a fresh one.

    `corpus` is passed straight through to the featurizer.
    """
    from collections import defaultdict

    triple = rel_ext.KBTriple(
        rel='worked_at', sbj='Randall_Munroe', obj='xkcd')

    # Pre-seed one feature so that a reinitialized counter would be detected.
    seeded_counts = defaultdict(int)
    seeded_counts['is_OS'] += 5

    observed = directional_bag_of_words_featurizer(
        triple, corpus, seeded_counts)

    expected = defaultdict(int)
    expected.update({
        'is_OS': 6,
        'a_OS': 1,
        'webcomic_OS': 1,
        'created_OS': 1,
        'by_OS': 1,
    })

    failure_template = "Expected:\n{}\nGot:\n{}"
    assert observed == expected, failure_template.format(expected, observed)
def test_synset_featurizer(corpus):
    """Spot-check `synset_featurizer` on the Randall_Munroe/xkcd triple.

    The counter handed to the featurizer already holds a count of 5 for
    "Synset('be.v.01')". The expected total of 6 for that key can therefore
    only be reached if the featurizer adds to the counter it is given
    instead of starting from a fresh one.

    `corpus` is passed straight through to the featurizer.
    """
    from collections import defaultdict

    triple = rel_ext.KBTriple(
        rel='worked_at', sbj='Randall_Munroe', obj='xkcd')

    # Pre-seed one feature so that a reinitialized counter would be detected.
    seeded_counts = defaultdict(int)
    seeded_counts["Synset('be.v.01')"] += 5

    observed = synset_featurizer(triple, corpus, seeded_counts)

    # The complete output tends to be long, so only a couple of entries are
    # checked rather than the whole counter.
    spot_checks = {
        "Synset('be.v.01')": 6,
        "Synset('embody.v.02')": 1,
    }
    failure_template = "Incorrect count for {}: Expected {}; Got {}"
    for synset_key, wanted in spot_checks.items():
        actual = observed[synset_key]
        assert actual == wanted, failure_template.format(
            synset_key, wanted, actual)