def test_make_feature_basic(self):
    # make_feature must turn a bare function into a working feature.

    # The wrapped function records every argument it receives.
    calls = []
    sentinel = object()

    def simple_feature(data):
        calls.append(data)
        return 123

    feature = make_feature(simple_feature)

    # The result is a Feature named after the wrapped function.
    self.assertIsInstance(feature, Feature)
    self.assertEqual(feature.name, "simple_feature")

    # Evaluating the feature returns the function's value...
    outcome = feature(sentinel)
    self.assertEqual(outcome, 123)
    # ...and the function was really invoked, once, with our input.
    self.assertEqual(calls, [sentinel])
def test_complex_schema(self):
    # Schema mixing a class, a predicate and per-attribute type checks.

    class DataPoint(object):
        pass

    def point(**fields):
        # Build a DataPoint carrying exactly the given attributes.
        instance = DataPoint()
        for attr, val in fields.items():
            setattr(instance, attr, val)
        return instance

    @input_schema(DataPoint, lambda d: d.a == d.b, a=int, b=float, c=str)
    def identity(x):
        return x

    f = make_feature(identity)

    # A data point satisfying every condition goes through untouched.
    valid = point(a=2, b=2.0, c="")
    self.assertIs(f(valid), valid)

    # All schema arguments are ANDed together: breaking any single one
    # makes the whole generated schema fail.
    rejected = [
        point(a=2, b=3.14, c=""),   # positional predicate (a == b) fails
        point(a=2.0, b=2.0, c=""),  # keyword check: `a` has the wrong type
        point(a=2, b=2.0),          # keyword check: `c` is missing
    ]
    for invalid in rejected:
        with self.assertRaises(f.InputValueError):
            f(invalid)
class TestBagOfPos(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_pos`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(bag_of_pos)
    fixtures = dict(
        test_eq1=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            {u"DT", u"JJ", u"NN"},
        ),
        test_eq2=(u"Drinking", EQ, {u"DT"}),
        # Empty text: empty bag expected.
        test_eq3=(u"", EQ, set()),
    )
class TestSyntacticTreeBagOfTags(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for a feature collecting the labels of the syntactic trees.

    Each fixture is an ``(input, predicate, expected)`` tuple; the inputs are
    callables building the datapoint through ``_e``.
    """

    def bag_of_tree_tags(datapoint):
        # Return the set of labels of every non-leaf node found in the
        # syntactic trees of the datapoint's segment.
        tags = set()
        # Traverse a copy: popping from the segment's own list would empty it,
        # so a second evaluation on the same datapoint would see no trees.
        pending = list(datapoint.segment.syntactic_sentences)
        while pending:
            # Visiting order is irrelevant for a set, so pop from the end
            # (O(1)) instead of from the front.
            tree = pending.pop()
            if isinstance(tree, str):  # leaf
                continue
            tags.add(tree.label())
            pending.extend(tree)
        return tags

    feature = make_feature(bag_of_tree_tags)
    fixtures = dict(
        # No syntactic sentence supplied: no tags expected.
        test_empty=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            set(),
        ),
        test_one=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
                syntactic_sentence=""" (ROOT (S (NP (NNP Drinking) (NNP Mate)) (VP (VBZ makes) (S (NP (PRP you)) (VP (VB go) (PP (TO to) (NP (DT the) (NN toilet)))))))) """),
            EQ,
            set("ROOT S NP NNP VP VBZ PRP VB PP TO DT NN".split()),
        ),
    )
def assert_feature_passes_fixture(self, feature_spec, fixture):
    """
    Assert that a feature satisfies every example listed in `fixture`.

    `feature_spec` may be a plain function or a Feature instance.

    `fixture` is a dictionary. Each key is a string label (reported on
    failure, so the failing case can be identified) and each value is a
    tuple (input, predicate, value). `input` is what gets passed to the
    feature; predicate and value state the condition to check:

     * (input, EQ, value) checks that feature(input) == value
     * (input, APPROX, value) checks that feature(input) == value
       approximately; the tolerated error is the constant EPSILON of this
       module
     * (input, IN, values) checks that feature(input) in values
     * (input, RAISES, eclass) checks that feature(input) raises an
       exception of type eclass. Input/output validation errors always
       subclass ValueError.

    All failing cases are collected and reported together.
    """
    feature = make_feature(feature_spec)
    problems = []
    for case_name, case in fixture.items():
        sample, predicate, expected = case
        holds = _PREDICATES[predicate](feature, sample, expected)
        if holds:
            continue
        # The feature is evaluated again only to show its output.
        report = '%s failed, %s %s %s' % (
            case_name,
            feature(sample),
            _EXPLAIN_PREDICATE_FAIL[predicate],
            expected,
        )
        problems.append(report)
    self.assertFalse(problems, msg='; '.join(problems))
def assert_feature_passes_fixture(self, feature_spec, fixture):
    """
    Verify the given feature (function or Feature instance) against all
    the examples in `fixture`.

    `fixture` maps a string label (shown when the case fails) to a tuple
    (input, predicate, value). The feature is applied to `input`, and the
    predicate/value pair selects the check to perform:

     * (input, EQ, value) checks that feature(input) == value
     * (input, APPROX, value) checks that feature(input) == value
       approximately; the allowed error is the module constant EPSILON
     * (input, IN, values) checks that feature(input) in values
     * (input, RAISES, eclass) checks that feature(input) raises an
       exception of type eclass. Note that input/output validation always
       raises an exception that subclasses ValueError.

    Every failing case contributes one message; the assertion fails if
    there is at least one.
    """
    wrapped = make_feature(feature_spec)
    messages = []
    for name, (sample, predicate, expected) in fixture.items():
        satisfied = _PREDICATES[predicate](wrapped, sample, expected)
        if not satisfied:
            # Re-evaluate the feature to include its output in the message.
            details = (name, wrapped(sample),
                       _EXPLAIN_PREDICATE_FAIL[predicate], expected)
            messages.append('%s failed, %s %s %s' % details)
    self.assertFalse(messages, msg='; '.join(messages))
def test_soft_schema(self):
    # Dictionary schemas tolerate keys that the schema does not mention.

    @input_schema({'key': int})
    def key(d):
        return d['key']

    feature = make_feature(key)

    # A dictionary matching the schema exactly works as usual...
    exact = {'key': 42}
    self.assertEqual(feature(exact), 42)

    # ...and extra keys are not rejected.
    with_extra = {'key': 42, 'another': 37}
    self.assertEqual(feature(with_extra), 42)
class TestBagOfWords(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_words`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(bag_of_words)
    fixtures = dict(
        test_eq1=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {u"drinking", u"mate", u"makes", u"you",
             u"go", u"to", u"the", u"toilet"},
        ),
        test_eq2=(_e(u"Drinking"), EQ, {u"drinking"}),
        # Empty text: empty bag expected.
        test_eq3=(_e(u""), EQ, set()),
    )
def test_feature_renaming(self):
    # @feature_name overrides the name taken from the function.

    @feature_name("new name")
    def simple_feature(data):
        return 123

    renamed = make_feature(simple_feature)

    # The feature carries the explicit name...
    self.assertEqual(renamed.name, "new name")
    # ...and still evaluates normally.
    self.assertEqual(renamed(None), 123)
class TestBagOfWordPos(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_wordpos`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; the expected
    values are sets of ``(word, tag)`` pairs.
    """

    feature = make_feature(bag_of_wordpos)
    fixtures = dict(
        test_eq1=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            {
                (u"drinking", u"DT"),
                (u"mate", u"JJ"),
                (u"makes", u"NN"),
                (u"you", u"DT"),
                (u"go", u"JJ"),
                (u"to", u"NN"),
                (u"the", u"DT"),
                (u"toilet", u"JJ"),
            },
        ),
        test_eq2=(u"Drinking", EQ, {(u"drinking", u"DT")}),
        # Empty text: empty bag expected.
        test_eq3=(u"", EQ, set()),
    )
class TestEntityOrder(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``entity_order`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple. The two cases
    differ only in which entity carries the ``*`` / ``**`` marker.
    """

    feature = make_feature(entity_order)
    fixtures = dict(
        test_lr=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            1,
        ),
        test_rl=(
            u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
            EQ,
            0,
        ),
    )
class TestBagOfLemmas(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for a locally defined bag-of-lemmas feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; the inputs are
    callables building the datapoint through ``_e``.
    """

    def bag_of_lemmas(datapoint):
        # Distinct lemmas of the datapoint's segment.
        lemmas = datapoint.segment.lemmas
        return set(lemmas)

    feature = make_feature(bag_of_lemmas)
    fixtures = dict(
        test_lemmas=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {"drinking", "mate", "makes", "you", "go", "to", "the", "toilet"},
        ),
        # Empty text: no lemmas expected.
        test_none=(lambda: _e(u""), EQ, set()),
    )
class TestEntityOrder(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``entity_order`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(entity_order)
    fixtures = dict(
        test_lr=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            1,
        ),
        test_rl=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}"),
            EQ,
            0,
        ),
        # Inputs without marked entities must be rejected.
        test_empty=(_e(u""), RAISES, ValueError),
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
    )
class TestVerbsInBetweenEntitiesCount(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``verbs_count_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; the inputs are
    callables building the datapoint through ``_e`` with a ``base_pos`` list.
    """

    feature = make_feature(verbs_count_in_between)
    fixtures = dict(
        test_none=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
                base_pos=["JJ"]),
            EQ,
            0,
        ),
        test_all=(
            lambda: _e(
                u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
                base_pos=["VB", u"VBD"]),
            EQ,
            5,
        ),
    )
class TestSymbolsInBetween(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``symbols_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(symbols_in_between)
    fixtures = dict(
        test_none=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            0,
        ),
        test_one=(
            u"Drinking {Mate|thing**}, makes you go to the {toilet|thing*}",
            EQ,
            1,
        ),
        # Two symbols still give 1: the feature is only boolean.
        test_two=(
            u"Drinking {Mate|thing**}, makes you go, to the {toilet|thing*}",
            EQ,
            1,
        ),
    )
class TestVerbsTotalCount(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``verbs_count`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; some inputs
    are callables building the datapoint through ``_e``, others are plain
    text.
    """

    feature = make_feature(verbs_count)
    fixtures = dict(
        test_none=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
                base_pos=["JJ"]),
            EQ,
            0,
        ),
        test_all=(
            lambda: _e(
                u"Drinking {Argentinean Mate|thing**} makes you go to the {toilet|thing*}",
                base_pos=["VB", u"VBD"]),
            EQ,
            9,
        ),
        # Unlike the in-between features, these inputs are expected to
        # evaluate to 0.
        test_empty=(u"", EQ, 0),
        test_no_entity=(u"Drinking mate yeah", EQ, 0),
    )
def test_output_schema(self):
    # @output_schema validates what the feature returns.

    @output_schema(str)
    def identity(x):
        return x

    feature = make_feature(identity)

    # output_schema does NOT alter the decorated function itself.
    self.assertEqual(identity(0), 0)

    # Through the feature, a conforming output is passed along unchanged...
    self.assertEqual(feature("x"), "x")

    # ...while an output violating the schema raises.
    with self.assertRaises(feature.OutputValueError):
        feature(0)
class TestBagOfWordBigrams(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_word_bigrams`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(bag_of_word_bigrams)
    fixtures = dict(
        test_eq1=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {
                (u"drinking", u"mate"),
                (u"mate", u"makes"),
                (u"makes", u"you"),
                (u"you", u"go"),
                (u"go", u"to"),
                (u"to", u"the"),
                (u"the", u"toilet"),
            },
        ),
        test_eq2=(_e(u"Drinking mate"), EQ, {(u"drinking", u"mate")}),
        # Fewer than two words: no bigram expected.
        test_eq3=(_e(u"Drinking"), EQ, set()),
        test_eq4=(_e(u""), EQ, set()),
    )
class TestBagOfWordsInBetween(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_words_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(bag_of_words_in_between)
    fixtures = dict(
        test_eq1=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            {u"makes", u"you", u"go", u"to", u"the"},
        ),
        test_eq2=(
            u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
            EQ,
            {u"makes", u"you", u"go", u"to", u"the"},
        ),
        # A third, unmarked entity sits between the two marked ones.
        test_eq3=(
            u"Drinking {Mate|thing*} or {Tea|thing} makes you go to the {toilet|thing**}",
            EQ,
            {u"or", u"tea", u"makes", u"you", u"go", u"to", u"the"},
        ),
        # Adjacent entities: nothing in between.
        test_eq5=(u"{Mate|thing**} {toilet|thing*}", EQ, set()),
    )
class TestVerbsInBetweenEntitiesCount(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``verbs_count_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(verbs_count_in_between)
    fixtures = dict(
        test_none=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
               base_pos=["JJ"]),
            EQ,
            0,
        ),
        test_all=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
               base_pos=["VB", u"VBD"]),
            EQ,
            5,
        ),
        # Inputs without marked entities must be rejected.
        test_empty=(_e(u""), RAISES, ValueError),
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
    )
class TestSymbolsInBetween(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``symbols_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(symbols_in_between)
    fixtures = dict(
        test_none=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            0,
        ),
        test_one=(
            _e(u"Drinking {Mate|thing**}, makes you go to the {toilet|thing*}"),
            EQ,
            1,
        ),
        # Two symbols still give 1: the feature is only boolean.
        test_two=(
            _e(u"Drinking {Mate|thing**}, makes you go, to the {toilet|thing*}"),
            EQ,
            1,
        ),
        # Inputs without marked entities must be rejected.
        test_empty=(_e(u""), RAISES, ValueError),
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
    )
class TestBagOfPosInBetween(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_pos_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(bag_of_pos_in_between)
    fixtures = dict(
        test_eq1=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {u"DT", u"JJ", u"NN"},
        ),
        test_eq2=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}"),
            EQ,
            {u"DT", u"JJ", u"NN"},
        ),
        # Inputs without marked entities must be rejected.
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
        test_empty=(_e(u""), RAISES, ValueError),
        # Adjacent entities: nothing in between.
        test_eq5=(_e(u"{Mate|thing**} {toilet|thing*}"), EQ, set()),
    )
class TestBagOfWordPosInBetween(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_wordpos_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed, and expected values
    are sets of ``(word, tag)`` pairs.
    """

    feature = make_feature(bag_of_wordpos_in_between)
    fixtures = dict(
        test_eq1=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {
                (u"makes", u"NN"),
                (u"you", u"DT"),
                (u"go", u"JJ"),
                (u"to", u"NN"),
                (u"the", u"DT"),
            },
        ),
        test_eq2=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}"),
            EQ,
            {
                (u"makes", u"NN"),
                (u"you", u"DT"),
                (u"go", u"JJ"),
                (u"to", u"NN"),
                (u"the", u"DT"),
            },
        ),
        # Inputs without marked entities must be rejected.
        test_empty=(_e(u""), RAISES, ValueError),
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
        # Adjacent entities: nothing in between.
        test_eq6=(_e(u"{Mate|thing**} {toilet|thing*}"), EQ, set()),
    )
class TestEntityDistance(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``entity_distance`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(entity_distance)
    fixtures = dict(
        test_lr=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            5,
        ),
        test_rl=(
            u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
            EQ,
            5,
        ),
        # One of the entities spans two words.
        test_multiword=(
            u"Drinking {Argentinean Mate|thing*} the {toilet|thing**}",
            EQ,
            1,
        ),
        # Adjacent entities.
        test_zero=(
            u"Drinking {Argentinean Mate|thing*} {toilet|thing**}",
            EQ,
            0,
        ),
    )
class TestLemmasInBetweenEntitiesCount(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for a locally defined feature counting lemmas in between.

    Each fixture is an ``(input, predicate, expected)`` tuple; the inputs are
    callables building the datapoint through ``_e``.
    """

    def lemmas_count_in_between(datapoint):
        # Count the lemmas inside the offsets given by in_between_offsets.
        start, end = in_between_offsets(datapoint)
        return sum(1 for _ in datapoint.segment.lemmas[start:end])

    feature = make_feature(lemmas_count_in_between)
    fixtures = dict(
        test_lemmas=(
            lambda: _e(
                u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            5,
        ),
        # Adjacent entities: nothing in between.
        test_none=(
            lambda: _e(u"Drinking {Mate|thing*} {rocks|feeling**}"),
            EQ,
            0,
        ),
    )
def test_input_schema(self):
    # @input_schema validates what the feature receives.

    @input_schema(str)
    def length(s):
        return len(s)

    feature = make_feature(length)

    # input_schema does NOT alter the decorated function itself.
    self.assertEqual(length([1, 2, 3]), 3)

    # Through the feature, a conforming input is processed as usual...
    self.assertEqual(feature("wxyz"), 4)

    # ...while an input violating the schema raises.
    with self.assertRaises(feature.InputValueError):
        feature([1, 2, 3])
class TestEntityDistance(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``entity_distance`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(entity_distance)
    fixtures = dict(
        test_lr=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            5,
        ),
        test_rl=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}"),
            EQ,
            5,
        ),
        # One of the entities spans two words.
        test_multiword=(
            _e(u"Drinking {Argentinean Mate|thing*} the {toilet|thing**}"),
            EQ,
            1,
        ),
        # Adjacent entities.
        test_zero=(
            _e(u"Drinking {Argentinean Mate|thing*} {toilet|thing**}"),
            EQ,
            0,
        ),
        # Inputs without marked entities must be rejected.
        test_empty=(_e(u""), RAISES, ValueError),
        test_no_entity=(_e(u"Drinking mate yeah"), RAISES, ValueError),
    )
class TestBagOfWordBigramsInBetween(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_word_bigrams_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; expected
    values are sets of word pairs.
    """

    feature = make_feature(bag_of_word_bigrams_in_between)
    fixtures = dict(
        test_eq1=(
            u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}",
            EQ,
            {
                (u"makes", u"you"),
                (u"you", u"go"),
                (u"go", u"to"),
                (u"to", u"the"),
            },
        ),
        test_eq2=(
            u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}",
            EQ,
            {
                (u"makes", u"you"),
                (u"you", u"go"),
                (u"go", u"to"),
                (u"to", u"the"),
            },
        ),
        test_eq3=(
            u"{Mate|thing*} makes you {toilet|thing**}",
            EQ,
            {(u"makes", u"you")},
        ),
        # A single word in between cannot form a bigram.
        test_eq4=(u"{Mate|thing*} makes {toilet|thing**}", EQ, set()),
        # Adjacent entities: nothing in between.
        test_eq6=(u"{Mate|thing**} {toilet|thing*}", EQ, set()),
    )
class TestBagOfWordsInBetween(TestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``bag_of_words_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple; inputs are
    built with ``_e`` when the class body is executed.
    """

    feature = make_feature(bag_of_words_in_between)
    fixtures = dict(
        test_eq1=(
            _e(u"Drinking {Mate|thing*} makes you go to the {toilet|thing**}"),
            EQ,
            {u"makes", u"you", u"go", u"to", u"the"},
        ),
        test_eq2=(
            _e(u"Drinking {Mate|thing**} makes you go to the {toilet|thing*}"),
            EQ,
            {u"makes", u"you", u"go", u"to", u"the"},
        ),
        # A third, unmarked entity sits between the two marked ones.
        test_eq3=(
            _e(u"Drinking {Mate|thing*} or {Tea|thing} makes you go to the {toilet|thing**}"),
            EQ,
            {u"or", u"tea", u"makes", u"you", u"go", u"to", u"the"},
        ),
        # Inputs lacking one or both marked entities must be rejected.
        test_err=(_e(u"Drinking mate yeah"), RAISES, ValueError),
        test_err2=(_e(u"Drinking {mate|thing*} yeah"), RAISES, ValueError),
        test_empty=(_e(u""), RAISES, ValueError),
        # Adjacent entities: nothing in between.
        test_eq5=(_e(u"{Mate|thing**} {toilet|thing*}"), EQ, set()),
    )
class TestOtherEntitiesInBetween(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``other_entities_in_between`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(other_entities_in_between)
    fixtures = dict(
        test_lr=(
            u"Drinking {Mate|thing*} makes {you|told} go to the {toilet|thing**}",
            EQ,
            1,
        ),
        test_rl=(
            u"Drinking {Mate|thing**} makes {you|told} go to the {toilet|thing*}",
            EQ,
            1,
        ),
        test_many=(
            u"Drinking {Mate|thing**} {makes|yeah} {you|told} {go|bad} {to|music} {the|aaa} {toilet|thing*}",
            EQ,
            5,
        ),
        # One of the marked entities spans two words.
        test_multiword=(
            u"Drinking {Argentinean Mate|thing*} {the|told} {toilet|thing**}",
            EQ,
            1,
        ),
        # No entity between the marked ones.
        test_zero=(
            u"Drinking {Argentinean Mate|thing*} {toilet|thing**}",
            EQ,
            0,
        ),
        test_zero2=(
            u"Drinking {Argentinean Mate|thing*} the {toilet|thing**}",
            EQ,
            0,
        ),
    )
class TestTotalEntitiesNumber(ManagerTestCase, FeatureEvidenceBaseCase):
    """Fixtures for the ``total_number_of_entities`` feature.

    Each fixture is an ``(input, predicate, expected)`` tuple.
    """

    feature = make_feature(total_number_of_entities)
    fixtures = dict(
        test_lr=(
            u"Drinking {Mate|thing*} makes {you|told} go to the {toilet|thing**}",
            EQ,
            3,
        ),
        test_rl=(
            u"Drinking {Mate|thing**} makes {you|told} go to the {toilet|thing*}",
            EQ,
            3,
        ),
        test_many=(
            u"Drinking {Mate|thing**} {makes|yeah} {you|told} {go|bad} {to|music} {the|aaa} {toilet|thing*}",
            EQ,
            7,
        ),
        # One of the marked entities spans two words.
        test_multiword=(
            u"Drinking {Argentinean Mate|thing*} {the|told} {toilet|thing**}",
            EQ,
            3,
        ),
        # Only the two marked entities are present.
        test_zero=(
            u"Drinking {Argentinean Mate|thing*} {toilet|thing**}",
            EQ,
            2,
        ),
        test_zero2=(
            u"Drinking {Argentinean Mate|thing*} the {toilet|thing**}",
            EQ,
            2,
        ),
    )
def assert_passes_fuzz(self, feature_spec, tries=1000):
    """
    Fuzz-test a feature with randomly generated data points.

    `feature_spec` is a function or Feature instance; it must have an input
    schema from which data points can be generated. `tries` data points are
    generated and each one is fed to the feature.

    The test fails as soon as an evaluation raises an exception, or as soon
    as an output does not validate against the feature's output schema.
    """
    feature = make_feature(feature_spec)
    # `range` instead of `xrange`: the latter does not exist on Python 3,
    # and for the usual number of tries the list built on Python 2 is cheap.
    for _ in range(tries):
        data_point = generate.generate(feature.input_schema)
        try:
            output = feature(data_point)
        except Exception as e:
            # Any exception is a fuzz failure; report the offending input.
            self.fail("Error evaluating; input=%r error=%r" % (data_point, e))
        try:
            feature.output_schema.validate(output)
        except schema.SchemaError:
            self.fail("Invalid output schema; input=%r output=%r" %
                      (data_point, output))