def test_errors_are_raised(self):
    # Fit on an empty dataset, then check that consuming the output of
    # transform over SAMPLES raises RuntimeError.
    self.ev = TolerantFeatureEvaluator([BrokenFeature, DumbFeatureA])
    self.ev.fit([])
    with self.assertRaises(RuntimeError):
        # list() forces the generation of the transformed values
        list(self.ev.transform(SAMPLES))
class TolerantEvaluatorTransformTests(TestCase):
    # Checks on the behaviour of `transform` once the evaluator was fitted.

    def test_errors_are_raised(self):
        # Fit on an empty dataset, then check that consuming the output of
        # transform over SAMPLES raises RuntimeError.
        self.ev = TolerantFeatureEvaluator([BrokenFeature, DumbFeatureA])
        self.ev.fit([])
        with self.assertRaises(RuntimeError):
            # list() forces the generation of the transformed values
            list(self.ev.transform(SAMPLES))
def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
    # With zero failures tolerated, a sample lacking 'description' is
    # expected to still be present in the output of fit_transform.
    nodescription = {'nodescription': u'this sample has no description'}
    samples = SAMPLES + [nodescription]
    self.ev = TolerantFeatureEvaluator(
        [DescriptionFeature, DumbFeatureA, EntireSampleFeature])
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
    result = list(self.ev.fit_transform(samples))
    self.assertEqual(len(samples), len(result))
    # EntireSampleFeature is the last, so is the last value per tuple
    last_values = [row[-1] for row in result]
    self.assertIn(nodescription, last_values)
def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
    # A sample that makes a (still alive) feature fail must not show up in
    # the output of fit_transform.
    self.ev = TolerantFeatureEvaluator(
        [DescriptionFeature, EntireSampleFeature])
    self.ev.FEATURE_STRICT_UNTIL = 0
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
    samples = SAMPLES[:]
    nodescription = {'nodescription': u'this sample has no description'}
    samples.append(nodescription)
    # Materialize the output exactly once: it is inspected twice below, and
    # a one-shot iterator would be empty on the second pass, turning the
    # assertNotIn check into a no-op.
    result = list(self.ev.fit_transform(samples))
    self.assertLess(len(result), len(samples))
    # EntireSampleFeature is the last, so is the last value per tuple
    self.assertNotIn(nodescription, [r[-1] for r in result])
class TolerantEvaluatorFitTransformTests(TestCase, TolerantFittingCases):
    # We try to mimic most of the cases seen on Fit tests, and check that
    # they are working equivalently with fit_transform
    fit_method_name = 'fit_transform'

    def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, EntireSampleFeature])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        # Materialize the output exactly once: it is inspected twice below,
        # and a one-shot iterator would be empty on the second pass, turning
        # the assertNotIn check into a no-op.
        result = list(self.ev.fit_transform(samples))
        self.assertLess(len(result), len(samples))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertNotIn(nodescription, [r[-1] for r in result])

    def test_if_a_feature_is_excluded_all_results_doesnt_include_it(self):
        # This means: if a Feature evaluated fine for some samples until it
        # was excluded, once we decided to exclude it, we must make sure that
        # previous samples for which this feature was evaluated, are now
        # stripped out of those evaluations
        self.ev = TolerantFeatureEvaluator([DescriptionFeature, DumbFeatureA])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        # Materialized so the emptiness check below is meaningful even if
        # fit_transform hands back a lazy iterator (always truthy).
        result = list(
            self.ev.fit_transform(SAMPLES + [{'nodescription': u'tada!'}]))
        # Check that there are results. Otherwise, next loop is dumb
        self.assertTrue(result)
        for r in result:
            self.assertEqual(len(r), 1)  # only one value per sample
            self.assertEqual(r[0], 'a')  # Remember DumbFeatureA returns 'a'

    def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, DumbFeatureA, EntireSampleFeature])
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(samples), len(result))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertIn(nodescription, [r[-1] for r in result])

    def test_consumable_is_consumed_only_once(self):
        samples = (s for s in SAMPLES)  # can be consumed once only
        self.ev = TolerantFeatureEvaluator([EntireSampleFeature])
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(SAMPLES), len(result))
def test_if_a_feature_is_excluded_all_results_doesnt_include_it(self):
    # This means: if a Feature evaluated fine for some samples until it was
    # excluded, once we decided to exclude it, we must make sure that
    # previous samples for which this feature was evaluated, are now
    # stripped out of those evaluations
    self.ev = TolerantFeatureEvaluator([DescriptionFeature, DumbFeatureA])
    self.ev.FEATURE_STRICT_UNTIL = 0
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
    # Materialized so the emptiness check below is meaningful even if
    # fit_transform hands back a lazy iterator (which is always truthy).
    result = list(
        self.ev.fit_transform(SAMPLES + [{'nodescription': u'tada!'}]))
    # Check that there are results. Otherwise, next loop is dumb
    self.assertTrue(result)
    for r in result:
        self.assertEqual(len(r), 1)  # only one value per sample
        self.assertEqual(r[0], 'a')  # Remember DumbFeatureA returns 'a'
class TolerantEvaluatorFitTransformTests(TestCase, TolerantFittingCases):
    # We try to mimic most of the cases seen on Fit tests, and check that
    # they are working equivalently with fit_transform
    fit_method_name = 'fit_transform'

    def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, EntireSampleFeature])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        # The output is inspected twice, so turn it into a list first; if it
        # were a one-shot iterator the second inspection would see nothing
        # and the assertNotIn below could never fail.
        result = list(self.ev.fit_transform(samples))
        self.assertLess(len(result), len(samples))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertNotIn(nodescription, [r[-1] for r in result])

    def test_if_a_feature_is_excluded_all_results_doesnt_include_it(self):
        # This means: if a Feature evaluated fine for some samples until it
        # was excluded, once we decided to exclude it, we must make sure that
        # previous samples for which this feature was evaluated, are now
        # stripped out of those evaluations
        self.ev = TolerantFeatureEvaluator([DescriptionFeature, DumbFeatureA])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        # A lazy iterator is always truthy, so build a list before checking
        # that there is something to loop over.
        result = list(
            self.ev.fit_transform(SAMPLES + [{'nodescription': u'tada!'}]))
        # Check that there are results. Otherwise, next loop is dumb
        self.assertTrue(result)
        for r in result:
            self.assertEqual(len(r), 1)  # only one value per sample
            self.assertEqual(r[0], 'a')  # Remember DumbFeatureA returns 'a'

    def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, DumbFeatureA, EntireSampleFeature])
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(samples), len(result))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertIn(nodescription, [r[-1] for r in result])

    def test_consumable_is_consumed_only_once(self):
        samples = (s for s in SAMPLES)  # can be consumed once only
        self.ev = TolerantFeatureEvaluator([EntireSampleFeature])
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(SAMPLES), len(result))
def test_when_feature_is_excluded_discarded_sample_is_reconsidered(self):
    # Fit on a single sample carrying neither a description nor an age,
    # with zero failures tolerated, and check that only DumbFeatureA is
    # left alive afterwards.
    bad_sample = {'nothing': u'this sample has no description, not age'}
    self.ev = TolerantFeatureEvaluator(
        [DescriptionFeature, AgeFeature, DumbFeatureA])
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
    self.apply_fit([bad_sample])
    survivors = self.ev.alive_features
    self.assertEqual(survivors, (DumbFeatureA, ))
def __init__(self, features, tolerant=False, sparse=True):
    """Build the evaluator and the flattener used by this object.

    `features` is an iterable whose items are each passed through
    `make_feature`. `tolerant` selects `TolerantFeatureEvaluator` over
    `FeatureEvaluator`, and `sparse` is forwarded to
    `FeatureMappingFlattener`.
    """
    # Upgrade every entry of `features` to a `Feature` instance.
    upgraded = [make_feature(feature) for feature in features]
    evaluator_class = (
        TolerantFeatureEvaluator if tolerant else FeatureEvaluator)
    self.evaluator = evaluator_class(upgraded)
    self.flattener = FeatureMappingFlattener(sparse=sparse)
def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
    # With zero failures tolerated, the sample lacking 'description' is
    # expected to come out of fit_transform together with all the others.
    features = [DescriptionFeature, DumbFeatureA, EntireSampleFeature]
    self.ev = TolerantFeatureEvaluator(features)
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
    nodescription = {'nodescription': u'this sample has no description'}
    samples = SAMPLES + [nodescription]
    result = list(self.ev.fit_transform(samples))
    self.assertEqual(len(samples), len(result))
    # EntireSampleFeature is the last, so is the last value per tuple
    trailing = [values[-1] for values in result]
    self.assertIn(nodescription, trailing)
def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
    # A sample that makes a (still alive) feature fail must not show up in
    # the output of fit_transform.
    self.ev = TolerantFeatureEvaluator(
        [DescriptionFeature, EntireSampleFeature])
    self.ev.FEATURE_STRICT_UNTIL = 0
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
    samples = SAMPLES[:]
    nodescription = {'nodescription': u'this sample has no description'}
    samples.append(nodescription)
    # The output is inspected twice, so turn it into a list first; if it
    # were a one-shot iterator the second inspection would see nothing and
    # the assertNotIn below could never fail.
    result = list(self.ev.fit_transform(samples))
    self.assertLess(len(result), len(samples))
    # EntireSampleFeature is the last, so is the last value per tuple
    self.assertNotIn(nodescription, [r[-1] for r in result])
def test_feature_is_excluded_after_K_fails_no_matter_when(self):
    # Wrap BrokenFeature in a mock so its calls can be counted
    broken_feature = mock.Mock(wraps=BrokenFeature, spec=BrokenFeature)
    self.ev = TolerantFeatureEvaluator([broken_feature, DumbFeatureA])
    # Strict mode off
    self.ev.FEATURE_STRICT_UNTIL = 0
    # A feature may fail up to 2 times (the K of the test name)
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 2
    self.apply_fit(SAMPLES[:])
    # The feature is no longer listed among the alive ones
    self.assertNotIn(broken_feature, self.ev.alive_features)
    # And it was not called again after its K+1-th failure
    self.assertEqual(
        broken_feature.call_count,
        self.ev.FEATURE_MAX_ERRORS_ALLOWED + 1)
def test_feature_is_excluded_if_fails_on_firts_M_samples(self):
    # DescriptionFeature needs a "description" key on the data-point; wrap
    # it in a mock so its calls can be counted.
    description_feature = mock.Mock(
        wraps=DescriptionFeature, spec=DescriptionFeature)
    self.ev = TolerantFeatureEvaluator([description_feature, DumbFeatureA])
    # 2 plays the role of the M in the test name
    self.ev.FEATURE_STRICT_UNTIL = 2
    # Put a sample without "description" at the very front
    samples = [{'pk': 33}] + SAMPLES
    self.apply_fit(samples)
    # The feature is no longer listed among the alive ones
    self.assertNotIn(description_feature, self.ev.alive_features)
    # It failed on the first sample and was not called again afterwards
    self.assertEqual(description_feature.call_count, 1)
def test_alive_features_can_hide_bad_samples(self):
    # While a Feature is alive, each time it fails, the sample causing the
    # failure is hidden from the rest of the features, in an attempt to
    # minimize the damage of rare bad samples.
    self.ev = TolerantFeatureEvaluator(
        [DescriptionFeature, AgeFeature, DumbFeatureA])
    self.ev.FEATURE_STRICT_UNTIL = 0  # No strict mode
    # A single failure per feature is tolerated; a second one excludes it
    self.ev.FEATURE_MAX_ERRORS_ALLOWED = 1
    no_age = {'description': u'i have'}
    bad_sample = {'nothing': u'this sample has no description, not age'}
    samples = [bad_sample, no_age]
    # Since the 2nd sample will succeed when evaluated on
    # DescriptionFeature but fail when evaluated with AgeFeature, we want
    # to check if at the end the AgeFeature is alive or not. If not, it's
    # because it suffered 2 failures, so the bad_sample wasn't hidden.
    self.apply_fit(samples)
    # Checking the preconditions of our test. A unittest assertion is used
    # instead of a bare `assert`, which is stripped when running with -O.
    self.assertIn(DescriptionFeature, self.ev.alive_features)
    self.assertIn(AgeFeature, self.ev.alive_features)
def test_consumable_is_consumed_only_once(self):
    # Feed fit_transform a generator (which can be walked a single time)
    # and check that one result per sample still comes out.
    self.ev = TolerantFeatureEvaluator([EntireSampleFeature])
    one_shot = (sample for sample in SAMPLES)
    result = list(self.ev.fit_transform(one_shot))
    expected_count = len(SAMPLES)
    self.assertEqual(expected_count, len(result))
def test_once_fitted_says_fitted(self):
    # The `fitted` flag is falsy on a fresh evaluator and truthy once
    # apply_fit has run.
    evaluator = TolerantFeatureEvaluator([DumbFeatureA])
    self.ev = evaluator
    self.assertFalse(evaluator.fitted)
    self.apply_fit([])
    self.assertTrue(self.ev.fitted)
def test_fit_creates_alive_features_tuple(self):
    # `alive_features` must not exist before fitting, and must exist after.
    attribute = 'alive_features'
    self.ev = TolerantFeatureEvaluator([DumbFeatureA])
    present_before = hasattr(self.ev, attribute)
    self.assertFalse(present_before)
    self.apply_fit([])
    present_after = hasattr(self.ev, attribute)
    self.assertTrue(present_after)
def test_returns_itself(self):
    # fit() on an empty dataset is expected to hand back the evaluator
    self.ev = TolerantFeatureEvaluator([DumbFeatureA])
    returned = self.ev.fit([])
    self.assertEqual(returned, self.ev)
class TolerantEvaluatorFitTests(TestCase, TolerantFittingCases):
    # Runs the shared fitting cases with `fit` as the method under test.
    fit_method_name = 'fit'

    def test_returns_itself(self):
        # fit() on an empty dataset is expected to hand back the evaluator
        self.ev = TolerantFeatureEvaluator([DumbFeatureA])
        returned = self.ev.fit([])
        self.assertEqual(returned, self.ev)
def test_if_no_more_features_then_blows_up(self):
    # With BrokenFeature as the only feature, fitting is expected to raise
    # the evaluator's NoFeaturesLeftError.
    self.ev = TolerantFeatureEvaluator([BrokenFeature])
    self.ev.FEATURE_STRICT_UNTIL = 2
    self.assertRaises(
        self.ev.NoFeaturesLeftError, self.apply_fit, SAMPLES[:])