def test_average():
    """Check `Substitution._static_average` and `Substitution._average`.

    Two substitutions sharing the same destination quote are evaluated
    against a small dictionary-backed feature, first with a value for most
    words, then with a mapping where several values are missing or NaN.
    """
    drop_caches()

    # Two substitutions. Both point at the very same destination quote
    # object; only the source quote differs ("dogs" vs. "frisbee").
    dogs_source = Quote(string="Chase it others is the dogs hound")
    shared_destination = Quote(string="Others is the hound hound")
    dogs_substitution = Substitution(
        source=dogs_source,
        destination=shared_destination,
        start=2,
        position=3,
    )
    frisbee_source = Quote(string="Chase it others is the frisbee hound")
    frisbee_substitution = Substitution(
        source=frisbee_source,
        destination=shared_destination,
        start=2,
        position=3,
    )

    # Our test feature, backed by a plain word -> value mapping.
    scores = {
        "dog": 2,
        "hound": 3,
        "frisbee": 4,
        "chase": 6,
        "cad": 7,
        "other": 8,
    }

    def feature(word=None):
        # With a word: its value, or NaN when the word is not in the mapping.
        if word is not None:
            return scores.get(word, np.nan)
        # Without a word: the set of all words the feature has a value for.
        return set(scores)

    # The global average (mean of the six values above) is well retrieved
    # for both substitutions, through either entry point.
    expected_global_average = 30 / 6
    for substitution in (dogs_substitution, frisbee_substitution):
        assert substitution._static_average(feature) == expected_global_average
        assert substitution._average(feature, False) == expected_global_average

    # The average over synonyms (computed on lemmas) is well retrieved too.
    assert dogs_substitution._average(feature, True) == np.mean([3, 6, 7])
    # 'frisbee' has no synonyms.
    assert np.isnan(frisbee_substitution._average(feature, True))

    # If we have a lot of NaNs, things still work well.
    # NOTE(review): caches are dropped first, presumably so that results
    # computed from the previous mapping are not reused -- confirm.
    drop_caches()
    # Rebinding `scores` is seen by `feature`, since the closure resolves the
    # name at call time.
    scores = {"dog": 2, "frisbee": 4, "chase": np.nan, "cad": 7, "other": 8}
    assert dogs_substitution._average(feature, True) == 7
    # 'frisbee' has no synonyms.
    assert np.isnan(frisbee_substitution._average(feature, True))