def test_clustering_none():
    """Check what a bare `_clustering()` call reports for a hand-made pickle.

    A two-entry mapping is pickled to the path held in `settings.CLUSTERING`
    while that setting is overridden. Converting the result of
    `SubstitutionFeaturesMixin._clustering()` (called without a word) to a set
    is then expected to give exactly the keys of that mapping.
    """
    drop_caches()
    fake_clustering = {"dog": 2, "cat": 3}
    with settings.file_override("CLUSTERING"):
        # The inner block is left before reading, so the pickle file is
        # closed by the time `_clustering()` is called.
        with open(settings.CLUSTERING, "wb") as handle:
            pickle.dump(fake_clustering, handle)
        known_words = set(SubstitutionFeaturesMixin._clustering())
        assert known_words == {"dog", "cat"}
def test_clustering_none_with_computed():
    """Check the form of every word yielded by a bare `_clustering()` call.

    Each word must be lowercase, or have a first or last character accepted
    by `is_int`, or be one of the two symbols "%" and "!".
    NOTE(review): presumably this runs against the real computed clustering
    data rather than a fixture -- confirm against the project settings.
    """
    drop_caches()
    allowed_symbols = ["%", "!"]
    # Lemmas are all lowercase.
    for lemma in SubstitutionFeaturesMixin._clustering():
        # Order matters: the checks short-circuit from left to right.
        assert (
            lemma.islower()
            or is_int(lemma[0])
            or is_int(lemma[-1])
            or lemma in allowed_symbols
        )
def test_clustering():
    """Check `_clustering(word)` against reference values for known words.

    "dog" and "play" must match their reference floats to within 1e-17, and
    "wickiup" is expected to give NaN.
    """
    drop_caches()
    tolerance = 1e-17
    reference_values = (
        ("dog", 0.0009318641757868838),
        ("play", 0.0016238663632016216),
    )
    for word, expected in reference_values:
        observed = SubstitutionFeaturesMixin._clustering(word)
        assert abs(observed - expected) < tolerance
    # NOTE(review): presumably "wickiup" is absent from the clustering data,
    # hence NaN -- confirm against the feature implementation.
    assert np.isnan(SubstitutionFeaturesMixin._clustering("wickiup"))