def compute_features(self, tokens, drop_out=False):
    """Build one feature dict per token of the input sequence.

    When *drop_out* is true, each feature whose drop-out ratio is
    positive may be randomly skipped for a given token. This is meant
    to be used at training time only.
    """
    # Attach a stem to every token, falling back to the normalized
    # value when no stem resource is available for the language.
    if resource_exists(self.language, STEMS):
        stemmed_tokens = [
            Token(tok.value, tok.start, tok.end,
                  stem=stem(tok.normalized_value, self.language))
            for tok in tokens
        ]
    else:
        stemmed_tokens = [
            Token(tok.value, tok.start, tok.end,
                  stem=tok.normalized_value)
            for tok in tokens
        ]

    # Per-token cache shared by all feature computations.
    cache = [{TOKEN_NAME: tok} for tok in stemmed_tokens]
    rng = check_random_state(self.config.random_seed)

    all_features = []
    for index in range(len(stemmed_tokens)):
        token_features = UnupdatableDict()
        for feature in self.features:
            # Only draw a random number when drop-out is active.
            if drop_out and rng.rand() < feature.drop_out:
                continue
            computed = feature.compute(index, cache)
            if computed is not None:
                token_features[feature.name] = computed
        all_features.append(token_features)
    return all_features
def test_should_load_resources_from_package(self):
    """Loading resources by package name exposes the English gazetteers."""
    clear_resources()
    load_resources("snips_nlu_en")
    self.assertTrue(resource_exists("en", "gazetteers"))
def test_should_load_resources_from_data_path(self):
    """Loading resources by language code exposes the English gazetteers."""
    clear_resources()
    load_resources("en")
    self.assertTrue(resource_exists("en", "gazetteers"))
def test_should_load_resources_from_path(self):
    """Loading resources from a filesystem path exposes the English gazetteers."""
    clear_resources()
    load_resources(str(DATA_PATH / "en"))
    self.assertTrue(resource_exists("en", "gazetteers"))
def preprocess(string):
    """Normalize *string*, additionally stemming it when stemming is
    enabled and a stem resource exists for the language.

    NOTE(review): closure — reads ``self`` from the enclosing scope.
    """
    text = normalize(string)
    # Keep the original evaluation order: the resource check runs
    # before the use_stemming flag is consulted.
    if resource_exists(self.language, STEMS) and self.use_stemming:
        text = stem(text, self.language)
    return text