예제 #1
0
    def compute_features(self, tokens, drop_out=False):
        """Return one feature mapping per token

        Every incoming token is first rebuilt as a ``Token`` carrying a stem:
        the output of ``stem`` on its normalized value when the ``STEMS``
        resource exists for ``self.language``, otherwise the normalized value
        itself.

        When *drop_out* is truthy, a feature is skipped for a given token if
        a fresh random draw falls below that feature's ``drop_out`` ratio.
        This is meant to be used during training only.

        The result is a list holding one ``UnupdatableDict`` per token, keyed
        by feature name; features that were dropped or whose computed value is
        ``None`` are absent.
        """
        # Decide once how stems are obtained, then rebuild every token.
        if resource_exists(self.language, STEMS):
            def stem_of(tok):
                return stem(tok.normalized_value, self.language)
        else:
            def stem_of(tok):
                return tok.normalized_value

        tokens = [
            Token(tok.value, tok.start, tok.end, stem=stem_of(tok))
            for tok in tokens
        ]
        # One cache entry per token, handed to every feature computation.
        cache = [{TOKEN_NAME: tok} for tok in tokens]
        rng = check_random_state(self.config.random_seed)

        features = []
        for position in range(len(tokens)):
            per_token = UnupdatableDict()
            for feature in self.features:
                ratio = feature.drop_out
                # The random draw only happens when drop out is requested.
                dropped = drop_out and rng.rand() < ratio
                if not dropped:
                    computed = feature.compute(position, cache)
                    if computed is not None:
                        per_token[feature.name] = computed
            features.append(per_token)
        return features
예제 #2
0
    def compute_features(self, tokens, drop_out=False):
        """Compute the feature values of each provided token

        The tokens are re-created with a ``stem`` attribute: it comes from
        ``stem`` applied to the normalized value if the ``STEMS`` resource
        exists for ``self.language``, and is the plain normalized value
        otherwise.

        Setting *drop_out* activates drop out: for each token, a feature is
        ignored when a random draw is lower than the feature's ``drop_out``
        ratio. Only use this while training.

        Returns a list of ``UnupdatableDict`` (one per token) mapping feature
        names to values, without the dropped features and without the
        features whose value is ``None``.
        """
        has_stems = resource_exists(self.language, STEMS)
        stemmed_tokens = []
        for raw in tokens:
            if has_stems:
                token = Token(raw.value, raw.start, raw.end,
                              stem=stem(raw.normalized_value, self.language))
            else:
                token = Token(raw.value, raw.start, raw.end,
                              stem=raw.normalized_value)
            stemmed_tokens.append(token)
        tokens = stemmed_tokens

        # Shared per-token cache passed to each feature's compute call.
        cache = [{TOKEN_NAME: token} for token in tokens]
        all_features = []
        random_state = check_random_state(self.config.random_seed)
        for token_index, _ in enumerate(tokens):
            current = UnupdatableDict()
            for feature in self.features:
                threshold = feature.drop_out
                # No random number is drawn unless drop out is enabled.
                if drop_out and random_state.rand() < threshold:
                    continue
                result = feature.compute(token_index, cache)
                if result is None:
                    continue
                current[feature.name] = result
            all_features.append(current)
        return all_features
예제 #3
0
    def test_should_load_resources_from_package(self):
        # Given: a clean slate with no resources loaded
        clear_resources()

        # When: resources are loaded using the "snips_nlu_en" name
        load_resources("snips_nlu_en")

        # Then: the english gazetteers resource is reported as existing
        gazetteers_available = resource_exists("en", "gazetteers")
        self.assertTrue(gazetteers_available)
예제 #4
0
    def test_should_load_resources_from_data_path(self):
        # Given: previously loaded resources are cleared
        clear_resources()

        # When: resources are loaded using the bare "en" name
        load_resources("en")

        # Then: the english gazetteers resource must exist
        has_gazetteers = resource_exists("en", "gazetteers")
        self.assertTrue(has_gazetteers)
예제 #5
0
    def test_should_load_resources_from_path(self):
        # Given: no resources loaded and the "en" directory under DATA_PATH
        clear_resources()
        english_dir = DATA_PATH / "en"
        english_dir_str = str(english_dir)

        # When: resources are loaded from that path given as a string
        load_resources(english_dir_str)

        # Then: the english gazetteers resource must exist
        loaded = resource_exists("en", "gazetteers")
        self.assertTrue(loaded)
예제 #6
0
 def preprocess(string):
     """Normalize *string*, then stem it when stemming applies.

     Stemming is applied only if the ``STEMS`` resource exists for
     ``self.language`` and ``self.use_stemming`` is truthy; otherwise the
     normalized string is returned as is.
     """
     text = normalize(string)
     # The resource check is evaluated before the use_stemming flag.
     should_stem = resource_exists(self.language, STEMS) and self.use_stemming
     return stem(text, self.language) if should_stem else text
예제 #7
0
 def preprocess(string):
     """Return the normalized form of *string*, stemmed when possible.

     The stemmed form is returned only when the ``STEMS`` resource exists
     for ``self.language`` and ``self.use_stemming`` is truthy.
     """
     cleaned = normalize(string)
     # Guard clause: fall back to the normalized text when stemming is off
     # or no stems resource exists for the language.
     if not (resource_exists(self.language, STEMS) and self.use_stemming):
         return cleaned
     return stem(cleaned, self.language)