def compute_feature(self, tokens, token_index):
    """Look up the cluster entry for the token at ``token_index``.

    The token is reduced with ``stem_token`` when ``self.use_stemming`` is
    truthy and with ``normalize_token`` otherwise. The reduced form is the
    key searched in the ``self.cluster_name`` table of
    ``get_word_clusters(self.language)``.

    Returns:
        The entry stored under that key, or ``None`` when the key is absent.
    """
    lookup_key = (
        stem_token(tokens[token_index], self.language)
        if self.use_stemming
        else normalize_token(tokens[token_index])
    )
    table = get_word_clusters(self.language)[self.cluster_name]
    return table.get(lookup_key)
Exemple #2
0
def _get_word_cluster_features(query_tokens, clusters_name, language):
    """Collect the cluster entries matched by the n-grams of ``query_tokens``.

    Each n-gram produced by ``get_all_ngrams`` is lower-cased (its ``NGRAM``
    field) and searched in the ``clusters_name`` table of
    ``get_word_clusters(language)``. N-grams without an entry are skipped.

    Returns:
        A list of the entries found, in n-gram order; an empty list when
        ``clusters_name`` is falsy.
    """
    if not clusters_name:
        return []
    # The table is fetched once per n-gram on purpose: nothing is looked up
    # (and no missing-table error can occur) when there are no n-grams.
    candidates = (
        get_word_clusters(language)[clusters_name].get(item[NGRAM].lower())
        for item in get_all_ngrams(query_tokens)
    )
    return [entry for entry in candidates if entry is not None]
def _get_word_cluster_features(query_tokens, clusters_name, language):
    """Return the cluster entries found for the n-grams of ``query_tokens``.

    For every n-gram yielded by ``get_all_ngrams``, the lower-cased ``NGRAM``
    value is used as a key into the ``clusters_name`` table returned by
    ``get_word_clusters(language)``. Only keys that have an entry contribute
    to the result.

    Returns:
        The matching entries as a list (n-gram order), or ``[]`` when
        ``clusters_name`` is falsy.
    """
    matches = []
    if clusters_name:
        for item in get_all_ngrams(query_tokens):
            # Resolved inside the loop so an empty n-gram sequence never
            # touches the cluster tables.
            lookup = get_word_clusters(language)[clusters_name].get
            entry = lookup(item[NGRAM].lower())
            if entry is None:
                continue
            matches.append(entry)
    return matches
Exemple #4
0
def _get_word_cluster_features(query_tokens, language):
    """Collect the cluster entries matched by the n-grams of ``query_tokens``.

    The table name is read from ``CLUSTER_USED_PER_LANGUAGES`` for
    ``language``. Each n-gram from ``get_all_ngrams`` is lower-cased (its
    ``NGRAM`` field) and searched in that table of
    ``get_word_clusters(language)``; n-grams without an entry are skipped.

    Returns:
        A list of the entries found, in n-gram order; an empty list when no
        (truthy) table name is registered for ``language``.
    """
    table_name = CLUSTER_USED_PER_LANGUAGES.get(language, False)
    if not table_name:
        return []
    # The table is fetched once per n-gram on purpose: nothing is looked up
    # when there are no n-grams.
    candidates = (
        get_word_clusters(language)[table_name].get(item[NGRAM].lower())
        for item in get_all_ngrams(query_tokens)
    )
    return [entry for entry in candidates if entry is not None]
Exemple #5
0
def _get_word_cluster_features(query_tokens, language):
    """Return the cluster entries found for the n-grams of ``query_tokens``.

    ``CLUSTER_USED_PER_LANGUAGES`` supplies the table name for ``language``.
    For every n-gram yielded by ``get_all_ngrams``, the lower-cased ``NGRAM``
    value is used as a key into that table of ``get_word_clusters(language)``.
    Only keys that have an entry contribute to the result.

    Returns:
        The matching entries as a list (n-gram order), or ``[]`` when no
        (truthy) table name is registered for ``language``.
    """
    matches = []
    table_name = CLUSTER_USED_PER_LANGUAGES.get(language, False)
    if table_name:
        for item in get_all_ngrams(query_tokens):
            # Resolved inside the loop so an empty n-gram sequence never
            # touches the cluster tables.
            lookup = get_word_clusters(language)[table_name].get
            entry = lookup(item[NGRAM].lower())
            if entry is None:
                continue
            matches.append(entry)
    return matches
 def language(self, value):
     """Store ``value`` as the language and refresh the state derived from it.

     A ``None`` value is ignored and leaves the object untouched. Otherwise
     ``self._language`` is set, ``self.cluster`` is reloaded from
     ``get_word_clusters`` for ``self.cluster_name``, and
     ``self.args["language_code"]`` is updated.
     """
     if value is None:
         return
     self._language = value
     # Read back through ``self.language`` rather than ``value`` so that the
     # derived state reflects whatever the attribute reports.
     tables = get_word_clusters(self.language)
     self.cluster = tables[self.cluster_name]
     self.args["language_code"] = self.language
Exemple #7
0
 def compute_feature(self, tokens, token_index):
     """Look up the cluster entry for the token at ``token_index``.

     The lookup key is the token's ``stem`` attribute when
     ``self.use_stemming`` is truthy and its ``normalized_value`` attribute
     otherwise. It is searched in the ``self.cluster_name`` table of
     ``get_word_clusters(self.language)``.

     Returns:
         The entry stored under that key, or ``None`` when the key is absent.
     """
     if self.use_stemming:
         lookup_key = tokens[token_index].stem
     else:
         lookup_key = tokens[token_index].normalized_value
     table = get_word_clusters(self.language)[self.cluster_name]
     return table.get(lookup_key)
Exemple #8
0
 def compute_feature(self, tokens, token_index):
     """Return the cluster entry associated with one token, if any.

     Depending on ``self.use_stemming``, either the ``stem`` or the
     ``normalized_value`` attribute of ``tokens[token_index]`` is used as the
     key into the ``self.cluster_name`` table obtained from
     ``get_word_clusters(self.language)``.

     Returns:
         The entry stored under that key, or ``None`` when the key is absent.
     """
     field = "stem" if self.use_stemming else "normalized_value"
     lookup_key = getattr(tokens[token_index], field)
     tables = get_word_clusters(self.language)
     return tables[self.cluster_name].get(lookup_key)
Exemple #9
0
 def language(self, value):
     """Set the language and rebuild everything that depends on it.

     Nothing happens when ``value`` is ``None``. For any other value,
     ``self._language`` is assigned first, then ``self.cluster`` is reloaded
     from the ``self.cluster_name`` table of ``get_word_clusters`` and
     ``self.args["language_code"]`` is updated.
     """
     if value is None:
         # Keep the current language and cluster as they are.
         return
     self._language = value
     current = self.language
     self.cluster = get_word_clusters(current)[self.cluster_name]
     self.args["language_code"] = self.language
Exemple #10
0
def _get_tokens_clusters(tokens, language, cluster_name):
    """Map tokens to their entries in the ``cluster_name`` table.

    The table is taken from ``get_word_clusters(language)``. Tokens that are
    not keys of the table are dropped.

    Returns:
        A list with one entry per known token, in the order of ``tokens``.
    """
    table = get_word_clusters(language)[cluster_name]
    found = []
    for token in tokens:
        if token in table:
            found.append(table[token])
    return found
def _get_tokens_clusters(tokens, language, cluster_name):
    """Return the table entries of every token present in the cluster table.

    ``get_word_clusters(language)[cluster_name]`` provides the table. A token
    contributes its entry only when it is a key of that table; other tokens
    are skipped.

    Returns:
        The entries as a list, following the order of ``tokens``.
    """
    table = get_word_clusters(language)[cluster_name]
    entries = []
    for token in tokens:
        if token not in table:
            continue
        entries.append(table[token])
    return entries