def _get_word_cluster_features(query_tokens, clusters_name, language): if not clusters_name: return [] ngrams = get_all_ngrams(query_tokens) cluster_features = [] for ngram in ngrams: cluster = get_word_cluster(language, clusters_name).get( ngram[NGRAM].lower(), None) if cluster is not None: cluster_features.append(cluster) return cluster_features
def _get_tokens_clusters(tokens, language, cluster_name):
    """Return the cluster value of each token present in the cluster table.

    Args:
        tokens: Iterable of tokens used directly as lookup keys (no
            normalization is applied here).
        language: Language passed to ``get_word_cluster``.
        cluster_name: Name of the cluster resource passed to
            ``get_word_cluster``.

    Returns:
        list: One entry per token found in the table, in token order;
        tokens that are absent from the table are skipped.
    """
    word_clusters = get_word_cluster(language, cluster_name)
    matched = []
    for token in tokens:
        if token in word_clusters:
            matched.append(word_clusters[token])
    return matched
def cluster(self):
    """Return the word clusters for this object, loading them on first use.

    The result of ``get_word_cluster(self.resources, self.cluster_name)`` is
    stored in ``self._cluster`` and reused for as long as that attribute is
    not ``None``.
    """
    if self._cluster is not None:
        # Already loaded: reuse the stored value.
        return self._cluster

    self._cluster = get_word_cluster(self.resources, self.cluster_name)
    return self._cluster