Exemple #1
0
    def analogy(self, pos, neg, n=10, metric="cosine"):
        """
        Rank vocabulary entries against a signed combination of word vectors.

        The vectors of the words in `pos` are used as-is, the vectors of the
        words in `neg` are negated, and the mean of all of them is compared
        with `self.vectors` through `distance`. The query words themselves
        are removed from the result.

        Parameters
        ----------
        pos : list
            Words whose vectors contribute positively.
        neg : list
            Words whose vectors contribute negatively.
        n : int (default 10)
            Maximum number of entries to return.
        metric : string (default "cosine")
            Metric name forwarded to `distance`.

        Returns
        -------
        Tuple of 2 numpy.array:
            1. position in self.vocab
            2. metric value for each returned position, highest value first

        Example
        -------
            `king - man + woman = queen` will be: `pos=['king', 'woman'], neg=['man']`
        """
        query_words = pos + neg
        signed = [(word, 1.0) for word in pos] + [(word, -1.0) for word in neg]

        target = np.array([sign * self[word] for word, sign in signed]).mean(axis=0)

        scores = distance(self.vectors, target, metric=metric)
        # Over-fetch by the number of query words so that up to n entries
        # are still available once the query words have been filtered out.
        ranked = scores.argsort()[::-1][:n + len(query_words)]

        # Boolean mask: True for every ranked index that belongs to a query word.
        query_ix = [self.ix(word) for word in query_words]
        kept = ranked[~np.isin(ranked, query_ix)]
        return kept[:n], scores[kept][:n]
Exemple #2
0
    def closest(self, vector, n=10, metric="cosine"):
        """Return the n entries ranked right after the top match for a vector.

        `vector` is compared with `self.vectors` through `distance`; results
        are ordered from the highest metric value to the lowest. The
        top-ranked entry is skipped (presumably because the query vector is
        expected to be a vocabulary vector and would match itself -- confirm
        against callers).

        Parameters
        ----------
        vector : numpy.array
        n : int (default 10)
            Number of entries to return.
        metric : string (default "cosine")
            Metric name forwarded to `distance`.

        Returns
        -------
        Tuple of 2 numpy.array:
            1. position in self.vocab
            2. metric value for each returned position
        """
        scores = distance(self.vectors, vector, metric=metric)
        order = np.argsort(scores)[::-1]
        # Skip rank 0 and keep ranks 1..n.
        chosen = order[1:n + 1]
        return chosen, scores[chosen]
Exemple #3
0
    def closest(self, vector, n=10, metric="cosine"):
        """Find the entries of the vocabulary that rank closest to `vector`.

        Every row of `self.vectors` is scored against `vector` with
        `distance`, the scores are sorted in descending order, the first
        entry of that ordering is dropped and the following `n` are returned.

        Parameters
        ----------
        vector : numpy.array
            Query vector.
        n : int (default 10)
            How many entries to return.
        metric : string (default "cosine")
            Name of the metric passed on to `distance`.

        Returns
        -------
        Tuple of 2 numpy.array:
            1. position in self.vocab
            2. metric value at each of those positions
        """
        values = distance(self.vectors, vector, metric=metric)
        descending = np.argsort(values)[::-1]
        # The leading entry of the ranking is intentionally left out.
        neighbours = descending[1:][:n]
        neighbour_values = values[neighbours]
        return neighbours, neighbour_values
Exemple #4
0
    def distance(self, *args, **kwargs):
        """
        Compute the metric between every pair of the given words.

        Parameters
        ----------
        *args : two or more words
            Each word is looked up with `self[word]`; all 2-element
            combinations are evaluated, in `itertools.combinations` order.
        metric : string (default "cosine")
            Keyword argument naming the metric forwarded to the module-level
            `distance` function. Other keyword arguments are ignored.

        Returns
        -------
        list of tuple
            One `(word1, word2, value)` tuple per pair. The list is empty
            when fewer than two words are given.
        """
        metric = kwargs.get("metric", "cosine")  # Default is cosine

        return [
            (first, second, distance(self[first], self[second], metric=metric))
            for first, second in itertools.combinations(args, 2)
        ]
Exemple #5
0
    def distance(self, *args, **kwargs):
        """
        Pairwise comparison of the vectors of the given words.

        Parameters
        ----------
        *args : two or more words
            Words to compare; each is resolved through `self[word]`.
        metric : string (default "cosine")
            Optional keyword argument selecting the metric handed to the
            module-level `distance` function. No other keyword argument is
            read.

        Returns
        -------
        list of tuple
            `(word1, word2, value)` for each unordered pair of `args`, in the
            order produced by `itertools.combinations`.
        """
        metric = kwargs["metric"] if "metric" in kwargs else "cosine"

        def compare(left, right):
            # Score a single pair and package it together with its words.
            value = distance(self[left], self[right], metric=metric)
            return (left, right, value)

        pairs = itertools.combinations(args, r=2)
        return [compare(left, right) for left, right in pairs]
Exemple #6
0
    def analogy(self, pos, neg, n=10, metric="cosine"):
        """
        Analogy query over the vocabulary.

        Builds a query vector as the mean of the `pos` word vectors and the
        negated `neg` word vectors, scores `self.vectors` against it with
        `distance`, and returns the best-scoring entries that are not among
        the input words.

        Parameters
        ----------
        pos : list
            Words added to the query.
        neg : list
            Words subtracted from the query.
        n : int (default 10)
            Upper bound on the number of entries returned.
        metric : string (default "cosine")
            Metric name passed through to `distance`.

        Returns
        -------
        Tuple of 2 numpy.array:
            1. position in self.vocab
            2. metric value at each position, in descending order

        Example
        -------
            `king - man + woman = queen` will be: `pos=['king', 'woman'], neg=['man']`
        """
        input_words = pos + neg

        contributions = []
        for word in pos:
            contributions.append(1.0 * self[word])
        for word in neg:
            contributions.append(-1.0 * self[word])
        query = np.array(contributions).mean(axis=0)

        values = distance(self.vectors, query, metric=metric)
        # Take extra candidates: the input words may occupy some of the top
        # slots and are removed below.
        candidates = values.argsort()[::-1][:n + len(input_words)]

        input_ix = [self.ix(word) for word in input_words]
        # Positions inside `candidates` that point at one of the input words.
        drop_at = [
            position for position, candidate in enumerate(candidates)
            if candidate in input_ix
        ]
        remaining = np.delete(candidates, drop_at)
        return remaining[:n], values[remaining][:n]