Example #1
0
    def create_keyedvector_from_matrix(self, embedding_matrix, word2id):
        """
        Build a KeyedVectors object from an embedding matrix and a word2id vocabulary and store it in ``self.embedding``. Can be used for custom pre-trained embeddings.

        Parameters
        ----------
        embedding_matrix: numpy.ndarray
            Embedding matrix as a numpy object. Its second dimension
            (``shape[1]``) is used as the vector size.
        word2id: dict
            Word vocabulary (key: word, value: word_index)

        Notes
        -----
        Every vocabulary entry is created with ``count=0`` because no
        frequency information is passed in.
        NOTE(review): ``index2word`` is simply the words ordered by ascending
        index, so it only lines up with the ``Vocab`` indices when the values
        of ``word2id`` are exactly 0..len(word2id)-1 -- confirm with callers.
        """
        # Words ordered by ascending index; this order drives both
        # ``index2word`` and the insertion order of ``kv.vocab``.
        words_by_index = sorted(word2id, key=word2id.__getitem__)
        vector_size = embedding_matrix.shape[1]

        kv = KeyedVectors(vector_size)
        # Kept from the original; presumably redundant with the constructor
        # argument above, but harmless.
        kv.vector_size = vector_size
        kv.vectors = embedding_matrix
        kv.index2word = words_by_index
        kv.vocab = {
            word: Vocab(index=word2id[word], count=0)
            for word in words_by_index
        }

        self.embedding = kv
Example #2
0
    def _set_keyedvector(self, attrname, keys, dim, vec=None):
        """
        Create a KeyedVectors object for ``keys`` and attach it to ``self``.

        Parameters
        ----------
        attrname: str
            Name of the attribute on ``self`` that receives the new object.
        keys: sized iterable
            Vocabulary entries; each one is converted with ``str`` and gets
            its position in ``keys`` as index.
        dim: int
            Vector size passed to the ``KeyedVectors`` constructor.
        vec: optional
            When not None, it is assigned to ``vectors`` and ``init_sims()``
            is called on the new object afterwards.
        """
        kv = KeyedVectors(dim)
        # Dummy counts: strictly decreasing with position, so earlier keys
        # get larger counts (first key -> len(keys) + 1, last key -> 2).
        top_count = len(keys) + 1
        for position, raw_key in enumerate(keys):
            token = str(raw_key)
            kv.vocab[token] = Vocab(index=position, count=top_count - position)
            kv.index2word.append(token)

        if vec is not None:
            kv.vectors = vec
            kv.init_sims()

        setattr(self, attrname, kv)