Beispiel #1
0
    def create_keyedvector_from_matrix(self, embedding_matrix, word2id):
        """
        Build a ``KeyedVectors`` object from an embedding matrix and a word2id
        vocabulary and store it in ``self.embedding``.

        Can be used for custom pre-trained embeddings.

        Parameters
        ----------
        embedding_matrix: numpy.ndarray
            Embedding matrix as a 2-D numpy object; its second dimension is
            used as the vector size.
        word2id: dict
            Word vocabulary (key: word, value: word_index). The word index is
            used as the row index of the word in ``embedding_matrix``.

        Notes
        -----
        ``index_to_key`` is the list of words ordered by ascending word index,
        so it only lines up with ``key_to_index`` when the indices are the
        contiguous range ``0 .. len(word2id) - 1`` (assumed here, not checked).
        """

        # Words ordered by their index so that list position == word index.
        words_by_index = sorted(word2id, key=word2id.__getitem__)
        vector_size = embedding_matrix.shape[1]

        kv = KeyedVectors(vector_size)
        kv.vector_size = vector_size
        kv.vectors = embedding_matrix

        kv.index_to_key = words_by_index

        # `key_to_index` must map each key to a plain integer row index;
        # wrapping the index in a `Vocab` object breaks vector lookups.
        kv.key_to_index = {word: word2id[word] for word in words_by_index}

        self.embedding = kv