def test_elmo_offline():
    """Check ELMO_OFFLINE relation extraction against a cache built in-process.

    An ``ElmoEmbedding`` is asked for the head feature vector of every test
    mention; its ``cache`` is then installed as the ``embeder`` of the offline
    extractor's embedding, so no ELMo file is needed (``elmo_file=None``).
    The first mention is compared against mentions 0..3 and the first relation
    popped from each result is checked.
    """
    mentions = get_embedd_mentions()

    # Populate the embedder's cache by requesting a vector for each mention.
    online_embedder = ElmoEmbedding()
    for mention in mentions:
        online_embedder.get_head_feature_vector(mention)

    extractor = WordEmbeddingRelationExtraction(EmbeddingMethod.ELMO_OFFLINE, elmo_file=None)
    extractor.embedding.embeder = online_embedder.cache

    # (index of the mention compared with mentions[0], expected relation)
    expectations = (
        (0, RelationType.WORD_EMBEDDING_MATCH),
        (1, RelationType.WORD_EMBEDDING_MATCH),
        (2, RelationType.NO_RELATION_FOUND),
        (3, RelationType.WORD_EMBEDDING_MATCH),
    )
    for other_index, expected_relation in expectations:
        relations = extractor.extract_all_relations(mentions[0], mentions[other_index])
        assert relations.pop() == expected_relation
def load_elmo_for_vocab(vocabulary):
    """Build a lookup from each distinct vocabulary string to its ELMo vector.

    Args:
        vocabulary: iterable of mention strings; repeated strings are embedded
            only once (the first time they are seen).

    Returns:
        dict mapping every distinct string to the value returned by
        ``ElmoEmbedding.get_avrg_feature_vector`` for that string.
    """
    embedder = ElmoEmbedding()
    vectors_by_string = {}
    for text in vocabulary:
        if text in vectors_by_string:
            # Already embedded; skip the extra embedder call.
            continue
        vectors_by_string[text] = embedder.get_avrg_feature_vector(text)
    return vectors_by_string
def __init__(
    self,
    method: EmbeddingMethod = EmbeddingMethod.GLOVE,
    glove_file: str = None,
    elmo_file: str = None,
    cos_accepted_dist: float = 0.7,
):
    """
    Extract Relation between two mentions according to Word Embedding cosine distance

    Selects the embedding backend for ``method`` and records whether that
    backend is contextual (``self.contextual`` is False for the GloVe
    variants and True for the ELMo variants).

    Args:
        method (optional): EmbeddingMethod.{GLOVE/GLOVE_OFFLINE/ELMO/ELMO_OFFLINE}
            (default = GLOVE)
        glove_file (required on GLOVE/GLOVE_OFFLINE mode): str Location of Glove file
        elmo_file (required on ELMO_OFFLINE mode): str Location of Elmo file
        cos_accepted_dist (optional): float stored as ``self.accepted_dist``
            (default = 0.7); presumably the cosine threshold for accepting a
            match -- confirm against the extraction methods.

    Raises:
        ValueError: if ``method`` is not one of the four supported
            EmbeddingMethod values.
    """
    if method == EmbeddingMethod.GLOVE:
        self.embedding = GloveEmbedding(glove_file)
        self.contextual = False
    elif method == EmbeddingMethod.GLOVE_OFFLINE:
        self.embedding = GloveEmbeddingOffline(glove_file)
        self.contextual = False
    elif method == EmbeddingMethod.ELMO:
        self.embedding = ElmoEmbedding()
        self.contextual = True
    elif method == EmbeddingMethod.ELMO_OFFLINE:
        self.embedding = ElmoEmbeddingOffline(elmo_file)
        self.contextual = True
    else:
        # Fail here rather than leave self.embedding / self.contextual unset,
        # which would only surface later as an AttributeError.
        raise ValueError("Unsupported embedding method: {!r}".format(method))

    self.accepted_dist = cos_accepted_dist
    super(WordEmbeddingRelationExtraction, self).__init__()
def load_elmo_for_vocab(mentions):
    """
    Populate an ElmoEmbedding cache from mentions and return that cache.

    A head feature vector is requested for every mention; the return values
    are discarded because only the embedder's ``cache`` is of interest.

    Args:
        mentions: iterable of mention objects accepted by
            ``ElmoEmbedding.get_head_feature_vector``

    Returns:
        The ``cache`` attribute of the ElmoEmbedding instance after all
        mentions were processed.
    """
    embedder = ElmoEmbedding()
    for current_mention in mentions:
        embedder.get_head_feature_vector(current_mention)

    vocab_cache = embedder.cache
    logger.info('Total words/contexts in vocabulary %d', len(vocab_cache))
    return vocab_cache
def __init__(self, method: EmbeddingMethod, glove_file: str = None, elmo_file: str = None):
    """
    Extract Relation between two mentions according to Word Embedding cosine distance

    Args:
        method (required): EmbeddingMethod.{GLOVE/GLOVE_OFFLINE/ELMO/ELMO_OFFLINE}
        glove_file (required on GLOVE/GLOVE_OFFLINE mode): str Location of Glove file
        elmo_file (required on ELMO_OFFLINE mode): str Location of Elmo file

    Raises:
        ValueError: if ``method`` is not one of the four supported
            EmbeddingMethod values.
    """
    if method == EmbeddingMethod.GLOVE:
        self.embedding = GloveEmbedding(glove_file)
    elif method == EmbeddingMethod.GLOVE_OFFLINE:
        self.embedding = GloveEmbeddingOffline(glove_file)
    elif method == EmbeddingMethod.ELMO:
        self.embedding = ElmoEmbedding()
    elif method == EmbeddingMethod.ELMO_OFFLINE:
        self.embedding = ElmoEmbeddingOffline(elmo_file)
    else:
        # Fail here rather than leave self.embedding unset, which would only
        # surface later as an AttributeError.
        raise ValueError("Unsupported embedding method: {!r}".format(method))

    super(WordEmbeddingRelationExtraction, self).__init__()