def test_elmo_offline():
    """Check ELMO_OFFLINE relation extraction against a pre-filled ELMo cache.

    An ``ElmoEmbedding`` is run over every test mention via
    ``get_head_feature_vector``; its ``cache`` is then assigned to the
    ``embeder`` attribute of the offline extractor's embedding (built with
    ``elmo_file=None``). The first mention is compared with mentions 0..3 and
    the relation popped from each result is checked.
    """
    mentions = get_embedd_mentions()

    online_embedder = ElmoEmbedding()
    for mention in mentions:
        online_embedder.get_head_feature_vector(mention)

    extractor = WordEmbeddingRelationExtraction(EmbeddingMethod.ELMO_OFFLINE, elmo_file=None)
    # Hand the cache built above to the offline embedding instead of a file.
    extractor.embedding.embeder = online_embedder.cache

    anchor = mentions[0]
    # (index of the mention compared with the anchor, expected relation)
    expectations = (
        (0, RelationType.WORD_EMBEDDING_MATCH),
        (1, RelationType.WORD_EMBEDDING_MATCH),
        (2, RelationType.NO_RELATION_FOUND),
        (3, RelationType.WORD_EMBEDDING_MATCH),
    )
    for index, expected_relation in expectations:
        found = extractor.extract_all_relations(anchor, mentions[index])
        assert found.pop() == expected_relation
    def __init__(
        self,
        method: EmbeddingMethod = EmbeddingMethod.GLOVE,
        glove_file: str = None,
        elmo_file: str = None,
        cos_accepted_dist: float = 0.7,
    ):
        """
        Extract Relation between two mentions according to Word Embedding cosine distance

        Args:
            method (optional): EmbeddingMethod.{GLOVE/GLOVE_OFFLINE/ELMO/ELMO_OFFLINE}
                (default = GLOVE)
            glove_file (required on GLOVE/GLOVE_OFFLINE mode): str Location of Glove file
            elmo_file (required on ELMO_OFFLINE mode): str Location of Elmo file
            cos_accepted_dist (optional): float stored as ``self.accepted_dist``
                (default = 0.7); presumably the cosine threshold used when
                deciding on a match -- confirm against the extraction code

        Raises:
            ValueError: if ``method`` is not one of the four supported modes
        """
        # ``contextual`` is True for the ELMo modes and False for the GloVe modes.
        if method == EmbeddingMethod.GLOVE:
            self.embedding = GloveEmbedding(glove_file)
            self.contextual = False
        elif method == EmbeddingMethod.GLOVE_OFFLINE:
            self.embedding = GloveEmbeddingOffline(glove_file)
            self.contextual = False
        elif method == EmbeddingMethod.ELMO:
            self.embedding = ElmoEmbedding()
            self.contextual = True
        elif method == EmbeddingMethod.ELMO_OFFLINE:
            self.embedding = ElmoEmbeddingOffline(elmo_file)
            self.contextual = True
        else:
            # Fail at construction time: otherwise ``embedding`` and
            # ``contextual`` would be left unset and the first use would die
            # with an unrelated AttributeError.
            raise ValueError("Unsupported embedding method: {!r}".format(method))

        self.accepted_dist = cos_accepted_dist
        super(WordEmbeddingRelationExtraction, self).__init__()
def load_elmo_for_vocab(vocabulary):
    """Map each distinct mention string to its averaged ELMo feature vector.

    Args:
        vocabulary: iterable of mention strings; repeated strings are
            embedded only once.

    Returns:
        dict keyed by mention string whose values are the results of
        ``ElmoEmbedding.get_avrg_feature_vector`` for that string.
    """
    embedder = ElmoEmbedding()
    vectors_by_mention = {}
    for text in vocabulary:
        # Already embedded: skip so the embedder is not called again.
        if text in vectors_by_mention:
            continue
        vectors_by_mention[text] = embedder.get_avrg_feature_vector(text)

    return vectors_by_mention
def load_elmo_for_vocab(mentions):
    """
    Create the embedding using the cache logic in the embedding class

    Every mention is passed to ``ElmoEmbedding.get_head_feature_vector``; the
    return values are discarded and the embedder's ``cache`` is what is
    handed back (presumably filled as a side effect of those calls -- confirm
    in ``ElmoEmbedding``).

    Args:
        mentions: iterable of mention objects accepted by
            ``get_head_feature_vector``

    Returns:
        The ``cache`` attribute of the ``ElmoEmbedding`` instance created here

    """
    embedder = ElmoEmbedding()

    for current_mention in mentions:
        embedder.get_head_feature_vector(current_mention)

    vocab_cache = embedder.cache
    logger.info('Total words/contexts in vocabulary %d', len(vocab_cache))
    return vocab_cache
Beispiel #5
0
    def __init__(self, method: EmbeddingMethod, glove_file: str = None, elmo_file: str = None):
        """
        Extract Relation between two mentions according to Word Embedding cosine distance

        Args:
            method (required): EmbeddingMethod.{GLOVE/GLOVE_OFFLINE/ELMO/ELMO_OFFLINE}
            glove_file (required on GLOVE/GLOVE_OFFLINE mode): str Location of Glove file
            elmo_file (required on ELMO_OFFLINE mode): str Location of Elmo file

        Raises:
            ValueError: if ``method`` is not one of the four supported modes
        """
        if method == EmbeddingMethod.GLOVE:
            self.embedding = GloveEmbedding(glove_file)
        elif method == EmbeddingMethod.GLOVE_OFFLINE:
            self.embedding = GloveEmbeddingOffline(glove_file)
        elif method == EmbeddingMethod.ELMO:
            self.embedding = ElmoEmbedding()
        elif method == EmbeddingMethod.ELMO_OFFLINE:
            self.embedding = ElmoEmbeddingOffline(elmo_file)
        else:
            # Fail at construction time: otherwise ``embedding`` would be left
            # unset and the first use would die with an unrelated
            # AttributeError.
            raise ValueError("Unsupported embedding method: {!r}".format(method))

        super(WordEmbeddingRelationExtraction, self).__init__()