def run_example():
    """Run every relation extractor on one mention pair and log the outcome.

    Builds the Computed, Referent-Dict, Verb-Ocean, Wikipedia, Wordnet and
    word-embedding (ELMO) extractors, asks each one for all relations between
    the mentions "IBM" and "International Business Machines", and then logs,
    per extractor, either the relations it returned or a "no relation"
    message.

    Every result is reported the same way (as a list), so the log lines of
    the six extractors are directly comparable.
    """
    logger.info("Running relation extraction example......")
    datasets_dir = LIBRARY_ROOT / "datasets"

    computed = ComputedRelationExtraction()
    ref_dict = ReferentDictRelationExtraction(
        ref_dict=str(datasets_dir / "coref.dict1.tsv"))
    vo = VerboceanRelationExtraction(
        vo_file=str(datasets_dir / "verbocean.unrefined.2004-05-20.txt"))
    wiki = WikipediaRelationExtraction()
    wn = WordnetRelationExtraction()
    embed = WordEmbeddingRelationExtraction(method=EmbeddingMethod.ELMO)

    mention_x1 = MentionDataLight(
        "IBM",
        mention_context=
        "IBM manufactures and markets computer hardware, middleware and software",
    )
    mention_y1 = MentionDataLight(
        "International Business Machines",
        mention_context="International Business Machines Corporation is an "
        "American multinational information technology company",
    )

    # (extractor, "nothing found" message, "found" message template), listed
    # in the order the extractors are run and reported.
    reports = (
        (computed, "No Computed relation found",
         "Found Computed relations-%s"),
        (ref_dict, "No Referent-Dict relation found",
         "Found Referent-Dict relations-%s"),
        (vo, "No Verb-Ocean relation found",
         "Found Verb-Ocean relations-%s"),
        (wiki, "No Wikipedia relation found",
         "Found Wikipedia relations-%s"),
        (embed, "No Embedded relation found",
         "Found Embedded relations-%s"),
        (wn, "No Wordnet relation found",
         "Found Wordnet relations-%s"),
    )

    # Run all extractions first so the result lines are logged together,
    # after any output the extractors themselves may produce.
    results = [
        (extractor.extract_all_relations(mention_x1, mention_y1),
         not_found_msg, found_msg)
        for extractor, not_found_msg, found_msg in reports
    ]

    for relations, not_found_msg, found_msg in results:
        # NO_RELATION_FOUND inside the result marks an empty outcome.
        if RelationType.NO_RELATION_FOUND in relations:
            logger.info(not_found_msg)
        else:
            # Pass the list as a logging argument: formatting is deferred to
            # the logging framework and is identical for every extractor.
            logger.info(found_msg, list(relations))
Ejemplo n.º 2
0
def run_example():
    """Demonstrate the relation extractors on a single pair of mentions.

    Creates the Computed, Referent-Dict, Verb-Ocean, Wikipedia and Wordnet
    extractors in ONLINE mode plus an ELMO word-embedding extractor, queries
    each one for the mentions 'IBM' and 'International Business Machines',
    and logs what each extractor reported. Wikipedia and Wordnet are queried
    for one specific sub-relation only (redirect link and derivationally
    related, respectively); the others are asked for all relations.
    """
    logger.info('Running relation extraction example......')
    computed_extractor = ComputedRelationExtraction()
    ref_dict_extractor = ReferentDictRelationExtraction(
        OnlineOROfflineMethod.ONLINE,
        LIBRARY_ROOT + '/datasets/ref.dict1.tsv')
    verbocean_extractor = VerboceanRelationExtraction(
        OnlineOROfflineMethod.ONLINE,
        LIBRARY_ROOT + '/datasets/verbocean.unrefined.2004-05-20.txt')
    wikipedia_extractor = WikipediaRelationExtraction(WikipediaSearchMethod.ONLINE)
    wordnet_extractor = WordnetRelationExtraction(OnlineOROfflineMethod.ONLINE)

    first = MentionDataLight(
        'IBM',
        mention_context='IBM manufactures and markets computer hardware, middleware and software')
    second = MentionDataLight(
        'International Business Machines',
        mention_context='International Business Machines Corporation is an '
                        'American multinational information technology company')

    from_computed = computed_extractor.extract_all_relations(first, second)
    from_ref_dict = ref_dict_extractor.extract_all_relations(first, second)
    from_verbocean = verbocean_extractor.extract_all_relations(first, second)
    from_wikipedia = wikipedia_extractor.extract_sub_relations(
        first, second, RelationType.WIKIPEDIA_REDIRECT_LINK)
    # The embedding extractor is only built once the earlier lookups are done.
    embedding_extractor = WordEmbeddingRelationExtraction(EmbeddingMethod.ELMO)
    from_embedding = embedding_extractor.extract_all_relations(first, second)
    from_wordnet = wordnet_extractor.extract_sub_relations(
        first, second, RelationType.WORDNET_DERIVATIONALLY)

    # (result, message when nothing was found, message template when found)
    outcomes = (
        (from_computed, 'No Computed relation found',
         'Found Computed relations-%s'),
        (from_ref_dict, 'No Referent-Dict relation found',
         'Found Referent-Dict relations-%s'),
        (from_verbocean, 'No Verb-Ocean relation found',
         'Found Verb-Ocean relations-%s'),
        (from_wikipedia, 'No Wikipedia relation found',
         'Found Wikipedia relations-%s'),
        (from_embedding, 'No Embedded relation found',
         'Found Embedded relations-%s'),
        (from_wordnet, 'No Wordnet relation found',
         'Found Wordnet relations-%s'),
    )
    for found, empty_message, found_template in outcomes:
        if RelationType.NO_RELATION_FOUND in found:
            logger.info(empty_message)
            continue
        logger.info(found_template, str(list(found)))
Ejemplo n.º 3
0
def load_modules(cdc_resources):
    """Build the list of relation-extraction models configured by *cdc_resources*.

    Args:
        cdc_resources: object exposing the search methods, file locations and
            Elasticsearch connection settings read below.

    Returns:
        A list holding, in this order, a Computed, a Wikipedia, a
        word-embedding (accepted cosine distance 0.75) and a Referent-Dict
        relation extractor.
    """
    return [
        ComputedRelationExtraction(),
        WikipediaRelationExtraction(
            cdc_resources.wiki_search_method,
            wiki_file=cdc_resources.wiki_folder,
            host=cdc_resources.elastic_host,
            port=cdc_resources.elastic_port,
            index=cdc_resources.elastic_index,
        ),
        WordEmbeddingRelationExtraction(
            cdc_resources.embed_search_method,
            glove_file=cdc_resources.glove_file,
            elmo_file=cdc_resources.elmo_file,
            cos_accepted_dist=0.75,
        ),
        ReferentDictRelationExtraction(
            cdc_resources.referent_dict_method,
            cdc_resources.referent_dict_file,
        ),
    ]
    def load_modules(self):
        """Instantiate only the extractors required by the configured sieves.

        Collects the first element of every entry in the event and entity
        ``sieves_order`` lists, then creates the matching extractor
        (Wikipedia, word embedding, Verb-Ocean, Referent-Dict, within-doc
        coref, Wordnet) from ``self.cdc_resources`` and stores it on ``self``.
        Attributes for relations that are not requested are left untouched.
        """
        requested = {entry[0] for entry in self.event_config.sieves_order}
        requested.update(entry[0] for entry in self.entity_config.sieves_order)

        # Wikipedia covers several relation types, so match on the name.
        if any('WIKIPEDIA' in rel.name for rel in requested):
            self.wiki = WikipediaRelationExtraction(
                self.cdc_resources.wiki_search_method,
                wiki_file=self.cdc_resources.wiki_folder,
                host=self.cdc_resources.elastic_host,
                port=self.cdc_resources.elastic_port,
                index=self.cdc_resources.elastic_index,
            )

        if RelationType.WORD_EMBEDDING_MATCH in requested:
            self.embeds = WordEmbeddingRelationExtraction(
                self.cdc_resources.embed_search_method,
                glove_file=self.cdc_resources.glove_file,
                elmo_file=self.cdc_resources.elmo_file,
            )

        if RelationType.VERBOCEAN_MATCH in requested:
            self.vo = VerboceanRelationExtraction(
                self.cdc_resources.vo_search_method,
                self.cdc_resources.vo_dict_file,
            )

        if RelationType.REFERENT_DICT in requested:
            self.ref_dict = ReferentDictRelationExtraction(
                self.cdc_resources.referent_dict_method,
                self.cdc_resources.referent_dict_file,
            )

        if RelationType.WITHIN_DOC_COREF in requested:
            self.within_doc = WithinDocCoref(self.cdc_resources.wd_file)

        # Same name-based match as Wikipedia, for the Wordnet relation family.
        if any('WORDNET' in rel.name for rel in requested):
            self.wordnet = WordnetRelationExtraction(
                self.cdc_resources.wn_search_method,
                self.cdc_resources.wn_folder,
            )
def ref_dict_dump():
    """Write the referent-dict entries for the mentions vocabulary as JSON.

    Reads its settings from the module-level ``args`` object (presumably the
    parsed command line; it is defined outside this function):
    ``args.ref_dict`` is the referent dictionary to load, ``args.mentions``
    the mentions file the vocabulary is built from, and ``args.output`` the
    JSON file to write. Only vocabulary words present in the referent
    dictionary are kept.
    """
    logger.info('Extracting referent dict dump, this may take a while...')
    source_path = args.ref_dict
    target_path = args.output
    vocabulary = load_mentions_vocab([args.mentions], True)

    full_dict = ReferentDictRelationExtraction.load_reference_dict(source_path)

    # Restrict the dictionary to the words that occur in the vocabulary.
    vocab_entries = {
        term: full_dict[term] for term in vocabulary if term in full_dict
    }

    logger.info('Found %d words from vocabulary', len(vocab_entries))
    logger.info('Preparing to save refDict output file')
    with open(target_path, 'w') as out_handle:
        json.dump(vocab_entries, out_handle)
    logger.info('Done saved to-%s', target_path)