Exemple #1
0
    def __stanford_openie(self, input, output, verbose=False):
        """
        Extract triples from a text file using Stanford OpenIE and append
        them, one per line, to the output file.

        :param input: path of the text file to process
        :type input: str
        :param output: path of the file the triples are appended to
        :type output: str
        :param verbose: whether progress and triples are printed
        :type verbose: bool
        :return: the output file path
        :rtype: str
        """
        # 'with' closes the file on exit; no explicit close() is needed.
        with open(input, 'r') as input_file:
            contents = input_file.read()

        if verbose:
            print('Searching for triples using Stanford OpenIE ...')

        nlp = CoreNLPWrapper()
        annotated = nlp.annotate(
            contents,
            properties={
                'annotators':
                'tokenize, ssplit, pos, ner, depparse, parse, openie'
            })

        # Open the output file once instead of re-opening it per triple.
        with open(output, 'a') as output_file:
            for sentence in annotated['sentences']:
                for openie in sentence['openie']:
                    triple = Triple(sentence['index'], openie['subject'],
                                    openie['relation'], openie['object'])
                    if verbose:
                        print(triple.to_string())
                    output_file.write(triple.to_string() + '\n')

        return output
Exemple #2
0
    def insert_knowledge(self, triple, check_conflict):
        """
        Insert triple to the knowledge graph.

        :param triple: the triple to be inserted to the knowledge graph
        :type triple: dict
        :param check_conflict: whether it should check for conflicts first or not.
        :type check_conflict: bool
        :return: list of conflicts if there are conflicts and check_conflict is True, None otherwise
        :rtype: list or None
        """
        # Upsert the triple document in the triples collection first, so it
        # is stored (or refreshed) regardless of the conflict check below.
        self.db_triples_collection.replace_one({'subject': triple['subject'],
                                                'relation': triple['relation'],
                                                'objects': triple['objects']},
                                               triple, upsert=True)
        if check_conflict:
            # check if the exact triples are already in the knowledge graph?
            exists = self.knowledge_graph.check_triple_object_existence(Triple.from_dict(triple))
            if not exists:
                # No exact match: triples sharing the same subject and
                # relation (but other objects) count as conflicts, and the
                # KG insertion below is skipped.
                conflicts = self.knowledge_graph.get_triples(triple['subject'], triple['relation'])
                if conflicts is not None:
                    return conflicts
        self.knowledge_graph.insert_triple_object(Triple.from_dict(triple))
        # Mark the stored document as added to the knowledge graph.
        self.db_triples_collection.update_one({'subject': triple['subject'],
                                               'relation': triple['relation'],
                                               'objects': triple['objects']},
                                              {'$set': {'added': True}})
Exemple #3
0
    def __create_triples_from_coreference(self, triple, entity_clusters):
        """
        Create additional triples based on corefering entities.

        Starting from the base triple, add one triple per corefering mention
        of the subject, one per corefering mention of each object, and one
        for every subject-mention/object-mention combination.

        :param triple: base triple
        :type triple: triple.Triple
        :param entity_clusters: dictionary of entity coreference clusters
        :type entity_clusters: dict
        :return: list of newly created triples based on corefering entities
        :rtype: list
        """
        result = [triple]
        # Look up coreference clusters for the subject and each object;
        # drop objects that have no cluster.
        subject_corefs = entity_clusters.get(triple.subject)
        object_corefs = [cluster for cluster in
                         (entity_clusters.get(obj) for obj in triple.objects)
                         if cluster]

        # Subject replaced by each of its corefering mentions.
        if subject_corefs is not None:
            for mention in subject_corefs:
                result.append(Triple(mention, triple.relation, triple.objects))
        # Each object replaced by each of its corefering mentions.
        if object_corefs:
            for cluster in object_corefs:
                for mention in cluster:
                    result.append(Triple(triple.subject, triple.relation, [mention]))
        # Every combination of subject mention and object mention.
        if subject_corefs is not None and object_corefs:
            for subj_mention in subject_corefs:
                for cluster in object_corefs:
                    for obj_mention in cluster:
                        result.append(Triple(subj_mention, triple.relation, [obj_mention]))

        return result
Exemple #4
0
    def __clausie(self, input, output, verbose=False):
        """
        Extract triples from a text file using ClausIE and write them,
        one per line, to the output file.

        :param input: path of the text file to process
        :type input: str
        :param output: path of the file the triples are written to
        :type output: str
        :param verbose: whether progress and triples are printed
        :type verbose: bool
        :return: the path of the file containing the final triples
        :rtype: str
        """
        with open(input, 'r') as input_file:
            contents = input_file.read()

        if verbose:
            print('Searching for triples using ClausIE ...')

        # ClausIE expects one "<sentence id>\t<sentence>" pair per line.
        input_clausie = os.path.splitext(input)[0] + '_clausie_input.txt'
        open(input_clausie, 'w').close()

        print('Preparing contents to be processed by ClausIE at {}'.format(
            input_clausie))

        nlp = CoreNLPWrapper()
        annotated = nlp.annotate(
            contents, properties={'annotators': 'tokenize, ssplit, pos'})

        # Open the intermediate file once instead of once per sentence.
        with open(input_clausie, 'a') as clausie_file:
            for sentence in annotated['sentences']:
                sent_str = ''
                for token in sentence['tokens']:
                    # Attach possessive markers (POS tag 'POS', e.g. "'s")
                    # directly to the preceding word. The original called
                    # sent_str.strip() and discarded the result (a no-op).
                    if token['pos'] == 'POS':
                        sent_str = sent_str.strip()

                    sent_str += token['word'] + ' '

                clausie_file.write(
                    str(sentence['index']) + '\t' + sent_str.strip() + '\n')

        clausie_out = ClausIEWrapper.run_clausie(input_clausie, output,
                                                 verbose)

        os.remove(input_clausie)

        # Re-format ClausIE's raw tab-separated output into Triple strings.
        final_contents = ""
        with open(clausie_out, 'r') as clausie_out_file:
            for line in clausie_out_file:
                line = line.replace('\"', '').split('\t')
                triple = Triple(line[0].strip(),
                                NLPUtils.adjust_tokens(line[1].strip()),
                                line[2].strip(),
                                NLPUtils.adjust_tokens(line[3].strip()))
                if verbose:
                    print(triple.to_string())

                final_contents += triple.to_string() + '\n'

        with open(clausie_out, 'w') as final_file:
            final_file.write(final_contents)

        # Return the output path; the original returned the (closed) file
        # object, which callers could not use, and is inconsistent with the
        # other extractors that return paths.
        return clausie_out
Exemple #5
0
 def __process_synonym(self, synset, relation, triple):
     """
     Scan the synset's lemmas for a synonym of the relation whose triple is
     already present in the knowledge graph.

     Returns a one-element list with the matching synonym triple, or the
     reversed triples when only the opposite relation exists; falls through
     (implicitly returning None) when no synonym matches.
     """
     for lemma in synset.lemmas():
         lemma_name = lemma.name()
         # The relation itself is not a synonym candidate.
         if lemma_name == relation:
             continue
         dbpedia_lemma = convert_to_dbpedia_ontology(lemma_name)
         candidate = Triple(triple.subject, dbpedia_lemma, triple.objects)
         if self.knowledge_graph.check_triple_object_existence(candidate, transitive=True):
             return [candidate]
         if self.knowledge_graph.check_triple_object_opposite_relation_existence(
                 candidate, transitive=True):
             # Opposite relation found: report object -> subject triples.
             return [Triple(obj, dbpedia_lemma, [triple.subject]) for obj in triple.objects]
Exemple #6
0
    def __extract_and_save_triples(self, url, texts, extraction_scope, kg_auto_update):
        """
        Private method to extract triples and an article given the URL and save the triples to DB.
        Non-conflicting triples are added to knowledge graph if kg_auto_update is True.

        :param url: URL of article whose triples are going to be extracted
        :type url: str
        :param texts: article text whose triples are going to be extracted
        :type texts: str
        :param extraction_scope: The scope of the extraction, deciding whether it should include only relations between
            'named_entities', 'noun_phrases', or 'all'.
        :type extraction_scope: str
        :param kg_auto_update: whether the non-conflicting triples are added to the knowledge graph or not.
        :type kg_auto_update: bool
        """
        self.logger.info('Extracting triples for article: %s', url)
        # set 'added' to False for all triples initially
        triples = [{'sentence': results[0],
                    'triples': [{**triple.to_dict(), **{'added': False}} for triple in results[1]]}
                   for results in self.triple_producer.produce_triples(texts,
                                                                       extraction_scope=extraction_scope)]
        for sentence in triples:
            for triple in sentence['triples']:
                exists = self.knowledge_graph.check_triple_object_existence(Triple.from_dict(triple))
                # The exact triple already exists in the KG. Mark as added.
                if exists is True:
                    triple['added'] = True

        self.db_article_collection.update_one({'source': url}, {'$set': {'triples': triples}})

        # Fall back to the instance-wide auto_update setting when the caller
        # did not specify kg_auto_update explicitly.
        if (kg_auto_update is None and self.auto_update) or kg_auto_update:
            # Lazy %-style formatting, consistent with the log call above
            # (the original concatenated the URL eagerly with '+').
            self.logger.info('Inserting non conflicting knowledge for %s', url)
            self.insert_all_nonconflicting_knowledge(url)
Exemple #7
0
def transitive_exact_match_fact_check_triples():
    """
    Exact-match closed-world fact checking method, where the input is a list of triples.
    It also checks for entities with the sameAs relation.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
    """
    # Parse the JSON body into Triple objects and fact-check them
    # transitively (following sameAs links).
    parsed_triples = [Triple.from_dict(item) for item in request.get_json()]
    results = exact_match_fc.fact_check_triples(parsed_triples, transitive=True)
    # Serialize each (triple, (result, supporting triples)) entry.
    response_triples = []
    for checked, (outcome, other_triples) in results.items():
        response_triples.append({
            'triple': checked.to_dict(),
            'result': outcome,
            'other_triples': [other.to_dict() for other in other_triples]
        })
    return {'triples': response_triples}, 200
Exemple #8
0
def non_exact_match_fact_check_triples():
    """
    Non-exact match closed-world fact checking method, where the input is a list of triples.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
    """
    # Parse the JSON body into Triple objects and run the non-exact checker.
    parsed_triples = [Triple.from_dict(item) for item in request.get_json()]
    results = non_exact_match_fc.fact_check_triples(parsed_triples)
    # Serialize each (triple, (result, supporting triples)) entry.
    response_triples = []
    for checked, (outcome, other_triples) in results.items():
        response_triples.append({
            'triple': checked.to_dict(),
            'result': outcome,
            'other_triples': [other.to_dict() for other in other_triples]
        })
    return {'triples': response_triples}, 200
Exemple #9
0
    def non_exact_fact_check(self, original_triple, entity_clusters=None):
        """
        Check whether the triple or the "related triples" exist in the knowledge graph or not.
        Related triples are:

        - triples with the opposite relation (Object - Relation - Subject)
        - triples with subject or object replaced with the corefering entity (if entity_clusters is given)
        - triples with relation replaced with its synonyms
        - triples with same subject and object, but different relation.

        :param original_triple: triple extracted from the text or inputted
        :type original_triple: triple.Triple
        :param entity_clusters: dictionary of entity coreference clusters (optional)
        :type entity_clusters: dict
        :return: a tuple of the result label ('exists', 'possible', 'conflicts' or 'none')
            and the list of supporting triples
        :rtype: tuple
        """
        # None instead of the original mutable default argument ([]);
        # callers that omitted the parameter behave the same.
        if entity_clusters is None:
            entity_clusters = {}

        original_exists = self.knowledge_graph.check_triple_object_existence(original_triple, transitive=True)
        if original_exists is True:
            return 'exists', []

        triples = [original_triple]
        if len(entity_clusters) > 0:
            triples = self.__create_triples_from_coreference(original_triple, entity_clusters)

        possibilities = []
        conflicts = []
        for triple in triples:
            # check the triple itself
            exists = self.knowledge_graph.check_triple_object_existence(triple, transitive=True)
            if exists is True:
                possibilities.append(triple)
                break
            # triples connecting the same subject and object via any relation
            same_entities = [self.knowledge_graph.get_relation_triples(triple.subject, obj, transitive=True)
                             for obj in triple.objects]
            flatten_same_entities = [found for triple_list in same_entities if triple_list is not None
                                     for found in triple_list if found is not None]
            if len(flatten_same_entities) > 0:
                possibilities.extend(flatten_same_entities)
            # triples with the same subject and relation but other objects
            conflict = self.knowledge_graph.get_triples(triple.subject, triple.relation, transitive=True)
            if conflict is not None:
                conflicts.extend(conflict)
            # check triple with opposite relation (Object - Relation - Subject)
            opposite_exists = self.knowledge_graph.check_triple_object_opposite_relation_existence(triple, transitive=True)
            opposite_triple = [Triple(obj, triple.relation, [triple.subject]) for obj in triple.objects]
            if opposite_exists is True:
                possibilities.extend(opposite_triple)
            # check triple with the synonyms of its relation
            synonym_result = self.check_relation_synonyms(triple)
            if synonym_result is not None:
                possibilities.extend(synonym_result)
        if len(possibilities) > 0:
            return 'possible', list(set(possibilities + conflicts))
        if len(conflicts) > 0:
            return 'conflicts', conflicts

        return 'none', []
Exemple #10
0
    def insert_all_nonconflicting_knowledge(self, article_url):
        """
        Insert non-conflicting triples of an article to the knowledge graph.

        :param article_url: URL of the article source
        :type article_url: str
        """
        article = self.db_article_collection.find_one({'source': article_url})
        for sentence in article['triples']:
            for triple in sentence['triples']:
                # Exact triple already in the KG: just flag it as added.
                if self.knowledge_graph.check_triple_object_existence(Triple.from_dict(triple)):
                    triple['added'] = True
                else:
                    # Triples sharing subject and relation count as conflicts.
                    conflicts = self.knowledge_graph.get_triples(triple['subject'], triple['relation'], transitive=True)
                    # if triple not in conflicts:
                    if conflicts is None or len(conflicts) < 1:
                        self.knowledge_graph.insert_triple_object(Triple.from_dict(triple))
                        triple['added'] = True
                    else:
                        triple['added'] = False
        # Persist the updated 'added' flags back to the article document.
        self.db_article_collection.update_one({'source': article['source']}, {'$set': {'triples': article['triples']}})
Exemple #11
0
    def get_knowledge(self, subject, relation, objects=None):
        """
        Returns triple from the knowledge graph that has the given conditions.
        If objects are given, it will return back the triple if it exist in the knowledge graph.

        :param subject: subject of the triple in DBpedia format
        :type subject: str
        :param relation: relation of the triple in DBpedia format
        :type relation: str
        :param objects: (list) of objects of the triple in DBpedia format, this parameter is optional
        :type objects: list or str
        :return: list of triples (as dictionaries); empty list when no triple matches,
            None when objects are given but the exact triple is not in the graph
        :rtype: list or None
        """
        if objects is None:
            triples = self.knowledge_graph.get_triples(subject, relation)
            # get_triples returns None when nothing matches (see its other
            # callers); the original crashed iterating over None here.
            if triples is None:
                return []
            return [triple.to_dict() for triple in triples]
        # A single object may be passed as a plain string.
        if isinstance(objects, str):
            objects = [objects]
        triple = Triple(subject, relation, objects)
        if self.knowledge_graph.check_triple_object_existence(triple):
            return [triple.to_dict()]
        # Triple not found: None (the original's implicit return, made explicit).
        return None
Exemple #12
0
    def insert_articles_knowledge(self, articles_triples):
        """
        Insert triples that are related to an article to the knowledge graph.
        If the triple has conflict, mark the conflict as 'added' in the db.
        If the triple doesn't exist on db, add the triple to the db.

        :param articles_triples: dictionary of article triples
        :type articles_triples: dict
        """
        for article in articles_triples:
            # Triples currently stored in the DB for this article.
            stored_triples = self.db_article_collection.find_one({'source': article['source']})['triples']
            for sentence in article['triples']:
                # Find the stored entry for this sentence, if any.
                stored_sentence = next((sent_triples for sent_triples in stored_triples
                                        if sent_triples['sentence'] == sentence['sentence']), None)
                for triple in sentence['triples']:
                    # FIXME: check for conflict? probably no need to
                    self.knowledge_graph.insert_triple_object(Triple.from_dict(triple))
                    # Triple already stored under this sentence: flag it as added.
                    if stored_sentence is not None and triple in stored_sentence['triples']:
                        self.db_article_collection.update_many({'source': article['source']},
                                                               {'$set': {
                                                                   'triples.$[].triples.$[triple].added': True}},
                                                               array_filters=[
                                                                   {'triple.subject': triple['subject'],
                                                                    'triple.relation': triple['relation'],
                                                                    'triple.objects': triple['objects']
                                                                    }]
                                                               )

                    # new triple from existing sentence
                    elif stored_sentence is not None:
                        self.db_article_collection.update_one({'source': article['source'],
                                                               'triples': {'$elemMatch':
                                                                               {'sentence': stored_sentence[
                                                                                   'sentence']}}},
                                                              {'$push': {'triples.$.triples':
                                                                             {'subject': triple['subject'],
                                                                              'relation': triple['relation'],
                                                                              'objects': triple['objects'],
                                                                              'added': True}}}
                                                              )
                        # may need to check other sentences, or even articles for the same triple
                    # new triple from non-existing sentence
                    else:
                        # accommodate triples about the article that are manually inserted
                        self.db_article_collection.update_one({'source': article['source']},
                                                              {'$push': {'triples': {'sentence': '', 'triples': [
                                                                  {'subject': triple['subject'],
                                                                   'relation': triple['relation'],
                                                                   'objects': triple['objects'],
                                                                   'added': True}]}
                                                                         }})
Exemple #13
0
def exact_match_fact_check_triples_sentences():
    """
    Exact match closed-world fact checking method, where the input is a list of triples.
    The sentence is included with the triples, only for matching purposes.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_sentences_array
        schema:
          id: triples_sentences_array
          type: array
          items:
            type: object
            properties:
              sentence:
                type: string
              triples:
                $ref: '#/definitions/triples_array'
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_sentences_result
    """
    # 'payload' instead of 'input', which shadowed the builtin.
    payload = request.get_json()
    all_triples = []
    for sentence in payload:
        input_triples = [
            Triple.from_dict(triple) for triple in sentence['triples']
        ]
        triples = exact_match_fc.fact_check_triples(input_triples)
        # Serialize each (triple, (result, supporting triples)) entry.
        triples = [{
            'triple': triple.to_dict(),
            'result': result,
            'other_triples': [other.to_dict() for other in other_triples]
        } for (triple, (result, other_triples)) in triples.items()]
        all_triples.append({
            'sentence': sentence['sentence'],
            'triples': triples
        })
    return {'triples': all_triples}, 200
Exemple #14
0
    def delete_knowledge(self, triples):
        """
        Remove triples from knowledge graph.

        :param triples: list of triples (in the form of dictionaries)
        :type triples: list
        """
        for triple in triples:
            self.knowledge_graph.delete_triple_object(Triple.from_dict(triple), transitive=True)
            # Need to update both triples from articles and from user input. We don't know where the triple was from.
            self.db_article_collection.update_many({'triples': {'$exists': True}},
                                                   {'$set': {'triples.$[].triples.$[triple].added': False}},
                                                   array_filters=[{'triple.subject': triple['subject'],
                                                                   'triple.relation': triple['relation'],
                                                                   'triple.objects': triple['objects']
                                                                   }])
            # Also clear the 'added' flag on the standalone triple document.
            self.db_triples_collection.update_one({'subject': triple['subject'],
                                                   'relation': triple['relation'],
                                                   'objects': triple['objects']},
                                                  {'$set': {'added': False}})
Exemple #15
0
    def __senna(self, input_filename, output_filename, verbose=False):
        """
        Perform Semantic Role Labeling with SENNA and write one triple
        (sentence number, agent, predicate, patient) per line to the
        output file.

        :param input_filename: path of the text file to process (one
            sentence per line)
        :type input_filename: str
        :param output_filename: path of the file the triples are written to
        :type output_filename: str
        :param verbose: whether progress and triples are printed
        :type verbose: bool
        :return: the output file path
        :rtype: str
        """
        if verbose:
            print('Performing Sentence Role Labeling with SENNA...')

        senna = SennaWrapper()

        out_contents = ''
        with open(input_filename, 'r') as input_file:
            sentence_number = 0
            for line in input_file:
                # Skip blank lines. The original tested len(line) < 1, which
                # is never true because each line keeps its trailing '\n'.
                if not line.strip():
                    continue

                senna_output = senna.srl(NLPUtils.adjust_tokens(line), verbose=False)
                for predicate in senna_output.keys():
                    dict_contents = senna_output[predicate]
                    # Pick agent (A0) and patient (A1), falling back to A2
                    # or any remaining role when one of them is missing.
                    agent = None
                    patient = None

                    if 'A0' in dict_contents and 'A1' in dict_contents:
                        agent = dict_contents['A0']
                        patient = dict_contents['A1']

                    elif 'A0' in dict_contents:  # No A1
                        agent = dict_contents['A0']
                        if 'A2' in dict_contents:
                            patient = dict_contents['A2']
                        else:
                            for key in dict_contents.keys():
                                if not key == 'A0':
                                    # The original assigned the whole dict
                                    # here (missing [key]); fixed to match
                                    # the symmetric branches below.
                                    patient = dict_contents[key]

                    elif 'A1' in dict_contents:  # No A0
                        patient = dict_contents['A1']
                        if 'A2' in dict_contents:
                            agent = dict_contents['A2']
                        else:
                            for key in dict_contents.keys():
                                if not key == 'A1':
                                    agent = dict_contents[key]

                    else:  # Neither A0 nor A1
                        if 'A2' in dict_contents:
                            agent = dict_contents['A2']
                            for key in dict_contents.keys():
                                if not key == 'A2':
                                    patient = dict_contents[key]
                        else:  # Very unlikely
                            # dict.keys() views have no .sort() in Python 3;
                            # sort by string length via sorted().
                            key_lst = sorted(dict_contents.keys(), key=len)
                            if len(key_lst) > 1:
                                agent = dict_contents[key_lst[0]]
                                patient = dict_contents[key_lst[1]]

                    if agent is None or patient is None:
                        print('-Warning: No agent or patient determined for predicate {}'.format(predicate))
                        print('-- agent: {}'.format(agent))
                        print('-- patient: {}'.format(patient))
                        continue

                    triple = Triple(sentence_number, agent, predicate, patient)

                    if verbose:
                        print(triple.to_string())

                    out_contents += triple.to_string() + '\n'

                sentence_number += 1

        with open(output_filename, 'w') as output_file:
            output_file.write(out_contents)

        return output_filename
Exemple #16
0
    def make(self, triples_filename, links_filename, verbose=False):
        """
        Build a Turtle ('.ttl') knowledge-graph file from a triples file,
        optionally using a links file with @PREFIX and @LINK declarations.

        :param triples_filename: path of the tab-separated triples file; each
            line holds sentence number, subject, predicate and object
        :type triples_filename: str
        :param links_filename: path of the links file (may be None); '@PREFIX'
            lines add namespace prefixes, '@LINK' lines map predicates to links
        :type links_filename: str or None
        :param verbose: whether warnings about unmatched entities are printed
        :type verbose: bool
        :return: path of the generated '*_kg.ttl' output file
        :rtype: str
        """
        #if not triples_filename.startswith('/'):
        #    triples_filename = os.path.dirname(os.path.realpath(__file__)) + '/' + triples_filename

        #if not links_filename == None and not links_filename.startswith('/'):
        #    links_filename = os.path.dirname(os.path.realpath(__file__)) + '/' + links_filename

        print('Processing triples from {}'.format(triples_filename))

        # Default namespace prefixes; extended by '@PREFIX' lines below.
        self.__prefixed = {
            'http://www.w3.org/2000/01/rdf-schema#': 'rdfs',
            'http://local/local.owl#': 'local',
            'http://local/verbnet_roles.owl#': 'vn.role'
        }
        if not links_filename == None:
            with open(links_filename, 'r') as links_file:
                for line in links_file.readlines():
                    if len(line) < 2: continue

                    if line.startswith('@PREFIX'):
                        # Format: '@PREFIX prefix: <uri>'.
                        line_list = line.split()
                        prefix = line_list[1]
                        prefix = prefix[:prefix.rfind(':')]

                        uri = line_list[2]
                        uri = uri[uri.find('<') + 1:uri.find('>')]

                        self.__prefixed.update({uri: prefix})

                    elif line.startswith('@LINK'):
                        # Format: '@LINK\tpredicate\tlink[\tlink...]'.
                        line_list = line.split('\t')

                        predicate = line_list[1]
                        links = '\t'.join(line_list[2:])
                        self.__links.update({predicate: links})

                links_file.close()

        with open(triples_filename, 'r') as triples_file:
            for line in triples_file.readlines():
                # Each line: sentence number, subject, predicate, object.
                line_lst = line.replace('\"', '').split('\t')
                sentence_number = line_lst[0].strip()
                subject = line_lst[1].strip()
                predicate = line_lst[2].strip()
                object = line_lst[3].strip()

                # NOTE(review): the message says the triple is skipped, but
                # the 'continue' below is commented out, so processing
                # actually continues for this triple.
                if not predicate in self.__links and predicate.find(':') < 0:
                    if verbose:
                        print(
                            'Warning: no match for predicate "{}" was found in the links! Skipping triple ...'
                            .format(predicate))
                #    continue

                predicate_link = ''
                #if predicate.find(':') < 0: #Predicates that are already resources/links
                if predicate in self.__links:
                    predicate_link = self.__links[predicate]

                # NOTE(review): cutoff=1.0 makes get_close_matches return
                # exact matches only — confirm partial matching is meant to
                # be disabled here.
                entities = set([str(X) for X in self.__links.keys()])
                closest_subjects = difflib.get_close_matches(subject,
                                                             entities,
                                                             n=3,
                                                             cutoff=1.0)
                closest_objects = difflib.get_close_matches(object,
                                                            entities,
                                                            n=3,
                                                            cutoff=1.0)

                if len(closest_subjects) < 1:
                    if verbose:
                        print(
                            'Warning: no match for subject "{}" was found in the links! Attempting partials ...'
                            .format(subject))
                    #subj = subject
                    # Reverse sorted list of entities by string length
                    #lst_entities = sorted(list(entities), key=len, reverse=True)
                    #for elem in lst_entities:
                    #    if elem in subj:
                    #        if verbose:
                    #            print('-- Found: {}'.format(elem))
                    #        closest_subjects.append(elem)
                    #        subj = subj.replace(elem, '')

                    #if len(closest_subjects) < 1:
                    #    if verbose:
                    #        print('WARNING: not even partial matches were found for subject "{}" in the links!'.format(subject))
                    #    continue

                if len(closest_objects) < 1:
                    if verbose:
                        print(
                            'Warning: no match for object "{}" was found in the links! Atempting partials ...'
                            .format(object))
                    obj = object
                    # Reverse sorted list of entities by string length
                    #lst_entities = sorted(list(entities), key=len, reverse=True)
                    #for elem in lst_entities:
                    #    if elem in obj:
                    #        if verbose:
                    #            print('-- Found: {}'.format(elem))
                    #        closest_objects.append(elem)
                    #        obj = obj.replace(elem, '')

                    #if len(closest_objects) < 1:
                    #    if verbose:
                    #        print('WARNING: not even partial matches were found for object "{}" in the links!'.format(object))
                    #    continue

                # Check for exact matches and discard the others if that's the case
                #for sub in closest_subjects:
                #    if subject == sub:
                #        closest_subjects = [sub]
                #        break
                #for ob in closest_objects:
                #    if object == ob:
                #        closest_objects = [ob]
                #        break

                # Collect the links of the matched subject/object entities.
                subject_links = []
                for subj in closest_subjects:
                    subject_links += [self.__links[subj]]
                object_links = []
                for obj in closest_objects:
                    object_links += [self.__links[obj]]

                triple = Triple(sentence_number, subject, predicate, object,
                                subject_links, predicate_link, object_links)

                # Accumulate the Turtle fragments produced for this triple.
                prefixes, classes, properties, mapped, relation = triple.to_turtle(
                )
                self.__prefixed.update(prefixes)
                self.__classes.update(classes)
                self.__mapped_relations.update(mapped)
                self.__properties.update(properties)
                self.__relations.add(relation)

            triples_file.close()

        # Output file name: input name with its last '_suffix' replaced by '_kg.ttl'.
        output_filename = os.path.splitext(triples_filename)[0]
        output_filename = output_filename[:output_filename.rfind('_'
                                                                 )] + '_kg.ttl'
        open(output_filename, 'w').close()  # Clean the file in case it exists

        with open(output_filename, 'a') as output_file:
            # Emit prefixes, then each accumulated section in order.
            for key in self.__prefixed.keys():
                output_file.write('@prefix\t{}:\t<{}>\t.\n'.format(
                    self.__prefixed[key], key))

            output_file.write('\n#### Classes ####\n\n')
            for key in self.__classes.keys():
                output_file.write('{}\n\n'.format(self.__classes[key]))

            output_file.write('#### Properties ####\n\n')
            for key in self.__properties.keys():
                output_file.write('{}\n\n'.format(self.__properties[key]))

            output_file.write('#### Mapped Relations ####\n\n')
            for mapping in self.__mapped_relations:
                output_file.write('{}\n'.format(mapping))

            output_file.write('\n#### Relations ####\n\n')
            for relation in self.__relations:
                output_file.write('{}\n'.format(relation))

            output_file.close()
        print('Linked entities were stored at {}'.format(output_filename))

        return output_filename
Exemple #17
0
def exact_match_fact_check_triples():
    """
    Exact-match closed-world fact checking method, where the input is a list of triples.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
          type: array
          items:
            type: object
            properties:
              subject:
                type: string
              relation:
                type: string
              objects:
                type: array
                items:
                  type: string
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
          properties:
            triples:
              type: array
              items:
                type: object
                properties:
                  triple:
                    type: object
                    properties:
                      subject:
                        type: string
                      relation:
                        type: string
                      objects:
                        type: array
                        items:
                          type: string
                  result:
                    type: string
                    enum: [exists, conflicts, possible, none]
                  other_triples:
                    type: array
                    description: list of triples that support the result (conflicting triples, possible triples)
                    $ref: '#/definitions/triples_array'
            truthfulness:
              type: number
    """
    # Parse the JSON request body into Triple objects.
    payload = request.get_json()
    parsed_triples = []
    for raw_triple in payload:
        parsed_triples.append(Triple.from_dict(raw_triple))

    # The checker maps each input triple to a (result, supporting_triples) pair.
    checked = exact_match_fc.fact_check_triples(parsed_triples)

    # Serialize the result back into plain dicts for the JSON response.
    response_items = []
    for checked_triple, (verdict, supporting) in checked.items():
        response_items.append({
            'triple': checked_triple.to_dict(),
            'result': verdict,
            'other_triples': [other.to_dict() for other in supporting]
        })
    return {'triples': response_items}, 200
Exemple #18
0
    def __senna(self, input_filename, output_filename, verbose=False):
        """Extract triples from sentences using dependency parsing plus SENNA
        semantic role labeling, writing one triple per line.

        :param input_filename: path to a text file with one sentence per line
        :param output_filename: path of the file to (over)write with the
            extracted triples, one ``Triple.to_string()`` per line
        :param verbose: when True, print progress and each extracted triple
        :return: output_filename
        """
        if verbose:
            print('Performing Sentence Role Labeling with SENNA...')

        senna = SennaWrapper()

        # Collect triple strings in a list and join once at the end instead of
        # quadratic `out_contents += ...` string concatenation.
        out_lines = []

        def emit(triple):
            # Print (when verbose) and record one extracted triple.
            if verbose:
                print(triple.to_string())
            out_lines.append(triple.to_string())

        with open(input_filename, 'r') as input_file:
            sentence_number = 0
            for line in input_file:
                # BUGFIX: lines read from a file keep their trailing '\n', so
                # the old `len(line) < 1` test never fired and blank lines
                # were parsed (and numbered) as sentences.
                if not line.strip():
                    continue

                dependency_list = NLPUtils.dependency_parse(
                    line,
                    deps_key='enhancedPlusPlusDependencies',
                    verbose=verbose)

                previous_term = ''
                previous_compound = ''
                dict_basic_to_most_specific = {}
                connective_dependencies = []
                while len(dependency_list) > 0:
                    elem = dependency_list.pop()

                    # Skip structural relations that never become triples.
                    if elem[1] in ['ROOT', 'punct', 'det'
                                   ] or 'subj' in elem[1] or 'obj' in elem[1]:
                        continue

                    if elem[1] in ['compound', 'nmod:poss', 'aux', 'neg'
                                   ] or elem[1].endswith('mod'):
                        # Chain modifiers into progressively more specific
                        # terms and relate each to its more generic form.
                        if previous_term == elem[0]:
                            updated_term = '{} {}'.format(
                                elem[2], previous_compound)
                        else:
                            updated_term = '{} {}'.format(elem[2], elem[0])
                            previous_compound = elem[0]
                        dict_basic_to_most_specific[elem[0]] = updated_term

                        triple = Triple(sentence_number, updated_term,
                                        'rdfs:subClassOf', previous_compound)

                        previous_compound = updated_term
                        previous_term = elem[0]

                        emit(triple)

                    elif elem[1] in ['acl', 'appos'
                                     ] or elem[1].startswith('nmod:'):
                        # Handled in a second pass, once the most specific
                        # form of every term is known.
                        connective_dependencies.append(elem)

                while len(connective_dependencies) > 0:
                    elem = connective_dependencies.pop()

                    if elem[1] == 'nmod:poss':
                        continue

                    if elem[1].find(':') > 0:  # e.g. 'nmod:of'
                        connector = elem[1][elem[1].find(':') + 1:]
                    elif elem[1] in ['acl', 'appos']:
                        connector = ''
                    else:
                        connector = elem[1]

                    # Substitute each side with its most specific known form.
                    first = elem[0]
                    if first in dict_basic_to_most_specific:
                        first = dict_basic_to_most_specific[first]

                    second = elem[2]
                    if second in dict_basic_to_most_specific:
                        second = dict_basic_to_most_specific[second]

                    if connector == '':
                        full = '{} {}'.format(first, second)
                    else:
                        full = '{} {} {}'.format(first, connector, second)

                    # Emit both directions of the connective relation.
                    emit(Triple(
                        sentence_number, full,
                        'local:{}_{}'.format(connector,
                                             second.replace(' ', '')), first))

                    emit(Triple(
                        sentence_number, full,
                        'local:{}_{}'.format(first.replace(' ', ''),
                                             connector), second))

                    dict_basic_to_most_specific[elem[0]] = full

                senna_output = senna.srl(line, verbose=False)
                for predicate, pred_args in senna_output.items():
                    pred_arg_names = NLPUtils.get_verbnet_args(predicate,
                                                               verbose=True)
                    if len(pred_arg_names) < 1:
                        print(
                            'WARNING -- Unable to retrieve predicate arg names for "{}"'
                            .format(predicate))

                    if verbose:
                        print('predicate: {}, args: {}'.format(
                            predicate, pred_args))

                    # First pass over the SRL arguments: negation/modality
                    # rewrite the predicate itself; other AM-* become triples.
                    for pred_arg in pred_args:
                        if 'AM-NEG' == pred_arg:
                            predicate = 'not {}'.format(predicate)
                        elif 'AM-MOD' == pred_arg:
                            predicate = ' '.join(
                                [pred_args['AM-MOD'].strip(), predicate])
                        elif pred_arg.startswith('AM-'):
                            # Remove initial stopwords (e.g. determiners)
                            s = pred_args[pred_arg].strip()
                            split = s.split(' ', 1)
                            if NLPUtils.is_stopword(
                                    split[0]) and len(split) > 1:
                                s = split[1]

                            emit(Triple(sentence_number, predicate,
                                        'local:{}'.format(pred_arg), s))

                    # Numbered arguments (A0, A1, ...) map to VerbNet roles.
                    for i in range(len(pred_arg_names)):
                        pred_args_index = 'A{}'.format(i)
                        if pred_args_index in pred_args:
                            # Remove initial stopwords (e.g. determiners)
                            s = pred_args[pred_args_index].strip()
                            split = s.split(' ', 1)
                            if NLPUtils.is_stopword(
                                    split[0]) and len(split) > 1:
                                s = split[1]

                            emit(Triple(
                                sentence_number, predicate,
                                'vn.role:{}'.format(pred_arg_names[i]), s))

                sentence_number += 1

        with open(output_filename, 'w') as output_file:
            output_file.writelines(s + '\n' for s in out_lines)

        return output_filename
Exemple #19
0
    def make(self, triples_filename, links_filename, verbose=False):
        """Build a Turtle knowledge-graph file from a triples file and a
        links file.

        :param triples_filename: tab-separated triples file (sentence number,
            subject, predicate, object); absolute, or relative to this
            module's directory
        :param links_filename: links file containing '@PREFIX', '@PREDICATE'
            and '@ENTITY' lines; absolute, or relative to this module's
            directory
        :param verbose: unused here; kept for interface compatibility
        :return: path of the generated '*_kg.ttl' file
        """
        base_dir = os.path.dirname(os.path.realpath(__file__))
        if not triples_filename.startswith('/'):
            triples_filename = base_dir + '/' + triples_filename

        if not links_filename.startswith('/'):
            links_filename = base_dir + '/' + links_filename

        print('Processing predicates from {}'.format(triples_filename))

        self.__predicates = {}
        self.__prefixed = {
            'http://www.w3.org/2000/01/rdf-schema#': 'rdfs',
            'http://local/local.owl#': 'local'
        }
        self.__entities = {}
        # NOTE(review): self.__classes, self.__properties,
        # self.__mapped_relations and self.__relations are accumulated below
        # but never reset here -- presumably initialized in __init__, so
        # repeated make() calls merge their results. TODO confirm.
        with open(links_filename, 'r') as links_file:
            for line in links_file:
                if len(line) < 2:
                    continue

                if line.startswith('@PREFIX'):
                    # e.g. '@PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>'
                    line_list = line.split()
                    prefix = line_list[1]
                    prefix = prefix[:prefix.rfind(':')]

                    uri = line_list[2]
                    uri = uri[uri.find('<') + 1:uri.find('>')]

                    self.__prefixed.update({uri: prefix})

                elif line.startswith('@PREDICATE'):
                    # e.g. '@PREDICATE some predicate;link'
                    line_list = line.split()
                    line = ' '.join(line_list[1:])

                    line_list = line.split(';')
                    predicate = line_list[0]
                    link = line_list[1]
                    self.__predicates.update({predicate: link})

                elif line.startswith('@ENTITY'):
                    # e.g. '@ENTITY some entity;link1,link2'
                    line_list = line.split()
                    line = ' '.join(line_list[1:])

                    line_list = line.split(';')
                    entity = line_list[0]
                    links = line_list[1].split(',')
                    self.__entities.update({entity: links})

        with open(triples_filename, 'r') as triples_file:
            for line in triples_file:
                line_lst = line.replace('\"', '').split('\t')
                sentence_number = line_lst[0]
                subject = line_lst[1]
                predicate = line_lst[2]
                obj = line_lst[3]  # renamed: do not shadow builtin 'object'

                if predicate not in self.__predicates:
                    print('Warning: predicate "{}" not found in links!'.format(
                        predicate))
                    continue

                predicate_link = self.__predicates[predicate]

                # Fuzzy-match the surface forms against the linked entities.
                closest_subject = difflib.get_close_matches(
                    subject, self.__entities)
                closest_object = difflib.get_close_matches(
                    obj, self.__entities)

                if len(closest_subject) < 1 or len(closest_object) < 1:
                    continue

                subject_link = self.__entities[closest_subject[0]]
                object_link = self.__entities[closest_object[0]]

                triple = Triple(sentence_number, closest_subject[0], predicate,
                                closest_object[0], subject_link,
                                predicate_link, object_link)
                prefixes, classes, properties, mapped, relation = \
                    triple.to_turtle()
                self.__prefixed.update(prefixes)
                self.__classes.update(classes)
                self.__mapped_relations.update(mapped)
                self.__properties.update(properties)
                self.__relations.add(relation)

        output_filename = os.path.splitext(triples_filename)[0]
        output_filename = \
            output_filename[:output_filename.rfind('_')] + '_kg.ttl'

        # A single 'w' open truncates any pre-existing file; no separate
        # "clean the file" pass is needed.
        with open(output_filename, 'w') as output_file:
            for uri, prefix in self.__prefixed.items():
                output_file.write('@prefix\t{}:\t<{}>\t.\n'.format(
                    prefix, uri))

            output_file.write('\n#### Classes ####\n\n')
            for definition in self.__classes.values():
                output_file.write('{}\n\n'.format(definition))

            output_file.write('#### Properties ####\n\n')
            for definition in self.__properties.values():
                output_file.write('{}\n\n'.format(definition))

            output_file.write('#### Mapped Relations ####\n\n')
            for mapping in self.__mapped_relations:
                output_file.write('{}\n'.format(mapping))

            output_file.write('\n#### Relations ####\n\n')
            for relation in self.__relations:
                output_file.write('{}\n'.format(relation))

        print('Linked entities were stored at {}'.format(output_filename))

        return output_filename