def insert_knowledge(self, triple, check_conflict):
    """
    Insert triple to the knowledge graph.

    The triple is always upserted into the triples collection first. It is then inserted into the
    knowledge graph and flagged as 'added' in the collection, unless check_conflict is True and
    conflicting triples are found, in which case those conflicts are returned instead.

    :param triple: the triple to be inserted to the knowledge graph; read through its 'subject',
        'relation' and 'objects' keys
    :type triple: dict
    :param check_conflict: whether it should check for conflicts first or not.
    :type check_conflict: bool
    :return: list of conflicts if there are conflicts and check_conflict is True, None otherwise
    :rtype: list or None
    """
    # The same filter identifies the stored triple for both the upsert and the 'added' flag update.
    triple_filter = {'subject': triple['subject'],
                     'relation': triple['relation'],
                     'objects': triple['objects']}
    self.db_triples_collection.replace_one(triple_filter, triple, upsert=True)

    triple_object = Triple.from_dict(triple)
    # Only look for conflicts when the exact triple is not already in the knowledge graph.
    if check_conflict and not self.knowledge_graph.check_triple_object_existence(triple_object):
        conflicts = self.knowledge_graph.get_triples(triple['subject'], triple['relation'])
        # An empty result is not a conflict (same rule as insert_all_nonconflicting_knowledge);
        # previously an empty list was returned here and the triple was never inserted.
        if conflicts:
            return conflicts

    self.knowledge_graph.insert_triple_object(triple_object)
    self.db_triples_collection.update_one(triple_filter, {'$set': {'added': True}})
    return None
def __extract_and_save_triples(self, url, texts, extraction_scope, kg_auto_update):
    """
    Private method to extract the triples of an article and save them to the DB.

    Every extracted triple starts with 'added' set to False; triples that already exist verbatim in the
    knowledge graph are then marked as added. The result is stored on the article document whose
    'source' equals the URL. Afterwards, non-conflicting triples are added to the knowledge graph if
    kg_auto_update is truthy, or if it is None and self.auto_update is truthy.

    :param url: URL of article whose triples are going to be extracted
    :type url: str
    :param texts: article text whose triples are going to be extracted
    :type texts: str
    :param extraction_scope: The scope of the extraction, deciding whether it should include only relations
        between 'named_entities', 'noun_phrases', or 'all'.
    :type extraction_scope: str
    :param kg_auto_update: whether the non-conflicting triples are added to the knowledge graph or not.
        None defers the decision to self.auto_update.
    :type kg_auto_update: bool
    """
    self.logger.info('Extracting triples for article: %s', url)

    # Build one entry per produced result: results[0] is stored as the sentence, results[1] holds its triples.
    triples = []
    for results in self.triple_producer.produce_triples(texts, extraction_scope=extraction_scope):
        sentence_text = results[0]
        # set 'added' to False for all triples initially
        sentence_triples = [dict(produced.to_dict(), added=False) for produced in results[1]]
        triples.append({'sentence': sentence_text, 'triples': sentence_triples})

    # The exact triple already exists in the KG. Mark as added.
    for sentence in triples:
        for triple in sentence['triples']:
            if self.knowledge_graph.check_triple_object_existence(Triple.from_dict(triple)) is True:
                triple['added'] = True

    self.db_article_collection.update_one({'source': url}, {'$set': {'triples': triples}})

    # An explicit kg_auto_update wins; None falls back to the instance-wide setting.
    should_update = self.auto_update if kg_auto_update is None else kg_auto_update
    if should_update:
        self.logger.info('Inserting non conflicting knowledge for ' + url)
        self.insert_all_nonconflicting_knowledge(url)
def transitive_exact_match_fact_check_triples():
    """
    Exact-match closed-world fact checking method, where the input is a list of triples.
    It also checks for entities with the sameAs relation.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
    """
    # The request body is iterated unconditionally below, so the body parameter is declared
    # as required in the spec above, matching the sibling fact-checking endpoints.
    input_triples = [Triple.from_dict(triple) for triple in request.get_json()]

    # The fact checker returns a mapping: triple -> (result, other_triples).
    checked = exact_match_fc.fact_check_triples(input_triples, transitive=True)

    # Convert every Triple object to a plain dict so the response body can be serialised.
    triples = [{
        'triple': triple.to_dict(),
        'result': result,
        'other_triples': [other.to_dict() for other in other_triples]
    } for (triple, (result, other_triples)) in checked.items()]
    return {'triples': triples}, 200
def non_exact_match_fact_check_triples():
    """
    Non-exact match closed-world fact checking method, where the input is a list of triples.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
    """
    # Turn the JSON body (a list of triple dicts) into Triple objects.
    submitted = [Triple.from_dict(item) for item in request.get_json()]

    # Mapping of each checked triple to its (result, other_triples) pair.
    verdicts = non_exact_match_fc.fact_check_triples(submitted)

    # Flatten the mapping into dicts made only of plain values for the response body.
    report = []
    for checked, (result, other_triples) in verdicts.items():
        report.append({
            'triple': checked.to_dict(),
            'result': result,
            'other_triples': [other.to_dict() for other in other_triples],
        })
    return {'triples': report}, 200
def insert_all_nonconflicting_knowledge(self, article_url):
    """
    Insert non-conflicting triples of an article to the knowledge graph.

    Each stored triple of the article ends up with 'added' set to True when it is already in the knowledge
    graph or was inserted by this call, and to False when conflicting triples were found. The updated
    triples are then written back to the article document.

    :param article_url: URL of the article source
    :type article_url: str
    """
    article = self.db_article_collection.find_one({'source': article_url})
    graph = self.knowledge_graph

    for sentence in article['triples']:
        for triple in sentence['triples']:
            # Exact triple already known: just flag it.
            if graph.check_triple_object_existence(Triple.from_dict(triple)):
                triple['added'] = True
                continue

            conflicts = graph.get_triples(triple['subject'], triple['relation'], transitive=True)
            # Both None and an empty result count as "no conflict".
            conflict_free = conflicts is None or len(conflicts) < 1
            if conflict_free:
                graph.insert_triple_object(Triple.from_dict(triple))
            triple['added'] = conflict_free

    self.db_article_collection.update_one({'source': article['source']},
                                          {'$set': {'triples': article['triples']}})
def insert_articles_knowledge(self, articles_triples):
    """
    Insert triples that are related to an article to the knowledge graph.

    Every given triple is inserted into the knowledge graph without a conflict check. The stored article
    document is then updated: a triple already stored under the matching sentence is marked as 'added',
    a new triple for a stored sentence is appended to that sentence, and a triple whose sentence is not
    stored is appended as a new entry with an empty sentence.

    :param articles_triples: iterable of article dictionaries, each read through its 'source' and
        'triples' keys
    :type articles_triples: list
    """
    for article in articles_triples:
        stored_triples = self.db_article_collection.find_one({'source': article['source']})['triples']

        for sentence in article['triples']:
            # Find the stored entry for this sentence, if there is one.
            stored_sentence = None
            for candidate in stored_triples:
                if candidate['sentence'] == sentence['sentence']:
                    stored_sentence = candidate
                    break

            for triple in sentence['triples']:
                # FIXME: check for conflict? probably no need to
                self.knowledge_graph.insert_triple_object(Triple.from_dict(triple))

                if stored_sentence is None:
                    # new triple from non-existing sentence
                    # accommodate triples about the article that are manually inserted
                    manual_entry = {'sentence': '',
                                    'triples': [{'subject': triple['subject'],
                                                 'relation': triple['relation'],
                                                 'objects': triple['objects'],
                                                 'added': True}]}
                    self.db_article_collection.update_one({'source': article['source']},
                                                          {'$push': {'triples': manual_entry}})
                elif triple in stored_sentence['triples']:
                    # Known triple: flag every stored copy that matches subject, relation and objects.
                    matching = [{'triple.subject': triple['subject'],
                                 'triple.relation': triple['relation'],
                                 'triple.objects': triple['objects']}]
                    self.db_article_collection.update_many(
                        {'source': article['source']},
                        {'$set': {'triples.$[].triples.$[triple].added': True}},
                        array_filters=matching)
                else:
                    # new triple from existing sentence
                    added_triple = {'subject': triple['subject'],
                                    'relation': triple['relation'],
                                    'objects': triple['objects'],
                                    'added': True}
                    self.db_article_collection.update_one(
                        {'source': article['source'],
                         'triples': {'$elemMatch': {'sentence': stored_sentence['sentence']}}},
                        {'$push': {'triples.$.triples': added_triple}})
                    # may need to check other sentences, or even articles for the same triple
def exact_match_fact_check_triples_sentences():
    """
    Exact match closed-world fact checking method, where the input is a list of triples.
    The sentence is included with the triples, only for matching purposes.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_sentences_array
        schema:
          id: triples_sentences_array
          type: array
          items:
            type: object
            properties:
              sentence:
                type: string
              triples:
                $ref: '#/definitions/triples_array'
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_sentences_result
    """
    payload = request.get_json()

    all_triples = []
    for entry in payload:
        # Fact-check the triples of one sentence at a time.
        sentence_triples = [Triple.from_dict(item) for item in entry['triples']]
        verdicts = exact_match_fc.fact_check_triples(sentence_triples)

        # Each verdict maps a triple to its (result, other_triples) pair.
        checked = []
        for triple, (result, other_triples) in verdicts.items():
            checked.append({
                'triple': triple.to_dict(),
                'result': result,
                'other_triples': [other.to_dict() for other in other_triples],
            })

        # The sentence is passed through unchanged alongside its results.
        all_triples.append({'sentence': entry['sentence'], 'triples': checked})

    return {'triples': all_triples}, 200
def delete_knowledge(self, triples):
    """
    Remove triples from knowledge graph.

    Each triple is deleted from the knowledge graph and its 'added' flag is reset to False both in the
    article documents and in the triples collection.

    :param triples: list of triples (in the form of dictionaries)
    :type triples: list
    """
    for triple in triples:
        self.knowledge_graph.delete_triple_object(Triple.from_dict(triple), transitive=True)

        subject, relation, objects = triple['subject'], triple['relation'], triple['objects']

        # Need to update both triples from articles and from user input. We don't know where the triple was from.
        self.db_article_collection.update_many(
            {'triples': {'$exists': True}},
            {'$set': {'triples.$[].triples.$[triple].added': False}},
            array_filters=[{'triple.subject': subject,
                            'triple.relation': relation,
                            'triple.objects': objects}])
        self.db_triples_collection.update_one(
            {'subject': subject, 'relation': relation, 'objects': objects},
            {'$set': {'added': False}})
def exact_match_fact_check_triples():
    """
    Exact-match closed-world fact checking method, where the input is a list of triples.
    ---
    tags:
      - Fact-Checker
    consumes:
      - application/json
    parameters:
      - in: body
        name: triples_array
        schema:
          id: triples_array
          type: array
          items:
            type: object
            properties:
              subject:
                type: string
              relation:
                type: string
              objects:
                type: array
                items:
                  type: string
        required: true
    responses:
      200:
        description: Fact-checking result
        schema:
          id: fact_checking_result
          properties:
            triples:
              type: array
              items:
                type: object
                properties:
                  triple:
                    type: object
                    properties:
                      subject:
                        type: string
                      relation:
                        type: string
                      objects:
                        type: array
                        items:
                          type: string
                  result:
                    type: string
                    enum: [exists, conflicts, possible, none]
                  other_triples:
                    type: array
                    description: list of triples that support the result (conflicting triples, possible triples)
                    $ref: '#/definitions/triples_array'
            truthfulness:
              type: number
    """
    # Turn the JSON body (a list of triple dicts) into Triple objects.
    submitted = [Triple.from_dict(item) for item in request.get_json()]

    # Mapping of each checked triple to its (result, other_triples) pair.
    verdicts = exact_match_fc.fact_check_triples(submitted)

    # Flatten the mapping into dicts made only of plain values for the response body.
    report = []
    for checked, (result, other_triples) in verdicts.items():
        report.append({
            'triple': checked.to_dict(),
            'result': result,
            'other_triples': [other.to_dict() for other in other_triples],
        })
    return {'triples': report}, 200