def output_raw(raw_arg1, raw_arg2, raw_relation, sources):
    """Add a raw (unnormalized) ReVerb extraction to the graph.

    Builds a frame of the form u"{1} <relation> {2}" around the literal
    relation text, asserts it over the two raw arguments, justifies it
    from the Wikipedia+ReVerb conjunction, and puts it in context with
    the Wikipedia article(s) it came from.

    Returns the raw assertion node.
    """
    frame = u"{1} %s {2}" % (raw_relation)
    raw = GRAPH.get_or_create_assertion(
        GRAPH.get_or_create_frame('en', frame),
        [GRAPH.get_or_create_concept('en', raw_arg1),
         GRAPH.get_or_create_concept('en', raw_arg2)],
        {'dataset': 'reverb/en', 'license': 'CC-By-SA',
         'sources': '|'.join(sources)}
    )

    # Turns out that only en.wikipedia.org matters as a domain. The rest are
    # all mirrors.
    conjunction = GRAPH.get_or_create_conjunction([wikipedia, reverb])

    # The assertions that start with numbers are really bad in ReVerb.
    # We set a small weight on the justification edge, if we include
    # them at all.
    # Use raw_arg1[:1] rather than raw_arg1[0]: identical for non-empty
    # strings, but an empty raw_arg1 no longer raises IndexError and
    # simply takes the normal-weight branch (''.isdigit() is False).
    if raw_arg1[:1].isdigit():
        GRAPH.justify(conjunction, raw, weight=0.2)
    else:
        GRAPH.justify(conjunction, raw, weight=0.7)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept('en',
                                                     *normalize_topic(topic))
        # NOTE(review): the raw assertion is contextualized with the
        # *unnormalized* topic here, while output_triple uses the
        # normalized one — confirm the asymmetry is intentional.
        GRAPH.add_context(raw, context)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
    return raw
def output_triple(arg1, arg2, relation, raw, sources): arg1 = normalize(arg1).strip() arg2 = normalize(arg2).strip() relation = normalize_rel(relation).strip() found_relation = False if relation == 'be for': found_relation = True relation = 'UsedFor' if relation == 'be used for': found_relation = True relation = 'UsedFor' if relation == 'be not': found_relation = True relation = 'IsNot' if relation == 'be part of': found_relation = True relation = 'PartOf' if relation == 'be similar to': found_relation = True relation = 'SimilarTo' if relation.startswith('be ') and relation.endswith( ' of') and relation[3:-3] in TYPE_WORDS: found_relation = True relation = 'IsA' if found_relation: rel_node = GRAPH.get_or_create_relation(relation) else: rel_node = GRAPH.get_or_create_concept('en', relation) print '%s(%s, %s)' % \ (relation, arg1, arg2), assertion = GRAPH.get_or_create_assertion(rel_node, [ GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2) ], { 'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True }) GRAPH.derive_normalized(raw, assertion) conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple]) GRAPH.justify(conjunction, assertion) for source in sources: # Put in context with Wikipedia articles. topic = article_url_to_topic(source) context = GRAPH.get_or_create_concept('en', topic) context_normal = GRAPH.get_or_create_concept('en', *normalize_topic(topic)) GRAPH.add_context(assertion, context_normal) GRAPH.get_or_create_edge('normalized', context, context_normal) print "in", context_normal return assertion
def output_triple(arg1, arg2, relation, raw, sources): arg1 = normalize(arg1).strip() arg2 = normalize(arg2).strip() relation = normalize_rel(relation).strip() found_relation = False if relation == 'be for': found_relation = True relation = 'UsedFor' if relation == 'be used for': found_relation = True relation = 'UsedFor' if relation == 'be not': found_relation = True relation = 'IsNot' if relation == 'be part of': found_relation = True relation = 'PartOf' if relation == 'be similar to': found_relation = True relation = 'SimilarTo' if relation.startswith('be ') and relation.endswith(' of') and relation[3:-3] in TYPE_WORDS: found_relation = True relation = 'IsA' if found_relation: rel_node = GRAPH.get_or_create_relation(relation) else: rel_node = GRAPH.get_or_create_concept('en', relation) print '%s(%s, %s)' % \ (relation, arg1, arg2), assertion = GRAPH.get_or_create_assertion( rel_node, [GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2)], {'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True} ) GRAPH.derive_normalized(raw, assertion) conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple]) GRAPH.justify(conjunction, assertion) for source in sources: # Put in context with Wikipedia articles. topic = article_url_to_topic(source) context = GRAPH.get_or_create_concept('en', topic) context_normal = GRAPH.get_or_create_concept('en', *normalize_topic(topic)) GRAPH.add_context(assertion, context_normal) GRAPH.get_or_create_edge('normalized', context, context_normal) print "in", context_normal return assertion
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None): # arg3 is vestigial; we weren't getting sensible statements from it. if arg2.strip() == "": # Remove "A is for B" sentence return arg1 = normalize(arg1).strip() arg2 = normalize(arg2).strip() assertion = None if arg3 == None: print '%s(%s, %s)' % (relation, arg1, arg2) assertion = GRAPH.get_or_create_assertion('/relation/' + relation, [ GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2) ], { 'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True }) assertions = (assertion, ) else: print '%s(%s, %s)' % \ (relation, arg1, arg2) assertion1 = GRAPH.get_or_create_assertion('/relation/' + relation, [ GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2) ], { 'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True }) #arg3 = normalize(arg3).strip() #assertion2 = GRAPH.get_or_create_assertion( # GRAPH.get_or_create_concept('en', prep, 'p'), # [GRAPH.get_or_create_concept('en', arg2), # GRAPH.get_or_create_concept('en', arg3)], # {'dataset': 'reverb/en', 'license': 'CC-By-SA', # 'normalized': True} #) assertions = (assertion1, ) for assertion in assertions: conjunction = GRAPH.get_or_create_conjunction([raw, reverb_object]) GRAPH.justify(conjunction, assertion) for source in sources: # Put in context with Wikipedia articles. topic = article_url_to_topic(source) context = GRAPH.get_or_create_concept('en', *normalize_topic(topic)) GRAPH.add_context(assertion, context) return assertion
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None): # arg3 is vestigial; we weren't getting sensible statements from it. if arg2.strip() == "": # Remove "A is for B" sentence return arg1 = normalize(arg1).strip() arg2 = normalize(arg2).strip() assertion = None if arg3 == None: print '%s(%s, %s)' % (relation, arg1, arg2) assertion = GRAPH.get_or_create_assertion( '/relation/'+relation, [GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2)], {'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True} ) assertions = (assertion,) else: print '%s(%s, %s)' % \ (relation, arg1, arg2) assertion1 = GRAPH.get_or_create_assertion( '/relation/'+relation, [GRAPH.get_or_create_concept('en', arg1), GRAPH.get_or_create_concept('en', arg2)], {'dataset': 'reverb/en', 'license': 'CC-By-SA', 'normalized': True} ) #arg3 = normalize(arg3).strip() #assertion2 = GRAPH.get_or_create_assertion( # GRAPH.get_or_create_concept('en', prep, 'p'), # [GRAPH.get_or_create_concept('en', arg2), # GRAPH.get_or_create_concept('en', arg3)], # {'dataset': 'reverb/en', 'license': 'CC-By-SA', # 'normalized': True} #) assertions = (assertion1,) for assertion in assertions: conjunction = GRAPH.get_or_create_conjunction( [raw, reverb_object] ) GRAPH.justify(conjunction, assertion) for source in sources: # Put in context with Wikipedia articles. topic = article_url_to_topic(source) context = GRAPH.get_or_create_concept('en', *normalize_topic(topic)) GRAPH.add_context(assertion, context) return assertion
def normalize_topic_url(url):
    """Turn a percent-encoded article URL into a normalized topic.

    Decodes the URL, takes the last path segment (after any '#'
    fragment marker), and hands it to normalize_topic.
    """
    decoded = urllib.unquote(url).decode('utf-8', 'ignore')
    last_segment = decoded.strip('/').split('/')[-1]
    fragment = last_segment.split('#')[-1]
    return normalize_topic(fragment)