Example #1
0
def output_raw(raw_arg1, raw_arg2, raw_relation, sources):
    """Store an un-normalized ReVerb extraction as an assertion in GRAPH.

    A frame of the form ``{1} <raw_relation> {2}`` is looked up or created,
    and an assertion joining the two raw arguments through that frame is
    looked up or created with the ``reverb/en`` dataset tag. The assertion is
    then justified by the conjunction of the module-level ``wikipedia`` and
    ``reverb`` nodes and placed in the context of each source's topic.

    Args:
        raw_arg1: text of the first argument, as extracted.
        raw_arg2: text of the second argument, as extracted.
        raw_relation: text of the relation, as extracted.
        sources: sequence of source URL strings. It is joined with ``'|'``
            and then iterated again, so it must be re-iterable.

    Returns:
        Whatever ``GRAPH.get_or_create_assertion`` returned for the raw
        assertion.
    """
    # The 1-tuple makes %-formatting treat the relation as a single value
    # even if a tuple is passed in.
    frame = u"{1} %s {2}" % (raw_relation,)
    raw = GRAPH.get_or_create_assertion(
        GRAPH.get_or_create_frame('en', frame),
        [GRAPH.get_or_create_concept('en', raw_arg1),
         GRAPH.get_or_create_concept('en', raw_arg2)],
        {'dataset': 'reverb/en', 'license': 'CC-By-SA',
         'sources': '|'.join(sources)}
    )

    # It turns out that only en.wikipedia.org matters as a domain; the rest
    # are all mirrors.
    conjunction = GRAPH.get_or_create_conjunction([wikipedia, reverb])

    # Assertions that start with a digit are really bad in ReVerb, so they
    # get a much smaller weight on the justification edge. Slicing instead of
    # indexing keeps an empty first argument from raising IndexError; it is
    # simply treated as "does not start with a digit".
    weight = 0.2 if raw_arg1[:1].isdigit() else 0.7
    GRAPH.justify(conjunction, raw, weight=weight)

    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept(
            'en', *normalize_topic(topic)
        )
        # The raw assertion is attached to the un-normalized topic concept;
        # the normalized concept is only linked to it by a 'normalized' edge.
        GRAPH.add_context(raw, context)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
    return raw
Example #2
0
def output_triple(arg1, arg2, relation, raw, sources):
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    relation = normalize_rel(relation).strip()
    found_relation = False
    if relation == 'be for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be used for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be not':
        found_relation = True
        relation = 'IsNot'
    if relation == 'be part of':
        found_relation = True
        relation = 'PartOf'
    if relation == 'be similar to':
        found_relation = True
        relation = 'SimilarTo'
    if relation.startswith('be ') and relation.endswith(
            ' of') and relation[3:-3] in TYPE_WORDS:
        found_relation = True
        relation = 'IsA'
    if found_relation:
        rel_node = GRAPH.get_or_create_relation(relation)
    else:
        rel_node = GRAPH.get_or_create_concept('en', relation)
    print '%s(%s, %s)' % \
        (relation, arg1, arg2),

    assertion = GRAPH.get_or_create_assertion(rel_node, [
        GRAPH.get_or_create_concept('en', arg1),
        GRAPH.get_or_create_concept('en', arg2)
    ], {
        'dataset': 'reverb/en',
        'license': 'CC-By-SA',
        'normalized': True
    })
    GRAPH.derive_normalized(raw, assertion)

    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple])
    GRAPH.justify(conjunction, assertion)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept('en',
                                                     *normalize_topic(topic))
        GRAPH.add_context(assertion, context_normal)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
        print "in", context_normal
    return assertion
Example #3
0
def output_triple(arg1, arg2, relation, raw, sources):
    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()
    relation = normalize_rel(relation).strip()
    found_relation = False
    if relation == 'be for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be used for':
        found_relation = True
        relation = 'UsedFor'
    if relation == 'be not':
        found_relation = True
        relation = 'IsNot'
    if relation == 'be part of':
        found_relation = True
        relation = 'PartOf'
    if relation == 'be similar to':
        found_relation = True
        relation = 'SimilarTo'
    if relation.startswith('be ') and relation.endswith(' of') and relation[3:-3] in TYPE_WORDS:
        found_relation = True
        relation = 'IsA'
    if found_relation:
        rel_node = GRAPH.get_or_create_relation(relation)
    else:
        rel_node = GRAPH.get_or_create_concept('en', relation)
    print '%s(%s, %s)' % \
        (relation, arg1, arg2),

    assertion = GRAPH.get_or_create_assertion(
        rel_node,
        [GRAPH.get_or_create_concept('en', arg1),
         GRAPH.get_or_create_concept('en', arg2)],
        {'dataset': 'reverb/en', 'license': 'CC-By-SA',
         'normalized': True}
    )
    GRAPH.derive_normalized(raw, assertion)
    
    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_triple])
    GRAPH.justify(conjunction, assertion)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', topic)
        context_normal = GRAPH.get_or_create_concept('en', *normalize_topic(topic))
        GRAPH.add_context(assertion, context_normal)
        GRAPH.get_or_create_edge('normalized', context, context_normal)
        print "in", context_normal
    return assertion
Example #4
0
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None):
    """Store a normalized ``relation(arg1, arg2)`` assertion in GRAPH.

    The assertion is looked up or created under ``'/relation/' + relation``,
    justified by the conjunction of ``raw`` and the module-level
    ``reverb_object`` node, and placed in the context of each source's
    normalized topic. The statement is printed to stdout as a side effect.

    Args:
        arg1: text of the first argument; normalized and stripped here.
        arg2: text of the second argument; normalized and stripped here.
        arg3: vestigial. It used to produce a second assertion, but that did
            not yield sensible statements, so it is accepted and ignored.
        relation: relation name, appended to ``'/relation/'``.
        raw: the raw assertion this statement is justified by.
        sources: iterable of source URL strings.
        prep: vestigial companion of ``arg3``; accepted and ignored.

    Returns:
        The assertion, or ``None`` when ``arg2`` is blank.
    """
    # Nothing useful can be asserted without a second argument.
    if arg2.strip() == "":
        return None

    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()

    # Parenthesised single-expression form prints the same text as the
    # Python 2 print statement and is also valid Python 3.
    print('%s(%s, %s)' % (relation, arg1, arg2))
    assertion = GRAPH.get_or_create_assertion(
        '/relation/' + relation,
        [
            GRAPH.get_or_create_concept('en', arg1),
            GRAPH.get_or_create_concept('en', arg2),
        ],
        {
            'dataset': 'reverb/en',
            'license': 'CC-By-SA',
            'normalized': True,
        },
    )

    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_object])
    GRAPH.justify(conjunction, assertion)
    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', *normalize_topic(topic))
        GRAPH.add_context(assertion, context)

    return assertion
Example #5
0
def output_sentence(arg1, arg2, arg3, relation, raw, sources, prep=None):
    """Create, justify and contextualize one normalized assertion.

    Builds ``relation(arg1, arg2)`` under ``'/relation/' + relation`` with
    the ``reverb/en`` dataset tag, justifies it with the conjunction of
    ``raw`` and the module-level ``reverb_object`` node, and adds the
    normalized topic of every source as a context. The statement is also
    printed to stdout.

    ``arg3`` and ``prep`` are vestigial: they once drove a second assertion
    that did not produce sensible statements. They are kept in the signature
    for existing callers and are not used.

    Returns:
        The assertion, or ``None`` if ``arg2`` contains only whitespace.
    """
    if not arg2.strip():
        # No second argument, so there is nothing to assert.
        return None

    arg1 = normalize(arg1).strip()
    arg2 = normalize(arg2).strip()

    # A single parenthesised expression behaves identically as a Python 2
    # print statement and as a Python 3 print call.
    print('%s(%s, %s)' % (relation, arg1, arg2))

    concepts = [GRAPH.get_or_create_concept('en', arg1),
                GRAPH.get_or_create_concept('en', arg2)]
    assertion = GRAPH.get_or_create_assertion(
        '/relation/' + relation,
        concepts,
        {'dataset': 'reverb/en', 'license': 'CC-By-SA',
         'normalized': True}
    )

    conjunction = GRAPH.get_or_create_conjunction([raw, reverb_object])
    GRAPH.justify(conjunction, assertion)

    for source in sources:
        # Put in context with Wikipedia articles.
        topic = article_url_to_topic(source)
        context = GRAPH.get_or_create_concept('en', *normalize_topic(topic))
        GRAPH.add_context(assertion, context)

    return assertion
Example #6
0
def normalize_topic_url(url):
    """Extract the topic from a URL and pass it through ``normalize_topic``.

    The URL is percent-decoded and decoded as UTF-8 (undecodable bytes are
    dropped). Surrounding slashes are stripped, the last ``/``-separated
    segment is taken, and within it the text after the last ``#``.
    """
    decoded = urllib.unquote(url).decode('utf-8', 'ignore')
    last_segment = decoded.strip('/').split('/')[-1]
    # NOTE(review): this keeps what follows the final '#', i.e. the fragment
    # rather than the page name, whenever a '#' is present -- confirm that
    # this is intended.
    topic = last_segment.split('#')[-1]
    return normalize_topic(topic)