Beispiel #1
0
def make_edge(rel, start, end, dataset, license, sources,
              context='/ctx/all', surfaceText=None, weight=1.0):
    """
    Build the dictionary form of an edge (a justified assertion).

    `sources` may be either a list of source URIs or a single URI. A list
    is combined into one URI with `conjunction_uri`; anything else is used
    as the source URI as-is and split into its alternatives with
    `parse_possible_compound_uri('or', ...)`. Every resulting source is then
    split again with `parse_possible_compound_uri('and', ...)`, and the
    de-duplicated, sorted pieces are stored under 'sources'.

    The 'id' value is '/e/' followed by the SHA-1 hex digest of the
    assertion URI, the context and the source URI, joined by single spaces
    and encoded as UTF-8. All other arguments are stored in the result
    unchanged.

        >>> e = make_edge(rel='/r/HasProperty',
        ...               start='/c/en/fire',
        ...               end='/c/en/hot',
        ...               dataset='/d/conceptnet/4/en',
        ...               license=Licenses.cc_attribution,
        ...               sources='/and/[/.../]',
        ...               surfaceText='[[Fire]] is [[hot]]',
        ...               weight=1.0)
        >>> pprint(e)
        {'context': '/ctx/all',
         'dataset': '/d/conceptnet/4/en',
         'end': '/c/en/hot',
         'features': ['/c/en/fire /r/HasProperty -',
                      '/c/en/fire - /c/en/hot',
                      '- /r/HasProperty /c/en/hot'],
         'id': '/e/ee13e234ee835eabfcf7c906b358cc2229366b42',
         'license': '/l/CC/By',
         'rel': '/r/HasProperty',
         'source_uri': '/and/[/.../]',
         'sources': ['/...'],
         'start': '/c/en/fire',
         'surfaceText': '[[Fire]] is [[hot]]',
         'uri': '/a/[/r/HasProperty/,/c/en/fire/,/c/en/hot/]',
         'weight': 1.0}
    """
    # Three partial views of the assertion, each with one slot replaced
    # by '-'.
    start_rel_feature = "%s %s -" % (start, rel)
    start_end_feature = "%s - %s" % (start, end)
    rel_end_feature = "- %s %s" % (rel, end)

    uri = assertion_uri(rel, start, end)

    if not isinstance(sources, list):
        # A single URI: keep it verbatim and split it into alternatives.
        source_tree = sources
        source_list = parse_possible_compound_uri('or', sources)
    else:
        # A list: fold the members into one URI.
        source_tree = conjunction_uri(*sources)
        source_list = sources

    # Break every source into its 'and' components and pool them, dropping
    # duplicates along the way.
    unique_sources = set()
    for source in source_list:
        unique_sources.update(parse_possible_compound_uri('and', source))

    # Generate a unique ID for the edge. This is the only opaque ID
    # that appears in ConceptNet objects. You can use it as a
    # pseudo-random sort order over edges.
    edge_key = ' '.join([uri, context, source_tree]).encode('utf-8')
    edge_id = '/e/' + sha1(edge_key).hexdigest()

    return {
        'id': edge_id,
        'uri': uri,
        'rel': rel,
        'start': start,
        'end': end,
        'context': context,
        'dataset': dataset,
        'sources': sorted(unique_sources),
        'source_uri': source_tree,
        'features': [start_rel_feature, start_end_feature, rel_end_feature],
        'license': license,
        'weight': weight,
        'surfaceText': surfaceText
    }
Beispiel #2
0
def make_edge(rel,
              start,
              end,
              dataset,
              license,
              sources,
              surfaceText=None,
              surfaceStart=None,
              surfaceEnd=None,
              weight=1.0):
    """
    Build the dictionary form of an edge (a justified assertion).

    `sources` must be a list of dicts; this is checked with `assert`
    statements, so a violation raises AssertionError carrying `sources`.

    'features' holds three strings built from `rel` and the `uri_prefix`
    of `start` and `end`, but only when `is_concept` accepts both prefixes;
    otherwise it is an empty list.

    If either `surfaceStart` or `surfaceEnd` is None, both are taken from
    `extract_surface_terms(surfaceText)`.

        >>> from pprint import pprint
        >>> from conceptnet5.uri import Licenses
        >>> e = make_edge(rel='/r/HasProperty',
        ...               start='/c/en/fire',
        ...               end='/c/en/hot',
        ...               dataset='/d/conceptnet/4/en',
        ...               license=Licenses.cc_attribution,
        ...               sources=[{'contributor': '/s/contributor/omcs/dev'}],
        ...               surfaceText='[[Fire]] is [[hot]]',
        ...               weight=1.0)
        >>> pprint(e)
        {'dataset': '/d/conceptnet/4/en',
         'end': '/c/en/hot',
         'features': ['/c/en/fire /r/HasProperty -',
                      '/c/en/fire - /c/en/hot',
                      '- /r/HasProperty /c/en/hot'],
         'license': 'cc:by/4.0',
         'rel': '/r/HasProperty',
         'sources': [{'contributor': '/s/contributor/omcs/dev'}],
         'start': '/c/en/fire',
         'surfaceEnd': 'hot',
         'surfaceStart': 'Fire',
         'surfaceText': '[[Fire]] is [[hot]]',
         'uri': '/a/[/r/HasProperty/,/c/en/fire/,/c/en/hot/]',
         'weight': 1.0}
    """
    start_prefix = uri_prefix(start)
    end_prefix = uri_prefix(end)

    # Features are only produced when both endpoints pass is_concept.
    features = []
    if is_concept(start_prefix) and is_concept(end_prefix):
        features = [
            "%s %s -" % (start_prefix, rel),
            "%s - %s" % (start_prefix, end_prefix),
            "- %s %s" % (rel, end_prefix)
        ]

    uri = assertion_uri(rel, start, end)

    # NOTE: these checks are assert statements, so they are skipped when
    # Python runs with optimizations enabled (-O).
    assert isinstance(sources, list), sources
    assert all(isinstance(source, dict) for source in sources), sources

    # Both surface terms are re-derived together when either is missing,
    # which replaces a lone caller-supplied term as well.
    missing_surface_term = surfaceStart is None or surfaceEnd is None
    if missing_surface_term:
        surfaceStart, surfaceEnd = extract_surface_terms(surfaceText)

    return {
        'uri': uri,
        'rel': rel,
        'start': start,
        'end': end,
        'dataset': dataset,
        'sources': sources,
        'features': features,
        'license': license,
        'weight': weight,
        'surfaceText': surfaceText,
        'surfaceStart': surfaceStart,
        'surfaceEnd': surfaceEnd
    }
Beispiel #3
0
def make_edge(rel, start, end, dataset, license, sources,
              surfaceText=None, surfaceStart=None, surfaceEnd=None, weight=1.0):
    """
    Assemble an edge (a justified assertion) as a plain dictionary.

    The result always carries the assertion URI from `assertion_uri`, the
    arguments `rel`, `start`, `end`, `dataset`, `sources`, `license`,
    `weight` and `surfaceText` as given, a 'features' list, and the two
    surface terms.

    'features' is empty unless `is_concept` accepts the `uri_prefix` of
    both `start` and `end`. The surface terms come from
    `extract_surface_terms(surfaceText)` whenever `surfaceStart` or
    `surfaceEnd` is None.

    `sources` is asserted to be a list whose items are all dicts.

        >>> from pprint import pprint
        >>> from conceptnet5.uri import Licenses
        >>> e = make_edge(rel='/r/HasProperty',
        ...               start='/c/en/fire',
        ...               end='/c/en/hot',
        ...               dataset='/d/conceptnet/4/en',
        ...               license=Licenses.cc_attribution,
        ...               sources=[{'contributor': '/s/contributor/omcs/dev'}],
        ...               surfaceText='[[Fire]] is [[hot]]',
        ...               weight=1.0)
        >>> pprint(e)
        {'dataset': '/d/conceptnet/4/en',
         'end': '/c/en/hot',
         'features': ['/c/en/fire /r/HasProperty -',
                      '/c/en/fire - /c/en/hot',
                      '- /r/HasProperty /c/en/hot'],
         'license': 'cc:by/4.0',
         'rel': '/r/HasProperty',
         'sources': [{'contributor': '/s/contributor/omcs/dev'}],
         'start': '/c/en/fire',
         'surfaceEnd': 'hot',
         'surfaceStart': 'Fire',
         'surfaceText': '[[Fire]] is [[hot]]',
         'uri': '/a/[/r/HasProperty/,/c/en/fire/,/c/en/hot/]',
         'weight': 1.0}
    """
    head = uri_prefix(start)
    tail = uri_prefix(end)

    # No features unless both endpoint prefixes pass is_concept.
    if not is_concept(head) or not is_concept(tail):
        features = []
    else:
        features = [
            "%s %s -" % (head, rel),
            "%s - %s" % (head, tail),
            "- %s %s" % (rel, tail)
        ]

    uri = assertion_uri(rel, start, end)

    # Validate the shape of `sources`; a failure reports the whole value.
    assert isinstance(sources, list), sources
    for source in sources:
        assert isinstance(source, dict), sources

    # A missing term on either side causes both to be re-extracted from
    # the surface text.
    if surfaceStart is None or surfaceEnd is None:
        extracted_terms = extract_surface_terms(surfaceText)
        surfaceStart, surfaceEnd = extracted_terms

    return {
        'uri': uri,
        'rel': rel,
        'start': start,
        'end': end,
        'dataset': dataset,
        'sources': sources,
        'features': features,
        'license': license,
        'weight': weight,
        'surfaceText': surfaceText,
        'surfaceStart': surfaceStart,
        'surfaceEnd': surfaceEnd
    }