Example #1
def build_sources(parts_dict, preposition_fix=False):
    """
    Create the 'source' information for an assertion.

    The output is a list of (conjunction, weight) tuples, where 'conjunction'
    is a list of sources that combined to produce this assertion. Later,
    inside the 'make_edge' function, these will be combined into an '/and'
    node.
    """
    activity = parts_dict["activity"]

    creator_node = join_uri(
        '/s/contributor/omcs',
        normalize_text(parts_dict["creator"], lowercase=False)
    )
    activity_node = join_uri('/s/activity/omcs', normalize_text(activity))
    if preposition_fix:
        conjunction = [creator_node, activity_node, '/s/rule/preposition_fix']
    else:
        conjunction = [creator_node, activity_node]
    weighted_sources = [(conjunction, 1)]

    # Each vote contributes its own weighted source.
    for username, vote_int in parts_dict["votes"]:
        conjunction = [
            join_uri('/s/contributor/omcs', username),
            '/s/activity/omcs/vote'
        ]
        weighted_sources.append((conjunction, vote_int))
    return weighted_sources
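
For orientation, a hypothetical call could look like the following. The parts_dict
contents are invented for illustration, and the exact URI strings depend on what
normalize_text does to each piece; build_sources and its helpers are assumed to be
in scope.

parts = {
    "creator": "rspeer",
    "activity": "omcs1_1",
    "votes": [("dev", 1), ("verbosity", -1)],
}
build_sources(parts)
# Roughly:
# [(['/s/contributor/omcs/rspeer', '/s/activity/omcs/omcs1_1'], 1),
#  (['/s/contributor/omcs/dev', '/s/activity/omcs/vote'], 1),
#  (['/s/contributor/omcs/verbosity', '/s/activity/omcs/vote'], -1)]
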
Example #2
def normalized_concept_name(lang, text):
    """
    Make a normalized form of the given text in the given language. If the
    language is English, reduce words to their root form using the tools in
    conceptnet5.language.english. Otherwise, simply apply the function called
    `conceptnet5.uri.normalize_text`.

    >>> normalized_concept_name('en', 'this is a test')
    'this_be_test'
    >>> normalized_concept_name('es', 'ESTO ES UNA PRUEBA')
    'esto_es_una_prueba'
    """
    if lang == 'en':
        stem = normalize_english(text) or text
        return normalize_text(stem)
    else:
        return normalize_text(text)
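
Tracing the first doctest through the two steps of the English branch: the
intermediate stem shown here is an assumption about what normalize_english
returns; only the final value is fixed by the doctest above.

stem = normalize_english('this is a test') or 'this is a test'  # assumed stem: 'this be test'
normalize_text(stem)                                            # 'this_be_test'
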
Example #3
def normalized_concept_uri(lang, text, *more):
    """
    Make the appropriate URI for a concept in a particular language, including
    stemming the text if necessary, normalizing it, and joining it into a
    concept URI.

    Items in 'more' will not be stemmed, but will go through the other
    normalization steps.

    >>> normalized_concept_uri('en', 'this is a test')
    '/c/en/this_be_test'
    >>> normalized_concept_uri('en', 'this is a test', 'n', 'example phrase')
    '/c/en/this_be_test/n/example_phrase'
    """
    norm_text = normalized_concept_name(lang, text)
    more_text = [normalize_text(item) for item in more]
    return concept_uri(lang, norm_text, *more_text)
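
The doctests only exercise English. For any other language the text goes straight
through normalize_text, so, building on the Spanish doctest in
normalized_concept_name and assuming concept_uri simply joins its pieces under
'/c':

normalized_concept_uri('es', 'ESTO ES UNA PRUEBA', 'n')
# expected: '/c/es/esto_es_una_prueba/n'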