Ejemplo n.º 1
0
def nel_title_elasticsearch(page_title):
    """Record a successful mapping for ``page_title`` and return ``[id]``.

    NOTE(review): ``id`` is never assigned inside this function. Unless the
    module defines its own global ``id``, the name resolves to the builtin
    ``id`` function, which is never None and is not JSON serializable. The
    search that should produce the id (see the TODO below) appears to be
    missing from this block -- confirm against the full implementation.

    :param page_title: title used as the field/member in the ``rd`` calls.
    :returns: ``[id]`` when ``id`` is not None; otherwise the function falls
        off the end and returns None implicitly.
    """
    # TODO search on name and alt_name
    if id is not None:
        # Store the JSON-encoded one-element id list under the page title
        # (``rd`` is presumably a Redis client -- verify at module level).
        rd.hset('rosetta-mapping-success', page_title, json.dumps([id]))
        # Add the title to the set named for the autosuggest success case.
        rd.sadd('rosetta-mapping-success-wikipedia-autosuggest', page_title)
        # Progress output: the id, then a fixed marker string.
        safe_print(id)
        print '--second'
        return [id]
def nel_title_elasticsearch(page_title):
    """Record a successful mapping for ``page_title`` and return ``[id]``.

    NOTE(review): no statement in this function binds ``id``, so the name
    falls back to a module-level ``id`` if one exists, or else to the
    builtin ``id`` function (never None, not JSON serializable). The lookup
    hinted at by the TODO below seems to be absent -- verify before use.

    :param page_title: title used as the field/member in the ``rd`` calls.
    :returns: ``[id]`` when ``id`` is not None; otherwise None implicitly.
    """
    # TODO search on name and alt_name
    if id is not None:
        # Store the JSON-encoded one-element id list under the page title
        # (``rd`` is presumably a Redis client -- verify at module level).
        rd.hset('rosetta-mapping-success', page_title, json.dumps([id]))
        # Add the title to the set named for the autosuggest success case.
        rd.sadd('rosetta-mapping-success-wikipedia-autosuggest',
            page_title)
        # Progress output: the id, then a fixed marker string.
        safe_print(id)
        print '--second'
        return [id]
Ejemplo n.º 3
0
def get_corres_wikipedia_algo_id(page):
    """Map a task page to the id(s) of its corresponding algorithm(s).

    The interwiki links of ``page`` whose site is ``'wp'`` are examined in
    two passes:

    1. Links returned by ``get_sorted_similar_links`` for the task title are
       tried in order; the first one that is already indexed, or that can
       be indexed now, wins and is returned as a one-element list.
    2. Otherwise every link whose page passes ``is_algorithm_page`` is
       collected, and all resulting ids are returned.

    The outcome is recorded through ``rd`` (success hash / error sets).

    :param page: object exposing ``page_title`` and ``iwlinks()``, the
        latter yielding ``(site, title)`` pairs.
    :returns: a non-empty list of ids on success, otherwise None.
    """
    task_title = page.page_title
    wikilinks = [linktitle
                 for (linksite, linktitle) in page.iwlinks()
                 if linksite == 'wp']

    if not wikilinks:
        # No wiki links at all: nothing to map.
        rd.sadd('rosetta-mapping-error-no-wiki-links', task_title)
        return None

    # First pass: links with titles similar to the task name, in the order
    # produced by get_sorted_similar_links.
    for link in get_sorted_similar_links(task_title, wikilinks):
        algo_id = get_id_of_corresponding_algorithm(link, task_title)
        if algo_id is None:
            # Not indexed yet: try to index it. The page lookup can come
            # back empty (the second pass checks for that too), so skip
            # the link instead of handing None to the indexer.
            wikipage = get_wiki_page(link)
            if wikipage is None:
                continue
            algo_id = index_corresponding_algorithm(wikipage, link,
                                                    task_title)
            if algo_id is None:
                continue

        rd.hset('rosetta-mapping-success', task_title,
                json.dumps([algo_id]))
        rd.sadd('rosetta-mapping-similars-success', task_title)
        return [algo_id]

    # Second pass: none of the similar links worked, so relate the task to
    # ALL algorithm pages mentioned among its wiki links.
    algo_ids = []
    for link in wikilinks:
        wikipage = get_wiki_page(link)
        if wikipage is None or not is_algorithm_page(wikipage):
            continue
        algo_id = get_id_of_corresponding_algorithm(link, task_title)
        if algo_id is None:
            # Reuse the page fetched above rather than fetching it again.
            algo_id = index_corresponding_algorithm(wikipage, link,
                                                    task_title)
            if algo_id is None:
                continue
        algo_ids.append(algo_id)

    if algo_ids:
        rd.hset('rosetta-mapping-success', task_title,
                json.dumps(algo_ids))
        return algo_ids

    rd.sadd('rosetta-mapping-error-undefinable-wikilinks', task_title)
    return None
Ejemplo n.º 4
0
def nel_title_suggest(page_title, auto_suggest=True):
    """Link a task title to an algorithm via a direct wiki page lookup.

    Looks the title up with ``get_wiki_page``; when a page is found, the
    corresponding algorithm id is read from the index or, failing that,
    created by indexing the page. A successful mapping is recorded through
    ``rd`` and echoed to stdout.

    :param page_title: task title, used both as the wiki lookup key and as
        the task name passed to the index helpers.
    :param auto_suggest: forwarded unchanged to ``get_wiki_page``.
    :returns: ``[id]`` on success, otherwise None.
    """
    wikipage = get_wiki_page(page_title, auto_suggest)
    if wikipage is None:
        return None

    # Check whether the algorithm is already indexed; index it if not.
    algo_id = get_id_of_corresponding_algorithm(page_title, page_title)
    if algo_id is None:
        algo_id = index_corresponding_algorithm(wikipage, page_title,
                                                page_title)
    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-wikipedia-autosuggest', page_title)
    safe_print(algo_id)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print('--second')
    return [algo_id]
def nel_title_suggest(page_title, auto_suggest=True):
    """Resolve ``page_title`` to an algorithm id through ``get_wiki_page``.

    When the lookup yields a page, the algorithm id is taken from the
    index, or obtained by indexing the page if it is not there yet. On
    success the mapping is stored via ``rd`` and printed.

    :param page_title: task title; serves as both the wiki lookup key and
        the task name given to the index helpers.
    :param auto_suggest: passed straight through to ``get_wiki_page``.
    :returns: ``[id]`` when an id was found or created, otherwise None.
    """
    wikipage = get_wiki_page(page_title, auto_suggest)
    if wikipage is None:
        return None

    # Already indexed? If not, try to index this algorithm now.
    algo_id = get_id_of_corresponding_algorithm(page_title, page_title)
    if algo_id is None:
        algo_id = index_corresponding_algorithm(wikipage, page_title,
                                                page_title)
    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-wikipedia-autosuggest', page_title)
    safe_print(algo_id)
    # Single-argument print in call form: same output on Python 2, and no
    # SyntaxError on Python 3.
    print('--second')
    return [algo_id]
Ejemplo n.º 6
0
def nel_wikilinks_match_all(wikilinks, page_title):
    """Relate a task to every algorithm page among its wiki links.

    Each link is fetched with ``get_wiki_page``; links whose page is
    missing or fails ``is_algorithm_page`` are skipped. For the rest, the
    algorithm id is read from the index or created by indexing the page.

    :param wikilinks: iterable of wiki link titles.
    :param page_title: task title the ids are recorded under.
    :returns: list of collected ids; empty when nothing matched. A
        non-empty result is also recorded through ``rd`` and printed.
    """
    algo_ids = []
    for link in wikilinks:
        wikipage = get_wiki_page(link)
        if wikipage is None or not is_algorithm_page(wikipage):
            continue
        # Check whether the algorithm is already indexed; index it if not.
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            algo_id = index_corresponding_algorithm(wikipage, link,
                                                    page_title)
            if algo_id is None:
                continue
        algo_ids.append(algo_id)

    if algo_ids:
        rd.hset('rosetta-mapping-success', page_title, json.dumps(algo_ids))
        rd.sadd('rosetta-mapping-success-all-algo-links', page_title)
        safe_print(algo_ids)
        # Call-form print: identical output on Python 2, valid on Python 3.
        print('--all-link')

    return algo_ids
def nel_wikilinks_match_all(wikilinks, page_title):
    """Collect the algorithm ids for all algorithm pages in ``wikilinks``.

    A link contributes an id only if ``get_wiki_page`` returns a page that
    passes ``is_algorithm_page`` and the id is either already indexed or
    can be produced by ``index_corresponding_algorithm``.

    :param wikilinks: iterable of wiki link titles.
    :param page_title: task title the ids are recorded under.
    :returns: list of ids (possibly empty). When non-empty, the list is
        also stored through ``rd`` and echoed to stdout.
    """
    algo_ids = []
    for link in wikilinks:
        wikipage = get_wiki_page(link)
        if wikipage is None or not is_algorithm_page(wikipage):
            continue
        # Use the indexed id when there is one, otherwise try to index.
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            algo_id = index_corresponding_algorithm(wikipage, link,
                                                    page_title)
            if algo_id is None:
                continue
        algo_ids.append(algo_id)

    if algo_ids:
        rd.hset('rosetta-mapping-success', page_title, json.dumps(algo_ids))
        rd.sadd('rosetta-mapping-success-all-algo-links', page_title)
        safe_print(algo_ids)
        # Parenthesised single-argument print works on Python 2 and 3.
        print('--all-link')

    return algo_ids
Ejemplo n.º 8
0
def nel_title_crosswikis(page_title):
    """Link a task title to an algorithm via the ``queries`` table.

    Selects the ``(cprob, entity)`` rows whose anchor equals the title,
    takes the entity with the highest ``cprob`` and resolves it to an
    algorithm id, indexing the wiki page if it is not indexed yet. A
    successful mapping is recorded through ``rd`` and printed.

    :param page_title: task title used as the anchor to look up.
    :returns: ``[id]`` on success, otherwise None.
    """
    query = "SELECT cprob, entity FROM queries WHERE anchor = %s"
    suggested_wikilinks = list(session.execute(query, [page_title]))
    if not suggested_wikilinks:
        return None

    # Most confident link = highest cprob. The earlier ascending sort
    # followed by [0] selected the lowest-cprob row instead.
    toplink = max(suggested_wikilinks, key=lambda row: row[0])[1]
    wikipage = get_wiki_page(toplink.replace('_', ' '))
    if wikipage is None:
        return None

    # Check whether the algorithm is already indexed; index it if not.
    algo_id = get_id_of_corresponding_algorithm(toplink, page_title)
    if algo_id is None:
        algo_id = index_corresponding_algorithm(wikipage, toplink,
                                                page_title)
    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-crosswikis', page_title)
    safe_print(algo_id)
    # Call-form print: identical output on Python 2, valid on Python 3.
    print('--third')
    return [algo_id]
def nel_title_crosswikis(page_title):
    """Resolve ``page_title`` to an algorithm id using the ``queries`` table.

    Fetches the ``(cprob, entity)`` rows for the anchor equal to the
    title, picks the entity with the largest ``cprob``, and maps it to an
    algorithm id (indexing the wiki page when needed). Success is stored
    through ``rd`` and echoed to stdout.

    :param page_title: task title used as the anchor to look up.
    :returns: ``[id]`` when a mapping was found or created, otherwise None.
    """
    query = "SELECT cprob, entity FROM queries WHERE anchor = %s"
    suggested_wikilinks = list(session.execute(query, [page_title]))
    if not suggested_wikilinks:
        return None

    # Take the row with the highest cprob. Sorting ascending and reading
    # index 0, as before, returned the least confident link.
    best_row = max(suggested_wikilinks, key=lambda row: row[0])
    toplink = best_row[1]
    wikipage = get_wiki_page(toplink.replace('_', ' '))
    if wikipage is None:
        return None

    # Use the indexed id when there is one, otherwise try to index.
    algo_id = get_id_of_corresponding_algorithm(toplink, page_title)
    if algo_id is None:
        algo_id = index_corresponding_algorithm(wikipage, toplink,
                                                page_title)
    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-crosswikis', page_title)
    safe_print(algo_id)
    # Parenthesised single-argument print works on Python 2 and 3.
    print('--third')
    return [algo_id]
def nel_wikilinks_fuzzy(wikilinks, page_title):
    """Link a task to the first similar-titled wiki link that resolves.

    Links are tried in the order given by ``get_sorted_similar_links``.
    For each one the algorithm id is read from the index or, if absent,
    created by indexing the fetched wiki page. The first id obtained is
    recorded through ``rd``, printed and returned.

    :param wikilinks: wiki link titles found on the task page.
    :param page_title: task title the links are compared against.
    :returns: ``[id]`` on success, otherwise None (including when there
        are no links, which is recorded in an error set).
    """
    if not wikilinks:
        # No wiki links at all: record the error and give up.
        rd.sadd('rosetta-mapping-error-no-wiki-links', page_title)
        return None

    for link in get_sorted_similar_links(page_title, wikilinks):
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            # Not indexed yet: try to index it if the page can be fetched.
            wikipage = get_wiki_page(link)
            if wikipage is not None:
                algo_id = index_corresponding_algorithm(wikipage, link,
                                                        page_title)
            # Kept from the original: traces the indexing outcome, which
            # may still be None here.
            safe_print(algo_id)
        if algo_id is not None:
            rd.hset('rosetta-mapping-success', page_title,
                    json.dumps([algo_id]))
            rd.sadd('rosetta-mapping-similars-success', page_title)
            safe_print(algo_id)
            # Call-form print: same output on Python 2, valid on Python 3.
            print('--first')
            return [algo_id]

    return None
Ejemplo n.º 11
0
def nel_wikilinks_fuzzy(wikilinks, page_title):
    """Map a task to an algorithm through its most similar wiki link.

    Candidate links come from ``get_sorted_similar_links`` and are tried
    in that order. Each candidate's id is looked up in the index, or
    produced by indexing its wiki page when it is not indexed. The first
    id found is stored through ``rd``, echoed to stdout and returned.

    :param wikilinks: wiki link titles found on the task page.
    :param page_title: task title the links are compared against.
    :returns: ``[id]`` for the first link that resolves, otherwise None.
        An empty ``wikilinks`` is recorded in an error set.
    """
    if not wikilinks:
        # Nothing to match against: record the error and stop.
        rd.sadd('rosetta-mapping-error-no-wiki-links', page_title)
        return None

    for link in get_sorted_similar_links(page_title, wikilinks):
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            # Try to index this algorithm when its page is available.
            wikipage = get_wiki_page(link)
            if wikipage is not None:
                algo_id = index_corresponding_algorithm(wikipage, link,
                                                        page_title)
            # Preserved trace of the indexing result (possibly None).
            safe_print(algo_id)
        if algo_id is not None:
            rd.hset('rosetta-mapping-success', page_title,
                    json.dumps([algo_id]))
            rd.sadd('rosetta-mapping-similars-success', page_title)
            safe_print(algo_id)
            # Parenthesised single-argument print works on Python 2 and 3.
            print('--first')
            return [algo_id]

    return None