def nel_title_elasticsearch(page_title):
    """Link a task title to an algorithm id via Elasticsearch (not implemented).

    The search itself is still a TODO, so this strategy currently never
    finds a match and always returns None.

    The previous body tested ``id is not None`` without ever assigning
    ``id``; the name therefore resolved to the builtin ``id`` function, the
    branch was always taken, and ``json.dumps([id])`` raised TypeError.

    Args:
        page_title: title of the task page to resolve.

    Returns:
        A one-element list ``[algo_id]`` on success, otherwise None.
    """
    # TODO search on name and alt_name
    algo_id = None  # placeholder until the Elasticsearch lookup exists

    if algo_id is None:
        return None

    # NOTE(review): the Redis set name and the '--second' marker below are
    # the same ones nel_title_suggest uses - confirm whether this strategy
    # should record under its own names once it is implemented.
    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-wikipedia-autosuggest', page_title)
    safe_print(algo_id)
    print('--second')
    return [algo_id]
def get_corres_wikipedia_algo_id(page):
    """Map a task page to the id(s) of the Wikipedia algorithm(s) it links to.

    Steps:
      1. Collect the titles of the page's interwiki links whose site is 'wp'.
      2. Walk the links returned by ``get_sorted_similar_links`` (titles
         similar to the task name, ordered by fuzzy-match confidence) and
         return the first one that is already indexed or can be indexed.
      3. Otherwise relate the task to ALL linked pages accepted by
         ``is_algorithm_page``.

    The outcome is recorded in Redis: the mapping goes into the
    'rosetta-mapping-success' hash (JSON list of ids), failures into the
    'rosetta-mapping-error-*' sets.

    Args:
        page: object exposing ``page_title`` and ``iwlinks()``, the latter
            yielding ``(site, title)`` pairs.

    Returns:
        A list of algorithm ids, or None when nothing could be mapped.
    """
    wikilinks = [title for (site, title) in page.iwlinks() if site == 'wp']
    task_title = page.page_title

    if not wikilinks:
        # no wiki links at all
        rd.sadd('rosetta-mapping-error-no-wiki-links', task_title)
        return None

    # first, try wikilinks that have titles similar to the task name;
    # these links are sorted by confidence of fuzzy matching
    for link in get_sorted_similar_links(task_title, wikilinks):
        # check if indexed
        algo_id = get_id_of_corresponding_algorithm(link, task_title)
        if algo_id is None:
            # try to index this algorithm
            wikipage = get_wiki_page(link)
            if wikipage is None:
                # the page could not be fetched, so there is nothing to
                # index (same guard as nel_wikilinks_fuzzy)
                continue
            algo_id = index_corresponding_algorithm(wikipage, link, task_title)
            if algo_id is None:
                continue

        rd.hset('rosetta-mapping-success', task_title, json.dumps([algo_id]))
        rd.sadd('rosetta-mapping-similars-success', task_title)
        return [algo_id]

    # then, if none of the links is similar to the task name,
    # 1, store the task description
    # 2, relate the implementation with ALL wiki algorithm pages
    #    mentioned in the description
    ids = []
    for link in wikilinks:
        wikipage = get_wiki_page(link)
        if wikipage is None or not is_algorithm_page(wikipage):
            continue
        # check if indexed
        algo_id = get_id_of_corresponding_algorithm(link, task_title)
        if algo_id is None:
            # try to index this algorithm; the page fetched above is reused
            # instead of being downloaded a second time
            algo_id = index_corresponding_algorithm(wikipage, link, task_title)
            if algo_id is None:
                continue
        ids.append(algo_id)

    if ids:
        rd.hset('rosetta-mapping-success', task_title, json.dumps(ids))
        return ids

    rd.sadd('rosetta-mapping-error-undefinable-wikilinks', task_title)
    return None
def nel_title_suggest(page_title, auto_suggest=True):
    """Link a task title to an algorithm by looking the title up as a wiki page.

    The title is passed to ``get_wiki_page`` together with ``auto_suggest``.
    If a page comes back, the algorithm id is taken from the existing index
    or, failing that, obtained by indexing the page. A hit is stored in the
    'rosetta-mapping-success' hash and the
    'rosetta-mapping-success-wikipedia-autosuggest' set.

    Args:
        page_title: title of the task page to resolve.
        auto_suggest: forwarded to ``get_wiki_page``.

    Returns:
        ``[algo_id]`` on success, otherwise None.
    """
    wikipage = get_wiki_page(page_title, auto_suggest)
    if wikipage is None:
        return None

    # already indexed?
    algo_id = get_id_of_corresponding_algorithm(page_title, page_title)
    if algo_id is None:
        # not yet - try to index this algorithm
        algo_id = index_corresponding_algorithm(wikipage, page_title, page_title)

    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-wikipedia-autosuggest', page_title)
    safe_print(algo_id)
    print('--second')
    return [algo_id]
def nel_wikilinks_match_all(wikilinks, page_title):
    """Relate a task to every linked wiki page that is an algorithm page.

    Each link is fetched with ``get_wiki_page``; pages that are missing or
    rejected by ``is_algorithm_page`` are skipped. For the rest, the id is
    taken from the existing index or obtained by indexing the page. If any
    id was collected, the list is stored in the 'rosetta-mapping-success'
    hash and the title is added to 'rosetta-mapping-success-all-algo-links'.

    Args:
        wikilinks: iterable of wiki link titles.
        page_title: title of the task page being resolved.

    Returns:
        The list of collected ids, or None when it is empty.
    """
    found = list()
    for link in wikilinks:
        wikipage = get_wiki_page(link)
        if wikipage is None or not is_algorithm_page(wikipage):
            continue

        # already indexed?
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            # not yet - try to index this algorithm
            algo_id = index_corresponding_algorithm(wikipage, link, page_title)
        if algo_id is not None:
            found.append(algo_id)

    if len(found) == 0:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps(found))
    rd.sadd('rosetta-mapping-success-all-algo-links', page_title)
    safe_print(found)
    print('--all-link')
    return found
def nel_title_crosswikis(page_title):
    """Link a task title to an algorithm via the crosswikis ``queries`` table.

    Selects ``(cprob, entity)`` rows whose anchor equals the task title and
    uses the entity with the highest ``cprob`` as the candidate wiki page.
    The previous code sorted the rows in ascending ``cprob`` order and took
    the first one, i.e. the LEAST confident candidate, contradicting its
    own "most confident link" comment.

    A hit is stored in the 'rosetta-mapping-success' hash and the
    'rosetta-mapping-success-crosswikis' set.

    Args:
        page_title: title of the task page, used as the anchor text.

    Returns:
        ``[algo_id]`` on success, otherwise None.
    """
    query = "SELECT cprob, entity FROM queries WHERE anchor = %s"
    candidates = list(session.execute(query, [page_title]))
    if not candidates:
        return None

    # get the most confident link: rows are (cprob, entity)
    toplink = max(candidates, key=lambda row: row[0])[1]

    # entities use underscores where the wiki page lookup gets spaces
    wikipage = get_wiki_page(toplink.replace('_', ' '))
    if wikipage is None:
        return None

    # check if indexed
    algo_id = get_id_of_corresponding_algorithm(toplink, page_title)
    if algo_id is None:
        # try to index this algorithm
        algo_id = index_corresponding_algorithm(wikipage, toplink, page_title)
    if algo_id is None:
        return None

    rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
    rd.sadd('rosetta-mapping-success-crosswikis', page_title)
    safe_print(algo_id)
    print('--third')
    return [algo_id]
def nel_wikilinks_fuzzy(wikilinks, page_title):
    """Link a task to the first similar-titled wiki link that resolves to an id.

    With no wiki links, the title is added to the
    'rosetta-mapping-error-no-wiki-links' set and None is returned.
    Otherwise the links returned by ``get_sorted_similar_links`` are tried
    in order: the id comes from the existing index or, if the wiki page can
    be fetched, from indexing it. The first hit is stored in the
    'rosetta-mapping-success' hash and the
    'rosetta-mapping-similars-success' set.

    Args:
        wikilinks: list of wiki link titles found on the task page.
        page_title: title of the task page being resolved.

    Returns:
        ``[algo_id]`` for the first link that resolves, otherwise None.
    """
    if len(wikilinks) == 0:
        # the task page has no wiki links at all
        rd.sadd('rosetta-mapping-error-no-wiki-links', page_title)
        return None

    # try wikilinks whose titles are similar to the task name;
    # they arrive sorted by confidence of fuzzy matching
    for link in get_sorted_similar_links(page_title, wikilinks):
        # already indexed?
        algo_id = get_id_of_corresponding_algorithm(link, page_title)
        if algo_id is None:
            # not yet - try to index this algorithm
            wikipage = get_wiki_page(link)
            if wikipage is not None:
                algo_id = index_corresponding_algorithm(wikipage, link, page_title)
                safe_print(algo_id)

        if algo_id is None:
            continue

        rd.hset('rosetta-mapping-success', page_title, json.dumps([algo_id]))
        rd.sadd('rosetta-mapping-similars-success', page_title)
        safe_print(algo_id)
        print('--first')
        return [algo_id]

    return None