Example #1
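# The crawl-loop header is not part of the original listing. A minimal
# sketch, assuming the frontier exposes has_next() and next_url()
# (hypothetical names):
while frontier.has_next():
    url = frontier.next_url()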
    # get outgoing links for the graph and content for tokenization
    body, links_on_page = parser.parse(url)

    # add document to indexer
    indexer.add_document(url, body)

    # build our web graph: ensure the page has a node, then record an
    # edge for every outgoing link
    if web_graph.get_node(url) is None:
        web_graph.add_node(url)

    for out_link in links_on_page:
        web_graph.add_edge(url, out_link)

    # hand links to the frontier to make sure they are all crawled
    frontier.add_urls(links_on_page)

# optional debug output: print every node in the web graph
# for node in web_graph:
#     print(node)

ranker = Ranker(web_graph)
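# calculate_rank's parameters are assumed here to mirror PageRank:
# curb_factor as the damping factor and delta as the convergence
# threshold at which iteration stops.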
ranker.calculate_rank(curb_factor=0.95, delta=0.04)
print(ranker)

# optional debug output: dump the inverted index, then look up a term
# for k in indexer.index:
#     print(k, indexer.index[k])
# print(indexer.find('supervised'))
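
# A possible next step, combining the two structures above: return the
# documents matching a query term, ordered by rank. This is only a
# sketch; get_rank(url) is a hypothetical accessor on Ranker, and find()
# is assumed to return the URLs of the matching documents.
def ranked_search(term):
    matches = indexer.find(term)
    return sorted(matches, key=ranker.get_rank, reverse=True)

# print(ranked_search('supervised'))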