Ejemplo n.º 1
0
def saveToNeo4j(found_pages, found_links):
    """Store each link as a ``links_to`` relationship between two title nodes.

    Endpoints are resolved one at a time through the "TitleIndex" node index
    and each relationship is written with its own ``GRAPHDB.create`` call.

    Args:
        found_pages: Not used by this function.
        found_links: Iterable of ``(pageA, pageB)`` pairs; each pair is
            unpacked into a source title and a target title.
    """
    # Single-argument print(...) emits the same text on Python 2 and is also
    # valid syntax on Python 3.
    print("++: saving to neo4j")
    title_index = GRAPHDB.get_or_create_index(neo4j.Node, "TitleIndex")
    for i, (pageA, pageB) in enumerate(found_links):
        # Presumably returns the node already indexed under this title, or
        # creates one with the given properties -- confirm against py2neo.
        nodeA = title_index.get_or_create("title", pageA, {"title": pageA})
        nodeB = title_index.get_or_create("title", pageB, {"title": pageB})
        GRAPHDB.create((nodeA, "links_to", nodeB))
        # Progress output on every 100th link, starting with link 0.
        if i % 100 == 0:
            print(i)
Ejemplo n.º 2
0
def saveToNeo4jBatch(found_pages, found_links):
    """Create one node per page, then write the links through write batches.

    Args:
        found_pages: Iterable of page identifiers. Each one is stored as the
            node's ``url`` property, and ``getNameFromLink(page)`` supplies
            the ``name`` property.
        found_links: Iterable of ``(pageA, pageB)`` pairs whose elements are
            looked up among the nodes created from ``found_pages``.
    """
    # The returned index is not used below; the call is kept because it may
    # create the "UrlIndex" index as a side effect -- TODO confirm.
    GRAPHDB.get_or_create_index(neo4j.Node, "UrlIndex")

    pageToNode = {}
    for page in found_pages:
        name = getNameFromLink(page)
        # create() is indexed with [0] to take the entity made for the single
        # property dict passed in.
        pageToNode[page] = GRAPHDB.create({"name": name, "url": page})[0]
        # TODO: add labels based on infobox

    # save links
    total = 0
    batch = neo4j.WriteBatch(GRAPHDB)
    for i, (pageA, pageB) in enumerate(found_links):
        # NOTE(review): .get() yields None for an endpoint that was not in
        # found_pages, and that None is handed to get_or_create_path as-is --
        # confirm this is intended.
        nodeA = pageToNode.get(pageA)
        nodeB = pageToNode.get(pageB)
        batch.get_or_create_path(nodeA, "links_to", nodeB)
        # Flush whenever i is a multiple of 100. This includes i == 0, so the
        # first submitted batch holds a single link.
        if i % 100 == 0:
            print("i: " + str(i))
            batch.run()
            batch = neo4j.WriteBatch(GRAPHDB)
        total = i + 1
    # Submit whatever was queued since the last flush.
    batch.run()
    print("total num links created: " + str(total))