예제 #1
0
# Webentity store backed by a JSON file in the scripts data folder.
webentity_store = WebEntityStore('./scripts/data/webentities.json')

# Build the traph in the same data folder with overwrite enabled
# (presumably this discards data left by a previous run -- confirm against
# Traph). The creation rules are defined outside this excerpt.
traph = Traph(
    folder='./scripts/data/',
    overwrite=True,
    default_webentity_creation_rule=default_webentity_creation_rule,
    webentity_creation_rules=webentity_creation_rules
)

# Shortcuts to the two underlying stores, reused further down.
trie, links = traph.lru_trie, traph.link_store

# NOTE: every print below takes one parenthesised argument, so the output is
# the same with Python 2's print statement and Python 3's print function.
print(trie.header)
print(links.header)

# Index every page first, then register all the links in a single call.
for page_lru in PAGES:
    traph.add_page(page_lru)

traph.add_links(LINKS)

# Echo each stored link as a (source LRU, target LRU) pair.
for source_lru, target_lru in traph.links_iter():
    print('Source: %s, Target: %s' % (source_lru, target_lru))

# Dump the nodes of the link store.
for link_node in links.nodes_iter():
    print(link_node)

print('\nDetailed DFS...')
# Graph populated by the trie traversal that follows.
g = nx.Graph()
for state in trie.detailed_dfs_iter():
    print state

    g.add_node(state.node.block, label=state.node.char_as_str())

    if state.node.is_root():
예제 #2
0
        if source_lru in data:
            links = data[source_lru]
        else:
            links = []
        links.append(target_lru)
        data[source_lru] = links
    report = traph.index_batch_crawl(data)
    webentity_store.data['webentities'].update(report.created_webentities)
else:
    # Index the pages one at a time, copying the webentities each insertion
    # reports as created into the store.
    for lru in PAGES:
        report = traph.add_page(lru)
        new_webentities = report.created_webentities
        webentity_store.data['webentities'].update(new_webentities)

    # Then index all the links in one call and record what that created too.
    links_report = traph.add_links(LINKS)
    new_webentities = links_report.created_webentities
    webentity_store.data['webentities'].update(new_webentities)

print('...data stored.')

# Log result: first every page LRU, then every page-to-page link.
# Each print takes one parenthesised argument, so the output is the same
# with Python 2's print statement and Python 3's print function.
print('\nPages:')
for node, lru in traph.pages_iter():
    print(' - ' + lru)

print('\nPage Links:')
# `i` counts the links seen so far; pre-set so it is 0 when there are none.
i = 0
for i, (source_lru, target_lru) in enumerate(traph.links_iter(), 1):
    print(' - %s\t->  %s' % (source_lru, target_lru))
예제 #3
0
else:
    for i, lru in enumerate(SOURCE_PAGES):
        # Index the source page and record the webentities it created.
        report = traph.add_page(lru)
        webentity_store.data['webentities'].update(report.created_webentities)

        # Pair this source with every target whose position is congruent to
        # the source's own index modulo 4. Sources at index 4 or above
        # therefore get no links.
        links = [
            [lru, target_lru]
            for j, target_lru in enumerate(TARGET_PAGES)
            if j % 4 == i
        ]

        # Index those links and record the webentities they created.
        links_report = traph.add_links(links)
        webentity_store.data['webentities'].update(
            links_report.created_webentities)

# Summary counts. Each print takes one parenthesised argument, so the output
# is the same with Python 2's print statement and Python 3's print function.
print('\n:: Stats')
store_count = len(webentity_store.data['webentities'])
print('- %s webentities in the Store' % store_count)

# Distinct webentity values returned by the traph's webentity prefix nodes.
webentities = set(
    node.webentity() for node, lru in traph.webentity_prefix_iter())
print('- %s webentities in the Traph' % len(webentities))

# LRUs of the trie nodes that report themselves as pages.
pages = [lru for node, lru in traph.lru_trie.dfs_iter() if node.is_page()]
print('- %s pages in the Traph' % len(pages))