Example #1
import random

# `traph` is an open Traph instance, `webentity_store` a simple prefix store,
# `voc` a vocabulary of random words and `path_sizes` the candidate path
# lengths; all are assumed to be defined earlier in the example script.
for i in range(PAGES_COUNT):  # PAGES_COUNT: page budget, assumed defined earlier
    path_size = random.choice(path_sizes)
    protocol = 's:http|'
    tld = 'h:com|'
    host = 'h:%s|' % (random.choice(voc))
    path = ''
    for p in range(path_size):
        path += 'p:%s|' % (random.choice(voc))
    # e.g. 's:http|h:com|h:lorem|p:ipsum|' (scheme, host stems, path stems)
    lru = protocol + tld + host + path
    report = traph.add_page(lru)
    webentity_store.data['webentities'].update(report.created_webentities)

print '\n:: Webentities'
print '\nExisting webentities from Store:'
for weid, prefixes in webentity_store.data['webentities'].items():
    print ' - Webentity %s:' % (weid)
    for prefix in prefixes:
        print '\t\t' + prefix

print '\nPrefixes from Traph:'
for node, lru in traph.webentity_prefix_iter():
    print ' - (%s) \t%s' % (node.webentity(), lru)

print '\n:: Pages in "Lorem" webentity'
lorem_weid = traph.get_webentity_by_prefix('s:http|h:com|h:lorem|')
lorem_prefixes = webentity_store.data['webentities'][lorem_weid]
for lru in traph.get_webentity_pages(lorem_weid, lorem_prefixes):
    print ' - %s' % (lru)

traph.close()
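
Both examples rely on a `webentity_store` exposing a `data['webentities']` mapping from webentity id to its list of prefixes. The traph repository's example scripts define their own, richer store; a minimal sketch of what these snippets actually need:

class WebEntityStore(object):
    # Minimal stand-in: the examples above only need a 'webentities' dict
    # mapping each webentity id to its list of LRU prefixes.
    def __init__(self):
        self.data = {'webentities': {}}

webentity_store = WebEntityStore()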
Example #2
# Assumed from earlier in the script: `pages` is the list of LRUs that were
# added and `webentities` the ids collected from the creation reports.
print '- %s pages in the Traph' % (len(pages))

print '\n:: Traph: LRU trie'
print traph.lru_trie.representation()

print '\n:: Breakdown by webentity'
for weid in webentities:
    print '\nWebentity %s' % (weid)

    we_prefixes = webentity_store.data['webentities'][weid]
    print ' - %s prefixes (store)' % (len(we_prefixes))

    for prefix in we_prefixes:
        print ' \t- %s' % (prefix)

    we_pages = traph.get_webentity_pages(weid, we_prefixes)
    print ' - %s pages (traph)' % (len(we_pages))

    for lru in we_pages:
        print ' \t- %s' % (lru)

    we_crawled_pages = traph.get_webentity_crawled_pages(weid, we_prefixes)
    print ' - %s crawled pages (traph)' % (len(we_crawled_pages))

    for lru in we_crawled_pages:
        print ' \t- %s' % (lru)

    we_most_linked_pages = traph.get_webentity_most_linked_pages(
        weid, we_prefixes, 3)
    print ' - %s most linked pages (traph, max 3)' % (
        len(we_most_linked_pages))

    for lru in we_most_linked_pages:
        print ' \t- %s' % (lru)

traph.close()
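
For reference, the LRU notation used throughout these examples encodes a URL as pipe-separated stems: `s:` for the scheme, `h:` for host parts written from the TLD inwards, and `p:` for path segments. A minimal decoder sketch (a hypothetical helper, not part of the traph API):

def lru_to_url(lru):
    # Hypothetical helper: rebuilds a plain URL from the pipe-separated stems.
    scheme, hosts, path = 'http', [], []
    for stem in lru.split('|'):
        if not stem:
            continue
        flag, value = stem.split(':', 1)
        if flag == 's':
            scheme = value
        elif flag == 'h':
            hosts.insert(0, value)  # host stems are stored TLD-first
        elif flag == 'p':
            path.append(value)
    return '%s://%s/%s' % (scheme, '.'.join(hosts), '/'.join(path))

print lru_to_url('s:http|h:com|h:lorem|p:ipsum|')  # -> http://lorem.com/ipsum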