Esempi in Python per Traph.add_page

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: traph

Classe/tipologia: Traph

Metodo/funzione: add_page

Esempi su hotexamples.com: 5

Traph.add_page in Python: 5 esempi trovati. Questi sono i migliori esempi reali in Python per traph.Traph.add_page, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Traph(6)

add_page(5)

create_webentity(3)

add_links(3)

get_webentity_pages(2)

close(2)

index_batch_crawl(2)

retrieve_webentity(1)

retrieve_prefix(1)

remove_webentity_creation_rule(1)

pages_iter(1)

move_prefix_to_webentity(1)

make_traph(1)

links_iter(1)

get_webentity_parent_webentities(1)

get_webentity_most_linked_pages(1)

get_webentity_pagelinks(1)

get_webentity_outlinks(1)

get_webentity_inlinks(1)

get_webentity_crawled_pages(1)

get_webentity_child_webentities(1)

get_webentity_by_prefix(1)

get_webentities_links(1)

get_page_links(1)

delete_webentity(1)

add_webentity_creation_rule(1)

webentity_prefix_iter(1)

Esempio n. 1

Mostra file

# Index the sample data into the traph, either as one batch call or page by
# page, and record every webentity the traph reports as created.
if use_index_batch_crawl:
    # Group the (source, target) pairs of LINKS into
    # {source_lru: [target_lru, ...]} before handing them over in one call.
    data = {}
    for source_lru, target_lru in LINKS:
        links = data.setdefault(source_lru, [])
        links.append(target_lru)
    report = traph.index_batch_crawl(data)
    webentity_store.data['webentities'].update(report.created_webentities)
else:
    for lru in PAGES:
        # add page
        report = traph.add_page(lru)
        webentity_store.data['webentities'].update(report.created_webentities)

    # add links
    links_report = traph.add_links(LINKS)
    webentity_store.data['webentities'].update(
        links_report.created_webentities)

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('...data stored.')

# Log result
print('\nPages:')
for node, lru in traph.pages_iter():
    print(' - ' + lru)

print('\nPage Links:')

Esempio n. 2

Mostra file

}

webentity_store = WebEntityStore('./scripts/data/webentities.json')

# Build the traph with the creation rules defined outside this excerpt.
# NOTE(review): overwrite=True presumably discards any existing data in
# the folder -- confirm against the Traph constructor.
traph = Traph(overwrite=True,
              folder='./scripts/data/',
              default_webentity_creation_rule=default_webentity_creation_rule,
              webentity_creation_rules=webentity_creation_rules)
# Keep handles on the underlying structures so they can be inspected below.
trie = traph.lru_trie
links = traph.link_store

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(trie.header)
print(links.header)

# Index every page, then all the links in one call.
for page in PAGES:
    traph.add_page(page)

traph.add_links(LINKS)

# Dump every stored link as a (source, target) pair of LRUs.
for source_lru, target_lru in traph.links_iter():
    print('Source: %s, Target: %s' % (source_lru, target_lru))

# Dump the raw nodes of the link store.
for node in links.nodes_iter():
    print(node)

print('\nDetailed DFS...')
g = nx.Graph()
# Walk the trie, printing each state and adding one graph node per state,
# keyed by the node's block and labelled with its character.
for state in trie.detailed_dfs_iter():
    print(state)

    g.add_node(state.node.block, label=state.node.char_as_str())

Esempio n. 3

Mostra file

# Report the state before step 4: first the webentities tracked in the
# store, then the prefixes as the traph itself knows them.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('\nResult - Existing webentities from Store:')
for weid, prefixes in webentity_store.data['webentities'].items():
    print(' - Webentity %s:' % (weid))
    for prefix in prefixes:
        print('\t\t' + prefix)

print('\nResult - Prefixes from Traph:')
for node, lru in traph.webentity_prefix_iter():
    print(' - (%s) \t%s' % (node.webentity(), lru))

# Step 4
print('\n:: Step 4 - Add the "Airbus/blog" page')
print('Expected: Create the NON-HTTPS Airbus webentity')

# Add the page and record whatever webentities the traph reports as created.
report = traph.add_page('s:http|h:com|h:airbus|p:blog|')
webentity_store.data['webentities'].update(report.created_webentities)

# Same two reports as above, now reflecting the page that was just added.
print('\nResult - Existing webentities from Store:')
for weid, prefixes in webentity_store.data['webentities'].items():
    print(' - Webentity %s:' % (weid))
    for prefix in prefixes:
        print('\t\t' + prefix)

print('\nResult - Prefixes from Traph:')
for node, lru in traph.webentity_prefix_iter():
    print(' - (%s) \t%s' % (node.webentity(), lru))

# Ask the traph which webentity and which prefix the new page resolves to.
print('\nResult - Airbus blog page belongs to webentity %s via prefix %s' % (
    traph.retrieve_webentity('s:http|h:com|h:airbus|p:blog|'),
    traph.retrieve_prefix('s:http|h:com|h:airbus|p:blog|')))

Esempio n. 4

Mostra file

File: example_twitter_family.py Progetto: CDonnees/hyphe-traph

# Though the traph could retrieve them, it would not be efficient.
# In a real situation, these would be tracked elsewhere.
# That's what we are simulating with this store.
webentity_store = WebEntityStore('./scripts/data/webentities.json')
# Start from an empty webentity mapping.
webentity_store.data['webentities'] = {}

# Instantiate the traph
traph = Traph(overwrite=True,
              folder='./scripts/data/',
              default_webentity_creation_rule=default_webentity_creation_rule,
              webentity_creation_rules=webentity_creation_rules)

# Store data
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('Store pages...')
for page in PAGES:
    # Each call reports the webentities it created; mirror them in the store.
    report = traph.add_page(page)
    webentity_store.data['webentities'].update(report.created_webentities)
    # print report

print('Store links...')
links_report = traph.add_links(LINKS)
webentity_store.data['webentities'].update(links_report.created_webentities)
# print links_report

print('...data stored.')

# Log result
print('\nPages:')
for node, lru in traph.pages_iter():
    print(' - ' + lru)

Esempio n. 5

Mostra file

# In a real situation, these would be tracked elsewhere.
# That's what we are simulating with this store.
webentity_store = WebEntityStore('./scripts/data/webentities.json')
# Start from an empty webentity mapping.
webentity_store.data['webentities'] = {}

# Instantiate the traph
traph = Traph(overwrite=True,
              folder='./scripts/data/',
              default_webentity_creation_rule=default_webentity_creation_rule,
              webentity_creation_rules=webentity_creation_rules)

# Step 1
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('\n:: Step 1: Add the "Madrid" page')
print('Expected: "Europe" webentity created (matching the rule given at init), "World" not created')

# Add the page and record whatever webentities the traph reports as created.
report = traph.add_page('s:http|h:com|h:world|p:europe|p:spain|p:madrid|')
webentity_store.data['webentities'].update(report.created_webentities)

# Summarise each webentity by its first prefix and the count of the others.
print('\nResult - Existing webentities:')
for weid, prefixes in webentity_store.data['webentities'].items():
    print(' - Webentity %s\t%s + %s other prefixes' % (weid, prefixes[0],
                                                       len(prefixes) - 1))

# Step 2
print('\n:: Step 2: Remove the "Continents" rule and add the "Tokyo" page')
print('Expected: "World" webentity created, "Asia" not created')

# Drop the creation rule attached to the "world" prefix before adding the
# next page, so that page is handled without it.
traph.remove_webentity_creation_rule('s:http|h:com|h:world|')
report = traph.add_page('s:http|h:com|h:world|p:asia|p:japan|p:tokyo|')
webentity_store.data['webentities'].update(report.created_webentities)