# コード例 #1 (Code example #1)
# 0
# ファイル: web_scraper.py プロジェクト: ohjw/POODLE
def scrape_page(crawled_graph):
    """Scrape the text of every unique URL in *crawled_graph* and compute page ranks.

    Each URL is scraped at most once, even if it appears multiple times in the
    graph. After scraping, the page ranks for the whole graph are computed.

    :param crawled_graph: iterable of URLs produced by the crawler
                          (presumably a link graph keyed by URL — verify against caller)
    :return: the result of ``page_rank.compute_ranks(crawled_graph)``
             (previously discarded; returning it matches how ``build_database``
             in poodle.py uses ``compute_ranks``)
    """
    # Use a set for O(1) membership tests instead of a list's O(n) scan.
    checked_urls = set()

    for url in crawled_graph:
        if url not in checked_urls:
            # NOTE(review): `index` is not defined in this scope — it appears
            # to be a module-level global shared with the scraper; confirm.
            get_page_text(url, index)
            checked_urls.add(url)

    # calculate the page ranks using the crawled graph
    return page_rank.compute_ranks(crawled_graph)
# コード例 #2 (Code example #2)
# 0
# ファイル: poodle.py プロジェクト: ohjw/POODLE
def build_database():
    """Crawl the test site, collect graph/index/ranks, and enable the full menu.

    Side effects: sets the module-level ``database_built`` flag, populates the
    ``poodle_data`` dictionary, and re-initialises the menu options.
    """
    global database_built

    # Sample sites used during development:
    # https://dunluce.infc.ulst.ac.uk/d12wo/Web/B3/test_index.html
    # http://adrianmoore.net/com506/test_web/index.html
    web_crawler.get_url()  # crawl url

    # page has been crawled and scraped so set this to True to enable more options
    database_built = True

    # Gather the crawl results and store everything in the poodle dictionary.
    crawl_graph = web_crawler.get_graph()
    poodle_data.update({
        'graph': crawl_graph,
        'index': web_scraper.get_index(),
        'ranks': page_rank.compute_ranks(crawl_graph),
    })

    init_menu_options()