def scrape_page(crawled_graph):
    """Scrape the page text for every unique URL in *crawled_graph*, then
    recompute the page ranks.

    Args:
        crawled_graph: iterable of URLs produced by the crawler (iterated
            once; duplicates are skipped).

    Side effects:
        Calls get_page_text(url, index) for each previously-unseen URL —
        `index` is not defined in this function, so it is presumably a
        module-level search index that get_page_text populates (TODO confirm).
        Then calls page_rank.compute_ranks(crawled_graph); its return value
        is discarded here — NOTE(review): confirm compute_ranks stores the
        ranks as a side effect, otherwise this call has no effect.
    """
    # Use a set for O(1) membership tests; the original list scan made the
    # dedup loop accidentally O(n^2) over the crawl graph.
    checked_urls = set()
    for url in crawled_graph:
        if url not in checked_urls:
            get_page_text(url, index)
            checked_urls.add(url)
    # calculate the page ranks using the crawled graph
    page_rank.compute_ranks(crawled_graph)
def build_database():
    """Crawl the start URL, then gather the link graph, the scraped index
    and the computed page ranks into the shared poodle_data dictionary and
    refresh the menu options.

    Sample test webs:
        https://dunluce.infc.ulst.ac.uk/d12wo/Web/B3/test_index.html
        http://adrianmoore.net/com506/test_web/index.html
    """
    # Run the crawl over the configured start URL.
    web_crawler.get_url()

    # The page has been crawled and scraped, so flip the flag that enables
    # the additional menu options.
    global database_built
    database_built = True

    # Collect the crawl results and derive the page ranks from the graph.
    crawl_graph = web_crawler.get_graph()
    search_index = web_scraper.get_index()
    page_ranks = page_rank.compute_ranks(crawl_graph)

    # Store everything in the shared poodle_data dictionary.
    poodle_data['graph'] = crawl_graph
    poodle_data['index'] = search_index
    poodle_data['ranks'] = page_ranks

    init_menu_options()