def draw_chart(word, n):
    """Draw, save, and display a star graph of the top *n* URLs for *word*.

    The keyword sits at the center of the graph; each of the top-n URLs
    returned from the ``PageDetails`` table is attached by an edge whose
    weight is the number of times the word occurs on that page.  The
    figure is written to ``chart.png`` and then shown interactively.

    Args:
        word: Keyword to place at the center of the star graph.
        n: Number of top URLs to fetch and attach to the keyword.
    """
    table_name = 'PageDetails'
    # Bug fix: pass the table_name local instead of duplicating the literal.
    response = db.get_top_urls_for_word(word, n, table_name)

    G = nx.Graph()
    G.add_node(word)
    # assumes each item has 'url' and 'word_count' keys — matches the
    # lookup branch elsewhere in this file; TODO confirm against the DB schema
    for url in response['Items']:
        G.add_node(url['url'])
        # Edge weight = occurrence count, so layout can reflect relevance.
        G.add_edge(word, url['url'], weight=url['word_count'])

    nx.draw(G, with_labels=True)
    # savefig must run before show(): show() may clear the current figure.
    plt.savefig("chart.png")
    plt.show()
# NOTE(review): this fragment continues an `if` statement that lies outside
# the visible chunk — the bare `else:` below pairs with that unseen `if`.
# `keyword`, `stdin`, `bfs_spider`, `dfs_spider`, and
# `get_top_urls_for_word` are defined/imported elsewhere in the file.

    # --- crawl branch: ask for a seed URL, a strategy, and a page budget ---
    url = stdin.readline().rstrip('\n')
    print("Choose (bfs / dfs): ")
    search_type = stdin.readline().rstrip('\n')
    print("Enter the maximum number of web-pages to crawl (around 200 gives good coverage): ")
    max_pages = int(stdin.readline().rstrip('\n'))
    # url = "http://www.cmu.edu"
    if search_type == 'bfs':
        bfs_spider(url, keyword, max_pages)
    else:
        # Any answer other than 'bfs' falls through to depth-first search.
        dfs_spider(url, keyword, max_pages)
    print(' ')
    print("In the generated star graph, the closer a URL is to the the word in the center, the more is its relevance.")
    print("Relevance is decided by the number of times a word occurs in a URL.")
    draw_chart(keyword, 15)
else:
    # --- lookup branch: keyword was crawled before; query stored results ---
    # presumably pairs with the crawl `if` above-the-chunk; verify in full file
    top_urls = get_top_urls_for_word(keyword, 10, 'PageDetails')
    if top_urls['Count'] == 0:
        print('This keyword is not present in the database.')
    else:
        print(' ')
        print('The top 10 pages with this keyword are: ')
        for item in top_urls['Items']:
            print(item['url'])
        print(' ')
        print("In the generated star graph, the closer a URL is to the the word in the center, the more is its relevance.")
        print("Relevance is decided by the number of times a word occurs in a URL.")
        draw_chart(keyword, 10)
# for item in top_urls['Items']:
#     print(item['word_count'])