예제 #1
0
def draw_chart(word, n):
    """Draw a star graph linking ``word`` to the URLs stored for it.

    Calls ``db.get_top_urls_for_word`` for ``word`` against the
    'PageDetails' table, adds ``word`` as the hub node, and connects it to
    the 'url' of every entry in the response's 'Items', storing that
    entry's 'word_count' as the edge's ``weight`` attribute. The drawing is
    written to 'chart.png' and then displayed.

    Args:
        word: Keyword used as the hub node of the graph.
        n: Forwarded unchanged to ``db.get_top_urls_for_word``; presumably
            the maximum number of URLs to fetch -- confirm against that
            helper.
    """
    table_name = 'PageDetails'
    response = db.get_top_urls_for_word(word, n, table_name)

    G = nx.Graph()
    # Add the hub explicitly so the graph is never empty, even when 'Items'
    # contains no entries.
    G.add_node(word)
    for item in response['Items']:
        # add_edge creates the URL node itself if it is not in the graph yet,
        # so no separate add_node call is needed.
        G.add_edge(word, item['url'], weight=item['word_count'])

    nx.draw(G, with_labels=True)
    plt.savefig("chart.png")
    plt.show()
예제 #2
0
        url = stdin.readline().rstrip('\n')
        print("Choose (bfs / dfs): ")
        search_type = stdin.readline().rstrip('\n')
        print("Enter the maximum number of web-pages to crawl (around 200 gives good coverage): ")
        max_pages = int(stdin.readline().rstrip('\n'))
        # url = "http://www.cmu.edu"
        if search_type == 'bfs':
            bfs_spider(url, keyword, max_pages)
        else:
            dfs_spider(url, keyword, max_pages)
        print(' ')
        print("In the generated star graph, the closer a URL is to the the word in the center, the more is its relevance.")
        print("Relevance is decided by the number of times a word occurs in a URL.")
        draw_chart(keyword, 15)
    else:
        top_urls = get_top_urls_for_word(keyword, 10, 'PageDetails')
        if top_urls['Count'] == 0:
            print('This keyword is not present in the database.')
        else:
            print(' ')
            print('The top 10 pages with this keyword are: ')
            for item in top_urls['Items']:
                print(item['url'])
            print(' ')
            print("In the generated star graph, the closer a URL is to the the word in the center, the more is its relevance.")
            print("Relevance is decided by the number of times a word occurs in a URL.")
            draw_chart(keyword, 10)

    # for item in top_urls['Items']:
    #     print(item['word_count'])