예제 #1
0
 def test_crawl_these_major_sites(self):
     """Crawl a fixed set of well-known sites, storing each result.

     Logs a line per site and a final "done" marker so test output
     shows progress while the crawl runs.
     """
     major_sites = (
         "http://play.google.com", "https://news.google.ie/",
         "http://facebook.com", "http://amazon.com",
         "http://bookdepository.com", "http://mongohq.com",
         "http://roblynch.info", "http://ierlang.org", "http://erlang.org",
         "http://python.org", "http://github.com", "http://bitbucket.org",
         "http://hackerrank.com", "http://stackoverflow.com",
         "http://yahoo.com", "http://google.com", "http://bing.com",
         "http://microsoft.com", "http://aol.ie", "http://wikipedia.com",
     )
     for site in major_sites:
         crawl_website_insert_to_database(site)
         print("Crawling -", site)
     print("done")
예제 #2
0
 def test_crawl_these_major_sites(self):
     """Run the crawler over a hard-coded batch of popular sites.

     Each URL is crawled and persisted via
     crawl_website_insert_to_database; progress is printed per URL,
     followed by a closing "done".
     """
     site_batch = [
         "http://play.google.com",
         "https://news.google.ie/",
         "http://facebook.com", "http://amazon.com",
         "http://bookdepository.com", "http://mongohq.com",
         "http://roblynch.info", "http://ierlang.org", "http://erlang.org",
         "http://python.org", "http://github.com", "http://bitbucket.org",
         "http://hackerrank.com", "http://stackoverflow.com", "http://yahoo.com",
         "http://google.com", "http://bing.com", "http://microsoft.com",
         "http://aol.ie", "http://wikipedia.com",
     ]
     for target in site_batch:
         crawl_website_insert_to_database(target)
         print("Crawling -", target)
     print("done")
예제 #3
0
 def test_get_top_websites(self):
     """Crawl every entry of the module-level `top_websites` collection."""
     for site_url in top_websites:
         crawl_website_insert_to_database(site_url)
예제 #4
0
 def test_crawl_website_insert_to_database(self):
     """Smoke-test the crawler against a single known page."""
     targets = ["http://roblynch.info/about"]
     for page in targets:
         crawl_website_insert_to_database(page)
         print("Crawl complete on -", page)
     print("done")
예제 #5
0
 def test_get_top_websites(self):
     """Feed each URL from `top_websites` through the crawl/insert pipeline."""
     for entry in top_websites:
         crawl_website_insert_to_database(entry)
예제 #6
0
 def test_crawl_website_insert_to_database(self):
     """Exercise crawl_website_insert_to_database on one sample URL."""
     sample_pages = ("http://roblynch.info/about",)
     for sample in sample_pages:
         crawl_website_insert_to_database(sample)
         print("Crawl complete on -", sample)
     print("done")