Example #1
import urllib.error
from flask_restful import abort  # assumption: abort(code, message=...) matches Flask-RESTful; swap in your framework's equivalent

def getCrawlerResult(keyword, site):
    parse_data = {'domain': []}  # per-domain scraped data, plus a running list of the domains seen
    try:
        # getURLs (defined elsewhere) runs a custom Google Search and maps each result domain to its URL
        search_results = getURLs(keyword, site)
        for domain, url in search_results.items():
            parse_data[domain] = web_crawl(url, keyword)  # web_crawl (defined elsewhere) parses the page
            parse_data['domain'].append(domain)
    except urllib.error.HTTPError as err:
        abort(err.code, message=str(err.reason))
    return parse_data  # hand the collected results back to the caller
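
A hypothetical call site, assuming the function and its helpers are importable from the same module; the keyword and site arguments here are illustrative, not values from the original code:

results = getCrawlerResult('tv', 'bizrate.com')  # illustrative arguments only
for domain in results['domain']:                 # the 'domain' key holds the list of crawled domains
    print(domain, '->', results[domain])         # per-domain data returned by web_crawl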
import unittest
from json import dumps
import web_crawler  # module under test

class WebCrawlTest(unittest.TestCase):  # class name assumed; only the methods appear in the original
    # self.keyword is assumed to be set in setUp (not shown in the original snippet)
    def test_topresult(self):
        address = "http://www.hgtv.com/"
        print(dumps(web_crawler.web_crawl(address, self.keyword), indent=2, sort_keys=True))

    def test_bizrate(self):
        address = "http://www.bizrate.com/television-tv/"
        print(dumps(web_crawler.web_crawl(address, self.keyword), indent=2, sort_keys=True))

    def test_shopzilla(self):
        address = "http://www.shopzilla.com/tv/11520000/products"
        print(dumps(web_crawler.web_crawl(address, self.keyword), indent=2, sort_keys=True))
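
If these test methods live in their own file, a standard unittest entry point (not part of the original snippet) lets it run directly; alternatively, python -m unittest discovers the tests without it:

if __name__ == '__main__':
    unittest.main()  # runs the test_* methods defined above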