def CheckPopularity(ip):
    """Return search results for ``ip`` with banned URLs filtered out.

    Results are loaded from a pickle file under CACHE_DIR when one exists.
    Otherwise a web search is run for the quoted ``ip`` followed by
    QUERY_MODIFIERS, and the results are pickled to that file.

    Args:
        ip: String used as the quoted search term and as the stem of the
            cache file name.

    Returns:
        A list of the results whose ``'Url'`` matches none of the regexps in
        BANNED_URL_KEYWORDS (case-insensitive search). An empty list when
        the search raises ``yahoo.search.SearchError``.
    """
    # NOTE(review): the doubled '.pickle.pickle' suffix is kept exactly as
    # found; presumably it matches the names of cache files already on disk.
    # Confirm before normalising it.
    cache_path = os.path.join(CACHE_DIR, ip) + '.pickle.pickle'

    if os.path.exists(cache_path):
        # NOTE(review): pickle.load can execute arbitrary code, so CACHE_DIR
        # must only ever contain files written by this program.
        # 'with' closes the handle, which was previously left open.
        with open(cache_path) as cache_file:
            results = pickle.load(cache_file)
    else:
        print("miss: %s" % ip)
        try:
            query = '"%s" %s' % (ip, QUERY_MODIFIERS)
            srch = WebSearch(APP_ID, query=query, results=50)
            results = srch.parse_results()
            # 'with' also closes the file if pickle.dump raises.
            with open(cache_path, 'w') as cache_file:
                pickle.dump(results.results, cache_file)
        except yahoo.search.SearchError:
            print("%s failed" % (ip))
            return []

    # Keep a result only if no banned pattern is found in its URL.
    return [
        result for result in results
        if not any(re.search(regexp, result['Url'], re.I)
                   for regexp in BANNED_URL_KEYWORDS)
    ]
def queryYahoo(self, query):
    """Yield the ``Url`` of each result of a Yahoo web search for ``query``.

    The search uses ``config.yahoo_appid`` as the application id and
    ``self.count`` as the requested number of results. Being a generator,
    nothing (including the import below) runs until the first item is
    requested.
    """
    # Imported here rather than at module level, as in the original.
    from yahoo.search.web import WebSearch

    search = WebSearch(config.yahoo_appid, query=query, results=self.count)
    response_dom = search.get_results()
    for hit in search.parse_results(response_dom):
        yield hit.Url
def main():
    """Run a web search from the command line and print each result URL.

    Expects exactly three arguments after the program name in ``sys.argv``:
    the application id, the query string and the number of results to
    request. With any other argument count it calls ``usage()`` and exits
    with status 2.
    """
    if len(sys.argv) != 4:
        usage()
        sys.exit(2)

    srch = WebSearch(app_id=sys.argv[1])
    srch.query = sys.argv[2]
    # only the first 100 results are queryable
    # NOTE(review): the count is passed through as the raw argv string;
    # presumably WebSearch validates or converts it -- confirm.
    srch.results = sys.argv[3]
    # Disable content filter to get all available results
    srch.adult_ok = 1

    # The former LinkIdx counter was incremented but never read, so it has
    # been dropped.
    for res in srch.parse_results():
        print(res.Url)
def CheckPopularity(ip):
    """Return search results for ``ip``, using a pickle cache under CACHE_DIR.

    Args:
        ip: String used as the quoted search term and as the stem of the
            cache file name.

    Returns:
        On a cache hit, the unpickled object (what was stored from
        ``results.results``). On a miss, the object returned by
        ``srch.parse_results()``. An empty list when the search raises
        ``yahoo.search.SearchError``.

        NOTE(review): hit and miss return different objects, as in the
        original. Callers should rely only on iteration until this is
        confirmed.
    """
    cache_path = os.path.join(CACHE_DIR, ip) + '.pickle'
    # The cache has always been written to cache_path + '.pickle' while only
    # cache_path was checked, so freshly written entries were never found.
    # The written name is kept so existing files stay valid, and both names
    # are now checked (the originally checked one first).
    written_path = cache_path + '.pickle'

    for candidate in (cache_path, written_path):
        if os.path.exists(candidate):
            # NOTE(review): pickle.load can execute arbitrary code, so
            # CACHE_DIR must only contain files written by this program.
            # 'with' closes the handle, which was previously left open.
            with open(candidate) as cache_file:
                return pickle.load(cache_file)

    try:
        query = '"%s" %s' % (ip, QUERY_MODIFIERS)
        srch = WebSearch(APP_ID, query=query, results=50)
        results = srch.parse_results()
        # 'with' also closes the file if pickle.dump raises.
        with open(written_path, 'w') as cache_file:
            pickle.dump(results.results, cache_file)
        return results
    except yahoo.search.SearchError:
        print("%s failed" % (ip))
        return []
return n def printSortedDict(adict): keys = adict.keys() keys.sort() for k in keys: print k print adict[k]['Title'] print adict[k]['Url'] print adict[k]['Summary'] print " " app_id = "NCD-Probe-Demo" srch = WebSearch(app_id, language='en') srch.query = "Radisson" srch.results = 50 dom = srch.get_results() results = srch.parse_results(dom) ranked = {} for res in results: # strip out search word from summary summary = str(res['Summary']) stripped_summary = summary.replace('Radisson', '') distance = ncd_probe(xbytes, cx, stripped_summary) dstr = 'NCD: ' + str(distance) ranked[dstr] = res