# Yahoo appid
# Crawl script: runs every query listed in `inputfile` through
# yahoosearch_json.search and collects, per query, a list of
# [click_url, cache_size] pairs in `results`.
# `appid`, `inputfile` and the `yahoosearch_json` module are expected to be
# bound earlier in the file.
import sys

MAX_QUERIES = 5000

yahoosearch_json.APP_ID = appid

# One query per line. The `with` block closes the handle as soon as the
# lines are read instead of leaving that to the garbage collector.
with open(inputfile) as query_file:
    queries = [line.strip() for line in query_file]
# Blank lines carry no query; drop them so they neither count against the
# cap nor trigger an empty search request.
queries = [query for query in queries if query]

if len(queries) > MAX_QUERIES:
    print("Query limit Exceeded")
    # sys.exit is always available (the bare `exit` helper comes from the
    # `site` module) and a non-zero status reports the failure to the caller.
    sys.exit(1)

results = {}
for query in queries:
    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function.
    print(query)
    info = yahoosearch_json.search(query)
    # Only queries reporting at least 10 available results are recorded.
    if 'totalResultsAvailable' in info and info['totalResultsAvailable'] >= 10:
        results[query] = []
        for result in info['Result']:
            try:
                # Round-trip through UTF-8 with "replace" error handling.
                url = result["ClickUrl"].decode("utf-8", "replace").encode(
                    "utf-8", "replace")
            except (KeyError, AttributeError, UnicodeError):
                # Best effort: skip entries whose URL is missing or cannot
                # be normalised, without masking unrelated errors such as
                # KeyboardInterrupt.
                continue
            # -1 marks results that carry no "Cache" entry.
            size = -1
            if "Cache" in result:
                size = result['Cache']['Size']
            results[query].append([url, size])
# Crawl script: sends each query from `inputfile` to yahoosearch_json.search,
# stores [click_url, cache_size] pairs per query in `results`, and echoes
# every query and collected URL to stdout.
# `appid`, `inputfile` and `yahoosearch_json` must already be defined/imported
# earlier in the file.
import sys

QUERY_LIMIT = 5000

yahoosearch_json.APP_ID = appid

# Read the query list and release the file handle immediately.
with open(inputfile) as handle:
    stripped_lines = [raw_line.strip() for raw_line in handle]
# Empty lines are not queries: skipping them keeps them from counting
# against the limit and from being sent as empty searches.
queries = [text for text in stripped_lines if text]

if len(queries) > QUERY_LIMIT:
    print("Query limit Exceeded")
    # Exit with a failure status; sys.exit does not depend on the `site`
    # module the way the bare `exit` helper does.
    sys.exit(1)

results = {}
for query in queries:
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(query)
    info = yahoosearch_json.search(query)
    has_enough_hits = ('totalResultsAvailable' in info
                       and info['totalResultsAvailable'] >= 10)
    if not has_enough_hits:
        continue
    results[query] = []
    for result in info['Result']:
        try:
            # Decode then re-encode as UTF-8, replacing undecodable bytes.
            url = result["ClickUrl"].decode("utf-8", "replace").encode(
                "utf-8", "replace")
        except (KeyError, AttributeError, UnicodeError):
            # Skip results without a usable click URL; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            continue
        # Cache size defaults to -1 when the result has no "Cache" entry.
        size = result['Cache']['Size'] if "Cache" in result else -1
        results[query].append([url, size])
        # NOTE(review): the collapsed original does not show which level
        # this print belongs to; it is kept per collected URL, where `url`
        # is guaranteed to be bound.
        print(url)
# Seed scoring script: for every seed term listed (one per line) in the file
# named by argv[1], look up the term with yahoosearch_json.search and record
# its 'totalResultsAvailable' value; the {seed: count} mapping is pickled to
# the file named by argv[2].
import pickle
import sys

from yahoosearch_json import search

if len(sys.argv) < 3:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: %s SEEDS_FILE OUTPUT_PICKLE" % sys.argv[0])

# Close the seed file as soon as its lines have been read.
with open(sys.argv[1]) as seed_file:
    seeds = [line.strip() for line in seed_file]

scores = {}
for seed in seeds:
    if not seed:
        # Blank lines are not seeds; do not spend a search on them.
        continue
    results = search(seed)
    # NOTE(review): a response without 'totalResultsAvailable' raises
    # KeyError here, as it did before -- confirm whether a default is wanted.
    scores[seed] = results['totalResultsAvailable']

# pickle requires a binary-mode file; "wb" also guarantees the data is
# flushed and the handle closed when the block exits.
with open(sys.argv[2], "wb") as out_file:
    pickle.dump(scores, out_file)
# Builds a {seed: totalResultsAvailable} table for the seed terms in the file
# given as the first command-line argument and pickles it to the path given
# as the second argument.
import pickle
import sys

from yahoosearch_json import search

if len(sys.argv) < 3:
    # Both paths are required; report that clearly rather than crashing
    # with an IndexError on the argv lookup below.
    sys.exit("usage: %s SEEDS_FILE OUTPUT_PICKLE" % sys.argv[0])

seeds_path = sys.argv[1]
output_path = sys.argv[2]

# Read all seeds up front so the input handle is closed before searching.
with open(seeds_path) as source:
    seeds = [entry.strip() for entry in source]

scores = {}
# Empty lines are filtered out so no search is issued for an empty term.
for seed in (term for term in seeds if term):
    results = search(seed)
    # NOTE(review): KeyError propagates if the response lacks
    # 'totalResultsAvailable' -- unchanged from the original behaviour.
    scores[seed] = results['totalResultsAvailable']

# Pickle streams must go to a binary-mode file object; the context manager
# flushes and closes it deterministically.
with open(output_path, "wb") as sink:
    pickle.dump(scores, sink)