コード例 #1
0
# Yahoo appid

yahoosearch_json.APP_ID = appid

# One query per line; the file is closed as soon as it has been read.
with open(inputfile) as query_file:
    queries = query_file.readlines()

# Refuse batches of more than 5000 queries before issuing any search.
if len(queries) > 5000:
    print("Query limit Exceeded")
    exit()

# Maps each kept query string to a list of [url, cached_size] pairs.
results = {}

for query in queries:
    query = query.strip()
    print(query)
    info = yahoosearch_json.search(query)

    # Only keep queries that report at least 10 available results.
    if 'totalResultsAvailable' in info and info['totalResultsAvailable'] >= 10:
        results[query] = []
        for result in info['Result']:
            try:
                # Decode/re-encode with "replace" so undecodable bytes in the
                # URL are substituted instead of propagating further.
                # NOTE(review): assumes ClickUrl is a Python 2 byte string.
                url = result["ClickUrl"].decode(
                    "utf-8", "replace").encode("utf-8", "replace")
            except (KeyError, AttributeError, UnicodeError):
                # Best effort: skip results with a missing or unusable URL,
                # but let KeyboardInterrupt/SystemExit propagate.
                continue
            # -1 marks results that carry no 'Cache' entry.
            size = -1
            if "Cache" in result:
                size = result['Cache']['Size']
            results[query].append([url, size])
コード例 #2
0
ファイル: getURLs_using_yahoo.py プロジェクト: girish/utils
# Hand the application id to the search wrapper before any query is sent.
yahoosearch_json.APP_ID = appid

# One query per line; the file is closed as soon as it has been read.
with open(inputfile) as query_file:
    queries = query_file.readlines()

# Refuse batches of more than 5000 queries before issuing any search.
if len(queries) > 5000:
    print("Query limit Exceeded")
    exit()

# Maps each kept query string to a list of [url, cached_size] pairs.
results = {}

for query in queries:
    query = query.strip()
    print(query)
    info = yahoosearch_json.search(query)

    # Only keep queries that report at least 10 available results.
    if 'totalResultsAvailable' in info and info['totalResultsAvailable'] >= 10:
        results[query] = []
        for result in info['Result']:
            try:
                # Decode/re-encode with "replace" so undecodable bytes in the
                # URL are substituted instead of propagating further.
                # NOTE(review): assumes ClickUrl is a Python 2 byte string.
                url = result["ClickUrl"].decode(
                    "utf-8", "replace").encode("utf-8", "replace")
            except (KeyError, AttributeError, UnicodeError):
                # Best effort: skip results with a missing or unusable URL,
                # but let KeyboardInterrupt/SystemExit propagate.
                continue
            # -1 marks results that carry no 'Cache' entry.
            size = -1
            if "Cache" in result:
                size = result['Cache']['Size']
            results[query].append([url, size])
            # Echo every accepted URL as it is collected.
            print(url)
コード例 #3
0
from yahoosearch_json import search
import sys
import pickle

# Usage: <script> SEED_FILE OUTPUT_PICKLE
# Reads one seed query per line from SEED_FILE, looks up each one with
# search(), and pickles a {seed: totalResultsAvailable} dict to OUTPUT_PICKLE.

with open(sys.argv[1]) as seed_file:
    seeds = seed_file.readlines()

scores = {}
for seed in seeds:
    seed = seed.strip()
    results = search(seed)
    scores[seed] = results['totalResultsAvailable']

# Pickle streams are binary data: write in "wb" mode and close the file
# deterministically so the dump is fully flushed to disk.
with open(sys.argv[2], "wb") as score_file:
    pickle.dump(scores, score_file)
コード例 #4
0
ファイル: getresults.py プロジェクト: girish/utils
from yahoosearch_json import search
import sys
import pickle

# Command line: argv[1] is a text file with one seed query per line,
# argv[2] is the path the pickled {seed: totalResultsAvailable} dict is
# written to.

with open(sys.argv[1]) as seed_file:
    seeds = seed_file.readlines()

scores = {}
for seed in seeds:
    seed = seed.strip()
    results = search(seed)
    scores[seed] = results['totalResultsAvailable']

# Open the output in binary mode, as pickle requires, and let the context
# manager close it so the data is flushed before the script exits.
with open(sys.argv[2], "wb") as score_file:
    pickle.dump(scores, score_file)