def CheckPopularity(ip):
    """Return the search results for *ip* that pass the banned-URL filter.

    Results are loaded from a pickle file under CACHE_DIR when one exists
    for this address.  Otherwise a WebSearch query built from the quoted
    address plus QUERY_MODIFIERS is run, and its ``results`` attribute is
    pickled to that file for next time.

    Every result whose 'Url' matches any pattern in BANNED_URL_KEYWORDS
    (case-insensitively) is dropped from the returned list.

    Returns an empty list if the search raises yahoo.search.SearchError.
    """
    # The doubled extension is kept unchanged so the cache location does
    # not move.
    cache_path = os.path.join(CACHE_DIR, ip) + '.pickle.pickle'
    if os.path.exists(cache_path):
        # NOTE: unpickling runs whatever the file encodes, so this is only
        # safe while CACHE_DIR holds files written by this function.
        # The file modes are left as they were so reads and writes of the
        # cache stay consistent with each other.
        with open(cache_path) as cache_file:
            results = pickle.load(cache_file)
    else:
        print("miss: %s" % ip)
        try:
            query = '"%s" %s' % (ip, QUERY_MODIFIERS)
            srch = WebSearch(APP_ID, query=query, results=50)
            results = srch.parse_results()
            # 'with' closes the handle even if pickling fails part-way.
            with open(cache_path, 'w') as cache_file:
                pickle.dump(results.results, cache_file)
        except yahoo.search.SearchError:
            print("%s failed" % (ip))
            return []

    # Keep a result only when none of the banned patterns match its URL.
    return [
        result for result in results
        if not any(re.search(regexp, result['Url'], re.I)
                   for regexp in BANNED_URL_KEYWORDS)
    ]
Esempio n. 2
0
 def queryYahoo(self, query):
     """Yield the ``Url`` of each parsed web-search result for *query*.

     The search is configured with ``config.yahoo_appid`` and requests
     ``self.count`` results.  Being a generator, nothing is imported or
     fetched until the first item is requested.
     """
     from yahoo.search.web import WebSearch
     searcher = WebSearch(config.yahoo_appid, query=query, results=self.count)
     # Fetch the response, then hand it to the parser.
     parsed = searcher.parse_results(searcher.get_results())
     for hit in parsed:
         yield hit.Url
Esempio n. 3
0
def main():
    """Run a web search from the command line and print each result URL.

    Expects exactly three arguments after the script name: the application
    id, the query string, and the number of results to request.  With any
    other argument count, usage() is called and the process exits with
    status 2.
    """
    if len(sys.argv) != 4:
        usage()
        sys.exit(2)

    srch = WebSearch(app_id=sys.argv[1])
    srch.query = sys.argv[2]
    # only the first 100 results are queryable
    srch.results = sys.argv[3]
    # Disable content filter to get all available results
    srch.adult_ok = 1

    for res in srch.parse_results():
        print(res.Url)
def CheckPopularity(ip):
    """Return search results for *ip*, using a pickle cache under CACHE_DIR.

    On a cache hit the unpickled object is returned.  On a miss a WebSearch
    query built from the quoted address plus QUERY_MODIFIERS is run, its
    ``results`` attribute is pickled to the cache file, and the object
    returned by ``parse_results()`` is returned.

    Returns an empty list if the search raises yahoo.search.SearchError.
    """
    cache_path = os.path.join(CACHE_DIR, ip) + '.pickle'
    if os.path.exists(cache_path):
        # NOTE: unpickling runs whatever the file encodes, so this is only
        # safe while CACHE_DIR holds files written by this function.
        with open(cache_path) as cache_file:
            return pickle.load(cache_file)

    try:
        query = '"%s" %s' % (ip, QUERY_MODIFIERS)
        srch = WebSearch(APP_ID, query=query, results=50)
        results = srch.parse_results()
        # Write to the exact path checked above.  Appending a second
        # '.pickle' here would store the file where the lookup never finds
        # it, so every call would search again.
        with open(cache_path, 'w') as cache_file:
            pickle.dump(results.results, cache_file)
        # NOTE(review): a miss returns the parse_results() object while a
        # hit returns the pickled ``results.results``; confirm callers cope
        # with both before unifying them.
        return results
    except yahoo.search.SearchError:
        print("%s failed" % (ip))
        return []


def printSortedDict(adict):
    """Print every entry of *adict* in ascending key order.

    For each key this prints the key, then the entry's 'Title', 'Url' and
    'Summary' values, one per line, followed by a line holding a single
    space.  Nothing is printed for an empty mapping.
    """
    # sorted() accepts both a list and a key view, whereas calling .sort()
    # on the result of keys() only works when that result is a list.
    for key in sorted(adict.keys()):
        entry = adict[key]
        print(key)
        print(entry['Title'])
        print(entry['Url'])
        print(entry['Summary'])
        print(" ")


# Demo script: search for "Radisson" and index every result by the value
# ncd_probe returns for its summary.
app_id = "NCD-Probe-Demo"
srch = WebSearch(app_id, language='en')
srch.query = "Radisson"
srch.results = 50

# The output of get_results() is handed to parse_results() explicitly.
dom = srch.get_results()
results = srch.parse_results(dom)

# Maps 'NCD: <distance>' -> result.  Results whose distance formats to the
# same string share a key, so a later one replaces an earlier one.
ranked = {}
for res in results:
    # strip out search word from summary
    # NOTE(review): str() may raise on a summary containing non-ASCII text
    # if 'Summary' is a unicode object -- confirm what the parser returns.
    summary = str(res['Summary'])
    stripped_summary = summary.replace('Radisson', '')
    # ncd_probe, xbytes and cx are not defined in this part of the file.
    distance = ncd_probe(xbytes, cx, stripped_summary)
    dstr = 'NCD: ' + str(distance)
    ranked[dstr] = res