def getShenmaRelateDict(word):
    """Record Shenma "related search" terms for *word* in ``datas``.

    Builds the Shenma page URL from the Base64-encoded query, hands it to
    ``getRemoteClient``, parses the returned text with ``etree.HTML`` and, for
    every ``<li>`` below the ``div.xglist`` block, stores the text of its first
    ``<a>`` as a key of the shared ``datas`` mapping. New keys get the value
    ``'shen_ma'``; existing keys get ``';shen_ma'`` appended.

    Args:
        word: Query text to encode into the URL.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    # The Shenma page URL carries the query Base64-encoded.  b64encode is used
    # rather than encodestring because encodestring inserts a newline after
    # every 76 output characters, which would end up inside the URL for long
    # queries.
    base64_text = base64.b64encode(word).strip()
    # Shenma replaces '+' with '!' in the encoded text.
    base64_text = base64_text.replace('+', '!')
    url = u'http://aibing.cc/shenma/' + base64_text + u'.html'

    page = getRemoteClient(url)
    tree = etree.HTML(page)
    xglist = tree.xpath(
        '/html/body/div[@id="hd_main"]/div[@id="res"]/div[@class="xglist"]')
    if not xglist:
        return

    items = xglist[0].xpath('.//li')
    # `with` releases the lock on every exit path, including exceptions.
    with mutex:
        for li in items:
            anchors = li.xpath('./a')
            if not anchors:
                # An <li> without a link carries no related-search term.
                continue
            title = anchors[0].xpath('string(.)').strip()
            if title in datas:
                datas[title] = datas[title] + ';shen_ma'
            else:
                datas[title] = 'shen_ma'
def chinasoSuggest(word, delay=False):
    """Record ChinaSo search suggestions for *word* in ``datas``.

    Passes the ChinaSo JSONP suggest URL to ``getRemoteClient``, takes the
    contents of the first ``[...]`` group found in the returned text, splits
    it on commas and stores each item as a key of the shared ``datas``
    mapping. New keys get the value ``"chinaSo"``; existing keys get
    ``";chinaSo"`` appended.

    Args:
        word: Query text; appended to the URL as-is (no URL quoting is done
            here).
        delay: Not used by this function.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    url = "http://www.chinaso.com/search/suggest?callback=jsonpHandle&k=" + word
    result = getRemoteClient(url)
    matches = re.findall(r"\[(.*?)\]", result, re.M)
    if not matches:
        return

    # `with` releases the lock even when decoding an item raises; a plain
    # acquire()/release() pair would leave the mutex held in that case.
    with mutex:
        for item in matches[0].split(","):
            literal = '"' + re.sub('"', '', item) + '"'
            # SECURITY(review): eval() runs on text returned by a remote
            # server.  Double quotes are stripped first, but the text is still
            # evaluated as a unicode string literal, so backslash escapes in
            # it are interpreted and a malformed one raises.  Consider
            # replacing this with a non-eval decoder.
            key = eval("u" + literal)
            if key in datas:
                datas[key] = datas[key] + ";chinaSo"
            else:
                datas[key] = "chinaSo"
def bingSuggestion(word, delay=False):
    """Record Bing search suggestions for *word* in ``datas``.

    Obtains a ``cvid`` value from ``getBingCVID``; when that is ``None`` the
    function returns without doing anything. Otherwise it passes the Bing
    suggestions URL to ``getRemoteClient``, extracts every ``query="..."``
    value from the returned text and stores each one as a key of the shared
    ``datas`` mapping. New keys get the value ``"bing"``; existing keys get
    ``";bing"`` appended.

    Args:
        word: Query text; appended to the URL as-is. Its length is sent as
            the ``cp`` parameter.
        delay: Not used by this function.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    cvid = getBingCVID()
    if cvid is None:
        return

    url = ("http://cn.bing.com/AS/Suggestions?pt=page.serp&bq=" + word
           + "&mkt=zh-cn&qry=" + word
           + "&cp=" + str(len(word))
           + "&o=hs&css=1&cvid=" + cvid)
    result = getRemoteClient(url)

    suggestions = re.findall(r"query=\"(.*?)\"", result, re.M)
    if not suggestions:
        return

    # `with` releases the lock on every exit path, including exceptions.
    with mutex:
        for key in suggestions:
            if key in datas:
                datas[key] = datas[key] + ";bing"
            else:
                datas[key] = "bing"
def sogouSuggestion(word, delay=False):
    """Record Sogou search suggestions for *word* in ``datas``.

    Passes the Sogou suggest URL to ``getRemoteClient``, takes the contents of
    the first ``,[...]`` group found in the returned text, splits it on
    ``",`` and stores each non-empty item as a key of the shared ``datas``
    mapping. New keys get the value ``"sogou"``; existing keys get
    ``";sogou"`` appended.

    Args:
        word: Query text; appended to the URL as-is (no URL quoting is done
            here).
        delay: Not used by this function.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    url = ("https://www.sogou.com/suggnew/ajajjson?key=" + word
           + "&type=web&ori=yes&pr=web&abtestid=0&ipn=false")
    result = getRemoteClient(url)

    groups = re.findall(r",\[(.*?)\]", result, re.M)
    if not groups:
        return

    # `with` releases the lock even when decoding an item raises; a plain
    # acquire()/release() pair would leave the mutex held in that case.
    with mutex:
        for raw in groups[0].split('",'):
            text = re.sub('"', '', raw)
            # Test for emptiness *before* re-adding the quotes: once quoted,
            # the string is never empty, so an empty suggestion list would
            # otherwise be stored under the key u''.
            if not text:
                continue
            # SECURITY(review): eval() runs on text returned by a remote
            # server.  Double quotes are stripped first, but the text is still
            # evaluated as a unicode string literal, so backslash escapes in
            # it are interpreted and a malformed one raises.  Consider
            # replacing this with a non-eval decoder.
            key = eval('u"' + text + '"')
            if key in datas:
                datas[key] = datas[key] + ";sogou"
            else:
                datas[key] = "sogou"
def baiduSuggestion(word, delay=False):
    """Record Baidu search suggestions for *word* in ``datas``.

    Passes the Baidu suggestion URL to ``getRemoteClient``, takes the contents
    of the first ``[...]`` group found in the returned text, extracts every
    double-quoted string from it and stores each one as a key of the shared
    ``datas`` mapping. New keys get the value ``"baidu"``; existing keys get
    ``";baidu"`` appended.

    Args:
        word: Query text; appended to the URL as-is (no URL quoting is done
            here).
        delay: Not used by this function.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    url = "http://suggestion.baidu.com/su?wd=" + word
    result = getRemoteClient(url)

    groups = re.findall(r"\[(.*?)\]", result, re.M)
    if not groups:
        return

    # Parse before taking the lock so it is held only while updating `datas`.
    suggestions = re.findall(r'"(.*?)"', groups[0], re.M)

    # `with` releases the lock on every exit path, including exceptions.
    with mutex:
        for key in suggestions:
            if key in datas:
                datas[key] = datas[key] + ";baidu"
            else:
                datas[key] = "baidu"
def bingRecommend(word, flag=False):
    """Pass the first Bing result page for *word* to ``bingExtract``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``bingExtract``.

    Args:
        word: Query; converted with ``str()`` before being put in the URL.
        flag: Not used by this function.

    Returns:
        None.
    """
    query = str(word)
    page_url = 'https://www.bing.com/search?q=' + query + '&first=1'
    bingExtract(getRemoteClient(page_url))
def sogouRecommend(word, flag=False):
    """Pass the first Sogou result page for *word* to ``sogouExtract``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``sogouExtract``.

    NOTE(review): a second ``sogouRecommend`` is defined further down in this
    file; if both live at module level, that later definition replaces this
    one -- confirm which is intended.

    Args:
        word: Query; converted with ``str()`` before being put in the URL.
        flag: Not used by this function.

    Returns:
        None.
    """
    query = str(word)
    page_url = "http://www.sogou.com/web?query=" + query + "&page=1"
    sogouExtract(getRemoteClient(page_url))
def chinaSoRecommend(word, flag=False):
    """Pass the first ChinaSo result page for *word* to ``chinaSoExtractFor``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``chinaSoExtractFor``.

    NOTE(review): a second ``chinaSoRecommend`` is defined further down in
    this file; if both live at module level, that later definition replaces
    this one -- confirm which is intended.

    Args:
        word: Query text; concatenated into the URL as-is.
        flag: Not used by this function.

    Returns:
        None.
    """
    page_url = "http://www.chinaso.com/search/pagesearch.htm?q=" + word + "&page=1"
    page = getRemoteClient(page_url)
    chinaSoExtractFor(page)
def baiduRecommend(word, flag=False):
    """Pass a Baidu result page for *word* to ``baiduExtractFor``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``baiduExtractFor``.

    NOTE(review): the URL targets a hard-coded IP address rather than a host
    name. A second ``baiduRecommend`` is also defined further down in this
    file; if both live at module level, that later definition replaces this
    one -- confirm which is intended.

    Args:
        word: Query text; concatenated into the URL as-is.
        flag: Not used by this function.

    Returns:
        None.
    """
    page_url = "http://220.181.111.188/s?wd=" + word + "&pn=1"
    page = getRemoteClient(page_url)
    baiduExtractFor(page)
def sogouRecommend(word, flag=False):
    """Pass the first Sogou result page for *word* to ``sogouExtractFor``.

    The HTTPS search URL is handed to ``getRemoteClient`` and whatever it
    returns is forwarded unchanged to ``sogouExtractFor``.

    NOTE(review): an earlier ``sogouRecommend`` (calling ``sogouExtract``)
    appears above in this file; if both live at module level, this later
    definition is the one in effect -- confirm which is intended.

    Args:
        word: Query text; concatenated into the URL as-is.
        flag: Not used by this function.

    Returns:
        None.
    """
    page_url = "https://www.sogou.com/web?query=" + word + "&page=1"
    page = getRemoteClient(page_url)
    sogouExtractFor(page)
def chinaSoRecommend(word, flag=False):
    """Pass the ChinaSo result page for *word* to ``chinaSoExtract``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``chinaSoExtract``.

    NOTE(review): an earlier ``chinaSoRecommend`` (calling
    ``chinaSoExtractFor``) appears above in this file; if both live at module
    level, this later definition is the one in effect -- confirm which is
    intended.

    Args:
        word: Query; converted with ``str()`` before being put in the URL.
        flag: Not used by this function.

    Returns:
        None.
    """
    query = str(word)
    page_url = "http://www.chinaso.com/search/pagesearch.htm?q=" + query
    chinaSoExtract(getRemoteClient(page_url))
def baiduRecommend(word, flag=False):
    """Pass a Baidu result page for *word* to ``baiduExtract``.

    The search URL is handed to ``getRemoteClient`` and whatever it returns
    is forwarded unchanged to ``baiduExtract``.

    NOTE(review): an earlier ``baiduRecommend`` (calling ``baiduExtractFor``)
    appears above in this file; if both live at module level, this later
    definition is the one in effect -- confirm which is intended.

    Args:
        word: Query; converted with ``str()`` before being put in the URL.
        flag: Not used by this function.

    Returns:
        None.
    """
    query = str(word)
    page_url = "http://www.baidu.com/s?wd=" + query + "&pn=1"
    baiduExtract(getRemoteClient(page_url))
def shenmaRecommend(word, flag=False):
    """Pass the Shenma result page for *word* to ``shenmaExtract``.

    Builds the page URL from the Base64-encoded query (with ``+`` replaced by
    ``!``), hands it to ``getRemoteClient`` and forwards whatever that returns
    to ``shenmaExtract``.

    Args:
        word: Query text to encode into the URL.
        flag: Not used by this function.

    Returns:
        None.
    """
    # b64encode is used rather than encodestring because encodestring inserts
    # a newline after every 76 output characters, which would end up inside
    # the URL for long queries.
    base64_text = base64.b64encode(word).strip()
    base64_text = base64_text.replace("+", "!")
    url = u'http://aibing.cc/shenma/' + base64_text + u'.html'

    result = getRemoteClient(url)
    shenmaExtract(result)
def getBingCVID():
    """Return the ``IG`` token found in the Bing CN home page, or ``None``.

    Passes ``http://cn.bing.com/`` to ``getRemoteClient`` and searches the
    returned text for ``IG:"<token>",EventID:``.

    NOTE(review): another ``getBingCVID`` is defined further down in this
    file; if both live at module level, that later definition replaces this
    one -- confirm which is intended.

    Returns:
        The captured token string, or ``None`` when the page could not be
        obtained, is not searchable text, or contains no match. Failures are
        reported with ``print`` rather than raised.
    """
    url = "http://cn.bing.com/"
    try:
        html_text = getRemoteClient(url)
    except Exception:
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        print("Get Bing cvid error")
        return None

    try:
        # Non-greedy capture: stop at the first `",EventID:` so a repeated
        # marker on the same line cannot widen the token.
        match = re.search(r'IG:"(.*?)",EventID:', html_text)
    except TypeError as e:
        # Raised when `html_text` is not a string (for example None).
        print(e)
        return None

    return match.group(1) if match else None
def getBingCVID():
    """Return the ``IG`` token found in the Bing CN home page, or ``None``.

    Passes ``http://cn.bing.com/`` to ``getRemoteClient`` and searches the
    returned text for ``IG:"<token>",EventID:``.

    NOTE(review): an earlier, near-identical ``getBingCVID`` appears above in
    this file; if both live at module level, this later definition is the one
    in effect -- the duplicate can probably be removed.

    Returns:
        The captured token string, or ``None`` when the page could not be
        obtained, is not searchable text, or contains no match. Failures are
        reported with ``print`` rather than raised.
    """
    url = "http://cn.bing.com/"
    try:
        html_text = getRemoteClient(url)
    except Exception:
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        print("Get Bing cvid error")
        return None

    try:
        # Non-greedy capture: stop at the first `",EventID:` so a repeated
        # marker on the same line cannot widen the token.
        found = re.search(r'IG:"(.*?)",EventID:', html_text)
    except TypeError as e:
        # Raised when `html_text` is not a string (for example None).
        print(e)
        return None

    if found is None:
        return None
    return found.group(1)
def so360Suggest(word, delay=False):
    """Record 360 Search suggestions for *word* in ``datas``.

    Passes the so.360.cn suggest URL to ``getRemoteClient``, takes the
    contents of the first ``[...]`` group found in the returned text, splits
    it on commas, drops the first and last character of each item and stores
    the remainder as a key of the shared ``datas`` mapping. New keys get the
    value ``"so360"``; existing keys get ``";so360"`` appended.

    Args:
        word: Query text; appended to the URL as-is (no URL quoting is done
            here).
        delay: Not used by this function.

    Returns:
        None. The function only updates ``datas`` (while holding ``mutex``).
    """
    url = ("http://sug.so.360.cn/suggest/word?callback=suggest_so"
           "&encodein=utf-8&encodeout=utf-8&word=" + word)
    result = getRemoteClient(url)

    groups = re.findall(r"\[(.*?)\]", result, re.M)
    if not groups:
        return

    # `with` releases the lock on every exit path, including exceptions.
    with mutex:
        for item in groups[0].split(","):
            # Drop the first and last character of the item (presumably its
            # surrounding quotes -- verify against the actual response).
            key = item[1:-1]
            if key in datas:
                datas[key] = datas[key] + ";so360"
            else:
                datas[key] = "so360"