def getShenmaRelateDict(word):
    """Record Shenma "related search" terms for ``word`` in the shared ``datas`` dict.

    The query is Base64-encoded into the page URL, the page is fetched via
    ``getRemoteClient`` and parsed with ``etree.HTML``. The text of every
    ``<li>/<a>`` under the ``div.xglist`` block becomes a key in the
    module-level ``datas`` mapping; its value is a ';'-separated list of
    source tags, to which ``'shen_ma'`` is appended.

    :param word: query text. Byte strings are used as-is; text strings are
        encoded as UTF-8 first (assumption: the site expects UTF-8 -- TODO
        confirm against the service).
    :return: None. Results are written to ``datas`` under ``mutex``.
    """
    # The Shenma search page URL carries the query Base64-encoded.
    # b64encode is used instead of the legacy encodestring because the latter
    # inserts a line break every 76 output characters, which would leave
    # newlines inside the URL for long queries.
    if not isinstance(word, bytes):
        word = word.encode('utf-8')
    base64_text = base64.b64encode(word).decode('ascii')
    # Shenma search replaces '+' in the encoded text with '!'.
    base64_text = base64_text.replace('+', '!')
    url = u'http://aibing.cc/shenma/' + base64_text + u'.html'

    page = getRemoteClient(url)
    tree = etree.HTML(page)
    xglist = tree.xpath(
        '/html/body/div[@id="hd_main"]/div[@id="res"]/div[@class="xglist"]')
    if not xglist:
        return

    mutex.acquire()
    try:
        for li in xglist[0].xpath('.//li'):
            anchors = li.xpath('./a')
            if not anchors:
                # Best effort: list items without a link carry no term.
                continue
            title = anchors[0].xpath('string(.)').strip()
            if title in datas:
                datas[title] = datas[title] + ';shen_ma'
            else:
                datas[title] = 'shen_ma'
    finally:
        # Always release, otherwise one failure would block every other
        # caller waiting on the same lock.
        mutex.release()
def chinasoSuggest(word, delay=False):
    """Record chinaso.com search suggestions for ``word`` in the shared ``datas`` dict.

    Fetches the JSONP suggestion response via ``getRemoteClient``, takes the
    contents of the first ``[...]`` group, splits it on ',' and stores each
    decoded item as a key of ``datas`` with the source tag ``"chinaSo"``
    appended (tags are ';'-separated).

    :param word: query text, concatenated into the URL as-is.
    :param delay: accepted for signature compatibility; not used here.
    :return: None. Results are written to ``datas`` under ``mutex``.
    :raises SyntaxError: if an item cannot be parsed as a string literal
        (for example a trailing backslash); the lock is still released.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    url = "http://www.chinaso.com/search/suggest?callback=jsonpHandle&k=" + word
    result = getRemoteClient(url)

    matchObj = re.findall(r"\[(.*?)\]", result, re.M)
    if not matchObj:
        return

    mutex.acquire()
    try:
        for obj in matchObj[0].split(","):
            # Drop the item's own quotes and re-wrap it as one unicode
            # literal so that parsing it decodes escapes such as \uXXXX.
            literal = 'u"' + obj.replace('"', '') + '"'
            # SECURITY: this text comes from a remote server. The original
            # used eval(); literal_eval accepts literals only, so the
            # response can never execute code.
            key = ast.literal_eval(literal)
            if key in datas:
                datas[key] = datas[key] + ";chinaSo"
            else:
                datas[key] = "chinaSo"
    finally:
        # A malformed item must not leave the lock held forever.
        mutex.release()
def bingSuggestion(word, delay=False):
    """Record Bing autosuggest terms for ``word`` in the shared ``datas`` dict.

    Obtains a ``cvid`` token from ``getBingCVID``; when none is available the
    function returns without doing anything. Otherwise the suggestion
    endpoint is fetched via ``getRemoteClient`` and every value matched by
    ``query="..."`` in the response becomes a key of ``datas`` with the
    source tag ``"bing"`` appended (tags are ';'-separated).

    :param word: query text, concatenated into the URL as-is.
    :param delay: accepted for signature compatibility; not used here.
    :return: None. Results are written to ``datas`` under ``mutex``.
    """
    cvid = getBingCVID()
    if cvid is None:
        return

    url = ("http://cn.bing.com/AS/Suggestions?pt=page.serp&bq=" + word
           + "&mkt=zh-cn&qry=" + word + "&cp=" + str(len(word))
           + "&o=hs&css=1&cvid=" + cvid)
    result = getRemoteClient(url)

    suggestions = re.findall(r"query=\"(.*?)\"", result, re.M)
    if not suggestions:
        return

    mutex.acquire()
    try:
        for key in suggestions:
            if key in datas:
                datas[key] = datas[key] + ";bing"
            else:
                datas[key] = "bing"
    finally:
        # Release even on error so other callers are never blocked.
        mutex.release()
def sogouSuggestion(word, delay=False):
    """Record Sogou search suggestions for ``word`` in the shared ``datas`` dict.

    Fetches the suggestion response via ``getRemoteClient``, takes the
    contents of the first ``,[...]`` group, splits it on '",' and stores each
    decoded, non-empty item as a key of ``datas`` with the source tag
    ``"sogou"`` appended (tags are ';'-separated).

    :param word: query text, concatenated into the URL as-is.
    :param delay: accepted for signature compatibility; not used here.
    :return: None. Results are written to ``datas`` under ``mutex``.
    :raises SyntaxError: if an item cannot be parsed as a string literal;
        the lock is still released.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    url = "https://www.sogou.com/suggnew/ajajjson?key=" + word + "&type=web&ori=yes&pr=web&abtestid=0&ipn=false"
    result = getRemoteClient(url)

    matchObj = re.findall(r",\[(.*?)\]", result, re.M)
    if not matchObj:
        return

    mutex.acquire()
    try:
        for w in matchObj[0].split('",'):
            text = w.replace('"', '')
            # Skip empty items (e.g. an empty "[]" list). The original
            # length check ran after the quotes were re-added, so it never
            # filtered anything and an empty key was recorded.
            if not text:
                continue
            # SECURITY: this text comes from a remote server. The original
            # used eval(); literal_eval accepts literals only while still
            # decoding escapes such as \uXXXX.
            key = ast.literal_eval('u"' + text + '"')
            if key in datas:
                datas[key] = datas[key] + ";sogou"
            else:
                datas[key] = "sogou"
    finally:
        # A malformed item must not leave the lock held forever.
        mutex.release()
def baiduSuggestion(word, delay=False):
    """Record Baidu search suggestions for ``word`` in the shared ``datas`` dict.

    Fetches the suggestion response via ``getRemoteClient``, takes the
    contents of the first ``[...]`` group and stores every double-quoted
    string inside it as a key of ``datas`` with the source tag ``"baidu"``
    appended (tags are ';'-separated).

    :param word: query text, concatenated into the URL as-is.
    :param delay: accepted for signature compatibility; not used here.
    :return: None. Results are written to ``datas`` under ``mutex``.
    """
    url = "http://suggestion.baidu.com/su?wd=" + word
    result = getRemoteClient(url)

    groups = re.findall(r"\[(.*?)\]", result, re.M)
    if not groups:
        return

    # Parse before taking the lock so it is held only for the dict updates.
    suggestions = re.findall("\"(.*?)\"", groups[0], re.M)

    mutex.acquire()
    try:
        for key in suggestions:
            if key in datas:
                datas[key] = datas[key] + ";baidu"
            else:
                datas[key] = "baidu"
    finally:
        # Release even on error so other callers are never blocked.
        mutex.release()
Example #6
0
def bingRecommend(word, flag=False):
    """Fetch a Bing search page for ``word`` and pass it to ``bingExtract``.

    :param word: query; converted with ``str()`` before being put in the URL.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``bingExtract``.
    """
    search_url = 'https://www.bing.com/search?q=' + str(word) + '&first=1'
    bingExtract(getRemoteClient(search_url))
Example #7
0
def sogouRecommend(word, flag=False):
    """Fetch a Sogou web-search page for ``word`` and pass it to ``sogouExtract``.

    :param word: query; converted with ``str()`` before being put in the URL.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``sogouExtract``.
    """
    search_url = "http://www.sogou.com/web?query=" + str(word) + "&page=1"
    sogouExtract(getRemoteClient(search_url))
def chinaSoRecommend(word, flag=False):
    """Fetch a chinaso.com search page for ``word`` and pass it to ``chinaSoExtractFor``.

    :param word: query text, concatenated into the URL as-is.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``chinaSoExtractFor``.
    """
    search_url = "http://www.chinaso.com/search/pagesearch.htm?q=" + word + "&page=1"
    chinaSoExtractFor(getRemoteClient(search_url))
def baiduRecommend(word, flag=False):
    """Fetch a Baidu search page for ``word`` and pass it to ``baiduExtractFor``.

    The request goes to a hard-coded IP address rather than a host name
    (presumably a Baidu front end -- verify before relying on it).

    :param word: query text, concatenated into the URL as-is.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``baiduExtractFor``.
    """
    search_url = "http://220.181.111.188/s?wd=" + word + "&pn=1"
    baiduExtractFor(getRemoteClient(search_url))
def sogouRecommend(word, flag=False):
    """Fetch a Sogou web-search page (HTTPS) for ``word`` and pass it to ``sogouExtractFor``.

    :param word: query text, concatenated into the URL as-is.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``sogouExtractFor``.
    """
    search_url = "https://www.sogou.com/web?query=" + word + "&page=1"
    sogouExtractFor(getRemoteClient(search_url))
Example #11
0
def chinaSoRecommend(word, flag=False):
    """Fetch a chinaso.com search page for ``word`` and pass it to ``chinaSoExtract``.

    :param word: query; converted with ``str()`` before being put in the URL.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``chinaSoExtract``.
    """
    search_url = "http://www.chinaso.com/search/pagesearch.htm?q=" + str(word)
    chinaSoExtract(getRemoteClient(search_url))
Example #12
0
def baiduRecommend(word, flag=False):
    """Fetch a Baidu search page for ``word`` and pass it to ``baiduExtract``.

    :param word: query; converted with ``str()`` before being put in the URL.
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``baiduExtract``.
    """
    search_url = "http://www.baidu.com/s?wd=" + str(word) + "&pn=1"
    baiduExtract(getRemoteClient(search_url))
Example #13
0
def shenmaRecommend(word, flag=False):
    """Fetch the Shenma page for ``word`` and pass it to ``shenmaExtract``.

    The query is Base64-encoded into the page URL, with '+' replaced by '!'.

    :param word: query text. Byte strings are used as-is; text strings are
        encoded as UTF-8 first (assumption: the site expects UTF-8 -- TODO
        confirm against the service).
    :param flag: accepted for signature compatibility; not used here.
    :return: None. Whatever ``getRemoteClient`` returns for the URL is
        handed straight to ``shenmaExtract``.
    """
    # b64encode is used instead of the legacy encodestring because the latter
    # inserts a line break every 76 output characters, which would leave
    # newlines inside the URL for long queries.
    if not isinstance(word, bytes):
        word = word.encode('utf-8')
    base64_text = base64.b64encode(word).decode('ascii')
    base64_text = base64_text.replace("+", "!")
    url = u'http://aibing.cc/shenma/' + base64_text + u'.html'

    result = getRemoteClient(url)
    shenmaExtract(result)
def getBingCVID():
    """Return the ``IG`` token embedded in the cn.bing.com home page, or None.

    The page is fetched via ``getRemoteClient`` and searched for
    ``IG:"<value>",EventID:``. Failures are reported with ``print`` and
    never propagate to the caller.

    :return: the captured token string, or None when the fetch fails, the
        search fails, or the marker is absent.
    """
    url = "http://cn.bing.com/"
    try:
        html_text = getRemoteClient(url)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        print("Get Bing cvid error")
        return None

    try:
        # [^"]* stops at the closing quote; a greedy .* could run on to a
        # later '",EventID:' on the same line and return garbage.
        m = re.search(r'IG:"([^"]*)",EventID:', html_text)
    except Exception as e:
        print(e)
        return None

    if m:
        return m.group(1)
    return None
Example #15
0
def getBingCVID():
    """Extract the ``IG`` identifier from the cn.bing.com home page.

    Downloads the page through ``getRemoteClient`` and looks for the
    fragment ``IG:"<value>",EventID:``. Errors are printed, not raised.

    :return: the identifier string, or None if the download fails, the
        search raises, or the fragment is not present.
    """
    url = "http://cn.bing.com/"
    try:
        html_text = getRemoteClient(url)
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit still propagate.
        print("Get Bing cvid error")
        return None

    try:
        # Capture up to the first closing quote only; the previous greedy
        # '(.*)' could span several quoted fields on one line.
        match = re.search(r'IG:"([^"]*)",EventID:', html_text)
    except Exception as e:
        print(e)
        return None

    return match.group(1) if match else None
Example #16
0
def so360Suggest(word, delay=False):
    """Record 360 Search (so.360.cn) suggestions for ``word`` in the shared ``datas`` dict.

    Fetches the JSONP suggestion response via ``getRemoteClient``, takes the
    contents of the first ``[...]`` group, splits it on ',' and stores each
    item, minus its first and last character, as a key of ``datas`` with the
    source tag ``"so360"`` appended (tags are ';'-separated).

    :param word: query text, concatenated into the URL as-is.
    :param delay: accepted for signature compatibility; not used here.
    :return: None. Results are written to ``datas`` under ``mutex``.
    """
    url = "http://sug.so.360.cn/suggest/word?callback=suggest_so&encodein=utf-8&encodeout=utf-8&word=" + word
    result = getRemoteClient(url)

    groups = re.findall(r"\[(.*?)\]", result, re.M)
    if not groups:
        return

    mutex.acquire()
    try:
        for item in groups[0].split(","):
            # Drop the first and last character of each item (presumably
            # the surrounding double quotes -- verify against a response).
            key = item[1:-1]
            if key in datas:
                datas[key] = datas[key] + ";so360"
            else:
                datas[key] = "so360"
    finally:
        # Release even on error so other callers are never blocked.
        mutex.release()