def googleDropdown(url=urls[4], headers=headers, values=values):
    """Fetch ``url`` and return its body parsed as JSON.

    The raw response body is passed through ``fixLazyJsonWithComments``
    before parsing, then decoded to unicode using the encoding that
    ``chardet`` detects.

    Args:
        url: Address to request. Defaults to ``urls[4]``.
        headers: Unused by this function; kept so the signature matches
            the sibling ``*Dropdown`` functions.
        values: Unused by this function; kept for the same reason.

    Returns:
        The value produced by ``json.loads`` on the cleaned-up body.

    Raises:
        urllib2.URLError: If the request fails.
        ValueError: If the cleaned-up body is still not valid JSON.
    """
    # Honour the ``url`` argument instead of always hitting ``urls[4]``.
    res = urllib2.urlopen(url)
    try:
        content = res.read()
    finally:
        # urllib2 responses are not context managers in Python 2, so close
        # the connection explicitly even when read() fails.
        res.close()

    j = fixLazyJsonWithComments(content)

    # Detect the encoding of the payload and decode it to unicode.
    # chardet reports ``None`` when it cannot guess; fall back to UTF-8
    # rather than failing with a TypeError inside decode().
    k = chardet.detect(j)
    j = j.decode(k['encoding'] or 'utf-8')

    return json.loads(j)
def aliExDropdown(url=urls[1], headers=headers, values=values):
    """POST ``values`` to ``url`` and return the embedded search data.

    The response body is searched for a ``window.intelSearchData = ...``
    assignment; the assigned text is cleaned up with
    ``fixLazyJsonWithComments`` and parsed as JSON.

    Args:
        url: Address to request. Defaults to ``urls[1]``.
        headers: HTTP headers sent with the request.
        values: Form fields, url-encoded and sent as the request body.

    Returns:
        The value produced by ``json.loads`` on the extracted data.

    Raises:
        urllib2.URLError: If the request fails.
        IndexError: If the body has no ``window.intelSearchData`` assignment.
        ValueError: If the extracted text is still not valid JSON.
    """
    data = urllib.urlencode(values)
    # Honour the ``url`` argument instead of always hitting ``urls[1]``.
    req = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(req)
    try:
        content = response.read()
    finally:
        # urllib2 responses are not context managers in Python 2, so close
        # the connection explicitly even when read() fails.
        response.close()

    # Strip newlines and tabs so the assignment sits on a single line.
    content = content.replace('\n', '').replace('\t', '')

    # With the newlines gone, ``(.*)`` captures everything after the marker;
    # ``[:-1]`` then drops the final character of that capture.
    encodeJson = re.findall('window.intelSearchData = (.*)', content)[0][:-1]

    j = fixLazyJsonWithComments(encodeJson)
    return json.loads(j)
def alibabaDropdown(url=urls[0], headers=headers, values=values):
    """POST ``values`` to ``url`` and return the embedded search data.

    The response body is searched for a ``window.intelSearchData=...``
    assignment; the assigned text is cleaned up with
    ``fixLazyJsonWithComments`` and parsed as JSON.

    Args:
        url: Address to request. Defaults to ``urls[0]``.
        headers: HTTP headers sent with the request.
        values: Form fields, url-encoded and sent as the request body.

    Returns:
        The value produced by ``json.loads`` on the extracted data.

    Raises:
        urllib2.URLError: If the request fails.
        IndexError: If the body has no ``window.intelSearchData`` assignment.
        ValueError: If the extracted text is still not valid JSON.
    """
    data = urllib.urlencode(values)
    # Honour the ``url`` argument instead of always hitting ``urls[0]``.
    req = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(req)
    try:
        content = response.read()
    finally:
        # urllib2 responses are not context managers in Python 2, so close
        # the connection explicitly even when read() fails.
        response.close()

    # Locate the part of the page to process: the text after the marker up
    # to the end of that line, minus its final character (``[:-1]``).
    encodeJson = re.findall('window.intelSearchData=(.*)', content)[0][:-1]

    # Turn the extracted text into well-formed JSON.
    j = fixLazyJsonWithComments(encodeJson)

    # Parse the JSON text back into Python data.
    return json.loads(j)