def get_360_captcha(count=1000):
    """Download ``count`` captcha images from captcha.360.cn.

    Every response body is written unmodified to ``360cn/360cn_<n>.jpg``
    (relative to the current working directory), with ``<n>`` running from
    0 to ``count - 1``.  The ``360cn`` directory is created when it is
    missing, files that already exist under the same name are overwritten,
    and the index of each saved image is printed as progress output.

    The same fixed URL is requested on every iteration.
    """
    site = '360cn'
    url = r'http://captcha.360.cn/image.php?app=i360&r=0.874529683496803'

    directory_missing = not os.access(site, os.F_OK)
    if directory_missing:
        os.mkdir(site)

    for index in range(count):
        image_bytes = urllib4.urlopen(url).read()
        file_name = "_".join([site, str(index)]) + ".jpg"
        with open(os.path.join(site, file_name), 'wb') as out:
            out.write(image_bytes)
        # Parenthesised so the line is valid both as a print statement and
        # as a print() call.
        print(index)
def get_icbc_captcha(count=1000):
    """Download ``count`` captcha images from the ICBC verify-image servlet.

    Every response body is written unmodified to ``icbc/icbc_<n>.jpg``
    (relative to the current working directory), with ``<n>`` running from
    0 to ``count - 1``.  The ``icbc`` directory is created when it is
    missing, files that already exist under the same name are overwritten,
    and the index of each saved image is printed as progress output.

    Requests are sent with the ``header`` value defined outside this
    function, and the same fixed URL is used for every request.
    """
    url = 'https://vip.icbc.com.cn/servlet/com.icbc.inbs.person.servlet.Verifyimage2?disFlag=2&randomKey=132098559975661896'
    site = 'icbc'
    if not os.access(site, os.F_OK):
        os.mkdir(site)
    for i in range(count):
        r = urllib4.urlopen(url, headers=header)
        content = r.read()
        fname = os.path.join(site, "_".join([site, str(i)]) + ".jpg")
        with open(fname, 'wb') as f:
            f.write(content)
        # Parenthesised so the line is valid both as a print statement and
        # as a print() call.
        print(i)
def get_baidu_captcha(count=1000):
    """Download ``count`` captcha images from passport.baidu.com.

    Every response body is written unmodified to ``baidu/baidu_<n>.jpg``
    (relative to the current working directory), with ``<n>`` running from
    0 to ``count - 1``.  The ``baidu`` directory is created when it is
    missing, files that already exist under the same name are overwritten,
    and the index of each saved image is printed as progress output.

    Each request carries a distinct ``t`` query value: a base taken once
    from the clock before the loop, plus the image index.  Requests are
    sent with the ``header`` value defined outside this function.
    """
    site = 'baidu'
    url_template = 'https://passport.baidu.com/?verifypic&t=%s'

    if not os.access(site, os.F_OK):
        os.mkdir(site)

    # NOTE(review): the base is seconds * 100 (a float), not milliseconds;
    # presumably it only needs to differ between requests -- confirm.
    base_stamp = time.time() * 100

    for index in range(count):
        stamp = str(base_stamp + index)
        response = urllib4.urlopen(url_template % stamp, headers=header)
        image_bytes = response.read()
        file_name = "_".join([site, str(index)]) + ".jpg"
        with open(os.path.join(site, file_name), 'wb') as out:
            out.write(image_bytes)
        # Parenthesised so the line is valid both as a print statement and
        # as a print() call.
        print(index)
def __iter__(self):
    """Yield a ``GoogleQueryResult`` per search hit, fetching pages lazily.

    A page is requested from ``self.SEARCH_PATH`` with ``self.params`` as
    the query string for as long as ``self.params['start']`` is below
    ``self.count``.  ``self.params['start']`` is advanced in place, once
    per result consumed, so the instance keeps its position between
    pages.

    Iteration ends early when a page comes back with a ``responseStatus``
    other than 200 or with an empty result list.  Nothing advances
    ``start`` in either case, so continuing would re-send the identical
    request forever.
    """
    while self.params['start'] < self.count:
        url = self.SEARCH_PATH + '?' + urlencode(self.params)
        response = json.loads(
            urlopen(url, user_agent=CHROME_USER_AGENT).read())

        if response['responseStatus'] != 200:
            # The offset has not moved, so the next pass would repeat this
            # exact request; stop instead of spinning.
            return

        results = response['responseData']['results']
        if not results:
            # No more hits available: same non-advancing situation.
            return

        for result in results:
            # NOTE(review): the offset is bumped and compared before the
            # yield, so the result that brings ``start`` up to ``count``
            # is not yielded.  Kept as-is -- confirm this is intended.
            self.params['start'] += 1
            if self.params['start'] >= self.count:
                break
            yield GoogleQueryResult(result)
def get_taobao_pay_captcha(count=1000):
    """Download ``count`` captcha images from checkcode.taobao.com.

    Every response body is written unmodified to ``taobao/taobao_<n>.jpg``
    (relative to the current working directory), with ``<n>`` running from
    0 to ``count - 1``.  The ``taobao`` directory is created when it is
    missing, files that already exist under the same name are overwritten,
    and the index of each saved image is printed as progress output.

    Requests are sent with the ``header`` value defined outside this
    function, and the same fixed URL (session id and ``t`` value included)
    is used for every request.

    A newer request URL noted by the original author, kept for reference
    (it is not used by the code):
    http://checkcode.taobao.com/auction/checkcode?sessionID=4b9b83e4f8e6821c2e2929089f7b09bd&t=1323411260198&t=1323411260862&t=1323411261318&t=1323411261798&t=1323411262046&t=1323411262246&t=1323411262430&t=1323411262614&t=1323411262806&t=1323411262974&t=1323411263151&t=1323411263334&t=1323411263519&t=1323411263702&t=1323411263895&t=1323411264070&t=1323411264255&t=1323411264439&t=1323411264624&t=1323411264814&t=1323411265015&t=1323411265198&t=1323411265390&t=1323411265574&t=1323411265766&t=1323411265950&t=1323411266126&t=1323411266318&t=1323411266494&t=1323411266686&t=1323411266870&t=1323411267054&t=1323411267214&t=1323411267398&t=1323411277182
    """
    url = 'http://checkcode.taobao.com/auction/checkcode?sessionID=b5b6083c712d5a524d046e3324b3cec9&t=1321946037628'
    site = 'taobao'
    if not os.access(site, os.F_OK):
        os.mkdir(site)
    for i in range(count):
        r = urllib4.urlopen(url, headers=header)
        content = r.read()
        fname = os.path.join(site, "_".join([site, str(i)]) + ".jpg")
        with open(fname, 'wb') as f:
            f.write(content)
        # Parenthesised so the line is valid both as a print statement and
        # as a print() call.
        print(i)
def search(self, ip, top=50, _format='json'):
    """Run a lookup for ``ip`` and return the reply decoded from JSON.

    The ``URL`` template defined outside this method is filled with
    ``(ip, _format, top)`` in that order, passed through ``gurl.Url`` and
    converted back to a string.  That string is handed to
    ``self.request``, and whatever it returns is opened with
    ``urllib4.urlopen``.

    The response body is always parsed with ``json.loads``, whatever value
    ``_format`` has.
    """
    target = str(gurl.Url(URL % (ip, _format, top)))
    payload = urllib4.urlopen(self.request(target)).read()
    return json.loads(payload)