def parse_captcha(self, site):
    """Fetch the login page and solve its captcha, if it shows one.

    The login page is requested with ``site.login_headers`` and searched
    for ``<img>`` elements nested inside a ``<form>``.

    Returns:
        The result of ``PuTaoCaptchaParser.analyze`` on the downloaded
        captcha image, or the literal placeholder ``"XxXx"`` when the page
        contains no such image.
    """
    base_url = "https://pt.sjtu.edu.cn/"
    local_copy = "/tmp/cap.png"

    login_page = HttpUtils.get("https://pt.sjtu.edu.cn/login.php",
                               headers=site.login_headers)
    images = login_page.select("form img")

    # No captcha on the page: hand back the placeholder value.
    if not images:
        return "XxXx"

    # The first image inside the form is taken to be the captcha; its
    # "src" attribute is appended to the site root to build the URL.
    captcha_url = base_url + images[0]["src"]
    HttpUtils.download_file(captcha_url, local_copy, over_write=True)
    return PuTaoCaptchaParser.analyze(local_copy)
def do_process(cls, item):
    """Download one queued item, re-queueing it on failure.

    Args:
        item: mutable sequence whose first three slots are the destination
            path, the source URL and the retry counter. The counter in
            ``item[2]`` is incremented in place after a failed download.

    An item whose retry counter is above 5 is not downloaded; a message
    naming its path is printed instead.
    """
    path, url, attempts = item[0], item[1], item[2]

    within_budget = attempts <= 5
    if not within_budget:
        print("Exceed max retry time: " + path)
        return

    if HttpUtils.download_file(url=url, dest_path=path):
        return

    # Download reported failure: bump the counter, put the item back on
    # the queue and call init_thread() (presumably so a worker picks the
    # item up again -- confirm against init_thread's definition).
    item[2] += 1
    cls.task_pool.put(item)
    cls.init_thread()
def login(self, site):
    """Log in to the site unless that is unnecessary.

    No request is posted when ``self.isLogin`` is already set, when
    ``site.login_needed`` is false, or when ``self.check_login(site)``
    reports an active session; in those cases ``self.isLogin`` is set to
    True and True is returned.

    Otherwise the login page is fetched, its captcha (if any) is downloaded
    and solved into ``site.check_code``, the login form is posted to
    ``site.login_page`` and the outcome is re-checked.

    Returns:
        The value of ``self.isLogin`` after the attempt: True on the
        short-circuit paths, otherwise whatever ``self.check_login(site)``
        returns after the form has been posted.
    """
    # Same short-circuit order as the original condition, expressed as a
    # guard clause.
    if self.isLogin or not site.login_needed or self.check_login(site):
        self.isLogin = True
        return True

    login_page = HttpUtils.get("https://pt.sjtu.edu.cn/login.php",
                               headers=site.login_headers)

    # The page may come back without a captcha image; indexing [0]
    # unconditionally would raise IndexError. Fall back to the same
    # "XxXx" placeholder that parse_captcha returns for that case.
    captcha_images = login_page.select("form img")
    if captcha_images:
        image_url = "https://pt.sjtu.edu.cn/" + captcha_images[0]["src"]
        HttpUtils.download_file(image_url, "/tmp/cap.png", over_write=True)
        site.check_code = PuTaoCaptchaParser.analyze("/tmp/cap.png")
    else:
        site.check_code = "XxXx"

    # The response is not inspected; success is decided by check_login.
    HttpUtils.post(site.login_page,
                   data=self.build_post_data(site),
                   headers=site.login_headers,
                   returnRaw=True)

    self.isLogin = self.check_login(site)
    return self.isLogin
def convert(cls, text, audio_file_path):
    """Download synthesized speech for ``text`` from Baidu's text2audio URL.

    Args:
        text: the text to synthesize; it is percent-encoded and sent as the
            ``tex`` query parameter.
        audio_file_path: destination path handed to
            ``HttpUtils.download_file``; an existing file is overwritten
            (``over_write=True``).
    """
    # Encode the text on its own with no "safe" characters. Quoting the
    # whole URL with safe=string.printable left '&', '#', '=', '%', '+'
    # and whitespace inside the text untouched, which corrupted or
    # truncated the query string. Non-ASCII text is encoded as before.
    encoded_text = quote(str(text), safe="")
    url = (
        'http://tts.baidu.com/text2audio?idx=1&tex=%s&cuid=baidu_speech_demo'
        '&cod=1&lan=zh&ctp=1&pdt=1&spd=4&per=5&vol=5&pit=7' % encoded_text
    )
    HttpUtils.download_file(url, audio_file_path, over_write=True)