def on_result(self, curl, resp):
    """Process a finished request and store the company names it contains.

    After delegating to ``AioRunner.on_result`` the response is handled in
    one of three ways:

    * missing response or blank body -- an error is logged and the job is
      handed back to ``self.master.re_add_job``;
    * body without any ``site-list-title`` heading -- a warning is logged
      and the job's input line is appended to ``self.failfile``;
    * otherwise -- the captured names are passed to ``self.save_name`` and
      the request URL and response code are printed.
    """
    AioRunner.on_result(self, curl, resp)

    # Nothing usable came back: log it and queue the job again.
    if resp is None or resp.text.strip() == "":
        spider.runtime.Log.error(
            "Request return nothing! Readd...." + str(self.job))
        self.master.re_add_job(self.job)
        return

    names = re.findall(
        r'<h3 class="site-list-title">(.*?)<small', resp.text, re.S)

    # The page was served but holds no result entries: record the miss.
    if not names:
        warning = ("line " + str(self.job["lineno"]) +
                   ", key:" + self.job["key"] + ", no data...")
        spider.runtime.Log.warning(warning)
        self.failfile.write(self.job["line"].strip() + " no data.\n")
        self.failfile.flush()
        return

    self.save_name(self.job, names)
    # A single parenthesised argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (resp.request.url, resp.code))
def on_result(self, curl, resp):
    """Process a finished request, solving the HTTP 521 cookie challenge.

    After delegating to ``AioRunner.on_result`` the response is handled in
    one of four ways:

    * missing response or blank body -- an error is logged and the job is
      handed back to ``self.master.re_add_job``;
    * response code 521 -- the body is treated as a JavaScript challenge:
      it is written to ``login.js``, run with ``nodejs`` and the printed
      cookie string is parsed for ``__jsl_clearance``; ``__jsluid`` is taken
      from the ``Set-Cookie`` header when present.  The job is then queued
      again;
    * body without any ``search-result-title`` entry -- a warning is logged
      and the job's input line is appended to ``self.failfile``;
    * otherwise -- the captured names are passed to ``self.save_name``.

    The request URL and response code are printed for the 521 and the
    success paths.

    Raises:
        IndexError: if the challenge output contains no ``__jsl_clearance``
            value, or a ``Set-Cookie`` header is present but the expected
            pattern is not found in it.
    """
    AioRunner.on_result(self, curl, resp)

    # Nothing usable came back: log it and queue the job again.
    if resp is None or resp.text.strip() == "":
        spider.runtime.Log.error(
            "Request return nothing! Readd...." + str(self.job))
        self.master.re_add_job(self.job)
        return

    if resp.code == 521:
        # Strip the <script> wrapper and make the script print the cookie
        # instead of assigning it to document.cookie.
        challenge_js = (resp.text
                        .replace("<script>", "")
                        .replace("</script>", "")
                        .replace("document.cookie=dc", "console.log(dc)"))
        # ``with`` guarantees the handle is closed even if write() fails.
        with open("login.js", "w+b") as js_file:
            js_file.write(challenge_js)

        # SECURITY: this executes JavaScript supplied by the remote server
        # with the privileges of this process.  Run the crawler in a
        # sandbox or replace this step with a restricted JS evaluator.
        os.system("nodejs login.js > cookiestr.txt")

        # Previously this handle was never closed, leaking one file
        # descriptor per 521 response.
        with open("cookiestr.txt", "r+b") as cookie_file:
            cookie_output = cookie_file.read()
        self.__jsl_clearance = re.findall(
            r"__jsl_clearance=(.*?);", cookie_output, re.S)[0]

        if "Set-Cookie:" in resp.headers:
            set_cookie = re.findall(
                r"Set-Cookie:(.*?)path", resp.headers, re.S)[0]
            self.__jsluid = re.findall(
                r"__jsluid=(.*?);", set_cookie, re.S)[0]

        # NOTE(review): the job is re-queued after every 521 response,
        # whether or not a Set-Cookie header was seen -- confirm this
        # matches the intended retry policy.
        self.master.re_add_job(self.job)
    else:
        names = re.findall(
            r'class="search-result-title"><em>(.*?)</a>', resp.text, re.S)

        # The page was served but holds no result entries: record the miss.
        if not names:
            spider.runtime.Log.warning(
                "line " + str(self.job["lineno"]) +
                ", key:" + self.job["key"] + ", no data...")
            self.failfile.write(self.job["line"].strip() + " no data.\n")
            self.failfile.flush()
            return

        self.save_name(self.job, names)

    # A single parenthesised argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (resp.request.url, resp.code))
def on_result(self, curl, resp):
    """Trace the callback, run the base handler, then print URL and code.

    Emits a ``'result'`` debug message through ``self.dbg``, delegates to
    ``AioRunner.on_result`` and finally prints the request URL together
    with the response code.
    """
    self.dbg('result')
    AioRunner.on_result(self, curl, resp)

    request_url = resp.request.url
    status_code = resp.code
    # A single parenthesised argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (request_url, status_code))