Esempio n. 1
0
 def on_result(self, curl, resp):
     """Process a completed request and extract company names from it.

     Calls ``AioRunner.on_result`` first. A missing or blank response is
     logged and the job is re-added through ``self.master``. A page with no
     ``site-list-title`` heading is logged as a warning and recorded in
     ``self.failfile``. Otherwise the captured names go to ``save_name``
     and the request URL and response code are printed.
     """
     AioRunner.on_result(self, curl, resp)

     # Guard: nothing usable came back, so queue the job again.
     if resp is None or resp.text.strip() == "":
         spider.runtime.Log.error(
             "Request return nothing! Readd...." + str(self.job))
         self.master.re_add_job(self.job)
         return

     names = re.findall(
         r'<h3 class="site-list-title">(.*?)<small', resp.text, re.S)

     # Guard: the page came back but holds no matching heading.
     if not names:
         location = "line " + str(self.job["lineno"])
         warning = location + ", key:" + self.job["key"] + ", no data..."
         spider.runtime.Log.warning(warning)
         self.failfile.write(self.job["line"].strip() + " no data.\n")
         self.failfile.flush()
         return

     self.save_name(self.job, names)
     # Pre-formatted so the output is the same "url code" line whether
     # print is a statement or a function.
     print("%s %s" % (resp.request.url, resp.code))
Esempio n. 2
0
 def on_result(self, curl, resp):
     """Process a completed request, answering a 521 cookie challenge if sent.

     Calls ``AioRunner.on_result`` first, then takes one of three paths:

     * Missing or blank response: an error is logged, the job is re-added
       through ``self.master`` and the method returns.
     * Response code 521: the body is treated as a JavaScript challenge.
       It is written to ``login.js`` with the ``<script>`` tags removed and
       the cookie assignment replaced by ``console.log``, then run with
       ``nodejs`` whose output lands in ``cookiestr.txt``. The
       ``__jsl_clearance`` value found there is stored on the instance, as
       is ``__jsluid`` when a ``Set-Cookie:`` header carries it. The job is
       re-added in every case so the request is retried.
     * Any other code: search-result titles are extracted and passed to
       ``save_name``. If none are found, a warning is logged, the line is
       recorded in ``self.failfile`` and the method returns.

     The request URL and response code are printed at the end of the 521
     path and of the successful extraction path.
     """
     AioRunner.on_result(self, curl, resp)
     con = resp
     if con is None or con.text.strip() == "":
         spider.runtime.Log.error("Request return nothing! Readd...." +
                                  str(self.job))
         self.master.re_add_job(self.job)
         return

     if con.code == 521:
         # Make the challenge script print the cookie instead of setting it.
         script = con.text.replace("<script>", "").replace(
             "</script>", "").replace("document.cookie=dc",
                                      "console.log(dc)")
         with open("login.js", "wb") as js_file:
             js_file.write(script)

         # NOTE(review): this runs JavaScript received from the remote
         # server with this process's privileges. Consider sandboxing it.
         os.system("nodejs login.js > cookiestr.txt")

         # Context manager so the handle is closed even if read() fails.
         with open("cookiestr.txt", "rb") as cookie_file:
             cookie_output = cookie_file.read()

         clearance = re.findall(r"__jsl_clearance=(.*?);", cookie_output,
                                re.S)
         if clearance:
             self.__jsl_clearance = clearance[0]
         else:
             # nodejs failed or printed something unexpected; keep the old
             # value and let the retry below trigger a fresh challenge.
             spider.runtime.Log.error(
                 "Failed to extract __jsl_clearance from nodejs output: " +
                 str(self.job))

         if "Set-Cookie:" in con.headers:
             setcookie = re.findall(r"Set-Cookie:(.*?)path", con.headers,
                                    re.S)
             # Only overwrite the stored id when the header really has one.
             jsluid = re.findall(r"__jsluid=(.*?);", setcookie[0],
                                 re.S) if setcookie else []
             if jsluid:
                 self.__jsluid = jsluid[0]

         self.master.re_add_job(self.job)
     else:
         corp_name_list = re.findall(
             r'class="search-result-title"><em>(.*?)</a>', con.text, re.S)
         if not corp_name_list:
             spider.runtime.Log.warning("line " + str(self.job["lineno"]) +
                                        ", key:" + self.job["key"] +
                                        ", no data...")
             self.failfile.write(self.job["line"].strip() + " no data.\n")
             self.failfile.flush()
             return
         self.save_name(self.job, corp_name_list)

     # Pre-formatted so the output is the same "url code" line whether
     # print is a statement or a function.
     print("%s %s" % (resp.request.url, resp.code))
Esempio n. 3
0
 def on_result(self, curl, resp):
     """Emit a debug marker, call ``AioRunner.on_result``, print URL and code."""
     self.dbg('result')
     AioRunner.on_result(self, curl, resp)
     request_url = resp.request.url
     status_code = resp.code
     # Pre-formatted so the output is the same "url code" line whether
     # print is a statement or a function.
     print("%s %s" % (request_url, status_code))