def prepare_req(self, job, curl, proxies):
    """Prepare the HTTP request for ``job`` on ``curl``.

    ``AioRunner.prepare_req`` is consulted first; if it returns anything
    other than ``None`` that value is returned unchanged and no request is
    prepared here.

    Args:
        job: mapping describing the work item; its ``'url'`` entry is the
            address to fetch.
        curl: object exposing ``prepare_req(url, headers=..., proxies=...)``.
        proxies: passed through untouched to ``curl.prepare_req``.

    Returns:
        The non-``None`` result of ``AioRunner.prepare_req`` if there is
        one, otherwise ``True`` after ``curl.prepare_req`` has been called.

    Raises:
        ValueError: if ``job`` carries no ``'url'`` entry.
    """
    pr = AioRunner.prepare_req(self, job, curl, proxies)
    if pr is not None:
        return pr

    # The previous ``url, headers = {}`` tried to unpack an empty dict into
    # two names and therefore always raised ValueError.  The URL is now
    # taken from the job; a job without one still raises ValueError, but
    # with a message that says what is wrong.
    # NOTE(review): 'url' as the job key is an assumption -- confirm
    # against the producer of these jobs.
    if 'url' not in job:
        raise ValueError("job has no 'url': %r" % (job,))
    url = job['url']
    headers = {}

    curl.prepare_req(url, headers=headers, proxies=proxies)
    return True
def prepare_req(self, job, curl, proxies): self.dbg('prepare') pa = AioRunner.prepare_req(self, job, curl, proxies) if pa is not None: return pa if 'value' in job: url = "https://www.linkedin.com/jobs2/view/%d" % job['value'] else: url = job['url'] print "[%d] prepare %s proxies=" % (self.idx, url), proxies headers={} if 'ip.cn' in url: headers['User-Agent'] = 'curl/7.20.1' curl.prepare_req(url, headers=headers, proxies=proxies) return True
def prepare_req(self, job, curl, proxies): self.dbg('prepare') pa = AioRunner.prepare_req(self, job, curl, proxies) if pa is not None: return pa if 'key' in job: key = spider.util.utf8str(job['key']) url = "http://qichacha.com/search?key=" + quote(key) + "&sType=0" else: Log.error("Invalid job.===>" + job.__str__()) print "[%d] prepare %s proxies=" % (self.idx, url), proxies headers = {} if 'ip.cn' in url: headers['User-Agent'] = 'curl/7.20.1' curl.prepare_req(url, headers=headers, proxies=proxies) return True
def prepare_req(self, job, curl, proxies): self.dbg('prepare') pa = AioRunner.prepare_req(self, job, curl, proxies) if pa is not None: return pa if 'key' in job: key = spider.util.utf8str(job['key']) url = r"http://www.qixin007.com/search/?key=" + quote( key) + "&type=enterprise&source=&isGlobal=Y" # url = "http://qichacha.com/search?key=" + quote(key) + "&sType=0" else: Log.error("Invalid job.===>" + job.__str__()) print "[%d] prepare %s proxies=" % (self.idx, url), proxies headers = {} if 'ip.cn' in url: headers['User-Agent'] = 'curl/7.20.1' if self.__jsl_clearance: headers["Cookie"] = "__jsl_clearance=" + self.__jsl_clearance + ";" if self.__jsluid: headers["Cookie"] += "__jsluid=" + self.__jsluid curl.prepare_req(url, headers=headers, proxies=proxies) return True