def _start_requests(self):
    """Seed requests for the 51job, Zhaopin and Liepin company crawls.

    Yields one 51job search request per (area code, salary bracket)
    combination, then a single entry-point request each for Zhaopin
    and Liepin.  (The original kept an unused ``req = []`` accumulator
    in this generator; it has been removed.)
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    # 51job full-site crawl.  ``self.start_urls`` here maps an area
    # code to a display name -- TODO confirm against the spider config.
    for _url, name in self.start_urls.items():
        # After the %-substitution of the area code, the str.format
        # placeholders remain: {0:{1}2d} zero-pads the salary-bracket
        # index to two digits, {2} is the page number.  '%%2520'
        # collapses to the literal '%2520' in the final URL.
        url = 'http://search.51job.com/list/%s,000000,0000,00,9,{0:{1}2d},%%2520,2,{2}.html' % _url
        for i in range(1, 13):
            # Seed page 1 now; pass a template ('formater') with the
            # page slot left open so the callback can paginate.
            start_url = url.format(i, '0', 1)
            formater = url.format(i, '0', '{}')
            yield self.request(start_url,
                               headers=self.default_header,
                               redis_flag=True,
                               meta={'formater': formater, 'money': i},
                               callback=self.job_in)
    # Zhaopin full-site crawl entry point.
    yield self.request('http://company.zhaopin.com/beijing/',
                       headers=self.default_header,
                       callback=self.zhilian_in)
    # Liepin full-site crawl entry point.
    yield self.request('https://www.liepin.com/company/',
                       headers=self.default_header,
                       callback=self.liepin_in)
def start_requests(self):
    """Create the seed requests for the three known JRJ listing pages.

    Only URLs from ``self.start_urls`` that match one of the recognised
    endpoints produce a request; anything else is silently skipped.
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    # Keyword arguments shared by every seed request.
    common = dict(redis_flag=REDISFLAG, headers=self.default_header)
    seeds = []
    for url in self.start_urls:
        if url == 'http://insurance.jrj.com.cn/action/SearchIPJson.jspa':
            # Insurance-product search is a POST endpoint; seed page 1.
            seeds.append(self.request(url,
                                      method='POST',
                                      body=self.post_data(1),
                                      callback=self.jrj_insurance_in,
                                      **common))
        elif url == 'http://bank.jrj.com.cn/txtBank/banklist_1.html':
            seeds.append(self.request(url,
                                      callback=self.jrj_bank_in,
                                      **common))
        elif url == 'http://insurance.jrj.com.cn/html/ic/list/ics-0.shtml':
            seeds.append(self.request(url,
                                      callback=self.jrj_insurance_org_in,
                                      **common))
    return seeds
def get_cookies():
    """Log in to licai.com and return the session cookies as a dict.

    Retries forever on request failures (network errors, etc.), logging
    each failure.  Credentials are redacted placeholders in this source.

    Returns:
        dict: cookie name -> value from the login response.
    """
    url = 'https://www.licai.com/api/v1/auth/login/pass'
    body = json.dumps({"username": "******", "password": "******"})
    while True:
        try:
            return requests.put(url, body).cookies.get_dict()
        except Exception as e:
            # Fix: was ``except BaseException``, which also swallowed
            # KeyboardInterrupt/SystemExit and made this infinite retry
            # loop impossible to interrupt cleanly.
            logger.info(repr(e))
def _start_requests(self):
    """Yield one Baidu-image seed request per configured start URL.

    (The original kept an unused ``req = []`` accumulator in this
    generator; it has been removed.)
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    for url in self.start_urls:
        yield self.request(url,
                           headers=self.default_header,
                           redis_flag=True,
                           callback=self.baidu_image_in)
def _start_requests(self):
    """Seed the chinawealth.com.cn wealth-product listing crawl.

    Returns a list with one request per start URL that matches the
    recognised listing endpoint (normally zero or one).
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    target = 'https://www.chinawealth.com.cn/zzlc/jsp/lccp.jsp'
    return [self.request(url,
                         redis_flag=REDISFLAG,
                         callback=self.chinawealth_jumps)
            for url in self.start_urls if url == target]
def start_requests(self):
    """Seed the HKEX crawl from the recognised start URL.

    URLs other than the HKEX root are ignored.
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    seeds = []
    for url in self.start_urls:
        # Skip anything that is not the HKEX entry point.
        if url != 'http://www1.hkex.com.hk':
            continue
        seeds.append(self.request(url,
                                  redis_flag=REDISFLAG,
                                  callback=self.HK_in))
    return seeds
def start_requests(self):
    """Seed the p2peye platform-listing crawl.

    Only the recognised listing URL produces a request.
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    listing = 'http://www.p2peye.com/platform/all/p1/'
    return [self.request(url,
                         redis_flag=REDISFLAG,
                         callback=self.p2peye_list)
            for url in self.start_urls if url == listing]
def _start_requests(self):
    """Seed the cfachina.org crawl from the recognised start URL.

    URLs other than the cfachina root are ignored.
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    seeds = []
    for url in self.start_urls:
        # Only the cfachina entry point is seeded.
        if url != 'http://www.cfachina.org/':
            continue
        seeds.append(self.request(url,
                                  redis_flag=REDISFLAG,
                                  headers=self.default_header,
                                  callback=self.cfa_in))
    return seeds
def _start_requests(self):
    """Yield one authenticated licai.com seed request per start URL.

    Fetches fresh login cookies via ``get_cookie()`` for each request
    and enables the per-request cookiejar.  (The original kept an
    unused ``req = []`` accumulator in this generator; it has been
    removed.)
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    for url in self.start_urls:
        yield self.request(url,
                           headers=self.default_header,
                           cookies=get_cookie(),
                           meta={'cookiejar': 1},
                           redis_flag=True,
                           callback=self.licai_in)
def _start_requests(self):
    """Seed the cyzone crawl from the recognised start URL.

    URLs other than the cyzone root are ignored.
    (NOTE(review): host is 'wwv.cyzone.cn', not 'www' -- looks odd but
    is preserved byte-for-byte; confirm against the spider config.)
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    seeds = []
    for url in self.start_urls:
        if url != 'http://wwv.cyzone.cn':
            continue
        seeds.append(self.request(url,
                                  redis_flag=REDISFLAG,
                                  callback=self.cyzone_start))
    return seeds
def start_requests(self):
    """POST the first howbuy search page for every configured start URL.

    Each request carries the shared session cookie and page-1 form body.
    """
    logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
    return [self.request(url,
                         method='POST',
                         body=self.post_data(1),
                         redis_flag=REDISFLAG,
                         headers=self.default_header,
                         cookies=COOKIE,
                         callback=self.howbuy_in)
            for url in self.start_urls]