def start_requests(self):
    """Continuously pull crawl tasks and yield one GET search per day.

    Tasks come from a local iterator when ``self.local`` is set, otherwise
    from the remote task service. Each task is a date window that is fanned
    out into one request per day.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    task_source = None
    while True:
        # Pick the task source: lazily-created local iterator vs remote pull.
        if hasattr(self, 'local'):
            if task_source is None:
                task_source = pubUtil.get_task(self.name)
            batch = next(task_source)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for task in batch:
            start_date, dep, arr, days = vyUtil.analysisData(task)
            for offset in range(int(days)):
                flight_date = vyUtil.get_real_date(start_date, offset)
                query = {
                    'origination-airport': dep,
                    'destination-airport': arr,
                    'departure-date': flight_date,
                    'number-adult-passengers':
                        self.custom_settings.get('SEAT_SEARCH'),
                    'number-senior-passengers': 0,
                    'currency': 'USD',
                }
                yield scrapy.Request(
                    url=self.start_urls + parse.urlencode(query),
                    method="GET",
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Fetch crawl tasks and yield the first request for each date window.

    A window that crosses a month boundary is split into two requests,
    one per month (with a one-day overlap on the boundary day).
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    while True:
        result = pubUtil.getUrl(self.name, 1)
        if not result:
            # BUG FIX: original read `self.log('get task error'), 20`, which
            # built a discarded tuple — the log level 20 (INFO) belongs
            # inside the call.
            self.log('get task error', 20)
            time.sleep(10)
            continue
        for data in result:
            # Normalize the raw task into (date, departure, destination, days).
            (dt, dep, to, days) = vyUtil.analysisData(data)
            dt_datetime = datetime.strptime(dt, '%Y%m%d')
            end_date = dt_datetime + timedelta(days=int(days))
            dt = dt_datetime.strftime('%Y-%m-%d')
            if dt_datetime.month != end_date.month:
                # Window spans two months: request up to and including the
                # first day of the next month, then the remainder.
                next_fday = datetime(end_date.year, end_date.month, 1)
                days_before = (next_fday - dt_datetime).days
                next_fday_str = next_fday.strftime('%Y-%m-%d')
                yield self.first_request(dep, to, dt, days_before + 1)
                yield self.first_request(dep, to, next_fday_str,
                                         int(days) - days_before)
            else:
                # Consistency fix: pass an int like the branch above does
                # (`days` arrives as a string from analysisData).
                yield self.first_request(dep, to, dt, int(days))
def start_requests(self):
    """Pull crawl tasks and yield one form POST per (route, date) pair."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    tasks = None
    while True:
        # Local mode uses a lazily-created task iterator; otherwise pull
        # a batch from the remote service.
        if hasattr(self, 'local'):
            if tasks is None:
                tasks = pubUtil.get_task(self.name)
            batch = next(tasks)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for item in batch:
            start_date, dep, arr, days = vyUtil.analysisData(item)
            for offset in range(int(days)):
                day = vyUtil.get_real_date(start_date, offset)
                payload = {
                    'depCity1': dep,
                    'arrCity1': arr,
                    'depDate1': day,
                }
                payload.update(self.custom_settings.get('PAY_LOAD'))
                yield scrapy.FormRequest(
                    self.start_url,
                    formdata=payload,
                    meta={'payload': payload},
                    callback=self.parse,
                    dont_filter=True,
                    errback=self.err_back,
                )
def start_requests(self):
    """Pull crawl tasks and yield one templated POST query per day."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    task_iter = None
    while True:
        if hasattr(self, 'local'):
            if task_iter is None:
                task_iter = pubUtil.get_task(self.name)
            batch = next(task_iter)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(6)
            continue
        for item in batch:
            raw_date, dep, to, days = vyUtil.analysisData(item)
            # Parse the window start once; each day is a timedelta offset.
            base_date = datetime.strptime(raw_date, '%Y%m%d')
            for offset in range(int(days)):
                dt = (base_date + timedelta(days=offset)).strftime('%Y-%m-%d')
                post_data = self.custom_settings.get(
                    'POST_DATA_FORMAT').copy()
                # Fill the query template with seats, route and date.
                post_data['query'] = post_data.get('query') % (
                    self.seats, to, dep, dt)
                yield scrapy.Request(
                    url=self.start_urls,
                    method="POST",
                    body=json.dumps(post_data),
                    meta={'post_data': post_data},
                    dont_filter=True,
                )
def start_requests(self):
    """Pull crawl tasks and yield one JSON POST per (route, date) pair."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    tasks = None
    while True:
        if hasattr(self, 'local'):
            if tasks is None:
                tasks = pubUtil.get_task(self.name, 60)
            batch = next(tasks)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for item in batch:
            start_date, dep, arr, days = vyUtil.analysisData(item)
            for offset in range(int(days)):
                day = vyUtil.get_real_date(start_date, offset)
                # Renamed from `data` to avoid shadowing the batch item.
                body = {'beginCity': dep, 'endCity': arr, 'beginDate': day}
                body.update(self.custom_settings.get('DEFAULT_DATA'))
                yield scrapy.Request(
                    url=self.start_urls,
                    method="POST",
                    body=json.dumps(body),
                    meta={'data': body},
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Pull crawl tasks and yield one cookied JSON POST per day.

    Rotates the session cookie and InstallationID by the current hour so
    consecutive batches spread across the available identities, and records
    each (date, route) in ``self.task`` for invalidation bookkeeping.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.carrier, 1)
        if not result:
            time.sleep(6)
            continue
        # Pick the identity for this batch from the rotating pools.
        hour = datetime.now().hour + 2
        self.cookie = self.cookies[hour % len(self.cookies)]
        installid = self.InstallationID[hour % len(self.InstallationID)]
        for data in result:
            (dt_st, dep, to, days) = vyUtil.analysisData(data)
            for i in range(int(days)):
                dt = vyUtil.get_real_date(dt_st, i)
                self.task.append({
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': settings.INVALID_TIME
                })
                dt = dt + 'T00:00:00'
                data_list = {
                    'InstallationID': installid,
                    'AirportDateTimeList': [{
                        'MarketDateDeparture': dt,
                        'DepartureStation': dep,
                        'ArrivalStation': to,
                    }]
                }
                data_list.update(self.custom_settings.get('DEFAULT_DATA'))
                yield scrapy.Request(
                    method='POST',
                    url=self.start_url,
                    headers={'Cookie': self.cookie},
                    body=json.dumps(data_list),
                    meta={'data_list': data_list},
                    callback=self.parse,
                    dont_filter=True,
                    # BUG FIX: bind the payload as a default argument. The
                    # original lambda closed over the loop variable
                    # `data_list`, so a late-firing errback always saw the
                    # payload of the *last* yielded request.
                    errback=lambda failure, dl=data_list:
                        self.download_errback(failure, dl))
def start_requests(self):
    """Pull crawl tasks and yield one GET request per day.

    Each (date, route) is also appended to ``self.task`` for invalidation
    bookkeeping; the search parameters ride along in ``meta['post_data']``.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                            self.version))
    while True:
        batch = pubUtil.getUrl(self.carrier, 5)
        if not batch:
            logging.info('get task error')
            time.sleep(3)
            continue
        for item in batch:
            raw_date, dep, to, days = vyUtil.analysisData(item)
            for offset in range(int(days)):
                dt = (datetime.strptime(raw_date, '%Y%m%d')
                      + timedelta(days=offset)).strftime('%Y/%m/%d')
                self.task.append({
                    'date': dt.replace('/', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': settings.INVALID_TIME
                })
                post_data = {
                    "flight_search_parameter[0][departure_date]": dt,
                    "flight_search_parameter[0][departure_airport_code]": dep,
                    "flight_search_parameter[0][arrival_airport_code]": to,
                }
                post_data.update(
                    self.custom_settings.get('DEFAULT_POST_DATA'))
                yield scrapy.Request(
                    url=self.start_urls[1],
                    method="GET",
                    meta={'post_data': post_data},
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Pull crawl tasks and yield one date-availability POST per window.

    The window end date travels in ``meta['end_date']`` so ``date_parse``
    can page through the remaining days.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    self.get_headers()
    while True:
        result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Normalize the raw task into (date, departure, destination, days).
            (dt, dep, to, days) = vyUtil.analysisData(data)
            dt_datetime = datetime.strptime(dt, '%Y%m%d')
            end_date = dt_datetime + timedelta(days=int(days))
            dt = dt_datetime.strftime('%Y-%m-%d')
            data_post = dict(
                DepartureDate=dt,
                DepartureStation=dep,
                ArrivalStation=to,
            )
            data_post.update(self.custom_settings.get('GET_DATE_DATA'))
            yield scrapy.Request(
                method='POST',
                url=self.start_urls[0],
                body=json.dumps(data_post),
                headers=self.custom_settings.get(
                    'DEFAULT_REQUEST_HEADERS'),
                meta={'end_date': end_date},
                dont_filter=True,
                callback=self.date_parse,
                # BUG FIX: bind loop values as default arguments. The
                # original lambda closed over `data_post`/`end_date`, so a
                # late-firing errback saw the values from the *last* loop
                # iteration rather than those of the failed request.
                errback=lambda failure, dp=data_post, ed=end_date:
                    self.download_errback(failure, dp, ed),
            )