def start_requests(self):
    """Seed requests: forever pull route tasks (local generator or task API)
    and yield one POST search request per (route, date) pair.

    Each task expands into `days` consecutive dates starting at the task's
    start date.  Never returns; sleeps 6s when the task queue is empty.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.carrier, 1)
        if not result:
            time.sleep(6)
            continue
        for data in result:
            # Task string -> (start date YYYYMMDD, departure, destination, day span).
            (_dt, dep, to, days) = vyUtil.analysisData(data)
            for i in range(int(days)):
                dt = (datetime.strptime(_dt, '%Y%m%d') +
                      timedelta(days=i)).strftime('%Y-%m-%d')
                # dt, dep, to = '2019-02-28', 'BLR', 'BKK'
                # Copy the template so the format string is not consumed twice.
                post_data = self.custom_settings.get(
                    'POST_DATA_FORMAT').copy()
                # NOTE(review): placeholder order is (seats, to, dep, dt) —
                # destination before origin; matches the template, verify there.
                post_data['query'] = post_data.get('query') % (self.seats, to,
                                                              dep, dt)
                yield scrapy.Request(
                    url=self.start_urls,
                    method="POST",
                    body=json.dumps(post_data),
                    meta={'post_data': post_data},
                    dont_filter=True,
                )
def start_requests(self):
    """Forever: poll 'BE' tasks and yield one GET search request per task.

    Sleeps 10s when the task queue is empty; skips dates rejected by
    pubUtil.dateIsInvalid.
    """
    while True:
        result = pubUtil.getUrl('BE', 10)
        if not result:
            logging.info('no task! sleep 10s...')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            if pubUtil.dateIsInvalid(dt):
                continue
            temp = {
                'depart': dep,
                'arr': to,
                'departing': dt,
                'returning': '',
                'promo-code': '',
                'adults': 1,
                'teens': 0,
                'children': 0,
                'infants': 0
            }
            # BUG FIX: was a bare `except:` that swallowed every error.
            # Only AttributeError (Python 2 has no urllib.parse.urlencode)
            # should trigger the legacy fallback.
            try:
                params = urllib.parse.urlencode(temp)
            except AttributeError:
                params = urllib.urlencode(temp)
            url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
            yield scrapy.Request(
                url,
                callback=self.parse,
                dont_filter=True,
                errback=lambda x: self.download_errback(x, url))
def start_requests(self):
    """Forever: poll 'aq' tasks (or a local generator) and POST an Amadeus
    FlexPricerAvailability query per task.

    The body is a pre-baked, URL-encoded Amadeus form string with only the
    route and date substituted in.  Never returns; sleeps 20s on empty queue.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=1)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('aq', 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            # logging.info("###input data: " + data)
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt,dep,to='2019-03-28','PVG','TPE'
            # ua = UserAgent()
            # self.headers['user-agent'] = ua.random
            # Pre-baked Amadeus form body; only route and date vary per task.
            post_data = 'B_LOCATION_1=' + dep + '&E_LOCATION_1=' + to + '&B_DATE_1=' + dt.replace(
                '-', ''
            ) + '0000&B_ANY_TIME_1=True&EMBEDDED_TRANSACTION=FlexPricerAvailability&ARRANGE_BY=D&DISPLAY_TYPE=2&PRICING_TYPE=O&SO_SITE_MATRIX_CALENDAR=FALSE&SO_SITE_RUI_CAL_AVAI_NO_RECO=TRUE&SO_SITE_RUI_FP_AVAI_PRESEL=FALSE&COMMERCIAL_FARE_FAMILY_1=NEWECOOW&COMMERCIAL_FARE_FAMILY_2=NEWDELOW&COMMERCIAL_FARE_FAMILY_3=NEWBIZOW&SO_SITE_RUI_AX_CAL_ENABLED=TRUE&SO_SITE_CAL_CHANGE_WEEK=TRUE&SO_SITE_RUI_HIDE_MDF_SRC=FALSE&EXTERNAL_ID%236=OW&TRAVELLER_TYPE_1=ADT&TRIP_TYPE=O&TRIP_FLOW=YES&SO_SITE_EXPORT_CONFIRM=TRUE&SO_SITE_EXPORT_CONF_URL=https%3A%2F%2Fbooking.evaair.com%2Fexporttripplan%2Fwebservice.aspx&SO_SITE_THREEDS_USE=N&SO_SITE_BILLING_NOT_REQUIRED=Y&SO_SITE_BILL_ADD_OPTIONS=BILL_ADD_HIDDEN&SO_SITE_PREBOOK_CANCELLATION=TRUE&SO_GL=%3C%3Fxml+version%3D%221.0%22+encoding%3D%22iso-8859-1%22%3F%3E%0D%0A%3CSO_GL%3E%0D%0A%3CGLOBAL_LIST+mode%3D%22partial%22%3E%0D%0A%3CNAME%3ESL_AIR_MOP%3C%2FNAME%3E%0D%0A%3CLIST_ELEMENT%3E%0D%0A%3CCODE%3ECC%3C%2FCODE%3E%0D%0A%3CLIST_VALUE%3ECredit+Card%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EY%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECryptic%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%25T%25I%2F%25E%2F%25C%25F%2FN%25A%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%2F%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3C%2FLIST_ELEMENT%3E%0D%0A%3C%2FGLOBAL_LIST%3E%0D%0A%3C%2FSO_GL%3E&SO_SITE_FD_DISPLAY_MODE=1&SO_SITE_CURRENCY_FORMAT_JAVA=0&SO_SITE_ENABLE_SRV_POLICY=BAG%2CCOA&SO_SITE_ALLOW_SPEC_REQ_SERV=FALSE&SO_SITE_SD_TRUE_OP_CARRIER=TRUE&SO_SITE_BARCODE_ENABLE=TRUE&SO_SITE_ALLOW_CS_CODE_SHARE=FALSE&SO_SITE_USE_PAYMENT_ACTION=TRUE&EXTERNAL_ID=AIBS&EXTERNAL_ID%232=&EXTERNAL_ID%233=&EXTERNAL_ID%234=NEWECOOW&EXTERNAL_ID%235=&EXTERNAL_ID%2314=N&EXTERNAL_ID%2312=&EXTERNAL_ID%2313=zh_CN&EXTERNAL_ID%2399=C5WBKT102%23%23flyeva&DIRECT_LOGIN=NO&SO_SITE_RUI_MULTIDEV_ENABLED=TRUE&SO_SITE_RUI_TABLET_PG_LIST=ALL&SO_SITE_RUI_MOBILE_PG_LIST=ALL&SO_SITE_RUI_DISP_FF_TABLE=TRUE&SO_SITE_RUI_UPSLL_T_MDL=TRUE&SO_SITE_RUI_UPSLL_T_MDL_ATC=TRUE&SO_SITE_RUI_DPICKER_NATIVE=TABLET%2CMOBILE&MC_FORCE_DEVICE_TYPE=MOBILE&SO_SITE_RUI_MOBILE_FLOW=ALL&SO_SITE_RUI_TABLET_FLOW=ALL&SO_SITE_RUI_COLLAPSE_BOUND_T=TWO_STEPS&SO_SITE_RUI_UPSLL_HIDE_BTNS=FALSE&SO_SITE_OFFICE_ID=SHABR08AA&LANGUAGE=CN&SITE=CAWXCNEW'
            url_data = {
                "ENCT": "1",
                "ENC": "990572D723A7BC83F77B4C6C03C696340674137066140FF11D721B8765E55FF8DC0562E080CE4BD1CD01272028CBBA89",
                # current query timestamp, passed to the endpoint
                "ENC_TIME": time.strftime("%Y%m%d%H%M%S", time.localtime())
            }
            # Invalidation record consumed downstream via meta['invalid'].
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            # NOTE(review): urllib.urlencode is Python 2 only — confirm the
            # deployment interpreter (sibling spiders use urllib.parse).
            url_data = urllib.urlencode(url_data)
            self.url = self.start_urls[0] + '?' + url_data
            # print '# url: ', url
            # print '# url_data: ', url_data
            # ip = '127.0.0.1:8888'
            # ip = '127.0.0.1:1080'
            yield scrapy.Request(
                self.url,
                headers=self.headers,
                body=post_data,
                callback=self.parse,
                dont_filter=True,
                # meta={'invalid': invalid, 'proxy': ip},
                meta={'invalid': invalid},
                method='POST',
                errback=self.errback)
def start_requests(self):
    """Forever: fetch route tasks and yield one GET availability request
    per day in each task's date span."""
    heartbeat_permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num,
                            heartbeat_permins, self.version))
    task_iter = None
    while True:
        # Local mode pulls from a task generator; otherwise hit the task API.
        if hasattr(self, 'local'):
            if task_iter is None:
                task_iter = pubUtil.get_task(self.name)
            batch = next(task_iter)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for task in batch:
            start_date, origin, destination, day_count = vyUtil.analysisData(task)
            for offset in range(int(day_count)):
                flight_date = vyUtil.get_real_date(start_date, offset)
                query = {
                    'origination-airport': origin,
                    'destination-airport': destination,
                    'departure-date': flight_date,
                    'number-adult-passengers':
                        self.custom_settings.get('SEAT_SEARCH'),
                    'number-senior-passengers': 0,
                    'currency': 'USD',
                }
                yield scrapy.Request(
                    url=self.start_urls + parse.urlencode(query),
                    method="GET",
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Forever: fetch route tasks and POST one form search per day in each
    task's date span."""
    heartbeat_permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num,
                          heartbeat_permins, self.version))
    task_iter = None
    while True:
        if hasattr(self, 'local'):
            if task_iter is None:
                task_iter = pubUtil.get_task(self.name)
            batch = next(task_iter)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for task in batch:
            first_day, origin, destination, day_count = vyUtil.analysisData(task)
            for offset in range(int(day_count)):
                depart_day = vyUtil.get_real_date(first_day, offset)
                # Route/date fields first, then the static template fields.
                form = {
                    'depCity1': origin,
                    'arrCity1': destination,
                    'depDate1': depart_day,
                }
                form.update(self.custom_settings.get('PAY_LOAD'))
                yield scrapy.FormRequest(
                    self.start_url,
                    formdata=form,
                    meta={'payload': form},
                    callback=self.parse,
                    dont_filter=True,
                    errback=self.err_back,
                )
def start_requests(self):
    """Forever: fetch route tasks and POST one JSON availability query per
    day in each task's date span."""
    heartbeat_permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num,
                          heartbeat_permins, self.version))
    task_iter = None
    while True:
        if hasattr(self, 'local'):
            if task_iter is None:
                task_iter = pubUtil.get_task(self.name, 60)
            batch = next(task_iter)
        else:
            batch = pubUtil.getUrl(self.carrier, 1)
        if not batch:
            time.sleep(3)
            continue
        for task in batch:
            first_day, origin, destination, day_count = vyUtil.analysisData(task)
            for offset in range(int(day_count)):
                depart_day = vyUtil.get_real_date(first_day, offset)
                # Route/date fields first, then the static defaults.
                payload = {
                    'beginCity': origin,
                    'endCity': destination,
                    'beginDate': depart_day,
                }
                payload.update(self.custom_settings.get('DEFAULT_DATA'))
                yield scrapy.Request(
                    url=self.start_urls,
                    method="POST",
                    body=json.dumps(payload),
                    meta=dict(data=payload),
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Forever: fetch tasks and fan out via self.first_request, splitting any
    date span that crosses a month boundary into two requests."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl(self.name, 1)
        if not result:
            # BUG FIX: was `self.log('get task error'), 20`, which built a
            # discarded tuple and never passed the log level; the level (20 =
            # INFO) belongs inside the call.
            self.log('get task error', 20)
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (start date YYYYMMDD, departure, destination, day span).
            (dt, dep, to, days) = vyUtil.analysisData(data)
            # dep, to, dt, days = 'RHO', 'PMO', '2018-08-15', 30
            dt_datetime = datetime.strptime(dt, '%Y%m%d')
            end_date = dt_datetime + timedelta(days=int(days))
            dt = dt_datetime.strftime('%Y-%m-%d')
            if dt_datetime.month != end_date.month:
                # Span crosses into the next month: one request covers the
                # remainder of the current month, one covers the overflow.
                next_fday = datetime(end_date.year, end_date.month, 1)
                days_before = (next_fday - dt_datetime).days
                next_fday_str = next_fday.strftime('%Y-%m-%d')
                yield self.first_request(dep, to, dt, days_before + 1)
                yield self.first_request(dep, to, next_fday_str,
                                         int(days) - days_before)
            else:
                yield self.first_request(dep, to, dt, days)
def start_requests(self):
    """Forever: poll 'je' tasks and POST one JSON availability search each.

    Heartbeats once at startup; sleeps 10s when the queue is empty.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('je', 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # logging.info("###input data: " + data)
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # Parameter dict for the target endpoint.  Booleans are the
            # literal strings 'false'/'0' the site expects, not Python bools.
            post_data = {
                "AgencyCode": "",
                "AirportFrom": dep,
                "AirportTo": to,
                "BoardDate": dt,
                "CarPackage": 'false',
                "ReturnDate": "",
                "SearchType": "Normal",
                "AvailType": "",
                "IsReturnFlight": 'false',
                "IsBusiness": 'false',
                "Adults": self.ADT,
                "Children": "0",
                "Infants": "0",
                "FareDesignator": "",
                "EdgarsClubCard": "",
                "VoyagerState": '0',
                "HaveErrors": 'false',
                "IsChangeBooking": 'false',
                "MomentumClientNumber": "",
                "OutSegmentKeyFromRedirect": "",
                "InSegmentKeyFromRedirect": "",
                "isMobile": 'false',
                "CriteriaSearchType": "Day"
            }
            # Invalidation record consumed downstream via meta['invalid'].
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            PayLoad = json.dumps(post_data)
            yield scrapy.Request(self.start_urls[0],
                                 body=PayLoad,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 method='POST',
                                 errback=self.errback)
def start_requests(self):
    """Forever: poll 'FZ' tasks and POST one JSON one-way search per task.

    The site expects MM/DD/YYYY dates with a ' 12:00 AM' suffix.
    """
    permins = 0
    print(pubUtil.heartbeat(self.host_name,self.name,self.num,permins,self.version))
    while True:
        result = pubUtil.getUrl('FZ', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-09-13','DXB','BEY'
            # Reformat YYYY-MM-DD -> MM/DD/YYYY for the site's API.
            dt_change = datetime.strptime(dt,'%Y-%m-%d').strftime('%m/%d/%Y')
            print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            payload = {
                "journeyType": "ow",
                "isOriginMetro": False,
                "isDestMetro": False,
                "variant": "0",
                "searchCriteria": [{
                    "origin": dep,
                    "dest":to,
                    "originDesc": "",
                    "destDesc": "",
                    "isOriginMetro": False,
                    "isDestMetro": False,
                    "direction": "outBound",
                    "date": "%s 12:00 AM"%dt_change
                }],
                "paxInfo": {
                    "adultCount": seat,
                    "infantCount": 0,
                    "childCount": 0
                }
            }
            # Invalidation record consumed downstream via meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            body=json.dumps(payload)
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa = (dep, to, dt)
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback
                                 )
def start_requests(self):
    """Forever: poll 'BJ' tasks, chunk each date span by DURATION, and POST a
    form-encoded search for each chunk."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
    while True:
        result = pubUtil.getUrl('BJ', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination, day span).
            (dt, dep, to, days) = pubUtil.analysisData_5j(data)
            # for i in range(1):
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to= '2018-11-16','TUN','MRS'
            print(dep, to, dt)
            seat = self.custom_settings.get('SEAT')
            duration = self.custom_settings.get("DURATION")
            # self.log('%s:%s:%s:%s' % (dt, dep, to, days), 20)
            # Walk the span in `duration`-sized windows.
            for i in range(0, int(days), duration):
                begin_dt, end_dt = pubUtil.time_add_5j(dt, i, duration)
                payload = {
                    'adultes': seat,
                    'aller': begin_dt,
                    'bebes': '0',
                    'devise': 'TND',
                    'enfants': '0',
                    'felxibilite': '3',
                    'retour': '',
                    'sens': '1'
                }
                # Hand-built form body; field order follows dict iteration
                # and the trailing '&' is left in (the endpoint accepts it).
                body = ''
                for key in payload:
                    body = body + key + '=' + str(payload.get(key)) + '&'
                url = self.start_urls[0] + '%s/%s'%(dep,to)
                # Invalidation record consumed downstream via meta.
                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME'),
                }
                meta_data = dict(
                    invalid=invalid,
                    payload=body,
                    aaa=(dep, to, dt),
                    url=url,
                )
                yield scrapy.Request(url,
                                     callback=self.parse,
                                     method='POST',
                                     headers=self.custom_settings.get('HEADERS'),
                                     meta={'meta_data': meta_data},
                                     body=body,
                                     errback=self.errback
                                     )
def start_requests(self):
    """Forever: fetch route tasks and POST one JSON availability request per
    day, rotating cookie/InstallationID by wall-clock hour.

    Side effects: appends an invalidation record per request to self.task and
    stores the chosen cookie on self.cookie.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.carrier, 1)
        if not result:
            time.sleep(6)
            continue
        # Rotate credentials hourly (the +2 offset shifts the schedule).
        hour = datetime.now().hour + 2
        self.cookie = self.cookies[hour % len(self.cookies)]
        installid = self.InstallationID[hour % len(self.InstallationID)]
        for data in result:
            # Task string -> (start date, departure, destination, day span).
            (dt_st, dep, to, days) = vyUtil.analysisData(data)
            # dep, to = 'CDG', 'VIE'
            for i in range(int(days)):
                dt = vyUtil.get_real_date(dt_st, i)
                # dt = '2018-11-01'
                self.task.append({
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': settings.INVALID_TIME
                })
                # Endpoint expects an ISO timestamp with midnight time part.
                dt = dt + 'T00:00:00'
                data_list = {
                    'InstallationID': installid,
                    'AirportDateTimeList': [{
                        'MarketDateDeparture': dt,
                        'DepartureStation': dep,
                        'ArrivalStation': to,
                    }]
                }
                data_list.update(self.custom_settings.get('DEFAULT_DATA'))
                yield scrapy.Request(
                    method='POST',
                    url=self.start_url,
                    headers={'Cookie': self.cookie},
                    body=json.dumps(data_list),
                    meta={'data_list': data_list},
                    callback=self.parse,
                    dont_filter=True,
                    errback=lambda x: self.download_errback(x, data_list))
def start_requests(self):
    """Forever: poll tasks of the form 'DEP-ARR:YYYYMMDD:COUNT', expand COUNT
    consecutive days, and POST a form-encoded search per day."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name)
        if not result:
            time.sleep(6)
            continue
        for data in result:
            # Task format: [u'TLN-CFE:20181110:1'] -> route, date, day count.
            task_data_list = data.split(':')
            count = int(task_data_list[2])
            (dt, dep, arr) = pubUtil.analysisData(task_data_list[0] + ':' +
                                                  task_data_list[1])
            _date = datetime.strptime(dt, '%Y-%m-%d')
            for i in range(count):
                date = _date + timedelta(days=i)
                # Endpoint wants YYYYMMDD plus a literal '0000' time suffix.
                date = date.strftime('%Y%m%d0000')
                # Map airport codes to city codes where a mapping exists.
                dep = self.port_city.get(dep, dep)
                arr = self.port_city.get(arr, arr)
                # logging.info('# input data: ' + dep + '' + arr + '' + date)
                # Invalidation record consumed downstream via meta['invalid'].
                # (date has no dashes here, so the replace is a no-op kept
                # for symmetry with the sibling spiders.)
                invalid = {
                    'date': date.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                post_data = urllib.urlencode(
                    ly_post_data.second_post_data(dep, arr, date, self.ADT))
                yield scrapy.Request(self.start_urls[0],
                                     body=post_data,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid},
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Forever: poll 'by' tasks and yield one GET search request per task.

    NOTE(review): the current task's dep/arr/date are stored on self; if
    requests are processed concurrently later, callbacks must not rely on
    these attributes — verify downstream usage.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('by', 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            logging.info("###input data: " + data)
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            self.dep = dep
            self.arr = to
            self.date = dt
            second_data = {
                'flyingFrom[]': self.dep,
                'flyingTo[]': self.arr,
                'depDate': self.date,
                'returnDate': '',
                'adults': self.ADT,
                'children': '0',
                'infants': '0',
                'infantAge': '',
                'isOneWay': 'true',
                'childAge': '',
                'searchType': 'selected',
                'tabId': dep,
                'cycleDates': dt,
                'duration': '0'
            }
            second_url = '%s%s' % (self.second_url[0],
                                   urllib.urlencode(second_data))
            # Invalidation record consumed downstream via meta['invalid'].
            invalid = {
                'date': self.date.replace('-', ''),
                'depAirport': self.dep,
                'arrAirport': self.arr,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            yield scrapy.Request(second_url,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 errback=self.errback)
def start_requests(self):
    """Forever: poll 'TT' tasks (or a local generator) and POST a JSON fare
    query per task, with a per-request constructed User-Agent."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    result = None
    while True:
        # Local mode: re-create the generator whenever the last batch was empty.
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TT', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2019-01-25', 'SYD', 'PER'
            seat = self.custom_settings.get('SEAT')
            payload = {
                'currencyCode': 'AUD',
                'departureDate': dt,
                'destination': to,
                'numAdults': seat,
                'numChildren': 0,
                'numInfants': 0,
                'origin': dep,
                'promoCode': ''
            }
            # Invalidation record consumed downstream via meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                payload=payload,
            )
            headers = self.custom_settings.get('HEADERS')
            # headers['User-Agent'] = random.choice(self.ua_data)[0]
            # Fresh UA per request to reduce blocking.
            headers['User-Agent'] = self.ua_construction()
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 headers=headers,
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=json.dumps(payload),
                                 errback=self.errback)
def start_requests(self):
    """Forever: poll tasks (or a local generator) and POST a JSON one-way
    search per task, randomizing the User-Agent each time."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dep, to, dt = 'FUK', 'YNT', '2019-03-27'
            post_data = {
                "tripType": "OW",
                "orgCode": dep,
                "dstCode": to,
                "takeoffdate1": dt,
            }
            # Random User-Agent per request.
            ua = UserAgent()
            self.headers['User-Agent'] = ua.random
            # post_data = urllib.urlencode(post_data)
            # logging.info("###input data: " + dep + to + dt)
            # Invalidation record consumed downstream via meta['invalid'].
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            yield scrapy.Request(
                self.start_urls[0],
                headers=self.headers,
                body=json.dumps(post_data),
                # body=post_data,
                callback=self.parse,
                dont_filter=True,
                # meta={'invalid': invalid, 'proxy': 'http://127.0.0.1:8888'},
                meta={'invalid': invalid},
                method='POST',
                errback=self.errback)
def start_requests(self):
    """Forever: poll 'TR' tasks (or a local generator) and POST a
    form-encoded fare query per task."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            if not result_iter or not result:  # local task source (stub)
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TR', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2019-01-12', 'SIN', 'TAO'
            # BUG FIX: was the Python 2-only statement `print dt, dep, to`
            # (a SyntaxError on Python 3, and inconsistent with the sibling
            # spiders, which all use the function form).
            print(dt, dep, to)
            seat = self.custom_settings.get('SEAT')
            querystring = {
                'adt': seat,
                'arcity': to,
                'chd': '0',
                'dpcity': dep,
                'dpdate': dt,
                'inft': '0',
                'promo': '',
                'type': '1'
            }
            # Hand-built form body; field order follows dict iteration and
            # the trailing '&' is left in (the endpoint accepts it).
            data = ''
            for key in querystring:
                data = data + key + '=' + str(querystring.get(key)) + '&'
            # Invalidation record consumed downstream via meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                data=data,
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 headers=self.custom_settings.get('HEADERS'),
                                 method='POST',
                                 meta={'meta_data': meta_data},
                                 body=data,
                                 errback=self.errback)
def start_requests(self):
    """Forever: poll tasks like 'BVE-LYS-201812030000-15' and, per task,
    request a session first (get_session callback carries the real query)."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, step=7)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        # Loop over tasks (currently one per batch by default).
        for data in result:
            # Task format: BVE-LYS-201812030000-15 (route-date-count).
            count = int(data.split(':')[-1])
            # NOTE(review): data[:-2] assumes the ':COUNT' suffix is exactly
            # two characters, i.e. a single-digit count — confirm upstream.
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            # Endpoint wants YYYYMMDD plus a literal '0000' time suffix.
            date = date.replace('-', '') + '0000'
            # logging.info('# input data: ' + dep + '-' + arr + '-' + date + '-' + str(count))
            task_data = {
                'dep': dep,
                'arr': arr,
                'date': date,
                'count': count
            }
            post_data = urllib.urlencode(
                a5_post_data.first_post_data(dep, arr, date, self.ADT))
            # Fetch a session first; the real search happens in get_session.
            yield scrapy.Request(
                self.get_session_url[0],
                body=post_data,
                callback=self.get_session,
                dont_filter=True,
                meta={
                    'post_data': post_data,
                    'task_data': task_data
                },
                method='POST',
                errback=self.errback,
            )
def start_requests(self):
    """Forever: poll tasks (or a local generator) and POST a form-encoded
    search per task; rotates session token per batch."""
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        # Pick a fresh session token for this batch.
        self.session_data['tck'] = random.choice(self.id_pool)
        for data in result:
            # logging.info("###input data: " + data)
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt,dep,to='2019-02-28','CAN','RGN'
            post_data = {
                'traveldate': dt,
                'ori': dep,
                'currency': 'CNY',
                'dest': to
            }
            # Invalidation record consumed downstream via meta['invalid'].
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            post_data = urllib.urlencode(post_data)
            yield scrapy.Request(self.start_urls[0],
                                 headers=self.headers,
                                 body=post_data,
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta={'invalid': invalid},
                                 method='POST',
                                 errback=self.errback)
def start_requests(self):
    """Forever: poll 'KC' tasks and POST a session request per task.

    The site wants DD.MM.YYYY dates, while the invalidation record (like
    every sibling spider) stores YYYYMMDD.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl('KC', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, arr) = pubUtil.analysisData(data)
            # Keep the ISO date before reformatting for the site.
            iso_dt = dt
            # YYYY-MM-DD -> DD.MM.YYYY as the endpoint expects.
            dt = re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3.\2.\1', dt)
            print(dep, arr, dt)
            payload = {
                'captchaResponse': '',
                'pReturnDateStr': '',
                'pFlightDateStr': dt,
                'pRequest': {
                    'TwoWayRoute': 'false',
                    'DateAreFlexible': 'true',
                    'Origin': dep,
                    'Destination': arr,
                    'Bookingclass': 'ECO',
                    'Adult': '3',
                    'Child': '0',
                    'Infant': '0',
                    'Resident': 'false'
                },
            }
            # BUG FIX: the original computed dt.replace('-', '') AFTER dt had
            # been rewritten to DD.MM.YYYY, so the replace was a no-op and the
            # stored date kept dots and the wrong field order.  Store YYYYMMDD
            # from the original ISO date, matching the sibling spiders.
            invalid = {
                'date': iso_dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': arr,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(invalid=invalid, payload=payload)
            yield scrapy.Request(
                self.custom_settings.get('sessionID_url'),
                callback=self.data_requests,
                method='POST',
                headers=self.custom_settings.get('start_headers'),
                meta={'meta_data': meta_data},
                body=json.dumps(payload),
                errback=self.errback)
def start_requests(self):
    """Forever: poll 'LA' tasks and issue one GET fare query per task."""
    heartbeat_permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num,
                          heartbeat_permins, self.version))
    while True:
        tasks = pubUtil.getUrl('LA', 5)
        if not tasks:
            logging.info('get task error')
            time.sleep(10)
            continue
        for task in tasks:
            flight_date, origin, destination = pubUtil.analysisData(task)
            market = "BR"
            adults = self.custom_settings.get('SEAT')
            query = {
                "country": market,
                "origin": origin,
                "destination": destination,
                "departure": flight_date,
                "adult": adults,
            }
            # Hand-built query string (trailing '&' kept, as the endpoint
            # already tolerates it).
            request_url = self.start_urls[0] + '?'
            for field in query:
                request_url += '%s=%s&' % (field, query.get(field))
            invalid = {
                'date': flight_date.replace('-', ''),
                'depAirport': origin,
                'arrAirport': destination,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(invalid=invalid,
                             params=query,
                             aaa=(origin, destination, flight_date),
                             flight_time=flight_date)
            yield scrapy.Request(
                request_url,
                callback=self.parse,
                method='GET',
                headers=self.custom_settings.get('HEADERS'),
                meta={'meta_data': meta_data},
                errback=self.errback)
def start_requests(self):
    """Forever: fetch tasks and POST one GetAvailabilityDetail JSON query
    per task."""
    heartbeat_permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num,
                          heartbeat_permins, self.version))
    while True:
        tasks = pubUtil.getUrl(self.name, 1)
        if not tasks:
            logging.info('get task error')
            time.sleep(10)
            continue
        for task in tasks:
            # Task string -> (date, departure, destination, day span).
            flight_date, origin, destination, day_span = ddUtil.analysisData(task)
            request_body = {
                'GetAvailabilityDetail': {
                    "Infant": 0,
                    "DepartureAirport": origin,
                    "ArrivalAirport": destination,
                    "Child": 0,
                    "Currency": 'THB',
                    "RoundTripFlag": "0",
                    "Adult": 3,
                    "AgencyCode": "",
                    "ReturnDate": "",
                    "BoardDate": flight_date,
                    "PromotionCode": ""
                }
            }
            request_body.update(self.custom_settings.get('DEFAULT_DATA'))
            yield scrapy.Request(
                method='POST',
                url=self.start_urls,
                body=json.dumps(request_body),
                meta={'data_dict': request_body},
                callback=self.parse,
                dont_filter=True,
                errback=lambda x: self.download_errback(x, request_body))
def start_requests(self):
    """Forever: poll carrier tasks and yield one GET request per day in each
    task's span; records an invalidation entry on self.task per request.

    NOTE(review): the request is a GET and carries `post_data` only in meta —
    the body/formdata lines are commented out; verify self.parse reads meta.
    """
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                          self.version))
    while True:
        result = pubUtil.getUrl(self.carrier, 5)
        if not result:
            logging.info('get task error')
            time.sleep(3)
            continue
        for data in result:
            # Task string -> (start date YYYYMMDD, departure, destination, span).
            (_dt, dep, to, days) = vyUtil.analysisData(data)
            for i in range(int(days)):
                dt = (datetime.strptime(_dt, '%Y%m%d') +
                      timedelta(days=i)).strftime('%Y/%m/%d')
                self.task.append({
                    'date': dt.replace('/', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': settings.INVALID_TIME
                })
                post_data = {
                    "flight_search_parameter[0][departure_date]": dt,
                    "flight_search_parameter[0][departure_airport_code]": dep,
                    "flight_search_parameter[0][arrival_airport_code]": to,
                }
                post_data.update(
                    self.custom_settings.get('DEFAULT_POST_DATA'))
                # print(post_data)
                yield scrapy.Request(
                    url=self.start_urls[1],
                    method="GET",
                    # body=json.dumps(post_data),
                    # formdata=post_data,
                    meta={'post_data': post_data},
                    dont_filter=True,
                    callback=self.parse,
                    errback=self.errback,
                )
def start_requests(self):
    """Forever: pull 'BE' tasks (local generator or task API) and yield one
    GET search request per valid-date task."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                          self.version))
    result_iter, result = None, None
    while True:
        # Local mode: re-create the generator whenever the last batch was empty.
        if hasattr(self, 'local'):
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('BE', 10)
        if not result:
            time.sleep(60)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '2018-11-01', 'EXT', 'JER'
            if pubUtil.dateIsInvalid(dt):
                continue
            temp = {
                'depart': dep,
                'arr': to,
                'departing': dt,
                'returning': '',
                'promo-code': '',
                'adults': 3,
                'teens': 0,
                'children': 0,
                'infants': 0
            }
            # BUG FIX: narrowed the bare `except:` — only AttributeError
            # (urllib.parse missing on Python 2) should trigger the fallback.
            try:
                params = urllib.parse.urlencode(temp)
            except AttributeError:
                params = urllib.urlencode(temp)
            url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
            yield scrapy.Request(url,
                                 callback=self.parse,
                                 dont_filter=True,
                                 errback=self.err_back)
def start_requests(self):
    """Forever: poll 'TW' tasks (or a local generator) and yield one GET
    by-date search per task via the transit callback."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    result_iter = None
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('TW', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        today = datetime.now().strftime('%Y%m%d')
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # dt, dep, to = '20180722', 'ICN', 'KIX'  (sample task for testing)
            params = urllib.urlencode(dict(
                origin=dep,
                destination=to,
                onwardDateStr=dt.replace('-', ''),
                # pointOfPurchase='KR',
                paxTypeCountStr='3,0,0',
                today=today,
                travelType='OW',
                searchType='byDate',
                # domesticYn='Y',
                bundleAmountOW=0,
                bundleAmountRT=0,
                routeCls='AS',
                # Millisecond timestamp acts as a cache-buster.
                _=int(time.time() * 1000)
            ))
            total_url = self.start_urls[0] + params
            yield scrapy.Request(url=total_url,
                                 callback=self.transit,
                                 meta={'params': params, 'flag': 1},
                                 dont_filter=True)
def start_requests(self):
    """Forever: fetch tasks and POST one JSON date query per task; the
    date_parse callback walks dates up to end_date."""
    permins = 0
    print(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    # Prime request headers once before the polling loop.
    self.get_headers()
    while True:
        result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (start date YYYYMMDD, departure, destination, span).
            (dt, dep, to, days) = vyUtil.analysisData(data)
            # (dt, dep, to, days) = ('20181026', 'LTN', 'IAS', 30)
            dt_datetime = datetime.strptime(dt, '%Y%m%d')
            # Last date the callback should walk to.
            end_date = dt_datetime + timedelta(days=int(days))
            dt = dt_datetime.strftime('%Y-%m-%d')
            data_post = dict(
                DepartureDate=dt,
                DepartureStation=dep,
                ArrivalStation=to,
            )
            data_post.update(self.custom_settings.get('GET_DATE_DATA'))
            yield scrapy.Request(
                method='POST',
                url=self.start_urls[0],
                # formdata=data_post,
                body=json.dumps(data_post),
                headers=self.custom_settings.get(
                    'DEFAULT_REQUEST_HEADERS'),
                meta={'end_date': end_date},
                dont_filter=True,
                callback=self.date_parse,
                errback=lambda x: self.download_errback(
                    x, data_post, end_date),
            )
def start_requests(self):
    """Forever: poll tasks 'DEP-ARR:YYYYMMDD:COUNT', expand COUNT consecutive
    days, and POST a form-encoded search per day through a rotating proxy.

    When self.proxy_flag is set, blocks until a working proxy is obtained and
    a fresh session cookie is fetched through it.
    """
    permins = 0
    logging.info(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
    result_iter = None
    # Proxy usage timer (disabled)
    # start_time = time.time()
    while True:
        # Local mode reads tasks from a generator; otherwise poll the task API.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=10)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(20)
            continue
        for data in result:
            # logging.info("## input data: " + data)
            # Task format: [u'TLN-CFE:20181110:1'] -> route, date, day count.
            count = int(data.split(':')[-1])
            # NOTE(review): data[:-2] assumes a single-digit count suffix.
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            _date = datetime.strptime(date, '%Y-%m-%d')
            for i in range(count):
                temp_date = _date + timedelta(days=i)
                # Site wants MM/DD/YYYY; the invalid record wants YYYYMMDD.
                date = temp_date.strftime('%m/%d/%Y')
                invalid_date = temp_date.strftime('%Y%m%d')
                # logging.info('# input data: ' + dep + '-' + arr + '-' + date)
                # dep, arr, date = 'FLL', 'LAS', '2019-01-13'
                # Force a proxy change once it exceeds its usage window (disabled):
                # logging.info('ip used time: ' + str(time.time() - start_time))
                # if time.time() - start_time > self.use_time:
                #     self.proxy_flag = True
                #     logging.info('### ip invalid:' + self.proxy)
                if self.proxy_flag:
                    # Spin until we have a proxy that can fetch a session.
                    while True:
                        # Russian proxy pool:
                        # self.proxy = pubUtil.nk_get_ip()
                        # Small-pool proxy:
                        self.proxy = pubUtil.get_proxy(self.name)
                        if self.proxy is None:
                            logging.info('# no get proxy, continue')
                            # time.sleep(60)
                            continue
                        logging.info('# get a new ip: ' + self.proxy)
                        ip_proxies = {"https": "https://" + self.proxy}
                        # Fetch a session through the new proxy; flatten the
                        # cookie jar into a Cookie header string.
                        try:
                            response = requests.get(self.get_session_url,
                                                    proxies=ip_proxies,
                                                    timeout=15)
                            self.cookies_str = json.dumps(requests.utils.dict_from_cookiejar(response.cookies))[
                                               1:-1].replace(
                                '\"', '').replace(
                                ':', '=').replace(' ', '').replace(',', '; ')
                        except Exception as e:
                            logging.info(e)
                            self.proxy_flag = True
                            logging.info('# get session error')
                            continue
                        # Proxy works; restart its usage timer (disabled).
                        # start_time = time.time()
                        self.proxy_flag = False
                        break
                headers = {
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'Cookie': self.cookies_str
                }
                post_data = {
                    'from': dep,
                    'to': arr,
                    # 'from': 'AXM',
                    # 'to': 'ATL',
                    'departDate': date,
                    'departDateDisplay': date,
                    'ADT': self.ADT
                }
                post_data.update(self.custom_settings.get('POST_DATA'))
                post_data = urllib.urlencode(post_data)
                # Invalidation record consumed downstream via meta['invalid'].
                invalid = {
                    'date': invalid_date,
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                yield scrapy.Request(url=self.start_urls[0],
                                     body=post_data,
                                     headers=headers,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid,
                                           'proxy': self.proxy},
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Forever: poll 'G9' tasks and POST one JSON journey search per task."""
    permins = 0
    print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                            self.version))
    while True:
        result = pubUtil.getUrl('G9', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            # Task string -> (date, departure, destination).
            (dt, dep, to) = pubUtil.analysisData(data)
            # for data in self.get_task():
            #     dep, to, dt = data
            # dt,dep,to = '2018-10-16','SHJ','DMM'
            seat = self.custom_settings.get('SEAT')
            payload = {
                "dataModel": {
                    "app": {
                        "apiKey": "api_key",
                        "appVersion": "4.0.3",
                        "language": "en",
                        "os": "android"
                    },
                    "isReturn": False,
                    "journeyInfo": [{
                        # Endpoint wants an ISO timestamp at midnight.
                        "departureDateTime": "%sT00:00:00"%dt,
                        "departureVariance": 0,
                        "destination": to,
                        "destinationCity": False,
                        "origin": dep,
                        "originCity": False
                    }],
                    "preferences": {
                        "cabinClass": "Y",
                        "currency": "USD",
                        "logicalCabinClass": "Y",
                        "promotion": {
                            "code": "",
                            "type": "PROMO_CODE"
                        }
                    },
                    "travellerQuantity": {
                        "adultCount": seat,
                        "childCount": "0",
                        "infantCount": "0"
                    }
                }
            }
            body = json.dumps(payload)
            # Invalidation record consumed downstream via meta.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt),
                flight_time=dt
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback
                                 )
def start_requests(self):
    """Fetch ZE tasks forever and yield one JSON availability POST per day.

    Tasks look like ``u'TLN-CFE:20181110:1'`` (route, base date, day count);
    one request is issued per day in the window.
    """
    permins = 0
    logging.info(
        pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                          self.version))
    result_iter = None
    while True:
        # Local mode pulls tasks from a generator; otherwise ask the task server.
        if hasattr(self, 'local'):
            if not result_iter:
                result_iter = pubUtil.get_task(self.name, days=30)
            result = next(result_iter)
        else:
            result = pubUtil.getUrl(self.name, 1)
        if not result:
            logging.info('get task error')
            time.sleep(5)
            continue
        for data in result:
            # Task format: [u'TLN-CFE:20181110:1'] -> last field is the day count.
            count = int(data.split(':')[-1])
            (date, dep, arr) = pubUtil.analysisData(data[:-2])
            _date = datetime.strptime(date, '%Y-%m-%d')
            for i in range(count):
                # BUG FIX: the original advanced `_date` in place
                # (`_date = _date + timedelta(days=i)`), so the day offsets
                # accumulated as 0, +1, +3, +6, ... instead of 0..count-1.
                temp_date = _date + timedelta(days=i)
                date = temp_date.strftime('%Y%m%d')
                city_code = self.city_dict.get(dep)
                if city_code is None:
                    # BUG FIX: the original only logged the miss and then still
                    # built a request with city_code=None; skip the task instead.
                    logging.info('# not found city: ' + dep)
                    continue
                body = json.dumps(
                    ze_post_data.get_data(dep, arr, date, self.ADT, city_code))
                # Invalidation record: marks this route/date stale after N minutes.
                invalid = {
                    'date': date.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                task_data = {
                    'dep': dep,
                    'arr': arr,
                    'date': date,
                    'city_code': city_code,
                    'body': body
                }
                yield scrapy.Request(self.start_urls[0],
                                     headers=self.headers,
                                     body=body,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={
                                         'invalid': invalid,
                                         'task_data': task_data
                                     },
                                     errback=self.errback,
                                     method='POST')
def start_requests(self):
    """Fetch AD tasks forever and yield one ASP.NET form POST per one-way search."""
    permins = 0
    # CONSISTENCY FIX: sibling spiders log the heartbeat via logging.info,
    # this one used bare print() calls.
    logging.info(pubUtil.heartbeat(self.host_name, self.name, self.num,
                                   permins, self.version))
    while True:
        result = pubUtil.getUrl('AD', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to) = pubUtil.analysisData(data)
            logging.info('%s %s %s' % (dep, to, dt))
            seat = self.custom_settings.get('SEAT')
            payload = {
                'ControlGroupSearch$SearchMainSearchView$TextBoxMarketOrigin1': dep,
                'ControlGroupSearch$SearchMainSearchView$TextBoxMarketDestination1': to,
                # dt is 'YYYY-MM-DD'; month field wants 'YYYY-MM', day field 'DD'.
                'ControlGroupSearch$SearchMainSearchView$DropDownListMarketMonth1':
                    re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\1-\2', dt),
                'ControlGroupSearch$SearchMainSearchView$DropDownListMarketDay1':
                    re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3', dt),
                # BUG FIX: the adult count was hard-coded '5' even though SEAT
                # is read from custom_settings just above.
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_ADT': str(seat),
                'ControlGroupSearch$SearchMainSearchView$TextBoxPromoCode': 'CALLCENT',
                'culture': 'en-US',
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_CHD': '0',
                'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacDestination1': '',
                'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_INFANT': '0',
                'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacOrigin1': '',
                'ControlGroupSearch$SearchMainSearchView$RadioButtonMarketStructure': 'OneWay',
                'ControlGroupSearch$SearchMainSearchView$DropDownListFareTypes': 'R',
                '__EVENTTARGET': 'ControlGroupSearch$LinkButtonSubmit',
            }
            # Invalidation record: marks this route/date stale after N minutes.
            invalid = {
                'date': dt.replace('-', ''),
                'depAirport': dep,
                'arrAirport': to,
                'mins': self.custom_settings.get('INVALID_TIME')
            }
            # Hand-built body (no url-escaping): the ASP.NET field names
            # contain '$' and the endpoint accepts them raw.  join() also
            # drops the stray trailing '&' the old concatenation produced.
            body = '&'.join('%s=%s' % (key, payload[key]) for key in payload)
            meta_data = dict(
                invalid=invalid,
                payload=body,
                aaa=(dep, to, dt),
                flight_time=dt
            )
            yield scrapy.Request(self.start_urls[0],
                                 callback=self.parse,
                                 method='POST',
                                 headers=self.custom_settings.get('HEADERS'),
                                 meta={'meta_data': meta_data},
                                 body=body,
                                 errback=self.errback)
def start_requests(self):
    """Fetch DG tasks forever and yield date-range availability POSTs.

    Each task carries a base date plus a ``days`` window; the window is split
    into ``DURATION``-day chunks and one JSON request is issued per chunk.
    """
    permins = 0
    # CONSISTENCY FIX: sibling spiders log the heartbeat via logging.info,
    # this one used bare print() calls.
    logging.info(pubUtil.heartbeat(self.host_name, self.name, self.num,
                                   permins, self.version))
    result_iter = None
    result = None
    while True:
        if hasattr(self, 'local'):
            # Recreate the local task generator when it is missing or the
            # previous pull came back empty.
            if not result_iter or not result:
                result_iter = self.get_task()
            result = next(result_iter)
        else:
            result = pubUtil.getUrl('DG', 5)
        if not result:
            logging.info('get task error')
            time.sleep(10)
            continue
        for data in result:
            (dt, dep, to, days) = pubUtil.analysisData_5j(data)
            logging.info('%s %s %s' % (dep, to, dt))
            seat = self.custom_settings.get('SEAT')
            duration = self.custom_settings.get("DURATION")
            # Split the `days`-long window into `duration`-day chunks.
            for i in range(0, int(days), duration):
                begin_dt, end_dt = pubUtil.time_add_5j(dt, i, duration)
                payload = {
                    "IncludeTaxesAndFees": True,
                    "Passengers": {
                        "Types": [{
                            "Type": "ADT",
                            "Count": seat
                        }]
                    },
                    "Codes": {
                        "Currency": ""
                    },
                    "Filters": {
                        "BookingClasses": ["Y", "U", "S", "B", "H", "M", "K",
                                           "L", "Q", "G", "W", "V", "C", "D",
                                           "E", "Z", "PA", "PD", "A", "P",
                                           "FC", "FD", "FE", "T", "O", "TA",
                                           "TC", "TD", "TJ", "OM"],
                        "ProductClasses": []
                    },
                    "Criteria": [{
                        "DestinationStationCodes": [to],
                        "OriginStationCodes": [dep],
                        "BeginDate": "%sT00:00:00" % begin_dt,
                        "EndDate": "%sT00:00:00" % end_dt
                    }]
                }
                # Invalidation record: marks this route/date stale after N minutes.
                invalid = {
                    'date': begin_dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                body = json.dumps(payload)
                meta_data = dict(
                    invalid=invalid,
                    payload=body,
                    begin_dt=begin_dt,
                    add_day=i,
                    duration=duration
                )
                yield scrapy.Request(self.start_urls[0],
                                     callback=self.parse,
                                     method='POST',
                                     headers=self.custom_settings.get('HEADERS'),
                                     meta={'meta_data': meta_data},
                                     body=body,
                                     errback=self.errback)