Esempio n. 1
0
    def start_requests(self):
        """Continuously pull 'BE' crawl tasks and yield one fare-search GET per task.

        Polls pubUtil.getUrl in an endless loop; sleeps 10s and retries when
        no task is available. Each task is normalized to (date, dep, arr) and
        expanded into a one-way search URL.
        """
        while True:
            result = pubUtil.getUrl('BE', 10)
            if not result:
                logging.info('no task! sleep 10s...')
                time.sleep(10)
                continue
            for data in result:
                (dt, dep, to) = pubUtil.analysisData(data)  # normalize task into (date, dep, arr)
                if pubUtil.dateIsInvalid(dt):
                    continue
                temp = {
                    'depart': dep,
                    'arr': to,
                    'departing': dt,
                    'returning': '',
                    'promo-code': '',
                    'adults': 1,
                    'teens': 0,
                    'children': 0,
                    'infants': 0
                }
                # Python 3 exposes urlencode under urllib.parse, Python 2 under
                # urllib. Catch only AttributeError so genuine encoding failures
                # are not silently swallowed (the original bare except hid them).
                try:
                    params = urllib.parse.urlencode(temp)
                except AttributeError:
                    params = urllib.urlencode(temp)

                url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
                # Bind url as a default argument: a plain closure is late-bound,
                # so every errback would otherwise see the *last* url of the
                # batch by the time it fires.
                yield scrapy.Request(
                    url,
                    callback=self.parse,
                    dont_filter=True,
                    errback=lambda x, url=url: self.download_errback(x, url))
Esempio n. 2
0
    def start_requests(self):
        """Poll 'aq' tasks forever and POST one Amadeus FlexPricer availability query each.

        In 'local' mode tasks come from pubUtil.get_task; otherwise from the
        remote task service. The POST body is a pre-built Amadeus form payload
        for a one-way search; the URL carries an ENC token plus the current
        query timestamp.
        """
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            # The 'local' attribute switches between a local task generator
            # and the remote task service.
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=1)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl('aq', 1)
            if not result:
                logging.info('get task error')
                time.sleep(20)
                continue
            for data in result:
                # logging.info("###input data: " + data)
                (dt, dep, to) = pubUtil.analysisData(data)

                # dt,dep,to='2019-03-28','PVG','TPE'

                # ua = UserAgent()
                # self.headers['user-agent'] = ua.random
                # Pre-built Amadeus FlexPricer form payload: route/date are
                # interpolated; everything else is fixed site configuration.
                post_data = 'B_LOCATION_1=' + dep + '&E_LOCATION_1=' + to + '&B_DATE_1=' + dt.replace(
                    '-', ''
                ) + '0000&B_ANY_TIME_1=True&EMBEDDED_TRANSACTION=FlexPricerAvailability&ARRANGE_BY=D&DISPLAY_TYPE=2&PRICING_TYPE=O&SO_SITE_MATRIX_CALENDAR=FALSE&SO_SITE_RUI_CAL_AVAI_NO_RECO=TRUE&SO_SITE_RUI_FP_AVAI_PRESEL=FALSE&COMMERCIAL_FARE_FAMILY_1=NEWECOOW&COMMERCIAL_FARE_FAMILY_2=NEWDELOW&COMMERCIAL_FARE_FAMILY_3=NEWBIZOW&SO_SITE_RUI_AX_CAL_ENABLED=TRUE&SO_SITE_CAL_CHANGE_WEEK=TRUE&SO_SITE_RUI_HIDE_MDF_SRC=FALSE&EXTERNAL_ID%236=OW&TRAVELLER_TYPE_1=ADT&TRIP_TYPE=O&TRIP_FLOW=YES&SO_SITE_EXPORT_CONFIRM=TRUE&SO_SITE_EXPORT_CONF_URL=https%3A%2F%2Fbooking.evaair.com%2Fexporttripplan%2Fwebservice.aspx&SO_SITE_THREEDS_USE=N&SO_SITE_BILLING_NOT_REQUIRED=Y&SO_SITE_BILL_ADD_OPTIONS=BILL_ADD_HIDDEN&SO_SITE_PREBOOK_CANCELLATION=TRUE&SO_GL=%3C%3Fxml+version%3D%221.0%22+encoding%3D%22iso-8859-1%22%3F%3E%0D%0A%3CSO_GL%3E%0D%0A%3CGLOBAL_LIST+mode%3D%22partial%22%3E%0D%0A%3CNAME%3ESL_AIR_MOP%3C%2FNAME%3E%0D%0A%3CLIST_ELEMENT%3E%0D%0A%3CCODE%3ECC%3C%2FCODE%3E%0D%0A%3CLIST_VALUE%3ECredit+Card%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EY%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECryptic%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%25T%25I%2F%25E%2F%25C%25F%2FN%25A%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%2F%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3ECC%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3CLIST_VALUE%3EN%3C%2FLIST_VALUE%3E%0D%0A%3C%2FLIST_ELEMENT%3E%0D%0A%3C%2FGLOBAL_LIST%3E%0D%0A%3C%2FSO_GL%3E&SO_SITE_FD_DISPLAY_MODE=1&SO_SITE_CURRENCY_FORMAT_JAVA=0&SO_SITE_ENABLE_SRV_POLICY=BAG%2CCOA&SO_SITE_ALLOW_SPEC_REQ_SERV=FALSE&SO_SITE_SD_TRUE_OP_CARRIER=TRUE&SO_SITE_BARCODE_ENABLE=TRUE&SO_SITE_ALLOW_CS_CODE_SHARE=FALSE&SO_SITE_USE_PAYMENT_ACTION=TRUE&EXTERNAL_ID=AIBS&EXTERNAL_ID%232=&EXTERNAL_ID%233=&EXTERNAL_ID%234=NEWECOOW&EXTERNAL_ID%2
35=&EXTERNAL_ID%2314=N&EXTERNAL_ID%2312=&EXTERNAL_ID%2313=zh_CN&EXTERNAL_ID%2399=C5WBKT102%23%23flyeva&DIRECT_LOGIN=NO&SO_SITE_RUI_MULTIDEV_ENABLED=TRUE&SO_SITE_RUI_TABLET_PG_LIST=ALL&SO_SITE_RUI_MOBILE_PG_LIST=ALL&SO_SITE_RUI_DISP_FF_TABLE=TRUE&SO_SITE_RUI_UPSLL_T_MDL=TRUE&SO_SITE_RUI_UPSLL_T_MDL_ATC=TRUE&SO_SITE_RUI_DPICKER_NATIVE=TABLET%2CMOBILE&MC_FORCE_DEVICE_TYPE=MOBILE&SO_SITE_RUI_MOBILE_FLOW=ALL&SO_SITE_RUI_TABLET_FLOW=ALL&SO_SITE_RUI_COLLAPSE_BOUND_T=TWO_STEPS&SO_SITE_RUI_UPSLL_HIDE_BTNS=FALSE&SO_SITE_OFFICE_ID=SHABR08AA&LANGUAGE=CN&SITE=CAWXCNEW'
                url_data = {
                    "ENCT": "1",
                    "ENC":
                    "990572D723A7BC83F77B4C6C03C696340674137066140FF11D721B8765E55FF8DC0562E080CE4BD1CD01272028CBBA89",
                    # pass in the current query timestamp
                    "ENC_TIME": time.strftime("%Y%m%d%H%M%S", time.localtime())
                }

                # Invalidation marker consumed downstream via request meta
                # (marks this route/date stale after INVALID_TIME minutes).
                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                # NOTE(review): urllib.urlencode is Python 2 only; Python 3
                # moved it to urllib.parse.urlencode — confirm target runtime.
                url_data = urllib.urlencode(url_data)
                self.url = self.start_urls[0] + '?' + url_data
                # print '# url: ', url
                # print '# url_data: ', url_data

                # ip = '127.0.0.1:8888'
                # ip = '127.0.0.1:1080'
                yield scrapy.Request(
                    self.url,
                    headers=self.headers,
                    body=post_data,
                    callback=self.parse,
                    dont_filter=True,
                    # meta={'invalid': invalid, 'proxy': ip},
                    meta={'invalid': invalid},
                    method='POST',
                    errback=self.errback)
Esempio n. 3
0
    def start_requests(self):
        """Endless 'je' task loop: POST one JSON one-way availability query per task."""
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        while True:
            result = pubUtil.getUrl('je', 1)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                dt, dep, to = pubUtil.analysisData(data)

                # Target endpoint's search form: one-way, normal availability,
                # adults from settings, no extras.
                post_data = {
                    "AgencyCode": "",
                    "AirportFrom": dep,
                    "AirportTo": to,
                    "BoardDate": dt,
                    "CarPackage": 'false',
                    "ReturnDate": "",
                    "SearchType": "Normal",
                    "AvailType": "",
                    "IsReturnFlight": 'false',
                    "IsBusiness": 'false',
                    "Adults": self.ADT,
                    "Children": "0",
                    "Infants": "0",
                    "FareDesignator": "",
                    "EdgarsClubCard": "",
                    "VoyagerState": '0',
                    "HaveErrors": 'false',
                    "IsChangeBooking": 'false',
                    "MomentumClientNumber": "",
                    "OutSegmentKeyFromRedirect": "",
                    "InSegmentKeyFromRedirect": "",
                    "isMobile": 'false',
                    "CriteriaSearchType": "Day"
                }

                # Invalidation marker consumed downstream via request meta.
                invalid = dict(
                    date=dt.replace('-', ''),
                    depAirport=dep,
                    arrAirport=to,
                    mins=self.custom_settings.get('INVALID_TIME'),
                )

                yield scrapy.Request(
                    self.start_urls[0],
                    method='POST',
                    body=json.dumps(post_data),
                    callback=self.parse,
                    dont_filter=True,
                    meta={'invalid': invalid},
                    errback=self.errback,
                )
Esempio n. 4
0
 def start_requests(self):
     """Endless FZ task loop: POST one JSON one-way search per task."""
     permins = 0
     print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
     while True:
         result = pubUtil.getUrl('FZ', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             dt, dep, to = pubUtil.analysisData(data)
             # The API wants MM/DD/YYYY; tasks arrive as YYYY-MM-DD.
             dt_change = datetime.strptime(dt, '%Y-%m-%d').strftime('%m/%d/%Y')
             print(dep, to, dt)
             seat = self.custom_settings.get('SEAT')
             # One-way search body; adult count comes from settings.
             payload = {
                 "journeyType": "ow",
                 "isOriginMetro": False,
                 "isDestMetro": False,
                 "variant": "0",
                 "searchCriteria": [{
                     "origin": dep,
                     "dest": to,
                     "originDesc": "",
                     "destDesc": "",
                     "isOriginMetro": False,
                     "isDestMetro": False,
                     "direction": "outBound",
                     "date": "%s 12:00 AM" % dt_change
                 }],
                 "paxInfo": {
                     "adultCount": seat,
                     "infantCount": 0,
                     "childCount": 0
                 }
             }
             # Invalidation marker consumed downstream via meta.
             invalid = dict(
                 date=dt.replace('-', ''),
                 depAirport=dep,
                 arrAirport=to,
                 mins=self.custom_settings.get('INVALID_TIME'),
             )
             body = json.dumps(payload)
             meta_data = dict(invalid=invalid, payload=body, aaa=(dep, to, dt))
             yield scrapy.Request(
                 self.start_urls[0],
                 callback=self.parse,
                 method='POST',
                 headers=self.custom_settings.get('HEADERS'),
                 meta={'meta_data': meta_data},
                 body=body,
                 errback=self.errback,
             )
Esempio n. 5
0
    def start_requests(self):
        """Expand multi-day tasks into one POST per day, forever.

        Task shape: 'TLN-CFE:20181110:1' -> route, start date, day count.
        """
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=30)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name)
            if not result:
                time.sleep(6)
                continue

            for data in result:
                # Split off the trailing day count, then parse route + date.
                parts = data.split(':')
                day_count = int(parts[2])
                (dt, dep, arr) = pubUtil.analysisData(parts[0] + ':' +
                                                      parts[1])
                base_date = datetime.strptime(dt, '%Y-%m-%d')

                for offset in range(day_count):
                    date = (base_date + timedelta(days=offset)).strftime('%Y%m%d0000')

                    # Map airport codes to city codes where a mapping exists.
                    dep = self.port_city.get(dep, dep)
                    arr = self.port_city.get(arr, arr)

                    # Invalidation marker consumed downstream via meta.
                    invalid = {
                        'date': date.replace('-', ''),
                        'depAirport': dep,
                        'arrAirport': arr,
                        'mins': self.custom_settings.get('INVALID_TIME')
                    }

                    post_data = urllib.urlencode(
                        ly_post_data.second_post_data(dep, arr, date,
                                                      self.ADT))

                    yield scrapy.Request(self.start_urls[0],
                                         body=post_data,
                                         callback=self.parse,
                                         dont_filter=True,
                                         meta={'invalid': invalid},
                                         errback=self.errback,
                                         method='POST')
Esempio n. 6
0
 def start_requests(self):
     permins = 0
     print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
     result_iter = None
     result = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter or not result:
                 # 本地任务未编写
                 result_iter = self.get_task()
             result = next(result_iter)
         else:
             result = pubUtil.getUrl('TR', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             (dt, dep, to) = pubUtil.analysisData(data)
             # dt, dep, to = '2019-01-12', 'SIN', 'TAO'
             print dt, dep, to
             seat = self.custom_settings.get('SEAT')
             querystring = {
                 'adt': seat,
                 'arcity': to,
                 'chd': '0',
                 'dpcity': dep,
                 'dpdate': dt,
                 'inft': '0',
                 'promo': '',
                 'type': '1'
             }
             data = ''
             for key in querystring:
                 data = data + key + '=' + str(querystring.get(key)) + '&'
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME')
             }
             meta_data = dict(
                 invalid=invalid,
                 data=data,
             )
             yield scrapy.Request(self.start_urls[0],
                                  callback=self.parse,
                                  headers=self.custom_settings.get('HEADERS'),
                                  method='POST',
                                  meta={'meta_data': meta_data},
                                  body=data,
                                  errback=self.errback)
Esempio n. 7
0
 def start_requests(self):
     """Endless TT task loop: POST one JSON one-way AUD fare query per task."""
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     result = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter or not result:
                 result_iter = self.get_task()
             result = next(result_iter)
         else:
             result = pubUtil.getUrl('TT', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             dt, dep, to = pubUtil.analysisData(data)
             seat = self.custom_settings.get('SEAT')
             # One-way AUD search body; adult count from settings.
             payload = dict(
                 currencyCode='AUD',
                 departureDate=dt,
                 destination=to,
                 numAdults=seat,
                 numChildren=0,
                 numInfants=0,
                 origin=dep,
                 promoCode='',
             )
             # Invalidation marker consumed downstream via meta.
             invalid = dict(
                 date=dt.replace('-', ''),
                 depAirport=dep,
                 arrAirport=to,
                 mins=self.custom_settings.get('INVALID_TIME'),
             )
             meta_data = dict(
                 invalid=invalid,
                 payload=payload,
             )
             headers = self.custom_settings.get('HEADERS')
             # Rotate the User-Agent on every request.
             headers['User-Agent'] = self.ua_construction()
             yield scrapy.Request(
                 self.start_urls[0],
                 callback=self.parse,
                 headers=headers,
                 method='POST',
                 meta={'meta_data': meta_data},
                 body=json.dumps(payload),
                 errback=self.errback,
             )
Esempio n. 8
0
    def start_requests(self):
        """Endless 'by' task loop: one GET one-way search per task."""
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        while True:
            result = pubUtil.getUrl('by', 1)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                logging.info("###input data: " + data)
                (dt, dep, to) = pubUtil.analysisData(data)

                # Stash the current task on the instance; other callbacks
                # may read these attributes.
                self.dep = dep
                self.arr = to
                self.date = dt

                # One-way search form; adult count from settings.
                query = {
                    'flyingFrom[]': self.dep,
                    'flyingTo[]': self.arr,
                    'depDate': self.date,
                    'returnDate': '',
                    'adults': self.ADT,
                    'children': '0',
                    'infants': '0',
                    'infantAge': '',
                    'isOneWay': 'true',
                    'childAge': '',
                    'searchType': 'selected',
                    'tabId': dep,
                    'cycleDates': dt,
                    'duration': '0'
                }

                second_url = self.second_url[0] + urllib.urlencode(query)

                # Invalidation marker consumed downstream via meta.
                invalid = {
                    'date': self.date.replace('-', ''),
                    'depAirport': self.dep,
                    'arrAirport': self.arr,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }

                yield scrapy.Request(second_url,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid},
                                     errback=self.errback)
Esempio n. 9
0
    def start_requests(self):
        """Endless task loop: one JSON POST per one-way query, random UA each time."""
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=30)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name, 1)
            if not result:
                logging.info('get task error')
                time.sleep(20)
                continue
            for data in result:
                dt, dep, to = pubUtil.analysisData(data)

                # One-way schedule query body.
                post_data = dict(
                    tripType="OW",
                    orgCode=dep,
                    dstCode=to,
                    takeoffdate1=dt,
                )

                # Randomize the User-Agent per request.
                self.headers['User-Agent'] = UserAgent().random

                # Invalidation marker consumed downstream via meta.
                invalid = dict(
                    date=dt.replace('-', ''),
                    depAirport=dep,
                    arrAirport=to,
                    mins=self.custom_settings.get('INVALID_TIME'),
                )

                yield scrapy.Request(
                    self.start_urls[0],
                    headers=self.headers,
                    body=json.dumps(post_data),
                    callback=self.parse,
                    dont_filter=True,
                    meta={'invalid': invalid},
                    method='POST',
                    errback=self.errback)
Esempio n. 10
0
    def start_requests(self):
        """Pull multi-day tasks and open a session per task via POST.

        Task shape: 'BVE-LYS:201812030000:15' — route + date plus a trailing
        day count, which is stripped before pubUtil.analysisData parses it.
        """
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, step=7)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name, 1)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue

            # Iterate the task batch (currently a single task by default).
            for data in result:
                count = int(data.split(':')[-1])
                # Strip the trailing ':<count>' field before parsing. The old
                # slice data[:-2] only handled single-digit counts cleanly and
                # left a dangling ':' for two-digit counts like ':15'/':30';
                # rsplit removes the whole field regardless of width.
                (date, dep, arr) = pubUtil.analysisData(data.rsplit(':', 1)[0])
                date = date.replace('-', '') + '0000'

                task_data = {
                    'dep': dep,
                    'arr': arr,
                    'date': date,
                    'count': count
                }

                post_data = urllib.urlencode(
                    a5_post_data.first_post_data(dep, arr, date, self.ADT))
                # Acquire a session first; get_session continues the crawl
                # with the original post_data/task_data carried in meta.
                yield scrapy.Request(
                    self.get_session_url[0],
                    body=post_data,
                    callback=self.get_session,
                    dont_filter=True,
                    meta={
                        'post_data': post_data,
                        'task_data': task_data
                    },
                    method='POST',
                    errback=self.errback,
                )
Esempio n. 11
0
    def start_requests(self):
        """Endless task loop; rotates a session token per batch and POSTs one query per task."""
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=30)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name, 1)
            if not result:
                logging.info('get task error')
                time.sleep(20)
                continue

            # Fresh session token for this batch.
            self.session_data['tck'] = random.choice(self.id_pool)
            for data in result:
                dt, dep, to = pubUtil.analysisData(data)

                # Form fields for a one-way CNY fare query.
                form = {
                    'traveldate': dt,
                    'ori': dep,
                    'currency': 'CNY',
                    'dest': to
                }

                # Invalidation marker consumed downstream via meta.
                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }

                body = urllib.urlencode(form)

                yield scrapy.Request(self.start_urls[0],
                                     headers=self.headers,
                                     body=body,
                                     callback=self.parse,
                                     dont_filter=True,
                                     meta={'invalid': invalid},
                                     method='POST',
                                     errback=self.errback)
Esempio n. 12
0
 def start_requests(self):
     """Pull KC tasks forever and request a session ID for each one-way query.

     The API expects the flight date as DD.MM.YYYY, while the invalidation
     marker (like every other spider here) uses YYYYMMDD.
     """
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     while True:
         result = pubUtil.getUrl('KC', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             (dt, dep, arr) = pubUtil.analysisData(data)
             # Build the invalidation marker from the original YYYY-MM-DD
             # value BEFORE reformatting: the old code ran dt.replace('-','')
             # after the rewrite below, so it stored a 'DD.MM.YYYY' string
             # (the replace was a no-op) instead of YYYYMMDD.
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': arr,
                 'mins': self.custom_settings.get('INVALID_TIME')
             }
             # API wants DD.MM.YYYY.
             dt = re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3.\2.\1', dt)
             print(dep, arr, dt)
             payload = {
                 'captchaResponse': '',
                 'pReturnDateStr': '',
                 'pFlightDateStr': dt,
                 'pRequest': {
                     'TwoWayRoute': 'false',
                     'DateAreFlexible': 'true',
                     'Origin': dep,
                     'Destination': arr,
                     'Bookingclass': 'ECO',
                     'Adult': '3',
                     'Child': '0',
                     'Infant': '0',
                     'Resident': 'false'
                 },
             }
             meta_data = dict(invalid=invalid, payload=payload)
             # First request only fetches a session ID; data_requests
             # continues with the payload carried in meta.
             yield scrapy.Request(
                 self.custom_settings.get('sessionID_url'),
                 callback=self.data_requests,
                 method='POST',
                 headers=self.custom_settings.get('start_headers'),
                 meta={'meta_data': meta_data},
                 body=json.dumps(payload),
                 errback=self.errback)
Esempio n. 13
0
    def start_requests(self):
        """Endless LA task loop: one GET fare query per task."""
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        while True:
            result = pubUtil.getUrl('LA', 5)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                dt, dep, to = pubUtil.analysisData(data)
                currency = "BR"
                seat = self.custom_settings.get('SEAT')
                querystring = {
                    "country": currency,
                    "origin": dep,
                    "destination": to,
                    "departure": dt,
                    "adult": seat,
                }
                # Hand-build the query string (leaves a harmless trailing '&').
                url = self.start_urls[0] + '?'
                for key in querystring:
                    url += key + '=' + str(querystring.get(key)) + '&'

                # Invalidation marker consumed downstream via meta.
                invalid = dict(
                    date=dt.replace('-', ''),
                    depAirport=dep,
                    arrAirport=to,
                    mins=self.custom_settings.get('INVALID_TIME'),
                )
                meta_data = dict(invalid=invalid,
                                 params=querystring,
                                 aaa=(dep, to, dt),
                                 flight_time=dt)
                yield scrapy.Request(
                    url,
                    callback=self.parse,
                    method='GET',
                    headers=self.custom_settings.get('HEADERS'),
                    meta={'meta_data': meta_data},
                    errback=self.errback)
Esempio n. 14
0
    def start_requests(self):
        """Loop over (currently hard-coded) tasks and POST one schedule query each."""
        permins = 0
        while True:
            # NOTE: remote task fetching is stubbed out; a fixed task is used.
            result = ["CRL-OTP:20181220:30"]
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                dt, dep, to = pubUtil.analysisData(data)  # normalize task into (date, dep, arr)
                # Round-trip through datetime to validate/normalize the date.
                dt = datetime.strptime(dt, '%Y-%m-%d').strftime('%Y-%m-%d')

                logging.info('# input data: ' + dep + '-' + to + '-' + dt)
                data_post = dict(
                    DepartureDate=dt,
                    DepartureStation=dep,
                    ArrivalStation=to,
                )

                # Invalidation marker consumed downstream via meta.
                invalid = dict(
                    date=dt.replace('-', ''),
                    depAirport=dep,
                    arrAirport=to,
                    mins=self.custom_settings.get('INVALID_TIME'),
                )

                # Inject the current flight into the shared POST template.
                self.custom_settings['POST_DATA']['flightList'] = [data_post]
                pay_load = json.dumps(self.custom_settings.get('POST_DATA'))
                yield scrapy.Request(method='POST',
                                     url=self.start_urls[0],
                                     body=pay_load,
                                     dont_filter=True,
                                     callback=self.parse,
                                     meta={'invalid': invalid, 'pay_load': pay_load},
                                     errback=self.errback,
                                     )
Esempio n. 15
0
    def start_requests(self):
        """Continuously pull BE tasks and yield one fare-search GET per task.

        In 'local' mode tasks come from self.get_task(); otherwise from the
        remote task service. Sleeps 60s between empty polls.
        """
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                              self.version))
        result_iter, result = None, None
        while True:
            if hasattr(self, 'local'):
                if not result_iter or not result:
                    result_iter = self.get_task()
                result = next(result_iter)
            else:
                result = pubUtil.getUrl('BE', 10)
            if not result:
                time.sleep(60)
                continue
            for data in result:
                (dt, dep, to) = pubUtil.analysisData(data)  # normalize task into (date, dep, arr)
                if pubUtil.dateIsInvalid(dt):
                    continue
                temp = {
                    'depart': dep,
                    'arr': to,
                    'departing': dt,
                    'returning': '',
                    'promo-code': '',
                    'adults': 3,
                    'teens': 0,
                    'children': 0,
                    'infants': 0
                }
                # Python 3 exposes urlencode under urllib.parse, Python 2 under
                # urllib. Catch only AttributeError so genuine encoding failures
                # are not silently swallowed (the original bare except hid them).
                try:
                    params = urllib.parse.urlencode(temp)
                except AttributeError:
                    params = urllib.urlencode(temp)

                url = '%s%s/%s?%s' % (self.start_urls, dep, to, params)
                yield scrapy.Request(url,
                                     callback=self.parse,
                                     dont_filter=True,
                                     errback=self.err_back)
Esempio n. 16
0
    def start_requests(self):
        """Endlessly pull 'TW' tasks and yield one-way fare-availability GETs.

        Local mode (``self.local`` present) reads tasks from pubUtil.get_task;
        otherwise batches of 5 tasks come from the remote task service.
        """
        permins = 0
        # Heartbeat tells the scheduler this spider instance is alive.
        print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
        result_iter = None
        while True:

            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=30)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl('TW', 5)

            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            today = datetime.now().strftime('%Y%m%d')
            for data in result:
                (dt, dep, to) = pubUtil.analysisData(data)  # format the fetched task data
                # dt, dep, to = '20180722', 'ICN', 'KIX' # pin one task for local testing
                # Python 2 style urlencode; one-way (OW) search for 3 adults.
                params = urllib.urlencode(dict(
                    origin=dep,
                    destination=to,
                    onwardDateStr=dt.replace('-', ''),
                    # pointOfPurchase='KR',
                    paxTypeCountStr='3,0,0',
                    today=today,
                    travelType='OW',
                    searchType='byDate',
                    # domesticYn='Y',
                    bundleAmountOW=0,
                    bundleAmountRT=0,
                    routeCls='AS',
                    _=int(time.time() * 1000)  # cache-busting timestamp (ms)
                ))
                total_url = self.start_urls[0] + params
                yield scrapy.Request(url=total_url,
                                     callback=self.transit,
                                     meta={'params': params, 'flag': 1},
                                     dont_filter=True)
Esempio n. 17
0
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     result = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter or not result:
                 result_iter = self.get_task()
             result = next(result_iter)
         else:
             result = pubUtil.getUrl('PC', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             print data
             (dt, dep, to) = pubUtil.analysisData(data)
             # dt,dep,to = '2018-08-30','TZX','SAW'
             seat = self.custom_settings.get('SEAT')
             payload = {
                 "flightSearchList": [{
                     "arrivalPort": to,
                     "departurePort": dep,
                     "departureDate": dt
                 }],
                 "adultCount":
                 seat,
                 "childCount":
                 0,
                 "infantCount":
                 0,
                 "soldierCount":
                 0,
                 "currency":
                 "TL",
                 "operationCode":
                 "TK",
                 "ffRedemption":
                 False,
                 "openFlightSearch":
                 False,
                 "personnelFlightSearch":
                 False,
                 "dateOption":
                 1
             }
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME')
             }
             yield scrapy.Request(
                 self.start_urls[0],
                 callback=self.parse,
                 method='POST',
                 headers=self.custom_settings.get('HEADERS'),
                 meta={'invalid': invalid},
                 body=json.dumps(payload),
                 errback=self.errback)
Esempio n. 18
0
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     result = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter or not result:
                 result_iter = self.get_task()
             result = next(result_iter)
             print result[0]
         else:
             result = pubUtil.getUrl('4O', 5)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             (dt, dep, to) = pubUtil.analysisData(data)
             # for i in range(1):
             # for data in self.get_task():
             #     dep, to, dt = data
             # dt,dep,to= '2018-11-21','MEX','MTY'
             # print(dep, to, dt)
             seat = self.custom_settings.get('SEAT')
             querystring = {
                 'ArrivalStation': to,
                 'CurrencyCode': 'USD',
                 'DepartureDate': dt,
                 'DepartureStation': dep,
                 'PaxResidentCountry': 'US',
                 'ReturnDate': '',
                 'RoleCode': 'WWWA',
                 # 'Signature': '7g45Wa4GSDU%3D%7CFGktbwO8EpThKsp1cB6fKOAqUvn3guMXZi8UTGptOepMgEP4vfdsuDVx9CUEK6PPNKDct2Otx5ujxMUtBdbGAKypdlrDs58IJ5egu0MpkyBUcJAzA3CC5OLpbNF%2B2XmVvSljYUJspk0%3D',
                 # 'Signature': 'D0QxiSJmAVA%3D%7Cu8aeufqyaeaQ8rZDzn%2FfgXgUix%2Fw6vE1NV1boWcJkMxA%2FST6xYdjipCYmvebA4zP%2BVfgbgxJPChcxCy2jn8ur4hAPBprYZ%2F7PBBZr3%2FaXo6aZ018F3GSPm3jNzQzeC3UBXtJs77215s%3D'
                 'TotalPaxAdt': seat,
                 'TotalPaxChd': '0',
                 'TotalPaxInf': '0',
                 'TotalPaxSrc': '0',
                 'Version': '7'
             }
             url = self.start_urls[0] + '?'
             for key in querystring:
                 url = url + key + '=' + str(querystring.get(key)) + '&'
             headers = self.custom_settings.get('HEADERS')
             # headers['User-Agent'] = random.choice(self.ua_data)[0]
             headers['User-Agent'] = self.ua_construction()
             # print '请求:', headers['User-Agent']
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME'),
             }
             meta_data = dict(
                 invalid=invalid,
                 params=querystring,
                 aaa=(dep, to, dt),
                 flight_time=dt,
                 url=url,
             )
             yield scrapy.Request(url,
                                  callback=self.parse,
                                  method='GET',
                                  headers=headers,
                                  meta={'meta_data': meta_data},
                                  errback=self.errback)
Esempio n. 19
0
 def start_requests(self):
     """Continuously fetch 'G9' tasks and POST one-way JSON fare searches."""
     permins = 0
     print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
     while True:
         batch = pubUtil.getUrl('G9', 5)
         if not batch:
             logging.info('get task error')
             time.sleep(10)
             continue
         for task in batch:
             dt, dep, to = pubUtil.analysisData(task)
             # dt,dep,to = '2018-10-16','SHJ','DMM'
             seat = self.custom_settings.get('SEAT')
             # Single outbound leg for the requested date.
             journey = {
                 "departureDateTime": "%sT00:00:00" % dt,
                 "departureVariance": 0,
                 "destination": to,
                 "destinationCity": False,
                 "origin": dep,
                 "originCity": False,
             }
             payload = {
                 "dataModel": {
                     "app": {
                         "apiKey": "api_key",
                         "appVersion": "4.0.3",
                         "language": "en",
                         "os": "android",
                     },
                     "isReturn": False,
                     "journeyInfo": [journey],
                     "preferences": {
                         "cabinClass": "Y",
                         "currency": "USD",
                         "logicalCabinClass": "Y",
                         "promotion": {
                             "code": "",
                             "type": "PROMO_CODE",
                         },
                     },
                     "travellerQuantity": {
                         "adultCount": seat,
                         "childCount": "0",
                         "infantCount": "0",
                     },
                 },
             }
             body = json.dumps(payload)
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME'),
             }
             meta_data = dict(
                 invalid=invalid,
                 payload=body,
                 aaa=(dep, to, dt),
                 flight_time=dt,
             )
             yield scrapy.Request(self.start_urls[0],
                                  callback=self.parse,
                                  method='POST',
                                  headers=self.custom_settings.get('HEADERS'),
                                  meta={'meta_data': meta_data},
                                  body=body,
                                  errback=self.errback)
Esempio n. 20
0
    def start_requests(self):
        """Pull expandable tasks (``DEP-ARR:YYYYMMDD:count``) and POST fare searches.

        Each task carries a day count; one request is emitted per consecutive
        day starting at the task's base date.
        """
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=30)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name, 1)
            if not result:
                logging.info('get task error')
                time.sleep(5)
                continue
            for data in result:
                # Task shape: [u'TLN-CFE:20181110:1'] -> route, base date, day count.
                count = int(data.split(':')[-1])
                (date, dep, arr) = pubUtil.analysisData(data[:-2])
                _date = datetime.strptime(date, '%Y-%m-%d')

                for i in range(count):
                    # BUG FIX: the original re-assigned ``_date = _date +
                    # timedelta(days=i)``, accumulating the offset (base, +1,
                    # +3, +6, ...).  Derive each day from the untouched base
                    # date instead, matching the sibling spiders in this file.
                    temp_date = _date + timedelta(days=i)
                    date = temp_date.strftime('%Y%m%d')
                    # dep = 'KIX'
                    # arr = 'ICN'
                    city_code = self.city_dict.get(dep)
                    if city_code is None:
                        logging.info('# not found city: ' + dep)
                    body = json.dumps(
                        ze_post_data.get_data(dep, arr, date, self.ADT,
                                              city_code))

                    # Invalidation record for this route/date.
                    invalid = {
                        'date': date.replace('-', ''),
                        'depAirport': dep,
                        'arrAirport': arr,
                        'mins': self.custom_settings.get('INVALID_TIME')
                    }
                    task_data = {
                        'dep': dep,
                        'arr': arr,
                        'date': date,
                        'city_code': city_code,
                        'body': body
                    }

                    yield scrapy.Request(self.start_urls[0],
                                         headers=self.headers,
                                         body=body,
                                         callback=self.parse,
                                         dont_filter=True,
                                         meta={
                                             'invalid': invalid,
                                             'task_data': task_data
                                         },
                                         errback=self.errback,
                                         method='POST')
Esempio n. 21
0
 def start_requests(self):
     """Continuously fetch 'XQ' tasks and POST form-encoded one-way searches."""
     permins = 0
     print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
     while True:
         batch = pubUtil.getUrl('XQ', 5)
         if not batch:
             logging.info('get task error')
             time.sleep(10)
             continue
         for task in batch:
             dt, dep, to = pubUtil.analysisData(task)
             # dt,dep,to= '2018-09-18','AYT','DUS'
             # The endpoint wants dates like '18-Sep-2018'.
             dt_change = datetime.strptime(dt, '%Y-%m-%d').strftime('%d-%b-%Y')
             print(dep, to, dt, dt_change)
             seat = self.custom_settings.get('SEAT')
             payload = {
                 'origin': dep,
                 'adults': seat,
                 'fareRT': '',
                 'flightNumberRT': '',
                 'wvm': 'WVMD',
                 'ibeScreenId': 'IBE000',
                 'bookingSource': '',
                 'fareOW': '',
                 'children': '0',
                 'cabinClass': 'ECONOMY',
                 '_eventId': 'showWtLblResult',
                 'travelDate': dt_change,
                 'destination': to,
                 'fareLevel': '',
                 'deviceType': '',
                 'tripType': 'OW',
                 'channel': 'DEBD',
                 'pointOfPurchase': 'OTHERS',
                 'flightNumberOW': '',
                 'access_token': '',
                 'promoCode': '',
                 'ccType': '',
                 'skyscanner_redirectid': '',
                 'mode': 'searchResultInter',
                 'infants': '0',
                 'flexTrvlDates': '',
             }
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME'),
             }
             # Hand-built form body; trailing '&' kept as the endpoint accepts it.
             body = ''.join(
                 field + '=' + payload.get(field) + '&' for field in payload)
             meta_data = {
                 'invalid': invalid,
                 'payload': body,
                 'aaa': (dep, to, dt),
                 'flight_time': dt,
             }
             yield scrapy.Request(self.start_urls[0],
                                  callback=self.parse,
                                  method='POST',
                                  headers=self.custom_settings.get('HEADERS'),
                                  meta={'meta_data': meta_data},
                                  body=body,
                                  errback=self.errback)
Esempio n. 22
0
 def start_requests(self):
     """Continuously fetch 'SL' tasks and POST JSON availability searches.

     Each task is also recorded in ``self.task`` so failed routes can be
     marked invalid later.
     """
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     while True:
         result = pubUtil.getUrl('SL', 1)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             (dt, dep, to) = pubUtil.analysisData(data)  # unpack date/dep/arr from the raw task
             dt_com = data.split(':')[1]
             self.task.append({
                 'date': dt_com,
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': settings.INVALID_TIME
             })
             dep = str(dep)
             to = str(to)
             if pubUtil.dateIsInvalid(dt):
                 logging.info('date is invalid ,next~')
                 continue
             # Epoch seconds for the target date shifted to UTC+8, then ms below.
             dt_stamp = time.mktime(time.strptime(dt,
                                                  '%Y-%m-%d')) + 8 * 60 * 60
             data_post = {
                 "sd": {
                     "Adults": self.seats,
                     "AirlineCode": "",
                     "ArrivalCity": to,
                     "ArrivalCityName": None,
                     "BookingClass": None,
                     "CabinClass": 0,
                     "ChildAge": [],
                     "Children": 0,
                     "CustomerId": 0,
                     "CustomerType": 0,
                     "CustomerUserId": 230,
                     "DepartureCity": dep,
                     "DepartureCityName": None,
                     "DepartureDate": "/Date(%s)/" % int(dt_stamp * 1000),
                     "DepartureDateGap": 0,
                     "DirectFlightsOnly": False,
                     "Infants": 0,
                     "IsPackageUpsell": False,
                     "JourneyType": 1,
                     "PreferredCurrency": "THB",
                     "ReturnDate": "/Date(-2208988800000)/",
                     "ReturnDateGap": 0,
                     "SearchOption": 1
                 },
                 "fsc": "0"
             }
             # BUG FIX: bind data_post as a default argument.  The original
             # ``lambda x: self.download_errback(x, data_post)`` closed over
             # the loop variable, so every errback in a batch saw only the
             # last payload (late-binding closure).
             yield scrapy.Request(
                 method='POST',
                 url=self.start_urls,
                 body=json.dumps(data_post),
                 callback=self.parse,
                 meta=data_post,
                 dont_filter=True,
                 errback=lambda x, dp=data_post: self.download_errback(x, dp))
Esempio n. 23
0
    def start_requests(self):
        permins = 0
        print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
        result_iter = None
        result = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter or not result:
                    result_iter = self.get_task()
                result = next(result_iter)
                print result[0]
            else:
                result = pubUtil.getUrl('B6', 5)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                (dt, dep, to) = pubUtil.analysisData(data)
        #     for i in range(1):
        #     for data in self.get_task():
        #         dep, to, dt = data
                # dt,dep,to= '2018-11-21','RDU','JFK'
                # dt,dep,to= '2018-11-23','BOS','JAX'
                # print(dep, to, dt)
                seat = self.custom_settings.get('SEAT')
                querystring = {
                    'departureAirportCode': dep,
                    'env': 'prod',
                    'jbBookerCurrency-flights': 'usd',
                    'journeySpan': 'OW',
                    'numAdults': seat,
                    'numChildren': '0',
                    'numInfants': '0',
                    'returnAirportCode': to,
                    'startDate': re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', dt),
                    'submitted-form': 'bkSearch',
                    'un_jtt_application_platform': 'android',
                    'version': 'ANDROID-v4.6.4'
                }
                url = self.start_urls[0] + '?'
                for key in querystring:
                    url = url + key + '=' + str(querystring.get(key)) + '&'

                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME'),
                }
                meta_data = dict(
                    invalid=invalid,
                    params=querystring,
                    aaa=(dep, to, dt),
                    flight_time=dt,
                    url=url,
                )
                yield scrapy.Request(url,
                                     callback=self.parse,
                                     method='GET',
                                     headers=self.custom_settings.get('HEADERS'),
                                     meta={'meta_data': meta_data},
                                     errback=self.errback
                                     )
Esempio n. 24
0
 def start_requests(self):
     """Continuously fetch '9C' tasks and POST form-encoded fare searches."""
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     task_iter = None
     batch = None
     while True:
         if hasattr(self, 'local'):
             if not task_iter or not batch:
                 task_iter = self.get_task()
             batch = next(task_iter)
         else:
             batch = pubUtil.getUrl('9C', 5)
         if not batch:
             logging.info('get task error')
             time.sleep(10)
             continue
         for task in batch:
             dt, dep, to = pubUtil.analysisData(task)
             seat = self.custom_settings.get('SEAT')
             payload = {
                 'Arrival': to,
                 'IsIJFlight': 'false',
                 'CabinActId': 'null',
                 'SeatsNum': seat,
                 'Currency': '0',
                 'IsLittleGroupFlight': 'false',
                 'ReturnDate': 'null',
                 'Departure': dep,
                 'IsUM': 'false',
                 'IsBg': 'false',
                 'IsJC': 'false',
                 'Active9s': '',
                 'IsShowTaxprice': 'false',
                 'DepartureDate': dt,
                 'isdisplayold': 'false',
                 'ActId': '0',
                 'IfRet': 'false',
                 'IsEmployee': 'false',
                 'SType': '0',
             }
             # Hand-built form body; trailing '&' kept as the endpoint accepts it.
             form = ''.join(
                 field + '=' + str(payload.get(field)) + '&' for field in payload)
             invalid = {
                 'date': dt.replace('-', ''),
                 'depAirport': dep,
                 'arrAirport': to,
                 'mins': self.custom_settings.get('INVALID_TIME'),
             }
             meta_data = {
                 'invalid': invalid,
                 'form': form,
             }
             yield scrapy.Request(
                 self.start_urls[0],
                 callback=self.parse,
                 headers=self.custom_settings.get('HEADERS'),
                 method='POST',
                 meta={'meta_data': meta_data},
                 body=form,
                 errback=self.errback)
Esempio n. 25
0
    def start_requests(self):
        """Continuously fetch 'AD' tasks and POST ASP.NET form searches.

        The large commented-out alternative payload that used to live here was
        dead code and has been removed; the request content is unchanged.
        """
        permins = 0
        print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
        while True:
            result = pubUtil.getUrl('AD', 5)
            if not result:
                logging.info('get task error')
                time.sleep(10)
                continue
            for data in result:
                (dt, dep, to) = pubUtil.analysisData(data)
                print(dep, to, dt)
                # NOTE(review): SEAT is read but the form below hard-codes '5'
                # adults; kept as-is to avoid changing the request — confirm
                # whether the setting should drive the passenger count.
                seat = self.custom_settings.get('SEAT')
                payload = {
                    'ControlGroupSearch$SearchMainSearchView$TextBoxMarketOrigin1': dep,
                    'ControlGroupSearch$SearchMainSearchView$TextBoxMarketDestination1': to,
                    # The month field wants 'YYYY-MM', the day field wants 'DD'.
                    'ControlGroupSearch$SearchMainSearchView$DropDownListMarketMonth1': re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\1-\2', dt),
                    'ControlGroupSearch$SearchMainSearchView$DropDownListMarketDay1': re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3', dt),
                    'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_ADT': '5',
                    'ControlGroupSearch$SearchMainSearchView$TextBoxPromoCode': 'CALLCENT',
                    'culture': 'en-US',
                    'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_CHD': '0',
                    'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacDestination1': '',
                    'ControlGroupSearch$SearchMainSearchView$DropDownListPassengerType_INFANT': '0',
                    'ControlGroupSearch$SearchMainSearchView$CheckBoxUseMacOrigin1': '',
                    'ControlGroupSearch$SearchMainSearchView$RadioButtonMarketStructure': 'OneWay',
                    'ControlGroupSearch$SearchMainSearchView$DropDownListFareTypes': 'R',
                    '__EVENTTARGET': 'ControlGroupSearch$LinkButtonSubmit',
                }

                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': to,
                    'mins': self.custom_settings.get('INVALID_TIME')
                }
                # Hand-built form body; trailing '&' kept as the endpoint accepts it.
                body = ''
                for key in payload:
                    body = body + key + '=' + str(payload.get(key)) + '&'
                meta_data = dict(
                    invalid=invalid,
                    payload=body,
                    aaa=(dep, to, dt),
                    flight_time=dt
                )
                yield scrapy.Request(self.start_urls[0],
                                     callback=self.parse,
                                     method='POST',
                                     headers=self.custom_settings.get('HEADERS'),
                                     meta={'meta_data': meta_data},
                                     body=body,
                                     errback=self.errback
                                     )
Esempio n. 26
0
    def start_requests(self):
        """Pull expandable tasks ('DEP-ARR:YYYYMMDD:count') and POST one search per day."""
        permins = 0
        logging.info(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        task_iter = None
        while True:
            if hasattr(self, 'local'):
                if not task_iter:
                    task_iter = pubUtil.get_task(self.name, days=30)
                batch = next(task_iter)
            else:
                batch = pubUtil.getUrl(self.name, 1)
            if not batch:
                logging.info('get task error')
                time.sleep(5)
                continue
            for raw in batch:
                # Task shape like u'TLN-CFE:20181110:1' -> route, base date, day count.
                count = int(raw.split(':')[-1])
                date, dep, arr = pubUtil.analysisData(raw[:-2])
                base_date = datetime.strptime(date, '%Y-%m-%d')

                for offset in range(count):
                    day = base_date + timedelta(days=offset)
                    date = day.strftime('%Y-%m-%d')
                    # dep, arr, date = 'MNL', 'SIN', '2019-01-04'
                    post_data = {
                        "originIata": dep,
                        "destinationIata": arr,
                        "departureDate": date + "T00:00:00+08:00",
                        "passengerComposition": {
                            "adult": self.ADT,
                            "children": 0,
                            "infant": 0
                        }
                    }
                    body = json.dumps(post_data)

                    # Invalidation record for this route/date.
                    invalid = {
                        'date': date[:10].replace('-', ''),
                        'depAirport': dep,
                        'arrAirport': arr,
                        'mins': self.custom_settings.get('INVALID_TIME')
                    }
                    task_data = {
                        'dep': dep,
                        'arr': arr,
                        'date': date,
                        'body': body
                    }
                    yield scrapy.Request(url=self.start_urls[0],
                                         body=body,
                                         callback=self.parse,
                                         dont_filter=True,
                                         meta={
                                             'invalid': invalid,
                                             'task_data': task_data
                                         },
                                         errback=self.errback,
                                         method='POST')
Esempio n. 27
0
    def start_requests(self):
        """Endlessly pull (date, dep, arr) tasks and yield POST fare-search requests.

        Tasks come from pubUtil.get_task (local mode) or pubUtil.getUrl
        (remote mode). Each task string like 'TLN-CFE:20181110:1' expands
        into `count` consecutive departure dates. Before requests are sent,
        the proxy/session pair is (re)established whenever self.proxy_flag
        is set.
        """
        permins = 0
        logging.info(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
        result_iter = None
        # Timer for how long the current IP has been in use (disabled).
        # start_time = time.time()
        while True:
            if hasattr(self, 'local'):
                # Local mode: lazily create the task generator once, then
                # pull one batch per loop iteration.
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name, days=10)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.name, 1)
            if not result:
                logging.info('get task error')
                time.sleep(20)
                continue
            for data in result:
                # logging.info("## input data: " + data)
                # Task format: [u'TLN-CFE:20181110:1'] -> route, start date, day count.
                count = int(data.split(':')[-1])
                # data[:-2] drops the trailing ':N' count before parsing
                # (assumes the count is a single digit — TODO confirm).
                (date, dep, arr) = pubUtil.analysisData(data[:-2])
                _date = datetime.strptime(date, '%Y-%m-%d')

                for i in range(count):
                    temp_date = _date + timedelta(days=i)
                    date = temp_date.strftime('%m/%d/%Y')  # US format for the request body
                    invalid_date = temp_date.strftime('%Y%m%d')  # compact format for invalidation

                    # logging.info('# input data: ' + dep + '-' + arr + '-' + date)
                    # dep, arr, date = 'FLL', 'LAS', '2019-01-13'

                    # Force a proxy change once the IP exceeds its usage time (disabled).
                    # logging.info('ip used time: ' + str(time.time() - start_time))
                    # if time.time() - start_time > self.use_time:
                    #     self.proxy_flag = True
                    #     logging.info('### ip invalid:' + self.proxy)
                    if self.proxy_flag:
                        # Retry until a working proxy and a fresh session cookie
                        # are both obtained.
                        while True:
                            # Russian proxy source (disabled).
                            # self.proxy = pubUtil.nk_get_ip()
                            # Small-pool proxy source.
                            self.proxy = pubUtil.get_proxy(self.name)
                            if self.proxy is None:
                                logging.info('# no get proxy, continue')
                                # time.sleep(60)
                                continue
                            logging.info('# get a new ip: ' + self.proxy)
                            ip_proxies = {"https": "https://" + self.proxy}
                            # Fetch a session through the new proxy and flatten the
                            # cookie jar into a 'k=v; k=v' Cookie header string.
                            try:
                                response = requests.get(self.get_session_url, proxies=ip_proxies, timeout=15)
                                self.cookies_str = json.dumps(requests.utils.dict_from_cookiejar(response.cookies))[
                                                   1:-1].replace(
                                    '\"',
                                    '').replace(
                                    ':', '=').replace(' ', '').replace(',', '; ')

                            except Exception as e:
                                logging.info(e)
                                self.proxy_flag = True
                                logging.info('# get session error')
                                continue
                            # IP is usable; restart the usage timer (disabled).
                            # start_time = time.time()
                            self.proxy_flag = False

                            break
                    headers = {
                        'Content-Type': 'application/x-www-form-urlencoded',
                        'Cookie': self.cookies_str
                    }

                    post_data = {
                        'from': dep,
                        'to': arr,
                        # 'from': 'AXM',
                        # 'to': 'ATL',
                        'departDate': date,
                        'departDateDisplay': date,
                        'ADT': self.ADT
                    }

                    post_data.update(self.custom_settings.get('POST_DATA'))
                    # NOTE(review): urllib.urlencode is Python 2 only; Python 3
                    # needs urllib.parse.urlencode (other spiders in this file
                    # wrap this in try/except for both) — confirm target runtime.
                    post_data = urllib.urlencode(post_data)

                    # Invalidation record used to mark this route/date on failure.
                    invalid = {
                        'date': invalid_date,
                        'depAirport': dep,
                        'arrAirport': arr,
                        'mins': self.custom_settings.get('INVALID_TIME')
                    }
                    yield scrapy.Request(url=self.start_urls[0],
                                         body=post_data,
                                         headers=headers,
                                         callback=self.parse,
                                         dont_filter=True,
                                         meta={'invalid': invalid, 'proxy': self.proxy},
                                         errback=self.errback,
                                         method='POST')
Esempio n. 28
0
    def start_requests(self):
        """Loop forever, yielding MM fare-search POST requests.

        While pubUtil.get_mm_adult() reports an active window, fetch
        batches of 5 tasks, build the manually form-encoded search body
        for each (date, dep, arr) task and yield a POST request;
        otherwise sleep 60s and re-check.
        """
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        while True:
            # Outside the issue window: wait and poll again.
            if not pubUtil.get_mm_adult():
                print('Non-issue time,sleep:60s')
                time.sleep(60)
                continue
            tasks = pubUtil.getUrl('MM', 5)
            if not tasks:
                logging.info('get task error')
                time.sleep(10)
                continue
            for task in tasks:
                (dt, dep, arr) = pubUtil.analysisData(task)
                # dep,arr,dt = 'KIX','HKG','2018-10-12'
                form_data = {
                    'F_departure_cd': dep,  # departure airport code
                    'F_destination_cd': arr,  # arrival airport code
                    # year+month as int, e.g. 201810 for '2018-10-12'
                    'F_go_ym': int(
                        re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\1\2', dt)),
                    # day-of-month as int
                    'F_go_d': int(
                        re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3', dt)),
                    'F_trip_type': 1,
                    'F_adult_count': 5,  # seats queried per search
                    'F_child_count': 0,
                    'F_infant_count': 0,
                    'F_p_token': '',
                    'PCMD': 'searchflightresult',
                    'BPCMD': 'searchflight',
                    'Campaign': '',
                    'next': 'next',
                }
                # Manual form encoding: every pair is followed by '&',
                # trailing '&' included (matches the original body exactly).
                form = ''.join('%s=%s&' % (key, str(form_data.get(key)))
                               for key in form_data)
                # Invalidation record for this route/date on failure.
                invalid = {
                    'date': dt.replace('-', ''),
                    'depAirport': dep,
                    'arrAirport': arr,
                    'mins': self.custom_settings.get('INVALID_TIME'),
                }
                meta_data = dict(invalid=invalid,
                                 maxSeats=form_data.get('F_adult_count'),
                                 year=dt[:4],
                                 form=form)

                yield scrapy.Request(
                    self.start_urls[0],
                    method='POST',
                    headers=self.custom_settings.get('headers'),
                    body=form,
                    callback=self.parse,
                    meta={'meta_data': meta_data},
                    errback=self.errback)