Example #1
 def start_requests(self):
     permins = 0
     print(pubUtil.heartbeat(self.host_name, self.name, self.num, permins, self.version))
     result_iter = None
     while True:
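         # Local mode pulls tasks from a cached iterator; otherwise a batch
         # is fetched from the task service via pubUtil.getUrl.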
         if hasattr(self, 'local'):
             if not result_iter:
                 result_iter = pubUtil.get_task(self.name)
             result = next(result_iter)
         else:
             result = pubUtil.getUrl(self.carrier, 1)
         if not result:
             time.sleep(3)
             continue
         for data in result:
             (dt_st, dep, arr, days) = vyUtil.analysisData(data) 
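             # One GET search request per day in the task's date range.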
             for i in range(int(days)):
                 dt = vyUtil.get_real_date(dt_st, i)
                 params = {
                     'origination-airport': dep,
                     'destination-airport': arr,
                     'departure-date': dt,
                     'number-adult-passengers': self.custom_settings.get('SEAT_SEARCH'),
                     'number-senior-passengers': 0,
                     'currency': 'USD',
                 }
                 total_url = self.start_urls + parse.urlencode(params)
                 yield scrapy.Request(
                     url=total_url,
                     method="GET",
                     dont_filter=True,
                     callback=self.parse,
                     errback=self.errback,
                 )
Example #2
    def start_requests(self):
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        while True:
            result = pubUtil.getUrl(self.name, 1)
            if not result:
                self.log('get task error', 20)  # 20 == logging.INFO
                time.sleep(10)
                continue
            for data in result:
                (dt, dep, to, days) = vyUtil.analysisData(data)  # format the fetched task data
                # dep, to, dt, days= 'RHO', 'PMO', '2018-08-15', 30
                dt_datetime = datetime.strptime(dt, '%Y%m%d')
                end_date = dt_datetime + timedelta(days=int(days))
                dt = dt_datetime.strftime('%Y-%m-%d')

                if dt_datetime.month != end_date.month:
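                    # The range crosses a month boundary: request the slice up to
                    # the start of end_date's month, then the remainder from there.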
                    next_fday = datetime(end_date.year, end_date.month, 1)
                    days_before = (next_fday - dt_datetime).days
                    next_fday_str = next_fday.strftime('%Y-%m-%d')
                    yield self.first_request(dep, to, dt, days_before + 1)
                    yield self.first_request(dep, to, next_fday_str,
                                             int(days) - days_before)
                else:
                    yield self.first_request(dep, to, dt, days)
Example #3
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter:
                 result_iter = pubUtil.get_task(self.name)
             result = next(result_iter)
         else:
             result = pubUtil.getUrl(self.carrier, 1)
         if not result:
             time.sleep(3)
             continue
         for data in result:
             (dt_st, dep, arr, days) = vyUtil.analysisData(data)
             for i in range(int(days)):
                 dt = vyUtil.get_real_date(dt_st, i)
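                 # Route/date form fields; the static PAY_LOAD settings
                 # are merged in below before the FormRequest is sent.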
                 pay_load = dict(
                     depCity1=dep,
                     arrCity1=arr,
                     depDate1=dt,
                 )
                 pay_load.update(self.custom_settings.get('PAY_LOAD'))
                 yield scrapy.FormRequest(
                     self.start_url,
                     formdata=pay_load,
                     meta={'payload': pay_load},
                     callback=self.parse,
                     dont_filter=True,
                     errback=self.err_back,
                 )
Example #4
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter:
                 result_iter = pubUtil.get_task(self.name)
             result = next(result_iter)
         else:
             result = pubUtil.getUrl(self.carrier, 1)
         if not result:
             time.sleep(6)
             continue
         for data in result:
             (_dt, dep, to, days) = vyUtil.analysisData(data)
             for i in range(int(days)):
                 dt = (datetime.strptime(_dt, '%Y%m%d') +
                       timedelta(days=i)).strftime('%Y-%m-%d')
                 # dt, dep, to = '2019-02-28', 'BLR', 'BKK'
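                 # Copy the query template so the %-substitution below does not
                 # mutate the shared POST_DATA_FORMAT settings entry.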
                 post_data = self.custom_settings.get(
                     'POST_DATA_FORMAT').copy()
                 post_data['query'] = post_data.get('query') % (self.seats,
                                                                to, dep, dt)
                 yield scrapy.Request(
                     url=self.start_urls,
                     method="POST",
                     body=json.dumps(post_data),
                     meta={'post_data': post_data},
                     dont_filter=True,
                 )
Example #5
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     result_iter = None
     while True:
         if hasattr(self, 'local'):
             if not result_iter:
                 result_iter = pubUtil.get_task(self.name, 60)
             result = next(result_iter)
         else:
             result = pubUtil.getUrl(self.carrier, 1)
         if not result:
             time.sleep(3)
             continue
         for data in result:
             (dt_st, dep, arr, days) = vyUtil.analysisData(data)
             for i in range(int(days)):
                 dt = vyUtil.get_real_date(dt_st, i)
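                 # JSON POST body: route/date fields plus the carrier's DEFAULT_DATA
                 # (note this rebinds the outer loop variable `data`).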
                 data = dict(beginCity=dep, endCity=arr, beginDate=dt)
                 data.update(self.custom_settings.get('DEFAULT_DATA'))
                 yield scrapy.Request(
                     url=self.start_urls,
                     method="POST",
                     body=json.dumps(data),
                     meta=dict(data=data),
                     dont_filter=True,
                     callback=self.parse,
                     errback=self.errback,
                 )
Example #6
    def start_requests(self):
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                              self.version))
        result_iter = None
        while True:
            if hasattr(self, 'local'):
                if not result_iter:
                    result_iter = pubUtil.get_task(self.name)
                result = next(result_iter)
            else:
                result = pubUtil.getUrl(self.carrier, 1)
            if not result:
                time.sleep(6)
                continue
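            # Select the cookie and InstallationID for this pass based on
            # the current hour (offset by 2), rotating through the pools.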
            hour = datetime.now().hour + 2
            self.cookie = self.cookies[hour % len(self.cookies)]
            installid = self.InstallationID[hour % len(self.InstallationID)]

            for data in result:
                (dt_st, dep, to,
                 days) = vyUtil.analysisData(data)  # format the fetched task data
                # dep, to = 'CDG', 'VIE'
                for i in range(int(days)):
                    dt = vyUtil.get_real_date(dt_st, i)
                    # dt = '2018-11-01'
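                    # Record the queried date/route together with the invalidation
                    # window (settings.INVALID_TIME) for this task.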
                    self.task.append({
                        'date': dt.replace('-', ''),
                        'depAirport': dep,
                        'arrAirport': to,
                        'mins': settings.INVALID_TIME
                    })
                    dt = dt + 'T00:00:00'
                    data_list = {
                        'InstallationID': installid,
                        'AirportDateTimeList': [{
                            'MarketDateDeparture': dt,
                            'DepartureStation': dep,
                            'ArrivalStation': to,
                        }]
                    }

                    data_list.update(self.custom_settings.get('DEFAULT_DATA'))
                    yield scrapy.Request(
                        method='POST',
                        url=self.start_url,
                        headers={'Cookie': self.cookie},
                        body=json.dumps(data_list),
                        meta={'data_list': data_list},
                        callback=self.parse,
                        dont_filter=True,
                        # Default argument pins this iteration's data_list for the
                        # errback (avoids late binding across loop iterations).
                        errback=(lambda x, data_list=data_list:
                                 self.download_errback(x, data_list)))
Example #7
    def start_requests(self):
        permins = 0
        print(
            pubUtil.heartbeat(self.host_name, self.carrier, self.num, permins,
                              self.version))
        while True:
            result = pubUtil.getUrl(self.carrier, 5)
            if not result:
                logging.info('get task error')
                time.sleep(3)
                continue
            for data in result:
                (_dt, dep, to, days) = vyUtil.analysisData(data)
                for i in range(int(days)):

                    dt = (datetime.strptime(_dt, '%Y%m%d') +
                          timedelta(days=i)).strftime('%Y/%m/%d')
                    self.task.append({
                        'date': dt.replace('/', ''),
                        'depAirport': dep,
                        'arrAirport': to,
                        'mins': settings.INVALID_TIME
                    })
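                    # Indexed flight_search_parameter fields for this date/route;
                    # the static DEFAULT_POST_DATA is merged in below.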
                    post_data = {
                        "flight_search_parameter[0][departure_date]": dt,
                        "flight_search_parameter[0][departure_airport_code]":
                        dep,
                        "flight_search_parameter[0][arrival_airport_code]": to,
                    }
                    post_data.update(
                        self.custom_settings.get('DEFAULT_POST_DATA'))
                    # print(post_data)
                    yield scrapy.Request(
                        url=self.start_urls[1],
                        method="GET",
                        # body=json.dumps(post_data),
                        # formdata=post_data,
                        meta={'post_data': post_data},
                        dont_filter=True,
                        callback=self.parse,
                        errback=self.errback,
                    )
Example #8
 def start_requests(self):
     permins = 0
     print(
         pubUtil.heartbeat(self.host_name, self.name, self.num, permins,
                           self.version))
     self.get_headers()
     while True:
         result = pubUtil.getUrl(self.name, 1)
         if not result:
             logging.info('get task error')
             time.sleep(10)
             continue
         for data in result:
             (dt, dep, to, days) = vyUtil.analysisData(data)  # format the fetched task data
             # (dt, dep, to, days) = ('20181026', 'LTN', 'IAS', 30)
             dt_datetime = datetime.strptime(dt, '%Y%m%d')
             end_date = dt_datetime + timedelta(days=int(days))
             dt = dt_datetime.strftime('%Y-%m-%d')
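             # Request the departure date/route; end_date rides along in meta
             # for the date_parse callback.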
             data_post = dict(
                 DepartureDate=dt,
                 DepartureStation=dep,
                 ArrivalStation=to,
             )
             data_post.update(self.custom_settings.get('GET_DATE_DATA'))
             yield scrapy.Request(
                 method='POST',
                 url=self.start_urls[0],
                 #  formdata=data_post,
                 body=json.dumps(data_post),
                 headers=self.custom_settings.get(
                     'DEFAULT_REQUEST_HEADERS'),
                 meta={'end_date': end_date},
                 dont_filter=True,
                 callback=self.date_parse,
                  # Default arguments pin this iteration's data_post and end_date
                  # for the errback (avoids late binding across loop iterations).
                  errback=(lambda x, data_post=data_post, end_date=end_date:
                           self.download_errback(x, data_post, end_date)),
             )