Ejemplo n.º 1
0
    def parse_park(self, response):
        """Turn a park-info JSON response into an item plus follow-up requests.

        The raw response is first handed to ``save_park_json``. One
        ``ParkItem`` is yielded for the first entry of
        ``d.ListJsonPlaceInfos``; afterwards that entry's
        ``JsonFacilityInfos`` are visited from last to first and, for each
        facility, up to two POST requests are yielded depending on what
        ``self.get_env`` returns for ``CA_NOT_CRAWL_CAMPSITES`` and
        ``CA_NOT_CRAWL_RESERVATIONS``.
        """
        self.save_park_json(response)

        # The body is UTF-8 encoded JSON; only the first place entry is used.
        payload = json.loads(codecs.decode(response.body, 'utf8'))
        park_info = payload['d']['ListJsonPlaceInfos'][0]

        park_item = ParkItem()
        park_item['name'] = park_info["Name"]
        park_item['parkId'] = park_info["PlaceId"]
        park_item['contractCode'] = "ca"
        # The item id joins the park id and the contract code.
        park_item['_id'] = '%s::%s' % (park_item['parkId'],
                                       park_item['contractCode'])
        park_item['url'] = park_info["PlaceinfoUrl"]
        park_item['services'] = park_info["AllHightlights"].split(",")
        yield park_item

        # Visit every campsite group, starting with the last one listed.
        for facility in reversed(park_info['JsonFacilityInfos']):
            if not self.get_env("CA_NOT_CRAWL_CAMPSITES"):
                night_body = set_night_by_place_id_and_facility_id_on_unit_grid.copy()
                night_body['placeId'] = facility['PlaceId']
                night_body['facilityId'] = facility['FacilityId']
                # This request is tagged with a freshly incremented
                # cookiejar index rather than the incoming one.
                self.cookie_index += 1
                campsites_meta = {
                    'cookiejar': self.cookie_index,
                    'FacilityId': facility['FacilityId'],
                    'PlaceId': facility['PlaceId'],
                }
                # step 4: click reserve button, first set night by place id
                # and facility id
                yield Request(
                    url=unique_url(self.url_set_by_place_id_facility_id),
                    method="POST",
                    body=json.dumps(night_body),
                    meta=campsites_meta,
                    dont_filter=True,
                    headers={'Content-Type': 'application/json; charset=UTF-8'},
                    callback=self.after_set_park_facility,
                )

            if not self.get_env("CA_NOT_CRAWL_RESERVATIONS"):
                reservations_body = campsites_reservations_post_body.copy()
                reservations_body['FacilityId'] = facility['FacilityId']
                reservations_body['PlaceId'] = facility['PlaceId']
                # The reservations request reuses the incoming cookiejar.
                reservations_meta = {
                    'cookiejar': response.meta['cookiejar'],
                    'FacilityId': facility['FacilityId'],
                    'PlaceId': facility['PlaceId'],
                }
                # step 7: get campsites reservations in each campsite group
                yield Request(
                    url=unique_url(self.url_campsites_reservations),
                    method="POST",
                    meta=reservations_meta,
                    body=json.dumps(reservations_body),
                    dont_filter=True,
                    headers={'Content-Type': 'application/json'},
                    callback=self.parse_campsites_reservations,
                )
Ejemplo n.º 2
0
 def index_page(self, response):
     """Ask for the information of the park named in the response meta.

     A copy of the ``post_body_park_info_by_name`` template receives the
     name found in ``response.meta['parkName']`` and is POSTed as JSON,
     keeping the incoming ``cookiejar`` meta value. The reply is handled
     by ``set_select_park``.
     """
     payload = post_body_park_info_by_name.copy()
     payload['name'] = response.meta['parkName']

     # step 2: use park name get park information
     lookup_request = Request(
         url=unique_url(self.url_get_park_info_by_name),
         meta={'cookiejar': response.meta['cookiejar']},
         method="POST",
         body=json.dumps(payload),
         headers={'Content-Type': 'application/json; charset=UTF-8'},
         dont_filter=True,
         callback=self.set_select_park,
     )
     yield lookup_request
Ejemplo n.º 3
0
 def after_set_park_facility(self, response):
     """Submit the advance-search form for the facility in the response meta.

     Fills a copy of ``advance_search_form`` with the facility id, the
     place id and ``self.first_date`` formatted as MM/DD/YYYY, then yields
     a ``FormRequest`` whose reply is handled by ``parse_campsite_list``.
     The cookiejar, facility id and place id are forwarded in the meta.
     """
     meta = response.meta
     facility_id = meta['FacilityId']
     place_id = meta['PlaceId']

     fields = advance_search_form.copy()
     fields.update({
         'ctl01$mainContent$hdnFacilityid': str(facility_id),
         'ctl01$mainContent$hdnPlaceid': str(place_id),
         'ctl01$mainContent$txtDateRange': self.first_date.strftime('%m/%d/%Y'),
     })

     # step 6: get campsites by click facility
     yield FormRequest(
         url=unique_url(self.url_advance_search),
         meta={
             'cookiejar': meta['cookiejar'],
             'FacilityId': facility_id,
             'PlaceId': place_id,
         },
         formdata=fields,
         callback=self.parse_campsite_list,
     )
Ejemplo n.º 4
0
 def home_page(self, response):
     """Request the map place data for the park carried in the response meta.

     Builds the POST body from the ``park_post_body`` template, filling the
     ``googlePlaceSearchParameters`` section with the park's latitude,
     longitude and ``CityParkId`` (all as strings). The reply is handled by
     ``parse_park``; the incoming ``cookiejar`` meta value is kept.
     """
     park = response.meta['park']

     # dict.copy() is shallow: the nested search-parameter dict has to be
     # copied as well, otherwise the writes below would modify the shared
     # module-level template instead of this request's private body.
     body = park_post_body.copy()
     search_params = dict(body['googlePlaceSearchParameters'])
     search_params['Latitude'] = str(park['Latitude'])
     search_params['Longitude'] = str(park['Longitude'])
     search_params['MapboxPlaceid'] = str(park['CityParkId'])
     body['googlePlaceSearchParameters'] = search_params

     # step 5: click reserve button, get google map place data
     yield Request(url=unique_url(self.url_get_google_map_place_data),
                   method="POST",
                   meta={'cookiejar': response.meta['cookiejar']},
                   body=json.dumps(body),
                   headers={'Content-Type': 'application/json; charset=UTF-8'},
                   dont_filter=True,
                   callback=self.parse_park)
Ejemplo n.º 5
0
 def set_select_park(self, response):
     """Select the first park of the name-lookup result on the web home form.

     Decodes the UTF-8 JSON response and takes the first element of its
     ``d`` entry. A copy of the ``web_home`` form is then submitted with
     the arrival date (``self.first_date`` as MM/DD/YYYY) and that park's
     ``CityParkId``. The park dict travels in the request meta to
     ``home_page``.
     """
     lookup_result = json.loads(codecs.decode(response.body, 'utf8'))
     park = lookup_result['d'][0]

     form = web_home.copy()
     form['ctl00$ctl00$mainContent$txtArrivalDate'] = self.first_date.strftime('%m/%d/%Y')
     form['ctl00$ctl00$mainContent$hdnMasterPlaceId'] = str(park['CityParkId'])

     # step 3: set select park
     select_request = FormRequest(
         url=unique_url(self.url_webhome),
         meta={'cookiejar': response.meta['cookiejar'], 'park': park},
         method="POST",
         formdata=form,
         dont_filter=True,
         callback=self.home_page,
     )
     yield select_request
Ejemplo n.º 6
0
    def start_requests(self):
        """Yield one home-page request per park returned by get_crawl_parks.

        When ``self.get_env("DEBUG")`` is truthy the ``./receives``
        directory is recreated empty first. Parks are popped off the end of
        the list one at a time; each request is tagged with the current
        ``self.cookie_index``, which is incremented once the generator
        resumes after the yield.
        """
        if self.get_env("DEBUG"):
            dump_dir = './receives'
            # Start from an empty directory on every debug run.
            if os.path.exists(dump_dir):
                shutil.rmtree(dump_dir)
            os.makedirs(dump_dir)

        pending_parks = self.get_crawl_parks()

        separator = "======================================="
        logging.debug(separator)
        logging.debug("crawl_parks: %s", json.dumps(pending_parks))
        logging.debug(separator)

        # The list is consumed from its tail until nothing is left.
        while pending_parks:
            park = pending_parks.pop()
            request_meta = {
                'cookiejar': self.cookie_index,
                'parkName': park['name'],
            }
            # step 1: Go to reserve california home page
            yield Request(
                url=unique_url(self.url_default),
                meta=request_meta,
                dont_filter=True,
                callback=self.index_page,
            )
            self.cookie_index += 1
Ejemplo n.º 7
0
    def parse_campsite_list(self, response):
        """Yield one detail request per campsite row of the unit grid.

        The response is first handed to ``save_campsite_list_html``. The
        ``onclick`` attribute of the second cell of every ``unitdata`` row
        is passed to ``parse_campsite_from_url_link`` together with the
        place id and facility id from the response meta; the returned item
        supplies the values for ``self.url_template_campsite``. Replies are
        handled by ``parse_campsite``.
        """
        self.save_campsite_list_html(response)

        meta = response.meta
        row_xpath = '//div[@id="divUnitGridlist"]/div/table/tr[@class="unitdata"]/td[2]/@onclick'
        for onclick in response.xpath(row_xpath).extract():
            reservation_item = self.parse_campsite_from_url_link(
                onclick, meta['PlaceId'], meta['FacilityId'])

            # A status of 'a' marks the site as available.
            is_available = reservation_item['status'] == 'a'
            campsite_url = self.url_template_campsite % (
                reservation_item['facilityId'],
                reservation_item['siteId'],
                reservation_item['date'],
                is_available,
            )

            # step 7: get each campsite information
            yield Request(
                url=unique_url(campsite_url),
                meta={
                    'cookiejar': meta['cookiejar'],
                    'FacilityId': meta['FacilityId'],
                    'PlaceId': meta['PlaceId'],
                    'SiteId': reservation_item['siteId'],
                },
                dont_filter=True,
                callback=self.parse_campsite,
            )