def parse(self, response):
    """Render ``response.url`` in the Selenium driver and yield one item per trip.

    The page is loaded through ``self.driver``. The button found under
    ``#depart-container`` is then clicked repeatedly until it no longer
    appears within 15 seconds or can no longer be clicked. Finally the
    rendered page source is parsed, and every node matched by
    ``self.trips_list_xpath`` is turned into a ``BusTrip`` item populated
    from ``self.item_fields`` (a mapping of field name to XPath).

    The ``dateoftrip`` field is taken from the last ``/``-separated segment
    of the response URL.

    Note: relies on Python 2 names (``unicode``, ``dict.iteritems``).
    """
    def strip_dollar(x):
        return x.strip('$')

    self.driver.get(response.url)
    try:
        # NOTE(review): the final step ``/[@style=...]`` has no node test, so
        # this is not a valid XPath 1.0 expression and can never match an
        # element. Presumably ``div[@style=...]`` or ``*[@style=...]`` was
        # intended -- confirm against the page before changing it; the
        # string is deliberately left as it was.
        WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '//*[@id="depart-container"]/div[2]/div[1]/div/[@style="width: 0%;"]')))
    except TimeoutException:
        # Best effort: carry on with whatever has been rendered so far.
        print('Page load time out')

    button_locator = (
        By.XPATH, '//*[@id="depart-container"]/div/div/div/button')
    while True:
        try:
            # ``until`` returns the located element, so it is clicked
            # directly instead of being looked up a second time (the second
            # lookup could fail if the button vanished in between).
            more_button = WebDriverWait(self.driver, 15).until(
                EC.presence_of_element_located(button_locator))
        except TimeoutException:
            break
        try:
            more_button.click()
        except ElementNotVisibleException:
            break

    # The trip date depends only on the URL, so compute it once.
    trip_date = str(response.url).split("/")[-1].decode('unicode-escape')

    rendered_page = Selector(text=self.driver.page_source)
    for trip in rendered_page.xpath(self.trips_list_xpath):
        loader = ItemLoader(BusTrip(), selector=trip)
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = Join()
        loader.price_in = MapCompose(strip_dollar)
        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)
        loader.add_value('dateoftrip', trip_date)
        yield loader.load_item()
def parse(self, response):
    """Load the page with Selenium, expand it, and yield ``BusTrip`` items.

    Steps:

    1. Open ``response.url`` in ``self.driver`` and wait up to 15 seconds
       for an element under ``#depart-container``; a timeout is reported
       and otherwise ignored.
    2. Keep clicking the button under ``#depart-container`` until it stops
       appearing within 15 seconds or raises ``ElementNotVisibleException``.
    3. Parse the driver's page source and build one item for each node
       matched by ``self.trips_list_xpath``, filling fields from the
       ``self.item_fields`` name-to-XPath mapping and setting
       ``dateoftrip`` from the last ``/``-separated URL segment.

    Note: relies on Python 2 names (``unicode``, ``dict.iteritems``).
    """
    wait_seconds = 15

    def strip_dollar(x):
        return x.strip('$')

    def wait_for(xpath):
        """Return the element matching *xpath*, or None if the wait times out."""
        try:
            return WebDriverWait(self.driver, wait_seconds).until(
                EC.presence_of_element_located((By.XPATH, xpath)))
        except TimeoutException:
            return None

    self.driver.get(response.url)

    # NOTE(review): the last step of this expression, ``/[@style=...]``,
    # lacks a node test and is therefore not valid XPath 1.0, so it cannot
    # match anything. The intended target (``div[@style=...]``?
    # ``*[@style=...]``?) is not determinable from here -- TODO confirm.
    # The string is kept unchanged on purpose.
    loaded_marker = wait_for(
        '//*[@id="depart-container"]/div[2]/div[1]/div/[@style="width: 0%;"]')
    if loaded_marker is None:
        print('Page load time out')

    button_xpath = '//*[@id="depart-container"]/div/div/div/button'
    while True:
        # Click the element handed back by the wait itself; looking it up a
        # second time could raise if it disappeared after the wait.
        button = wait_for(button_xpath)
        if button is None:
            break
        try:
            button.click()
        except ElementNotVisibleException:
            break

    # Loop-invariant: derived from the URL only.
    url_tail = str(response.url).split("/")[-1]
    trip_date = url_tail.decode('unicode-escape')

    for trip_node in Selector(text=self.driver.page_source).xpath(
            self.trips_list_xpath):
        loader = ItemLoader(BusTrip(), selector=trip_node)
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = Join()
        loader.price_in = MapCompose(strip_dollar)
        for field_name, field_xpath in self.item_fields.iteritems():
            loader.add_xpath(field_name, field_xpath)
        loader.add_value('dateoftrip', trip_date)
        yield loader.load_item()
def parse(self, response):
    """Yield one ``BusTrip`` item for each trip row found in *response*.

    Rows are the nodes matched by ``self.trips_list_xpath``. Each item is
    filled from the ``self.item_fields`` name-to-XPath mapping, then with
    origin/destination city and location read from the element whose id is
    ``displayb<N>_0``, where ``N`` is the 1-based position of the row.
    ``dateoftrip`` is the text after the last ``=`` in the response URL.

    Note: relies on Python 2 names (``unicode``, ``dict.iteritems``).
    """
    def clean_price(x):
        return x.strip('$ \t\n\r')

    def clean_city(x):
        return x.strip(': \t\n\r')

    def clean_features(x):
        return x.replace('\t', '').replace('\n', '')

    # (field, path below the row's ``displayb<N>_0`` element), in the order
    # the values must be added to the loader.
    detail_fields = (
        ('originCity', '/td/div/ul[1]/ul/li[1]/strong/text()'),
        ('originLocation', '/td/div/ul[1]/ul/li/div[2]/a/div/text()'),
        ('destinationCity', '/td/div/ul[2]/ul/li[1]/strong/text()'),
        ('destinationLocation', '/td/div/ul[2]/ul/li/div[2]/a/div/text()'),
    )

    # The trip date depends only on the URL, so compute it once.
    trip_date = str(response.url).split("=")[-1].decode('unicode-escape')

    trip_rows = Selector(response).xpath(self.trips_list_xpath)
    for row_number, trip in enumerate(trip_rows, 1):
        loader = ItemLoader(BusTrip(), selector=trip)
        loader.default_input_processor = MapCompose(unicode.strip)
        loader.default_output_processor = Join()
        loader.price_in = MapCompose(clean_price)
        loader.originCity_in = MapCompose(clean_city)
        loader.destinationCity_in = MapCompose(clean_city)
        loader.features_in = MapCompose(clean_features)

        for field, xpath in self.item_fields.iteritems():
            loader.add_xpath(field, xpath)

        # These expressions start with ``//``, so they address the detail
        # element by its id rather than relative to the row node.
        detail_root = '//*[@id="displayb' + str(row_number) + '_0"]'
        for detail_field, detail_path in detail_fields:
            loader.add_xpath(detail_field, detail_root + detail_path)

        loader.add_value('dateoftrip', trip_date)
        yield loader.load_item()