예제 #1
0
    def parse(self, response):
        """Render ``response.url`` in Selenium and yield one item per trip.

        The URL is opened in ``self.driver``.  The button located under
        ``#depart-container`` is then clicked repeatedly until it no longer
        shows up within 15 seconds or is no longer visible (presumably a
        "load more" control -- confirm against the target page).  Finally
        the rendered page source is parsed and one ``BusTrip`` is loaded per
        node matched by ``self.trips_list_xpath``.

        Args:
            response: Response whose ``url`` is loaded in the browser.  The
                last ``/``-separated segment of that URL is stored in the
                ``dateoftrip`` field of every item.

        Yields:
            The loaded item for each node matched by
            ``self.trips_list_xpath``.
        """
        button_xpath = '//*[@id="depart-container"]/div/div/div/button'

        def strip_dollar(x):
            return x.strip('$')

        self.driver.get(response.url)
        wait = WebDriverWait(self.driver, 15)
        try:
            # NOTE(review): the last step "/[@style=...]" is a predicate
            # without a node test, which is not a valid XPath 1.0 step, so
            # this wait presumably never succeeds.  The expression is kept
            # verbatim because the intended target cannot be determined
            # from here -- TODO confirm (e.g. "div[@style=...]" or
            # "*[@style=...]").
            wait.until(
                EC.presence_of_element_located(
                    (By.XPATH,
                        '//*[@id="depart-container"]/div[2]/div[1]/div/[@style="width: 0%;"]')))
        except TimeoutException:
            # Best effort: carry on with whatever has been rendered so far.
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement.
            print('Page load time out')

        while True:
            try:
                # ``until`` returns the located element, so there is no need
                # to look the button up a second time before clicking it.
                button = wait.until(
                    EC.presence_of_element_located((By.XPATH, button_xpath)))
            except (TimeoutException, ElementNotVisibleException):
                break
            try:
                button.click()
            except ElementNotVisibleException:
                break

        # Identical for every item on the page, so compute it only once.
        dateoftrip = str(response.url).split("/")[-1].decode('unicode-escape')

        rendered = Selector(text=self.driver.page_source)
        for trips in rendered.xpath(self.trips_list_xpath):
            loader = ItemLoader(BusTrip(), selector=trips)

            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()
            loader.price_in = MapCompose(strip_dollar)

            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            loader.add_value('dateoftrip', dateoftrip)
            yield loader.load_item()
예제 #2
0
    def parse(self, response):
        """Load ``response.url`` in the Selenium driver and yield trip items.

        After opening the page in ``self.driver`` the method keeps clicking
        the button found under ``#depart-container`` until the button does
        not appear within 15 seconds or cannot be clicked because it is not
        visible.  The final page source is then parsed: every node matched
        by ``self.trips_list_xpath`` becomes one ``BusTrip`` populated from
        ``self.item_fields``.

        Args:
            response: Response providing the ``url`` to render.  The text
                after the last ``/`` of the URL is added to each item as
                ``dateoftrip``.

        Yields:
            One loaded item per node matched by ``self.trips_list_xpath``.
        """
        next_button_xpath = '//*[@id="depart-container"]/div/div/div/button'

        def strip_dollar(x):
            return x.strip('$')

        self.driver.get(response.url)
        waiter = WebDriverWait(self.driver, 15)
        try:
            # NOTE(review): "/[@style=...]" has no node test before the
            # predicate and is therefore not a valid XPath 1.0 step; this
            # wait presumably always ends in the timeout branch.  Left
            # unchanged because the intended element is not visible from
            # this code -- TODO confirm the correct expression.
            waiter.until(
                EC.presence_of_element_located((
                    By.XPATH,
                    '//*[@id="depart-container"]/div[2]/div[1]/div/[@style="width: 0%;"]'
                )))
        except TimeoutException:
            # Deliberately non-fatal; parsing continues with the current
            # page state.  Parenthesised form prints identically under the
            # Python 2 print statement.
            print('Page load time out')

        while True:
            try:
                # Reuse the element handed back by ``until`` instead of
                # querying the driver again for the same XPath.
                clickable = waiter.until(
                    EC.presence_of_element_located(
                        (By.XPATH, next_button_xpath)))
            except (TimeoutException, ElementNotVisibleException):
                break
            try:
                clickable.click()
            except ElementNotVisibleException:
                break

        # The date does not depend on the individual trip; hoisted out of
        # the item loop.
        dateoftrip = str(response.url).split("/")[-1].decode('unicode-escape')

        page = Selector(text=self.driver.page_source)
        for trips in page.xpath(self.trips_list_xpath):
            loader = ItemLoader(BusTrip(), selector=trips)

            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()
            loader.price_in = MapCompose(strip_dollar)

            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            loader.add_value('dateoftrip', dateoftrip)
            yield loader.load_item()
예제 #3
0
    def parse(self, response):
        """Yield one ``BusTrip`` per node matched by ``self.trips_list_xpath``.

        Each item is filled from ``self.item_fields`` and from four extra
        XPaths that address the element with id ``displayb<N>_0``, where
        ``<N>`` is the 1-based position of the trip in the result list.

        Args:
            response: Response to parse.  The text after the last ``=`` of
                its ``url`` is added to every item as ``dateoftrip``.

        Yields:
            The loaded item for each matched trip node.
        """
        def clean_price(x):
            return x.strip('$ \t\n\r')

        def clean_city(x):
            return x.strip(': \t\n\r')

        def clean_features(x):
            return x.replace('\t', '').replace('\n', '')

        # (field, path below the per-trip detail row) in the order the
        # values are added to the loader.
        detail_fields = (
            ('originCity', '/ul[1]/ul/li[1]/strong/text()'),
            ('originLocation', '/ul[1]/ul/li/div[2]/a/div/text()'),
            ('destinationCity', '/ul[2]/ul/li[1]/strong/text()'),
            ('destinationLocation', '/ul[2]/ul/li/div[2]/a/div/text()'),
        )

        # Same value for every item, so derive it once up front.
        dateoftrip = str(response.url).split("=")[-1].decode('unicode-escape')

        trip_nodes = Selector(response).xpath(self.trips_list_xpath)
        for position, trips in enumerate(trip_nodes, 1):
            loader = ItemLoader(BusTrip(), selector=trips)

            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()
            loader.price_in = MapCompose(clean_price)
            loader.originCity_in = MapCompose(clean_city)
            loader.destinationCity_in = MapCompose(clean_city)
            loader.features_in = MapCompose(clean_features)

            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)

            # These expressions start with "//", so they are resolved
            # against the whole document rather than relative to the trip
            # node; the row is selected through the positional id instead.
            detail_row = '//*[@id="displayb%d_0"]/td/div' % position
            for field, suffix in detail_fields:
                loader.add_xpath(field, detail_row + suffix)

            loader.add_value('dateoftrip', dateoftrip)
            yield loader.load_item()