Example #1
0
 def parse(self, response):
     """Yield one loaded timetable item per row of the ``TimeTable`` table.

     A response whose request URL equals ``self.start_urls[0]`` is treated
     as arrivals (``flight_type`` 0); any other URL as departures (1).

     Columns 1-9 are loaded into item fields in order.  Column 10 is loaded
     as ``checkin_desk`` for departures and ``comment`` for arrivals; the
     other of the two is set to an empty string.  The remote end's
     ``airport_of_*`` text is split on parentheses into city and airport,
     while the local end is hard-coded.
     """
     hxs = HtmlXPathSelector(response)
     # flight_type: 0 - arrival; 1 - departure
     flight_type = 0 if response.request.url == self.start_urls[0] else 1
     # Item fields for table columns 1-9, in column order.
     column_fields = ('flight', 'airline', 'airport_of_departure',
                      'airport_of_arrival', 'flight_status',
                      'datetime_scheduled', 'datetime_estimated',
                      'datetime_actual', 'terminal')
     if flight_type:
         tenth_field, blank_field = 'checkin_desk', 'comment'
         remote, local = 'arrival', 'departure'
     else:
         tenth_field, blank_field = 'comment', 'checkin_desk'
         remote, local = 'departure', 'arrival'
     for flight in hxs.select('//table[@id="TimeTable"]/tbody/tr'):
         loader = TimetableLoader(item=TimetableItem(), selector=flight)
         for idx, field in enumerate(column_fields, start=1):
             loader.add_xpath(field, 'td[%s]//text()' % idx)
         loader.add_xpath(tenth_field, 'td[10]//text()')
         loader.add_value('airport', u'VKO')
         item = loader.load_item()
         item[blank_field] = u''
         item['flight_type'] = flight_type
         # Only the remote end needs parsing; the local end is overwritten
         # below.  A missing or empty value yields empty city/airport
         # instead of aborting the remaining rows.
         remote_text = item.get('airport_of_%s' % remote) or u''
         parts = re.findall(r'[^\(\)]+', remote_text, re.U)
         if len(parts) == 2:
             city, airport = parts
         else:
             city, airport = (parts[0] if parts else u''), u''
         item['city_of_%s' % remote] = city
         item['airport_of_%s' % remote] = airport
         item['city_of_%s' % local] = u'Москва'
         item['airport_of_%s' % local] = u'Внуково'
         yield item
Example #2
0
 def parse(self, response):
     """Yield one loaded timetable item per row of the ``TimeTable`` table.

     A response whose request URL equals ``self.start_urls[0]`` is treated
     as arrivals (``flight_type`` 0); any other URL as departures (1).

     Columns 1-9 are loaded into item fields in order.  Column 10 is loaded
     as ``checkin_desk`` for departures and ``comment`` for arrivals; the
     other of the two is set to an empty string.  The remote end's
     ``airport_of_*`` text is split on parentheses into city and airport,
     while the local end is hard-coded.
     """
     hxs = HtmlXPathSelector(response)
     # flight_type: 0 - arrival; 1 - departure
     flight_type = 0 if response.request.url == self.start_urls[0] else 1
     # Item fields for table columns 1-9, in column order.
     column_fields = (
         "flight",
         "airline",
         "airport_of_departure",
         "airport_of_arrival",
         "flight_status",
         "datetime_scheduled",
         "datetime_estimated",
         "datetime_actual",
         "terminal",
     )
     if flight_type:
         tenth_field, blank_field = "checkin_desk", "comment"
         remote, local = "arrival", "departure"
     else:
         tenth_field, blank_field = "comment", "checkin_desk"
         remote, local = "departure", "arrival"
     for flight in hxs.select('//table[@id="TimeTable"]/tbody/tr'):
         loader = TimetableLoader(item=TimetableItem(), selector=flight)
         for idx, field in enumerate(column_fields, start=1):
             loader.add_xpath(field, "td[%s]//text()" % idx)
         loader.add_xpath(tenth_field, "td[10]//text()")
         loader.add_value("airport", u"VKO")
         item = loader.load_item()
         item[blank_field] = u""
         item["flight_type"] = flight_type
         # Only the remote end needs parsing; the local end is overwritten
         # below.  A missing or empty value yields empty city/airport
         # instead of aborting the remaining rows.
         remote_text = item.get("airport_of_%s" % remote) or u""
         parts = re.findall(r"[^\(\)]+", remote_text, re.U)
         if len(parts) == 2:
             city, airport = parts
         else:
             city, airport = (parts[0] if parts else u""), u""
         item["city_of_%s" % remote] = city
         item["airport_of_%s" % remote] = airport
         item["city_of_%s" % local] = u"Москва"
         item["airport_of_%s" % local] = u"Внуково"
         yield item
Example #3
0
 def parse_main_contents(self, flight, response, flight_type):
     """Build a partial item from one table row and request its details.

     Loads the flight number, scheduled/actual times and status from the
     row's cells, then yields a single ``Request`` for the details page.
     The partially filled item travels in ``request.meta['item']`` and the
     response is handled by ``self.parse_url_contents``.

     ``flight`` is the selector for the row, ``flight_type`` is stored on
     the item unchanged, and ``response`` is not used here.

     Raises ``IndexError`` if the row has no ``onclick`` attribute or the
     attribute contains fewer than two ``\\w+`` tokens.
     """
     loader = TimetableLoader(item=TimetableItem(), selector=flight)
     loader.add_xpath('flight', 'td[1]//text()')
     loader.add_xpath('datetime_scheduled', 'td[3]//text()')
     loader.add_xpath('datetime_actual', 'td[4]//text()')
     loader.add_xpath('flight_status', 'td[6]//text()')
     loader.add_value('airport', u'DME')
     loader.add_value('flight_type', flight_type)
     loader.add_value('terminal', u'')
     item = loader.load_item()
     # The second word-like token of the onclick handler is used as the id.
     onclick = flight.select('@onclick').extract()[0]
     details = re.findall(r'\w+', onclick)[1]
     url = 'http://www.domodedovo.ru/ru/main/airindicator/detailsnew2.asp?id=%s' % details
     # Pass the bound method directly: a lambda wrapper adds nothing and
     # prevents the request from being serialized.
     yield Request(url, callback=self.parse_url_contents,
                   meta={'item': item})
Example #4
0
 def parse(self, response):
     """Yield one loaded timetable item per flight row of the LED board.

     A response whose request URL is one of the first two ``start_urls``
     is treated as arrivals (``flight_type`` 0); any other as departures
     (1).  Only every second selected row is processed.  The text of
     column 2 is split on parentheses into the remote city and airport;
     the local end is hard-coded.  The terminal is the last character of
     the request URL.

     Raises ``IndexError`` if a processed row has no text in column 2.
     """
     hxs = HtmlXPathSelector(response)
     # flight_type: 0 - arrival; 1 - departure
     flight_type = 0 if response.request.url in self.start_urls[:2] else 1
     if flight_type:
         remote, local = 'arrival', 'departure'
     else:
         remote, local = 'departure', 'arrival'
     # Same for every row, so compute it once.  Decode only byte strings so
     # the code also works where the URL is already text.
     terminal = response.request.url[-1:]
     if isinstance(terminal, bytes):
         terminal = terminal.decode('utf-8')
     flights = hxs.select(
         '//table[@class="tablo tabloBigNew bigTableZebra"]/tr[position() != 1 and position() != 8]'
     )
     for flight in flights[::2]:
         loader = TimetableLoader(item=TimetableItem(), selector=flight)
         loader.add_xpath('flight', 'td[1]//text()')
         loader.add_xpath('datetime_scheduled', 'td[3]//text()')
         loader.add_xpath('datetime_actual', 'td[4]//text()')
         loader.add_xpath('flight_status', 'td[5]//text()')
         loader.add_xpath('airline', 'td[6]//text()')
         remote_text = flight.select('td[2]//text()').extract()[0]
         parts = re.findall(r'[^\(\)]+', remote_text, re.U)
         if len(parts) == 2:
             city, airport = parts
         else:
             # Anything other than exactly two parts: keep the first part
             # (if any) as the city and leave the airport empty.
             city, airport = (parts[0] if parts else u''), u''
         loader.add_value('city_of_%s' % remote, city)
         loader.add_value('airport_of_%s' % remote, airport)
         loader.add_value('city_of_%s' % local, u'Санкт-Петербург')
         loader.add_value('airport_of_%s' % local, u'Пулково')
         loader.add_value('terminal', terminal)
         loader.add_value('airport', u'LED')
         yield loader.load_item()
Example #5
0
    def parse_main_contents(self, flight, response):
        """Build a partial item from one table row and request its details.

        The direction comes from the row's ``class`` attribute: rows with
        the ``sA`` class are arrivals (``flight_type`` 0), all others are
        departures (1).  The city in column 4 is stored as the remote end
        (origin for arrivals, destination for departures) and the local end
        is hard-coded.  Month and day of the loaded datetimes are replaced
        with today's.

        Yields a single ``Request`` for the flight's details page.  The
        item travels in ``request.meta['item']`` and the response is handled
        by ``self.parse_url_contents``.  ``response`` is not used here.

        Raises ``IndexError`` if the row has no ``class`` attribute or no
        link in column 2.
        """
        # flight_type: 0 - arrival; 1 - departure
        row_classes = flight.select('@class').extract()[0].split()
        flight_type = 0 if 'sA' in row_classes else 1
        # For a departure the remote city is where the flight arrives; for
        # an arrival it is where the flight departed from.  The earlier
        # mapping was reversed relative to the flight_type convention above.
        if flight_type:
            remote, local = 'arrival', 'departure'
        else:
            remote, local = 'departure', 'arrival'
        loader = TimetableLoader(item=TimetableItem(), selector=flight)
        loader.add_xpath('flight', 'td[2]//text()')
        loader.add_xpath('airline', 'td[3]//@alt')
        loader.add_xpath('city_of_%s' % remote, 'td[4]//text()')
        loader.add_xpath('flight_status', 'td[5]//text()')
        loader.add_xpath('datetime_scheduled', 'td[7]//text()')
        loader.add_xpath('datetime_estimated', 'td[8]//text()')
        loader.add_xpath('datetime_actual', 'td[9]//text()')
        loader.add_xpath('terminal', 'td[10]//text()')
        loader.add_value('airport', u'SVO')
        loader.add_value('city_of_%s' % local, u'Москва')
        loader.add_value('airport_of_%s' % local, u'Шереметьево')
        item = loader.load_item()
        today = datetime.now()
        # NOTE(review): replace() keeps the year produced by the loader; if
        # that year is not a leap year this raises ValueError on Feb 29 -
        # confirm what year the loader assigns.
        for field in ('datetime_scheduled', 'datetime_estimated',
                      'datetime_actual'):
            if item.get(field):
                item[field] = item[field].replace(
                    month=today.month, day=today.day)
        item['flight_type'] = flight_type

        url = 'http://svo.aero%s' % flight.select('td[2]//a/@href').extract()[0]
        # Pass the bound method directly: a lambda wrapper adds nothing and
        # prevents the request from being serialized.
        yield Request(url, callback=self.parse_url_contents,
                      meta={'item': item})
Example #6
0
 def parse(self, response):
     """Yield one loaded timetable item per flight row of the LED board.

     A response whose request URL is one of the first two ``start_urls``
     is treated as arrivals (``flight_type`` 0); any other as departures
     (1).  Only every second selected row is processed.  The text of
     column 2 is split on parentheses into the remote city and airport;
     the local end is hard-coded.  The terminal is the last character of
     the request URL.

     Raises ``IndexError`` if a processed row has no text in column 2.
     """
     hxs = HtmlXPathSelector(response)
     # flight_type: 0 - arrival; 1 - departure
     flight_type = 0 if response.request.url in self.start_urls[:2] else 1
     if flight_type:
         remote, local = 'arrival', 'departure'
     else:
         remote, local = 'departure', 'arrival'
     # Same for every row, so compute it once.  Decode only byte strings so
     # the code also works where the URL is already text.
     terminal = response.request.url[-1:]
     if isinstance(terminal, bytes):
         terminal = terminal.decode('utf-8')
     flights = hxs.select('//table[@class="tablo tabloBigNew bigTableZebra"]/tr[position() != 1 and position() != 8]')
     for flight in flights[::2]:
         loader = TimetableLoader(item=TimetableItem(), selector=flight)
         loader.add_xpath('flight', 'td[1]//text()')
         loader.add_xpath('datetime_scheduled', 'td[3]//text()')
         loader.add_xpath('datetime_actual', 'td[4]//text()')
         loader.add_xpath('flight_status', 'td[5]//text()')
         loader.add_xpath('airline', 'td[6]//text()')
         remote_text = flight.select('td[2]//text()').extract()[0]
         parts = re.findall(r'[^\(\)]+', remote_text, re.U)
         if len(parts) == 2:
             city, airport = parts
         else:
             # Anything other than exactly two parts: keep the first part
             # (if any) as the city and leave the airport empty.
             city, airport = (parts[0] if parts else u''), u''
         loader.add_value('city_of_%s' % remote, city)
         loader.add_value('airport_of_%s' % remote, airport)
         loader.add_value('city_of_%s' % local, u'Санкт-Петербург')
         loader.add_value('airport_of_%s' % local, u'Пулково')
         loader.add_value('terminal', terminal)
         loader.add_value('airport', u'LED')
         yield loader.load_item()
Example #7
0
 def parse_main_contents(self, flight, response, flight_type):
     """Build a partial item from one table row and request its details.

     Loads the flight number, scheduled/actual times and status from the
     row's cells, then yields a single ``Request`` for the details page.
     The partially filled item travels in ``request.meta['item']`` and the
     response is handled by ``self.parse_url_contents``.

     ``flight`` is the selector for the row, ``flight_type`` is stored on
     the item unchanged, and ``response`` is not used here.

     Raises ``IndexError`` if the row has no ``onclick`` attribute or the
     attribute contains fewer than two ``\\w+`` tokens.
     """
     loader = TimetableLoader(item=TimetableItem(), selector=flight)
     loader.add_xpath('flight', 'td[1]//text()')
     loader.add_xpath('datetime_scheduled', 'td[3]//text()')
     loader.add_xpath('datetime_actual', 'td[4]//text()')
     loader.add_xpath('flight_status', 'td[6]//text()')
     loader.add_value('airport', u'DME')
     loader.add_value('flight_type', flight_type)
     loader.add_value('terminal', u'')
     item = loader.load_item()
     # The second word-like token of the onclick handler is used as the id.
     onclick = flight.select('@onclick').extract()[0]
     details = re.findall(r'\w+', onclick)[1]
     url = 'http://www.domodedovo.ru/ru/main/airindicator/detailsnew2.asp?id=%s' % details
     # Pass the bound method directly: a lambda wrapper adds nothing and
     # prevents the request from being serialized.
     yield Request(url, callback=self.parse_url_contents,
                   meta={'item': item})