def _build_suburban_bus_id(bus):
    """Build the "sub_<number>_<station>" identifier for one bus record.

    The number is read through ``in_dict``, stripped with
    ``no_whitespaces`` and has commas turned into underscores.  The
    station name is transliterated with ``unidecode``, lower-cased, and
    cleaned so that '/' and ' ' become '_' while '.' and "'" are dropped.
    """
    number = in_dict(bus, 'number')
    number = no_whitespaces(number).replace(',', '_')

    # The replacement order matters only for readability here: the four
    # characters are distinct, but keep it stable so ids do not change.
    station = (
        unidecode(bus['station'])
        .lower()
        .replace('/', '_')
        .replace('.', '')
        .replace('\'', '')
        .replace(' ', '_')
    )
    return "sub_" + unidecode(number) + '_' + station


def parse_schedules_suburban():
    """Download, enrich and store the suburban bus list.

    Steps:
      1. Fetch the page at ``SuburbanParser.SUBURBAN_TRANSPORT_PAGE_URL``
         and parse it into a sequence of bus records with
         ``SuburbanParser.parse_html``.
      2. For each record, if it carries a truthy 'from_city' and/or
         'to_city' value, pass that value (whitespace removed) to
         ``SuburbanScheduleParser.parse_schedule`` and store the result
         under 'schedule_from_city' / 'schedule_to_city'.
      3. Store a generated identifier under 'id'.
      4. Save all records to "suburban_transport.json" inside
         ``parser_configs.directories["JSON_DIR"]``.

    The records are modified in place; nothing is returned.
    """
    html = parser_utils.download_page(
        SuburbanParser.SUBURBAN_TRANSPORT_PAGE_URL
    )
    suburban_buses = SuburbanParser.parse_html(html)

    for bus in suburban_buses:
        # Both directions are handled identically; only the key differs.
        for direction in ('from_city', 'to_city'):
            # NOTE(review): the value is presumably a link to the schedule
            # page for that direction -- confirm against parse_schedule.
            source = in_dict(bus, direction)
            if source:
                bus['schedule_' + direction] = (
                    SuburbanScheduleParser.parse_schedule(
                        no_whitespaces(source)
                    )
                )

        bus['id'] = _build_suburban_bus_id(bus)

    parser_utils.save_json_file(
        (parser_configs.directories["JSON_DIR"] +
         "suburban_transport.json"),
        suburban_buses
    )
def handle_data(self, data):
    """Consume one chunk of text while inside the schedule table.

    Nothing happens unless ``self._schedule_table_started`` is set.

    While ``self._table_header_started`` is set, the text is normalised
    (``no_whitespaces`` + lower-case) and appended to
    ``self._table_header``; if it is a key of
    ``SuburbanParser.COLUMNS_HEADERS`` the mapped name is recorded in
    ``self._table_header_dict`` with the value ``_column_counter - 1``.

    Otherwise the text is collected as the next cell of the current row
    (``self._current_bus``).  Once the row holds ``_column_counter``
    cells it is appended to ``self._result`` and a fresh row is started.
    """
    if not self._schedule_table_started:
        return

    if self._table_header_started:
        header_text = no_whitespaces(unicode(data)).lower()
        self._table_header.append(header_text)

        known_headers = SuburbanParser.COLUMNS_HEADERS
        if header_text in known_headers:
            # NOTE(review): _column_counter is presumably advanced before
            # the header text arrives, hence the "- 1" -- confirm.
            column_index = self._column_counter - 1
            self._table_header_dict[known_headers[header_text]] = column_index
        return

    # Row already full: ignore any further text.
    if self._current_bus_counter >= self._column_counter:
        return

    self._current_bus.append(unicode(data))
    self._current_bus_counter += 1

    row_complete = self._current_bus_counter == self._column_counter
    if row_complete:
        self._result.append(self._current_bus)
        self._current_bus = []
        self._current_bus_counter = 0