import codecs
import csv
from datetime import datetime

# Project-local dependencies. The exact module names below are assumptions
# based on how the symbols are used in this script; adjust to the repo layout.
# VERSION_EPOCH and massage_name are assumed to be defined elsewhere in it.
from stations_pb2 import Station
from gtfs_utils import Gtfs
import mdst


def read_stops_from_csv(db, csv_f):
    """Read an extra-stops CSV and push each row into the MdST database."""
    exread = csv.DictReader(csv_f)
    for stop in exread:
        s = Station()
        # int(x, 0) auto-detects the base, so reader_id may be decimal or 0x-hex.
        s.id = int(stop['reader_id'], 0)
        if stop.get('stop_name'):
            s.name.english = stop['stop_name']
        if stop.get('local_name'):
            s.name.local = stop['local_name']
        if stop.get('short_name'):
            s.name.english_short = stop['short_name']
        if stop.get('operator_id'):
            s.operator_id = int(stop['operator_id'])
        if stop.get('line_id'):
            s.line_id.extend([int(x.strip()) for x in stop['line_id'].split(',')])
        y = stop.get('stop_lat', '').strip()
        x = stop.get('stop_lon', '').strip()
        if y and x:
            s.latitude = float(y)
            s.longitude = float(x)
        db.push_station(s)
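
# Example of a (hypothetical) extra-stops CSV that read_stops_from_csv accepts.
# Only reader_id is required; every other column is optional, and reader_id may
# be written in hex thanks to int(x, 0):
#
#   reader_id,stop_name,local_name,short_name,operator_id,line_id,stop_lat,stop_lon
#   0x0101,Central Station,Station Centraal,Central,2,"3,5",52.379,4.900
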
def compile_stops_from_gtfs(input_gtfs_f,
                            output_f,
                            all_matching_f=None,
                            version=None,
                            strip_suffixes='',
                            agency_id=-1,
                            tts_hint_language=None,
                            operators_f=None,
                            extra_f=None,
                            local_languages=None,
                            license_notice_f=None):
    if all_matching_f is not None:
        all_matching_f = [codecs.getreader('utf-8-sig')(x) for x in all_matching_f]
    if operators_f is not None:
        operators_f = codecs.getreader('utf-8-sig')(operators_f)
    if extra_f is not None:
        extra_f = codecs.getreader('utf-8-sig')(extra_f)

    # Normalise the suffix list: trim whitespace and lowercase for matching.
    strip_suffixes = [x.strip().lower() for x in strip_suffixes.split(',')]

    all_gtfs = [Gtfs(x) for x in input_gtfs_f]
    first_gtfs = all_gtfs[0]

    if version is None:
        try:
            feed_info = first_gtfs.open('feed_info.txt')
        except KeyError:
            # feed_info.txt is not in the file. Use the timestamp of the
            # newest file in the archive as the feed start date instead.
            feed_start_date = None
            for f in first_gtfs.infolist():
                ts = datetime(*f.date_time)
                if feed_start_date is None or feed_start_date < ts:
                    feed_start_date = ts
        else:
            row = next(feed_info)
            feed_start_date = row['feed_start_date']
            assert len(feed_start_date) == 8
            feed_start_date = datetime.strptime(feed_start_date, '%Y%m%d')

        # The version number is the number of days since the version epoch.
        version = (feed_start_date - VERSION_EPOCH).days
        print('Data version: %s (%s)' % (version, feed_start_date.date().isoformat()))

    operators = {}
    if operators_f is not None:
        operators = mdst.read_operators_from_csv(operators_f)
        operators_f.close()

    db = mdst.MdstWriter(
        fh=open(output_f, 'wb'),
        version=version,
        operators=operators,
        local_languages=local_languages.split(',') if local_languages is not None else [],
        tts_hint_language=tts_hint_language,
        license_notice_f=license_notice_f,
    )

    station_count = 0

    for num, gtfs in enumerate(all_gtfs):
        stops = gtfs.open('stops.txt')

        # See if there is a matching file for this GTFS feed.
        if all_matching_f is not None and len(all_matching_f) > num:
            matching_f = all_matching_f[num]
        else:
            matching_f = None

        if matching_f is None:
            # No matching data, dump all stops.
            stop_map = map(
                lambda stop: [
                    stop['stop_id'],
                    massage_name(stop['stop_name'], strip_suffixes),
                    stop['stop_lat'].strip(),
                    stop['stop_lon'].strip(),
                ], stops)

            for stop_id, stop_name, lat, lon in stop_map:
                s = Station()
                s.id = int(stop_id)
                s.name.english = stop_name
                if lat and lon:
                    s.latitude = float(lat)
                    s.longitude = float(lon)
                db.push_station(s)
                station_count += 1
        else:
            # Matching data is available. Let's use that.
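            # Example of a (hypothetical) matching CSV: each row ties a GTFS
            # stop_code (or stop_id) to the card's reader_id, optionally with
            # a short name. reader_id may be hex, as it is parsed with int(x, 0):
            #
            #   stop_code,reader_id,short_name
            #   1234,0x21,Central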
            matching = csv.DictReader(matching_f)
            stop_codes = {}
            stop_ids = {}
            short_names = {}
            for match in matching:
                if match.get('stop_code'):
                    stop_codes.setdefault(match['stop_code'], []).append(match['reader_id'])
                elif match.get('stop_id'):
                    stop_ids.setdefault(match['stop_id'], []).append(match['reader_id'])
                else:
                    raise Exception('neither stop_id nor stop_code specified in row')

                if match.get('short_name'):
                    short_names[match['reader_id']] = match['short_name']

            total_gtfs_stations = 0
            dropped_gtfs_stations = 0

            # Now run through the stops.
            for stop in stops:
                # Preprocess the stop data. Coordinates may be blank in some
                # feeds, so guard before converting.
                name = massage_name(stop['stop_name'], strip_suffixes)
                y = stop['stop_lat'].strip()
                x = stop['stop_lon'].strip()
                y = float(y) if y else None
                x = float(x) if x else None
                used = False

                # Insert a row for every reader_id matched via stop_id, then
                # every reader_id matched via stop_code.
                reader_ids = (
                    stop_ids.get(stop.get('stop_id', 'stop_id_absent'), []) +
                    stop_codes.get(stop.get('stop_code', 'stop_code_absent'), []))
                for reader_id in reader_ids:
                    s = Station()
                    s.id = int(reader_id, 0)
                    s.name.english = name
                    if y and x:
                        s.latitude = y
                        s.longitude = x
                    if reader_id in short_names:
                        s.name.english_short = short_names[reader_id]
                    if agency_id >= 0:
                        s.operator_id = agency_id
                    db.push_station(s)
                    station_count += 1
                    used = True

                total_gtfs_stations += 1
                if not used:
                    dropped_gtfs_stations += 1

            matching_f.close()
            print('Finished parsing GTFS %d. Here are the stats:' % num)
            print(' - Dropped %d out of %d GTFS stations' % (
                dropped_gtfs_stations, total_gtfs_stations))
            print()

    if extra_f is not None:
        mdst.read_stops_from_csv(db, extra_f)
        extra_f.close()

    index_end_off = db.finalise()

    print('Finished writing database. Here are the stats:')
    print(' - total ............ %8d stations' % station_count)
    print('                      %8d bytes' % index_end_off)
    print()
    station_count = float(station_count)
    print(' - header ........... %8d bytes' % db.stationlist_off)
    stations_len = db.index_off - db.stationlist_off
    print(' - stations ......... %8d bytes (%.1f per record)' % (
        stations_len, stations_len / station_count))
    index_len = index_end_off - db.index_off
    print(' - index ............ %8d bytes (%.1f per record)' % (
        index_len, index_len / station_count))
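
# Minimal usage sketch for compile_stops_from_gtfs. The file names and suffix
# list here are illustrative, not part of this script:
#
#   with open('gtfs.zip', 'rb') as gtfs_f:
#       compile_stops_from_gtfs([gtfs_f], 'stops.mdst',
#                               strip_suffixes='station,railway station',
#                               tts_hint_language='en')
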
station_count = 0
for row in cur:
    operator_id = operators[row[3]][0]
    line_id = lines[row[4]][0]

    # pack an int with the area/line/station code
    station_id = (((int(row[0]) & 0xff) << 16) +
                  ((int(row[1]) & 0xff) << 8) +
                  (int(row[2]) & 0xff))

    # create the record
    s = Station()
    s.id = station_id
    s.name.local = row[5]
    s.name.english = row[6]
    if row[7] is not None and row[8] is not None:
        s.latitude = row[7]
        s.longitude = row[8]
    #else:
    #    print('Missing location: (%d) %s' % (s.id, s.name.english))
    s.operator_id = operator_id
    s.line_id.append(line_id)

    db.push_station(s)
    station_count += 1

print('Building index...')
index_end_off = db.finalise()

print('Finished writing database. Here are the stats:')
print(' - total ............ %8d stations' % station_count)
print('                      %8d bytes' % index_end_off)
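
# Worked example of the area/line/station packing above (values illustrative):
# area=0x01, line=0x02, station=0x03 packs to 0x010203 == 66051.
#
#   assert (((0x01 & 0xff) << 16) +
#           ((0x02 & 0xff) << 8) +
#           (0x03 & 0xff)) == 0x010203
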
    line_id = lines[row['city'].strip()]

    # Skip TLS
    if operator_id == 0:
        continue

    # Pack an int with the company + station code. Shift so Connexxion is 0;
    # this way the integers are shorter.
    station_id = ((operator_id - 1) << 16) + (int(row['ovcid']) & 0xffff)

    # create the record
    s = Station()
    s.id = station_id
    s.name.english = row['name'].strip()
    # Coordinates may be absent or blank; guard before converting.
    lat = (row.get('lat') or '').strip()
    lon = (row.get('lon') or '').strip()
    if lat and lon:
        s.latitude = float(lat)
        s.longitude = float(lon)
    #else:
    #    print('Missing location: (%d) %s' % (s.id, s.name.english))
    s.operator_id = operator_id
    if line_id is not None:
        s.line_id.append(line_id)

    # Write it out
    db.push_station(s)
    station_count += 1

print('Building index...')
index_end_off = db.finalise()

print('Finished writing database. Here are the stats:')
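
# Worked example of the company + station packing above (values illustrative):
# operator_id=1 (Connexxion) maps to high half-word 0, so station_id == ovcid;
# operator_id=2 with ovcid=0x0123 gives ((2 - 1) << 16) + 0x0123 == 0x10123.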