def compile_stops_from_csv(csv_f, output_f, version=None, tts_hint_language=None, operators_f=None, local_languages=None, lines_f=None, notice_f=None):
    """Compile an MdST stop database from CSV input files.

    Args:
        csv_f: binary file handle of the stops CSV (UTF-8, BOM tolerated).
        output_f: path of the MdST database file to write.
        version: database version number, or None.
        tts_hint_language: language hint for text-to-speech engines.
        operators_f: optional binary file handle of an operators CSV.
        local_languages: optional comma-separated string of local language
            codes; split into a list for the writer.
        lines_f: optional binary file handle of a lines CSV.
        notice_f: optional file handle containing a licence notice to embed.
    """
    # Wrap the raw stream so a leading UTF-8 BOM, if present, is stripped.
    csv_f = codecs.getreader('utf-8-sig')(csv_f)

    operators = {}
    lines = {}

    if operators_f is not None:
        operators_f = codecs.getreader('utf-8-sig')(operators_f)
        operators = mdst.read_operators_from_csv(operators_f)
        operators_f.close()

    if lines_f is not None:
        lines_f = codecs.getreader('utf-8-sig')(lines_f)
        lines = mdst.read_lines_from_csv(lines_f)
        lines_f.close()

    db = mdst.MdstWriter(
        fh=open(output_f, 'wb'),
        version=version,
        operators=operators,
        lines=lines,
        local_languages=local_languages.split(',') if local_languages is not None else [],
        tts_hint_language=tts_hint_language,
        license_notice_f=notice_f,
    )

    mdst.read_stops_from_csv(db, csv_f)
    csv_f.close()

    # finalise() returns the index end offset, which is not needed here.
    db.finalise()
    print('Finished writing database.')
def compile_stops_from_csv(csv_f, output_f, version=None, tts_hint_language=None, operators_f=None, local_languages=None, lines_f=None, notice_f=None):
    """Compile an MdST stop database from CSV input files.

    NOTE(review): this redefines compile_stops_from_csv and therefore shadows
    the earlier definition in this file; the two should be unified.  A
    backward-compatible ``notice_f`` parameter has been added so callers of
    the earlier variant keep working.

    Args:
        csv_f: binary file handle of the stops CSV (UTF-8, BOM tolerated).
        output_f: path of the MdST database file to write.
        version: database version number, or None.
        tts_hint_language: language hint for text-to-speech engines.
        operators_f: optional binary file handle of an operators CSV.
        local_languages: optional comma-separated string of local language
            codes; split into a list for the writer.
        lines_f: optional binary file handle of a lines CSV, with columns
            ``id`` (int, 0x-prefixed hex accepted), ``name``, and optional
            ``short_name`` / ``local_name``.
        notice_f: optional file handle containing a licence notice to embed.
    """
    # Wrap the raw stream so a leading UTF-8 BOM, if present, is stripped.
    csv_f = codecs.getreader('utf-8-sig')(csv_f)

    operators = {}
    lines = {}

    if operators_f is not None:
        operators_f = codecs.getreader('utf-8-sig')(operators_f)
        operators = mdst.read_operators_from_csv(operators_f)
        operators_f.close()

    if lines_f is not None:
        lines_f = codecs.getreader('utf-8-sig')(lines_f)
        for line in csv.DictReader(lines_f):
            linepb = Line()
            linepb.name.english = line['name']
            if line.get('short_name'):
                linepb.name.english_short = line['short_name']
            if line.get('local_name'):
                linepb.name.local = line['local_name']
            # base 0: accepts decimal and 0x-prefixed hexadecimal ids.
            lines[int(line['id'], 0)] = linepb

    db = mdst.MdstWriter(
        fh=open(output_f, 'wb'),
        version=version,
        operators=operators,
        lines=lines,
        local_languages=local_languages.split(',') if local_languages is not None else [],
        tts_hint_language=tts_hint_language,
        license_notice_f=notice_f,
    )

    mdst.read_stops_from_csv(db, csv_f)
    csv_f.close()

    # finalise() returns the index end offset, which is not needed here.
    db.finalise()
    print('Finished writing database.')
def _gtfs_data_version(gtfs):
    """Derive a data version date from a GTFS archive.

    Prefers feed_info.txt's feed_start_date; if that file is absent, falls
    back to the newest member timestamp inside the ZIP archive.

    Returns:
        (version, feed_start_date) where version is the day count since
        VERSION_EPOCH and feed_start_date is a datetime.
    """
    try:
        feed_info = gtfs.open('feed_info.txt')
    except KeyError:
        # feed_info.txt is not in the file. Find the newest file in the
        # archive instead.
        feed_start_date = None
        for f in gtfs.infolist():
            ts = datetime(*f.date_time)
            if feed_start_date is None or feed_start_date < ts:
                feed_start_date = ts
    else:
        row = next(feed_info)
        feed_start_date = row['feed_start_date']
        assert len(feed_start_date) == 8
        feed_start_date = datetime.strptime(feed_start_date, '%Y%m%d')

    return (feed_start_date - VERSION_EPOCH).days, feed_start_date


def _push_matched_station(db, reader_id, name, lat, lon, short_names, agency_id):
    """Build one Station for a matched reader_id and push it to the writer."""
    s = Station()
    # base 0: accepts decimal and 0x-prefixed hexadecimal reader ids.
    s.id = int(reader_id, 0)
    s.name.english = name
    # NOTE(review): a coordinate of exactly 0.0 is treated as "absent" here,
    # preserving the original truthiness check.
    if lat and lon:
        s.latitude = lat
        s.longitude = lon
    if reader_id in short_names:
        s.name.english_short = short_names[reader_id]
    if agency_id >= 0:
        s.operator_id = agency_id
    db.push_station(s)


def compile_stops_from_gtfs(input_gtfs_f, output_f, all_matching_f=None, version=None, strip_suffixes='', agency_id=-1, tts_hint_language=None, operators_f=None, extra_f=None, local_languages=None, license_notice_f=None):
    """Compile an MdST stop database from one or more GTFS feeds.

    Args:
        input_gtfs_f: iterable of binary file handles of GTFS ZIP archives.
        output_f: path of the MdST database file to write.
        all_matching_f: optional list of binary file handles of matching CSVs
            (one per GTFS feed) that map stop_id/stop_code to reader_id.
            Feeds without a matching CSV have all their stops dumped.
        version: database version; if None, derived from the first feed.
        strip_suffixes: comma-separated, case-insensitive name suffixes to
            strip from stop names.
        agency_id: operator id to record on matched stations; -1 disables.
        tts_hint_language: language hint for text-to-speech engines.
        operators_f: optional binary file handle of an operators CSV.
        extra_f: optional binary file handle of extra stops in CSV format.
        local_languages: optional comma-separated string of local language
            codes; split into a list for the writer.
        license_notice_f: optional file handle with a licence notice to embed.
    """
    if all_matching_f is not None:
        all_matching_f = [codecs.getreader('utf-8-sig')(x) for x in all_matching_f]
    if operators_f is not None:
        operators_f = codecs.getreader('utf-8-sig')(operators_f)
    if extra_f is not None:
        extra_f = codecs.getreader('utf-8-sig')(extra_f)

    # Trim whitespace and normalise case so suffix matching is insensitive.
    strip_suffixes = [x.strip().lower() for x in strip_suffixes.split(',')]

    all_gtfs = [Gtfs(x) for x in input_gtfs_f]
    first_gtfs = all_gtfs[0]

    if version is None:
        version, feed_start_date = _gtfs_data_version(first_gtfs)
        print('Data version: %s (%s)' % (version, feed_start_date.date().isoformat()))

    operators = {}
    if operators_f is not None:
        operators = mdst.read_operators_from_csv(operators_f)
        operators_f.close()

    db = mdst.MdstWriter(
        fh=open(output_f, 'wb'),
        version=version,
        operators=operators,
        local_languages=local_languages.split(',') if local_languages is not None else [],
        tts_hint_language=tts_hint_language,
        license_notice_f=license_notice_f,
    )

    station_count = 0

    for num, gtfs in enumerate(all_gtfs):
        stops = gtfs.open('stops.txt')

        # See if there is a matching file for this feed.
        if all_matching_f is not None and len(all_matching_f) > num:
            matching_f = all_matching_f[num]
        else:
            matching_f = None

        if matching_f is None:
            # No matching data, dump all stops.
            for stop in stops:
                s = Station()
                s.id = int(stop['stop_id'])
                s.name.english = massage_name(stop['stop_name'], strip_suffixes)
                lat = stop['stop_lat'].strip()
                lon = stop['stop_lon'].strip()
                if lat and lon:
                    s.latitude = float(lat)
                    s.longitude = float(lon)
                db.push_station(s)
                station_count += 1
        else:
            # Matching data is available. Lets use that.
            matching = csv.DictReader(matching_f)

            # reader_id lists keyed by GTFS stop_code / stop_id, plus
            # optional per-reader short names.
            stop_codes = {}
            stop_ids = {}
            short_names = {}

            for match in matching:
                if match.get('stop_code'):
                    stop_codes.setdefault(match['stop_code'], []).append(match['reader_id'])
                elif match.get('stop_id'):
                    stop_ids.setdefault(match['stop_id'], []).append(match['reader_id'])
                else:
                    raise Exception('neither stop_id or stop_code specified in row')

                if match.get('short_name'):
                    short_names[match['reader_id']] = match['short_name']

            total_gtfs_stations = 0
            dropped_gtfs_stations = 0

            # Now run through the stops.
            for stop in stops:
                # Preprocess stop data; blank coordinates are treated as
                # absent instead of crashing float().
                name = massage_name(stop['stop_name'], strip_suffixes)
                lat_s = stop['stop_lat'].strip()
                lon_s = stop['stop_lon'].strip()
                y = float(lat_s) if lat_s else None
                x = float(lon_s) if lon_s else None

                used = False

                # Insert rows where a stop_id was matched, then rows where a
                # stop_code was matched (same order as before).
                matched_readers = (
                    stop_ids.get(stop.get('stop_id', 'stop_id_absent'), []) +
                    stop_codes.get(stop.get('stop_code', 'stop_code_absent'), []))
                for reader_id in matched_readers:
                    _push_matched_station(db, reader_id, name, y, x, short_names, agency_id)
                    station_count += 1
                    used = True

                total_gtfs_stations += 1
                if not used:
                    dropped_gtfs_stations += 1

            matching_f.close()
            print('Finished parsing GTFS ' + str(num) + '. Here\'s the stats:')
            print(' - Dropped %d out of %d GTFS stations' % (dropped_gtfs_stations, total_gtfs_stations))
            print()

    if extra_f is not None:
        mdst.read_stops_from_csv(db, extra_f)
        extra_f.close()

    index_end_off = db.finalise()

    print('Finished writing database. Here\'s the stats:')
    print(' - total ............ %8d stations' % station_count)
    print(' %8d bytes' % index_end_off)
    print()

    # Guard against ZeroDivisionError when no stations were written.
    if station_count:
        station_count = float(station_count)
        print(' - header ........... %8d bytes' % db.stationlist_off)
        stations_len = (db.index_off - db.stationlist_off)
        print(' - stations ......... %8d bytes (%.1f per record)' % (stations_len, stations_len / station_count))
        index_len = (index_end_off - db.index_off)
        print(' - index ............ %8d bytes (%.1f per record)' % (index_len, index_len / station_count))