Example #1
0
def read_stops_from_csv(db, csv_f):
  """Read station rows from a CSV file and push each one into ``db``.

  Every row must have a ``reader_id`` column; it is parsed with
  ``int(value, 0)``, so base prefixes such as ``0x`` are accepted.  The
  columns ``stop_name``, ``local_name``, ``short_name``, ``operator_id``,
  ``line_id`` (comma-separated integers), ``stop_lat`` and ``stop_lon`` are
  optional and are only applied when present and non-empty.

  Args:
    db: Writer object; ``db.push_station(station)`` is called once per row.
    csv_f: Text file object (or other iterable of lines) handed to
      ``csv.DictReader``.
  """
  for stop in csv.DictReader(csv_f):
    s = Station()
    s.id = int(stop['reader_id'], 0)

    # ``.get()`` yields None both for absent columns and for short rows
    # (DictReader fills missing trailing values with None), so a plain
    # truthiness test covers "absent", "None" and "empty string" alike.
    if stop.get('stop_name'):
      s.name.english = stop['stop_name']
    if stop.get('local_name'):
      s.name.local = stop['local_name']
    if stop.get('short_name'):
      s.name.english_short = stop['short_name']
    if stop.get('operator_id'):
      s.operator_id = int(stop['operator_id'])
    if stop.get('line_id'):
      s.line_id.extend([int(x.strip()) for x in stop['line_id'].split(',')])

    # ``or ''`` guards against a None value on short rows, which would
    # otherwise raise AttributeError on ``.strip()``.
    y = (stop.get('stop_lat') or '').strip()
    x = (stop.get('stop_lon') or '').strip()
    if y and x:
      s.latitude = float(y)
      s.longitude = float(x)

    db.push_station(s)
def compile_stops_from_gtfs(input_gtfs_f,
                            output_f,
                            all_matching_f=None,
                            version=None,
                            strip_suffixes='',
                            agency_id=-1,
                            tts_hint_language=None,
                            operators_f=None,
                            extra_f=None,
                            local_languages=None,
                            license_notice_f=None):
    """Compile the stops of one or more GTFS feeds into a station database.

    Args:
        input_gtfs_f: Sequence of GTFS inputs; each item is passed to
            ``Gtfs``.  Must hold at least one item, because the first feed
            is used to derive the data version.
        output_f: Path of the database to write (opened in ``'wb'`` mode and
            handed to ``mdst.MdstWriter``).
        all_matching_f: Optional list of byte streams holding UTF-8 CSV data,
            matched to the GTFS feeds by position.  Each row needs a
            ``reader_id`` plus either ``stop_code`` or ``stop_id``, and may
            carry a ``short_name``.  When a feed has a matching file, only
            stops referenced by it are written; otherwise all stops are.
        version: Data version.  When ``None`` it is computed as the number
            of days between ``VERSION_EPOCH`` and the feed start date (from
            ``feed_info.txt``, or else the newest archive member timestamp).
        strip_suffixes: Comma-separated suffixes; each is stripped and
            lower-cased, then passed to ``massage_name``.
        agency_id: When ``>= 0``, stored as ``operator_id`` on every station
            created from a matching file.
        tts_hint_language: Passed through to ``mdst.MdstWriter``.
        operators_f: Optional byte stream with UTF-8 CSV operator data, read
            with ``mdst.read_operators_from_csv`` and then closed.
        extra_f: Optional byte stream with UTF-8 CSV stop data, appended via
            ``mdst.read_stops_from_csv`` and then closed.
        local_languages: Optional comma-separated string, split into a list
            for ``mdst.MdstWriter``.
        license_notice_f: Passed through to ``mdst.MdstWriter``.

    Raises:
        Exception: If a matching row has neither ``stop_id`` nor
            ``stop_code``.
    """
    # 'utf-8-sig' transparently drops a leading byte-order mark.
    utf8_reader = codecs.getreader('utf-8-sig')
    if all_matching_f is not None:
        all_matching_f = [utf8_reader(x) for x in all_matching_f]
    if operators_f is not None:
        operators_f = utf8_reader(operators_f)
    if extra_f is not None:
        extra_f = utf8_reader(extra_f)
    # trim whitespace
    strip_suffixes = [x.strip().lower() for x in strip_suffixes.split(',')]

    all_gtfs = [Gtfs(x) for x in input_gtfs_f]
    first_gtfs = all_gtfs[0]

    if version is None:
        try:
            feed_info = first_gtfs.open('feed_info.txt')
        except KeyError:
            # feed_info.txt is not in the file. Find the newest file in the
            # archive and use its timestamp instead.
            feed_start_date = None
            for f in first_gtfs.infolist():
                ts = datetime(*f.date_time)
                if feed_start_date is None or feed_start_date < ts:
                    feed_start_date = ts
        else:
            row = next(feed_info)
            feed_start_date = row['feed_start_date']
            assert len(feed_start_date) == 8
            feed_start_date = datetime.strptime(feed_start_date, '%Y%m%d')

        version = (feed_start_date - VERSION_EPOCH).days
        print('Data version: %s (%s)' %
              (version, feed_start_date.date().isoformat()))

    operators = {}

    if operators_f is not None:
        operators = mdst.read_operators_from_csv(operators_f)
        operators_f.close()

    db = mdst.MdstWriter(
        fh=open(output_f, 'wb'),
        version=version,
        operators=operators,
        local_languages=local_languages.split(',')
        if local_languages is not None else [],
        tts_hint_language=tts_hint_language,
        license_notice_f=license_notice_f,
    )

    station_count = 0

    for num, gtfs in enumerate(all_gtfs):
        stops = gtfs.open('stops.txt')
        # See if there is a matching file for this feed (matched by position).
        matching_f = None
        if all_matching_f is not None and len(all_matching_f) > num:
            matching_f = all_matching_f[num]

        if matching_f is None:
            # No matching data, dump all stops.
            for stop in stops:
                stop_id = stop['stop_id']
                stop_name = massage_name(stop['stop_name'], strip_suffixes)
                lat = stop['stop_lat'].strip()
                lon = stop['stop_lon'].strip()

                s = Station()
                s.id = int(stop_id)
                s.name.english = stop_name
                if lat and lon:
                    s.latitude = float(lat)
                    s.longitude = float(lon)

                db.push_station(s)
                station_count += 1
        else:
            # Matching data is available.  Lets use that.
            matching = csv.DictReader(matching_f)

            # GTFS stop_code / stop_id -> list of reader_ids mapped to it.
            stop_codes = {}
            stop_ids = {}
            # reader_id -> short name override.
            short_names = {}
            for match in matching:
                # stop_code takes precedence over stop_id when both are set.
                if match.get('stop_code'):
                    stop_codes.setdefault(match['stop_code'],
                                          []).append(match['reader_id'])
                elif match.get('stop_id'):
                    stop_ids.setdefault(match['stop_id'],
                                        []).append(match['reader_id'])
                else:
                    raise Exception(
                        'neither stop_id or stop_code specified in row')
                if match.get('short_name'):
                    short_names[match['reader_id']] = match['short_name']

            total_gtfs_stations = 0
            dropped_gtfs_stations = 0

            # Now run through the stops
            for stop in stops:
                # preprocess stop data
                name = massage_name(stop['stop_name'], strip_suffixes)
                # A blank coordinate must not abort the whole run, so only
                # convert non-empty values.
                lat_text = stop['stop_lat'].strip()
                lon_text = stop['stop_lon'].strip()
                y = float(lat_text) if lat_text else None
                x = float(lon_text) if lon_text else None

                # Reader IDs matched through stop_id come first, followed by
                # those matched through stop_code.
                reader_ids = (
                    stop_ids.get(stop.get('stop_id', 'stop_id_absent'), []) +
                    stop_codes.get(
                        stop.get('stop_code', 'stop_code_absent'), []))

                for reader_id in reader_ids:
                    s = Station()
                    s.id = int(reader_id, 0)
                    s.name.english = name
                    # Truthiness test: a missing coordinate, or one equal to
                    # zero, leaves the location unset.
                    if y and x:
                        s.latitude = y
                        s.longitude = x
                    if reader_id in short_names:
                        s.name.english_short = short_names[reader_id]
                    if agency_id >= 0:
                        s.operator_id = agency_id

                    db.push_station(s)
                    station_count += 1

                total_gtfs_stations += 1
                if not reader_ids:
                    # No reader_id refers to this GTFS stop.
                    dropped_gtfs_stations += 1

            matching_f.close()
            print('Finished parsing GTFS ' + str(num) +
                  '.  Here\'s the stats:')
            print(' - Dropped %d out of %d GTFS stations' %
                  (dropped_gtfs_stations, total_gtfs_stations))
            print()

    if extra_f is not None:
        mdst.read_stops_from_csv(db, extra_f)
        extra_f.close()

    index_end_off = db.finalise()

    print('Finished writing database.  Here\'s the stats:')
    print(' - total ............ %8d stations' % station_count)
    print('                      %8d bytes' % index_end_off)
    print()
    # Float division for the per-record averages; report 0.0 instead of
    # raising ZeroDivisionError when no station was written.
    records = float(station_count)
    print(' - header ........... %8d bytes' % db.stationlist_off)
    stations_len = (db.index_off - db.stationlist_off)
    stations_avg = stations_len / records if records else 0.0
    print(' - stations ......... %8d bytes (%.1f per record)' %
          (stations_len, stations_avg))
    index_len = (index_end_off - db.index_off)
    index_avg = index_len / records if records else 0.0
    print(' - index ............ %8d bytes (%.1f per record)' %
          (index_len, index_avg))
Example #3
0
station_count = 0
for row in cur:
    # The first element of each lookup-table entry is used as the ID.
    operator_id = operators[row[3]][0]
    line_id = lines[row[4]][0]

    # Pack the area/line/station codes into one int, one byte each
    # (area in bits 16-23, line in bits 8-15, station in bits 0-7).
    area_code, line_code, station_code = (
        int(row[column]) & 0xff for column in range(3))
    station_id = (area_code << 16) | (line_code << 8) | station_code

    # Build the station record.
    station = Station()
    station.id = station_id
    station.name.local = row[5]
    station.name.english = row[6]
    # Rows lacking either coordinate are written without a location.
    if not (row[7] is None or row[8] is None):
        station.latitude = row[7]
        station.longitude = row[8]
    station.operator_id = operator_id
    station.line_id.append(line_id)

    db.push_station(station)
    station_count += 1

print('Building index...')
index_end_off = db.finalise()

print('Finished writing database.  Here\'s the stats:')
print(' - total ............ %8d stations' % station_count)
print('                      %8d bytes' % index_end_off)
Example #4
0
            line_id = lines[row['city'].strip()]

        # Skip TLS
        if operator_id == 0: continue

        # pack an int with the company + station code
        # Reduce so Connexxion is 0, this way the integers are shorter.
        station_id = ((operator_id - 1) << 16) + (int(row['ovcid']) & 0xffff)

        # create the record
        s = Station()
        s.id = station_id
        s.name.english = row['name'].strip()
        if 'lat' in row and 'lon' in row and row['lat'] is not None and row[
                'lon'] is not None:
            s.latitude = float(row['lat'].strip())
            s.longitude = float(row['lon'].strip())
        #else:
        #  print('Missing location: (%d) %s' % (s.id, s.name.english))
        s.operator_id = operator_id
        if line_id is not None:
            s.line_id.append(line_id)

        # Write it out
        db.push_station(s)
        station_count += 1

print('Building index...')
index_end_off = db.finalise()

print('Finished writing database.  Here\'s the stats:')