Example #1
    def run(self, context, skip_shape_dist=False, **kwargs):

        columns = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type", "timepoint"]
        if not skip_shape_dist:
            columns.append("shape_dist_traveled")
        with PrettyCsv("stop_times.txt", columns, **kwargs) as csvout:
            ntrips = 0
            for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=False, prefetch_stop_times=True, prefetch_calendars=False, prefetch_routes=False):
                if ntrips % 1000 == 0:
                    print("%d trips..." % (ntrips))
                ntrips += 1
                for stoptime in trip.stop_times:
                    row = [ trip.trip_id,
                            fmttime(stoptime.arrival_time if stoptime.arrival_time is not None else stoptime.departure_time),
                            fmttime(stoptime.departure_time if stoptime.departure_time is not None else stoptime.arrival_time),
                            stoptime.stop_id,
                            stoptime.stop_sequence,
                            stoptime.stop_headsign,
                            stoptime.pickup_type,
                            stoptime.drop_off_type,
                            stoptime.timepoint ]
                    if not skip_shape_dist:
                        row.append(stoptime.shape_dist_traveled)
                    csvout.writerow(row)
            print("Processed %d trips" % (ntrips))
Example #2
    def test_demo(self):
        dao = Dao(DAO_URL, sql_logging=False)
        dao.load_gtfs(DUMMY_GTFS)

        print("List of stops named '...Bordeaux...':")
        stops_bordeaux = list(dao.stops(fltr=(Stop.stop_name.ilike('%Bordeaux%')) & (Stop.location_type == Stop.TYPE_STOP)))
        for stop in stops_bordeaux:
            print(stop.stop_name)

        print("List of routes passing by those stops:")
        routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop for stop in stops_bordeaux))
        for route in routes_bordeaux:
            print("%s - %s" % (route.route_short_name, route.route_long_name))

        july4 = CalendarDate.ymd(2016, 7, 4)
        print("All departures from those stops on %s:" % (july4.as_date()))
        departures = list(dao.stoptimes(fltr=(or_(StopTime.stop == stop for stop in stops_bordeaux)) & (StopTime.departure_time != None) & (func.date(CalendarDate.date) == july4.date)))
        print("There is %d departures" % (len(departures)))
        for departure in departures:
            print("%30.30s %10.10s %-20.20s > %s" % (departure.stop.stop_name, fmttime(departure.departure_time), departure.trip.route.route_long_name, departure.trip.trip_headsign))

        print("Number of departures and time range per stop on %s:" % (july4.as_date()))
        departure_by_stop = defaultdict(list)
        for departure in departures:
            departure_by_stop[departure.stop].append(departure)
        for stop, deps in departure_by_stop.items():
            min_dep = min(d.departure_time for d in deps)
            max_dep = max(d.departure_time for d in deps)
            print("%30.30s %3d departures (from %s to %s)" % (stop.stop_name, len(deps), fmttime(min_dep), fmttime(max_dep)))

        # Compute the average distance and time to next stop by route type
        ntd = [ [0, 0, 0.0] for type in range(0, Route.TYPE_FUNICULAR + 1) ]
        for departure in departures:
            # The following is guaranteed to succeed: the last stop time of a trip has a NULL departure_time, so it was filtered out of the departures above
            next_arrival = departure.trip.stop_times[departure.stop_sequence + 1]
            hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
            hop_time = next_arrival.arrival_time - departure.departure_time
            route_type = departure.trip.route.route_type
            ntd[route_type][0] += 1
            ntd[route_type][1] += hop_time
            ntd[route_type][2] += hop_dist
        for route_type in range(0, len(ntd)):
            n, t, d = ntd[route_type]
            if n > 0:
                print("The average distance to the next stop on those departures for route type %d is %.2f meters" % (route_type, d / n))
                print("The average time in sec to the next stop on those departures for route type %d is %s" % (route_type, fmttime(t / n)))
Example #3
def _convert_gtfs_model(feed_id,
                        gtfs,
                        dao,
                        lenient=False,
                        disable_normalization=False):

    feedinfo2 = None
    logger.info("Importing feed ID '%s'" % feed_id)
    n_feedinfo = 0
    for feedinfo in gtfs.feedinfo():
        n_feedinfo += 1
        if n_feedinfo > 1:
            logger.error(
                "Feed info should be unique if defined. Taking the first one.")
            break
        # TODO Automatically compute from calendar range if missing?
        feedinfo['feed_start_date'] = _todate(feedinfo.get('feed_start_date'))
        feedinfo['feed_end_date'] = _todate(feedinfo.get('feed_end_date'))
        feedinfo2 = FeedInfo(feed_id, **feedinfo)
    if feedinfo2 is None:
        # Optional, generate empty feed info
        feedinfo2 = FeedInfo(feed_id)
    dao.add(feedinfo2)
    dao.flush()
    logger.info("Imported %d feedinfo" % n_feedinfo)

    logger.info("Importing agencies...")
    n_agencies = 0
    single_agency = None
    agency_ids = set()
    for agency in gtfs.agencies():
        # agency_id is optional only if we have a single agency
        if n_agencies == 0 and agency.get('agency_id') is None:
            agency['agency_id'] = ''
        agency2 = Agency(feed_id, **agency)
        if n_agencies == 0:
            single_agency = agency2
        else:
            single_agency = None
        n_agencies += 1
        dao.add(agency2)
        agency_ids.add(agency2.agency_id)
    dao.flush()
    logger.info("Imported %d agencies" % n_agencies)

    def import_stop(stop, stoptype, zone_ids, item_ids, station_ids=None):
        zone_id = stop.get('zone_id')
        if zone_id and zone_id not in zone_ids:
            # Lazy-creation of zone
            zone = Zone(feed_id, zone_id)
            zone_ids.add(zone_id)
            dao.add(zone)
        stop['location_type'] = _toint(stop.get('location_type'),
                                       Stop.TYPE_STOP)
        if stop['location_type'] != stoptype:
            return 0
        stop['wheelchair_boarding'] = _toint(stop.get('wheelchair_boarding'),
                                             Stop.WHEELCHAIR_UNKNOWN)
        lat = _tofloat(stop.get('stop_lat'), None)
        lon = _tofloat(stop.get('stop_lon'), None)
        if lat is None or lon is None:
            if lenient:
                logger.error("Missing lat/lon for '%s', set to default (0,0)" %
                             (stop, ))
                if lat is None:
                    lat = 0
                if lon is None:
                    lon = 0
            else:
                raise ValueError("Missing mandatory lat/lon for '%s'." %
                                 (stop, ))
        stop['stop_lat'] = lat
        stop['stop_lon'] = lon
        # This field has been renamed for consistency
        parent_id = stop.get('parent_station')
        stop['parent_station_id'] = parent_id if parent_id else None
        if parent_id and station_ids and parent_id not in station_ids:
            if lenient:
                logger.error(
                    "Parent station ID '%s' in '%s' is invalid, resetting." %
                    (parent_id, stop))
                stop['parent_station_id'] = None
            else:
                raise KeyError("Parent station ID '%s' in '%s' is invalid." %
                               (parent_id, stop))
        stop.pop('parent_station', None)
        stop2 = Stop(feed_id, **stop)
        dao.add(stop2)
        item_ids.add(stop2.stop_id)
        return 1

    stop_ids = set()
    station_ids = set()
    zone_ids = set()
    logger.info("Importing zones, stations and stops...")
    n_stations = n_stops = 0
    for station in gtfs.stops():
        n_stations += import_stop(station, Stop.TYPE_STATION, zone_ids,
                                  station_ids)
    for stop in gtfs.stops():
        n_stops += import_stop(stop, Stop.TYPE_STOP, zone_ids, stop_ids,
                               station_ids)
    dao.flush()
    logger.info("Imported %d zones, %d stations and %d stops" %
                (len(zone_ids), n_stations, n_stops))

    logger.info("Importing transfers...")
    n_transfers = 0
    for transfer in gtfs.transfers():
        from_stop_id = transfer.get('from_stop_id')
        to_stop_id = transfer.get('to_stop_id')
        transfer['transfer_type'] = _toint(transfer.get('transfer_type'), 0)
        for stop_id in (from_stop_id, to_stop_id):
            if stop_id not in station_ids and stop_id not in stop_ids:
                if lenient:
                    logger.error("Stop ID '%s' in '%s' is invalid, skipping." %
                                 (stop_id, transfer))
                    continue
                else:
                    raise KeyError("Stop ID '%s' in '%s' is invalid." %
                                   (stop_id, transfer))
        transfer2 = Transfer(feed_id, **transfer)
        n_transfers += 1
        dao.add(transfer2)
    dao.flush()
    logger.info("Imported %d transfers" % (n_transfers))

    logger.info("Importing routes...")
    n_routes = 0
    route_ids = set()
    for route in gtfs.routes():
        route['route_type'] = int(route.get('route_type'))
        agency_id = route.get('agency_id')
        if (agency_id is None
                or len(agency_id) == 0) and single_agency is not None:
            # Route.agency is optional if only a single agency exists.
            agency_id = route['agency_id'] = single_agency.agency_id
        if agency_id not in agency_ids:
            if lenient:
                logger.error(
                    "Agency ID '%s' in '%s' is invalid, skipping route." %
                    (agency_id, route))
                continue
            else:
                raise KeyError("agency ID '%s' in '%s' is invalid." %
                               (agency_id, route))
        route2 = Route(feed_id, **route)
        dao.add(route2)
        route_ids.add(route2.route_id)
        n_routes += 1
    dao.flush()
    logger.info("Imported %d routes" % n_routes)

    logger.info("Importing fares...")
    n_fares = 0
    for fare_attr in gtfs.fare_attributes():
        fare_id = fare_attr.get('fare_id')
        fare_price = _tofloat(fare_attr.get('price'))
        currency_type = fare_attr.get('currency_type')
        payment_method = _toint(fare_attr.get('payment_method'))
        n_transfers = None
        if fare_attr.get('transfers') is not None:
            n_transfers = _toint(fare_attr.get('transfers'))
        transfer_duration = None
        if fare_attr.get('transfer_duration') is not None:
            transfer_duration = _toint(fare_attr.get('transfer_duration'))
        fare = FareAttribute(feed_id, fare_id, fare_price, currency_type,
                             payment_method, n_transfers, transfer_duration)
        dao.add(fare)
        n_fares += 1
    dao.flush()
    fare_rules = set()
    for fare_rule in gtfs.fare_rules():
        fare_rule2 = FareRule(feed_id, **fare_rule)
        if fare_rule2 in fare_rules:
            if lenient:
                logger.error("Duplicated fare rule (%s), skipping." %
                             (fare_rule2))
                continue
            else:
                raise KeyError("Duplicated fare rule (%s)" % (fare_rule2))
        dao.add(fare_rule2)
        fare_rules.add(fare_rule2)
    dao.flush()
    logger.info("Imported %d fare and %d rules" % (n_fares, len(fare_rules)))

    logger.info("Importing calendars...")
    calanddates2 = {}
    for calendar in gtfs.calendars():
        calid = calendar.get('service_id')
        calendar2 = Calendar(feed_id, calid)
        dates2 = []
        start_date = CalendarDate.fromYYYYMMDD(calendar.get('start_date'))
        end_date = CalendarDate.fromYYYYMMDD(calendar.get('end_date'))
        for d in CalendarDate.range(start_date, end_date.next_day()):
            if int(calendar.get(DOW_NAMES[d.dow()])):
                dates2.append(d)
        calanddates2[calid] = (calendar2, set(dates2))

    logger.info("Normalizing calendar dates...")
    for caldate in gtfs.calendar_dates():
        calid = caldate.get('service_id')
        date2 = CalendarDate.fromYYYYMMDD(caldate.get('date'))
        addremove = int(caldate.get('exception_type'))
        if calid in calanddates2:
            calendar2, dates2 = calanddates2[calid]
        else:
            calendar2 = Calendar(feed_id, calid)
            dates2 = set([])
            calanddates2[calid] = (calendar2, dates2)
        if addremove == 1:
            dates2.add(date2)
        elif addremove == 2:
            if date2 in dates2:
                dates2.remove(date2)
    n_calendars = 0
    n_caldates = 0
    calendar_ids = set()
    for (calendar2, dates2) in calanddates2.values():
        calendar2.dates = [d for d in dates2]
        dao.add(calendar2)
        calendar_ids.add(calendar2.service_id)
        n_calendars += 1
        n_caldates += len(calendar2.dates)
    dao.flush()
    logger.info("Imported %d calendars and %d dates" %
                (n_calendars, n_caldates))

    logger.info("Importing shapes...")
    n_shape_pts = 0
    shape_ids = set()
    shapepts_q = []
    for shpt in gtfs.shapes():
        shape_id = shpt.get('shape_id')
        if shape_id not in shape_ids:
            dao.add(Shape(feed_id, shape_id))
            dao.flush()
            shape_ids.add(shape_id)
        pt_seq = _toint(shpt.get('shape_pt_sequence'))
        # This field is optional
        dist_traveled = _tofloat(shpt.get('shape_dist_traveled'), -999999)
        lat = _tofloat(shpt.get('shape_pt_lat'))
        lon = _tofloat(shpt.get('shape_pt_lon'))
        shape_point = ShapePoint(feed_id, shape_id, pt_seq, lat, lon,
                                 dist_traveled)
        shapepts_q.append(shape_point)
        n_shape_pts += 1
        if n_shape_pts % 100000 == 0:
            logger.info("%d shape points" % n_shape_pts)
            dao.bulk_save_objects(shapepts_q)
            dao.flush()
            shapepts_q = []
    dao.bulk_save_objects(shapepts_q)
    dao.flush()
    logger.info("Imported %d shapes and %d points" %
                (len(shape_ids), n_shape_pts))

    logger.info("Importing trips...")
    n_trips = 0
    trips_q = []
    trip_ids = set()
    for trip in gtfs.trips():
        trip['wheelchair_accessible'] = _toint(
            trip.get('wheelchair_accessible'), Trip.WHEELCHAIR_UNKNOWN)
        trip['bikes_allowed'] = _toint(trip.get('bikes_allowed'),
                                       Trip.BIKES_UNKNOWN)
        cal_id = trip.get('service_id')
        if cal_id not in calendar_ids:
            if lenient:
                logger.error(
                    "Calendar ID '%s' in '%s' is invalid. Skipping trip." %
                    (cal_id, trip))
                continue
            else:
                raise KeyError("Calendar ID '%s' in '%s' is invalid." %
                               (cal_id, trip))
        route_id = trip.get('route_id')
        if route_id not in route_ids:
            if lenient:
                logger.error(
                    "Route ID '%s' in '%s' is invalid. Skipping trip." %
                    (route_id, trip))
                continue
            else:
                raise KeyError("Route ID '%s' in trip '%s' is invalid." %
                               (route_id, trip))
        trip2 = Trip(feed_id, frequency_generated=False, **trip)

        trips_q.append(trip2)
        n_trips += 1
        if n_trips % 10000 == 0:
            dao.bulk_save_objects(trips_q)
            dao.flush()
            logger.info('%s trips' % n_trips)
            trips_q = []

        trip_ids.add(trip.get('trip_id'))
    dao.bulk_save_objects(trips_q)
    dao.flush()

    logger.info("Imported %d trips" % n_trips)

    logger.info("Importing stop times...")
    n_stoptimes = 0
    stoptimes_q = []
    for stoptime in gtfs.stop_times():
        stopseq = _toint(stoptime.get('stop_sequence'))
        # Mark times to interpolate later on
        arrtime = _timetoint(stoptime.get('arrival_time'), -999999)
        deptime = _timetoint(stoptime.get('departure_time'), -999999)
        if arrtime == -999999:
            arrtime = deptime
        if deptime == -999999:
            deptime = arrtime
        interp = arrtime < 0 and deptime < 0
        shpdist = _tofloat(stoptime.get('shape_dist_traveled'), -999999)
        pkptype = _toint(stoptime.get('pickup_type'),
                         StopTime.PICKUP_DROPOFF_REGULAR)
        drptype = _toint(stoptime.get('drop_off_type'),
                         StopTime.PICKUP_DROPOFF_REGULAR)
        trip_id = stoptime.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error(
                    "Trip ID '%s' in '%s' is invalid. Skipping stop time." %
                    (trip_id, stoptime))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." %
                               (trip_id, stoptime))
        stop_id = stoptime.get('stop_id')
        if stop_id not in stop_ids:
            if lenient:
                logger.error(
                    "Stop ID '%s' in '%s' is invalid. Skipping stop time." %
                    (stop_id, stoptime))
                continue
            else:
                raise KeyError("Trip ID '%s' in stoptime '%s' is invalid." %
                               (stop_id, stoptime))
        stoptime2 = StopTime(feed_id,
                             trip_id,
                             stop_id,
                             stop_sequence=stopseq,
                             arrival_time=arrtime,
                             departure_time=deptime,
                             shape_dist_traveled=shpdist,
                             interpolated=interp,
                             pickup_type=pkptype,
                             drop_off_type=drptype,
                             stop_headsign=stoptime.get('stop_headsign'))
        stoptimes_q.append(stoptime2)
        n_stoptimes += 1
        # Commit every now and then
        if n_stoptimes % 50000 == 0:
            logger.info("%d stop times" % n_stoptimes)
            dao.bulk_save_objects(stoptimes_q)
            dao.flush()
            stoptimes_q = []
    dao.bulk_save_objects(stoptimes_q)

    logger.info("Imported %d stop times" % n_stoptimes)
    logger.info("Committing")
    dao.flush()
    # TODO Add option to enable/disable this commit
    # to ensure the import is transactional
    dao.commit()
    logger.info("Commit done")

    def normalize_trip(trip, odometer):
        stopseq = 0
        n_stoptimes = len(trip.stop_times)
        last_stoptime_with_time = None
        to_interpolate = []
        odometer.reset()
        for stoptime in trip.stop_times:
            stoptime.stop_sequence = stopseq
            stoptime.shape_dist_traveled = odometer.dist_traveled(
                stoptime.stop, stoptime.shape_dist_traveled
                if stoptime.shape_dist_traveled != -999999 else None)
            if stopseq == 0:
                # Force first arrival time to NULL
                stoptime.arrival_time = None
            if stopseq == n_stoptimes - 1:
                # Force last departure time to NULL
                stoptime.departure_time = None
            if stoptime.interpolated:
                to_interpolate.append(stoptime)
            else:
                if len(to_interpolate) > 0:
                    # Interpolate
                    if last_stoptime_with_time is None:
                        logger.error(
                            "Cannot interpolate missing time at trip start: %s"
                            % trip)
                        for stti in to_interpolate:
                            # Use first defined time as fallback value.
                            stti.arrival_time = stoptime.arrival_time
                            stti.departure_time = stoptime.arrival_time
                    else:
                        tdist = stoptime.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                        ttime = stoptime.arrival_time - last_stoptime_with_time.departure_time
                        for stti in to_interpolate:
                            fdist = stti.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                            t = last_stoptime_with_time.departure_time + ttime * fdist // tdist
                            stti.arrival_time = t
                            stti.departure_time = t
                to_interpolate = []
                last_stoptime_with_time = stoptime
            stopseq += 1
        if len(to_interpolate) > 0:
            # Should not happen, but handle the case anyway, just to be safe
            if last_stoptime_with_time is None:
                logger.error(
                    "Cannot interpolate missing time, no time at all: %s" %
                    trip)
                # Keep times NULL (TODO: or remove the trip?)
            else:
                logger.error(
                    "Cannot interpolate missing time at trip end: %s" % trip)
                for stti in to_interpolate:
                    # Use last defined time as fallback value
                    stti.arrival_time = last_stoptime_with_time.departure_time
                    stti.departure_time = last_stoptime_with_time.departure_time

    if disable_normalization:
        logger.info("Skipping shapes and trips normalization")
    else:
        logger.info("Normalizing shapes and trips...")
        nshapes = 0
        ntrips = 0
        odometer = _Odometer()
        # Process shapes and associated trips
        for shape in dao.shapes(fltr=Shape.feed_id == feed_id,
                                prefetch_points=True,
                                batch_size=50):
            # The shape will be registered during normalization
            odometer.normalize_and_register_shape(shape)
            for trip in dao.trips(fltr=(Trip.feed_id == feed_id) &
                                  (Trip.shape_id == shape.shape_id),
                                  prefetch_stop_times=False,
                                  prefetch_stops=False,
                                  batch_size=800):
                normalize_trip(trip, odometer)
                ntrips += 1
                if ntrips % 1000 == 0:
                    logger.info("%d trips, %d shapes" % (ntrips, nshapes))
                    dao.flush()
            nshapes += 1
            #odometer._debug_cache()
        # Process trips w/o shapes
        for trip in dao.trips(fltr=(Trip.feed_id == feed_id) &
                              (Trip.shape_id == None),
                              prefetch_stop_times=False,
                              prefetch_stops=False,
                              batch_size=800):
            odometer.register_noshape()
            normalize_trip(trip, odometer)
            ntrips += 1
            if ntrips % 1000 == 0:
                logger.info("%d trips" % ntrips)
                dao.flush()
        dao.flush()
        logger.info("Normalized %d trips and %d shapes" % (ntrips, nshapes))

    # Note: we expand frequencies *after* normalization
    # purely for performance: it minimizes the number of
    # trips to normalize. We can do this because the
    # expansion is neutral with respect to trip normalization.
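    # For instance (hypothetical numbers): a frequency row with
    # start_time=08:00:00, end_time=09:00:00 and headway_secs=600 expands the
    # base trip into 6 generated trips departing at 08:00, 08:10, ..., 08:50
    # (range(start_time, end_time, headway_secs) excludes end_time itself),
    # each with all its stop times shifted by (trip_dep_time - base_time).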
    logger.info("Expanding frequencies...")
    n_freq = 0
    n_exp_trips = 0
    trips_to_delete = []
    for frequency in gtfs.frequencies():
        trip_id = frequency.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error(
                    "Trip ID '%s' in '%s' is invalid. Skipping frequency." %
                    (trip_id, frequency))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." %
                               (trip_id, frequency))
        trip = dao.trip(trip_id, feed_id=feed_id)
        start_time = _timetoint(frequency.get('start_time'))
        end_time = _timetoint(frequency.get('end_time'))
        headway_secs = _toint(frequency.get('headway_secs'))
        exact_times = _toint(frequency.get('exact_times'), Trip.TIME_APPROX)
        for trip_dep_time in range(start_time, end_time, headway_secs):
            # Here we assume departure times are all different.
            # That's a requirement of the GTFS specs, but this may break.
            # TODO Make the expanded trip ID generation configurable.
            trip_id2 = trip.trip_id + "@" + fmttime(trip_dep_time)
            trip2 = Trip(feed_id,
                         trip_id2,
                         trip.route_id,
                         trip.service_id,
                         wheelchair_accessible=trip.wheelchair_accessible,
                         bikes_allowed=trip.bikes_allowed,
                         exact_times=exact_times,
                         frequency_generated=True,
                         trip_headsign=trip.trip_headsign,
                         trip_short_name=trip.trip_short_name,
                         direction_id=trip.direction_id,
                         block_id=trip.block_id)
            trip2.stop_times = []
            base_time = trip.stop_times[0].departure_time
            for stoptime in trip.stop_times:
                arrtime = None if stoptime.arrival_time is None else stoptime.arrival_time - base_time + trip_dep_time
                deptime = None if stoptime.departure_time is None else stoptime.departure_time - base_time + trip_dep_time
                stoptime2 = StopTime(
                    feed_id,
                    trip_id2,
                    stoptime.stop_id,
                    stoptime.stop_sequence,
                    arrival_time=arrtime,
                    departure_time=deptime,
                    shape_dist_traveled=stoptime.shape_dist_traveled,
                    interpolated=stoptime.interpolated,
                    timepoint=stoptime.timepoint,
                    pickup_type=stoptime.pickup_type,
                    drop_off_type=stoptime.drop_off_type)
                trip2.stop_times.append(stoptime2)
            n_exp_trips += 1
            # This will add the associated stop times
            dao.add(trip2)
        # Do not delete the trip now, as two frequencies can refer to the same trip
        trips_to_delete.append(trip)
        n_freq += 1
    for trip in trips_to_delete:
        # This also deletes the associated stop times
        dao.delete(trip)
    dao.flush()
    dao.commit()
    logger.info("Expanded %d frequencies to %d trips." % (n_freq, n_exp_trips))

    logger.info("Feed '%s': import done." % feed_id)
Example #4
def decret_2015_1610(trips,
                     trace=True,
                     required_distance=500,
                     required_ratio=2.5):

    affiche(trace, "Calcul decret 2015 1610 sur %d voyages." % (len(trips)))
    if len(trips) == 0:
        affiche(trace, "Aucun voyages, impossible de calculer.")
        return None, None, None

    affiche(trace, "Calcul de l'espacement moyen des arrêts...")
    espacement_moyen = 0
    w_esp = 0
    for trip in trips:
        # Note: we weight by the number of days each trip is applicable.
        # This accounts for frequency: for example, the distance of an
        # interval between two stops active on Mondays only will be weighted
        # 5 times less than another interval active from Monday to Friday.
        n_jours = len(trip.calendar.dates)
        for stoptime1, stoptime2 in trip.hops():
            espacement_moyen += (stoptime2.shape_dist_traveled -
                                 stoptime1.shape_dist_traveled) * n_jours
            w_esp += n_jours
    espacement_moyen /= w_esp
    affiche(
        trace,
        "L'espacement moyen entre arrêt du réseau est de %.2f mètres (max %.0fm)."
        % (espacement_moyen, float(required_distance)))

    affiche(trace,
            "Calcul du jour ayant la fréquence en voyage la plus élevée...")
    frequences = defaultdict(lambda: 0)
    for trip in trips:
        for date in trip.calendar.dates:
            frequences[date] += 1
    date_max = None
    freq_max = 0
    for date, frequence in frequences.items():
        if frequence > freq_max:
            freq_max = frequence
            date_max = date
    affiche(
        trace,
        "Le jour ayant le nombre de voyage le plus élevé est le %s, avec %d voyages."
        % (date_max.as_date(), freq_max))

    affiche(trace, "Calcul des fréquences sur la plage horaire 8h - 19h...")
    # TODO Is this computation correct? The decree is not clear.
    # We count the number of trips active during each minute.
    frequences = [0 for minute in range(0, 20 * 60)]
    for trip in trips:
        if date_max not in trip.calendar.dates:
            continue
        minute_depart = trip.stop_times[0].departure_time // 60
        minute_arrivee = trip.stop_times[-1].arrival_time // 60
        for minute in range(minute_depart, minute_arrivee + 1):
            if minute >= 8 * 60 and minute < 20 * 60:
                frequences[minute] += 1
    frequence_min = 99999999999
    minute_min = 0
    frequence_max = 0
    minute_max = 0
    # The min/max hourly frequency is computed as a one-hour sliding
    # average by summing the per-minute frequencies.
    for minute in range(8 * 60, 19 * 60):
        freq = 0
        for delta_minute in range(0, 60):
            freq += frequences[minute + delta_minute]
        if freq > frequence_max:
            frequence_max = freq
            minute_max = minute
        if freq < frequence_min:
            frequence_min = freq
            minute_min = minute
    affiche(
        trace,
        "La fréquence minimale est de %.2f voyages/heure, entre %s et %s." %
        (frequence_min / 60.0, fmttime(
            minute_min * 60), fmttime((minute_min + 60) * 60)))
    affiche(
        trace,
        "La fréquence maximale est de %.2f voyages/heure, entre %s et %s." %
        (frequence_max / 60.0, fmttime(
            minute_max * 60), fmttime((minute_max + 60) * 60)))
    if frequence_min == 0:
        ratio_frequence = float('inf')
    else:
        ratio_frequence = frequence_max / float(frequence_min)
    affiche(
        trace, "Le ratio entre fréquence max et min est de %.3f (max %.2f)." %
        (ratio_frequence, float(required_ratio)))

    urbain = ratio_frequence < required_ratio and espacement_moyen < required_distance
    affiche(
        trace, "Ce service est %s au sens du décret n° 2015-1610." %
        ("URBAIN" if urbain else "NON URBAIN"))
    return urbain, espacement_moyen, ratio_frequence
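
A hedged sketch of how this check might be driven end to end, assuming a feed already imported into a Dao as in the other examples. The database URL is a placeholder and the prefetch flags are illustrative; the exact flags needed to populate trip.calendar.dates and trip.hops() may differ.

dao = Dao("db.sqlite", sql_logging=False)
trips = list(dao.trips(prefetch_stop_times=True, prefetch_calendars=True))
urbain, espacement, ratio = decret_2015_1610(trips, trace=True)
if urbain is not None:
    print("urbain=%s espacement=%.1fm ratio=%.2f" % (urbain, espacement, ratio))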
Example #5
    def test_demo(self):
        dao = Dao(DAO_URL, sql_logging=False)
        dao.load_gtfs(DUMMY_GTFS)

        print("List of stops named '...Bordeaux...':")
        stops_bordeaux = list(
            dao.stops(fltr=(Stop.stop_name.ilike('%Bordeaux%'))
                      & (Stop.location_type == Stop.TYPE_STOP)))
        for stop in stops_bordeaux:
            print(stop.stop_name)

        print("List of routes passing by those stops:")
        routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop
                                              for stop in stops_bordeaux))
        for route in routes_bordeaux:
            print("%s - %s" % (route.route_short_name, route.route_long_name))

        july4 = CalendarDate.ymd(2016, 7, 4)
        print("All departures from those stops on %s:" % (july4.as_date()))
        departures = list(
            dao.stoptimes(fltr=(or_(StopTime.stop == stop
                                    for stop in stops_bordeaux))
                          & (StopTime.departure_time != None)
                          & (func.date(CalendarDate.date) == july4.date)))
        print("There is %d departures" % (len(departures)))
        for departure in departures:
            print("%30.30s %10.10s %-20.20s > %s" %
                  (departure.stop.stop_name, fmttime(departure.departure_time),
                   departure.trip.route.route_long_name,
                   departure.trip.trip_headsign))

        print("Number of departures and time range per stop on %s:" %
              (july4.as_date()))
        departure_by_stop = defaultdict(list)
        for departure in departures:
            departure_by_stop[departure.stop].append(departure)
        for stop, deps in departure_by_stop.items():
            min_dep = min(d.departure_time for d in deps)
            max_dep = max(d.departure_time for d in deps)
            print("%30.30s %3d departures (from %s to %s)" %
                  (stop.stop_name, len(deps), fmttime(min_dep),
                   fmttime(max_dep)))

        # Compute the average distance and time to next stop by route type
        ntd = [[0, 0, 0.0] for type in range(0, Route.TYPE_FUNICULAR + 1)]
        for departure in departures:
            # The following is guaranteed to succeed: the last stop time of a trip has a NULL departure_time, so it was filtered out of the departures above
            next_arrival = departure.trip.stop_times[departure.stop_sequence +
                                                     1]
            hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
            hop_time = next_arrival.arrival_time - departure.departure_time
            route_type = departure.trip.route.route_type
            ntd[route_type][0] += 1
            ntd[route_type][1] += hop_time
            ntd[route_type][2] += hop_dist
        for route_type in range(0, len(ntd)):
            n, t, d = ntd[route_type]
            if n > 0:
                print(
                    "The average distance to the next stop on those departures for route type %d is %.2f meters"
                    % (route_type, d / n))
                print(
                    "The average time in sec to the next stop on those departures for route type %d is %s"
                    % (route_type, fmttime(t / n)))
Example #6
    def run(self, context, skip_shape_dist=False, bundle=None, **kwargs):

        with PrettyCsv("agency.txt", [
                "agency_id", "agency_name", "agency_url", "agency_timezone",
                "agency_lang", "agency_phone", "agency_fare_url",
                "agency_email"
        ], **kwargs) as csvout:
            nagencies = 0
            for agency in context.dao().agencies(fltr=context.args.filter):
                nagencies += 1
                csvout.writerow([
                    agency.agency_id, agency.agency_name, agency.agency_url,
                    agency.agency_timezone, agency.agency_lang,
                    agency.agency_phone, agency.agency_fare_url,
                    agency.agency_email
                ])
            print("Exported %d agencies" % (nagencies))

        stop_ids = set()
        zone_ids = set()

        def _output_stop(stop):
            csvout.writerow([
                stop.stop_id, stop.stop_code, stop.stop_name, stop.stop_desc,
                stop.stop_lat, stop.stop_lon, stop.zone_id, stop.stop_url,
                stop.location_type, stop.parent_station_id, stop.stop_timezone,
                stop.wheelchair_boarding
            ])

        with PrettyCsv("stops.txt", [
                "stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat",
                "stop_lon", "zone_id", "stop_url", "location_type",
                "parent_station", "stop_timezone", "wheelchair_boarding"
        ], **kwargs) as csvout:
            nstops = 0
            station_ids = set()
            for stop in context.dao().stops(fltr=context.args.filter,
                                            prefetch_parent=False,
                                            prefetch_substops=False):
                _output_stop(stop)
                stop_ids.add((stop.feed_id, stop.stop_id))
                if stop.parent_station_id is not None:
                    station_ids.add((stop.feed_id, stop.parent_station_id))
                if stop.zone_id is not None:
                    zone_ids.add((stop.feed_id, stop.zone_id))
                nstops += 1
            # Only export parent stations that have not already been seen
            station_ids -= stop_ids
            for feed_id, st_ids in group_pairs(station_ids, 1000):
                for station in context.dao().stops(
                        fltr=(Stop.feed_id == feed_id)
                        & (Stop.stop_id.in_(st_ids))):
                    _output_stop(station)
                    if station.zone_id is not None:
                        zone_ids.add((station.feed_id, station.zone_id))
                    nstops += 1
            print("Exported %d stops" % (nstops))
            stop_ids |= station_ids

        route_ids = set()
        with PrettyCsv("routes.txt", [
                "route_id", "agency_id", "route_short_name", "route_long_name",
                "route_desc", "route_type", "route_url", "route_color",
                "route_text_color"
        ], **kwargs) as csvout:
            nroutes = 0
            for route in context.dao().routes(fltr=context.args.filter):
                nroutes += 1
                csvout.writerow([
                    route.route_id, route.agency_id, route.route_short_name,
                    route.route_long_name, route.route_desc, route.route_type,
                    route.route_url, route.route_color, route.route_text_color
                ])
                route_ids.add((route.feed_id, route.route_id))
            print("Exported %d routes" % (nroutes))

        stop_times_columns = [
            "trip_id", "arrival_time", "departure_time", "stop_id",
            "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type",
            "timepoint"
        ]
        if not skip_shape_dist:
            stop_times_columns.append("shape_dist_traveled")
        with PrettyCsv("trips.txt", [
                "route_id", "service_id", "trip_id", "trip_headsign",
                "trip_short_name", "direction_id", "block_id", "shape_id",
                "wheelchair_accessible", "bikes_allowed"
        ], **kwargs) as csvout1:
            with PrettyCsv("stop_times.txt", stop_times_columns,
                           **kwargs) as csvout2:
                ntrips = 0
                nstoptimes = 0
                for trip in context.dao().trips(fltr=context.args.filter,
                                                prefetch_stops=False,
                                                prefetch_stop_times=True,
                                                prefetch_calendars=False,
                                                prefetch_routes=False):
                    ntrips += 1
                    if ntrips % 1000 == 0:
                        print("%d trips..." % (ntrips))
                    csvout1.writerow([
                        trip.route_id, trip.service_id, trip.trip_id,
                        trip.trip_headsign, trip.trip_short_name,
                        trip.direction_id, trip.block_id, trip.shape_id,
                        trip.wheelchair_accessible, trip.bikes_allowed
                    ])
                    for stoptime in trip.stop_times:
                        nstoptimes += 1
                        row = [
                            trip.trip_id,
                            fmttime(stoptime.arrival_time
                                    if stoptime.arrival_time is not None
                                    else stoptime.departure_time),
                            fmttime(stoptime.departure_time
                                    if stoptime.departure_time is not None
                                    else stoptime.arrival_time),
                            stoptime.stop_id,
                            stoptime.stop_sequence, stoptime.stop_headsign,
                            stoptime.pickup_type, stoptime.drop_off_type,
                            stoptime.timepoint
                        ]
                        if not skip_shape_dist:
                            row.append(stoptime.shape_dist_traveled)
                        csvout2.writerow(row)
                print("Exported %d trips with %d stop times" %
                      (ntrips, nstoptimes))

        # Note: GTFS' model does not have calendar objects to export,
        # since a calendar is renormalized/expanded into a list of dates.

        with PrettyCsv("calendar_dates.txt",
                       ["service_id", "date", "exception_type"],
                       **kwargs) as csvout:
            ncals = ndates = 0
            for calendar in context.dao().calendars(fltr=context.args.filter,
                                                    prefetch_dates=True):
                ncals += 1
                if ncals % 1000 == 0:
                    print("%d calendars, %d dates..." % (ncals, ndates))
                for date in calendar.dates:
                    ndates += 1
                    csvout.writerow(
                        [calendar.service_id,
                         date.toYYYYMMDD(), 1])
            print("Exported %d calendars with %d dates" % (ncals, ndates))

        fare_attr_ids = set()
        nfarerules = [0]

        def _output_farerule(farerule):
            if farerule.route_id is not None and (
                    farerule.feed_id, farerule.route_id) not in route_ids:
                return False
            if farerule.origin_id is not None and (
                    farerule.feed_id, farerule.origin_id) not in zone_ids:
                return False
            if farerule.contains_id is not None and (
                    farerule.feed_id, farerule.contains_id) not in zone_ids:
                return False
            if farerule.destination_id is not None and (
                    farerule.feed_id, farerule.destination_id) not in zone_ids:
                return False
            csvout.writerow([
                farerule.fare_id, farerule.route_id, farerule.origin_id,
                farerule.destination_id, farerule.contains_id
            ])
            fare_attr_ids.add((farerule.feed_id, farerule.fare_id))
            nfarerules[0] += 1
            return True

        with PrettyCsv("fare_rules.txt", [
                "fare_id", "route_id", "origin_id", "destination_id",
                "contains_id"
        ], **kwargs) as csvout:
            feed_ids = set()
            for feed_id, rt_ids in group_pairs(route_ids, 1000):
                feed_ids.add(feed_id)
                for farerule in context.dao().fare_rules(
                        fltr=(FareRule.feed_id == feed_id)
                        & FareRule.route_id.in_(rt_ids),
                        prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            for feed_id, zn_ids in group_pairs(zone_ids, 1000):
                feed_ids.add(feed_id)
                for farerule in context.dao().fare_rules(
                        fltr=(FareRule.feed_id == feed_id) &
                    (FareRule.origin_id.in_(zn_ids)
                     | FareRule.contains_id.in_(zn_ids)
                     | FareRule.destination_id.in_(zn_ids)),
                        prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            # Special case: include all fare rules without any relationships,
            # for any feed_id we've encountered so far
            for feed_id in feed_ids:
                for farerule in context.dao().fare_rules(
                        fltr=(FareRule.feed_id == feed_id) &
                    (FareRule.route_id == None) & (FareRule.origin_id == None)
                        & (FareRule.contains_id == None) &
                    (FareRule.destination_id == None),
                        prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            print("Exported %d fare rules" % (nfarerules[0]))
        if nfarerules[0] == 0:
            os.remove("fare_rules.txt")

        with PrettyCsv("fare_attributes.txt", [
                "fare_id", "price", "currency_type", "payment_method",
                "transfers", "transfer_duration"
        ], **kwargs) as csvout:
            nfareattrs = 0
            for feed_id, fa_ids in group_pairs(fare_attr_ids, 1000):
                for fareattr in context.dao().fare_attributes(
                        fltr=(FareAttribute.feed_id == feed_id)
                        & FareAttribute.fare_id.in_(fa_ids),
                        prefetch_fare_rules=False):
                    nfareattrs += 1
                    csvout.writerow([
                        fareattr.fare_id, fareattr.price,
                        fareattr.currency_type, fareattr.payment_method,
                        fareattr.transfers, fareattr.transfer_duration
                    ])
            print("Exported %d fare attributes" % (nfareattrs))
        if nfareattrs == 0:
            os.remove("fare_attributes.txt")

        shapes_columns = [
            "shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"
        ]
        if not skip_shape_dist:
            shapes_columns.append("shape_dist_traveled")
        with PrettyCsv("shapes.txt", shapes_columns, **kwargs) as csvout:
            nshapes = nshapepoints = 0
            for shape in context.dao().shapes(fltr=context.args.filter,
                                              prefetch_points=True):
                nshapes += 1
                if nshapes % 100 == 0:
                    print("%d shapes, %d points..." % (nshapes, nshapepoints))
                for point in shape.points:
                    nshapepoints += 1
                    row = [
                        shape.shape_id, point.shape_pt_lat, point.shape_pt_lon,
                        point.shape_pt_sequence
                    ]
                    if not skip_shape_dist:
                        row.append(point.shape_dist_traveled)
                    csvout.writerow(row)
            print("Exported %d shapes with %d points" %
                  (nshapes, nshapepoints))
        if nshapes == 0:
            os.remove("shapes.txt")

        with PrettyCsv("transfers.txt", [
                "from_stop_id", "to_stop_id", "transfer_type",
                "min_transfer_time"
        ], **kwargs) as csvout:
            ntransfers = 0
            transfer_ids = set()
            for feed_id, st_ids in group_pairs(stop_ids, 1000):
                # Note: we can't use the & operator below instead of |,
                # as that would require having *all* IDs in one batch.
                for transfer in context.dao().transfers(
                        fltr=(Transfer.feed_id == feed_id) &
                    (Transfer.from_stop_id.in_(st_ids)
                     | Transfer.to_stop_id.in_(st_ids)),
                        prefetch_stops=False):
                    # As we used from_stop_id.in(...) OR to_stop_id.in(...),
                    # we need to filter out the potential superfluous results.
                    from_stop_id = (transfer.feed_id, transfer.from_stop_id)
                    to_stop_id = (transfer.feed_id, transfer.to_stop_id)
                    if from_stop_id not in stop_ids or to_stop_id not in stop_ids:
                        continue
                    transfer_id = (from_stop_id, to_stop_id)
                    if transfer_id in transfer_ids:
                        # Prevent duplicates (can happen from grouping)
                        continue
                    transfer_ids.add(transfer_id)
                    ntransfers += 1
                    csvout.writerow([
                        transfer.from_stop_id, transfer.to_stop_id,
                        transfer.transfer_type, transfer.min_transfer_time
                    ])
            print("Exported %d transfers" % (ntransfers))
        if ntransfers == 0:
            os.remove("transfers.txt")

        if bundle:
            if not isinstance(bundle, six.string_types):
                # Allow using the bare "--bundle" option (no filename given)
                bundle = "gtfs.zip"
            if not bundle.endswith('.zip'):
                bundle = bundle + '.zip'
            print("Zipping result to %s (removing .txt files)" % (bundle))
            with zipfile.ZipFile(bundle, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for f in [
                        "agency.txt", "stops.txt", "routes.txt", "trips.txt",
                        "stop_times.txt", "calendar_dates.txt",
                        "fare_rules.txt", "fare_attributes.txt", "shapes.txt",
                        "transfers.txt"
                ]:
                    if os.path.isfile(f):
                        zipf.write(f)
                        os.remove(f)
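
The export batches (feed_id, id) pairs through group_pairs(...) so that each SQL IN() clause stays bounded. A minimal sketch of what such a helper presumably does, written here only for illustration (the library's real implementation may differ, hence the _sketch suffix):

from collections import defaultdict

def group_pairs_sketch(pairs, batch_size):
    # pairs: iterable of (feed_id, item_id) tuples.
    # Yields (feed_id, [item_id, ...]) chunks of at most batch_size items.
    by_feed = defaultdict(list)
    for feed_id, item_id in pairs:
        by_feed[feed_id].append(item_id)
    for feed_id, ids in by_feed.items():
        for i in range(0, len(ids), batch_size):
            yield feed_id, ids[i:i + batch_size]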
Example #7
    def run(self, context, skip_shape_dist=False, bundle=None, **kwargs):

        with PrettyCsv("agency.txt", ["agency_id", "agency_name", "agency_url", "agency_timezone", "agency_lang", "agency_phone", "agency_fare_url", "agency_email" ], **kwargs) as csvout:
            nagencies = 0
            for agency in context.dao().agencies(fltr=context.args.filter):
                nagencies += 1
                csvout.writerow([ agency.agency_id, agency.agency_name, agency.agency_url, agency.agency_timezone, agency.agency_lang, agency.agency_phone, agency.agency_fare_url, agency.agency_email ])
            print("Exported %d agencies" % (nagencies))

        stop_ids = set()
        zone_ids = set()
        def _output_stop(stop):
            csvout.writerow([ stop.stop_id, stop.stop_code, stop.stop_name, stop.stop_desc, stop.stop_lat, stop.stop_lon, stop.zone_id, stop.stop_url, stop.location_type, stop.parent_station_id, stop.stop_timezone, stop.wheelchair_boarding ])

        with PrettyCsv("stops.txt", ["stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat", "stop_lon", "zone_id", "stop_url", "location_type", "parent_station", "stop_timezone", "wheelchair_boarding" ], **kwargs) as csvout:
            nstops = 0
            station_ids = set()
            for stop in context.dao().stops(fltr=context.args.filter, prefetch_parent=False, prefetch_substops=False):
                _output_stop(stop)
                stop_ids.add((stop.feed_id, stop.stop_id))
                if stop.parent_station_id is not None:
                    station_ids.add((stop.feed_id, stop.parent_station_id))
                if stop.zone_id is not None:
                    zone_ids.add((stop.feed_id, stop.zone_id))
                nstops += 1
            # Only export parent stations that have not already been seen
            station_ids -= stop_ids
            for feed_id, st_ids in group_pairs(station_ids, 1000):
                for station in context.dao().stops(fltr=(Stop.feed_id == feed_id) & (Stop.stop_id.in_(st_ids))):
                    _output_stop(station)
                    if station.zone_id is not None:
                        zone_ids.add((station.feed_id, station.zone_id))
                    nstops += 1
            print("Exported %d stops" % (nstops))
            stop_ids |= station_ids

        route_ids = set()
        with PrettyCsv("routes.txt", ["route_id", "agency_id", "route_short_name", "route_long_name", "route_desc", "route_type", "route_url", "route_color", "route_text_color" ], **kwargs) as csvout:
            nroutes = 0
            for route in context.dao().routes(fltr=context.args.filter):
                nroutes += 1
                csvout.writerow([ route.route_id, route.agency_id, route.route_short_name, route.route_long_name, route.route_desc, route.route_type, route.route_url, route.route_color, route.route_text_color ])
                route_ids.add((route.feed_id, route.route_id))
            print("Exported %d routes" % (nroutes))

        stop_times_columns = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type", "timepoint"]
        if not skip_shape_dist:
            stop_times_columns.append("shape_dist_traveled")
        with PrettyCsv("trips.txt", ["route_id", "service_id", "trip_id", "trip_headsign", "trip_short_name", "direction_id", "block_id", "shape_id", "wheelchair_accessible", "bikes_allowed" ], **kwargs) as csvout1:
            with PrettyCsv("stop_times.txt", stop_times_columns, **kwargs) as csvout2:
                ntrips = 0
                nstoptimes = 0
                for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=False, prefetch_stop_times=True, prefetch_calendars=False, prefetch_routes=False):
                    ntrips += 1
                    if ntrips % 1000 == 0:
                        print("%d trips..." % (ntrips))
                    csvout1.writerow([ trip.route_id, trip.service_id, trip.trip_id, trip.trip_headsign, trip.trip_short_name, trip.direction_id, trip.block_id, trip.shape_id, trip.wheelchair_accessible, trip.bikes_allowed])
                    for stoptime in trip.stop_times:
                        nstoptimes += 1
                        row = [ trip.trip_id,
                                fmttime(stoptime.arrival_time if stoptime.arrival_time is not None else stoptime.departure_time),
                                fmttime(stoptime.departure_time if stoptime.departure_time is not None else stoptime.arrival_time),
                                stoptime.stop_id,
                                stoptime.stop_sequence,
                                stoptime.stop_headsign,
                                stoptime.pickup_type,
                                stoptime.drop_off_type,
                                stoptime.timepoint ]
                        if not skip_shape_dist:
                            row.append(stoptime.shape_dist_traveled)
                        csvout2.writerow(row)
                print("Exported %d trips with %d stop times" % (ntrips, nstoptimes))

        # Note: GTFS' model does not have calendar objects to export,
        # since a calendar is renormalized/expanded into a list of dates.

        with PrettyCsv("calendar_dates.txt", ["service_id", "date", "exception_type"], **kwargs) as csvout:
            ncals = ndates = 0
            for calendar in context.dao().calendars(fltr=context.args.filter, prefetch_dates=True):
                ncals += 1
                if ncals % 1000 == 0:
                    print("%d calendars, %d dates..." % (ncals, ndates))
                for date in calendar.dates:
                    ndates += 1
                    csvout.writerow([calendar.service_id, date.toYYYYMMDD(), 1])
            print("Exported %d calendars with %d dates" % (ncals, ndates))

        fare_attr_ids = set()
        nfarerules = [0]
        def _output_farerule(farerule):
            if farerule.route_id is not None and (farerule.feed_id, farerule.route_id) not in route_ids:
                return False
            if farerule.origin_id is not None and (farerule.feed_id, farerule.origin_id) not in zone_ids:
                return False
            if farerule.contains_id is not None and (farerule.feed_id, farerule.contains_id) not in zone_ids:
                return False
            if farerule.destination_id is not None and (farerule.feed_id, farerule.destination_id) not in zone_ids:
                return False
            csvout.writerow([ farerule.fare_id, farerule.route_id, farerule.origin_id, farerule.destination_id, farerule.contains_id ])
            fare_attr_ids.add((farerule.feed_id, farerule.fare_id))
            nfarerules[0] += 1
            return True
        with PrettyCsv("fare_rules.txt", ["fare_id", "route_id", "origin_id", "destination_id", "contains_id"], **kwargs) as csvout:
            feed_ids = set()
            for feed_id, rt_ids in group_pairs(route_ids, 1000):
                feed_ids.add(feed_id)
                for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id==feed_id) & FareRule.route_id.in_(rt_ids), prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            for feed_id, zn_ids in group_pairs(zone_ids, 1000):
                feed_ids.add(feed_id)
                for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id==feed_id) & (FareRule.origin_id.in_(zn_ids) | FareRule.contains_id.in_(zn_ids) | FareRule.destination_id.in_(zn_ids)), prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            # Also include fare rules without any route/zone relationship,
            # for every feed_id encountered so far.
            for feed_id in feed_ids:
                for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id==feed_id) & (FareRule.route_id==None) & (FareRule.origin_id==None) & (FareRule.contains_id==None) & (FareRule.destination_id==None), prefetch_fare_attributes=False):
                    if not _output_farerule(farerule):
                        continue
            print("Exported %d fare rules" % (nfarerules[0]))
        if nfarerules[0] == 0:
            os.remove("fare_rules.txt")

        with PrettyCsv("fare_attributes.txt", ["fare_id", "price", "currency_type", "payment_method", "transfers", "transfer_duration"], **kwargs) as csvout:
            nfareattrs = 0
            for feed_id, fa_ids in group_pairs(fare_attr_ids, 1000):
                for fareattr in context.dao().fare_attributes(fltr=(FareAttribute.feed_id==feed_id) & FareAttribute.fare_id.in_(fa_ids), prefetch_fare_rules=False):
                    nfareattrs += 1
                    csvout.writerow([ fareattr.fare_id, fareattr.price, fareattr.currency_type, fareattr.payment_method, fareattr.transfers, fareattr.transfer_duration ])
            print("Exported %d fare attributes" % (nfareattrs))
        if nfareattrs == 0:
            os.remove("fare_attributes.txt")

        shapes_columns = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]
        if not skip_shape_dist:
            shapes_columns.append("shape_dist_traveled")
        with PrettyCsv("shapes.txt", shapes_columns, **kwargs) as csvout:
            nshapes = nshapepoints = 0
            for shape in context.dao().shapes(fltr=context.args.filter, prefetch_points=True):
                nshapes += 1
                if nshapes % 100 == 0:
                    print("%d shapes, %d points..." % (nshapes, nshapepoints))
                for point in shape.points:
                    nshapepoints += 1
                    row = [shape.shape_id, point.shape_pt_lat, point.shape_pt_lon, point.shape_pt_sequence]
                    if not skip_shape_dist:
                        row.append(point.shape_dist_traveled)
                    csvout.writerow(row)
            print("Exported %d shapes with %d points" % (nshapes, nshapepoints))
        if nshapes == 0:
            os.remove("shapes.txt")

        with PrettyCsv("transfers.txt", ["from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"], **kwargs) as csvout:
            ntransfers = 0
            transfer_ids = set()
            for feed_id, st_ids in group_pairs(stop_ids, 1000):
                # Note: we can't use a & operator below instead of |,
                # as we would need to have *all* IDs in one batch.
                for transfer in context.dao().transfers(fltr=(Transfer.feed_id == feed_id) & (Transfer.from_stop_id.in_(st_ids) | Transfer.to_stop_id.in_(st_ids)), prefetch_stops=False):
                    # As we used from_stop_id.in(...) OR to_stop_id.in(...),
                    # we need to filter out the potential superfluous results.
                    from_stop_id = (transfer.feed_id, transfer.from_stop_id)
                    to_stop_id = (transfer.feed_id, transfer.to_stop_id)
                    if from_stop_id not in stop_ids or to_stop_id not in stop_ids:
                        continue
                    transfer_id = (from_stop_id, to_stop_id)
                    if transfer_id in transfer_ids:
                        # Prevent duplicates (can happen from grouping)
                        continue
                    transfer_ids.add(transfer_id)
                    ntransfers += 1
                    csvout.writerow([ transfer.from_stop_id, transfer.to_stop_id, transfer.transfer_type, transfer.min_transfer_time ])
            print("Exported %d transfers" % (ntransfers))
        if ntransfers == 0:
            os.remove("transfers.txt")

        if bundle:
            if not isinstance(bundle, six.string_types):
                # Allow using the "--bundle" option as a plain flag (defaults to gtfs.zip)
                bundle = "gtfs.zip"
            if not bundle.endswith('.zip'):
                bundle = bundle + '.zip'
            print("Zipping result to %s (removing .txt files)" % (bundle))
            with zipfile.ZipFile(bundle, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for f in [ "agency.txt", "stops.txt", "routes.txt", "trips.txt", "stop_times.txt", "calendar_dates.txt", "fare_rules.txt", "fare_attributes.txt", "shapes.txt", "transfers.txt" ]:
                    if os.path.isfile(f):
                        zipf.write(f)
                        os.remove(f)
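The fare-rule and transfer exports above query by ID in batches (via group_pairs) because an SQL IN() clause with tens of thousands of values is slow, or rejected outright, on some database engines. Below is a minimal sketch of what such a batching helper could look like, assuming it groups (feed_id, item_id) pairs by feed and yields chunks of at most batch_size IDs; the library's actual group_pairs helper may differ in name, signature and ordering.

from collections import defaultdict

def group_pairs_by_feed(pairs, batch_size):
    # Group (feed_id, item_id) pairs by feed_id.
    by_feed = defaultdict(list)
    for feed_id, item_id in pairs:
        by_feed[feed_id].append(item_id)
    # Yield (feed_id, chunk) tuples, each chunk holding at most batch_size IDs.
    for feed_id, ids in by_feed.items():
        for i in range(0, len(ids), batch_size):
            yield feed_id, ids[i:i + batch_size]

# Example: three IDs for feed "F1" split into batches of two
# (order within a set is arbitrary).
list(group_pairs_by_feed({("F1", "A"), ("F1", "B"), ("F1", "C")}, 2))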
Ejemplo n.º 8
0
def _convert_gtfs_model(feed_id, gtfs, dao, lenient=False, disable_normalization=False):
    
    feedinfo2 = None
    logger.info("Importing feed ID '%s'" % feed_id)
    n_feedinfo = 0
    for feedinfo in gtfs.feedinfo():
        n_feedinfo += 1
        if n_feedinfo > 1:
            logger.error("Feed info should be unique if defined. Taking first one." % (n_feedinfo))
            break
        # TODO Automatically compute from calendar range if missing?
        feedinfo['feed_start_date'] = _todate(feedinfo.get('feed_start_date'))
        feedinfo['feed_end_date'] = _todate(feedinfo.get('feed_end_date'))
        feedinfo2 = FeedInfo(feed_id, **feedinfo)
    if feedinfo2 is None:
        # Optional, generate empty feed info
        feedinfo2 = FeedInfo(feed_id)
    dao.add(feedinfo2)
    dao.flush()
    logger.info("Imported %d feedinfo" % n_feedinfo)

    logger.info("Importing agencies...")
    n_agencies = 0
    single_agency = None
    agency_ids = set()
    for agency in gtfs.agencies():
        # agency_id is optional only if we have a single agency
        if n_agencies == 0 and agency.get('agency_id') is None:
            agency['agency_id'] = ''
        agency2 = Agency(feed_id, **agency)
        if n_agencies == 0:
            single_agency = agency2
        else:
            single_agency = None
        n_agencies += 1
        dao.add(agency2)
        agency_ids.add(agency2.agency_id)
    dao.flush()
    logger.info("Imported %d agencies" % n_agencies)

    def import_stop(stop, stoptype, zone_ids, item_ids, station_ids=None):
        zone_id = stop.get('zone_id')
        if zone_id and zone_id not in zone_ids:
            # Lazy-creation of zone
            zone = Zone(feed_id, zone_id)
            zone_ids.add(zone_id)
            dao.add(zone)
        stop['location_type'] = _toint(stop.get('location_type'), Stop.TYPE_STOP)
        if stop['location_type'] != stoptype:
            return 0
        stop['wheelchair_boarding'] = _toint(stop.get('wheelchair_boarding'), Stop.WHEELCHAIR_UNKNOWN)
        lat = _tofloat(stop.get('stop_lat'), None)
        lon = _tofloat(stop.get('stop_lon'), None)
        if lat is None or lon is None:
            if lenient:
                logger.error("Missing lat/lon for '%s', set to default (0,0)" % (stop,))
                if lat is None:
                    lat = 0
                if lon is None:
                    lon = 0
            else:
                raise ValueError("Missing mandatory lat/lon for '%s'." % (stop,))
        stop['stop_lat'] = lat
        stop['stop_lon'] = lon
        # This field has been renamed for consistency
        parent_id = stop.get('parent_station')
        stop['parent_station_id'] = parent_id if parent_id else None
        if parent_id and station_ids and parent_id not in station_ids:
            if lenient:
                logger.error("Parent station ID '%s' in '%s' is invalid, resetting." % (parent_id, stop))
                stop['parent_station_id'] = None
            else:
                raise KeyError("Parent station ID '%s' in '%s' is invalid." % (parent_id, stop))
        stop.pop('parent_station', None)
        stop2 = Stop(feed_id, **stop)
        dao.add(stop2)
        item_ids.add(stop2.stop_id)
        return 1

    stop_ids = set()
    station_ids = set()
    zone_ids = set()
    logger.info("Importing zones, stations and stops...")
    n_stations = n_stops = 0
    for station in gtfs.stops():
        n_stations += import_stop(station, Stop.TYPE_STATION, zone_ids, station_ids)
    for stop in gtfs.stops():
        n_stops += import_stop(stop, Stop.TYPE_STOP, zone_ids, stop_ids, station_ids)
    dao.flush()
    logger.info("Imported %d zones, %d stations and %d stops" % (len(zone_ids), n_stations, n_stops))

    logger.info("Importing transfers...")
    n_transfers = 0
    for transfer in gtfs.transfers():
        from_stop_id = transfer.get('from_stop_id')
        to_stop_id = transfer.get('to_stop_id')
        transfer['transfer_type'] = _toint(transfer.get('transfer_type'), 0)
        # Validate both stop references; in lenient mode skip the whole transfer
        # when one of them is unknown.
        invalid_stop_id = None
        for stop_id in (from_stop_id, to_stop_id):
            if stop_id not in station_ids and stop_id not in stop_ids:
                invalid_stop_id = stop_id
                break
        if invalid_stop_id is not None:
            if lenient:
                logger.error("Stop ID '%s' in '%s' is invalid, skipping transfer." % (invalid_stop_id, transfer))
                continue
            else:
                raise KeyError("Stop ID '%s' in '%s' is invalid." % (invalid_stop_id, transfer))
        transfer2 = Transfer(feed_id, **transfer)
        n_transfers += 1
        dao.add(transfer2)
    dao.flush()
    logger.info("Imported %d transfers" % (n_transfers))
    
    logger.info("Importing routes...")
    n_routes = 0
    route_ids = set()
    for route in gtfs.routes():
        route['route_type'] = int(route.get('route_type'))
        agency_id = route.get('agency_id')
        if (agency_id is None or len(agency_id) == 0) and single_agency is not None:
            # Route.agency is optional if only a single agency exists.
            agency_id = route['agency_id'] = single_agency.agency_id
        if agency_id not in agency_ids:
            if lenient:
                logger.error("Agency ID '%s' in '%s' is invalid, skipping route." % (agency_id, route))
                continue
            else:
                raise KeyError("agency ID '%s' in '%s' is invalid." % (agency_id, route))
        route2 = Route(feed_id, **route)
        dao.add(route2)
        route_ids.add(route2.route_id)
        n_routes += 1
    dao.flush()
    logger.info("Imported %d routes" % n_routes)

    logger.info("Importing fares...")
    n_fares = 0
    for fare_attr in gtfs.fare_attributes():
        fare_id = fare_attr.get('fare_id')
        fare_price = _tofloat(fare_attr.get('price'))
        currency_type = fare_attr.get('currency_type')
        payment_method = _toint(fare_attr.get('payment_method'))
        n_transfers = None
        if fare_attr.get('transfers') is not None:
            n_transfers = _toint(fare_attr.get('transfers'))
        transfer_duration = None
        if fare_attr.get('transfer_duration') is not None:
            transfer_duration = _toint(fare_attr.get('transfer_duration'))
        fare = FareAttribute(feed_id, fare_id, fare_price, currency_type,
                             payment_method, n_transfers, transfer_duration)
        dao.add(fare)
        n_fares += 1
    dao.flush()
    fare_rules = set()
    for fare_rule in gtfs.fare_rules():
        fare_rule2 = FareRule(feed_id, **fare_rule)
        if fare_rule2 in fare_rules:
            if lenient:
                logger.error("Duplicated fare rule (%s), skipping." % (fare_rule2))
                continue
            else:
                raise KeyError("Duplicated fare rule (%s)" % (fare_rule2))
        dao.add(fare_rule2)
        fare_rules.add(fare_rule2)
    dao.flush()
    logger.info("Imported %d fare and %d rules" % (n_fares, len(fare_rules)))

    logger.info("Importing calendars...")
    calanddates2 = {}
    for calendar in gtfs.calendars():
        calid = calendar.get('service_id')
        calendar2 = Calendar(feed_id, calid)
        dates2 = []
        start_date = CalendarDate.fromYYYYMMDD(calendar.get('start_date'))
        end_date = CalendarDate.fromYYYYMMDD(calendar.get('end_date'))
        for d in CalendarDate.range(start_date, end_date.next_day()):
            if int(calendar.get(DOW_NAMES[d.dow()])):
                dates2.append(d)
        calanddates2[calid] = (calendar2, set(dates2))

    logger.info("Normalizing calendar dates...")
    for caldate in gtfs.calendar_dates():
        calid = caldate.get('service_id')
        date2 = CalendarDate.fromYYYYMMDD(caldate.get('date'))
        addremove = int(caldate.get('exception_type'))
        if calid in calanddates2:
            calendar2, dates2 = calanddates2[calid]
        else:
            calendar2 = Calendar(feed_id, calid)
            dates2 = set([])
            calanddates2[calid] = (calendar2, dates2)
        if addremove == 1:
            dates2.add(date2)
        elif addremove == 2:
            if date2 in dates2:
                dates2.remove(date2)
    n_calendars = 0
    n_caldates = 0
    calendar_ids = set()
    for (calendar2, dates2) in calanddates2.values():
        calendar2.dates = [ d for d in dates2 ]
        dao.add(calendar2)
        calendar_ids.add(calendar2.service_id)
        n_calendars += 1
        n_caldates += len(calendar2.dates)
    dao.flush()
    logger.info("Imported %d calendars and %d dates" % (n_calendars, n_caldates))

    logger.info("Importing shapes...")
    n_shape_pts = 0
    shape_ids = set()
    shapepts_q = []
    for shpt in gtfs.shapes():
        shape_id = shpt.get('shape_id')
        if shape_id not in shape_ids:
            dao.add(Shape(feed_id, shape_id))
            dao.flush()
            shape_ids.add(shape_id)
        pt_seq = _toint(shpt.get('shape_pt_sequence'))
        # This field is optional
        dist_traveled = _tofloat(shpt.get('shape_dist_traveled'), -999999)
        lat = _tofloat(shpt.get('shape_pt_lat'))
        lon = _tofloat(shpt.get('shape_pt_lon'))
        shape_point = ShapePoint(feed_id, shape_id, pt_seq, lat, lon, dist_traveled)
        shapepts_q.append(shape_point)
        n_shape_pts += 1
        if n_shape_pts % 100000 == 0:
            logger.info("%d shape points" % n_shape_pts)
            dao.bulk_save_objects(shapepts_q)
            dao.flush()
            shapepts_q = []
    dao.bulk_save_objects(shapepts_q)
    dao.flush()
    logger.info("Imported %d shapes and %d points" % (len(shape_ids), n_shape_pts))

    logger.info("Importing trips...")
    n_trips = 0
    trips_q = []
    trip_ids = set()
    for trip in gtfs.trips():
        trip['wheelchair_accessible'] = _toint(trip.get('wheelchair_accessible'), Trip.WHEELCHAIR_UNKNOWN)
        trip['bikes_allowed'] = _toint(trip.get('bikes_allowed'), Trip.BIKES_UNKNOWN)
        cal_id = trip.get('service_id')
        if cal_id not in calendar_ids:
            if lenient:
                logger.error("Calendar ID '%s' in '%s' is invalid. Skipping trip." % (cal_id, trip))
                continue
            else:
                raise KeyError("Calendar ID '%s' in '%s' is invalid." % (cal_id, trip))
        route_id = trip.get('route_id')
        if route_id not in route_ids:
            if lenient:
                logger.error("Route ID '%s' in '%s' is invalid. Skipping trip." % (route_id, trip))
                continue
            else:
                raise KeyError("Route ID '%s' in trip '%s' is invalid." % (route_id, trip))
        trip2 = Trip(feed_id, frequency_generated=False, **trip)
        
        trips_q.append(trip2)
        n_trips += 1
        if n_trips % 10000 == 0:
            dao.bulk_save_objects(trips_q)
            dao.flush()
            logger.info('%s trips' % n_trips)
            trips_q = []

        trip_ids.add(trip.get('trip_id'))
    dao.bulk_save_objects(trips_q)
    dao.flush()
    
    logger.info("Imported %d trips" % n_trips)

    logger.info("Importing stop times...")
    n_stoptimes = 0
    stoptimes_q = []
    for stoptime in gtfs.stop_times():
        stopseq = _toint(stoptime.get('stop_sequence'))
        # Mark times to interpolate later on 
        arrtime = _timetoint(stoptime.get('arrival_time'), -999999)
        deptime = _timetoint(stoptime.get('departure_time'), -999999)
        if arrtime == -999999:
            arrtime = deptime
        if deptime == -999999:
            deptime = arrtime
        interp = arrtime < 0 and deptime < 0
        shpdist = _tofloat(stoptime.get('shape_dist_traveled'), -999999)
        pkptype = _toint(stoptime.get('pickup_type'), StopTime.PICKUP_DROPOFF_REGULAR)
        drptype = _toint(stoptime.get('drop_off_type'), StopTime.PICKUP_DROPOFF_REGULAR)
        trip_id = stoptime.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error("Trip ID '%s' in '%s' is invalid. Skipping stop time." % (trip_id, stoptime))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, stoptime))
        stop_id = stoptime.get('stop_id')
        if stop_id not in stop_ids:
            if lenient:
                logger.error("Stop ID '%s' in '%s' is invalid. Skipping stop time." % (stop_id, stoptime))
                continue
            else:
                raise KeyError("Trip ID '%s' in stoptime '%s' is invalid." % (stop_id, stoptime))
        stoptime2 = StopTime(feed_id, trip_id, stop_id,
                stop_sequence=stopseq, arrival_time=arrtime, departure_time=deptime,
                shape_dist_traveled=shpdist, interpolated=interp,
                pickup_type=pkptype, drop_off_type=drptype,
                stop_headsign=stoptime.get('stop_headsign'))
        stoptimes_q.append(stoptime2)
        n_stoptimes += 1
        # Commit every now and then
        if n_stoptimes % 50000 == 0:
            logger.info("%d stop times" % n_stoptimes)
            dao.bulk_save_objects(stoptimes_q)
            dao.flush()
            stoptimes_q = []
    dao.bulk_save_objects(stoptimes_q)

    logger.info("Imported %d stop times" % n_stoptimes)
    logger.info("Committing")
    dao.flush()
    # TODO Add option to enable/disable this commit
    # to ensure import is transactional
    dao.commit()
    logger.info("Commit done")

    def normalize_trip(trip, odometer):
        stopseq = 0
        n_stoptimes = len(trip.stop_times)
        last_stoptime_with_time = None
        to_interpolate = []
        odometer.reset()
        for stoptime in trip.stop_times:
            stoptime.stop_sequence = stopseq
            stoptime.shape_dist_traveled = odometer.dist_traveled(stoptime.stop,
                        stoptime.shape_dist_traveled if stoptime.shape_dist_traveled != -999999 else None)
            if stopseq == 0:
                # Force first arrival time to NULL
                stoptime.arrival_time = None
            if stopseq == n_stoptimes - 1:
                # Force last departure time to NULL
                stoptime.departure_time = None
            if stoptime.interpolated:
                to_interpolate.append(stoptime)
            else:
                if len(to_interpolate) > 0:
                    # Interpolate
                    if last_stoptime_with_time is None:
                        logger.error("Cannot interpolate missing time at trip start: %s" % trip)
                        for stti in to_interpolate:
                            # Use first defined time as fallback value.
                            stti.arrival_time = stoptime.arrival_time
                            stti.departure_time = stoptime.arrival_time
                    else:
                        tdist = stoptime.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                        ttime = stoptime.arrival_time - last_stoptime_with_time.departure_time
                        for stti in to_interpolate:
                            fdist = stti.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                            t = last_stoptime_with_time.departure_time + ttime * fdist // tdist
                            stti.arrival_time = t
                            stti.departure_time = t
                to_interpolate = []
                last_stoptime_with_time = stoptime
            stopseq += 1
        if len(to_interpolate) > 0:
            # Should not happen, but handle the case, we never know
            if last_stoptime_with_time is None:
                logger.error("Cannot interpolate missing time, no time at all: %s" % trip)
                # Keep times NULL (TODO: or remove the trip?)
            else:
                logger.error("Cannot interpolate missing time at trip end: %s" % trip)
                for stti in to_interpolate:
                    # Use last defined time as fallback value
                    stti.arrival_time = last_stoptime_with_time.departure_time
                    stti.departure_time = last_stoptime_with_time.departure_time

    if disable_normalization:
        logger.info("Skipping shapes and trips normalization")
    else:
        logger.info("Normalizing shapes and trips...")
        nshapes = 0
        ntrips = 0
        odometer = _Odometer()
        # Process shapes and associated trips
        for shape in dao.shapes(fltr=Shape.feed_id == feed_id, prefetch_points=True, batch_size=50):
            # The shape will be registered during normalization
            odometer.normalize_and_register_shape(shape)
            for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == shape.shape_id), prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
                normalize_trip(trip, odometer)
                ntrips += 1
                if ntrips % 1000 == 0:
                    logger.info("%d trips, %d shapes" % (ntrips, nshapes))
                    dao.flush()
            nshapes += 1
            #odometer._debug_cache()
        # Process trips w/o shapes
        for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == None), prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
            odometer.register_noshape()
            normalize_trip(trip, odometer)
            ntrips += 1
            if ntrips % 1000 == 0:
                logger.info("%d trips" % ntrips)
                dao.flush()
        dao.flush()
        logger.info("Normalized %d trips and %d shapes" % (ntrips, nshapes))

    # Note: we expand frequencies *after* normalization
    # for performance reasons only: it minimizes the
    # number of trips to normalize. We can do that since
    # the expansion is neutral with respect to trip normalization.
    logger.info("Expanding frequencies...")
    n_freq = 0
    n_exp_trips = 0
    trips_to_delete = []
    for frequency in gtfs.frequencies():
        trip_id = frequency.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error("Trip ID '%s' in '%s' is invalid. Skipping frequency." % (trip_id, frequency))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, frequency))
        trip = dao.trip(trip_id, feed_id=feed_id)
        start_time = _timetoint(frequency.get('start_time'))
        end_time = _timetoint(frequency.get('end_time'))
        headway_secs = _toint(frequency.get('headway_secs'))
        exact_times = _toint(frequency.get('exact_times'), Trip.TIME_APPROX)
        for trip_dep_time in range(start_time, end_time, headway_secs):
            # Here we assume departure times are all different.
            # That's a requirement in the GTFS specs, but this may break.
            # TODO Make the expanded trip ID generation configurable.
            trip_id2 = trip.trip_id + "@" + fmttime(trip_dep_time)
            trip2 = Trip(feed_id, trip_id2, trip.route_id, trip.service_id,
                         wheelchair_accessible=trip.wheelchair_accessible,
                         bikes_allowed=trip.bikes_allowed,
                         exact_times=exact_times,
                         frequency_generated=True,
                         trip_headsign=trip.trip_headsign,
                         trip_short_name=trip.trip_short_name,
                         direction_id=trip.direction_id,
                         block_id=trip.block_id)
            trip2.stop_times = []
            base_time = trip.stop_times[0].departure_time
            for stoptime in trip.stop_times:
                arrtime = None if stoptime.arrival_time is None else stoptime.arrival_time - base_time + trip_dep_time
                deptime = None if stoptime.departure_time is None else stoptime.departure_time - base_time + trip_dep_time
                stoptime2 = StopTime(feed_id, trip_id2, stoptime.stop_id, stoptime.stop_sequence,
                            arrival_time=arrtime,
                            departure_time=deptime,
                            shape_dist_traveled=stoptime.shape_dist_traveled,
                            interpolated=stoptime.interpolated,
                            timepoint=stoptime.timepoint,
                            pickup_type=stoptime.pickup_type,
                            drop_off_type=stoptime.drop_off_type)
                trip2.stop_times.append(stoptime2)
            n_exp_trips += 1
            # This will add the associated stop times
            dao.add(trip2)
        # Do not delete the trip now, as two frequencies can refer to the same trip
        trips_to_delete.append(trip)
        n_freq += 1
    for trip in trips_to_delete:
        # This also deletes the associated stop times
        dao.delete(trip)
    dao.flush()
    dao.commit()
    logger.info("Expanded %d frequencies to %d trips." % (n_freq, n_exp_trips))

    logger.info("Feed '%s': import done." % feed_id)
Ejemplo n.º 9
0
    def run(self, context, skip_shape_dist=False, bundle=None, **kwargs):

        with PrettyCsv("agency.txt", ["agency_id", "agency_name", "agency_url", "agency_timezone", "agency_lang", "agency_phone", "agency_fare_url", "agency_email" ], **kwargs) as csvout:
            nagencies = 0
            for agency in context.dao().agencies(fltr=context.args.filter):
                nagencies += 1
                csvout.writerow([ agency.agency_id, agency.agency_name, agency.agency_url, agency.agency_timezone, agency.agency_lang, agency.agency_phone, agency.agency_fare_url, agency.agency_email ])
            print("Exported %d agencies" % (nagencies))

        with PrettyCsv("stops.txt", ["stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat", "stop_lon", "zone_id", "stop_url", "location_type", "parent_station", "stop_timezone", "wheelchair_boarding" ], **kwargs) as csvout:
            nstops = 0
            for stop in context.dao().stops(fltr=context.args.filter, prefetch_parent=False, prefetch_substops=False):
                nstops += 1
                csvout.writerow([ stop.stop_id, stop.stop_code, stop.stop_name, stop.stop_desc, stop.stop_lat, stop.stop_lon, stop.zone_id, stop.stop_url, stop.location_type, stop.parent_station_id, stop.stop_timezone, stop.wheelchair_boarding ])
            print("Exported %d stops" % (nstops))

        with PrettyCsv("routes.txt", ["route_id", "agency_id", "route_short_name", "route_long_name", "route_desc", "route_type", "route_url", "route_color", "route_text_color" ], **kwargs) as csvout:
            nroutes = 0
            for route in context.dao().routes(fltr=context.args.filter):
                nroutes += 1
                csvout.writerow([ route.route_id, route.agency_id, route.route_short_name, route.route_long_name, route.route_desc, route.route_type, route.route_url, route.route_color, route.route_text_color ])
            print("Exported %d routes" % (nroutes))

        stop_times_columns = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type", "timepoint"]
        if not skip_shape_dist:
            stop_times_columns.append("shape_dist_traveled")
        with PrettyCsv("trips.txt", ["route_id", "service_id", "trip_id", "trip_headsign", "trip_short_name", "direction_id", "block_id", "shape_id", "wheelchair_accessible", "bikes_allowed" ], **kwargs) as csvout1:
            with PrettyCsv("stop_times.txt", stop_times_columns, **kwargs) as csvout2:
                ntrips = 0
                nstoptimes = 0
                for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=False, prefetch_stop_times=True, prefetch_calendars=False, prefetch_routes=False):
                    ntrips += 1
                    if ntrips % 1000 == 0:
                        print("%d trips..." % (ntrips))
                    csvout1.writerow([ trip.route_id, trip.service_id, trip.trip_id, trip.trip_headsign, trip.trip_short_name, trip.direction_id, trip.block_id, trip.shape_id, trip.wheelchair_accessible, trip.bikes_allowed])
                    for stoptime in trip.stop_times:
                        nstoptimes += 1
                        row = [ trip.trip_id,
                                fmttime(stoptime.arrival_time if stoptime.arrival_time is not None else stoptime.departure_time),
                                fmttime(stoptime.departure_time if stoptime.departure_time is not None else stoptime.arrival_time),
                                stoptime.stop_id,
                                stoptime.stop_sequence,
                                stoptime.stop_headsign,
                                stoptime.pickup_type,
                                stoptime.drop_off_type,
                                stoptime.timepoint ]
                        if not skip_shape_dist:
                            row.append(stoptime.shape_dist_traveled)
                        csvout2.writerow(row)
                print("Exported %d trips with %d stop times" % (ntrips, nstoptimes))

        # Note: the internal model has no calendar objects to export,
        # since a calendar is renormalized/expanded into a list of dates.

        with PrettyCsv("calendar_dates.txt", ["service_id", "date", "exception_type"], **kwargs) as csvout:
            ncals = ndates = 0
            for calendar in context.dao().calendars(fltr=context.args.filter, prefetch_dates=True):
                ncals += 1
                if ncals % 1000 == 0:
                    print("%d calendars, %d dates..." % (ncals, ndates))
                for date in calendar.dates:
                    ndates += 1
                    csvout.writerow([calendar.service_id, date.toYYYYMMDD(), 1])
            print("Exported %d calendars with %d dates" % (ncals, ndates))

        with PrettyCsv("fare_attributes.txt", ["fare_id", "price", "currency_type", "payment_method", "transfers", "transfer_duration"], **kwargs) as csvout:
            nfareattrs = 0
            for fareattr in context.dao().fare_attributes(fltr=context.args.filter, prefetch_fare_rules=False):
                nfareattrs += 1
                csvout.writerow([ fareattr.fare_id, fareattr.price, fareattr.currency_type, fareattr.payment_method, fareattr.transfers, fareattr.transfer_duration ])
            print("Exported %d fare attributes" % (nfareattrs))

        with PrettyCsv("fare_rules.txt", ["fare_id", "route_id", "origin_id", "destination_id", "contains_id"], **kwargs) as csvout:
            nfarerules = 0
            for farerule in context.dao().fare_rules(fltr=context.args.filter, prefetch_fare_attributes=False):
                nfarerules += 1
                csvout.writerow([ farerule.fare_id, farerule.route_id, farerule.origin_id, farerule.destination_id, farerule.contains_id ])
            print("Exported %d fare rules" % (nfarerules))

        shapes_columns = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]
        if not skip_shape_dist:
            shapes_columns.append("shape_dist_traveled")
        with PrettyCsv("shapes.txt", shapes_columns, **kwargs) as csvout:
            nshapes = nshapepoints = 0
            for shape in context.dao().shapes(fltr=context.args.filter, prefetch_points=True):
                nshapes += 1
                if nshapes % 100 == 0:
                    print("%d shapes, %d points..." % (nshapes, nshapepoints))
                for point in shape.points:
                    nshapepoints += 1
                    row = [shape.shape_id, point.shape_pt_lat, point.shape_pt_lon, point.shape_pt_sequence]
                    if not skip_shape_dist:
                        row.append(point.shape_dist_traveled)
                    csvout.writerow(row)
            print("Exported %d shapes with %d points" % (nshapes, nshapepoints))

        with PrettyCsv("transfers.txt", ["from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"], **kwargs) as csvout:
            ntransfers = 0
            for transfer in context.dao().transfers(fltr=context.args.filter, prefetch_stops=False):
                ntransfers += 1
                csvout.writerow([ transfer.from_stop_id, transfer.to_stop_id, transfer.transfer_type, transfer.min_transfer_time ])
            print("Exported %d transfers" % (ntransfers))

        if bundle:
            if not isinstance(bundle, six.string_types):
                # Allow using the "--bundle" option as a plain flag (defaults to gtfs.zip)
                bundle = "gtfs.zip"
            if not bundle.endswith('.zip'):
                bundle = bundle + '.zip'
            print("Zipping result to %s (removing .txt files)" % (bundle))
            with zipfile.ZipFile(bundle, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for f in [ "agency.txt", "stops.txt", "routes.txt", "trips.txt", "stop_times.txt", "calendar_dates.txt", "fare_rules.txt", "fare_attributes.txt", "shapes.txt", "transfers.txt" ]:
                    zipf.write(f)
                    os.remove(f)
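Both export commands write arrival_time and departure_time through fmttime(), since GTFS encodes times as seconds since midnight and allows values past 24:00:00 for service running after midnight. A minimal stand-in for such a formatter (hypothetical name; the library's own fmttime may treat None or sentinel values differently):

def format_gtfs_time(seconds):
    # Format seconds-since-midnight as HH:MM:SS, allowing hours >= 24
    # as the GTFS spec does for trips running past midnight.
    if seconds is None:
        return ""
    hours, rest = divmod(int(seconds), 3600)
    minutes, secs = divmod(rest, 60)
    return "%02d:%02d:%02d" % (hours, minutes, secs)

format_gtfs_time(9 * 3600 + 630)   # '09:10:30'
format_gtfs_time(25 * 3600 + 300)  # '25:05:00', i.e. 01:05 the next morning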
Ejemplo n.º 10
0
    def run(self, context, csv=None, cluster=0, dstp=0.5, samename=False, alldates=False, **kwargs):
        cluster_meters = float(cluster)
        dstp = float(dstp)

        print("Loading stops...")
        stops = set()
        sc = SpatialClusterizer(cluster_meters)
        for stop in context.dao().stops(fltr=context.args.filter):
            sc.add_point(stop)
            stops.add(stop)
        print("Loaded %d stops. Clusterize..." % (len(stops)))
        sc.clusterize(comparator=sc.make_comparator(samename, dstp))
        print("Aggregated in %d clusters" % (len(sc.clusters())))
        
        print("Loading calendar dates...")
        dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
        print("Loaded %d dates" % (len(dates)))
        
        print("Processing trips...")
        departures_by_clusters = defaultdict(lambda : defaultdict(list))
        ntrips = 0
        for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=True, prefetch_stop_times=True, prefetch_calendars=True):
            for stop_time in trip.stop_times:
                if not stop_time.departure_time:
                    continue
                if stop_time.stop not in stops:
                    continue
                cluster = sc.cluster_of(stop_time.stop)
                departures_by_dates = departures_by_clusters[cluster]
                for date in trip.calendar.dates:
                    if date.as_date() not in dates:
                        continue
                    departures_by_dates[date.as_date()].append(stop_time)
            if ntrips % 1000 == 0:
                print("%d trips..." % (ntrips))
            ntrips += 1

        with PrettyCsv(csv, ["cluster", "stop_id", "stop_name", "date", "departures", "min_time", "max_time", "dep_hour" ], **kwargs) as csvout:
            for cluster, departures_by_dates in departures_by_clusters.items():
                for stop in cluster.items:
                    csvout.writerow([ cluster.id, stop.stop_id, stop.stop_name ])
                if alldates:
                    # Print departure count for all dates
                    dates_to_print = list(departures_by_dates.keys())
                    dates_to_print.sort()
                else:
                    # Compute the max only
                    date_max = None
                    dep_max = 0
                    for date, departures in departures_by_dates.items():
                        ndep = len(departures)
                        if ndep >= dep_max:
                            dep_max = ndep
                            date_max = date
                    if date_max is None:
                        continue
                    dates_to_print = [ date_max ]
                for date in dates_to_print:
                    dep_times = [dep.departure_time for dep in departures_by_dates.get(date)]
                    max_hour = max(dep_times)
                    min_hour = min(dep_times)
                    delta_hour = max_hour - min_hour
                    avg_dep = float('inf') if delta_hour == 0 else len(dep_times) * 3600. / delta_hour
                    csvout.writerow([ cluster.id, None, None, date, len(dep_times), fmttime(min_hour), fmttime(max_hour), "%.3f" % avg_dep ])
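The dep_hour column written above is an average rate: the number of departures divided by the span between the first and last departure of the selected day, expressed per hour. As a standalone sketch of that arithmetic (hypothetical function name):

def departures_per_hour(dep_times):
    # dep_times are departure times in seconds since midnight.
    span = max(dep_times) - min(dep_times)
    if span == 0:
        # Mirror the guard above: a single instant yields an infinite rate.
        return float('inf')
    return len(dep_times) * 3600.0 / span

# 13 departures evenly spread between 06:00:00 and 18:00:00
# give about 1.08 departures per hour.
departures_per_hour([6 * 3600 + i * 3600 for i in range(13)])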
Ejemplo n.º 11
0
def decret_2015_1610(trips, trace=True, required_distance=500, required_ratio=2.5):

    affiche(trace, "Calcul decret 2015 1610 sur %d voyages." % (len(trips)))
    if len(trips) == 0:
        affiche(trace, "Aucun voyages, impossible de calculer.")
        return None, None, None

    affiche(trace, "Calcul de l'espacement moyen des arrêts...")
    espacement_moyen = 0
    w_esp = 0
    for trip in trips:
        # Note: we weight by the number of days each trip is active.
        # This takes frequency into account: for example, the distance of an
        # interval between two stops served on Mondays only is weighted
        # 5 times less than another interval served Monday through Friday.
        n_jours = len(trip.calendar.dates)
        for stoptime1, stoptime2 in trip.hops():
            espacement_moyen += (stoptime2.shape_dist_traveled - stoptime1.shape_dist_traveled) * n_jours
            w_esp += n_jours
    espacement_moyen /= w_esp
    affiche(trace, "L'espacement moyen entre arrêt du réseau est de %.2f mètres (max 500m)." % espacement_moyen)

    affiche(trace, "Calcul du jour ayant la fréquence en voyage la plus élevée...")
    frequences = defaultdict(lambda: 0)
    for trip in trips:
        for date in trip.calendar.dates:
            frequences[date] += 1
    date_max = None
    freq_max = 0
    for date, frequence in frequences.items():
        if frequence > freq_max:
            freq_max = frequence
            date_max = date
    affiche(trace, "Le jour ayant le nombre de voyage le plus élevé est le %s, avec %d voyages." % (date_max.as_date(), freq_max))

    affiche(trace, "Calcul des fréquences sur la plage horaire 8h - 19h...")
    # TODO Is this computation correct? The decree is not clear.
    # We count the number of trips active during each minute.
    frequences = [ 0 for minute in range(0, 20 * 60) ]
    for trip in trips:
        if date_max not in trip.calendar.dates:
            continue
        minute_depart = trip.stop_times[0].departure_time // 60
        minute_arrivee = trip.stop_times[-1].arrival_time // 60
        for minute in range(minute_depart, minute_arrivee + 1):
            if minute >= 8 * 60 and minute < 20 * 60:
                frequences[minute] += 1
    frequence_min = 99999999999
    minute_min = 0
    frequence_max = 0
    minute_max = 0
    # The min/max hourly frequency is computed as a sliding one-hour
    # average, summing the per-minute counts.
    for minute in range(8 * 60, 19 * 60):
        freq = 0
        for delta_minute in range(0, 60):
            freq += frequences[minute + delta_minute]
        if freq > frequence_max:
            frequence_max = freq
            minute_max = minute
        if freq < frequence_min:
            frequence_min = freq
            minute_min = minute
    affiche(trace, "La fréquence minimale est de %.2f voyages/heure, entre %s et %s." % (frequence_min / 60.0, fmttime(minute_min * 60), fmttime((minute_min + 60) * 60)))
    affiche(trace, "La fréquence maximale est de %.2f voyages/heure, entre %s et %s." % (frequence_max / 60.0, fmttime(minute_max * 60), fmttime((minute_max + 60) * 60)))
    if frequence_min == 0:
        ratio_frequence = float('inf')
    else:
        ratio_frequence = frequence_max / float(frequence_min)
    affiche(trace, "Le ratio entre fréquence max et min est de %.3f (max 2.5)." % ratio_frequence)

    urbain = ratio_frequence < required_ratio and espacement_moyen < required_distance
    affiche(trace, "Ce service est %s au sens du décret n° 2015-1610."
          % ("URBAIN" if urbain else "NON URBAIN"))
    return urbain, espacement_moyen, ratio_frequence
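The frequency ratio computed by decret_2015_1610() comes from a one-hour sliding window over per-minute trip counts between 08:00 and 19:00: the window with the most active trip-minutes gives the maximum hourly frequency, the one with the fewest gives the minimum. A standalone sketch of that window computation, with assumed names and a toy per-minute profile:

def min_max_hourly_load(active_minutes, first_minute=8 * 60, last_minute=19 * 60):
    # active_minutes[m] is the number of trips running during minute m of the day.
    # Returns the (min, max) load in trips/hour over all one-hour windows starting
    # between first_minute (inclusive) and last_minute (exclusive).
    best_min, best_max = float('inf'), 0
    for minute in range(first_minute, last_minute):
        load = sum(active_minutes[minute:minute + 60])
        best_min = min(best_min, load)
        best_max = max(best_max, load)
    return best_min / 60.0, best_max / 60.0

# A toy day: 3 trips running all morning (until 14:00), then 9 trips all afternoon.
counts = [3] * (14 * 60) + [9] * (10 * 60)
min_max_hourly_load(counts)  # -> (3.0, 9.0)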