def run(self, context, skip_shape_dist=False, **kwargs):
    columns = ["trip_id", "arrival_time", "departure_time", "stop_id",
               "stop_sequence", "stop_headsign", "pickup_type",
               "drop_off_type", "timepoint"]
    if not skip_shape_dist:
        columns.append("shape_dist_traveled")
    with PrettyCsv("stop_times.txt", columns, **kwargs) as csvout:
        ntrips = 0
        for trip in context.dao().trips(fltr=context.args.filter,
                                        prefetch_stops=False,
                                        prefetch_stop_times=True,
                                        prefetch_calendars=False,
                                        prefetch_routes=False):
            if ntrips % 1000 == 0:
                print("%d trips..." % (ntrips))
            ntrips += 1
            for stoptime in trip.stop_times:
                row = [trip.trip_id,
                       # Fall back on the other time when one is missing;
                       # compare with "is not None", as 0 means midnight.
                       fmttime(stoptime.arrival_time if stoptime.arrival_time is not None else stoptime.departure_time),
                       fmttime(stoptime.departure_time if stoptime.departure_time is not None else stoptime.arrival_time),
                       stoptime.stop_id,
                       stoptime.stop_sequence,
                       stoptime.stop_headsign,
                       stoptime.pickup_type,
                       stoptime.drop_off_type,
                       stoptime.timepoint]
                if not skip_shape_dist:
                    row.append(stoptime.shape_dist_traveled)
                csvout.writerow(row)
    print("Processed %d trips" % (ntrips))
def test_demo(self):
    dao = Dao(DAO_URL, sql_logging=False)
    dao.load_gtfs(DUMMY_GTFS)

    print("List of stops named '...Bordeaux...':")
    stops_bordeaux = list(dao.stops(fltr=(Stop.stop_name.ilike('%Bordeaux%')) & (Stop.location_type == Stop.TYPE_STOP)))
    for stop in stops_bordeaux:
        print(stop.stop_name)

    print("List of routes passing by those stops:")
    routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop for stop in stops_bordeaux))
    for route in routes_bordeaux:
        print("%s - %s" % (route.route_short_name, route.route_long_name))

    july4 = CalendarDate.ymd(2016, 7, 4)
    print("All departures from those stops on %s:" % (july4.as_date()))
    departures = list(dao.stoptimes(fltr=(or_(StopTime.stop == stop for stop in stops_bordeaux))
                                    & (StopTime.departure_time != None)
                                    & (func.date(CalendarDate.date) == july4.date)))
    print("There are %d departures" % (len(departures)))
    for departure in departures:
        print("%30.30s %10.10s %-20.20s > %s" % (
            departure.stop.stop_name, fmttime(departure.departure_time),
            departure.trip.route.route_long_name, departure.trip.trip_headsign))

    print("Number of departures and time range per stop on %s:" % (july4.as_date()))
    departure_by_stop = defaultdict(list)
    for departure in departures:
        departure_by_stop[departure.stop].append(departure)
    for stop, deps in departure_by_stop.items():
        min_dep = min(d.departure_time for d in deps)
        max_dep = max(d.departure_time for d in deps)
        print("%30.30s %3d departures (from %s to %s)" % (stop.stop_name, len(deps), fmttime(min_dep), fmttime(max_dep)))

    # Compute the average distance and time to next stop by route type
    ntd = [[0, 0, 0.0] for type in range(0, Route.TYPE_FUNICULAR + 1)]
    for departure in departures:
        # The following is guaranteed to succeed, as departure_time is NULL
        # for the last stop time of a trip.
        next_arrival = departure.trip.stop_times[departure.stop_sequence + 1]
        hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
        hop_time = next_arrival.arrival_time - departure.departure_time
        route_type = departure.trip.route.route_type
        ntd[route_type][0] += 1
        ntd[route_type][1] += hop_time
        ntd[route_type][2] += hop_dist
    for route_type in range(0, len(ntd)):
        n, t, d = ntd[route_type]
        if n > 0:
            print("The average distance to the next stop on those departures for route type %d is %.2f meters" % (route_type, d / n))
            print("The average time in sec to the next stop on those departures for route type %d is %s" % (route_type, fmttime(t / n)))
def _convert_gtfs_model(feed_id, gtfs, dao, lenient=False, disable_normalization=False):

    feedinfo2 = None
    logger.info("Importing feed ID '%s'" % feed_id)
    n_feedinfo = 0
    for feedinfo in gtfs.feedinfo():
        n_feedinfo += 1
        if n_feedinfo > 1:
            logger.error("Feed info should be unique if defined. Taking the first one.")
            break
        # TODO Automatically compute from calendar range if missing?
        feedinfo['feed_start_date'] = _todate(feedinfo.get('feed_start_date'))
        feedinfo['feed_end_date'] = _todate(feedinfo.get('feed_end_date'))
        feedinfo2 = FeedInfo(feed_id, **feedinfo)
    if feedinfo2 is None:
        # Optional, generate empty feed info
        feedinfo2 = FeedInfo(feed_id)
    dao.add(feedinfo2)
    dao.flush()
    logger.info("Imported %d feedinfo" % n_feedinfo)

    logger.info("Importing agencies...")
    n_agencies = 0
    single_agency = None
    agency_ids = set()
    for agency in gtfs.agencies():
        # agency_id is optional only if we have a single agency
        if n_agencies == 0 and agency.get('agency_id') is None:
            agency['agency_id'] = ''
        agency2 = Agency(feed_id, **agency)
        if n_agencies == 0:
            single_agency = agency2
        else:
            single_agency = None
        n_agencies += 1
        dao.add(agency2)
        agency_ids.add(agency2.agency_id)
    dao.flush()
    logger.info("Imported %d agencies" % n_agencies)

    def import_stop(stop, stoptype, zone_ids, item_ids, station_ids=None):
        zone_id = stop.get('zone_id')
        if zone_id and zone_id not in zone_ids:
            # Lazy-creation of zone
            zone = Zone(feed_id, zone_id)
            zone_ids.add(zone_id)
            dao.add(zone)
        stop['location_type'] = _toint(stop.get('location_type'), Stop.TYPE_STOP)
        if stop['location_type'] != stoptype:
            return 0
        stop['wheelchair_boarding'] = _toint(stop.get('wheelchair_boarding'), Stop.WHEELCHAIR_UNKNOWN)
        lat = _tofloat(stop.get('stop_lat'), None)
        lon = _tofloat(stop.get('stop_lon'), None)
        if lat is None or lon is None:
            if lenient:
                logger.error("Missing lat/lon for '%s', set to default (0,0)" % (stop,))
                if lat is None:
                    lat = 0
                if lon is None:
                    lon = 0
            else:
                raise ValueError("Missing mandatory lat/lon for '%s'." % (stop,))
        stop['stop_lat'] = lat
        stop['stop_lon'] = lon
        # This field has been renamed for consistency
        parent_id = stop.get('parent_station')
        stop['parent_station_id'] = parent_id if parent_id else None
        if parent_id and station_ids and parent_id not in station_ids:
            if lenient:
                logger.error("Parent station ID '%s' in '%s' is invalid, resetting." % (parent_id, stop))
                stop['parent_station_id'] = None
            else:
                raise KeyError("Parent station ID '%s' in '%s' is invalid." % (parent_id, stop))
        stop.pop('parent_station', None)
        stop2 = Stop(feed_id, **stop)
        dao.add(stop2)
        item_ids.add(stop2.stop_id)
        return 1

    stop_ids = set()
    station_ids = set()
    zone_ids = set()
    logger.info("Importing zones, stations and stops...")
    n_stations = n_stops = 0
    for station in gtfs.stops():
        n_stations += import_stop(station, Stop.TYPE_STATION, zone_ids, station_ids)
    for stop in gtfs.stops():
        n_stops += import_stop(stop, Stop.TYPE_STOP, zone_ids, stop_ids, station_ids)
    dao.flush()
    logger.info("Imported %d zones, %d stations and %d stops" % (len(zone_ids), n_stations, n_stops))

    logger.info("Importing transfers...")
    n_transfers = 0
    for transfer in gtfs.transfers():
        from_stop_id = transfer.get('from_stop_id')
        to_stop_id = transfer.get('to_stop_id')
        transfer['transfer_type'] = _toint(transfer.get('transfer_type'), 0)
        for stop_id in (from_stop_id, to_stop_id):
            if stop_id not in station_ids and stop_id not in stop_ids:
                if lenient:
                    logger.error("Stop ID '%s' in '%s' is invalid, skipping." % (stop_id, transfer))
                    continue
                else:
                    raise KeyError("Stop ID '%s' in '%s' is invalid." % (stop_id, transfer))
        transfer2 = Transfer(feed_id, **transfer)
        n_transfers += 1
        dao.add(transfer2)
    dao.flush()
    logger.info("Imported %d transfers" % (n_transfers))

    logger.info("Importing routes...")
    n_routes = 0
    route_ids = set()
    for route in gtfs.routes():
        route['route_type'] = int(route.get('route_type'))
        agency_id = route.get('agency_id')
        if (agency_id is None or len(agency_id) == 0) and single_agency is not None:
            # Route.agency is optional if only a single agency exists.
            agency_id = route['agency_id'] = single_agency.agency_id
        if agency_id not in agency_ids:
            if lenient:
                logger.error("Agency ID '%s' in '%s' is invalid, skipping route." % (agency_id, route))
                continue
            else:
                raise KeyError("Agency ID '%s' in '%s' is invalid." % (agency_id, route))
        route2 = Route(feed_id, **route)
        dao.add(route2)
        route_ids.add(route2.route_id)
        n_routes += 1
    dao.flush()
    logger.info("Imported %d routes" % n_routes)

    logger.info("Importing fares...")
    n_fares = 0
    for fare_attr in gtfs.fare_attributes():
        fare_id = fare_attr.get('fare_id')
        fare_price = _tofloat(fare_attr.get('price'))
        currency_type = fare_attr.get('currency_type')
        payment_method = _toint(fare_attr.get('payment_method'))
        n_transfers = None
        if fare_attr.get('transfers') is not None:
            n_transfers = _toint(fare_attr.get('transfers'))
        transfer_duration = None
        if fare_attr.get('transfer_duration') is not None:
            transfer_duration = _toint(fare_attr.get('transfer_duration'))
        fare = FareAttribute(feed_id, fare_id, fare_price, currency_type,
                             payment_method, n_transfers, transfer_duration)
        dao.add(fare)
        n_fares += 1
    dao.flush()
    fare_rules = set()
    for fare_rule in gtfs.fare_rules():
        fare_rule2 = FareRule(feed_id, **fare_rule)
        if fare_rule2 in fare_rules:
            if lenient:
                logger.error("Duplicated fare rule (%s), skipping." % (fare_rule2))
                continue
            else:
                raise KeyError("Duplicated fare rule (%s)" % (fare_rule2))
        dao.add(fare_rule2)
        fare_rules.add(fare_rule2)
    dao.flush()
    logger.info("Imported %d fares and %d fare rules" % (n_fares, len(fare_rules)))

    logger.info("Importing calendars...")
    calanddates2 = {}
    for calendar in gtfs.calendars():
        calid = calendar.get('service_id')
        calendar2 = Calendar(feed_id, calid)
        dates2 = []
        start_date = CalendarDate.fromYYYYMMDD(calendar.get('start_date'))
        end_date = CalendarDate.fromYYYYMMDD(calendar.get('end_date'))
        for d in CalendarDate.range(start_date, end_date.next_day()):
            if int(calendar.get(DOW_NAMES[d.dow()])):
                dates2.append(d)
        calanddates2[calid] = (calendar2, set(dates2))

    logger.info("Normalizing calendar dates...")
    for caldate in gtfs.calendar_dates():
        calid = caldate.get('service_id')
        date2 = CalendarDate.fromYYYYMMDD(caldate.get('date'))
        addremove = int(caldate.get('exception_type'))
        if calid in calanddates2:
            calendar2, dates2 = calanddates2[calid]
        else:
            calendar2 = Calendar(feed_id, calid)
            dates2 = set([])
            calanddates2[calid] = (calendar2, dates2)
        if addremove == 1:
            dates2.add(date2)
        elif addremove == 2:
            if date2 in dates2:
                dates2.remove(date2)
    n_calendars = 0
    n_caldates = 0
    calendar_ids = set()
    for (calendar2, dates2) in calanddates2.values():
        calendar2.dates = [d for d in dates2]
        dao.add(calendar2)
        calendar_ids.add(calendar2.service_id)
        n_calendars += 1
        n_caldates += len(calendar2.dates)
    dao.flush()
    logger.info("Imported %d calendars and %d dates" % (n_calendars, n_caldates))

    logger.info("Importing shapes...")
    n_shape_pts = 0
    shape_ids = set()
    shapepts_q = []
    for shpt in gtfs.shapes():
        shape_id = shpt.get('shape_id')
        if shape_id not in shape_ids:
            dao.add(Shape(feed_id, shape_id))
            dao.flush()
            shape_ids.add(shape_id)
        pt_seq = _toint(shpt.get('shape_pt_sequence'))
        # This field is optional
        dist_traveled = _tofloat(shpt.get('shape_dist_traveled'), -999999)
        lat = _tofloat(shpt.get('shape_pt_lat'))
        lon = _tofloat(shpt.get('shape_pt_lon'))
        shape_point = ShapePoint(feed_id, shape_id, pt_seq, lat, lon, dist_traveled)
        shapepts_q.append(shape_point)
        n_shape_pts += 1
        if n_shape_pts % 100000 == 0:
            logger.info("%d shape points" % n_shape_pts)
            dao.bulk_save_objects(shapepts_q)
            dao.flush()
            shapepts_q = []
    dao.bulk_save_objects(shapepts_q)
    dao.flush()
    logger.info("Imported %d shapes and %d points" % (len(shape_ids), n_shape_pts))

    logger.info("Importing trips...")
    n_trips = 0
    trips_q = []
    trip_ids = set()
    for trip in gtfs.trips():
        trip['wheelchair_accessible'] = _toint(trip.get('wheelchair_accessible'), Trip.WHEELCHAIR_UNKNOWN)
        trip['bikes_allowed'] = _toint(trip.get('bikes_allowed'), Trip.BIKES_UNKNOWN)
        cal_id = trip.get('service_id')
        if cal_id not in calendar_ids:
            if lenient:
                logger.error("Calendar ID '%s' in '%s' is invalid. Skipping trip." % (cal_id, trip))
                continue
            else:
                raise KeyError("Calendar ID '%s' in '%s' is invalid." % (cal_id, trip))
        route_id = trip.get('route_id')
        if route_id not in route_ids:
            if lenient:
                logger.error("Route ID '%s' in '%s' is invalid. Skipping trip." % (route_id, trip))
                continue
            else:
                raise KeyError("Route ID '%s' in trip '%s' is invalid." % (route_id, trip))
        trip2 = Trip(feed_id, frequency_generated=False, **trip)
        trips_q.append(trip2)
        n_trips += 1
        if n_trips % 10000 == 0:
            dao.bulk_save_objects(trips_q)
            dao.flush()
            logger.info('%s trips' % n_trips)
            trips_q = []
        trip_ids.add(trip.get('trip_id'))
    dao.bulk_save_objects(trips_q)
    dao.flush()
    logger.info("Imported %d trips" % n_trips)

    logger.info("Importing stop times...")
    n_stoptimes = 0
    stoptimes_q = []
    for stoptime in gtfs.stop_times():
        stopseq = _toint(stoptime.get('stop_sequence'))
        # Mark times to interpolate later on
        arrtime = _timetoint(stoptime.get('arrival_time'), -999999)
        deptime = _timetoint(stoptime.get('departure_time'), -999999)
        if arrtime == -999999:
            arrtime = deptime
        if deptime == -999999:
            deptime = arrtime
        interp = arrtime < 0 and deptime < 0
        shpdist = _tofloat(stoptime.get('shape_dist_traveled'), -999999)
        pkptype = _toint(stoptime.get('pickup_type'), StopTime.PICKUP_DROPOFF_REGULAR)
        drptype = _toint(stoptime.get('drop_off_type'), StopTime.PICKUP_DROPOFF_REGULAR)
        trip_id = stoptime.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error("Trip ID '%s' in '%s' is invalid. Skipping stop time." % (trip_id, stoptime))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, stoptime))
        stop_id = stoptime.get('stop_id')
        if stop_id not in stop_ids:
            if lenient:
                logger.error("Stop ID '%s' in '%s' is invalid. Skipping stop time." % (stop_id, stoptime))
                continue
            else:
                raise KeyError("Stop ID '%s' in stop time '%s' is invalid." % (stop_id, stoptime))
        stoptime2 = StopTime(feed_id, trip_id, stop_id,
                             stop_sequence=stopseq,
                             arrival_time=arrtime,
                             departure_time=deptime,
                             shape_dist_traveled=shpdist,
                             interpolated=interp,
                             pickup_type=pkptype,
                             drop_off_type=drptype,
                             stop_headsign=stoptime.get('stop_headsign'))
        stoptimes_q.append(stoptime2)
        n_stoptimes += 1
        # Commit every now and then
        if n_stoptimes % 50000 == 0:
            logger.info("%d stop times" % n_stoptimes)
            dao.bulk_save_objects(stoptimes_q)
            dao.flush()
            stoptimes_q = []
    dao.bulk_save_objects(stoptimes_q)
    logger.info("Imported %d stop times" % n_stoptimes)

    logger.info("Committing")
    dao.flush()
    # TODO Add option to enable/disable this commit
    # to ensure import is transactional
    dao.commit()
    logger.info("Commit done")

    def normalize_trip(trip, odometer):
        stopseq = 0
        n_stoptimes = len(trip.stop_times)
        last_stoptime_with_time = None
        to_interpolate = []
        odometer.reset()
        for stoptime in trip.stop_times:
            stoptime.stop_sequence = stopseq
            stoptime.shape_dist_traveled = odometer.dist_traveled(
                stoptime.stop,
                stoptime.shape_dist_traveled if stoptime.shape_dist_traveled != -999999 else None)
            if stopseq == 0:
                # Force first arrival time to NULL
                stoptime.arrival_time = None
            if stopseq == n_stoptimes - 1:
                # Force last departure time to NULL
                stoptime.departure_time = None
            if stoptime.interpolated:
                to_interpolate.append(stoptime)
            else:
                if len(to_interpolate) > 0:
                    # Interpolate
                    if last_stoptime_with_time is None:
                        logger.error("Cannot interpolate missing time at trip start: %s" % trip)
                        for stti in to_interpolate:
                            # Use first defined time as fallback value.
                            stti.arrival_time = stoptime.arrival_time
                            stti.departure_time = stoptime.arrival_time
                    else:
                        tdist = stoptime.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                        ttime = stoptime.arrival_time - last_stoptime_with_time.departure_time
                        for stti in to_interpolate:
                            fdist = stti.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled
                            t = last_stoptime_with_time.departure_time + ttime * fdist // tdist
                            stti.arrival_time = t
                            stti.departure_time = t
                to_interpolate = []
                last_stoptime_with_time = stoptime
            stopseq += 1
        if len(to_interpolate) > 0:
            # Should not happen, but handle the case anyway, we never know
            if last_stoptime_with_time is None:
                logger.error("Cannot interpolate missing time, no time at all: %s" % trip)
                # Keep times NULL (TODO: or remove the trip?)
            else:
                logger.error("Cannot interpolate missing time at trip end: %s" % trip)
                for stti in to_interpolate:
                    # Use last defined time as fallback value
                    stti.arrival_time = last_stoptime_with_time.departure_time
                    stti.departure_time = last_stoptime_with_time.departure_time

    if disable_normalization:
        logger.info("Skipping shapes and trips normalization")
    else:
        logger.info("Normalizing shapes and trips...")
        nshapes = 0
        ntrips = 0
        odometer = _Odometer()
        # Process shapes and associated trips
        for shape in dao.shapes(fltr=Shape.feed_id == feed_id, prefetch_points=True, batch_size=50):
            # The shape is registered in the normalization step below
            odometer.normalize_and_register_shape(shape)
            for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == shape.shape_id),
                                  prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
                normalize_trip(trip, odometer)
                ntrips += 1
                if ntrips % 1000 == 0:
                    logger.info("%d trips, %d shapes" % (ntrips, nshapes))
                    dao.flush()
            nshapes += 1
            # odometer._debug_cache()
        # Process trips w/o shapes
        for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == None),
                              prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
            odometer.register_noshape()
            normalize_trip(trip, odometer)
            ntrips += 1
            if ntrips % 1000 == 0:
                logger.info("%d trips" % ntrips)
                dao.flush()
        dao.flush()
        logger.info("Normalized %d trips and %d shapes" % (ntrips, nshapes))

    # Note: we expand frequencies *after* normalization,
    # for performance purposes only: this minimizes the
    # number of trips to normalize. We can do that since
    # the expansion is neutral trip-normalization-wise.
    logger.info("Expanding frequencies...")
    n_freq = 0
    n_exp_trips = 0
    trips_to_delete = []
    for frequency in gtfs.frequencies():
        trip_id = frequency.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error("Trip ID '%s' in '%s' is invalid. Skipping frequency." % (trip_id, frequency))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, frequency))
        trip = dao.trip(trip_id, feed_id=feed_id)
        start_time = _timetoint(frequency.get('start_time'))
        end_time = _timetoint(frequency.get('end_time'))
        headway_secs = _toint(frequency.get('headway_secs'))
        exact_times = _toint(frequency.get('exact_times'), Trip.TIME_APPROX)
        for trip_dep_time in range(start_time, end_time, headway_secs):
            # Here we assume departure times are all different.
            # That's a requirement in the GTFS specs, but this may break.
            # TODO Make the expanded trip ID generation configurable.
            trip_id2 = trip.trip_id + "@" + fmttime(trip_dep_time)
            trip2 = Trip(feed_id, trip_id2, trip.route_id, trip.service_id,
                         wheelchair_accessible=trip.wheelchair_accessible,
                         bikes_allowed=trip.bikes_allowed,
                         exact_times=exact_times,
                         frequency_generated=True,
                         trip_headsign=trip.trip_headsign,
                         trip_short_name=trip.trip_short_name,
                         direction_id=trip.direction_id,
                         block_id=trip.block_id)
            trip2.stop_times = []
            base_time = trip.stop_times[0].departure_time
            for stoptime in trip.stop_times:
                arrtime = None if stoptime.arrival_time is None else stoptime.arrival_time - base_time + trip_dep_time
                deptime = None if stoptime.departure_time is None else stoptime.departure_time - base_time + trip_dep_time
                stoptime2 = StopTime(feed_id, trip_id2, stoptime.stop_id,
                                     stoptime.stop_sequence,
                                     arrival_time=arrtime,
                                     departure_time=deptime,
                                     shape_dist_traveled=stoptime.shape_dist_traveled,
                                     interpolated=stoptime.interpolated,
                                     timepoint=stoptime.timepoint,
                                     pickup_type=stoptime.pickup_type,
                                     drop_off_type=stoptime.drop_off_type)
                trip2.stop_times.append(stoptime2)
            n_exp_trips += 1
            # This will add the associated stop times
            dao.add(trip2)
        # Do not delete the trip now, as two frequencies can refer to the same trip
        trips_to_delete.append(trip)
        n_freq += 1
    for trip in trips_to_delete:
        # This also deletes the associated stop times
        dao.delete(trip)
    dao.flush()
    dao.commit()
    logger.info("Expanded %d frequencies to %d trips." % (n_freq, n_exp_trips))

    logger.info("Feed '%s': import done." % feed_id)
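# Hedged usage sketch (not part of the original module): the converter above is
# normally driven through the Dao rather than called directly, as the demo test
# in this repository does with dao.load_gtfs(...). The 'gtfslib.dao' module path
# and the lenient/disable_normalization keywords are assumptions; load_gtfs is
# only assumed to forward them to _convert_gtfs_model.
def _example_import_feed(db_path="db.sqlite", feed_path="my-feed.zip"):
    from gtfslib.dao import Dao  # assumed import path
    dao = Dao(db_path)
    # Assumed keyword arguments, mirroring _convert_gtfs_model's signature.
    dao.load_gtfs(feed_path, feed_id="", lenient=True, disable_normalization=False)
    return dao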
def decret_2015_1610(trips, trace=True, required_distance=500, required_ratio=2.5):
    affiche(trace, "Calcul decret 2015 1610 sur %d voyages." % (len(trips)))
    if len(trips) == 0:
        affiche(trace, "Aucun voyages, impossible de calculer.")
        return None, None, None

    affiche(trace, "Calcul de l'espacement moyen des arrêts...")
    espacement_moyen = 0
    w_esp = 0
    for trip in trips:
        # Note: each trip is weighted by the number of days it is applicable.
        # This takes frequency into account: for example, the distance of a hop
        # between two stops active on Mondays only is weighted 5 times less
        # than another hop active Monday through Friday.
        n_jours = len(trip.calendar.dates)
        for stoptime1, stoptime2 in trip.hops():
            espacement_moyen += (stoptime2.shape_dist_traveled - stoptime1.shape_dist_traveled) * n_jours
            w_esp += n_jours
    espacement_moyen /= w_esp
    affiche(trace, "L'espacement moyen entre arrêt du réseau est de %.2f mètres (max %.0fm)."
            % (espacement_moyen, float(required_distance)))

    affiche(trace, "Calcul du jour ayant la fréquence en voyage la plus élevée...")
    frequences = defaultdict(lambda: 0)
    for trip in trips:
        for date in trip.calendar.dates:
            frequences[date] += 1
    date_max = None
    freq_max = 0
    for date, frequence in frequences.items():
        if frequence > freq_max:
            freq_max = frequence
            date_max = date
    affiche(trace, "Le jour ayant le nombre de voyage le plus élevé est le %s, avec %d voyages."
            % (date_max.as_date(), freq_max))

    affiche(trace, "Calcul des fréquences sur la plage horaire 8h - 19h...")
    # TODO Is this computation correct? The decree is not clear.
    # Count the number of trips active during each minute of the day.
    frequences = [0 for minute in range(0, 20 * 60)]
    for trip in trips:
        if date_max not in trip.calendar.dates:
            continue
        minute_depart = trip.stop_times[0].departure_time // 60
        minute_arrivee = trip.stop_times[-1].arrival_time // 60
        for minute in range(minute_depart, minute_arrivee + 1):
            if minute >= 8 * 60 and minute < 20 * 60:
                frequences[minute] += 1
    frequence_min = 99999999999
    minute_min = 0
    frequence_max = 0
    minute_max = 0
    # The min/max hourly frequency is computed as a one-hour sliding average,
    # by summing the per-minute frequencies.
    for minute in range(8 * 60, 19 * 60):
        freq = 0
        for delta_minute in range(0, 60):
            freq += frequences[minute + delta_minute]
        if freq > frequence_max:
            frequence_max = freq
            minute_max = minute
        if freq < frequence_min:
            frequence_min = freq
            minute_min = minute
    affiche(trace, "La fréquence minimale est de %.2f voyages/heure, entre %s et %s."
            % (frequence_min / 60.0, fmttime(minute_min * 60), fmttime((minute_min + 60) * 60)))
    affiche(trace, "La fréquence maximale est de %.2f voyages/heure, entre %s et %s."
            % (frequence_max / 60.0, fmttime(minute_max * 60), fmttime((minute_max + 60) * 60)))
    if frequence_min == 0:
        ratio_frequence = float('inf')
    else:
        ratio_frequence = frequence_max / float(frequence_min)
    affiche(trace, "Le ratio entre fréquence max et min est de %.3f (max %.2f)."
            % (ratio_frequence, float(required_ratio)))

    urbain = ratio_frequence < required_ratio and espacement_moyen < required_distance
    affiche(trace, "Ce service est %s au sens du décret n° 2015-1610."
            % ("URBAIN" if urbain else "NON URBAIN"))
    return urbain, espacement_moyen, ratio_frequence
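# Hedged usage sketch (not part of the original module): computing the décret
# 2015-1610 criteria over all trips of a loaded feed. The 'gtfslib.dao' import
# path and the prefetch keyword names are assumptions based on their use
# elsewhere in this file.
def _example_decret(db_path="db.sqlite"):
    from gtfslib.dao import Dao  # assumed import path
    dao = Dao(db_path)
    # decret_2015_1610 needs trip.calendar.dates, trip.hops() and stop times,
    # so prefetch calendars and stop times to avoid lazy loading per trip.
    trips = list(dao.trips(prefetch_stop_times=True, prefetch_calendars=True))
    urbain, espacement_moyen, ratio = decret_2015_1610(trips, trace=True)
    return urbain, espacement_moyen, ratio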
def run(self, context, skip_shape_dist=False, bundle=None, **kwargs):

    with PrettyCsv("agency.txt", ["agency_id", "agency_name", "agency_url",
                                  "agency_timezone", "agency_lang", "agency_phone",
                                  "agency_fare_url", "agency_email"], **kwargs) as csvout:
        nagencies = 0
        for agency in context.dao().agencies(fltr=context.args.filter):
            nagencies += 1
            csvout.writerow([agency.agency_id, agency.agency_name, agency.agency_url,
                             agency.agency_timezone, agency.agency_lang, agency.agency_phone,
                             agency.agency_fare_url, agency.agency_email])
        print("Exported %d agencies" % (nagencies))

    stop_ids = set()
    zone_ids = set()

    def _output_stop(stop):
        csvout.writerow([stop.stop_id, stop.stop_code, stop.stop_name, stop.stop_desc,
                         stop.stop_lat, stop.stop_lon, stop.zone_id, stop.stop_url,
                         stop.location_type, stop.parent_station_id, stop.stop_timezone,
                         stop.wheelchair_boarding])

    with PrettyCsv("stops.txt", ["stop_id", "stop_code", "stop_name", "stop_desc",
                                 "stop_lat", "stop_lon", "zone_id", "stop_url",
                                 "location_type", "parent_station", "stop_timezone",
                                 "wheelchair_boarding"], **kwargs) as csvout:
        nstops = 0
        station_ids = set()
        for stop in context.dao().stops(fltr=context.args.filter, prefetch_parent=False, prefetch_substops=False):
            _output_stop(stop)
            stop_ids.add((stop.feed_id, stop.stop_id))
            if stop.parent_station_id is not None:
                station_ids.add((stop.feed_id, stop.parent_station_id))
            if stop.zone_id is not None:
                zone_ids.add((stop.feed_id, stop.zone_id))
            nstops += 1
        # Only export parent stations that have not already been seen
        station_ids -= stop_ids
        for feed_id, st_ids in group_pairs(station_ids, 1000):
            for station in context.dao().stops(fltr=(Stop.feed_id == feed_id) & (Stop.stop_id.in_(st_ids))):
                _output_stop(station)
                if station.zone_id is not None:
                    zone_ids.add((station.feed_id, station.zone_id))
                nstops += 1
        print("Exported %d stops" % (nstops))
    stop_ids |= station_ids

    route_ids = set()
    with PrettyCsv("routes.txt", ["route_id", "agency_id", "route_short_name",
                                  "route_long_name", "route_desc", "route_type",
                                  "route_url", "route_color", "route_text_color"], **kwargs) as csvout:
        nroutes = 0
        for route in context.dao().routes(fltr=context.args.filter):
            nroutes += 1
            csvout.writerow([route.route_id, route.agency_id, route.route_short_name,
                             route.route_long_name, route.route_desc, route.route_type,
                             route.route_url, route.route_color, route.route_text_color])
            route_ids.add((route.feed_id, route.route_id))
        print("Exported %d routes" % (nroutes))

    stop_times_columns = ["trip_id", "arrival_time", "departure_time", "stop_id",
                          "stop_sequence", "stop_headsign", "pickup_type",
                          "drop_off_type", "timepoint"]
    if not skip_shape_dist:
        stop_times_columns.append("shape_dist_traveled")
    with PrettyCsv("trips.txt", ["route_id", "service_id", "trip_id", "trip_headsign",
                                 "trip_short_name", "direction_id", "block_id", "shape_id",
                                 "wheelchair_accessible", "bikes_allowed"], **kwargs) as csvout1:
        with PrettyCsv("stop_times.txt", stop_times_columns, **kwargs) as csvout2:
            ntrips = 0
            nstoptimes = 0
            for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=False,
                                            prefetch_stop_times=True, prefetch_calendars=False,
                                            prefetch_routes=False):
                ntrips += 1
                if ntrips % 1000 == 0:
                    print("%d trips..." % (ntrips))
                csvout1.writerow([trip.route_id, trip.service_id, trip.trip_id,
                                  trip.trip_headsign, trip.trip_short_name, trip.direction_id,
                                  trip.block_id, trip.shape_id, trip.wheelchair_accessible,
                                  trip.bikes_allowed])
                for stoptime in trip.stop_times:
                    nstoptimes += 1
                    row = [trip.trip_id,
                           fmttime(stoptime.arrival_time if stoptime.arrival_time is not None else stoptime.departure_time),
                           fmttime(stoptime.departure_time if stoptime.departure_time is not None else stoptime.arrival_time),
                           stoptime.stop_id,
                           stoptime.stop_sequence,
                           stoptime.stop_headsign,
                           stoptime.pickup_type,
                           stoptime.drop_off_type,
                           stoptime.timepoint]
                    if not skip_shape_dist:
                        row.append(stoptime.shape_dist_traveled)
                    csvout2.writerow(row)
            print("Exported %d trips with %d stop times" % (ntrips, nstoptimes))

    # Note: GTFS' model does not have calendar objects to export,
    # since a calendar is renormalized/expanded to a list of dates.
    with PrettyCsv("calendar_dates.txt", ["service_id", "date", "exception_type"], **kwargs) as csvout:
        ncals = ndates = 0
        for calendar in context.dao().calendars(fltr=context.args.filter, prefetch_dates=True):
            ncals += 1
            if ncals % 1000 == 0:
                print("%d calendars, %d dates..." % (ncals, ndates))
            for date in calendar.dates:
                ndates += 1
                csvout.writerow([calendar.service_id, date.toYYYYMMDD(), 1])
        print("Exported %d calendars with %d dates" % (ncals, ndates))

    fare_attr_ids = set()
    nfarerules = [0]

    def _output_farerule(farerule):
        if farerule.route_id is not None and (farerule.feed_id, farerule.route_id) not in route_ids:
            return False
        if farerule.origin_id is not None and (farerule.feed_id, farerule.origin_id) not in zone_ids:
            return False
        if farerule.contains_id is not None and (farerule.feed_id, farerule.contains_id) not in zone_ids:
            return False
        if farerule.destination_id is not None and (farerule.feed_id, farerule.destination_id) not in zone_ids:
            return False
        csvout.writerow([farerule.fare_id, farerule.route_id, farerule.origin_id,
                         farerule.destination_id, farerule.contains_id])
        fare_attr_ids.add((farerule.feed_id, farerule.fare_id))
        nfarerules[0] += 1
        return True

    with PrettyCsv("fare_rules.txt", ["fare_id", "route_id", "origin_id",
                                      "destination_id", "contains_id"], **kwargs) as csvout:
        feed_ids = set()
        for feed_id, rt_ids in group_pairs(route_ids, 1000):
            feed_ids.add(feed_id)
            for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id == feed_id) & FareRule.route_id.in_(rt_ids),
                                                     prefetch_fare_attributes=False):
                if not _output_farerule(farerule):
                    continue
        for feed_id, zn_ids in group_pairs(zone_ids, 1000):
            feed_ids.add(feed_id)
            for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id == feed_id)
                                                     & (FareRule.origin_id.in_(zn_ids)
                                                        | FareRule.contains_id.in_(zn_ids)
                                                        | FareRule.destination_id.in_(zn_ids)),
                                                     prefetch_fare_attributes=False):
                if not _output_farerule(farerule):
                    continue
        # Special code to include all fare rules w/o any relationships
        # for any feed_id we've encountered so far
        for feed_id in feed_ids:
            for farerule in context.dao().fare_rules(fltr=(FareRule.feed_id == feed_id)
                                                     & (FareRule.route_id == None)
                                                     & (FareRule.origin_id == None)
                                                     & (FareRule.contains_id == None)
                                                     & (FareRule.destination_id == None),
                                                     prefetch_fare_attributes=False):
                if not _output_farerule(farerule):
                    continue
        print("Exported %d fare rules" % (nfarerules[0]))
    if nfarerules[0] == 0:
        os.remove("fare_rules.txt")

    with PrettyCsv("fare_attributes.txt", ["fare_id", "price", "currency_type",
                                           "payment_method", "transfers", "transfer_duration"], **kwargs) as csvout:
        nfareattrs = 0
        for feed_id, fa_ids in group_pairs(fare_attr_ids, 1000):
            for fareattr in context.dao().fare_attributes(fltr=(FareAttribute.feed_id == feed_id) & FareAttribute.fare_id.in_(fa_ids),
                                                          prefetch_fare_rules=False):
                nfareattrs += 1
                csvout.writerow([fareattr.fare_id, fareattr.price, fareattr.currency_type,
                                 fareattr.payment_method, fareattr.transfers,
                                 fareattr.transfer_duration])
        print("Exported %d fare attributes" % (nfareattrs))
    if nfareattrs == 0:
        os.remove("fare_attributes.txt")

    shapes_columns = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]
    if not skip_shape_dist:
        shapes_columns.append("shape_dist_traveled")
    with PrettyCsv("shapes.txt", shapes_columns, **kwargs) as csvout:
        nshapes = nshapepoints = 0
        for shape in context.dao().shapes(fltr=context.args.filter, prefetch_points=True):
            nshapes += 1
            if nshapes % 100 == 0:
                print("%d shapes, %d points..." % (nshapes, nshapepoints))
            for point in shape.points:
                nshapepoints += 1
                row = [shape.shape_id, point.shape_pt_lat, point.shape_pt_lon, point.shape_pt_sequence]
                if not skip_shape_dist:
                    row.append(point.shape_dist_traveled)
                csvout.writerow(row)
        print("Exported %d shapes with %d points" % (nshapes, nshapepoints))
    if nshapes == 0:
        os.remove("shapes.txt")

    with PrettyCsv("transfers.txt", ["from_stop_id", "to_stop_id", "transfer_type",
                                     "min_transfer_time"], **kwargs) as csvout:
        ntransfers = 0
        transfer_ids = set()
        for feed_id, st_ids in group_pairs(stop_ids, 1000):
            # Note: we can't use a & operator below instead of |,
            # as we would need to have *all* IDs in one batch.
            for transfer in context.dao().transfers(fltr=(Transfer.feed_id == feed_id)
                                                    & (Transfer.from_stop_id.in_(st_ids) | Transfer.to_stop_id.in_(st_ids)),
                                                    prefetch_stops=False):
                # As we used from_stop_id.in_(...) OR to_stop_id.in_(...),
                # we need to filter out the potential superfluous results.
                from_stop_id = (transfer.feed_id, transfer.from_stop_id)
                to_stop_id = (transfer.feed_id, transfer.to_stop_id)
                if from_stop_id not in stop_ids or to_stop_id not in stop_ids:
                    continue
                transfer_id = (from_stop_id, to_stop_id)
                if transfer_id in transfer_ids:
                    # Prevent duplicates (can happen from grouping)
                    continue
                transfer_ids.add(transfer_id)
                ntransfers += 1
                csvout.writerow([transfer.from_stop_id, transfer.to_stop_id,
                                 transfer.transfer_type, transfer.min_transfer_time])
        print("Exported %d transfers" % (ntransfers))
    if ntransfers == 0:
        os.remove("transfers.txt")

    if bundle:
        if not isinstance(bundle, six.string_types):
            # Allow the use of the "--bundle" option without a value
            bundle = "gtfs.zip"
        if not bundle.endswith('.zip'):
            bundle = bundle + '.zip'
        print("Zipping result to %s (removing .txt files)" % (bundle))
        with zipfile.ZipFile(bundle, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for f in ["agency.txt", "stops.txt", "routes.txt", "trips.txt",
                      "stop_times.txt", "calendar_dates.txt", "fare_rules.txt",
                      "fare_attributes.txt", "shapes.txt", "transfers.txt"]:
                if os.path.isfile(f):
                    zipf.write(f)
                    os.remove(f)
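# Hedged usage sketch (not part of the original module): driving the export
# run() method outside the plugin framework by faking the minimal 'context'
# interface it relies on (context.dao() and context.args.filter). All names
# below are illustrative assumptions, not the real plugin API.
def _example_export(dao, out_bundle="gtfs.zip"):
    from collections import namedtuple
    Args = namedtuple("Args", ["filter"])

    class _FakeContext(object):
        def __init__(self, dao, fltr=None):
            self._dao = dao
            self.args = Args(filter=fltr)

        def dao(self):
            return self._dao

    # 'exporter' would be an instance of the class defining run() above, e.g.:
    # exporter.run(_FakeContext(dao), skip_shape_dist=True, bundle=out_bundle)
    return _FakeContext(dao)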
def _convert_gtfs_model(feed_id, gtfs, dao, lenient=False, disable_normalization=False): feedinfo2 = None logger.info("Importing feed ID '%s'" % feed_id) n_feedinfo = 0 for feedinfo in gtfs.feedinfo(): n_feedinfo += 1 if n_feedinfo > 1: logger.error("Feed info should be unique if defined. Taking first one." % (n_feedinfo)) break # TODO Automatically compute from calendar range if missing? feedinfo['feed_start_date'] = _todate(feedinfo.get('feed_start_date')) feedinfo['feed_end_date'] = _todate(feedinfo.get('feed_end_date')) feedinfo2 = FeedInfo(feed_id, **feedinfo) if feedinfo2 is None: # Optional, generate empty feed info feedinfo2 = FeedInfo(feed_id) dao.add(feedinfo2) dao.flush() logger.info("Imported %d feedinfo" % n_feedinfo) logger.info("Importing agencies...") n_agencies = 0 single_agency = None agency_ids = set() for agency in gtfs.agencies(): # agency_id is optional only if we have a single agency if n_agencies == 0 and agency.get('agency_id') is None: agency['agency_id'] = '' agency2 = Agency(feed_id, **agency) if n_agencies == 0: single_agency = agency2 else: single_agency = None n_agencies += 1 dao.add(agency2) agency_ids.add(agency2.agency_id) dao.flush() logger.info("Imported %d agencies" % n_agencies) def import_stop(stop, stoptype, zone_ids, item_ids, station_ids=None): zone_id = stop.get('zone_id') if zone_id and zone_id not in zone_ids: # Lazy-creation of zone zone = Zone(feed_id, zone_id) zone_ids.add(zone_id) dao.add(zone) stop['location_type'] = _toint(stop.get('location_type'), Stop.TYPE_STOP) if stop['location_type'] != stoptype: return 0 stop['wheelchair_boarding'] = _toint(stop.get('wheelchair_boarding'), Stop.WHEELCHAIR_UNKNOWN) lat = _tofloat(stop.get('stop_lat'), None) lon = _tofloat(stop.get('stop_lon'), None) if lat is None or lon is None: if lenient: logger.error("Missing lat/lon for '%s', set to default (0,0)" % (stop,)) if lat is None: lat = 0 if lon is None: lon = 0 else: raise ValueError("Missing mandatory lat/lon for '%s'." % (stop,)) stop['stop_lat'] = lat stop['stop_lon'] = lon # This field has been renamed for consistency parent_id = stop.get('parent_station') stop['parent_station_id'] = parent_id if parent_id else None if parent_id and station_ids and parent_id not in station_ids: if lenient: logger.error("Parent station ID '%s' in '%s' is invalid, resetting." % (parent_id, stop)) stop['parent_station_id'] = None else: raise KeyError("Parent station ID '%s' in '%s' is invalid." % (parent_id, stop)) stop.pop('parent_station', None) stop2 = Stop(feed_id, **stop) dao.add(stop2) item_ids.add(stop2.stop_id) return 1 stop_ids = set() station_ids = set() zone_ids = set() logger.info("Importing zones, stations and stops...") n_stations = n_stops = 0 for station in gtfs.stops(): n_stations += import_stop(station, Stop.TYPE_STATION, zone_ids, station_ids) for stop in gtfs.stops(): n_stops += import_stop(stop, Stop.TYPE_STOP, zone_ids, stop_ids, station_ids) dao.flush() logger.info("Imported %d zones, %d stations and %d stops" % (len(zone_ids), n_stations, n_stops)) logger.info("Importing transfers...") n_transfers = 0 for transfer in gtfs.transfers(): from_stop_id = transfer.get('from_stop_id') to_stop_id = transfer.get('to_stop_id') transfer['transfer_type'] = _toint(transfer.get('transfer_type'), 0) for stop_id in (from_stop_id, to_stop_id): if stop_id not in station_ids and stop_id not in stop_ids: if lenient: logger.error("Stop ID '%s' in '%s' is invalid, skipping." % (stop_id, transfer)) continue else: raise KeyError("Stop ID '%s' in '%s' is invalid." 
% (stop_id, transfer)) transfer2 = Transfer(feed_id, **transfer) n_transfers += 1 dao.add(transfer2) dao.flush() logger.info("Imported %d transfers" % (n_transfers)) logger.info("Importing routes...") n_routes = 0 route_ids = set() for route in gtfs.routes(): route['route_type'] = int(route.get('route_type')) agency_id = route.get('agency_id') if (agency_id is None or len(agency_id) == 0) and single_agency is not None: # Route.agency is optional if only a single agency exists. agency_id = route['agency_id'] = single_agency.agency_id if agency_id not in agency_ids: if lenient: logger.error("Agency ID '%s' in '%s' is invalid, skipping route." % (agency_id, route)) continue else: raise KeyError("agency ID '%s' in '%s' is invalid." % (agency_id, route)) route2 = Route(feed_id, **route) dao.add(route2) route_ids.add(route2.route_id) n_routes += 1 dao.flush() logger.info("Imported %d routes" % n_routes) logger.info("Importing fares...") n_fares = 0 for fare_attr in gtfs.fare_attributes(): fare_id = fare_attr.get('fare_id') fare_price = _tofloat(fare_attr.get('price')) currency_type = fare_attr.get('currency_type') payment_method = _toint(fare_attr.get('payment_method')) n_transfers = None if fare_attr.get('transfers') is not None: n_transfers = _toint(fare_attr.get('transfers')) transfer_duration = None if fare_attr.get('transfer_duration') is not None: transfer_duration = _toint(fare_attr.get('transfer_duration')) fare = FareAttribute(feed_id, fare_id, fare_price, currency_type, payment_method, n_transfers, transfer_duration) dao.add(fare) n_fares += 1 dao.flush() fare_rules = set() for fare_rule in gtfs.fare_rules(): fare_rule2 = FareRule(feed_id, **fare_rule) if fare_rule2 in fare_rules: if lenient: logger.error("Duplicated fare rule (%s), skipping." % (fare_rule2)) continue else: raise KeyError("Duplicated fare rule (%s)" % (fare_rule2)) dao.add(fare_rule2) fare_rules.add(fare_rule2) dao.flush() logger.info("Imported %d fare and %d rules" % (n_fares, len(fare_rules))) logger.info("Importing calendars...") calanddates2 = {} for calendar in gtfs.calendars(): calid = calendar.get('service_id') calendar2 = Calendar(feed_id, calid) dates2 = [] start_date = CalendarDate.fromYYYYMMDD(calendar.get('start_date')) end_date = CalendarDate.fromYYYYMMDD(calendar.get('end_date')) for d in CalendarDate.range(start_date, end_date.next_day()): if int(calendar.get(DOW_NAMES[d.dow()])): dates2.append(d) calanddates2[calid] = (calendar2, set(dates2)) logger.info("Normalizing calendar dates...") for caldate in gtfs.calendar_dates(): calid = caldate.get('service_id') date2 = CalendarDate.fromYYYYMMDD(caldate.get('date')) addremove = int(caldate.get('exception_type')) if calid in calanddates2: calendar2, dates2 = calanddates2[calid] else: calendar2 = Calendar(feed_id, calid) dates2 = set([]) calanddates2[calid] = (calendar2, dates2) if addremove == 1: dates2.add(date2) elif addremove == 2: if date2 in dates2: dates2.remove(date2) n_calendars = 0 n_caldates = 0 calendar_ids = set() for (calendar2, dates2) in calanddates2.values(): calendar2.dates = [ d for d in dates2 ] dao.add(calendar2) calendar_ids.add(calendar2.service_id) n_calendars += 1 n_caldates += len(calendar2.dates) dao.flush() logger.info("Imported %d calendars and %d dates" % (n_calendars, n_caldates)) logger.info("Importing shapes...") n_shape_pts = 0 shape_ids = set() shapepts_q = [] for shpt in gtfs.shapes(): shape_id = shpt.get('shape_id') if shape_id not in shape_ids: dao.add(Shape(feed_id, shape_id)) dao.flush() shape_ids.add(shape_id) pt_seq = 
_toint(shpt.get('shape_pt_sequence')) # This field is optional dist_traveled = _tofloat(shpt.get('shape_dist_traveled'), -999999) lat = _tofloat(shpt.get('shape_pt_lat')) lon = _tofloat(shpt.get('shape_pt_lon')) shape_point = ShapePoint(feed_id, shape_id, pt_seq, lat, lon, dist_traveled) shapepts_q.append(shape_point) n_shape_pts += 1 if n_shape_pts % 100000 == 0: logger.info("%d shape points" % n_shape_pts) dao.bulk_save_objects(shapepts_q) dao.flush() shapepts_q = [] dao.bulk_save_objects(shapepts_q) dao.flush() logger.info("Imported %d shapes and %d points" % (len(shape_ids), n_shape_pts)) logger.info("Importing trips...") n_trips = 0 trips_q = [] trip_ids = set() for trip in gtfs.trips(): trip['wheelchair_accessible'] = _toint(trip.get('wheelchair_accessible'), Trip.WHEELCHAIR_UNKNOWN) trip['bikes_allowed'] = _toint(trip.get('bikes_allowed'), Trip.BIKES_UNKNOWN) cal_id = trip.get('service_id') if cal_id not in calendar_ids: if lenient: logger.error("Calendar ID '%s' in '%s' is invalid. Skipping trip." % (cal_id, trip)) continue else: raise KeyError("Calendar ID '%s' in '%s' is invalid." % (cal_id, trip)) route_id = trip.get('route_id') if route_id not in route_ids: if lenient: logger.error("Route ID '%s' in '%s' is invalid. Skipping trip." % (route_id, trip)) continue else: raise KeyError("Route ID '%s' in trip '%s' is invalid." % (route_id, trip)) trip2 = Trip(feed_id, frequency_generated=False, **trip) trips_q.append(trip2) n_trips += 1 if n_trips % 10000 == 0: dao.bulk_save_objects(trips_q) dao.flush() logger.info('%s trips' % n_trips) trips_q = [] trip_ids.add(trip.get('trip_id')) dao.bulk_save_objects(trips_q) dao.flush() logger.info("Imported %d trips" % n_trips) logger.info("Importing stop times...") n_stoptimes = 0 stoptimes_q = [] for stoptime in gtfs.stop_times(): stopseq = _toint(stoptime.get('stop_sequence')) # Mark times to interpolate later on arrtime = _timetoint(stoptime.get('arrival_time'), -999999) deptime = _timetoint(stoptime.get('departure_time'), -999999) if arrtime == -999999: arrtime = deptime if deptime == -999999: deptime = arrtime interp = arrtime < 0 and deptime < 0 shpdist = _tofloat(stoptime.get('shape_dist_traveled'), -999999) pkptype = _toint(stoptime.get('pickup_type'), StopTime.PICKUP_DROPOFF_REGULAR) drptype = _toint(stoptime.get('drop_off_type'), StopTime.PICKUP_DROPOFF_REGULAR) trip_id = stoptime.get('trip_id') if trip_id not in trip_ids: if lenient: logger.error("Trip ID '%s' in '%s' is invalid. Skipping stop time." % (trip_id, stoptime)) continue else: raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, stoptime)) stop_id = stoptime.get('stop_id') if stop_id not in stop_ids: if lenient: logger.error("Stop ID '%s' in '%s' is invalid. Skipping stop time." % (stop_id, stoptime)) continue else: raise KeyError("Trip ID '%s' in stoptime '%s' is invalid." 
% (stop_id, stoptime)) stoptime2 = StopTime(feed_id, trip_id, stop_id, stop_sequence=stopseq, arrival_time=arrtime, departure_time=deptime, shape_dist_traveled=shpdist, interpolated=interp, pickup_type=pkptype, drop_off_type=drptype, stop_headsign=stoptime.get('stop_headsign')) stoptimes_q.append(stoptime2) n_stoptimes += 1 # Commit every now and then if n_stoptimes % 50000 == 0: logger.info("%d stop times" % n_stoptimes) dao.bulk_save_objects(stoptimes_q) dao.flush() stoptimes_q = [] dao.bulk_save_objects(stoptimes_q) logger.info("Imported %d stop times" % n_stoptimes) logger.info("Committing") dao.flush() # TODO Add option to enable/disable this commit # to ensure import is transactionnal dao.commit() logger.info("Commit done") def normalize_trip(trip, odometer): stopseq = 0 n_stoptimes = len(trip.stop_times) last_stoptime_with_time = None to_interpolate = [] odometer.reset() for stoptime in trip.stop_times: stoptime.stop_sequence = stopseq stoptime.shape_dist_traveled = odometer.dist_traveled(stoptime.stop, stoptime.shape_dist_traveled if stoptime.shape_dist_traveled != -999999 else None) if stopseq == 0: # Force first arrival time to NULL stoptime.arrival_time = None if stopseq == n_stoptimes - 1: # Force last departure time to NULL stoptime.departure_time = None if stoptime.interpolated: to_interpolate.append(stoptime) else: if len(to_interpolate) > 0: # Interpolate if last_stoptime_with_time is None: logger.error("Cannot interpolate missing time at trip start: %s" % trip) for stti in to_interpolate: # Use first defined time as fallback value. stti.arrival_time = stoptime.arrival_time stti.departure_time = stoptime.arrival_time else: tdist = stoptime.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled ttime = stoptime.arrival_time - last_stoptime_with_time.departure_time for stti in to_interpolate: fdist = stti.shape_dist_traveled - last_stoptime_with_time.shape_dist_traveled t = last_stoptime_with_time.departure_time + ttime * fdist // tdist stti.arrival_time = t stti.departure_time = t to_interpolate = [] last_stoptime_with_time = stoptime stopseq += 1 if len(to_interpolate) > 0: # Should not happen, but handle the case, we never know if last_stoptime_with_time is None: logger.error("Cannot interpolate missing time, no time at all: %s" % trip) # Keep times NULL (TODO: or remove the trip?) 
            else:
                logger.error("Cannot interpolate missing time at trip end: %s" % trip)
                for stti in to_interpolate:
                    # Use the last defined time as a fallback value.
                    stti.arrival_time = last_stoptime_with_time.departure_time
                    stti.departure_time = last_stoptime_with_time.departure_time

    if disable_normalization:
        logger.info("Skipping shapes and trips normalization")
    else:
        logger.info("Normalizing shapes and trips...")
        nshapes = 0
        ntrips = 0
        odometer = _Odometer()
        # Process shapes and their associated trips
        for shape in dao.shapes(fltr=Shape.feed_id == feed_id, prefetch_points=True, batch_size=50):
            # The shape is registered in the odometer for normalization
            odometer.normalize_and_register_shape(shape)
            for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == shape.shape_id),
                                  prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
                normalize_trip(trip, odometer)
                ntrips += 1
                if ntrips % 1000 == 0:
                    logger.info("%d trips, %d shapes" % (ntrips, nshapes))
                    dao.flush()
            nshapes += 1
            # odometer._debug_cache()
        # Process trips w/o shapes
        for trip in dao.trips(fltr=(Trip.feed_id == feed_id) & (Trip.shape_id == None),
                              prefetch_stop_times=True, prefetch_stops=True, batch_size=800):
            odometer.register_noshape()
            normalize_trip(trip, odometer)
            ntrips += 1
            if ntrips % 1000 == 0:
                logger.info("%d trips" % ntrips)
                dao.flush()
        dao.flush()
        logger.info("Normalized %d trips and %d shapes" % (ntrips, nshapes))

    # Note: we expand frequencies *after* normalization, for performance
    # purposes only: that minimizes the number of trips to normalize.
    # We can do that since the expansion is neutral trip-normalization-wise.
    logger.info("Expanding frequencies...")
    n_freq = 0
    n_exp_trips = 0
    trips_to_delete = []
    for frequency in gtfs.frequencies():
        trip_id = frequency.get('trip_id')
        if trip_id not in trip_ids:
            if lenient:
                logger.error("Trip ID '%s' in '%s' is invalid. Skipping frequency." % (trip_id, frequency))
                continue
            else:
                raise KeyError("Trip ID '%s' in '%s' is invalid." % (trip_id, frequency))
        trip = dao.trip(trip_id, feed_id=feed_id)
        start_time = _timetoint(frequency.get('start_time'))
        end_time = _timetoint(frequency.get('end_time'))
        headway_secs = _toint(frequency.get('headway_secs'))
        exact_times = _toint(frequency.get('exact_times'), Trip.TIME_APPROX)
        for trip_dep_time in range(start_time, end_time, headway_secs):
            # Here we assume departure times are all different.
            # That's a requirement in the GTFS specs, but this may break.
            # TODO Make the expanded trip ID generation configurable.
            trip_id2 = trip.trip_id + "@" + fmttime(trip_dep_time)
            trip2 = Trip(feed_id, trip_id2, trip.route_id, trip.service_id,
                         wheelchair_accessible=trip.wheelchair_accessible,
                         bikes_allowed=trip.bikes_allowed,
                         exact_times=exact_times,
                         frequency_generated=True,
                         trip_headsign=trip.trip_headsign,
                         trip_short_name=trip.trip_short_name,
                         direction_id=trip.direction_id,
                         block_id=trip.block_id)
            trip2.stop_times = []
            base_time = trip.stop_times[0].departure_time
            for stoptime in trip.stop_times:
                arrtime = None if stoptime.arrival_time is None else stoptime.arrival_time - base_time + trip_dep_time
                deptime = None if stoptime.departure_time is None else stoptime.departure_time - base_time + trip_dep_time
                stoptime2 = StopTime(feed_id, trip_id2, stoptime.stop_id, stoptime.stop_sequence,
                                     arrival_time=arrtime, departure_time=deptime,
                                     shape_dist_traveled=stoptime.shape_dist_traveled,
                                     interpolated=stoptime.interpolated,
                                     timepoint=stoptime.timepoint,
                                     pickup_type=stoptime.pickup_type,
                                     drop_off_type=stoptime.drop_off_type)
                trip2.stop_times.append(stoptime2)
            n_exp_trips += 1
            # This will also add the associated stop times
            dao.add(trip2)
        # Do not delete the trip now, as two frequencies can refer to the same trip
        trips_to_delete.append(trip)
        n_freq += 1
    for trip in trips_to_delete:
        # This also deletes the associated stop times
        dao.delete(trip)
    dao.flush()
    dao.commit()
    logger.info("Expanded %d frequencies to %d trips." % (n_freq, n_exp_trips))

    logger.info("Feed '%s': import done." % feed_id)
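# The sketch below is not part of gtfslib; interpolate_hypothetical is an
# illustrative helper. It mirrors the interpolation rule of normalize_trip()
# above: stop times flagged as interpolated get a time offset from the last
# timed stop, proportional to shape_dist_traveled, using the same floor
# division as the code above.
def interpolate_hypothetical(prev_time, prev_dist, next_time, next_dist, dists):
    ttime = next_time - prev_time
    tdist = next_dist - prev_dist
    return [prev_time + ttime * (d - prev_dist) // tdist for d in dists]

# Example: timed stops at 0 m (08:00:00 = 28800 s) and 1000 m (08:10:00 = 29400 s);
# an untimed stop at 250 m gets 28800 + 600 * 250.0 // 1000.0 = 28950 s (08:02:30).
assert interpolate_hypothetical(28800, 0.0, 29400, 1000.0, [250.0]) == [28950.0]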
def run(self, context, skip_shape_dist=False, bundle=None, **kwargs):

    with PrettyCsv("agency.txt", ["agency_id", "agency_name", "agency_url", "agency_timezone",
                                  "agency_lang", "agency_phone", "agency_fare_url", "agency_email"],
                   **kwargs) as csvout:
        nagencies = 0
        for agency in context.dao().agencies(fltr=context.args.filter):
            nagencies += 1
            csvout.writerow([agency.agency_id, agency.agency_name, agency.agency_url,
                             agency.agency_timezone, agency.agency_lang, agency.agency_phone,
                             agency.agency_fare_url, agency.agency_email])
        print("Exported %d agencies" % (nagencies))

    with PrettyCsv("stops.txt", ["stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat",
                                 "stop_lon", "zone_id", "stop_url", "location_type", "parent_station",
                                 "stop_timezone", "wheelchair_boarding"], **kwargs) as csvout:
        nstops = 0
        for stop in context.dao().stops(fltr=context.args.filter, prefetch_parent=False, prefetch_substops=False):
            nstops += 1
            csvout.writerow([stop.stop_id, stop.stop_code, stop.stop_name, stop.stop_desc,
                             stop.stop_lat, stop.stop_lon, stop.zone_id, stop.stop_url,
                             stop.location_type, stop.parent_station_id, stop.stop_timezone,
                             stop.wheelchair_boarding])
        print("Exported %d stops" % (nstops))

    with PrettyCsv("routes.txt", ["route_id", "agency_id", "route_short_name", "route_long_name",
                                  "route_desc", "route_type", "route_url", "route_color",
                                  "route_text_color"], **kwargs) as csvout:
        nroutes = 0
        for route in context.dao().routes(fltr=context.args.filter):
            nroutes += 1
            csvout.writerow([route.route_id, route.agency_id, route.route_short_name,
                             route.route_long_name, route.route_desc, route.route_type,
                             route.route_url, route.route_color, route.route_text_color])
        print("Exported %d routes" % (nroutes))

    stop_times_columns = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence",
                          "stop_headsign", "pickup_type", "drop_off_type", "timepoint"]
    if not skip_shape_dist:
        stop_times_columns.append("shape_dist_traveled")
    with PrettyCsv("trips.txt", ["route_id", "service_id", "trip_id", "trip_headsign", "trip_short_name",
                                 "direction_id", "block_id", "shape_id", "wheelchair_accessible",
                                 "bikes_allowed"], **kwargs) as csvout1:
        with PrettyCsv("stop_times.txt", stop_times_columns, **kwargs) as csvout2:
            ntrips = 0
            nstoptimes = 0
            for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=False,
                                            prefetch_stop_times=True, prefetch_calendars=False,
                                            prefetch_routes=False):
                ntrips += 1
                if ntrips % 1000 == 0:
                    print("%d trips..." % (ntrips))
                csvout1.writerow([trip.route_id, trip.service_id, trip.trip_id, trip.trip_headsign,
                                  trip.trip_short_name, trip.direction_id, trip.block_id, trip.shape_id,
                                  trip.wheelchair_accessible, trip.bikes_allowed])
                for stoptime in trip.stop_times:
                    nstoptimes += 1
                    row = [trip.trip_id,
                           fmttime(stoptime.arrival_time if stoptime.arrival_time else stoptime.departure_time),
                           fmttime(stoptime.departure_time if stoptime.departure_time else stoptime.arrival_time),
                           stoptime.stop_id,
                           stoptime.stop_sequence,
                           stoptime.stop_headsign,
                           stoptime.pickup_type,
                           stoptime.drop_off_type,
                           stoptime.timepoint]
                    if not skip_shape_dist:
                        row.append(stoptime.shape_dist_traveled)
                    csvout2.writerow(row)
            print("Exported %d trips with %d stop times" % (ntrips, nstoptimes))

    # Note: the GTFS model does not have calendar objects to export,
    # since a calendar is renormalized/expanded to a list of dates.
    with PrettyCsv("calendar_dates.txt", ["service_id", "date", "exception_type"], **kwargs) as csvout:
        ncals = ndates = 0
        for calendar in context.dao().calendars(fltr=context.args.filter, prefetch_dates=True):
            ncals += 1
            if ncals % 1000 == 0:
                print("%d calendars, %d dates..." % (ncals, ndates))
            for date in calendar.dates:
                ndates += 1
                csvout.writerow([calendar.service_id, date.toYYYYMMDD(), 1])
        print("Exported %d calendars with %d dates" % (ncals, ndates))

    with PrettyCsv("fare_attributes.txt", ["fare_id", "price", "currency_type", "payment_method",
                                           "transfers", "transfer_duration"], **kwargs) as csvout:
        nfareattrs = 0
        for fareattr in context.dao().fare_attributes(fltr=context.args.filter, prefetch_fare_rules=False):
            nfareattrs += 1
            csvout.writerow([fareattr.fare_id, fareattr.price, fareattr.currency_type,
                             fareattr.payment_method, fareattr.transfers, fareattr.transfer_duration])
        print("Exported %d fare attributes" % (nfareattrs))

    with PrettyCsv("fare_rules.txt", ["fare_id", "route_id", "origin_id", "destination_id", "contains_id"],
                   **kwargs) as csvout:
        nfarerules = 0
        for farerule in context.dao().fare_rules(fltr=context.args.filter, prefetch_fare_attributes=False):
            nfarerules += 1
            csvout.writerow([farerule.fare_id, farerule.route_id, farerule.origin_id,
                             farerule.destination_id, farerule.contains_id])
        print("Exported %d fare rules" % (nfarerules))

    shapes_columns = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]
    if not skip_shape_dist:
        shapes_columns.append("shape_dist_traveled")
    with PrettyCsv("shapes.txt", shapes_columns, **kwargs) as csvout:
        nshapes = nshapepoints = 0
        for shape in context.dao().shapes(fltr=context.args.filter, prefetch_points=True):
            nshapes += 1
            if nshapes % 100 == 0:
                print("%d shapes, %d points..." % (nshapes, nshapepoints))
            for point in shape.points:
                nshapepoints += 1
                row = [shape.shape_id, point.shape_pt_lat, point.shape_pt_lon, point.shape_pt_sequence]
                if not skip_shape_dist:
                    row.append(point.shape_dist_traveled)
                csvout.writerow(row)
        print("Exported %d shapes with %d points" % (nshapes, nshapepoints))

    with PrettyCsv("transfers.txt", ["from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time"],
                   **kwargs) as csvout:
        ntransfers = 0
        for transfer in context.dao().transfers(fltr=context.args.filter, prefetch_stops=False):
            ntransfers += 1
            csvout.writerow([transfer.from_stop_id, transfer.to_stop_id, transfer.transfer_type,
                             transfer.min_transfer_time])
        print("Exported %d transfers" % (ntransfers))

    if bundle:
        if not isinstance(bundle, six.string_types):
            # Allow the use of the "--bundle" option alone, without a value
            bundle = "gtfs.zip"
        if not bundle.endswith('.zip'):
            bundle = bundle + '.zip'
        print("Zipping result to %s (removing .txt files)" % (bundle))
        with zipfile.ZipFile(bundle, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for f in ["agency.txt", "stops.txt", "routes.txt", "trips.txt", "stop_times.txt",
                      "calendar_dates.txt", "fare_rules.txt", "fare_attributes.txt", "shapes.txt",
                      "transfers.txt"]:
                zipf.write(f)
                os.remove(f)
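    # For reference, hypothetical invocations of the name normalization above:
    # a bare "--bundle" flag (non-string value) exports to gtfs.zip,
    # "--bundle=export" becomes export.zip, and "--bundle=export.zip" is kept as-is.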
def run(self, context, csv=None, cluster=0, dstp=0.5, samename=False, alldates=False, **kwargs):
    cluster_meters = float(cluster)
    dstp = float(dstp)
    print("Loading stops...")
    stops = set()
    sc = SpatialClusterizer(cluster_meters)
    for stop in context.dao().stops(fltr=context.args.filter):
        sc.add_point(stop)
        stops.add(stop)
    print("Loaded %d stops. Clusterize..." % (len(stops)))
    sc.clusterize(comparator=sc.make_comparator(samename, dstp))
    print("Aggregated in %d clusters" % (len(sc.clusters())))

    print("Loading calendar dates...")
    dates = set(context.dao().calendar_dates_date(fltr=context.args.filter))
    print("Loaded %d dates" % (len(dates)))

    print("Processing trips...")
    departures_by_clusters = defaultdict(lambda: defaultdict(list))
    ntrips = 0
    for trip in context.dao().trips(fltr=context.args.filter, prefetch_stops=True,
                                    prefetch_stop_times=True, prefetch_calendars=True):
        for stop_time in trip.stop_times:
            if not stop_time.departure_time:
                continue
            if stop_time.stop not in stops:
                continue
            cluster = sc.cluster_of(stop_time.stop)
            departures_by_dates = departures_by_clusters[cluster]
            for date in trip.calendar.dates:
                if date.as_date() not in dates:
                    continue
                departures_by_dates[date.as_date()].append(stop_time)
        if ntrips % 1000 == 0:
            print("%d trips..." % (ntrips))
        ntrips += 1

    with PrettyCsv(csv, ["cluster", "stop_id", "stop_name", "date", "departures", "min_time",
                         "max_time", "dep_hour"], **kwargs) as csvout:
        for cluster, departures_by_dates in departures_by_clusters.items():
            for stop in cluster.items:
                csvout.writerow([cluster.id, stop.stop_id, stop.stop_name])
            if alldates:
                # Print departure count for all dates
                dates_to_print = list(departures_by_dates.keys())
                dates_to_print.sort()
            else:
                # Compute the max only
                date_max = None
                dep_max = 0
                for date, departures in departures_by_dates.items():
                    ndep = len(departures)
                    if ndep >= dep_max:
                        dep_max = ndep
                        date_max = date
                if date_max is None:
                    continue
                dates_to_print = [date_max]
            for date in dates_to_print:
                dep_times = [dep.departure_time for dep in departures_by_dates.get(date)]
                max_hour = max(dep_times)
                min_hour = min(dep_times)
                delta_hour = max_hour - min_hour
                avg_dep = float('inf') if delta_hour == 0 else len(dep_times) * 3600. / (max_hour - min_hour)
                csvout.writerow([cluster.id, None, None, date, len(dep_times),
                                 fmttime(min_hour), fmttime(max_hour), "%.3f" % avg_dep])
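# Worked example of the dep_hour column above (hypothetical numbers): 12 departures
# between 07:30:00 (27000 s) and 09:30:00 (34200 s) give 12 * 3600 / (34200 - 27000)
# = 6.000 departures per hour; a cluster whose departures all share the same time
# yields dep_hour = inf, as coded above.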
def decret_2015_1610(trips, trace=True, required_distance=500, required_ratio=2.5):
    affiche(trace, "Calcul decret 2015 1610 sur %d voyages." % (len(trips)))
    if len(trips) == 0:
        affiche(trace, "Aucun voyages, impossible de calculer.")
        return None, None, None

    affiche(trace, "Calcul de l'espacement moyen des arrêts...")
    espacement_moyen = 0
    w_esp = 0
    for trip in trips:
        # Note: each hop is weighted by the number of days its trip is applicable.
        # This takes frequency into account: for example, the distance of a hop
        # between two stops active on Mondays only is weighted 5 times less than
        # another hop active Monday to Friday.
        n_jours = len(trip.calendar.dates)
        for stoptime1, stoptime2 in trip.hops():
            espacement_moyen += (stoptime2.shape_dist_traveled - stoptime1.shape_dist_traveled) * n_jours
            w_esp += n_jours
    espacement_moyen /= w_esp
    affiche(trace, "L'espacement moyen entre arrêt du réseau est de %.2f mètres (max 500m)." % espacement_moyen)

    affiche(trace, "Calcul du jour ayant la fréquence en voyage la plus élevée...")
    frequences = defaultdict(lambda: 0)
    for trip in trips:
        for date in trip.calendar.dates:
            frequences[date] += 1
    date_max = None
    freq_max = 0
    for date, frequence in frequences.items():
        if frequence > freq_max:
            freq_max = frequence
            date_max = date
    affiche(trace, "Le jour ayant le nombre de voyage le plus élevé est le %s, avec %d voyages." % (date_max.as_date(), freq_max))

    affiche(trace, "Calcul des fréquences sur la plage horaire 8h - 19h...")
    # TODO Is this computation correct? The decree is not clear.
    # Count the number of trips active during each minute.
    frequences = [0 for minute in range(0, 20 * 60)]
    for trip in trips:
        if date_max not in trip.calendar.dates:
            continue
        minute_depart = trip.stop_times[0].departure_time // 60
        minute_arrivee = trip.stop_times[-1].arrival_time // 60
        for minute in range(minute_depart, minute_arrivee + 1):
            if minute >= 8 * 60 and minute < 20 * 60:
                frequences[minute] += 1
    frequence_min = 99999999999
    minute_min = 0
    frequence_max = 0
    minute_max = 0
    # The min/max hourly frequency is computed as a sliding one-hour average,
    # by summing the per-minute frequencies.
    for minute in range(8 * 60, 19 * 60):
        freq = 0
        for delta_minute in range(0, 60):
            freq += frequences[minute + delta_minute]
        if freq > frequence_max:
            frequence_max = freq
            minute_max = minute
        if freq < frequence_min:
            frequence_min = freq
            minute_min = minute
    affiche(trace, "La fréquence minimale est de %.2f voyages/heure, entre %s et %s." % (frequence_min / 60.0, fmttime(minute_min * 60), fmttime((minute_min + 60) * 60)))
    affiche(trace, "La fréquence maximale est de %.2f voyages/heure, entre %s et %s." % (frequence_max / 60.0, fmttime(minute_max * 60), fmttime((minute_max + 60) * 60)))
    if frequence_min == 0:
        ratio_frequence = float('inf')
    else:
        ratio_frequence = frequence_max / float(frequence_min)
    affiche(trace, "Le ratio entre fréquence max et min est de %.3f (max 2.5)." % ratio_frequence)

    urbain = ratio_frequence < required_ratio and espacement_moyen < required_distance
    affiche(trace, "Ce service est %s au sens du décret n° 2015-1610." % ("URBAIN" if urbain else "NON URBAIN"))
    return urbain, espacement_moyen, ratio_frequence
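# Minimal sketch (hypothetical helper, not part of gtfslib) of the sliding one-hour
# window used above: for each start minute in the 8h-19h range, the hourly frequency
# is the sum of the per-minute active-trip counts over the following 60 minutes;
# the function returns the (value, start minute) pairs for the minimum and maximum.
def fenetre_horaire_extremes(frequences_par_minute, debut=8 * 60, fin=19 * 60):
    freq_min, minute_min = float('inf'), debut
    freq_max, minute_max = 0, debut
    for minute in range(debut, fin):
        freq = sum(frequences_par_minute[minute:minute + 60])
        if freq > freq_max:
            freq_max, minute_max = freq, minute
        if freq < freq_min:
            freq_min, minute_min = freq, minute
    return (freq_min, minute_min), (freq_max, minute_max)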