class AllToAllRoutingPipeline:
    """Route from all stops to each target stop of one feed and store the journeys.

    Routing results are either pickled per target (when the module-level
    ``PICKLE`` flag is truthy) or imported straight into the journey database
    through a ``JourneyDataManager``.
    """

    def __init__(self, feed_dict, routing_params):
        """Read the feed configuration and open the GTFS database.

        :param feed_dict: mapping providing the keys ``gtfs_dir``,
            ``journey_dir``, ``day_start``, ``day_end``,
            ``routing_start_time``, ``routing_end_time``,
            ``analysis_start_time``, ``analysis_end_time`` and ``pickle_dir``
        :param routing_params: passed on unchanged to ``JourneyDataManager``
        """
        self.pickle = PICKLE
        self.gtfs_dir = feed_dict["gtfs_dir"]
        self.G = GTFS(feed_dict["gtfs_dir"])
        self.tz = self.G.get_timezone_name()
        self.journey_dir = feed_dict["journey_dir"]
        self.day_start = feed_dict["day_start"]
        self.day_end = feed_dict["day_end"]
        self.routing_start_time = feed_dict["routing_start_time"]
        self.routing_end_time = feed_dict["routing_end_time"]
        self.analysis_start_time = feed_dict["analysis_start_time"]
        self.analysis_end_time = feed_dict["analysis_end_time"]
        self.pickle_dir = feed_dict["pickle_dir"]
        self.routing_params = routing_params
        self.jdm = None
        if not self.pickle:
            # Without pickling, results go directly to the journey database,
            # so the manager is needed from the start.
            # NOTE(review): this uses the module-level GTFS_DB_WORK_DIR /
            # RESULTS_DIR paths, whereas the other methods use
            # self.gtfs_dir / self.journey_dir -- confirm this is intended.
            self.jdm = JourneyDataManager(
                os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_FNAME),
                journey_db_path=os.path.join(RESULTS_DIR, JOURNEY_DB_FNAME),
                routing_params=self.routing_params,
                track_vehicle_legs=TRACK_VEHICLE_LEGS,
                track_route=TRACK_ROUTE)

    def _new_journey_data_manager(self):
        """Build a JourneyDataManager for this feed's GTFS and journey paths."""
        return JourneyDataManager(
            self.gtfs_dir,
            journey_db_path=self.journey_dir,
            routing_params=self.routing_params,
            track_vehicle_legs=TRACK_VEHICLE_LEGS,
            track_route=TRACK_ROUTE)

    def get_all_events(self):
        """Collect the routing inputs for the configured routing time window.

        :return: tuple ``(net, connections)`` where ``net`` is the walking
            transfer network and ``connections`` is a list of ``Connection``
            objects built from the routable transit events
        """
        print("Retrieving transit events")
        events = self.G.generate_routable_transit_events(
            start_time_ut=self.routing_start_time,
            end_time_ut=self.routing_end_time)
        connections = [
            Connection(int(e.from_stop_I), int(e.to_stop_I),
                       int(e.dep_time_ut), int(e.arr_time_ut),
                       int(e.trip_I), int(e.seq))
            for e in events
        ]
        # Sanity check: no duplicated connections.
        assert (len(connections) == len(set(connections)))
        print("scheduled events:", len(connections))
        print("Retrieving walking network")
        net = walk_transfer_stop_to_stop_network(
            self.G, max_link_distance=CUTOFF_DISTANCE)
        print("net edges: ", len(net.edges()))
        return net, connections

    def _route_to_targets(self, targets, slurm_array_i, labelled):
        """Run the profiler once per target and store each result.

        Shared implementation of the two public ``loop_trough_...`` methods.

        :param targets: iterable of target stops
        :param slurm_array_i: used as the pickle sub-directory name
        :param labelled: if true, prefix the progress print with "target: "
        """
        net, connections = self.get_all_events()
        csp = None
        for target in targets:
            if labelled:
                print("target: ", target)
            else:
                print(target)
            if csp is None:
                csp = MultiObjectivePseudoCSAProfiler(
                    connections, target,
                    walk_network=net,
                    end_time_ut=self.routing_end_time,
                    transfer_margin=TRANSFER_MARGIN,
                    start_time_ut=self.routing_start_time,
                    walk_speed=WALK_SPEED,
                    verbose=True,
                    track_vehicle_legs=TRACK_VEHICLE_LEGS,
                    track_time=TRACK_TIME,
                    track_route=TRACK_ROUTE)
            else:
                # Reuse the profiler built for the first target.
                csp.reset([target])
            csp.run()
            profiles = dict(csp.stop_profiles)
            if self.pickle:
                self._pickle_results(profiles, slurm_array_i, target)
            else:
                self.jdm.import_journey_data_for_target_stop(target, profiles)
            # Drop the reference before the next target to keep memory down.
            profiles = None
            gc.collect()

    @timeit
    def loop_trough_targets_and_run_routing(self, targets, slurm_array_i):
        """Route to every target in ``targets`` and store the results.

        :param targets: iterable of target stops
        :param slurm_array_i: used as the pickle sub-directory name
        """
        self._route_to_targets(targets, slurm_array_i, labelled=False)

    @timeit
    def loop_trough_targets_and_run_routing_with_route(self, targets, slurm_array_i):
        """Same as ``loop_trough_targets_and_run_routing``; only the progress print differs.

        :param targets: iterable of target stops
        :param slurm_array_i: used as the pickle sub-directory name
        """
        self._route_to_targets(targets, slurm_array_i, labelled=True)

    @timeit
    def _pickle_results(self, profiles, pickle_subdir, target):
        """Pickle the final optimal labels of every profile for one target.

        The file is written to ``<pickle_dir>/<pickle_subdir>/<target>.pickle``.

        :param profiles: mapping whose values provide ``get_final_optimal_labels()``
        :param pickle_subdir: sub-directory name under ``self.pickle_dir``
        :param target: target stop, used as the file name
        """
        pickle_path = makedirs(os.path.join(self.pickle_dir, str(pickle_subdir)))
        pickle_path = os.path.join(pickle_path, str(target) + ".pickle")
        profiles = {key: value.get_final_optimal_labels()
                    for key, value in profiles.items()}
        # A per-stop re-sort by departure time followed by
        # compute_pareto_front() used to sit here; it was disabled in the
        # original and is intentionally not performed.
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(profiles, pickle_file, pickle.HIGHEST_PROTOCOL)
        profiles = None
        gc.collect()

    def get_list_of_stops(self, where=''):
        """Return ``stop_I`` of the stops table, ordered by ``stop_I``.

        :param where: optional SQL fragment inserted verbatim between the
            table name and ``ORDER BY``; it is not escaped, so it must come
            from trusted code only
        :return: result of ``execute_custom_query_pandas`` for the query
        """
        df = self.G.execute_custom_query_pandas(
            "SELECT stop_I FROM stops " + where + " ORDER BY stop_I")
        return df

    @timeit
    def store_pickle_in_db(self):
        """Import every pickled result under ``self.pickle_dir`` into the journey db.

        Targets reported by ``get_targets_having_journeys()`` are skipped.
        Indices are created once all files have been handled.
        """
        self.jdm = self._new_journey_data_manager()
        for root, dirs, files in os.walk(self.pickle_dir):
            for target_file in files:
                target = target_file.replace(".pickle", "")
                # NOTE(review): `target` is a str here but is imported as
                # int(target) below; if get_targets_having_journeys() returns
                # ints this membership test never matches -- confirm.
                if target not in self.jdm.get_targets_having_journeys():
                    print("target: ", target)
                    # NOTE: pickle.load must only be used on trusted files.
                    with open(os.path.join(root, target_file), 'rb') as pickle_file:
                        profiles = pickle.load(pickle_file)
                    self.jdm.import_journey_data_for_target_stop(int(target), profiles)
                else:
                    print("skipping: ", target, " already in db")
        self.jdm.create_indices()

    def calculate_additional_columns_for_journey(self):
        """Populate the additional journey columns and store travel impedance measures.

        Creates the journey data manager first if none exists yet.
        """
        if not self.jdm:
            self.jdm = self._new_journey_data_manager()
        self.jdm.populate_additional_journey_columns()
        self.jdm.compute_and_store_travel_impedance_measures(
            self.analysis_start_time,
            self.analysis_end_time,
            TRAVEL_IMPEDANCE_STORE_PATH)

    def calculate_comparison_measures(self):
        """Initialise the comparison tables for a before/after pair from ``FEED_LIST``.

        Consecutive ``FEED_LIST`` entries are compared by their ``feed_seq``
        value to decide which one is "before" and which is "after"; the pair
        from the last iteration is the one passed on. With fewer than two
        entries both tuples stay ``None``.
        """
        if not self.jdm:
            self.jdm = self._new_journey_data_manager()
        prev_dict = None
        prev_key = None
        before_db_tuple = None
        after_db_tuple = None
        for (key, feed_dict) in FEED_LIST:
            if prev_dict:
                current_tuple = (feed_dict["journey_dir"], key)
                previous_tuple = (prev_dict["journey_dir"], prev_key)
                # NOTE(review): the entry with the smaller feed_seq becomes
                # "after" -- kept as in the original, confirm the ordering
                # convention of feed_seq.
                if feed_dict["feed_seq"] < prev_dict["feed_seq"]:
                    after_db_tuple = current_tuple
                    before_db_tuple = previous_tuple
                else:
                    before_db_tuple = current_tuple
                    after_db_tuple = previous_tuple
            prev_dict = feed_dict
            prev_key = key
        self.jdm.initialize_comparison_tables(DIFF_PATH, before_db_tuple, after_db_tuple)
class GenericJourneyDataPipeline:
    """Compute (or load cached) node profiles for target stops and export them."""

    def __init__(self):
        """Open the GTFS database and load transit connections and the walk network.

        The routing window runs from 8 h to 11 h after ``day_start_ut``, which
        is the value of ``get_suitable_date_for_daily_extract(ut=True)`` plus
        3600 seconds.
        """
        self.G = GTFS(GTFS_DATA_BASEDIR)
        self.day_start_ut = self.G.get_suitable_date_for_daily_extract(ut=True) + 3600
        self.start_time = self.day_start_ut + 8 * 3600
        self.end_time = self.day_start_ut + 11 * 3600
        self.profiles = {}
        self.journey_analyzer = None
        # self.analysis_start_time
        # self.analysis_end_time
        makedirs(RESULTS_DIRECTORY)
        print("Retrieving transit events")
        events = self.G.generate_routable_transit_events(
            start_time_ut=self.start_time,
            end_time_ut=self.end_time)
        self.connections = [
            Connection(int(e.from_stop_I), int(e.to_stop_I),
                       int(e.dep_time_ut), int(e.arr_time_ut),
                       int(e.trip_I))
            for e in events
        ]
        print("Retrieving walking network")
        self.net = self.G.get_walk_transfer_stop_to_stop_network()

    def script(self):
        """Load or compute the profiles, import them into the journey db and index it."""
        self.get_profile_data()
        journey_analyzer = JourneyDataManager(TARGET_STOPS,
                                              JOURNEY_DATA_DIR,
                                              GTFS_DATA_BASEDIR,
                                              ROUTING_PARAMS,
                                              track_route=True,
                                              close_connection=False)
        journey_analyzer.import_journey_data_for_target_stop(self.profiles)
        journey_analyzer.create_indices()
        # Optional post-processing (add_fastest_path_column() and a GeoJSON
        # export of all route geometries) was disabled in the original and is
        # intentionally not run here.

    def get_profile_data(self, targets=TARGET_STOPS, recompute=False):
        """Fill ``self.profiles`` from the pickle cache or by routing.

        The cache file is
        ``RESULTS_DIRECTORY/node_profile_<target_list_to_str(targets)>.pickle``.

        :param targets: target stops to route to; also determines the cache
            file name
        :param recompute: if true, ignore an existing cache file, recompute
            and overwrite it
        """
        node_profiles_fname = os.path.join(
            RESULTS_DIRECTORY,
            "node_profile_" + target_list_to_str(targets) + ".pickle")
        if not recompute and os.path.exists(node_profiles_fname):
            print("Loading precomputed data")
            # NOTE: pickle.load must only be used on trusted files.
            with open(node_profiles_fname, 'rb') as cache_file:
                self.profiles = pickle.load(cache_file)
            print("Loaded precomputed data")
        else:
            print("Recomputing profiles")
            # Route to the requested targets so that the cache file name and
            # its contents agree.
            self._compute_profile_data(targets)
            with open(node_profiles_fname, 'wb') as cache_file:
                pickle.dump(self.profiles, cache_file, pickle.HIGHEST_PROTOCOL)
            print("Recomputed profiles")

    def _compute_profile_data(self, targets=TARGET_STOPS):
        """Run the profiler towards ``targets`` and store the result in ``self.profiles``.

        :param targets: target stops handed to ``MultiObjectivePseudoCSAProfiler``
        """
        csp = MultiObjectivePseudoCSAProfiler(self.connections,
                                              targets,
                                              walk_network=self.net,
                                              transfer_margin=TRANSFER_MARGIN,
                                              walk_speed=WALK_SPEED,
                                              verbose=True,
                                              track_vehicle_legs=False,
                                              track_time=True,
                                              track_route=True)
        print("CSA Profiler running...")
        csp.run()
        print("CSA profiler finished")
        self.profiles = dict(csp.stop_profiles)

    def key_measures_as_csv(self, csv_path="stop_data.csv"):
        """
        Combines key temporal distance measures for each node with stop data
        from gtfs and stores the result in csv format.

        :param csv_path: path of the csv file to write
        :return: None
        """
        # One dict of NodeProfileAnalyzer measures per node, tagged with the node id.
        node_profiles_list = []
        for node, profile in self.profiles.items():
            npa = NodeProfileAnalyzerTimeAndVehLegs.from_profile(
                profile, self.start_time, self.end_time)
            node_profile_dict = npa.get_node_profile_measures_as_dict()
            node_profile_dict["node"] = node
            node_profiles_list.append(node_profile_dict)
        node_profiles = DataFrame(node_profiles_list)
        stops = self.G.stops()
        # Match the stops' stop_I column against the node id of the measures.
        joined = stops.join(node_profiles.set_index("node"), on='stop_I')
        joined.to_csv(path_or_buf=csv_path)
# Script fragment: plots trip counts per day as a bar chart.
# NOTE(review): `G`, `ax` and `daily_trip_counts` are not defined in the visible
# part of this script -- presumably they are set up earlier; confirm.
# NOTE(review): the original indentation was lost, so the extent of the
# `if os.path.exists(weekly_db_path):` body cannot be determined from here.
# Steps, in order:
#   1. Bar chart of daily_trip_counts['trip_counts'] against the dates.
#   2. Red vertical line at G.meta['download_date']; red horizontal line at
#      threshold (0.96) times the maximum trip count; yellow vertical line at
#      G.get_weekly_extract_start_date(weekdays_at_least_of_max=threshold).
#   3. If "test_db_kuopio.week.sqlite" exists: reopen G from it, create a new
#      figure and draw the same bar chart for that database.
#   4. List the routable transit events between 0 and the first element of
#      G.get_approximate_schedule_time_span_in_ut(), and reduce them to the
#      smallest dep_time_ut (`min_ut`, not used afterwards in the visible code).
#   5. Print the approximate schedule time span and show the figure(s).
datetimes = [date.to_pydatetime() for date in daily_trip_counts['date']] trip_counts = daily_trip_counts['trip_counts'] ax.bar(datetimes, trip_counts) ax.axvline(G.meta['download_date'], color="red") threshold = 0.96 ax.axhline(trip_counts.max() * threshold, color="red") ax.axvline(G.get_weekly_extract_start_date(weekdays_at_least_of_max=threshold), color="yellow") weekly_db_path = "test_db_kuopio.week.sqlite" if os.path.exists(weekly_db_path): G = GTFS(weekly_db_path) f, ax = plt.subplots() daily_trip_counts = G.get_trip_counts_per_day() datetimes = [date.to_pydatetime() for date in daily_trip_counts['date']] trip_counts = daily_trip_counts['trip_counts'] ax.bar(datetimes, trip_counts) events = list( G.generate_routable_transit_events( 0, G.get_approximate_schedule_time_span_in_ut()[0])) min_ut = float('inf') for e in events: min_ut = min(e.dep_time_ut, min_ut) print(G.get_approximate_schedule_time_span_in_ut()) plt.show()