def prepare_dbs():
    """Copy the two source GTFS sqlite databases into the work directory and
    prepare them for comparison: harmonize their stop tables, recompute
    stop-to-stop distances, and run the external Java walk-distance router.

    Side effects only (file copies, database mutation, subprocess calls);
    returns None. Relies on module-level configuration constants
    (GTFS_DB_WORK_DIR, GTFS_DB_SOURCE_DIR, GTFS_DB_LM, GTFS_DB_OLD,
    SQLITE_SUFFIX, OSM_DIR).
    """
    import shutil  # local import: only needed by this setup routine

    db_filenames = [GTFS_DB_LM + SQLITE_SUFFIX, GTFS_DB_OLD + SQLITE_SUFFIX]

    # Copy fresh databases only when no previous work copy exists; otherwise
    # keep going with the existing ones. (Previously this was done with an
    # assert + "except AssertionError" and a unix-only subprocess "cp" call;
    # an explicit check plus shutil.copy is portable and clearer.)
    if any(os.path.isfile(os.path.join(GTFS_DB_WORK_DIR, fn)) for fn in db_filenames):
        print("remove old files to start from scratch, continuing...")
    else:
        for fn in db_filenames:
            shutil.copy(os.path.join(GTFS_DB_SOURCE_DIR, fn),
                        os.path.join(GTFS_DB_WORK_DIR, fn))

    lm_path = os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_LM + SQLITE_SUFFIX)
    old_path = os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_OLD + SQLITE_SUFFIX)
    G_lm = GTFS(lm_path)
    G_old = GTFS(old_path)

    # Harmonize the stop tables of the two feeds so that stop ids are
    # comparable across databases.
    merge_stops_tables(old_path, lm_path)
    G_old.replace_stop_i_with_stop_pair_i(colname="stop_pair_I")
    G_lm.replace_stop_i_with_stop_pair_i(colname="stop_pair_I")

    # Sanity output: which stop ids exist in one feed but not the other.
    lm_stops = G_lm.execute_custom_query_pandas("SELECT * FROM stops")
    old_stops = G_old.execute_custom_query_pandas("SELECT * FROM stops")
    lm_stops_set = set(lm_stops["stop_I"])
    old_stops_set = set(old_stops["stop_I"])
    print(lm_stops_set)
    print(old_stops_set)
    print("stops not in old:", lm_stops_set - old_stops_set)
    # Fixed label: this set is the stops missing from the *lm* feed
    # (the original printed "stops not in old:" for both directions).
    print("stops not in lm:", old_stops_set - lm_stops_set)

    print("calculating stop distances for first feed")
    G_old.recalculate_stop_distances(2000, remove_old_table=True)
    print("calculating stop distances for second feed")
    G_lm.recalculate_stop_distances(2000, remove_old_table=True)

    print("run walk distance routing")
    # Walk distances are computed by an external Java tool that writes its
    # results back into each database (-u) using the OSM extract (-osm).
    for fn in db_filenames:
        subprocess.call([
            "java", "-jar",
            "gtfspy/java_routing/target/transit_osm_routing-1.0-SNAPSHOT-jar-with-dependencies.jar",
            "-u", os.path.join(GTFS_DB_WORK_DIR, fn),
            "-osm", OSM_DIR,
            "--tempDir", "/tmp/"
        ])
def stops_to_exclude(return_sqlite_list=False):
    """Fetch stops lying in excluded fare/area zones (zone digit > 4).

    The zone digit is read from the stop_id: ids prefixed with '__b__' carry
    it in position 6, plain ids in position 1.

    Parameters
    ----------
    return_sqlite_list : bool
        When True, return the excluded stop_I values formatted as a SQL
        tuple literal, e.g. "(1,2,3)", suitable for an IN (...) clause.
        When False (default), return the full pandas DataFrame of stops.
    """
    lm_feed = GTFS(LM_DICT["gtfs_dir"])
    zone_query = (
        "SELECT * FROM stops WHERE CASE WHEN substr(stop_id,1, 5) = '__b__' "
        "THEN CAST(substr(stop_id,6, 1) AS integer) "
        "ELSE CAST(substr(stop_id,1, 1) AS integer) END >4"
    )
    excluded_stops = lm_feed.execute_custom_query_pandas(zone_query)
    if not return_sqlite_list:
        return excluded_stops
    id_csv = ",".join(str(stop_I) for stop_I in excluded_stops["stop_I"].tolist())
    return "(" + id_csv + ")"
class JourneyDataManager:
    """Manage a sqlite database of routing results ("journeys").

    Wraps one journey database (created here if missing), imports journey
    labels produced by a routing run, derives per-journey time components,
    and aggregates od-pair travel-impedance measures into per-measure tables.
    Reads the associated GTFS database for stop distances and metadata.
    """

    def __init__(self, gtfs_path, journey_db_path, routing_params=None,
                 multitarget_routing=False, track_vehicle_legs=True,
                 track_route=False):
        """
        Parameters
        ----------
        gtfs_path : str
            Path to the GTFS sqlite database used for metadata and distances.
        journey_db_path : str
            Path to the journey sqlite database; created and initialized
            if the file does not yet exist.
        routing_params : dict, optional
            Routing-run parameters; stored into the parameters table on
            first initialization.
        multitarget_routing : bool
        track_vehicle_legs : bool
        track_route : bool
            When True, per-leg data and route strings are stored as well.
        """
        self.multitarget_routing = multitarget_routing
        self.track_route = track_route
        self.track_vehicle_legs = track_vehicle_legs
        self.gtfs_path = gtfs_path
        self.gtfs = GTFS(self.gtfs_path)
        self.gtfs_meta = self.gtfs.meta
        # Keep the GTFS connection open for the lifetime of this manager;
        # restored in __del__.
        self.gtfs._dont_close = True
        self.od_pairs = None
        self._targets = None
        self._origins = None
        self.diff_conn = None
        if not routing_params:
            routing_params = dict()
        self.routing_params_input = routing_params
        # NOTE(review): routing_params has just been defaulted to {}, so
        # "routing_params is not None" is always True and this assert can
        # never fire — presumably it was meant to require an existing db OR
        # explicit routing params; verify intent.
        assert os.path.exists(journey_db_path) or routing_params is not None

        journey_db_pre_exists = os.path.isfile(journey_db_path)

        # insert a pretty robust timeout:
        timeout = 100  # seconds; second positional arg of sqlite3.connect
        self.conn = sqlite3.connect(journey_db_path, timeout)
        if not journey_db_pre_exists:
            self.initialize_database()

        self.routing_parameters = Parameters(self.conn)
        self._assert_journey_computation_paramaters_match()

        # Maps measure name -> (value when no next journey, cutoff value);
        # _T_WALK_STR entries are substituted with the od-pair walking
        # duration at analysis time.
        self.journey_properties = {
            "journey_duration": (_T_WALK_STR, _T_WALK_STR)
        }
        if routing_params.get('track_vehicle_legs', False) or \
                self.routing_parameters.get('track_vehicle_legs', False):
            self.journey_properties["n_boardings"] = (float("inf"), 0)
        if self.track_route:
            additional_journey_parameters = {
                "in_vehicle_duration": (float('inf'), 0),
                "transfer_wait_duration": (float('inf'), 0),
                "walking_duration": (_T_WALK_STR, _T_WALK_STR),
                "pre_journey_wait_fp": (float('inf'), 0)
            }
            self.journey_properties.update(additional_journey_parameters)
        self.travel_impedance_measure_names = list(
            self.journey_properties.keys())
        self.travel_impedance_measure_names += ["temporal_distance"]

    def __del__(self):
        # NOTE(review): if __init__ failed before self.conn/self.gtfs were
        # assigned, this raises AttributeError during teardown — consider
        # getattr guards.
        self.gtfs._dont_close = False
        if self.conn:
            self.conn.close()

    @timeit
    def import_journey_data_for_target_stop(self, target_stop_I,
                                            origin_stop_I_to_journey_labels):
        """Insert the journey labels routed towards one target stop.

        Parameters
        ----------
        origin_stop_I_to_journey_labels: dict
            key: origin_stop_Is
            value: list of labels
        target_stop_I: int
        """
        cur = self.conn.cursor()
        self.conn.isolation_level = 'EXCLUSIVE'
        # Speed over durability during bulk import.
        cur.execute('PRAGMA synchronous = OFF;')
        if self.track_route:
            self._insert_journeys_with_route_into_db(
                origin_stop_I_to_journey_labels,
                target_stop=int(target_stop_I))
        else:
            self._insert_journeys_into_db_no_route(
                origin_stop_I_to_journey_labels,
                target_stop=int(target_stop_I))
        print("Finished import process")

    def _assert_journey_computation_paramaters_match(self):
        # Every routing parameter that also exists in the GTFS metadata must
        # agree, so journeys are not mixed across incompatible runs.
        for key, value in self.routing_parameters.items():
            if key in self.gtfs_meta.keys():
                assert self.gtfs_meta[key] == value

    def _get_largest_journey_id(self):
        # Returns 0 for an empty journeys table (max() yields NULL/None).
        cur = self.conn.cursor()
        val = cur.execute("select max(journey_id) FROM journeys").fetchone()
        return val[0] if val[0] else 0

    def _insert_journeys_into_db_no_route(self, stop_profiles,
                                          target_stop=None):
        # TODO: Change the insertion so that the check last journey id and insertions are in the same transaction block
        """
        con.isolation_level = 'EXCLUSIVE'
        con.execute('BEGIN EXCLUSIVE')
        #exclusive access starts here. Nothing else can r/w the db, do your magic here.
        con.commit()
        """
        print("Collecting journey data")
        # journey_id is 1-based here; _execute_function shifts ids by the
        # current max journey_id before inserting.
        journey_id = 1
        journey_list = []
        tot = len(stop_profiles)
        for i, (origin_stop, labels) in enumerate(stop_profiles.items(),
                                                  start=1):
            #print("\r Stop " + str(i) + " of " + str(tot), end='', flush=True)
            for label in labels:
                assert (isinstance(label, LabelTimeWithBoardingsCount))
                if self.multitarget_routing:
                    # In multi-target mode the target of a label is unknown.
                    target_stop = None
                else:
                    target_stop = int(target_stop)
                values = [
                    int(journey_id),
                    int(origin_stop),
                    target_stop,
                    int(label.departure_time),
                    int(label.arrival_time_target),
                    int(label.n_boardings)
                ]
                journey_list.append(values)
                journey_id += 1
        print("Inserting journeys into database")
        insert_journeys_stmt = '''INSERT INTO journeys(
                              journey_id,
                              from_stop_I,
                              to_stop_I,
                              departure_time,
                              arrival_time_target,
                              n_boardings) VALUES (%s) ''' % (", ".join(
            ["?" for x in range(6)]))
        #self.conn.executemany(insert_journeys_stmt, journey_list)
        self._execute_function(insert_journeys_stmt, journey_list)
        self.conn.commit()

    @timeit
    def _execute_function(self, statement, rows):
        # Offsets the first column (journey_id) of every row by the current
        # max id inside an exclusive transaction, then bulk-inserts.
        # Caller is responsible for the final commit.
        self.conn.execute('BEGIN EXCLUSIVE')
        last_id = self._get_largest_journey_id()
        rows = [[x[0] + last_id] + x[1:] for x in rows]
        self.conn.executemany(statement, rows)

    def _insert_journeys_with_route_into_db(self, stop_I_to_journey_labels,
                                            target_stop):
        print("Collecting journey and connection data")
        journey_id = (self._get_largest_journey_id()
                      if self._get_largest_journey_id() else 0) + 1
        journey_list = []
        connection_list = []
        label = None
        for i, (origin_stop, labels) in enumerate(
                stop_I_to_journey_labels.items(), start=1):
            # tot = len(stop_profiles)
            #print("\r Stop " + str(i) + " of " + str(tot), end='', flush=True)
            assert (isinstance(stop_I_to_journey_labels[origin_stop], list))
            for label in labels:
                assert (isinstance(label, LabelTimeAndRoute)
                        or isinstance(label, LabelTimeBoardingsAndRoute))
                # We need to "unpack" the journey to actually figure out where the trip went
                # (there can be several targets).
                if label.departure_time == label.arrival_time_target:
                    print("Weird label:", label)
                    continue
                target_stop, new_connection_values, route_stops = \
                    self._collect_connection_data(journey_id, label)
                if origin_stop == target_stop:
                    continue
                if isinstance(label, LabelTimeBoardingsAndRoute):
                    values = [
                        int(journey_id),
                        int(origin_stop),
                        int(target_stop),
                        int(label.departure_time),
                        int(label.arrival_time_target), label.n_boardings,
                        label.movement_duration, route_stops
                    ]
                else:
                    values = [
                        int(journey_id),
                        int(origin_stop),
                        int(target_stop),
                        int(label.departure_time),
                        int(label.arrival_time_target),
                        label.movement_duration, route_stops
                    ]
                journey_list.append(values)
                connection_list += new_connection_values
                journey_id += 1
        print("Inserting journeys into database")
        # label is the last label seen; falsy when no labels were processed,
        # in which case nothing is inserted.
        if label:
            if isinstance(label, LabelTimeBoardingsAndRoute):
                insert_journeys_stmt = '''INSERT INTO journeys(
                                      journey_id,
                                      from_stop_I,
                                      to_stop_I,
                                      departure_time,
                                      arrival_time_target,
                                      n_boardings,
                                      movement_duration,
                                      route) VALUES (%s) ''' % (", ".join(
                    ["?" for x in range(8)]))
            else:
                insert_journeys_stmt = '''INSERT INTO journeys(
                                      journey_id,
                                      from_stop_I,
                                      to_stop_I,
                                      departure_time,
                                      arrival_time_target,
                                      movement_duration,
                                      route) VALUES (%s) ''' % (", ".join(
                    ["?" for x in range(7)]))
            self.conn.executemany(insert_journeys_stmt, journey_list)
            print("Inserting legs into database")
            insert_legs_stmt = '''INSERT INTO legs(
                               journey_id,
                               from_stop_I,
                               to_stop_I,
                               departure_time,
                               arrival_time_target,
                               trip_I,
                               seq,
                               leg_stops) VALUES (%s) ''' % (", ".join(
                ["?" for x in range(8)]))
            self.conn.executemany(insert_legs_stmt, connection_list)
            # Record this target in the comma-separated target_list parameter.
            self.routing_parameters["target_list"] += (str(target_stop) + ",")
            self.conn.commit()

    def create_index_for_journeys_table(self):
        # NOTE(review): this method is re-defined later in the class (without
        # the PRAGMA), so this first definition is shadowed and never runs.
        self.conn.execute("PRAGMA temp_store=2")
        self.conn.commit()
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS journeys_to_stop_I_idx ON journeys (to_stop_I)"
        )

    def _collect_connection_data(self, journey_id, label):
        """Walk a label's linked connection chain and split it into legs.

        Returns (target_stop, value_list, route_stops) where value_list holds
        one insert-ready tuple per leg of the journey and route_stops is a
        comma-separated string of the boarding stops along the route.
        """
        target_stop = None
        cur_label = label
        seq = 1
        value_list = []
        route_stops = []
        leg_stops = []
        prev_trip_id = None
        connection = None
        leg_departure_time = None
        leg_departure_stop = None
        leg_arrival_time = None
        leg_arrival_stop = None
        while True:
            if isinstance(cur_label.connection, Connection):
                connection = cur_label.connection
                if connection.trip_id:
                    trip_id = connection.trip_id
                else:
                    # Walking connections have no trip; encode as -1.
                    trip_id = -1

                # In case of new leg
                if prev_trip_id != trip_id:
                    route_stops.append(connection.departure_stop)
                    if prev_trip_id:
                        # Close out the previous leg before starting a new one.
                        leg_stops.append(connection.departure_stop)
                        values = (int(journey_id), int(leg_departure_stop),
                                  int(leg_arrival_stop),
                                  int(leg_departure_time),
                                  int(leg_arrival_time), int(prev_trip_id),
                                  int(seq),
                                  ','.join([str(x) for x in leg_stops]))
                        value_list.append(values)
                        seq += 1
                        leg_stops = []
                    leg_departure_stop = connection.departure_stop
                    leg_departure_time = connection.departure_time
                leg_arrival_time = connection.arrival_time
                leg_arrival_stop = connection.arrival_stop
                leg_stops.append(connection.departure_stop)
                target_stop = connection.arrival_stop
                prev_trip_id = trip_id

            if not cur_label.previous_label:
                # End of chain: flush the final leg.
                leg_stops.append(connection.arrival_stop)
                values = (int(journey_id), int(leg_departure_stop),
                          int(leg_arrival_stop), int(leg_departure_time),
                          int(leg_arrival_time), int(prev_trip_id), int(seq),
                          ','.join([str(x) for x in leg_stops]))
                value_list.append(values)
                break

            cur_label = cur_label.previous_label

        route_stops.append(target_stop)
        route_stops = ','.join([str(x) for x in route_stops])
        return target_stop, value_list, route_stops

    def populate_additional_journey_columns(self):
        # self.add_fastest_path_column()
        # self.add_time_to_prev_journey_fp_column()
        self.compute_journey_time_components()
        self.calculate_pre_journey_waiting_times_ignoring_direct_walk()

    def get_od_pairs(self):
        # Cached list of distinct (from_stop_I, to_stop_I) pairs.
        cur = self.conn.cursor()
        if not self.od_pairs:
            cur.execute(
                'SELECT from_stop_I, to_stop_I FROM journeys GROUP BY from_stop_I, to_stop_I'
            )
            self.od_pairs = cur.fetchall()
        return self.od_pairs

    def get_targets(self):
        # Cached list of distinct target stop_Is (scalars, not tuples).
        cur = self.conn.cursor()
        if not self._targets:
            cur.execute('SELECT to_stop_I FROM journeys GROUP BY to_stop_I')
            self._targets = [target[0] for target in cur.fetchall()]
        return self._targets

    def get_origins(self):
        # Cached list of distinct origin stop_Is (scalars, not tuples).
        cur = self.conn.cursor()
        if not self._origins:
            cur.execute(
                'SELECT from_stop_I FROM journeys GROUP BY from_stop_I')
            self._origins = [origin[0] for origin in cur.fetchall()]
        return self._origins

    def get_table_with_coordinates(self, table_name, target=None):
        # Join lat/lon of the origin stop onto the table's rows.
        df = self.get_table_as_dataframe(table_name, target)
        return self.gtfs.add_coordinates_to_df(df, join_column='from_stop_I')

    def get_table_as_dataframe(self, table_name, to_stop_I_target=None):
        query = "SELECT * FROM " + table_name
        if to_stop_I_target:
            query += " WHERE to_stop_I = %s" % to_stop_I_target
        return pd.read_sql_query(query, self.conn)

    @timeit
    def add_fastest_path_column(self):
        print("adding fastest path column")
        cur = self.conn.cursor()
        for target in self.get_targets():
            fastest_path_journey_ids = []
            for origin in self.get_origins():
                cur.execute(
                    'SELECT departure_time, arrival_time_target, journey_id FROM journeys '
                    'WHERE from_stop_I = ? AND to_stop_I = ? '
                    'ORDER BY departure_time ASC', (origin, target))
                all_trips = cur.fetchall()
                all_labels = [
                    LabelTimeAndRoute(x[0], x[1], x[2], False)
                    for x in all_trips
                ]  #putting journey_id as movement_duration
                all_fp_labels = compute_pareto_front(all_labels,
                                                     finalization=False,
                                                     ignore_n_boardings=True)
                fastest_path_journey_ids.append(all_fp_labels)
            # Flatten to (flag, journey_id) pairs; movement_duration carries
            # the journey_id (see comment above).
            fastest_path_journey_ids = [(1, x.movement_duration)
                                        for sublist in fastest_path_journey_ids
                                        for x in sublist]
            cur.executemany(
                "UPDATE journeys SET fastest_path = ? WHERE journey_id = ?",
                fastest_path_journey_ids)
        self.conn.commit()

    @timeit
    def add_time_to_prev_journey_fp_column(self):
        print("adding pre journey waiting time")
        cur = self.conn.cursor()
        # NOTE(review): get_targets() returns scalars, so target[0] below
        # would raise TypeError on ints — looks like it should be (target,).
        for target in self.get_targets():
            cur.execute(
                'SELECT journey_id, from_stop_I, to_stop_I, departure_time FROM journeys '
                'WHERE fastest_path = 1 AND to_stop_I = ? '
                'ORDER BY from_stop_I, to_stop_I, departure_time ',
                (target[0], ))

            all_trips = cur.fetchall()
            time_to_prev_journey = []
            prev_departure_time = None
            prev_origin = None
            prev_destination = None
            for trip in all_trips:
                journey_id = trip[0]
                from_stop_I = trip[1]
                to_stop_I = trip[2]
                departure_time = trip[3]
                # Reset the running "previous departure" at od-pair boundaries.
                if prev_origin != from_stop_I or prev_destination != to_stop_I:
                    prev_departure_time = None
                if prev_departure_time:
                    time_to_prev_journey.append(
                        (departure_time - prev_departure_time, journey_id))
                prev_origin = from_stop_I
                prev_destination = to_stop_I
                prev_departure_time = departure_time
            cur.executemany(
                "UPDATE journeys SET pre_journey_wait_fp = ? WHERE journey_id = ?",
                time_to_prev_journey)
        self.conn.commit()

    @timeit
    def compute_journey_time_components(self):
        """Derive duration columns of journeys from the raw times/legs."""
        print("adding journey components")
        cur = self.conn.cursor()
        cur.execute(
            "UPDATE journeys SET journey_duration = arrival_time_target - departure_time"
        )

        if self.track_route:
            # in-vehicle time: legs with a real trip (trip_I != -1).
            cur.execute(
                "UPDATE journeys "
                "SET "
                "in_vehicle_duration = "
                "(SELECT sum(arrival_time_target - departure_time) AS in_vehicle_duration FROM legs "
                "WHERE journeys.journey_id = legs.journey_id AND trip_I != -1 GROUP BY journey_id)"
            )
            # walking time: legs without a trip (trip_I < 0).
            cur.execute(
                "UPDATE journeys "
                "SET "
                "walking_duration = "
                "(SELECT sum(arrival_time_target - departure_time) AS walking_duration FROM legs "
                "WHERE journeys.journey_id = legs.journey_id AND trip_I < 0 GROUP BY journey_id)"
            )
            # transfer wait is whatever remains of the total duration.
            cur.execute(
                "UPDATE journeys "
                "SET transfer_wait_duration = journey_duration - in_vehicle_duration - walking_duration"
            )
        self.conn.commit()

    def _journey_label_generator(self, destination_stop_Is=None,
                                 origin_stop_Is=None):
        """Yield (origin, destination, [LabelGeneric, ...]) per od-pair."""
        conn = self.conn
        conn.row_factory = sqlite3.Row

        if destination_stop_Is is None:
            destination_stop_Is = self.get_targets()
        if origin_stop_Is is None:
            origin_stop_Is = self.get_origins()

        for destination_stop_I in destination_stop_Is:
            if self.track_route:
                label_features = "journey_id, from_stop_I, to_stop_I, n_boardings, movement_duration, " \
                                 "journey_duration, in_vehicle_duration, transfer_wait_duration, walking_duration, " \
                                 "departure_time, arrival_time_target"""
            else:
                label_features = "journey_id, from_stop_I, to_stop_I, n_boardings, departure_time, " \
                                 "arrival_time_target"
            sql = "SELECT " + label_features + " FROM journeys WHERE to_stop_I = %s" % destination_stop_I
            df = pd.read_sql_query(sql, self.conn)

            for origin_stop_I in origin_stop_Is:
                selection = df.loc[df['from_stop_I'] == origin_stop_I]
                journey_labels = []
                for journey in selection.to_dict(orient='records'):
                    journey["pre_journey_wait_fp"] = -1
                    # NOTE(review): bare except hides the real failure mode
                    # of LabelGeneric construction; consider narrowing.
                    try:
                        journey_labels.append(LabelGeneric(journey))
                    except:
                        print(journey)
                yield origin_stop_I, destination_stop_I, journey_labels

    def get_node_profile_time_analyzer(self, target, origin, start_time_dep,
                                       end_time_dep):
        sql = """SELECT journey_id, from_stop_I, to_stop_I, n_boardings, movement_duration, journey_duration,
        in_vehicle_duration, transfer_wait_duration, walking_duration, departure_time, arrival_time_target
        FROM journeys WHERE to_stop_I = %s AND from_stop_I = %s""" % (target,
                                                                      origin)
        df = pd.read_sql_query(sql, self.conn)
        journey_labels = []
        for journey in df.to_dict(orient='records'):
            journey_labels.append(LabelGeneric(journey))
        fpa = FastestPathAnalyzer(
            journey_labels,
            start_time_dep,
            end_time_dep,
            walk_duration=float('inf'),  # walking time
            label_props_to_consider=list(self.journey_properties.keys()))
        return fpa.get_time_analyzer()

    def _get_node_profile_analyzer_time_and_veh_legs(self, target, origin,
                                                     start_time_dep,
                                                     end_time_dep):
        sql = """SELECT from_stop_I, to_stop_I, n_boardings, departure_time, arrival_time_target
        FROM journeys WHERE to_stop_I = %s AND from_stop_I = %s""" % (target,
                                                                      origin)
        df = pd.read_sql_query(sql, self.conn)

        journey_labels = []
        for journey in df.itertuples():
            departure_time = journey.departure_time
            arrival_time_target = journey.arrival_time_target
            n_boardings = journey.n_boardings
            journey_labels.append(
                LabelTimeWithBoardingsCount(departure_time,
                                            arrival_time_target,
                                            n_boardings,
                                            first_leg_is_walk=float('nan')))

        # This ought to be optimized...
        query = """SELECT d, d_walk FROM stop_distances WHERE to_stop_I = %s AND from_stop_I = %s""" % (
            target, origin)
        df = self.gtfs.execute_custom_query_pandas(query)
        if len(df) > 0:
            walk_duration = float(
                df['d_walk']) / self.routing_params_input['walk_speed']
        else:
            # No walk entry within cutoff -> direct walking is impossible.
            walk_duration = float('inf')
        analyzer = NodeProfileAnalyzerTimeAndVehLegs(
            journey_labels,
            walk_duration,  # walking time
            start_time_dep,
            end_time_dep)
        return analyzer

    def read_travel_impedance_measure_from_table(self,
                                                 travel_impedance_measure,
                                                 from_stop_I=None,
                                                 to_stop_I=None,
                                                 statistic=None):
        """
        Recover pre-computed travel_impedance between od-pairs from the database.

        Parameters
        ----------
        travel_impedance_measure : str
            Name of the measure table to read.
        from_stop_I, to_stop_I : int, optional
            Restrict to one origin and/or destination.
        statistic : str, optional
            One of "min"/"mean"/"median"/"max"; all four when omitted.

        Returns
        -------
        values: number | Pandas DataFrame
        """
        to_select = []
        where_clauses = []
        to_select.append("from_stop_I")
        to_select.append("to_stop_I")
        if from_stop_I is not None:
            where_clauses.append("from_stop_I=" + str(int(from_stop_I)))
        if to_stop_I is not None:
            where_clauses.append("to_stop_I=" + str(int(to_stop_I)))
        where_clause = ""
        if len(where_clauses) > 0:
            where_clause = " WHERE " + " AND ".join(where_clauses)
        if not statistic:
            to_select.extend(["min", "mean", "median", "max"])
        else:
            to_select.append(statistic)
        to_select_clause = ",".join(to_select)
        if not to_select_clause:
            to_select_clause = "*"
        sql = "SELECT " + to_select_clause + " FROM " + travel_impedance_measure + where_clause + ";"
        df = pd.read_sql(sql, self.conn)
        return df

    @timeit
    def compute_travel_impedance_measures_for_od_pairs(self,
                                                       analysis_start_time,
                                                       analysis_end_time,
                                                       targets=None,
                                                       origins=None):
        """Aggregate each impedance measure per od-pair into its table."""
        results_dict = {}
        for travel_impedance_measure in self.travel_impedance_measure_names:
            self._create_travel_impedance_measure_table(
                travel_impedance_measure)
        print("Computing total number of origins and targets..",
              end='',
              flush=True)
        if targets is None:
            targets = self.get_targets()
        if origins is None:
            origins = self.get_origins()
        print("\rComputed total number of origins and targets")
        n_pairs_tot = len(origins) * len(targets)

        def _flush_data_to_db(results):
            # Write accumulated rows, then reset the per-measure buffers.
            for travel_impedance_measure, data in results.items():
                self.__insert_travel_impedance_data_to_db(
                    travel_impedance_measure, data)
            for travel_impedance_measure in self.travel_impedance_measure_names:
                results[travel_impedance_measure] = []

        # First call initializes results_dict's per-measure empty lists.
        _flush_data_to_db(results_dict)

        for i, (origin, target, journey_labels) in enumerate(
                self._journey_label_generator(targets, origins)):
            if i % 100 == 0:
                print("\r",
                      i,
                      "/",
                      n_pairs_tot,
                      " : ",
                      "%.2f" % round(float(i) / n_pairs_tot, 3),
                      end='',
                      flush=True)
            kwargs = {"from_stop_I": origin, "to_stop_I": target}
            walking_distance = self.gtfs.get_stop_distance(origin, target)

            if walking_distance:
                walking_duration = walking_distance / self.routing_params_input[
                    "walk_speed"]
            else:
                walking_duration = float("inf")
            fpa = FastestPathAnalyzer(
                journey_labels,
                analysis_start_time,
                analysis_end_time,
                walk_duration=walking_duration,  # walking time
                label_props_to_consider=list(self.journey_properties.keys()),
                **kwargs)
            temporal_distance_analyzer\
                = fpa.get_temporal_distance_analyzer()
            # Note: the summary_as_dict automatically includes also the from_stop_I and to_stop_I -fields.
            results_dict["temporal_distance"].append(
                temporal_distance_analyzer.summary_as_dict())
            fpa.calculate_pre_journey_waiting_times_ignoring_direct_walk()
            for key, (value_no_next_journey,
                      value_cutoff) in self.journey_properties.items():
                # _T_WALK_STR placeholders resolve to this od-pair's walk time.
                value_cutoff = walking_duration if value_cutoff == _T_WALK_STR else value_cutoff
                value_no_next_journey = walking_duration if value_no_next_journey == _T_WALK_STR else value_no_next_journey
                if key == "pre_journey_wait_fp":
                    property_analyzer = fpa.get_prop_analyzer_for_pre_journey_wait(
                    )
                else:
                    property_analyzer = fpa.get_prop_analyzer_flat(
                        key, value_no_next_journey, value_cutoff)
                results_dict[key].append(property_analyzer.summary_as_dict())
            if i % 1000 == 0:  # update in batches of 1000
                _flush_data_to_db(results_dict)
        # flush everything that remains
        _flush_data_to_db(results_dict)

    def create_indices_for_travel_impedance_measure_tables(self):
        for travel_impedance_measure in self.travel_impedance_measure_names:
            self._create_index_for_travel_impedance_measure_table(
                travel_impedance_measure)

    @timeit
    def calculate_pre_journey_waiting_times_ignoring_direct_walk(self):
        # Computes pre-journey waits per od-pair and writes them back into
        # the journeys table.
        all_fp_labels = []
        for origin, target, journey_labels in self._journey_label_generator():
            if not journey_labels:
                continue
            fpa = FastestPathAnalyzer(
                journey_labels,
                self.routing_parameters["routing_start_time_dep"],
                self.routing_parameters["routing_end_time_dep"],
                walk_duration=float('inf'))
            fpa.calculate_pre_journey_waiting_times_ignoring_direct_walk()
            all_fp_labels += fpa.get_fastest_path_labels()
        self.update_journey_from_labels(all_fp_labels, "pre_journey_wait_fp")

    def update_journey_from_labels(self, labels, attribute):
        # Bulk-update one journeys column from a label attribute, keyed by
        # each label's journey_id.
        cur = self.conn.cursor()
        insert_tuples = []
        for label in labels:
            insert_tuples.append(
                (getattr(label, attribute), getattr(label, "journey_id")))

        sql = "UPDATE journeys SET %s = ? WHERE journey_id = ?" % (
            attribute, )
        cur.executemany(sql, insert_tuples)
        self.conn.commit()

    def _create_travel_impedance_measure_table(self,
                                               travel_impedance_measure):
        print("creating table: ", travel_impedance_measure)
        self.conn.execute("CREATE TABLE IF NOT EXISTS " +
                          travel_impedance_measure + " (from_stop_I INT, "
                          "to_stop_I INT, "
                          "min INT, "
                          "max INT, "
                          "median INT, "
                          "mean REAL, "
                          "UNIQUE (from_stop_I, to_stop_I))")

    def __insert_travel_impedance_data_to_db(self,
                                             travel_impedance_measure_name,
                                             data):
        """
        Parameters
        ----------
        travel_impedance_measure_name: str
        data: list[dict]
            Each list element must contain keys:
            "from_stop_I", "to_stop_I", "min", "max", "median" and "mean"
        """
        f = float
        data_tuple = [(x["from_stop_I"], x["to_stop_I"], f(x["min"]),
                       f(x["max"]), f(x["median"]), f(x["mean"]))
                      for x in data]
        insert_stmt = '''INSERT OR REPLACE INTO ''' + travel_impedance_measure_name + ''' (
                              from_stop_I,
                              to_stop_I,
                              min,
                              max,
                              median,
                              mean) VALUES (?, ?, ?, ?, ?, ?) '''
        self.conn.executemany(insert_stmt, data_tuple)
        self.conn.commit()

    def create_index_for_journeys_table(self):
        # NOTE(review): duplicate definition — shadows the earlier one, so
        # its "PRAGMA temp_store=2" is never executed; consolidate.
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS journeys_to_stop_I_idx ON journeys (to_stop_I)"
        )

    def _create_index_for_travel_impedance_measure_table(
            self, travel_impedance_measure_name):
        table = travel_impedance_measure_name
        sql_from = "CREATE INDEX IF NOT EXISTS " + table + "_from_stop_I ON " + table + " (from_stop_I)"
        sql_to = "CREATE INDEX IF NOT EXISTS " + table + "_to_stop_I ON " + table + " (to_stop_I)"
        self.conn.execute(sql_from)
        self.conn.execute(sql_to)
        self.conn.commit()

    @timeit
    def initialize_comparison_tables(self, diff_db_path, before_db_tuple,
                                     after_db_tuple):
        """Create diff_<measure> tables comparing two attached databases.

        before_db_tuple / after_db_tuple are (db_path, attach_name) pairs.
        """
        self.diff_conn = sqlite3.connect(diff_db_path)

        self.diff_conn = attach_database(self.diff_conn,
                                         before_db_tuple[0],
                                         name=before_db_tuple[1])
        self.diff_conn = attach_database(self.diff_conn,
                                         after_db_tuple[0],
                                         name=after_db_tuple[1])

        for table in self.travel_impedance_measure_names:
            self.diff_conn.execute(
                "CREATE TABLE IF NOT EXISTS diff_" + table +
                " (from_stop_I, to_stop_I, diff_min, diff_max, diff_median, diff_mean)"
            )
            # NOTE(review): t2 below also reads before_db_tuple[1] — every
            # diff computes before-minus-before and is always 0. Looks like
            # t2 should be after_db_tuple[1]; confirm and fix.
            insert_stmt = "INSERT OR REPLACE INTO diff_" + table + \
                          "(from_stop_I, to_stop_I, diff_min, diff_max, diff_median, diff_mean) " \
                          "SELECT t1.from_stop_I, t1.to_stop_I, " \
                          "t1.min - t2.min AS diff_min, " \
                          "t1.max - t2.max AS diff_max, " \
                          "t1.median - t2.median AS diff_median, " \
                          "t1.mean - t2.mean AS diff_mean " \
                          "FROM " + before_db_tuple[1] + "." + table + " AS t1, " \
                          + before_db_tuple[1] + "." + table + " AS t2 " \
                          "WHERE t1.from_stop_I = t2.from_stop_I " \
                          "AND t1.to_stop_I = t2.to_stop_I "
            self.diff_conn.execute(insert_stmt)
            self.diff_conn.commit()

    def initialize_database(self):
        self._set_up_database()
        self._initialize_parameter_table()
        print("Database initialized!")

    def _set_up_database(self):
        # Schema depends on track_route: with routes we also store per-leg
        # data and route/duration-component columns.
        self.conn.execute('''CREATE TABLE IF NOT EXISTS parameters(
                             key TEXT UNIQUE,
                             value BLOB)''')
        if self.track_route:
            self.conn.execute('''CREATE TABLE IF NOT EXISTS journeys(
                         journey_id INTEGER PRIMARY KEY,
                         from_stop_I INT,
                         to_stop_I INT,
                         departure_time INT,
                         arrival_time_target INT,
                         n_boardings INT,
                         movement_duration INT,
                         route TEXT,
                         journey_duration INT,
                         pre_journey_wait_fp INT,
                         in_vehicle_duration INT,
                         transfer_wait_duration INT,
                         walking_duration INT,
                         fastest_path INT)''')
            self.conn.execute('''CREATE TABLE IF NOT EXISTS legs(
                         journey_id INT,
                         from_stop_I INT,
                         to_stop_I INT,
                         departure_time INT,
                         arrival_time_target INT,
                         trip_I INT,
                         seq INT,
                         leg_stops TEXT)''')
            """
            self.conn.execute('''CREATE TABLE IF NOT EXISTS nodes(
                         stop_I INT,
                         agg_temp_distances REAL,
                         agg_journey_duration REAL,
                         agg_boardings REAL,
                         agg_transfer_wait REAL,
                         agg_pre_journey_wait REAL,
                         agg_walking_duration REAL)''')
            self.conn.execute('''CREATE TABLE IF NOT EXISTS od_pairs(
                         from_stop_I INT,
                         to_stop_I INT,
                         avg_temp_distance REAL,
                         agg_journey_duration REAL,
                         agg_boardings REAL,
                         agg_transfer_wait REAL,
                         agg_pre_journey_wait REAL,
                         agg_walking_duration REAL)''')
            self.conn.execute('''CREATE TABLE IF NOT EXISTS sections(
                         from_stop_I INT,
                         to_stop_I INT,
                         from_stop_pair_I INT,
                         to_stop_pair_I INT,
                         avg_temp_distance INT,
                         avg_journey_duration INT,
                         n_trips INT)''')
            self.conn.execute('''CREATE TABLE IF NOT EXISTS transfer_nodes(
                         from_stop_I INT,
                         to_stop_I INT,
                         from_stop_pair_I INT,
                         to_stop_pair_I INT,
                         avg_waiting_time INT,
                         n_trips INT)''')
            """
        else:
            self.conn.execute('''CREATE TABLE IF NOT EXISTS journeys(
                         journey_id INTEGER PRIMARY KEY,
                         from_stop_I INT,
                         to_stop_I INT,
                         departure_time INT,
                         arrival_time_target INT,
                         n_boardings INT,
                         journey_duration INT,
                         time_to_prev_journey_fp INT,
                         fastest_path INT)''')

        self.conn.commit()

    def _initialize_parameter_table(self):
        # Seed the parameters table with routing configuration and selected
        # GTFS metadata so later runs can be checked for compatibility.
        parameters = Parameters(self.conn)

        parameters["multiple_targets"] = self.multitarget_routing
        parameters["gtfs_dir"] = self.gtfs_path
        for param in [
                "location_name", "lat_median", "lon_median", "start_time_ut",
                "end_time_ut", "start_date", "end_date"
        ]:
            parameters[param] = self.gtfs_meta[param]
        parameters["target_list"] = ","
        for key, value in self.routing_params_input.items():
            parameters[key] = value
        self.conn.commit()

    def create_indices(self):
        # Next 3 lines are python 3.6 work-arounds again.
        self.conn.isolation_level = None  # former default of autocommit mode
        cur = self.conn.cursor()
        cur.execute('VACUUM;')
        self.conn.isolation_level = ''  # back to python default
        # end python3.6 workaround
        print("Analyzing...")
        cur.execute('ANALYZE')
        print("Indexing")
        cur = self.conn.cursor()
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_route ON journeys (route)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_jid ON journeys (journey_id)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_fid ON journeys (from_stop_I)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_tid ON journeys (to_stop_I)'
        )
        if self.track_route:
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_jid ON legs (journey_id)')
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_trid ON legs (trip_I)')
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_fid ON legs (from_stop_I)'
            )
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_tid ON legs (to_stop_I)')
        self.conn.commit()
from gtfspy.route_types import ROUTE_TYPE_TO_SHORT_DESCRIPTION from scripts.all_to_all_analyzer import stops_to_exclude from scripts.all_to_all_settings import * if __name__ == "__main__": master_df = None ignore_stops = stops_to_exclude(return_sqlite_list=True) for feed, feed_dict in FEED_LIST: g = GTFS(feed_dict["gtfs_dir"]) stats = g.get_stats() n_stops = g.execute_custom_query_pandas("""SELECT TYPE, count(*) AS n_stops FROM (SELECT * FROM stop_times, stops, trips, routes WHERE stops.stop_I=stop_times.stop_I AND trips.trip_I=stop_times.trip_I AND stops.stop_I NOT IN {ignore_stops} AND trips.route_I=routes.route_I GROUP BY stops.self_or_parent_I, type) q1 GROUP BY type""".format(ignore_stops=ignore_stops)) vehicle_kms = g.execute_custom_query_pandas("""SELECT type, sum(distance)/1000 AS vehicle_kilometers FROM (SELECT find_distance(q1.lon, q1.lat, q2.lon, q2.lat) AS distance, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, type FROM (SELECT * FROM stop_times, stops, trips, routes WHERE stops.stop_I=stop_times.stop_I AND trips.trip_I=stop_times.trip_I AND trips.route_I=routes.route_I AND stops.stop_I NOT IN {ignore_stops}) q1, (SELECT * FROM stop_times, stops WHERE stops.stop_I=stop_times.stop_I AND stops.stop_I NOT IN {ignore_stops}) q2 WHERE q1.seq+1 = q2.seq AND q1.trip_I = q2.trip_I) sq1 GROUP BY type""".format(ignore_stops=ignore_stops)) network_length = g.execute_custom_query_pandas("""SELECT type, sum(distance)/1000 AS route_kilometers FROM (SELECT find_distance(q1.lon, q1.lat, q2.lon, q2.lat) AS distance, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, type FROM
class RouteMapMaker:
    """Builds a plottable route map from a GTFS feed.

    Pipeline (see cluster_shapes): extract stop-to-stop shape segments via SQL,
    cluster nearby stops, split shape geometries at clustered stops, aggregate
    matching segments, and plot the result with matplotlib/geopandas.
    """

    def __init__(self, gtfs_name):
        # Accept either a feed key (looked up in FEED_DICT) or an
        # already-constructed GTFS object.
        if isinstance(gtfs_name, str):
            self.gtfs = GTFS(FEED_DICT[gtfs_name]["gtfs_dir"])
        else:
            self.gtfs = gtfs_name
        self.bunching_value = 99      # max parallel routes before offsets are skipped
        self.line_spacing = 0.0001    # offset between parallel route lines (CRS units)
        self.shapes = False           # when False, get_linestrings_for_stop_section falls back to straight lines
        self.crs_wgs = {'init': 'epsg:4326'}
        #self.crs_eurefin = {'init': 'epsg:3067'}
        # NOTE(review): crs_eurefin is commented out above but cluster_stops()
        # still reads self.crs_eurefin -- that call path would raise
        # AttributeError. Confirm whether the attribute should be restored.

    def cluster_shapes(self):
        """Cluster stop-to-stop shape segments and plot the aggregated map.

        Steps (as outlined by the original author):
        - get unique stop-to-stop shapes, with trips aggregated
        - split by nearby stops
        - match splitted, and aggregate trips
        - identify branches: large overlap but crosses buffer, insert pseudo stop at branch
        - split everything again
        - match splitted
        """
        # This query returns, per route/direction, the shape of the trip with
        # the maximum number of departures in the 07-08h window, exploded into
        # consecutive stop pairs with the shape coordinates between them.
        df = self.gtfs.execute_custom_query_pandas("""WITH
            a AS (
                SELECT routes.name AS name, shape_id, route_I, trip_I, routes.type, direction_id,
                    max(end_time_ds-start_time_ds) AS trip_duration, count(*) AS n_trips
                FROM trips
                LEFT JOIN routes USING(route_I)
                WHERE start_time_ds >= 7*3600 AND start_time_ds < 8*3600
                GROUP BY routes.route_I, direction_id
            ),
            b AS(
                SELECT q1.trip_I AS trip_I, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I,
                    q1.seq AS seq, q1.shape_break AS from_shape_break, q2.shape_break AS to_shape_break
                FROM
                (SELECT stop_I, trip_I, shape_break, seq FROM stop_times) q1,
                (SELECT stop_I, trip_I, shape_break, seq AS seq FROM stop_times) q2
                WHERE q1.seq=q2.seq-1 AND q1.trip_I=q2.trip_I
                AND q1.trip_I IN (SELECT trip_I FROM a)
            ),
            c AS(
                SELECT b.*, name, direction_id, route_I, a.shape_id,
                    group_concat(lat) AS lats, group_concat(lon) AS lons, count(*) AS n_coords
                FROM b, a, shapes
                WHERE b.trip_I = a.trip_I AND shapes.shape_id=a.shape_id
                AND b.from_shape_break <= shapes.seq AND b.to_shape_break >= shapes.seq
                GROUP BY route_I, direction_id, b.seq
                ORDER BY route_I, b.seq
            )
            SELECT from_stop_I, to_stop_I, group_concat(trip_I) AS trip_ids,
                group_concat(direction_id) AS direction_ids, lats, lons
            FROM c
            WHERE n_coords > 1
            GROUP BY from_stop_I, to_stop_I
            ORDER BY count(*) DESC""")
        # Turn the comma-separated lat/lon strings into LineStrings (lon, lat order).
        df["geometry"] = df.apply(lambda row: shapely.LineString([
            (float(lon), float(lat))
            for lon, lat in zip(row["lons"].split(","), row["lats"].split(","))
        ]), axis=1)
        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry=df["geometry"])
        #gdf = gdf.to_crs(self.crs_eurefin)
        gdf = gdf.to_crs(self.crs_wgs)  # no-op reprojection kept from the original
        gdf = gdf.drop(["lats", "lons"], axis=1)
        stops_set = set(gdf["from_stop_I"]) | set(gdf["to_stop_I"])
        # Remember the original (unclustered) stop pair for each segment.
        gdf["orig_parent_stops"] = list(zip(gdf['from_stop_I'], gdf['to_stop_I']))
        clustered_stops = self.cluster_stops(stops_set)
        # stop_I -> [new_stop_I, geometry]; used to remap endpoints to cluster ids.
        cluster_dict = clustered_stops[["new_stop_I", "stop_I", "geometry"]].set_index('stop_I').T.to_dict('list')
        geom_dict = clustered_stops[["new_stop_I", "geometry"]].set_index("new_stop_I").T.to_dict('list')
        gdf["to_stop_I"] = gdf.apply(lambda row: cluster_dict[row["to_stop_I"]][0], axis=1)
        gdf["from_stop_I"] = gdf.apply(lambda row: cluster_dict[row["from_stop_I"]][0], axis=1)
        # Column meanings after remapping:
        # to/from_stop_I: cluster id
        # orig_parent_stops: old id
        # child_stop_I: cluster id
        splitted_gdf = self.split_shapes_by_nearby_stops(clustered_stops, gdf)
        # Serialize the child-stop tuple into a string key so it can be grouped on.
        splitted_gdf['child_stop_I'] = splitted_gdf.apply(lambda row: ",".join([str(int(x)) for x in row.child_stop_I]), axis=1)
        splitted_gdf_grouped = splitted_gdf.groupby(['child_stop_I'])
        # Keep one representative geometry per child-stop pair; collect parents.
        splitted_gdf_grouped = splitted_gdf_grouped.agg({'orig_parent_stops': lambda x: tuple(x), 'geometry': lambda x: x.iloc[0]}, axis=1)
        splitted_gdf = splitted_gdf_grouped.reset_index()
        splitted_gdf['value'] = splitted_gdf.apply(lambda row: 1, axis=1)
        #splitted_gdf = splitted_gdf.set_geometry(splitted_gdf["geometry"], crs=self.crs_eurefin)
        splitted_gdf = self.match_shapes(splitted_gdf)
        # Random column only used for coloring in plot_geopandas.
        splitted_gdf["rand"] = np.random.randint(1, 10, splitted_gdf.shape[0])
        print(splitted_gdf)
        self.plot_geopandas(splitted_gdf, alpha=0.3)

    def split_shapes_by_nearby_stops(self, stops, shapes, buffer=0.01):
        """Split shape geometries at clustered stops that lie within a buffer.

        :param stops: GeoDataFrame of clustered stops (has new_stop_I, stop_I, geometry)
        :param shapes: GeoDataFrame of stop-to-stop shape segments
        :param buffer: buffer radius (CRS units) used for the spatial join
        :return: GeoDataFrame with one row per split part
                 (child_stop_I, orig_parent_stops, geometry)
        """
        # stops within buffer
        # splitter
        # retain the "parent" stop section
        #stops['geometry'] = stops.apply(lambda row: str(row.geometry), axis=1)
        #stops = stops.groupby(['new_stop_I', 'geometry'])['stop_I'].apply(list).reset_index()
        #stops["geometry"] = stops.apply(lambda row: loads(row.geometry), axis=1)
        # Collapse each stop cluster into one row (tuple of member stop_Is,
        # first geometry as representative).
        stops_grouped = stops.groupby(['new_stop_I'])
        stops_grouped = stops_grouped.agg({'stop_I': lambda x: tuple(x), 'geometry': lambda x: x.iloc[0]}, axis=1)
        stops = stops_grouped.reset_index()
        #stops = stops.set_geometry(stops["geometry"], crs=self.crs_eurefin)
        stops["point_geom"] = stops["geometry"]
        # Buffer the shapes, then spatially join stops that intersect the buffer.
        shapes["buffer"] = shapes["geometry"].buffer(buffer)
        shapes["line_geom"] = shapes["geometry"]
        shapes = shapes.set_geometry(shapes["buffer"])
        gdf_joined = sjoin(shapes, stops, how="left", op='intersects')
        gdf_joined = gdf_joined.set_geometry(gdf_joined["line_geom"])
        gdf_joined = gdf_joined.drop(["buffer", "line_geom"], axis=1)
        #gdf_joined['geometry'] = gdf_joined.apply(lambda row: str(row.geometry), axis=1)
        # One row per shape segment again, collecting the joined stop points.
        gdf_grouped = gdf_joined.groupby(["orig_parent_stops", 'from_stop_I', 'to_stop_I'])
        gdf_grouped = gdf_grouped.agg({'point_geom': lambda x: tuple(x), 'new_stop_I': lambda x: tuple(x), 'geometry': lambda x: x.iloc[0]}, axis=1)
        gdf_joined = gdf_grouped.reset_index()
        # Split each segment at its joined stop points.
        gdf_joined = gdf_joined.apply(lambda row: self.split_shape_by_points(row), axis=1)
        # Explode: one output row per (shape part, child stop pair).
        new_list = []
        for row in gdf_joined.to_dict('records'):
            for shape, stop_tuple in zip(row['shape_parts'], row['child_stop_Is']):
                new_row = copy.deepcopy(row)
                new_row["shape_part"] = shape
                new_row["child_stop_I"] = stop_tuple
                new_list.append(new_row)
        gdf_joined = pd.DataFrame(new_list)
        # NOTE(review): set_geometry is called on a plain pandas DataFrame here;
        # presumably this relies on geopandas monkey-patching -- confirm.
        gdf_joined = gdf_joined.set_geometry(gdf_joined["shape_part"])
        return gdf_joined[['child_stop_I', 'orig_parent_stops', 'geometry']]

    def check_shape_orientation(self, shape, from_stop_point, to_stop_point):
        """Check that the shape runs from from_stop to to_stop, not reversed.

        NOTE(review): not implemented -- the body is empty.

        :param shape:
        :param from_stop_point:
        :param to_stop_point:
        :return:
        """

    # def split_shape_by_points(self, shape, shape_parents, points, point_ids):
    def split_shape_by_points(self, row):
        """Split one shape geometry at the intermediate cluster points in *row*.

        Reads row["geometry"], row["from_stop_I"]/["to_stop_I"] (endpoint
        cluster ids), row["point_geom"] (joined stop points) and
        row["new_stop_I"] (their cluster ids); writes row["shape_parts"]
        (list of geometries) and row["child_stop_Is"] (list of (from, to)
        cluster-id tuples), one entry per part.

        :param row: a record produced by split_shapes_by_nearby_stops
        :return: the same row, augmented
        """
        shape = row["geometry"]
        shape_parents = [row["from_stop_I"], row["to_stop_I"]]
        points = row["point_geom"]
        point_ids = row["new_stop_I"]
        # TODO: change this to also output the cluster point ids for the end points so that matching is possible directly
        # No joined points (left join produced NaN): keep the shape whole.
        if not isinstance(points[0], shapely.Point):
            row["shape_parts"] = [shape]
            row["child_stop_Is"] = [shape_parents]
            return row
        # finds the distance on the shape that corresponds to the closest distance to the point
        distance_dict = {shape.project(point): {"point": point, "id": id} for point, id in zip(points, point_ids)}
        shape_parts = []
        stop_sections = []
        rest_of_shape = copy.deepcopy(shape)
        previous_stop = shape_parents[0]
        # loops trough the points in the order they are compared to the shape
        # (skipping the first and last, which correspond to the endpoints)
        if len(distance_dict) >= 3:
            for key in sorted(distance_dict)[1:-1]:
                if distance_dict[key]["id"] not in shape_parents:
                    new_point = shape.interpolate(key)
                    # TODO: this step only works with a modified version of split(), replace with a custom function
                    geometries = split(rest_of_shape, new_point)
                    stop_sections.append((int(previous_stop), int(distance_dict[key]["id"])))
                    previous_stop = distance_dict[key]["id"]
                    if len(geometries) == 2:
                        rest_of_shape = geometries[1]
                        shape_parts.append(geometries[0])
                    else:
                        # split produced a single geometry: keep accumulating.
                        rest_of_shape = geometries[0]
        # The remainder runs from the last split point to the far endpoint.
        shape_parts.append(rest_of_shape)
        stop_sections.append((previous_stop, shape_parents[1]))
        #if len(shape_parts) > 1:
        #    assert not all(x == shape_parts[0] for x in shape_parts)
        #shape_parts = row["new_stop_I"]
        #stop_sections = row["new_stop_I"]
        row["shape_parts"] = shape_parts
        row["child_stop_Is"] = stop_sections
        return row
        # return (shape_parts, stop_sections)

    def match_shapes(self, shapes, buffer=0.01):
        """Spatially match shape endpoints that fall within each other's buffer.

        checks if shapes are completely within each others buffers, aggregates
        routes for these

        :param shapes: GeoDataFrame of shape segments
        :param buffer: buffer radius (CRS units)
        :return: *shapes* left-joined with a new_stop_I cluster index
        """
        # buffer for spatial self join
        first_points = shapes["geometry"].apply(lambda x: Point(x.coords[0]))
        last_points = shapes["geometry"].apply(lambda x: Point(x.coords[-1]))
        points = pd.concat([first_points, last_points])
        point_df = points.to_frame(name='geometry')
        #point_df = point_df.set_geometry(point_df["geometry"], crs=self.crs_eurefin)
        point_df = point_df.set_geometry(point_df["geometry"], crs=self.crs_wgs)
        #buffer = point_df.buffer(30)
        #buffer = GeoDataFrame(crs=self.crs_eurefin, geometry=point_df.buffer(buffer))
        # NOTE(review): the parameter name `buffer` is rebound to a GeoDataFrame here.
        buffer = GeoDataFrame(crs=self.crs_wgs, geometry=point_df.buffer(buffer))
        buffer["everything"] = 1
        # Dissolve all buffers into one multi-polygon, then explode into
        # single-part polygons that act as endpoint clusters.
        gdf_poly = buffer.dissolve(by="everything")
        polygons = None
        for geoms in gdf_poly["geometry"]:
            polygons = [polygon for polygon in geoms]
        #single_parts = GeoDataFrame(crs=self.crs_eurefin, geometry=polygons)
        single_parts = GeoDataFrame(crs=self.crs_wgs, geometry=polygons)
        single_parts['new_stop_I'] = single_parts.index
        gdf_joined = sjoin(shapes, single_parts, how="left", op='within')
        return gdf_joined

    def identify_branches(self, shapes, buffer=0.01):
        """Check for shapes that exit the buffer of another shape.

        In these cases a pseudo stop is created, for further splitting of shapes.
        NOTE(review): not implemented -- the body is empty.

        :param shapes:
        :param buffer:
        :return:
        """

    def get_linestrings_for_stop_section(self, stop_tuple, trip_id, from_shape_brake, to_shape_brake):
        """Return a LineString between two stops.

        Uses the trip's shape points when self.shapes is truthy and the shape
        is non-degenerate; otherwise falls back to a straight line between the
        stop coordinates. Returns None when both stops share a coordinate.

        :param stop_tuple: (from_stop_I, to_stop_I)
        :param trip_id: trip whose shape is used
        :param from_shape_brake: shape-break index of the first stop
        :param to_shape_brake: shape-break index of the second stop
        :return: shapely LineString or None
        """
        try:
            assert self.shapes
            shapedict = get_shape_between_stops(self.gtfs.conn.cursor(), trip_id, stop_tuple[0], stop_tuple[1], (from_shape_brake, to_shape_brake))
            # Reject degenerate shapes where all lats or all lons coincide.
            assert not len(set(shapedict["lat"])) <= 1
            assert not len(set(shapedict["lon"])) <= 1
            return shapely.LineString([(lon, lat) for lat, lon in zip(shapedict["lat"], shapedict["lon"])])
        except (ValueError, AssertionError):
            # Fallback: straight line between the two stops.
            lat0, lon0 = self.gtfs.get_stop_coordinates(stop_tuple[0])
            lat1, lon1 = self.gtfs.get_stop_coordinates(stop_tuple[1])
            if lat0 == lat1 and lon0 == lon1:
                return
            else:
                return shapely.LineString([(lon0, lat0), (lon1, lat1)])

    def route_parallels(self, line, route, all_routes, bunching_value=5, line_spacing=0.0001):
        """Offset *line* sideways so parallel routes do not overlap.

        :param line: shapely LineString (or falsy for no geometry)
        :param route: the route to draw
        :param all_routes: all routes sharing this stop section (order fixes the offset)
        :param bunching_value: above this many parallels, no offset is applied
        :param line_spacing: spacing between adjacent parallels (CRS units)
        :return: offset LineString, the original line, or None
        """
        n_parallels = len(all_routes)
        line_routes = []  # NOTE(review): unused local kept for byte-fidelity
        if not line:
            return
        if n_parallels < bunching_value:
            # Symmetric offsets centred on zero, one slot per parallel route.
            offsets = np.linspace(-1 * ((n_parallels - 1) * line_spacing) / 2, ((n_parallels - 1) * line_spacing) / 2, n_parallels)
            try:
                return line.parallel_offset(abs(offsets[all_routes.index(route)]), "left" if offsets[all_routes.index(route)] < 0 else "right")
            # NOTE(review): bare except silently swallows all errors (and
            # returns None) -- narrow this to the shapely exception expected.
            except:
                print(line, offsets[all_routes.index(route)])
        else:
            return line

    def get_route_ranking(self, df):
        """Group routes by undirected stop section, preserving direction order.

        Routes seen in the section's original orientation are appended; routes
        seen in the reverse orientation are prepended.

        :param df: DataFrame with from_stop_I, to_stop_I, route_I, trip_I,
                   from_shape_break, to_shape_break columns
        :return: (dict section -> ordered route_I list,
                  dict section -> (trip_I, from_shape_break, to_shape_break))
        """
        route_order_for_stop_sections = {}
        stop_section_shapes = {}
        for row in df.itertuples():
            section_tuple = (row.from_stop_I, row.to_stop_I)
            alt_section_tuple = (row.to_stop_I, row.from_stop_I)
            if not section_tuple in route_order_for_stop_sections and not alt_section_tuple in route_order_for_stop_sections:
                # First sighting of this section (either direction).
                route_order_for_stop_sections[section_tuple] = [row.route_I]
                stop_section_shapes[section_tuple] = (row.trip_I, row.from_shape_break, row.to_shape_break)
            elif section_tuple in route_order_for_stop_sections:
                route_order_for_stop_sections[section_tuple].append(row.route_I)
            elif alt_section_tuple in route_order_for_stop_sections:
                route_order_for_stop_sections[alt_section_tuple].insert(0, row.route_I)
        return route_order_for_stop_sections, stop_section_shapes

    def get_geometry(self, stop_tuple, route, all_routes, cluster_dict):
        """Return an offset straight line between two clustered stops.

        Returns None for degenerate (same-stop) sections.

        :param stop_tuple: (from cluster id, to cluster id)
        :param route: route to draw
        :param all_routes: all routes on this section (fixes the offset slot)
        :param cluster_dict: cluster id -> [geometry, ...]
        :return: LineString from route_parallels, or None
        """
        #line = get_linestrings_for_stop_section(stop_tuple, trip_id, from_shape_break, to_shape_break)
        #print(stop_tuple, cluster_dict[stop_tuple[0]], cluster_dict[stop_tuple[1]])
        line = shapely.LineString([cluster_dict[stop_tuple[0]][0], cluster_dict[stop_tuple[1]][0]])
        if stop_tuple[0] == stop_tuple[1]:
            return
        else:
            return self.route_parallels(line, route, all_routes, bunching_value=self.bunching_value, line_spacing=self.line_spacing)

    def cluster_stops(self, stops_set, distance=100):
        """Merge stops that are within *distance* of each other into one stop.

        Implementation: change projection for an accurate buffer distance,
        buffer each stop by distance/2, merge the polygons, explode to single
        parts, and take the centroid of each part as the cluster stop.

        :param stops_set: iterable that lists stop_I's
        :param distance: int, distance to merge, meters
        :return: DataFrame of clustered stops (new_stop_I, member stop rows,
                 centroid geometry)
        """
        df = self.gtfs.execute_custom_query_pandas(
            """SELECT * FROM stops WHERE stop_I IN ({stops_set})"""
            .format(stops_set=",".join([str(x) for x in stops_set])))
        df["geometry"] = df.apply(lambda row: Point((row["lon"], row["lat"])), axis=1)
        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry=df["geometry"])
        # NOTE(review): self.crs_eurefin is commented out in __init__, so this
        # line raises AttributeError as the class stands -- confirm/restore.
        gdf = gdf.to_crs(self.crs_eurefin)
        gdf_poly = gdf.copy()
        gdf_poly["geometry"] = gdf_poly["geometry"].buffer(distance / 2)
        gdf_poly["everything"] = 1
        gdf_poly = gdf_poly.dissolve(by="everything")
        polygons = None
        for geoms in gdf_poly["geometry"]:
            polygons = [polygon for polygon in geoms]
        single_parts = GeoDataFrame(crs=self.crs_eurefin, geometry=polygons)
        single_parts['new_stop_I'] = single_parts.index
        # Assign each original stop to the cluster polygon containing it.
        gdf_joined = sjoin(gdf, single_parts, how="left", op='within')
        single_parts["geometry"] = single_parts.centroid
        gdf_joined = gdf_joined.drop('geometry', 1)
        centroid_stops = single_parts.merge(gdf_joined, on="new_stop_I")
        return centroid_stops

    def plot_geopandas(self, gdf, **kwargs):
        """Plot *gdf* colored by its 'rand' column and show the figure.

        :param gdf: GeoDataFrame with a 'rand' column
        :param kwargs: forwarded to GeoDataFrame.plot (e.g. alpha)
        """
        fig, ax = plt.subplots()
        gdf.plot(column="rand", **kwargs)
        plt.show()
class AllToAllRoutingPipeline:
    """Runs all-to-all public-transport routing for one GTFS feed.

    Extracts routable events, runs a multi-objective CSA profiler per target
    stop, and stores results either as pickles or directly in a journey
    database via JourneyDataManager. Configuration constants (PICKLE,
    TRANSFER_MARGIN, WALK_SPEED, TRACK_*, FEED_LIST, ...) come from the
    settings module imported elsewhere in this file.
    """

    def __init__(self, feed_dict, routing_params):
        """Set up feed paths/times; open the journey DB when not pickling.

        :param feed_dict: dict with gtfs_dir, journey_dir, day/routing/analysis
            time bounds and pickle_dir
        :param routing_params: parameters forwarded to JourneyDataManager
        """
        self.pickle = PICKLE  # when True, results are pickled instead of written to the journey DB
        self.gtfs_dir = feed_dict["gtfs_dir"]
        self.G = GTFS(feed_dict["gtfs_dir"])
        self.tz = self.G.get_timezone_name()
        self.journey_dir = feed_dict["journey_dir"]
        self.day_start = feed_dict["day_start"]
        self.day_end = feed_dict["day_end"]
        self.routing_start_time = feed_dict["routing_start_time"]
        self.routing_end_time = feed_dict["routing_end_time"]
        self.analysis_start_time = feed_dict["analysis_start_time"]
        self.analysis_end_time = feed_dict["analysis_end_time"]
        self.pickle_dir = feed_dict["pickle_dir"]
        self.routing_params = routing_params
        self.jdm = None
        if not self.pickle:
            # NOTE(review): this branch uses the global GTFS_DB_WORK_DIR /
            # RESULTS_DIR paths, not feed_dict's paths -- confirm intended.
            self.jdm = JourneyDataManager(os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_FNAME), journey_db_path=os.path.join(RESULTS_DIR, JOURNEY_DB_FNAME), routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_route=TRACK_ROUTE)

    def get_all_events(self):
        """Collect transit connections and the walking network for routing.

        :return: (walk network graph, list of Connection objects) covering
            [routing_start_time, routing_end_time]
        """
        print("Retrieving transit events")
        connections = []
        for e in self.G.generate_routable_transit_events(start_time_ut=self.routing_start_time, end_time_ut=self.routing_end_time):
            connections.append(Connection(int(e.from_stop_I), int(e.to_stop_I), int(e.dep_time_ut), int(e.arr_time_ut), int(e.trip_I), int(e.seq)))
        # Sanity check: no duplicate events.
        assert (len(connections) == len(set(connections)))
        print("scheduled events:", len(connections))
        print("Retrieving walking network")
        net = walk_transfer_stop_to_stop_network(self.G, max_link_distance=CUTOFF_DISTANCE)
        print("net edges: ", len(net.edges()))
        return net, connections

    @timeit
    def loop_trough_targets_and_run_routing(self, targets, slurm_array_i):
        """Run profile routing toward each target stop and store the profiles.

        The profiler is built once and reset per target (cheaper than
        rebuilding). Results go to pickles (self.pickle) or the journey DB.

        :param targets: iterable of target stop_I values
        :param slurm_array_i: subdirectory index for pickle output
        """
        net, connections = self.get_all_events()
        csp = None
        for target in targets:
            print(target)
            if csp is None:
                csp = MultiObjectivePseudoCSAProfiler(connections, target, walk_network=net, end_time_ut=self.routing_end_time, transfer_margin=TRANSFER_MARGIN, start_time_ut=self.routing_start_time, walk_speed=WALK_SPEED, verbose=True, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_time=TRACK_TIME, track_route=TRACK_ROUTE)
            else:
                csp.reset([target])
            csp.run()
            profiles = dict(csp.stop_profiles)
            if self.pickle:
                self._pickle_results(profiles, slurm_array_i, target)
            else:
                self.jdm.import_journey_data_for_target_stop(target, profiles)
            # Drop the (large) profile dict before the next target.
            profiles = None
            gc.collect()

    @timeit
    def loop_trough_targets_and_run_routing_with_route(self, targets, slurm_array_i):
        """Run profile routing per target (route-tracking variant).

        NOTE(review): this is a near-duplicate of
        loop_trough_targets_and_run_routing (only the print text differs;
        both pass the same TRACK_* flags) -- candidate for deduplication.

        :param targets: iterable of target stop_I values
        :param slurm_array_i: subdirectory index for pickle output
        """
        net, connections = self.get_all_events()
        csp = None
        for target in targets:
            print("target: ", target)
            if csp is None:
                csp = MultiObjectivePseudoCSAProfiler(connections, target, walk_network=net, end_time_ut=self.routing_end_time, transfer_margin=TRANSFER_MARGIN, start_time_ut=self.routing_start_time, walk_speed=WALK_SPEED, verbose=True, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_time=TRACK_TIME, track_route=TRACK_ROUTE)
            else:
                csp.reset([target])
            csp.run()
            profiles = dict(csp.stop_profiles)
            if self.pickle:
                self._pickle_results(profiles, slurm_array_i, target)
            else:
                self.jdm.import_journey_data_for_target_stop(target, profiles)
            profiles = None
            gc.collect()

    @timeit
    def _pickle_results(self, profiles, pickle_subdir, target):
        """Pickle the final optimal labels per stop to <pickle_dir>/<subdir>/<target>.pickle.

        :param profiles: dict stop_I -> profile object (reduced to
            get_final_optimal_labels() before dumping)
        :param pickle_subdir: subdirectory name under self.pickle_dir
        :param target: target stop_I (used as the file name)
        """
        pickle_path = makedirs(os.path.join(self.pickle_dir, str(pickle_subdir)))
        pickle_path = os.path.join(pickle_path, str(target) + ".pickle")
        profiles = dict((key, value.get_final_optimal_labels()) for (key, value) in profiles.items())
        """for key, values in profiles.items():
            values.sort(key=lambda x: x.departure_time, reverse=True)
            new_values = compute_pareto_front(values)
            profiles[key] = new_values
        """
        # Highest pickle protocol (-1) for compact output.
        pickle.dump(profiles, open(pickle_path, 'wb'), -1)
        profiles = None
        gc.collect()

    def get_list_of_stops(self, where=''):
        """Return stop_I values from the GTFS DB, ordered ascending.

        :param where: optional SQL WHERE clause appended verbatim.
            NOTE(review): string concatenation -- only pass trusted input.
        :return: pandas DataFrame with a stop_I column
        """
        df = self.G.execute_custom_query_pandas("SELECT stop_I FROM stops " + where + " ORDER BY stop_I")
        return df

    @timeit
    def store_pickle_in_db(self):
        """Import every pickled target profile under pickle_dir into the journey DB.

        Targets already present in the DB are skipped; indices are created at
        the end.
        """
        self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir, routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_route=TRACK_ROUTE)
        for root, dirs, files in os.walk(self.pickle_dir):
            for target_file in files:
                target = target_file.replace(".pickle", "")
                # NOTE(review): membership test compares a str target against
                # get_targets_having_journeys() -- confirm element type matches.
                if not target in self.jdm.get_targets_having_journeys():
                    print("target: ", target)
                    profiles = pickle.load(open(os.path.join(root, target_file), 'rb'))
                    self.jdm.import_journey_data_for_target_stop(int(target), profiles)
                else:
                    print("skipping: ", target, " already in db")
        self.jdm.create_indices()

    def calculate_additional_columns_for_journey(self):
        """Populate derived journey columns and store travel impedance measures."""
        if not self.jdm:
            self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir, routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_route=TRACK_ROUTE)
        self.jdm.populate_additional_journey_columns()
        self.jdm.compute_and_store_travel_impedance_measures(self.analysis_start_time, self.analysis_end_time, TRAVEL_IMPEDANCE_STORE_PATH)

    def calculate_comparison_measures(self):
        """Initialize before/after comparison tables from the last two feeds in FEED_LIST.

        Orders the two most recent feeds by their feed_seq so that the lower
        sequence number is treated as "after" -- NOTE(review): confirm this
        orientation; it looks inverted relative to the usual before/after naming.
        """
        if not self.jdm:
            self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir, routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS, track_route=TRACK_ROUTE)
        prev_dict = None
        prev_key = None
        before_db_tuple = None
        after_db_tuple = None
        for (key, feed_dict) in FEED_LIST:
            if prev_dict:
                if feed_dict["feed_seq"] < prev_dict["feed_seq"]:
                    after_db_tuple = (feed_dict["journey_dir"], key)
                    before_db_tuple = (prev_dict["journey_dir"], prev_key)
                else:
                    before_db_tuple = (feed_dict["journey_dir"], key)
                    after_db_tuple = (prev_dict["journey_dir"], prev_key)
            prev_dict = feed_dict
            prev_key = key
        self.jdm.initialize_comparison_tables(DIFF_PATH, before_db_tuple, after_db_tuple)