Example #1
def prepare_dbs():
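    """
    Prepare working copies of the two GTFS sqlite databases: copy them from
    the source directory, merge their stops tables, recompute stop-to-stop
    distances, and run walk-distance routing on both.
    """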
    try:
        assert not os.path.isfile(
            os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_LM + SQLITE_SUFFIX))
        assert not os.path.isfile(
            os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_OLD + SQLITE_SUFFIX))

        for fn in [GTFS_DB_LM + SQLITE_SUFFIX, GTFS_DB_OLD + SQLITE_SUFFIX]:
            subprocess.call([
                "cp",
                os.path.join(GTFS_DB_SOURCE_DIR, fn),
                os.path.join(GTFS_DB_WORK_DIR, fn)
            ])
    except AssertionError:
        print("remove old files to start from scratch, continuing...")

    G_lm = GTFS(os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_LM + SQLITE_SUFFIX))

    G_old = GTFS(os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_OLD + SQLITE_SUFFIX))
    """try:
        except IntegrityError:
        print("additional stops already added")"""
    #G_lm.add_stops_from_csv(os.path.join(GTFS_DB_SOURCE_DIR, PSEUDO_STOP_FNAME))
    merge_stops_tables(
        os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_OLD + SQLITE_SUFFIX),
        os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_LM + SQLITE_SUFFIX))

    G_old.replace_stop_i_with_stop_pair_i(colname="stop_pair_I")
    G_lm.replace_stop_i_with_stop_pair_i(colname="stop_pair_I")

    lm_stops = G_lm.execute_custom_query_pandas("SELECT * FROM stops")
    old_stops = G_old.execute_custom_query_pandas("SELECT * FROM stops")
    lm_stops_set = set(lm_stops["stop_I"])
    old_stops_set = set(old_stops["stop_I"])
    print(lm_stops_set)
    print(old_stops_set)
    print("stops not in old:", lm_stops_set - old_stops_set)
    print("stops not in old:", old_stops_set - lm_stops_set)

    print("calculating stop distances for first feed")
    G_old.recalculate_stop_distances(2000, remove_old_table=True)

    print("calculating stop distances for second feed")
    G_lm.recalculate_stop_distances(2000, remove_old_table=True)

    #G_lm.homogenize_stops_table_with_other_db(os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_OLD + SQLITE_SUFFIX))

    print("run walk distance routing")
    """
    add_walk_distances_to_db_python(G_lm, OSM_DIR, cutoff_distance_m=CUTOFF_DISTANCE)
    add_walk_distances_to_db_python(G_old, OSM_DIR, cutoff_distance_m=CUTOFF_DISTANCE)
    """
    for fn in [GTFS_DB_LM + SQLITE_SUFFIX, GTFS_DB_OLD + SQLITE_SUFFIX]:
        subprocess.call([
            "java", "-jar",
            "gtfspy/java_routing/target/transit_osm_routing-1.0-SNAPSHOT-jar-with-dependencies.jar",
            "-u", os.path.join(GTFS_DB_WORK_DIR, fn),
            "-osm", OSM_DIR,
            "--tempDir", "/tmp/"
        ])
Example #2
def stops_to_exclude(return_sqlite_list=False):
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
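    # The leading digit of stop_id encodes the area; pseudo stops are
    # prefixed with '__b__', so their area digit sits at position 6 instead
    # of position 1. Stops with an area digit greater than 4 are excluded.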
    areas_to_remove = gtfs_lm.execute_custom_query_pandas(
        "SELECT * FROM stops "
        "WHERE CASE WHEN substr(stop_id, 1, 5) = '__b__' "
        "THEN CAST(substr(stop_id, 6, 1) AS integer) "
        "ELSE CAST(substr(stop_id, 1, 1) AS integer) END > 4"
    )
    if return_sqlite_list:
        return "(" + ",".join(
            [str(x) for x in areas_to_remove["stop_I"].tolist()]) + ")"
    return areas_to_remove
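
# A minimal usage sketch (hypothetical query; Example #4 below shows a real one):
#   ignore_stops = stops_to_exclude(return_sqlite_list=True)  # e.g. "(12,34)"
#   gtfs.execute_custom_query_pandas(
#       "SELECT * FROM stops WHERE stop_I NOT IN " + ignore_stops)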
Example #3
class JourneyDataManager:
    def __init__(self,
                 gtfs_path,
                 journey_db_path,
                 routing_params=None,
                 multitarget_routing=False,
                 track_vehicle_legs=True,
                 track_route=False):
        """
        :param gtfs: GTFS object
        :param list_of_stop_profiles: dict of NodeProfileMultiObjective
        :param multitarget_routing: bool
        """
        self.multitarget_routing = multitarget_routing
        self.track_route = track_route
        self.track_vehicle_legs = track_vehicle_legs
        self.gtfs_path = gtfs_path
        self.gtfs = GTFS(self.gtfs_path)
        self.gtfs_meta = self.gtfs.meta
        self.gtfs._dont_close = True
        self.od_pairs = None
        self._targets = None
        self._origins = None
        self.diff_conn = None

        if not routing_params:
            routing_params = dict()
        self.routing_params_input = routing_params

        # Either the journey database must already exist, or routing
        # parameters must be supplied so that it can be created.
        assert os.path.exists(journey_db_path) or routing_params
        journey_db_pre_exists = os.path.isfile(journey_db_path)

        # insert a pretty robust timeout:
        timeout = 100
        self.conn = sqlite3.connect(journey_db_path, timeout)
        if not journey_db_pre_exists:
            self.initialize_database()

        self.routing_parameters = Parameters(self.conn)
        self._assert_journey_computation_parameters_match()

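        # Each property maps to a (value_if_no_next_journey, cutoff) pair;
        # _T_WALK_STR placeholders are later replaced by the actual walking
        # duration (see compute_travel_impedance_measures_for_od_pairs).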
        self.journey_properties = {
            "journey_duration": (_T_WALK_STR, _T_WALK_STR)
        }
        if routing_params.get('track_vehicle_legs', False) or \
                self.routing_parameters.get('track_vehicle_legs', False):
            self.journey_properties["n_boardings"] = (float("inf"), 0)
        if self.track_route:
            additional_journey_parameters = {
                "in_vehicle_duration": (float('inf'), 0),
                "transfer_wait_duration": (float('inf'), 0),
                "walking_duration": (_T_WALK_STR, _T_WALK_STR),
                "pre_journey_wait_fp": (float('inf'), 0)
            }
            self.journey_properties.update(additional_journey_parameters)
        self.travel_impedance_measure_names = list(
            self.journey_properties.keys())
        self.travel_impedance_measure_names += ["temporal_distance"]

    def __del__(self):
        self.gtfs._dont_close = False
        if self.conn:
            self.conn.close()

    @timeit
    def import_journey_data_for_target_stop(self, target_stop_I,
                                            origin_stop_I_to_journey_labels):
        """
        Parameters
        ----------
        origin_stop_I_to_journey_labels: dict
            key: origin_stop_Is
            value: list of labels
        target_stop_I: int
        """
        cur = self.conn.cursor()
        self.conn.isolation_level = 'EXCLUSIVE'
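        # synchronous=OFF speeds up the bulk insert at the cost of crash
        # safety during the write.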
        cur.execute('PRAGMA synchronous = OFF;')

        if self.track_route:
            self._insert_journeys_with_route_into_db(
                origin_stop_I_to_journey_labels,
                target_stop=int(target_stop_I))
        else:
            self._insert_journeys_into_db_no_route(
                origin_stop_I_to_journey_labels,
                target_stop=int(target_stop_I))
        print("Finished import process")

    def _assert_journey_computation_parameters_match(self):
        for key, value in self.routing_parameters.items():
            if key in self.gtfs_meta.keys():
                assert self.gtfs_meta[key] == value

    def _get_largest_journey_id(self):
        cur = self.conn.cursor()
        val = cur.execute("select max(journey_id) FROM journeys").fetchone()
        return val[0] if val[0] else 0

    def _insert_journeys_into_db_no_route(self,
                                          stop_profiles,
                                          target_stop=None):
        # TODO: Change the insertion so that the check last journey id and insertions are in the same transaction block
        """
        con.isolation_level = 'EXCLUSIVE'
        con.execute('BEGIN EXCLUSIVE')
        #exclusive access starts here. Nothing else can r/w the db, do your magic here.
        con.commit()
        """
        print("Collecting journey data")
        journey_id = 1
        journey_list = []
        tot = len(stop_profiles)
        for i, (origin_stop, labels) in enumerate(stop_profiles.items(),
                                                  start=1):
            #print("\r Stop " + str(i) + " of " + str(tot), end='', flush=True)
            for label in labels:
                assert (isinstance(label, LabelTimeWithBoardingsCount))
                if self.multitarget_routing:
                    target_stop = None
                else:
                    target_stop = int(target_stop)

                values = [
                    int(journey_id),
                    int(origin_stop), target_stop,
                    int(label.departure_time),
                    int(label.arrival_time_target),
                    int(label.n_boardings)
                ]

                journey_list.append(values)
                journey_id += 1
        print("Inserting journeys into database")
        insert_journeys_stmt = '''INSERT INTO journeys(
              journey_id,
              from_stop_I,
              to_stop_I,
              departure_time,
              arrival_time_target,
              n_boardings) VALUES (%s) ''' % (", ".join(
            ["?" for x in range(6)]))
        #self.conn.executemany(insert_journeys_stmt, journey_list)

        self._execute_function(insert_journeys_stmt, journey_list)
        self.conn.commit()

    @timeit
    def _execute_function(self, statement, rows):
        self.conn.execute('BEGIN EXCLUSIVE')
        last_id = self._get_largest_journey_id()
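        # Shift the incoming journey_ids past the current maximum so that
        # repeated imports do not collide with existing rows.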
        rows = [[x[0] + last_id] + x[1:] for x in rows]
        self.conn.executemany(statement, rows)

    def _insert_journeys_with_route_into_db(self, stop_I_to_journey_labels,
                                            target_stop):
        print("Collecting journey and connection data")
        journey_id = self._get_largest_journey_id() + 1
        journey_list = []
        connection_list = []
        label = None
        for i, (origin_stop,
                labels) in enumerate(stop_I_to_journey_labels.items(),
                                     start=1):
            # tot = len(stop_profiles)
            #print("\r Stop " + str(i) + " of " + str(tot), end='', flush=True)

            assert (isinstance(stop_I_to_journey_labels[origin_stop], list))

            for label in labels:
                assert (isinstance(label, LabelTimeAndRoute)
                        or isinstance(label, LabelTimeBoardingsAndRoute))
                # We need to "unpack" the journey to actually figure out where the trip went
                # (there can be several targets).
                if label.departure_time == label.arrival_time_target:
                    print("Weird label:", label)
                    continue

                target_stop, new_connection_values, route_stops = self._collect_connection_data(
                    journey_id, label)
                if origin_stop == target_stop:
                    continue

                if isinstance(label, LabelTimeBoardingsAndRoute):
                    values = [
                        int(journey_id),
                        int(origin_stop),
                        int(target_stop),
                        int(label.departure_time),
                        int(label.arrival_time_target), label.n_boardings,
                        label.movement_duration, route_stops
                    ]
                else:
                    values = [
                        int(journey_id),
                        int(origin_stop),
                        int(target_stop),
                        int(label.departure_time),
                        int(label.arrival_time_target),
                        label.movement_duration, route_stops
                    ]

                journey_list.append(values)
                connection_list += new_connection_values
                journey_id += 1

        print("Inserting journeys into database")
        if label:
            if isinstance(label, LabelTimeBoardingsAndRoute):
                insert_journeys_stmt = '''INSERT INTO journeys(
                      journey_id,
                      from_stop_I,
                      to_stop_I,
                      departure_time,
                      arrival_time_target,
                      n_boardings,
                      movement_duration,
                      route) VALUES (%s) ''' % (", ".join(
                    ["?" for x in range(8)]))
            else:
                insert_journeys_stmt = '''INSERT INTO journeys(
                      journey_id,
                      from_stop_I,
                      to_stop_I,
                      departure_time,
                      arrival_time_target,
                      movement_duration,
                      route) VALUES (%s) ''' % (", ".join(
                    ["?" for x in range(7)]))
            self.conn.executemany(insert_journeys_stmt, journey_list)

            print("Inserting legs into database")
            insert_legs_stmt = '''INSERT INTO legs(
                                  journey_id,
                                  from_stop_I,
                                  to_stop_I,
                                  departure_time,
                                  arrival_time_target,
                                  trip_I,
                                  seq,
                                  leg_stops) VALUES (%s) ''' % (", ".join(
                ["?" for x in range(8)]))
            self.conn.executemany(insert_legs_stmt, connection_list)
            self.routing_parameters["target_list"] += (str(target_stop) + ",")
            self.conn.commit()

    def create_index_for_journeys_table(self):
        self.conn.execute("PRAGMA temp_store=2")
        self.conn.commit()
        self.conn.execute(
            "CREATE INDEX IF NOT EXISTS journeys_to_stop_I_idx ON journeys (to_stop_I)"
        )

    def _collect_connection_data(self, journey_id, label):
        target_stop = None
        cur_label = label
        seq = 1
        value_list = []
        route_stops = []
        leg_stops = []
        prev_trip_id = None
        connection = None
        leg_departure_time = None
        leg_departure_stop = None
        leg_arrival_time = None
        leg_arrival_stop = None
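        # Follow the label chain via previous_label, starting a new leg
        # whenever the trip_id changes (walking connections get trip_id -1).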
        while True:
            if isinstance(cur_label.connection, Connection):
                connection = cur_label.connection
                if connection.trip_id:
                    trip_id = connection.trip_id
                else:
                    trip_id = -1

                # In case of new leg
                if prev_trip_id != trip_id:
                    route_stops.append(connection.departure_stop)
                    if prev_trip_id:
                        leg_stops.append(connection.departure_stop)

                        values = (int(journey_id), int(leg_departure_stop),
                                  int(leg_arrival_stop),
                                  int(leg_departure_time),
                                  int(leg_arrival_time), int(prev_trip_id),
                                  int(seq),
                                  ','.join([str(x) for x in leg_stops]))
                        value_list.append(values)
                        seq += 1
                        leg_stops = []

                    leg_departure_stop = connection.departure_stop
                    leg_departure_time = connection.departure_time
                leg_arrival_time = connection.arrival_time
                leg_arrival_stop = connection.arrival_stop
                leg_stops.append(connection.departure_stop)
                target_stop = connection.arrival_stop
                prev_trip_id = trip_id

            if not cur_label.previous_label:
                leg_stops.append(connection.arrival_stop)
                values = (int(journey_id), int(leg_departure_stop),
                          int(leg_arrival_stop), int(leg_departure_time),
                          int(leg_arrival_time), int(prev_trip_id), int(seq),
                          ','.join([str(x) for x in leg_stops]))
                value_list.append(values)
                break

            cur_label = cur_label.previous_label
        route_stops.append(target_stop)
        route_stops = ','.join([str(x) for x in route_stops])
        return target_stop, value_list, route_stops

    def populate_additional_journey_columns(self):
        # self.add_fastest_path_column()
        # self.add_time_to_prev_journey_fp_column()
        self.compute_journey_time_components()
        self.calculate_pre_journey_waiting_times_ignoring_direct_walk()

    def get_od_pairs(self):
        cur = self.conn.cursor()
        if not self.od_pairs:
            cur.execute(
                'SELECT from_stop_I, to_stop_I FROM journeys GROUP BY from_stop_I, to_stop_I'
            )
            self.od_pairs = cur.fetchall()
        return self.od_pairs

    def get_targets(self):
        cur = self.conn.cursor()
        if not self._targets:
            cur.execute('SELECT to_stop_I FROM journeys GROUP BY to_stop_I')
            self._targets = [target[0] for target in cur.fetchall()]
        return self._targets

    def get_origins(self):
        cur = self.conn.cursor()
        if not self._origins:
            cur.execute(
                'SELECT from_stop_I FROM journeys GROUP BY from_stop_I')
            self._origins = [origin[0] for origin in cur.fetchall()]
        return self._origins

    def get_table_with_coordinates(self, table_name, target=None):
        df = self.get_table_as_dataframe(table_name, target)
        return self.gtfs.add_coordinates_to_df(df, join_column='from_stop_I')

    def get_table_as_dataframe(self, table_name, to_stop_I_target=None):
        query = "SELECT * FROM " + table_name
        if to_stop_I_target:
            query += " WHERE to_stop_I = %s" % to_stop_I_target
        return pd.read_sql_query(query, self.conn)

    @timeit
    def add_fastest_path_column(self):
        print("adding fastest path column")
        cur = self.conn.cursor()
        for target in self.get_targets():
            fastest_path_journey_ids = []
            for origin in self.get_origins():
                cur.execute(
                    'SELECT departure_time, arrival_time_target, journey_id FROM journeys '
                    'WHERE from_stop_I = ? AND to_stop_I = ? '
                    'ORDER BY departure_time ASC', (origin, target))
                all_trips = cur.fetchall()
                all_labels = [
                    LabelTimeAndRoute(x[0], x[1], x[2], False)
                    for x in all_trips
                ]  #putting journey_id as movement_duration
                all_fp_labels = compute_pareto_front(all_labels,
                                                     finalization=False,
                                                     ignore_n_boardings=True)
                fastest_path_journey_ids.append(all_fp_labels)

            fastest_path_journey_ids = [(1, x.movement_duration)
                                        for sublist in fastest_path_journey_ids
                                        for x in sublist]
            cur.executemany(
                "UPDATE journeys SET fastest_path = ? WHERE journey_id = ?",
                fastest_path_journey_ids)
        self.conn.commit()

    @timeit
    def add_time_to_prev_journey_fp_column(self):
        print("adding pre journey waiting time")
        cur = self.conn.cursor()
        for target in self.get_targets():

            cur.execute(
                'SELECT journey_id, from_stop_I, to_stop_I, departure_time FROM journeys '
                'WHERE fastest_path = 1 AND to_stop_I = ? '
                'ORDER BY from_stop_I, to_stop_I, departure_time ',
                (target, ))

            all_trips = cur.fetchall()
            time_to_prev_journey = []
            prev_departure_time = None
            prev_origin = None
            prev_destination = None
            for trip in all_trips:
                journey_id = trip[0]
                from_stop_I = trip[1]
                to_stop_I = trip[2]
                departure_time = trip[3]
                if prev_origin != from_stop_I or prev_destination != to_stop_I:
                    prev_departure_time = None
                if prev_departure_time:
                    time_to_prev_journey.append(
                        (departure_time - prev_departure_time, journey_id))
                prev_origin = from_stop_I
                prev_destination = to_stop_I
                prev_departure_time = departure_time
            cur.executemany(
                "UPDATE journeys SET pre_journey_wait_fp = ? WHERE journey_id = ?",
                time_to_prev_journey)
        self.conn.commit()

    @timeit
    def compute_journey_time_components(self):
        print("adding journey components")
        cur = self.conn.cursor()
        cur.execute(
            "UPDATE journeys SET journey_duration = arrival_time_target - departure_time"
        )

        if self.track_route:
            cur.execute(
                "UPDATE journeys "
                "SET "
                "in_vehicle_duration = "
                "(SELECT sum(arrival_time_target - departure_time) AS in_vehicle_duration FROM legs "
                "WHERE journeys.journey_id = legs.journey_id AND trip_I != -1 GROUP BY journey_id)"
            )
            cur.execute(
                "UPDATE journeys "
                "SET "
                "walking_duration = "
                "(SELECT sum(arrival_time_target - departure_time) AS walking_duration FROM legs "
                "WHERE journeys.journey_id = legs.journey_id AND trip_I < 0 GROUP BY journey_id)"
            )
            cur.execute(
                "UPDATE journeys "
                "SET transfer_wait_duration = journey_duration - in_vehicle_duration - walking_duration"
            )
        self.conn.commit()

    def _journey_label_generator(self,
                                 destination_stop_Is=None,
                                 origin_stop_Is=None):
        conn = self.conn
        conn.row_factory = sqlite3.Row
        if destination_stop_Is is None:
            destination_stop_Is = self.get_targets()
        if origin_stop_Is is None:
            origin_stop_Is = self.get_origins()

        for destination_stop_I in destination_stop_Is:
            if self.track_route:
                label_features = "journey_id, from_stop_I, to_stop_I, n_boardings, movement_duration, " \
                                 "journey_duration, in_vehicle_duration, transfer_wait_duration, walking_duration, " \
                                 "departure_time, arrival_time_target"""
            else:
                label_features = "journey_id, from_stop_I, to_stop_I, n_boardings, departure_time, " \
                                 "arrival_time_target"
            sql = "SELECT " + label_features + " FROM journeys WHERE to_stop_I = %s" % destination_stop_I

            df = pd.read_sql_query(sql, self.conn)
            for origin_stop_I in origin_stop_Is:
                selection = df.loc[df['from_stop_I'] == origin_stop_I]
                journey_labels = []
                for journey in selection.to_dict(orient='records'):
                    journey["pre_journey_wait_fp"] = -1
                    try:
                        journey_labels.append(LabelGeneric(journey))
                    except Exception:
                        print(journey)
                yield origin_stop_I, destination_stop_I, journey_labels

    def get_node_profile_time_analyzer(self, target, origin, start_time_dep,
                                       end_time_dep):
        sql = """SELECT journey_id, from_stop_I, to_stop_I, n_boardings, movement_duration, journey_duration,
        in_vehicle_duration, transfer_wait_duration, walking_duration, departure_time, arrival_time_target
        FROM journeys WHERE to_stop_I = %s AND from_stop_I = %s""" % (target,
                                                                      origin)
        df = pd.read_sql_query(sql, self.conn)
        journey_labels = []
        for journey in df.to_dict(orient='records'):
            journey_labels.append(LabelGeneric(journey))

        fpa = FastestPathAnalyzer(
            journey_labels,
            start_time_dep,
            end_time_dep,
            walk_duration=float('inf'),  # walking time
            label_props_to_consider=list(self.journey_properties.keys()))
        return fpa.get_time_analyzer()

    def _get_node_profile_analyzer_time_and_veh_legs(self, target, origin,
                                                     start_time_dep,
                                                     end_time_dep):
        sql = """SELECT from_stop_I, to_stop_I, n_boardings, departure_time, arrival_time_target FROM journeys WHERE to_stop_I = %s AND from_stop_I = %s""" % (
            target, origin)
        df = pd.read_sql_query(sql, self.conn)

        journey_labels = []
        for journey in df.itertuples():
            departure_time = journey.departure_time
            arrival_time_target = journey.arrival_time_target
            n_boardings = journey.n_boardings
            journey_labels.append(
                LabelTimeWithBoardingsCount(departure_time,
                                            arrival_time_target,
                                            n_boardings,
                                            first_leg_is_walk=float('nan')))

        # This ought to be optimized...
        query = """SELECT d, d_walk FROM stop_distances WHERE to_stop_I = %s AND from_stop_I = %s""" % (
            target, origin)
        df = self.gtfs.execute_custom_query_pandas(query)
        if len(df) > 0:
            walk_duration = float(
                df['d_walk'].iloc[0]) / self.routing_params_input['walk_speed']
        else:
            walk_duration = float('inf')
        analyzer = NodeProfileAnalyzerTimeAndVehLegs(
            journey_labels,
            walk_duration,  # walking time
            start_time_dep,
            end_time_dep)
        return analyzer

    def read_travel_impedance_measure_from_table(self,
                                                 travel_impedance_measure,
                                                 from_stop_I=None,
                                                 to_stop_I=None,
                                                 statistic=None):
        """
        Recover pre-computed travel_impedance between od-pairs from the database.

        Returns
        -------
        values: number | Pandas DataFrame
        """
        to_select = []
        where_clauses = []
        to_select.append("from_stop_I")
        to_select.append("to_stop_I")
        if from_stop_I is not None:
            where_clauses.append("from_stop_I=" + str(int(from_stop_I)))
        if to_stop_I is not None:
            where_clauses.append("to_stop_I=" + str(int(to_stop_I)))
        where_clause = ""
        if len(where_clauses) > 0:
            where_clause = " WHERE " + " AND ".join(where_clauses)
        if not statistic:
            to_select.extend(["min", "mean", "median", "max"])
        else:
            to_select.append(statistic)
        to_select_clause = ",".join(to_select)
        if not to_select_clause:
            to_select_clause = "*"
        sql = "SELECT " + to_select_clause + " FROM " + travel_impedance_measure + where_clause + ";"
        df = pd.read_sql(sql, self.conn)
        return df

    @timeit
    def compute_travel_impedance_measures_for_od_pairs(self,
                                                       analysis_start_time,
                                                       analysis_end_time,
                                                       targets=None,
                                                       origins=None):
        results_dict = {}
        for travel_impedance_measure in self.travel_impedance_measure_names:
            self._create_travel_impedance_measure_table(
                travel_impedance_measure)

        print("Computing total number of origins and targets..",
              end='',
              flush=True)
        if targets is None:
            targets = self.get_targets()
        if origins is None:
            origins = self.get_origins()
        print("\rComputed total number of origins and targets")
        n_pairs_tot = len(origins) * len(targets)

        def _flush_data_to_db(results):
            for travel_impedance_measure, data in results.items():
                self.__insert_travel_impedance_data_to_db(
                    travel_impedance_measure, data)
            for travel_impedance_measure in self.travel_impedance_measure_names:
                results[travel_impedance_measure] = []

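        # The first call inserts nothing; it just initializes an empty result
        # list for every travel impedance measure.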
        _flush_data_to_db(results_dict)

        for i, (origin, target, journey_labels) in enumerate(
                self._journey_label_generator(targets, origins)):
            if i % 100 == 0:
                print("\r",
                      i,
                      "/",
                      n_pairs_tot,
                      " : ",
                      "%.2f" % round(float(i) / n_pairs_tot, 3),
                      end='',
                      flush=True)

            kwargs = {"from_stop_I": origin, "to_stop_I": target}
            walking_distance = self.gtfs.get_stop_distance(origin, target)

            if walking_distance:
                walking_duration = walking_distance / self.routing_params_input[
                    "walk_speed"]
            else:
                walking_duration = float("inf")
            fpa = FastestPathAnalyzer(
                journey_labels,
                analysis_start_time,
                analysis_end_time,
                walk_duration=walking_duration,  # walking time
                label_props_to_consider=list(self.journey_properties.keys()),
                **kwargs)
            temporal_distance_analyzer = fpa.get_temporal_distance_analyzer()
            # Note: summary_as_dict automatically includes the from_stop_I
            # and to_stop_I fields.
            results_dict["temporal_distance"].append(
                temporal_distance_analyzer.summary_as_dict())
            fpa.calculate_pre_journey_waiting_times_ignoring_direct_walk()
            for key, (value_no_next_journey,
                      value_cutoff) in self.journey_properties.items():
                value_cutoff = walking_duration if value_cutoff == _T_WALK_STR else value_cutoff
                value_no_next_journey = walking_duration if value_no_next_journey == _T_WALK_STR else value_no_next_journey
                if key == "pre_journey_wait_fp":
                    property_analyzer = fpa.get_prop_analyzer_for_pre_journey_wait(
                    )
                else:
                    property_analyzer = fpa.get_prop_analyzer_flat(
                        key, value_no_next_journey, value_cutoff)
                results_dict[key].append(property_analyzer.summary_as_dict())

            if i % 1000 == 0:  # update in batches of 1000
                _flush_data_to_db(results_dict)
        # flush everything that remains
        _flush_data_to_db(results_dict)

    def create_indices_for_travel_impedance_measure_tables(self):
        for travel_impedance_measure in self.travel_impedance_measure_names:
            self._create_index_for_travel_impedance_measure_table(
                travel_impedance_measure)

    @timeit
    def calculate_pre_journey_waiting_times_ignoring_direct_walk(self):
        all_fp_labels = []
        for origin, target, journey_labels in self._journey_label_generator():
            if not journey_labels:
                continue
            fpa = FastestPathAnalyzer(
                journey_labels,
                self.routing_parameters["routing_start_time_dep"],
                self.routing_parameters["routing_end_time_dep"],
                walk_duration=float('inf'))
            fpa.calculate_pre_journey_waiting_times_ignoring_direct_walk()
            all_fp_labels += fpa.get_fastest_path_labels()
        self.update_journey_from_labels(all_fp_labels, "pre_journey_wait_fp")

    def update_journey_from_labels(self, labels, attribute):
        cur = self.conn.cursor()
        insert_tuples = []
        for label in labels:
            insert_tuples.append(
                (getattr(label, attribute), getattr(label, "journey_id")))

        sql = "UPDATE journeys SET %s = ? WHERE journey_id = ?" % (attribute, )
        cur.executemany(sql, insert_tuples)
        self.conn.commit()

    def _create_travel_impedance_measure_table(self, travel_impedance_measure):
        print("creating table: ", travel_impedance_measure)
        self.conn.execute("CREATE TABLE IF NOT EXISTS " +
                          travel_impedance_measure + " (from_stop_I INT, "
                          "to_stop_I INT, "
                          "min INT, "
                          "max INT, "
                          "median INT, "
                          "mean REAL, "
                          "UNIQUE (from_stop_I, to_stop_I))")

    def __insert_travel_impedance_data_to_db(self,
                                             travel_impedance_measure_name,
                                             data):
        """
        Parameters
        ----------
        travel_impedance_measure_name: str
        data: list[dict]
            Each list element must contain keys:
            "from_stop_I", "to_stop_I", "min", "max", "median" and "mean"
        """
        f = float
        data_tuple = [(x["from_stop_I"], x["to_stop_I"], f(x["min"]),
                       f(x["max"]), f(x["median"]), f(x["mean"]))
                      for x in data]
        insert_stmt = '''INSERT OR REPLACE INTO ''' + travel_impedance_measure_name + ''' (
                              from_stop_I,
                              to_stop_I,
                              min,
                              max,
                              median,
                              mean) VALUES (?, ?, ?, ?, ?, ?) '''
        self.conn.executemany(insert_stmt, data_tuple)
        self.conn.commit()

    def _create_index_for_travel_impedance_measure_table(
            self, travel_impedance_measure_name):
        table = travel_impedance_measure_name
        sql_from = "CREATE INDEX IF NOT EXISTS " + table + "_from_stop_I ON " + table + " (from_stop_I)"
        sql_to = "CREATE INDEX IF NOT EXISTS " + table + "_to_stop_I ON " + table + " (to_stop_I)"
        self.conn.execute(sql_from)
        self.conn.execute(sql_to)
        self.conn.commit()

    @timeit
    def initialize_comparison_tables(self, diff_db_path, before_db_tuple,
                                     after_db_tuple):
        self.diff_conn = sqlite3.connect(diff_db_path)

        self.diff_conn = attach_database(self.diff_conn,
                                         before_db_tuple[0],
                                         name=before_db_tuple[1])
        self.diff_conn = attach_database(self.diff_conn,
                                         after_db_tuple[0],
                                         name=after_db_tuple[1])

        for table in self.travel_impedance_measure_names:
            self.diff_conn.execute(
                "CREATE TABLE IF NOT EXISTS diff_" + table +
                " (from_stop_I, to_stop_I, diff_min, diff_max, diff_median, diff_mean)"
            )
            insert_stmt = "INSERT OR REPLACE INTO diff_" + table + \
                          "(from_stop_I, to_stop_I, diff_min, diff_max, diff_median, diff_mean) " \
                          "SELECT t1.from_stop_I, t1.to_stop_I, " \
                          "t1.min - t2.min AS diff_min, " \
                          "t1.max - t2.max AS diff_max, " \
                          "t1.median - t2.median AS diff_median, " \
                          "t1.mean - t2.mean AS diff_mean " \
                          "FROM " + before_db_tuple[1] + "." + table + " AS t1, " \
                          + before_db_tuple[1] + "." + table + " AS t2 " \
                                                               "WHERE t1.from_stop_I = t2.from_stop_I " \
                                                               "AND t1.to_stop_I = t2.to_stop_I "
            self.diff_conn.execute(insert_stmt)
            self.diff_conn.commit()

    def initialize_database(self):
        self._set_up_database()
        self._initialize_parameter_table()
        print("Database initialized!")

    def _set_up_database(self):
        self.conn.execute('''CREATE TABLE IF NOT EXISTS parameters(
                             key TEXT UNIQUE,
                             value BLOB)''')
        if self.track_route:
            self.conn.execute('''CREATE TABLE IF NOT EXISTS journeys(
                             journey_id INTEGER PRIMARY KEY,
                             from_stop_I INT,
                             to_stop_I INT,
                             departure_time INT,
                             arrival_time_target INT,
                             n_boardings INT,
                             movement_duration INT,
                             route TEXT,
                             journey_duration INT,
                             pre_journey_wait_fp INT,
                             in_vehicle_duration INT,
                             transfer_wait_duration INT,
                             walking_duration INT,
                             fastest_path INT)''')

            self.conn.execute('''CREATE TABLE IF NOT EXISTS legs(
                         journey_id INT,
                         from_stop_I INT,
                         to_stop_I INT,
                         departure_time INT,
                         arrival_time_target INT,
                         trip_I INT,
                         seq INT,
                         leg_stops TEXT)''')
            """
            self.conn.execute('''CREATE TABLE IF NOT EXISTS nodes(
                         stop_I INT,
                         agg_temp_distances REAL,
                         agg_journey_duration REAL,
                         agg_boardings REAL,
                         agg_transfer_wait REAL,
                         agg_pre_journey_wait REAL,
                         agg_walking_duration REAL)''')

            self.conn.execute('''CREATE TABLE IF NOT EXISTS od_pairs(
                         from_stop_I INT,
                         to_stop_I INT,
                         avg_temp_distance REAL,
                         agg_journey_duration REAL,
                         agg_boardings REAL,
                         agg_transfer_wait REAL,
                         agg_pre_journey_wait REAL,
                         agg_walking_duration REAL)''')


            self.conn.execute('''CREATE TABLE IF NOT EXISTS sections(
                         from_stop_I INT,
                         to_stop_I INT,
                         from_stop_pair_I INT,
                         to_stop_pair_I INT,
                         avg_temp_distance INT,
                         avg_journey_duration INT,
                         n_trips INT)''')

            self.conn.execute('''CREATE TABLE IF NOT EXISTS transfer_nodes(
                         from_stop_I INT,
                         to_stop_I INT,
                         from_stop_pair_I INT,
                         to_stop_pair_I INT,
                         avg_waiting_time INT,
                         n_trips INT)''')
            """
        else:
            self.conn.execute('''CREATE TABLE IF NOT EXISTS journeys(
                         journey_id INTEGER PRIMARY KEY,
                         from_stop_I INT,
                         to_stop_I INT,
                         departure_time INT,
                         arrival_time_target INT,
                         n_boardings INT,
                         journey_duration INT,
                         pre_journey_wait_fp INT,
                         fastest_path INT)''')

        self.conn.commit()

    def _initialize_parameter_table(self):

        parameters = Parameters(self.conn)

        parameters["multiple_targets"] = self.multitarget_routing
        parameters["gtfs_dir"] = self.gtfs_path
        for param in [
                "location_name", "lat_median", "lon_median", "start_time_ut",
                "end_time_ut", "start_date", "end_date"
        ]:
            parameters[param] = self.gtfs_meta[param]
        parameters["target_list"] = ","
        for key, value in self.routing_params_input.items():
            parameters[key] = value
        self.conn.commit()

    def create_indices(self):
        # Next 3 lines are python 3.6 work-arounds again.
        self.conn.isolation_level = None  # former default of autocommit mode
        cur = self.conn.cursor()
        cur.execute('VACUUM;')
        self.conn.isolation_level = ''  # back to python default
        # end python3.6 workaround
        print("Analyzing...")
        cur.execute('ANALYZE')
        print("Indexing")
        cur = self.conn.cursor()
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_route ON journeys (route)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_jid ON journeys (journey_id)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_fid ON journeys (from_stop_I)'
        )
        cur.execute(
            'CREATE INDEX IF NOT EXISTS idx_journeys_tid ON journeys (to_stop_I)'
        )

        if self.track_route:
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_jid ON legs (journey_id)')
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_trid ON legs (trip_I)')
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_fid ON legs (from_stop_I)'
            )
            cur.execute(
                'CREATE INDEX IF NOT EXISTS idx_legs_tid ON legs (to_stop_I)')
        self.conn.commit()
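
# A minimal usage sketch (hypothetical paths and values; the routing_params
# keys follow the lookups above, e.g. "walk_speed" and the
# routing_*_time_dep bounds read from self.routing_parameters):
#   jdm = JourneyDataManager("city_gtfs.sqlite", "journeys.sqlite",
#                            routing_params={"walk_speed": 70 / 60.},
#                            track_route=True)
#   jdm.import_journey_data_for_target_stop(target_stop_I, labels_by_origin)
#   jdm.compute_journey_time_components()
#   jdm.create_indices()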
Example #4
from gtfspy.gtfs import GTFS
from gtfspy.route_types import ROUTE_TYPE_TO_SHORT_DESCRIPTION
from scripts.all_to_all_analyzer import stops_to_exclude

from scripts.all_to_all_settings import *

if __name__ == "__main__":
    master_df = None
    ignore_stops = stops_to_exclude(return_sqlite_list=True)
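    # stops_to_exclude returns an SQL list literal such as "(12,34,56)",
    # interpolated into the NOT IN clauses below.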
    for feed, feed_dict in FEED_LIST:
        g = GTFS(feed_dict["gtfs_dir"])
        stats = g.get_stats()

        n_stops = g.execute_custom_query_pandas("""SELECT TYPE, count(*) AS n_stops FROM (SELECT * FROM stop_times, 
                                            stops, trips, routes 
                                            WHERE stops.stop_I=stop_times.stop_I AND trips.trip_I=stop_times.trip_I AND stops.stop_I NOT IN {ignore_stops}
                                            AND trips.route_I=routes.route_I
                                            GROUP BY stops.self_or_parent_I, type) q1
                                            GROUP BY type""".format(ignore_stops=ignore_stops))

        vehicle_kms = g.execute_custom_query_pandas("""SELECT type, sum(distance)/1000 AS vehicle_kilometers FROM
    (SELECT find_distance(q1.lon, q1.lat, q2.lon, q2.lat) AS distance, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, 
    type FROM
    (SELECT * FROM stop_times, stops, trips, routes WHERE stops.stop_I=stop_times.stop_I 
    AND trips.trip_I=stop_times.trip_I AND trips.route_I=routes.route_I  AND stops.stop_I NOT IN {ignore_stops}) q1,
    (SELECT * FROM stop_times, stops WHERE stops.stop_I=stop_times.stop_I AND stops.stop_I NOT IN {ignore_stops}) q2
    WHERE q1.seq+1 = q2.seq AND q1.trip_I = q2.trip_I) sq1
    GROUP BY type""".format(ignore_stops=ignore_stops))

        network_length = g.execute_custom_query_pandas("""SELECT type, sum(distance)/1000 AS route_kilometers FROM
    (SELECT find_distance(q1.lon, q1.lat, q2.lon, q2.lat) AS distance, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, 
    type FROM
Example #5
class RouteMapMaker:
    def __init__(self, gtfs_name):
        if isinstance(gtfs_name, str):
            self.gtfs = GTFS(FEED_DICT[gtfs_name]["gtfs_dir"])
        else:
            self.gtfs = gtfs_name
        self.bunching_value = 99
        self.line_spacing = 0.0001
        self.shapes = False
        self.crs_wgs = {'init': 'epsg:4326'}
        self.crs_eurefin = {'init': 'epsg:3067'}

    def cluster_shapes(self):
        """

        :return:
        """
        # get unique stop-to-stop shapes, with trips aggregated
        # split by nearby stops
        # match the split shapes, and aggregate trips
        # identify branches: large overlap but crosses buffer, insert pseudo stop at branch
        # split everything again
        # match the split shapes

        # This query returns one representative shape per route and direction.
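        # CTE a picks, per route and direction, the trip with the longest
        # duration departing between 7:00 and 8:00; b lists that trip's
        # consecutive stop pairs; c collects the shape coordinates falling
        # between each stop pair.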
        df = self.gtfs.execute_custom_query_pandas("""WITH 
                a AS (
                SELECT routes.name AS name, shape_id, route_I, trip_I, routes.type, direction_id, 
                max(end_time_ds-start_time_ds) AS trip_duration, count(*) AS n_trips
                FROM trips
                LEFT JOIN routes 
                USING(route_I)
                WHERE start_time_ds >= 7*3600 AND start_time_ds < 8*3600
                GROUP BY routes.route_I, direction_id
                ),
                b AS(
                SELECT q1.trip_I AS trip_I, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, q1.seq AS seq, 
                q1.shape_break AS from_shape_break, q2.shape_break AS to_shape_break FROM
                (SELECT stop_I, trip_I, shape_break, seq FROM stop_times) q1,
                (SELECT stop_I, trip_I, shape_break, seq AS seq FROM stop_times) q2
                WHERE q1.seq=q2.seq-1 AND q1.trip_I=q2.trip_I AND q1.trip_I IN (SELECT trip_I FROM a)
                ),
                c AS(
                SELECT b.*, name, direction_id, route_I, a.shape_id, group_concat(lat) AS lats, 
                group_concat(lon) AS lons, count(*) AS n_coords FROM b, a, shapes
                WHERE b.trip_I = a.trip_I AND shapes.shape_id=a.shape_id
                AND b.from_shape_break <= shapes.seq AND b.to_shape_break >= shapes.seq
                GROUP BY route_I, direction_id, b.seq
                ORDER BY route_I, b.seq
                )
                SELECT from_stop_I, to_stop_I, group_concat(trip_I) AS trip_ids, 
                group_concat(direction_id) AS direction_ids, lats, lons FROM c
                WHERE n_coords > 1
                GROUP BY from_stop_I, to_stop_I
                ORDER BY count(*) DESC""")

        df["geometry"] = df.apply(lambda row: shapely.LineString([
            (float(lon), float(lat))
            for lon, lat in zip(row["lons"].split(","), row["lats"].split(","))
        ]),
                                  axis=1)

        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry=df["geometry"])
        #gdf = gdf.to_crs(self.crs_eurefin)
        gdf = gdf.to_crs(self.crs_wgs)

        gdf = gdf.drop(["lats", "lons"], axis=1)

        stops_set = set(gdf["from_stop_I"]) | set(gdf["to_stop_I"])
        gdf["orig_parent_stops"] = list(
            zip(gdf['from_stop_I'], gdf['to_stop_I']))
        clustered_stops = self.cluster_stops(stops_set)
        cluster_dict = clustered_stops[[
            "new_stop_I", "stop_I", "geometry"
        ]].set_index('stop_I').T.to_dict('list')
        geom_dict = clustered_stops[[
            "new_stop_I", "geometry"
        ]].set_index("new_stop_I").T.to_dict('list')
        gdf["to_stop_I"] = gdf.apply(
            lambda row: cluster_dict[row["to_stop_I"]][0], axis=1)
        gdf["from_stop_I"] = gdf.apply(
            lambda row: cluster_dict[row["from_stop_I"]][0], axis=1)
        # to/from_stop_I: cluster id
        # orig_parent_stops: old id
        # child_stop_I: cluster id
        splitted_gdf = self.split_shapes_by_nearby_stops(clustered_stops, gdf)
        splitted_gdf['child_stop_I'] = splitted_gdf.apply(
            lambda row: ",".join([str(int(x)) for x in row.child_stop_I]),
            axis=1)
        splitted_gdf_grouped = splitted_gdf.groupby(['child_stop_I'])
        splitted_gdf_grouped = splitted_gdf_grouped.agg(
            {
                'orig_parent_stops': lambda x: tuple(x),
                'geometry': lambda x: x.iloc[0]
            },
            axis=1)

        splitted_gdf = splitted_gdf_grouped.reset_index()
        splitted_gdf['value'] = splitted_gdf.apply(lambda row: 1, axis=1)
        #splitted_gdf = splitted_gdf.set_geometry(splitted_gdf["geometry"], crs=self.crs_eurefin)

        splitted_gdf = self.match_shapes(splitted_gdf)
        splitted_gdf["rand"] = np.random.randint(1, 10, splitted_gdf.shape[0])
        print(splitted_gdf)
        self.plot_geopandas(splitted_gdf, alpha=0.3)

    def split_shapes_by_nearby_stops(self, stops, shapes, buffer=0.01):
        """
        Splits shapes at the stops that fall within a buffer around them
        :param stops: GeoDataFrame of clustered stops
        :param shapes: GeoDataFrame of stop-to-stop shapes
        :param buffer: float, buffer radius (in degrees for WGS84)
        :return: GeoDataFrame with one row per split shape part
        """
        # stops within buffer
        # splitter
        # retain the "parent" stop section
        #stops['geometry'] = stops.apply(lambda row: str(row.geometry), axis=1)

        #stops = stops.groupby(['new_stop_I', 'geometry'])['stop_I'].apply(list).reset_index()
        #stops["geometry"] = stops.apply(lambda row: loads(row.geometry), axis=1)

        stops_grouped = stops.groupby(['new_stop_I'])
        stops_grouped = stops_grouped.agg(
            {
                'stop_I': lambda x: tuple(x),
                'geometry': lambda x: x.iloc[0]
            },
            axis=1)

        stops = stops_grouped.reset_index()

        #stops = stops.set_geometry(stops["geometry"], crs=self.crs_eurefin)
        stops["point_geom"] = stops["geometry"]

        shapes["buffer"] = shapes["geometry"].buffer(buffer)
        shapes["line_geom"] = shapes["geometry"]
        shapes = shapes.set_geometry(shapes["buffer"])

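        # Spatial join: attach every clustered stop whose point intersects a
        # shape's buffer to that shape row.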
        gdf_joined = sjoin(shapes, stops, how="left", op='intersects')
        gdf_joined = gdf_joined.set_geometry(gdf_joined["line_geom"])
        gdf_joined = gdf_joined.drop(["buffer", "line_geom"], axis=1)
        #gdf_joined['geometry'] = gdf_joined.apply(lambda row: str(row.geometry), axis=1)

        gdf_grouped = gdf_joined.groupby(
            ["orig_parent_stops", 'from_stop_I', 'to_stop_I'])
        gdf_grouped = gdf_grouped.agg(
            {
                'point_geom': lambda x: tuple(x),
                'new_stop_I': lambda x: tuple(x),
                'geometry': lambda x: x.iloc[0]
            },
            axis=1)
        gdf_joined = gdf_grouped.reset_index()

        gdf_joined = gdf_joined.apply(
            lambda row: self.split_shape_by_points(row), axis=1)

        new_list = []
        for row in gdf_joined.to_dict('records'):
            for shape, stop_tuple in zip(row['shape_parts'],
                                         row['child_stop_Is']):
                new_row = copy.deepcopy(row)
                new_row["shape_part"] = shape
                new_row["child_stop_I"] = stop_tuple
                new_list.append(new_row)
        gdf_joined = pd.DataFrame(new_list)
        gdf_joined = gdf_joined.set_geometry(gdf_joined["shape_part"])
        return gdf_joined[['child_stop_I', 'orig_parent_stops', 'geometry']]

    def check_shape_orientation(self, shape, from_stop_point, to_stop_point):
        """
        Checks that the shape goes from the from stop to the to stop and not the opposite direction
        :param shape:
        :param from_stop_point:
        :param to_stop_point:
        :return:
        """

#    def split_shape_by_points(self, shape, shape_parents, points, point_ids):

    def split_shape_by_points(self, row):
        """
        Splits the shape of one row at the projections of its nearby stops
        :param row: row with "geometry", "from_stop_I", "to_stop_I",
            "point_geom" and "new_stop_I" fields
        :return: the row, with "shape_parts" and "child_stop_Is" added
        """
        shape = row["geometry"]
        shape_parents = [row["from_stop_I"], row["to_stop_I"]]
        points = row["point_geom"]
        point_ids = row["new_stop_I"]
        # TODO: change this to also output the cluster point ids for the end points so that matching is possible directly
        if not isinstance(points[0], shapely.Point):
            row["shape_parts"] = [shape]
            row["child_stop_Is"] = [shape_parents]
            return row
        # finds the distance on the shape that corresponds to the closest distance to the point
        distance_dict = {
            shape.project(point): {
                "point": point,
                "id": id
            }
            for point, id in zip(points, point_ids)
        }
        shape_parts = []
        stop_sections = []
        rest_of_shape = copy.deepcopy(shape)
        previous_stop = shape_parents[0]
        # loops trough the points in the order they are compared to the shape
        if len(distance_dict) >= 3:
            for key in sorted(distance_dict)[1:-1]:
                if distance_dict[key]["id"] not in shape_parents:
                    new_point = shape.interpolate(key)

                    # TODO: this step only works with a modified version of split(), replace with a custom function
                    geometries = split(rest_of_shape, new_point)

                    stop_sections.append(
                        (int(previous_stop), int(distance_dict[key]["id"])))
                    previous_stop = distance_dict[key]["id"]

                    if len(geometries) == 2:
                        rest_of_shape = geometries[1]
                        shape_parts.append(geometries[0])
                    else:
                        rest_of_shape = geometries[0]

        shape_parts.append(rest_of_shape)
        stop_sections.append((previous_stop, shape_parents[1]))
        #if len(shape_parts) > 1:
        #    assert not all(x == shape_parts[0] for x in shape_parts)

        #shape_parts = row["new_stop_I"]
        #stop_sections = row["new_stop_I"]
        row["shape_parts"] = shape_parts
        row["child_stop_Is"] = stop_sections
        return row


#        return (shape_parts, stop_sections)

    def match_shapes(self, shapes, buffer=0.01):
        """
        checks if shapes are completely within each others buffers, aggregates routes for these
        :return:
        """
        # buffer for spatial self join
        first_points = shapes["geometry"].apply(lambda x: Point(x.coords[0]))
        last_points = shapes["geometry"].apply(lambda x: Point(x.coords[-1]))

        points = pd.concat([first_points, last_points])
        point_df = points.to_frame(name='geometry')
        #point_df = point_df.set_geometry(point_df["geometry"], crs=self.crs_eurefin)
        point_df = point_df.set_geometry(point_df["geometry"],
                                         crs=self.crs_wgs)

        #buffer = point_df.buffer(30)
        #buffer = GeoDataFrame(crs=self.crs_eurefin, geometry=point_df.buffer(buffer))
        buffer = GeoDataFrame(crs=self.crs_wgs,
                              geometry=point_df.buffer(buffer))

        buffer["everything"] = 1
        gdf_poly = buffer.dissolve(by="everything")
        polygons = None
        for geoms in gdf_poly["geometry"]:
            polygons = [polygon for polygon in geoms]

        #single_parts = GeoDataFrame(crs=self.crs_eurefin, geometry=polygons)
        single_parts = GeoDataFrame(crs=self.crs_wgs, geometry=polygons)

        single_parts['new_stop_I'] = single_parts.index

        gdf_joined = sjoin(shapes, single_parts, how="left", op='within')
        return gdf_joined

    def identify_branches(self, shapes, buffer=0.01):
        """
        Checks for other shapes that exits the buffer of another buffer. In these cases a pseudo stop is created,
        for further splitting of shapes
        :param shapes:
        :param buffer:
        :return:
        """

    def get_linestrings_for_stop_section(self, stop_tuple, trip_id,
                                         from_shape_break, to_shape_break):
        try:
            assert self.shapes
            shapedict = get_shape_between_stops(
                self.gtfs.conn.cursor(), trip_id, stop_tuple[0], stop_tuple[1],
                (from_shape_break, to_shape_break))
            assert len(set(shapedict["lat"])) > 1
            assert len(set(shapedict["lon"])) > 1
            return shapely.LineString([
                (lon, lat)
                for lat, lon in zip(shapedict["lat"], shapedict["lon"])
            ])
        except (ValueError, AssertionError):
            lat0, lon0 = self.gtfs.get_stop_coordinates(stop_tuple[0])
            lat1, lon1 = self.gtfs.get_stop_coordinates(stop_tuple[1])
            if lat0 == lat1 and lon0 == lon1:
                return
            else:
                return shapely.LineString([(lon0, lat0), (lon1, lat1)])

    def route_parallels(self,
                        line,
                        route,
                        all_routes,
                        bunching_value=5,
                        line_spacing=0.0001):
        """
        Offsets a section's line sideways so that routes sharing the section
        are drawn as parallel lines instead of on top of each other.
        """
        if not line:
            return
        n_parallels = len(all_routes)
        if n_parallels < bunching_value:
            # Symmetric offsets around the original line: e.g. three routes
            # with spacing s get offsets [-s, 0, s].
            offsets = np.linspace(-1 * ((n_parallels - 1) * line_spacing) / 2,
                                  ((n_parallels - 1) * line_spacing) / 2,
                                  n_parallels)
            offset = offsets[all_routes.index(route)]
            try:
                return line.parallel_offset(abs(offset),
                                            "left" if offset < 0 else "right")
            except Exception:
                print(line, offset)
        else:
            # With too many overlapping routes, parallels would only clutter
            # the map, so the original line is returned.
            return line
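
    # Hypothetical usage: draw route 8, one of three routes sharing a
    # section, as the middle parallel (offset 0) along `section_line`:
    #
    #   offset_line = self.route_parallels(section_line, 8, [7, 8, 9])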

    def get_route_ranking(self, df):
        """
        Groups routes by the stop section they traverse, keeping one shape
        reference (trip and shape breaks) per section.
        """
        route_order_for_stop_sections = {}
        stop_section_shapes = {}
        for row in df.itertuples():
            section_tuple = (row.from_stop_I, row.to_stop_I)
            alt_section_tuple = (row.to_stop_I, row.from_stop_I)
            if (section_tuple not in route_order_for_stop_sections
                    and alt_section_tuple not in route_order_for_stop_sections):
                route_order_for_stop_sections[section_tuple] = [row.route_I]
                stop_section_shapes[section_tuple] = (row.trip_I,
                                                      row.from_shape_break,
                                                      row.to_shape_break)
            elif section_tuple in route_order_for_stop_sections:
                route_order_for_stop_sections[section_tuple].append(
                    row.route_I)
            elif alt_section_tuple in route_order_for_stop_sections:
                # Opposite-direction traversals are prepended instead of
                # appended.
                route_order_for_stop_sections[alt_section_tuple].insert(
                    0, row.route_I)
        return route_order_for_stop_sections, stop_section_shapes
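
    # The input df is expected to have columns from_stop_I, to_stop_I,
    # route_I, trip_I, from_shape_break and to_shape_break. A hypothetical
    # result for two routes sharing the section (10, 11):
    #
    #   route_order_for_stop_sections == {(10, 11): [3, 5]}
    #   stop_section_shapes == {(10, 11): (123, 4, 9)}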

    def get_geometry(self, stop_tuple, route, all_routes, cluster_dict):
        if stop_tuple[0] == stop_tuple[1]:
            return
        # Connect the two cluster centroids with a straight line, then offset
        # it according to the route's position among the parallel routes.
        line = shapely.LineString(
            [cluster_dict[stop_tuple[0]][0], cluster_dict[stop_tuple[1]][0]])
        return self.route_parallels(line,
                                    route,
                                    all_routes,
                                    bunching_value=self.bunching_value,
                                    line_spacing=self.line_spacing)

    def cluster_stops(self, stops_set, distance=100):
        """
        Merges stops that are within `distance` of each other into one stop.
        The steps are: reproject to a metric CRS for accurate buffer
        distances, buffer and dissolve the stop points, split the result
        into single-part polygons, and use the polygon centroids as the
        merged stop locations.
        :param stops_set: iterable of stop_I's
        :param distance: int, merge distance in meters
        :return: GeoDataFrame of stops with centroid geometries and the
            merged stop id in column new_stop_I
        """
        df = self.gtfs.execute_custom_query_pandas(
            "SELECT * FROM stops WHERE stop_I IN ({stops_set})"
            .format(stops_set=",".join([str(x) for x in stops_set])))
        df["geometry"] = df.apply(lambda row: Point((row["lon"], row["lat"])),
                                  axis=1)
        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry=df["geometry"])
        gdf = gdf.to_crs(self.crs_eurefin)
        gdf_poly = gdf.copy()
        gdf_poly["geometry"] = gdf_poly["geometry"].buffer(distance / 2)
        gdf_poly["everything"] = 1

        gdf_poly = gdf_poly.dissolve(by="everything")

        # Split the dissolved (multi)polygon into single-part polygons.
        polygons = None
        for geoms in gdf_poly["geometry"]:
            polygons = [polygon for polygon in geoms]

        single_parts = GeoDataFrame(crs=self.crs_eurefin, geometry=polygons)
        single_parts['new_stop_I'] = single_parts.index
        gdf_joined = sjoin(gdf, single_parts, how="left", op='within')
        # Replace the polygon geometry with its centroid and attach the
        # original stop rows via the new_stop_I key.
        single_parts["geometry"] = single_parts.centroid
        gdf_joined = gdf_joined.drop(columns='geometry')
        centroid_stops = single_parts.merge(gdf_joined, on="new_stop_I")
        return centroid_stops
        """
        change projection for accurate buffer distance
        merge polygons,
        select single parts
        calculate centroids
        """

    def plot_geopandas(self, gdf, **kwargs):
        fig, ax = plt.subplots()
        gdf.plot(ax=ax, column="rand", **kwargs)
        plt.show()
Ejemplo n.º 6
0
class AllToAllRoutingPipeline:
    def __init__(self, feed_dict, routing_params):
        self.pickle = PICKLE
        self.gtfs_dir = feed_dict["gtfs_dir"]
        self.G = GTFS(feed_dict["gtfs_dir"])
        self.tz = self.G.get_timezone_name()
        self.journey_dir = feed_dict["journey_dir"]
        self.day_start = feed_dict["day_start"]
        self.day_end = feed_dict["day_end"]
        self.routing_start_time = feed_dict["routing_start_time"]
        self.routing_end_time = feed_dict["routing_end_time"]
        self.analysis_start_time = feed_dict["analysis_start_time"]
        self.analysis_end_time = feed_dict["analysis_end_time"]
        self.pickle_dir = feed_dict["pickle_dir"]
        self.routing_params = routing_params

        self.jdm = None
        if not self.pickle:
            self.jdm = JourneyDataManager(os.path.join(GTFS_DB_WORK_DIR, GTFS_DB_FNAME),
                                          journey_db_path=os.path.join(RESULTS_DIR, JOURNEY_DB_FNAME),
                                          routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                          track_route=TRACK_ROUTE)

    def get_all_events(self):
        print("Retrieving transit events")
        connections = []
        for e in self.G.generate_routable_transit_events(start_time_ut=self.routing_start_time,
                                                         end_time_ut=self.routing_end_time):
            connections.append(Connection(int(e.from_stop_I),
                                          int(e.to_stop_I),
                                          int(e.dep_time_ut),
                                          int(e.arr_time_ut),
                                          int(e.trip_I),
                                          int(e.seq)))
        # Each routable event should be unique.
        assert len(connections) == len(set(connections))
        print("scheduled events:", len(connections))
        print("Retrieving walking network")
        net = walk_transfer_stop_to_stop_network(self.G, max_link_distance=CUTOFF_DISTANCE)
        print("net edges: ", len(net.edges()))
        return net, connections
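
    # A hypothetical use of get_all_events (assuming `pipeline` is an
    # instance of this class):
    #
    #   net, connections = pipeline.get_all_events()
    #   print(len(net.edges()), len(connections))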

    @timeit
    def loop_through_targets_and_run_routing(self, targets, slurm_array_i):
        net, connections = self.get_all_events()
        csp = None

        for target in targets:
            print(target)
            if csp is None:
                csp = MultiObjectivePseudoCSAProfiler(connections, target, walk_network=net,
                                                      end_time_ut=self.routing_end_time,
                                                      transfer_margin=TRANSFER_MARGIN,
                                                      start_time_ut=self.routing_start_time, walk_speed=WALK_SPEED,
                                                      verbose=True, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                                      track_time=TRACK_TIME, track_route=TRACK_ROUTE)
            else:
                csp.reset([target])
            csp.run()

            profiles = dict(csp.stop_profiles)
            if self.pickle:
                self._pickle_results(profiles, slurm_array_i, target)
            else:
                self.jdm.import_journey_data_for_target_stop(target, profiles)
            profiles = None
            gc.collect()
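
    # A hypothetical SLURM-style invocation, routing towards two target
    # stops in array task 0:
    #
    #   pipeline.loop_through_targets_and_run_routing([101, 102], 0)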

    @timeit
    def loop_through_targets_and_run_routing_with_route(self, targets, slurm_array_i):
        # Note: apart from the printout, this mirrors
        # loop_through_targets_and_run_routing.
        net, connections = self.get_all_events()
        csp = None

        for target in targets:
            print("target: ", target)
            if csp is None:
                csp = MultiObjectivePseudoCSAProfiler(connections, target, walk_network=net,
                                                      end_time_ut=self.routing_end_time,
                                                      transfer_margin=TRANSFER_MARGIN,
                                                      start_time_ut=self.routing_start_time, walk_speed=WALK_SPEED,
                                                      verbose=True, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                                      track_time=TRACK_TIME, track_route=TRACK_ROUTE)
            else:
                csp.reset([target])
            csp.run()

            profiles = dict(csp.stop_profiles)
            if self.pickle:
                self._pickle_results(profiles, slurm_array_i, target)
            else:
                self.jdm.import_journey_data_for_target_stop(target, profiles)
            profiles = None
            gc.collect()

    @timeit
    def _pickle_results(self, profiles, pickle_subdir, target):
        pickle_subdir_path = makedirs(os.path.join(self.pickle_dir, str(pickle_subdir)))
        pickle_path = os.path.join(pickle_subdir_path, str(target) + ".pickle")
        # Store only the final optimal labels; the full profiles are not needed.
        profiles = dict((key, value.get_final_optimal_labels()) for (key, value) in profiles.items())
        with open(pickle_path, 'wb') as f:
            pickle.dump(profiles, f, protocol=-1)
        profiles = None
        gc.collect()
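
    # The pickles written here map stop_I -> list of optimal labels; a
    # hypothetical read-back for target 101 in subdirectory "0":
    #
    #   with open(os.path.join(self.pickle_dir, "0", "101.pickle"), 'rb') as f:
    #       profiles = pickle.load(f)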

    def get_list_of_stops(self, where=''):
        df = self.G.execute_custom_query_pandas("SELECT stop_I FROM stops " + where + " ORDER BY stop_I")
        return df
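
    # `where` is pasted into the SQL as-is, so a hypothetical filtered call
    # looks like:
    #
    #   stops = pipeline.get_list_of_stops(where="WHERE stop_I < 1000")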

    @timeit
    def store_pickle_in_db(self):
        self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir,
                                      routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                      track_route=TRACK_ROUTE)
        for root, dirs, files in os.walk(self.pickle_dir):
            for target_file in files:
                target = target_file.replace(".pickle", "")
                if target not in self.jdm.get_targets_having_journeys():
                    print("target: ", target)
                    profiles = pickle.load(open(os.path.join(root, target_file), 'rb'))

                    self.jdm.import_journey_data_for_target_stop(int(target), profiles)
                else:
                    print("skipping: ", target, " already in db")

        self.jdm.create_indices()

    def calculate_additional_columns_for_journey(self):
        if not self.jdm:
            self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir,
                                          routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                          track_route=TRACK_ROUTE)
        self.jdm.populate_additional_journey_columns()
        self.jdm.compute_and_store_travel_impedance_measures(self.analysis_start_time, self.analysis_end_time, TRAVEL_IMPEDANCE_STORE_PATH)

    def calculate_comparison_measures(self):
        if not self.jdm:
            self.jdm = JourneyDataManager(self.gtfs_dir, journey_db_path=self.journey_dir,
                                          routing_params=self.routing_params, track_vehicle_legs=TRACK_VEHICLE_LEGS,
                                          track_route=TRACK_ROUTE)
        prev_dict = None
        prev_key = None
        before_db_tuple = None
        after_db_tuple = None
        # For each consecutive pair of feeds, the feed with the smaller
        # feed_seq becomes the "after" database and the other the "before"
        # database; the last pair in FEED_LIST wins.
        for (key, feed_dict) in FEED_LIST:
            if prev_dict:
                if feed_dict["feed_seq"] < prev_dict["feed_seq"]:
                    after_db_tuple = (feed_dict["journey_dir"], key)
                    before_db_tuple = (prev_dict["journey_dir"], prev_key)
                else:
                    before_db_tuple = (feed_dict["journey_dir"], key)
                    after_db_tuple = (prev_dict["journey_dir"], prev_key)
            prev_dict = feed_dict
            prev_key = key

        self.jdm.initialize_comparison_tables(DIFF_PATH, before_db_tuple, after_db_tuple)
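
    # calculate_comparison_measures assumes a module-level FEED_LIST of
    # (key, feed_dict) pairs such as (hypothetical values, other feed_dict
    # keys omitted):
    #
    #   FEED_LIST = [("old", {"journey_dir": ..., "feed_seq": 1}),
    #                ("lm", {"journey_dir": ..., "feed_seq": 0})]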