Esempio n. 1
0
    def get_n_winning_targets_using_change_in_mean(self,
                                                   n,
                                                   measure,
                                                   distance=500,
                                                   threshold=10800,
                                                   losers=False,
                                                   include_list=None):
        """
        Select up to n target stops ranked by the change of their averaged
        ``mean`` value between the ``before`` and ``after`` databases, never
        picking a stop that lies near an already selected one.

        For every ``to_stop_I`` the ``mean`` column of ``before.<measure>``
        and ``after.<measure>`` is averaged (rows above ``threshold`` are
        ignored) and ``diff_mean = after - before`` is computed. Stops are
        then walked in ``diff_mean`` order; each accepted stop blocks all
        stops returned by ``get_stops_within_distance`` for it.

        :param n: number of stops to select; the walk stops as soon as n
            stops have been accepted
        :param measure: table name read from both databases. It is
            interpolated into the SQL text as-is (identifiers cannot be bound
            as parameters), so it must come from trusted code.
        :param distance: radius handed to ``get_stops_within_distance``
            (presumably metres -- confirm against the GTFS helper)
        :param threshold: upper bound for ``mean``; larger rows are skipped
            (presumably seconds -- confirm against the table schema)
        :param losers: if True order by ``diff_mean`` descending (largest
            increase first), otherwise ascending (largest decrease first)
        :param include_list: iterable of ``to_stop_I`` values the query is
            restricted to; ``None`` (the default) applies no restriction
        :return: the rows of the query result (columns ``to_stop_I`` and
            ``diff_mean``) that belong to the selected stops, in query order
        """
        order_by = "DESC" if losers else "ASC"

        # The default include_list=None used to crash in the join below;
        # treat it as "no stop restriction" instead.
        if include_list is None:
            stop_filter = ""
        else:
            stop_filter = " AND to_stop_I IN ({})".format(
                ",".join(str(x) for x in include_list))

        query = """SELECT t1.to_stop_I, t2.mean-t1.mean AS diff_mean FROM
                    (SELECT to_stop_I, avg(mean) AS mean FROM before.{measure}
                     WHERE mean <= {threshold}{stop_filter}
                    GROUP BY to_stop_I) t1,
                    (SELECT to_stop_I, avg(mean) AS mean FROM after.{measure}
                     WHERE mean <= {threshold}{stop_filter}
                    GROUP BY to_stop_I) t2
                    WHERE t1.to_stop_I=t2.to_stop_I
                    ORDER BY diff_mean {order_by}
                    """.format(measure=measure,
                               threshold=threshold,
                               order_by=order_by,
                               stop_filter=stop_filter)

        df = pandas.read_sql_query(query, self.conn)

        # Walk the ranked stops and greedily keep those that are not close to
        # a stop that was kept earlier.
        nearby_excluded_stops = set()  # set: O(1) membership per candidate
        stops_remaining = []
        gtfs = GTFS(GTFS_PATH)
        for value in df.itertuples():
            if value.to_stop_I in nearby_excluded_stops:
                continue
            exclude_df = gtfs.get_stops_within_distance(
                value.to_stop_I, distance)
            nearby_excluded_stops.update(exclude_df["stop_I"])
            stops_remaining.append(value.to_stop_I)
            if len(stops_remaining) == n:
                break
        return df.loc[df['to_stop_I'].isin(stops_remaining)]
def _with_nearby_stops(gtfs, stations, distance):
    """Return *stations* extended with the stops around each of its rows.

    Calls ``gtfs.get_stops_within_distance`` once per row of *stations* and
    concatenates all results after the original rows with a single
    ``pd.concat`` (``DataFrame.append`` was removed in pandas 2.0).
    Duplicates are left in place for the caller to drop.
    """
    nearby = [gtfs.get_stops_within_distance(row.stop_I, distance)
              for row in stations.itertuples()]
    return pd.concat([stations] + nearby)


def analysis_zones(as_dict=False):
    """
    Partition the stops of the LM feed into zone types and return them both
    per zone and as one combined frame.

    Zones and their ``stop_cat`` value:

    1. old metro: route-type-1 stops of the LM feed that are not new, plus
       stops within ``station_distance`` of them
    2. new metro: route-type-1 stops of the LM feed missing from the old
       feed, plus stops within ``station_distance`` of them
    3. commuter train: route-type-2 stops plus their surroundings
    4. feeder bus area: result of ``get_upstream_stops_ratio`` towards the
       new metro stops
    5. other stops: every remaining stop of the LM feed

    Overlaps are resolved in the order above and the stops returned by
    ``stops_to_exclude`` are removed from train, feeder and other zones.
    NOTE(review): this relies on ``difference_of_pandas_dfs(a, b, cols)``
    returning the rows of ``b`` that have no match in ``a`` -- confirm
    against its definition.

    :param as_dict: if True return the zones as a dict keyed by zone name,
        otherwise as a list of ``(zone name, DataFrame)`` tuples
    :return: tuple ``(zones, all_stops)`` where ``all_stops`` is the
        concatenation of the five zone frames with a fresh index
    """
    gtfs_old = GTFS(OLD_DICT["gtfs_dir"])
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
    station_distance = 600
    upstream_ratio = 0.5

    df_old = gtfs_old.get_stops_for_route_type(1)
    df_lm = gtfs_lm.get_stops_for_route_type(1)
    new_metro = difference_of_pandas_dfs(df_old, df_lm, ["stop_I"])
    old_metro = difference_of_pandas_dfs(new_metro, df_lm, ["stop_I"])
    train = gtfs_lm.get_stops_for_route_type(2)
    other_stops = gtfs_lm.stops()
    jda = JourneyDataAnalyzer(LM_DICT["journey_dir"], LM_DICT["gtfs_dir"])

    areas_to_remove = stops_to_exclude(return_sqlite_list=False)

    # The feeder area is computed from the bare station list, i.e. before
    # new_metro is widened with the surrounding stops below. The original
    # appended this frame to an empty DataFrame, which holds the same rows.
    feeder_area = jda.get_upstream_stops_ratio(
        1040, [str(i.stop_I) for i in new_metro.itertuples()], upstream_ratio)

    print("new metro")
    new_metro = _with_nearby_stops(gtfs_lm, new_metro, station_distance)

    print("old metro")
    old_metro = _with_nearby_stops(gtfs_lm, old_metro, station_distance)

    print("train")
    train = _with_nearby_stops(gtfs_lm, train, station_distance)

    new_metro = new_metro.drop_duplicates().reset_index(drop=True)
    old_metro = old_metro.drop_duplicates().reset_index(drop=True)
    train = train.drop_duplicates().reset_index(drop=True)
    feeder_area = feeder_area.drop_duplicates().reset_index(drop=True)

    # cleaning up borders: a stop belongs to the first zone that claims it
    new_metro = difference_of_pandas_dfs(old_metro, new_metro, ["stop_I"])
    for zone in [new_metro, old_metro, areas_to_remove]:
        train = difference_of_pandas_dfs(zone, train, ["stop_I"])
    for zone in [new_metro, train, old_metro, areas_to_remove]:
        feeder_area = difference_of_pandas_dfs(zone, feeder_area, ["stop_I"])

    spec_areas = pd.concat(
        [new_metro, old_metro, train, feeder_area, areas_to_remove])

    other_stops = difference_of_pandas_dfs(spec_areas, other_stops, ["stop_I"])

    old_metro = old_metro.assign(stop_cat=1)
    new_metro = new_metro.assign(stop_cat=2)
    train = train.assign(stop_cat=3)
    feeder_area = feeder_area.assign(stop_cat=4)
    other_stops = other_stops.assign(stop_cat=5)
    all_stops = pd.concat(
        [new_metro, old_metro, train, feeder_area,
         other_stops]).reset_index(drop=True)

    if as_dict:
        all_dfs = {
            "new_metro_stations": new_metro,
            "feeder_bus_area": feeder_area,
            "old_metro_stations": old_metro,
            "commuter_train_stations": train,
            "other_stops": other_stops,
        }
    else:
        all_dfs = [("new_metro_stations", new_metro),
                   ("feeder_bus_area", feeder_area),
                   ("old_metro_stations", old_metro),
                   ("commuter_train_stations", train),
                   ("other_stops", other_stops)]
    return all_dfs, all_stops