def get_n_winning_targets_using_change_in_mean(self, n, measure, distance=500, threshold=10800, losers=False,
                                               include_list=None):
    """
    Pick up to ``n`` target stops with the largest change in mean between the
    ``before`` and ``after`` databases, keeping the picks spatially spread out.

    For every ``to_stop_I`` the average of the ``mean`` column is computed in
    ``before.<measure>`` and ``after.<measure>`` (rows with ``mean`` above
    ``threshold`` are ignored) and the stops are ranked by
    ``diff_mean = after - before``. The ranking is then walked from the top:
    a stop is selected unless it was returned by
    ``gtfs.get_stops_within_distance`` for an earlier selected stop.

    :param n: number of stops to select; selection stops as soon as exactly
        ``n`` stops have been picked
    :param measure: name of the table to read in both attached databases
        (interpolated into the SQL text, so it must be a trusted identifier)
    :param distance: radius passed to ``get_stops_within_distance`` when
        excluding neighbours of a selected stop (presumably meters -- confirm)
    :param threshold: upper bound on ``mean`` for a row to be included
    :param losers: if True rank by descending ``diff_mean`` (largest increase
        first), otherwise ascending (largest decrease first)
    :param include_list: iterable of ``to_stop_I`` values the query is
        restricted to; ``None`` means no restriction
    :return: the rows of the ranked DataFrame (columns ``to_stop_I`` and
        ``diff_mean``) that belong to the selected stops
    """
    order_by = "DESC" if losers else "ASC"

    # The default include_list=None used to raise a TypeError when iterated;
    # treat it as "do not filter on to_stop_I".
    if include_list is None:
        include_clause = ""
    else:
        include_clause = "AND to_stop_I IN (" + ",".join(str(x) for x in include_list) + ")"

    query = """SELECT t1.to_stop_I, t2.mean-t1.mean AS diff_mean FROM
                (SELECT to_stop_I, avg(mean) AS mean FROM before.{measure}
                 WHERE mean <= {threshold} {include_clause}
                 GROUP BY to_stop_I) t1,
                (SELECT to_stop_I, avg(mean) AS mean FROM after.{measure}
                 WHERE mean <= {threshold} {include_clause}
                 GROUP BY to_stop_I) t2
                WHERE t1.to_stop_I=t2.to_stop_I
                ORDER BY diff_mean {order_by}
                """.format(measure=measure,
                           threshold=threshold,
                           order_by=order_by,
                           include_clause=include_clause)
    df = pandas.read_sql_query(query, self.conn)

    # exclude nearby stops: walk the ranking and skip every stop that lies
    # within `distance` of a stop that has already been selected
    gtfs = GTFS(GTFS_PATH)
    nearby_excluded_stops = set()  # set: membership is tested once per ranked row
    stops_remaining = []
    for row in df.itertuples():
        if row.to_stop_I in nearby_excluded_stops:
            continue
        exclude_df = gtfs.get_stops_within_distance(row.to_stop_I, distance)
        nearby_excluded_stops.update(exclude_df["stop_I"])
        stops_remaining.append(row.to_stop_I)
        if len(stops_remaining) == n:
            break

    return df.loc[df['to_stop_I'].isin(stops_remaining)]
def _append_stops_within_distance(gtfs, stops, distance):
    """Return ``stops`` followed by the stops within ``distance`` of each of its rows."""
    nearby = [gtfs.get_stops_within_distance(row.stop_I, distance)
              for row in stops.itertuples()]
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0);
    # row order is the same as appending the frames one by one.
    return pd.concat([stops] + nearby)


def analysis_zones(as_dict=False):
    """
    Returns data containers that pair zone type to a set of stops.

    Stops of the ``LM_DICT`` feed are split into five zones, each tagged with
    a ``stop_cat`` column:

    1. old metro stations, 2. new metro stations, 3. commuter train stations,
    4. feeder bus area, 5. all other stops.

    The metro and train zones are the stops of route types 1 and 2 plus every
    stop within 600 distance units of them (as returned by
    ``get_stops_within_distance``). Zones are then made disjoint via
    ``difference_of_pandas_dfs`` on ``stop_I``; the frame returned by
    ``stops_to_exclude`` is removed from the train, feeder and other zones.
    NOTE(review): ``difference_of_pandas_dfs(a, b, cols)`` is assumed to return
    the rows of ``b`` whose ``cols`` values are absent from ``a`` -- confirm.

    :param as_dict: if True the zones are returned as a dict keyed by zone
        name, otherwise as a list of ``(zone name, DataFrame)`` tuples
    :return: tuple ``(all_dfs, all_stops)`` where ``all_dfs`` is the
        dict/list described above and ``all_stops`` is the concatenation of
        the five zone DataFrames with a fresh index
    """
    station_distance = 600
    upstream_ratio = 0.5
    metro_route_type = 1
    train_route_type = 2
    # Stop passed to get_upstream_stops_ratio as the reference for the feeder
    # area. NOTE(review): meaning of this particular stop id is not visible here.
    feeder_reference_stop = 1040

    gtfs_old = GTFS(OLD_DICT["gtfs_dir"])
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])

    df_old = gtfs_old.get_stops_for_route_type(metro_route_type)
    df_lm = gtfs_lm.get_stops_for_route_type(metro_route_type)
    new_metro = difference_of_pandas_dfs(df_old, df_lm, ["stop_I"])
    old_metro = difference_of_pandas_dfs(new_metro, df_lm, ["stop_I"])
    train = gtfs_lm.get_stops_for_route_type(train_route_type)
    other_stops = gtfs_lm.stops()

    jda = JourneyDataAnalyzer(LM_DICT["journey_dir"], LM_DICT["gtfs_dir"])
    areas_to_remove = stops_to_exclude(return_sqlite_list=False)

    # The feeder area is derived from the new metro *stations* only, so it is
    # computed before new_metro is widened with the surrounding stops.
    feeder_area = jda.get_upstream_stops_ratio(
        feeder_reference_stop,
        [str(i.stop_I) for i in new_metro.itertuples()],
        upstream_ratio)

    print("new metro")
    new_metro = _append_stops_within_distance(gtfs_lm, new_metro, station_distance)
    print("old metro")
    old_metro = _append_stops_within_distance(gtfs_lm, old_metro, station_distance)
    print("train")
    train = _append_stops_within_distance(gtfs_lm, train, station_distance)

    new_metro = new_metro.drop_duplicates().reset_index(drop=True)
    old_metro = old_metro.drop_duplicates().reset_index(drop=True)
    train = train.drop_duplicates().reset_index(drop=True)
    feeder_area = feeder_area.drop_duplicates().reset_index(drop=True)

    # cleaning up borders: earlier zones take precedence over later ones
    new_metro = difference_of_pandas_dfs(old_metro, new_metro, ["stop_I"])
    for zone in [new_metro, old_metro, areas_to_remove]:
        train = difference_of_pandas_dfs(zone, train, ["stop_I"])
    for zone in [new_metro, train, old_metro, areas_to_remove]:
        feeder_area = difference_of_pandas_dfs(zone, feeder_area, ["stop_I"])

    spec_areas = pd.concat(
        [new_metro, old_metro, train, feeder_area, areas_to_remove])
    other_stops = difference_of_pandas_dfs(spec_areas, other_stops, ["stop_I"])

    old_metro = old_metro.assign(stop_cat=1)
    new_metro = new_metro.assign(stop_cat=2)
    train = train.assign(stop_cat=3)
    feeder_area = feeder_area.assign(stop_cat=4)
    other_stops = other_stops.assign(stop_cat=5)

    # areas_to_remove is deliberately not part of the combined frame
    all_stops = pd.concat(
        [new_metro, old_metro, train, feeder_area, other_stops]).reset_index(drop=True)

    zones = [("new_metro_stations", new_metro),
             ("feeder_bus_area", feeder_area),
             ("old_metro_stations", old_metro),
             ("commuter_train_stations", train),
             ("other_stops", other_stops)]
    all_dfs = dict(zones) if as_dict else zones
    return all_dfs, all_stops