def get_simulated_trips(graph: nx.DiGraph, ntrips=10000, edge_weight="len"):
    """
    Generate `ntrips` simulated trips between random pairs of graph nodes.

    Each trip is a pair (u, v) of two nodes sampled from `graph.nodes` with a
    fixed random seed, so repeated calls on the same graph give the same trips.

    Returns a DataFrame with the columns
        u, v                                -- trip endpoints (graph nodes),
        path, distance                      -- the two values returned by
                                               `manhattan.GraphPathDist` for
                                               (u, v) under `edge_weight`,
        pickup_latitude, pickup_longitude   -- the "loc" attribute of u,
        dropoff_latitude, dropoff_longitude -- the "loc" attribute of v.
    """
    rng = np.random.RandomState(1)
    candidates = pd.Series(graph.nodes)

    # Two nodes per trip, all drawn from the same random stream
    endpoints = [
        tuple(candidates.sample(2, random_state=rng))
        for __ in range(ntrips)
    ]
    trips = pd.DataFrame(data=endpoints, columns=["u", "v"])

    with manhattan.GraphPathDist(graph, edge_weight=edge_weight) as pathdist:
        od_pairs = list(zip(trips.u, trips.v))
        routed = pd.DataFrame(
            data=parallel_map(pathdist, progressbar(od_pairs)),
            index=trips.index,
            columns=["path", "distance"],
        )
        trips = trips.join(routed, how="inner")

    # Attach the location of the endpoints, one pair of columns per endpoint
    loc_of = nx.get_node_attributes(graph, name="loc")
    for (prefix, endpoint) in (("pickup", "u"), ("dropoff", "v")):
        coords = pd.DataFrame(
            data=[loc_of[n] for n in trips[endpoint]],
            index=trips.index,
            columns=[F"{prefix}_latitude", F"{prefix}_longitude"],
        )
        trips = trips.join(coords, how="inner")

    return trips
def trip_distance_vs_shortest(table_name):
    """
    Plot the reported trip distance against the shortest on-graph distance.

    Loads the trips of `table_name`, computes the shortest path (by the "len"
    edge attribute) between the endpoints `u` and `v` of each trip, and draws
    a scatter plot of reported vs shortest distance in km, one color per
    pickup hour. Only trips where both distances are below 10km are shown.

    Side effects: selects the matplotlib "Agg" backend, writes the figure to
    `<PARAM['out_images_path']>/<name of this function>/<table_name>.png` and
    a JSON file with the number of plotted datapoints next to it (same path
    with ".txt" appended). Returns nothing.
    """
    mpl.use("Agg")

    graph = get_road_graph()
    trips = get_trip_data(table_name, graph)

    with Section("Computing shortest distances", out=logger.debug):
        trips = trips.join(
            pd.DataFrame(
                data=parallel_map(GraphPathDist(graph, edge_weight="len"), zip(trips.u, trips.v)),
                columns=['path', 'shortest'],
                index=trips.index,
            )
        )

    # On-graph distance vs reported distance [meters]
    df = pd.DataFrame(data=dict(
        reported=trips['distance'],
        shortest=trips['shortest'],
    ))

    # Convert to [km]; rows where either distance is 10km or more
    # (or missing) are dropped
    df = df / 1e3
    df = df.where(df < 10).dropna()

    # Hour of the day (aligned on the trip index)
    df['h'] = trips['pickup_datetime'].dt.hour

    with plt.style.context(PARAM['mpl_style']):
        with Axes() as ax1:
            ax1.set_aspect(aspect="equal", adjustable="box")
            ax1.grid()

            # Diagonal "reported == shortest" as a visual reference
            top = df[['reported', 'shortest']].values.max()
            ax1.plot([0, top], [0, top], c='k', ls='--', lw=0.5, zorder=100)

            cmap = plt.get_cmap("twilight_shifted")
            for (h, hdf) in df.groupby(df['h']):
                c = cmap([h / 24])
                ax1.scatter(
                    hdf['reported'], hdf['shortest'],
                    c=c, s=3, alpha=0.8, lw=0, zorder=10,
                    label=(F"{len(hdf)} trips at {h}h")
                )

            ax1.set_xlabel("Reported distance, km")
            ax1.set_ylabel("Naive graph distance, km")
            ax1.set_xticks(range(11))
            ax1.set_yticks(range(11))
            ax1.legend()

            # Save to file
            fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
            ax1.figure.savefig(makedirs(fn))

            # Meta info; the context manager makes sure the file is flushed and closed
            with open((fn + ".txt"), 'w') as fd:
                json.dump({'number_of_datapoints': len(df)}, fd)
def complete_all(presults) -> Generator:
    """
    Lazily map-match a stream of partial results.

    Each item of `presults` is passed through `mapmatch_prepare_subgraph`;
    items left with fewer than PARAM['waypoints_min_number'] entries in
    'waypoints_used' are dropped. The remaining items are processed by
    `mapmatch_complete_this` in batches via `commons.parallel_map`, and the
    results are yielded batch after batch.
    """
    # Attach a relevant graph extract around the waypoints
    prepared = (mapmatch_prepare_subgraph(p) for p in presults)

    # The previous step may have reduced the number of waypoints in subgroups
    eligible = (
        p for p in prepared
        if len(p['waypoints_used']) >= PARAM['waypoints_min_number']
    )

    # Mapmatch -- batch-parallel version
    # Note (from the original author): 'Parallel' does not yield
    # until all tasks of a batch are complete
    batch_size = 5 * commons.PARALLEL_MAP_CPUS
    for batch in commons.batchup(eligible, batch_size):
        yield from commons.parallel_map(mapmatch_complete_this, batch)
def trip_trajectories_ingraph(table_name):
    """
    Plot estimated on-graph trajectories of a random sample of trips.

    Loads the trips of `table_name`, keeps a random sample of at most 1000 of
    them, computes a path between the endpoints `u` and `v` of each with
    `GraphPathDist(graph).path_only`, and draws the paths as thin
    semi-transparent lines over a background map image covering the graph
    nodes. The line color depends on the table name.

    Side effects: selects the matplotlib "Agg" backend, writes the figure to
    `<PARAM['out_images_path']>/<name of this function>/<table_name>.png` and
    a JSON file with the number of trajectories next to it (same path with
    ".txt" appended). Returns nothing.
    """
    mpl.use("Agg")

    # Max number of trajectories to plot
    N = 1000

    graph = get_road_graph()
    # One row per node, columns taken from the "loc" node attribute
    nodes = pd.DataFrame(data=nx.get_node_attributes(graph, "loc"), index=["lat", "lon"]).T

    trips = get_trip_data(table_name, graph)
    trips = trips.sample(min(N, len(trips)))
    logger.debug(F"{len(trips)} trips")

    logger.debug("Computing trajectories")
    trajectories = parallel_map(GraphPathDist(graph).path_only, zip(trips.u, trips.v))

    with Section("Getting the background OSM map", out=logger.debug):
        extent = maps.ax4(nodes.lat, nodes.lon)
        osmap = maps.get_map_by_bbox(maps.ax2mb(*extent))

    with plt.style.context({**PARAM['mpl_style'], 'font.size': 5}):
        with Axes() as ax1:
            # The background map
            ax1.imshow(osmap, extent=extent, interpolation='quadric', zorder=-100)

            ax1.axis("off")
            ax1.set_xlim(extent[0:2])
            ax1.set_ylim(extent[2:4])

            # Line color by table name (the later match wins)
            c = 'b'
            if ("green" in table_name):
                c = "green"
            if ("yello" in table_name):
                c = "orange"

            logger.debug("Plotting trajectories")
            for traj in trajectories:
                (y, x) = nodes.loc[list(traj)].values.T
                ax1.plot(x, y, c=c, alpha=0.1, lw=0.3)

            # Save to file
            fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
            ax1.figure.savefig(makedirs(fn))

            # Meta info; the context manager makes sure the file is flushed and closed
            with open((fn + ".txt"), 'w') as fd:
                json.dump({'number_of_trajectories': len(trips)}, fd)
def trip_trajectories_velocity(table_name):
    """
    Color the road graph edges by the average velocity of trips crossing them.

    Loads up to 10000 trips of `table_name` with a pickup time between
    2016-05-02 08:00 and 09:00, assigns to each trip the velocity
    `distance / duration/s` and an estimated trajectory computed by
    `GraphPathDist(graph).path_only`. Every edge on a trajectory collects the
    velocity of that trip; the per-edge mean is clipped to [2, 6], rounded,
    rescaled to [0, 1] and mapped to a brown-red-orange-green colormap.
    Edges not visited by any trajectory are not drawn.

    Side effects: selects the matplotlib "Agg" backend, writes the figure to
    `<PARAM['out_images_path']>/<name of this function>/<table_name>.png` and
    a JSON file with the number of trajectories next to it (same path with
    ".txt" appended). Returns nothing.
    """
    mpl.use("Agg")

    # Max number of trajectories to use
    N = 10000

    graph = get_road_graph()
    # One row per node, columns taken from the "loc" node attribute
    nodes = pd.DataFrame(data=nx.get_node_attributes(graph, "loc"), index=["lat", "lon"]).T

    where = "('2016-05-02 08:00' <= pickup_datetime) and (pickup_datetime <= '2016-05-02 09:00')"
    trips = get_trip_data(table_name, graph, order="", limit=N, where=where)

    # Presumably meters per second -- units depend on get_trip_data; TODO confirm
    trips['velocity'] = trips['distance'] / trips['duration/s']
    trips = trips.sort_values(by='velocity', ascending=True)

    logger.debug(F"{len(trips)} trips")

    with Section("Computing estimated trajectories", out=logger.debug):
        trips['traj'] = parallel_map(GraphPathDist(graph).path_only, zip(trips.u, trips.v))

    with Section("Getting the background OSM map", out=logger.debug):
        extent = maps.ax4(nodes.lat, nodes.lon)
        osmap = maps.get_map_by_bbox(maps.ax2mb(*extent))

    with Section("Computing edge velocities", out=logger.debug):
        # Velocities of all trips whose trajectory crosses a given edge
        samples = defaultdict(list)
        for (traj, v) in zip(trips.traj, trips.velocity):
            for e in pairwise(traj):
                samples[e].append(v)

        # Mean velocity per edge, in the order of graph.edges;
        # edges without any sample become NaN and are dropped
        edge_vel = pd.Series({e: np.mean(v or np.nan) for (e, v) in samples.items()}, index=graph.edges)
        edge_vel = edge_vel.dropna()

    with plt.style.context({**PARAM['mpl_style'], 'font.size': 5}), Axes() as ax1:
        # The background map
        ax1.imshow(osmap, extent=extent, interpolation='quadric', zorder=-100)

        ax1.axis("off")
        ax1.set_xlim(extent[0:2])
        ax1.set_ylim(extent[2:4])

        cmap_velocity = LinearSegmentedColormap.from_list(name="noname", colors=["brown", "r", "orange", "g"])

        # Velocity -> [0, 1] -> color
        edge_vel = edge_vel.clip(lower=2, upper=6).round()
        edge_vel = (edge_vel - edge_vel.min()) / (edge_vel.max() - edge_vel.min())
        edge_vel = edge_vel.apply(cmap_velocity)

        # `edgelist` is the keyword that selects and orders the drawn edges;
        # passing it explicitly keeps `edge_color` aligned with the edges.
        # (`with_labels` is an option of nx.draw_networkx, not of this function.)
        nx.draw_networkx_edges(
            graph.edge_subgraph(edge_vel.index),
            ax=ax1,
            pos=nx.get_node_attributes(graph, name="pos"),
            edgelist=list(edge_vel.index),
            edge_color=list(edge_vel),
            arrows=False, node_size=0, alpha=0.8, width=0.3,
        )

        # Save to file
        fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
        ax1.figure.savefig(makedirs(fn))

        # Meta info; the context manager makes sure the file is flushed and closed
        with open((fn + ".txt"), 'w') as fd:
            json.dump({'number_of_trajectories': len(trips)}, fd)
# Directed road graph: one edge per consecutive pair of nodes of each way,
# carrying the way's OSM id and tags as edge attributes
G = nx.DiGraph()

for (osm_id, way) in ways.iterrows():
    G.add_edges_from(pairwise(way['nodes']), osm_id=osm_id, **way['tags'])

    # Unless tagged oneway=yes, the way is also traversable backwards
    # https://wiki.openstreetmap.org/wiki/Key:oneway
    if way['tags'].get('oneway', "no").lower() != "yes":
        G.add_edges_from(pairwise(reversed(way['nodes'])), osm_id=osm_id, **way['tags'])


def edge_len(uv):
    """Return (edge, length of the edge) from the 'loc' of its endpoints, using the `.m` value of `distance`."""
    (a, b) = uv
    loc = nodes['loc']
    return (uv, distance(loc[a], loc[b]).m)


# Edge lengths and node locations as graph attributes
nx.set_edge_attributes(G, name="len", values=dict(parallel_map(edge_len, G.edges)))
for attr in ("loc", "pos"):
    nx.set_node_attributes(G, name=attr, values=dict(nodes[attr]))

with Section("Breaking down long edges", out=print):
    print(F"Before: {G.number_of_nodes()} nodes / {G.number_of_edges()} edges")
    break_long_edges(G, max_edge_len=PARAM['max_graph_edge_len'])
    print(F"After: {G.number_of_nodes()} nodes / {G.number_of_edges()} edges")

# Rebuild the node table from the "loc" attribute of the (possibly grown) graph
nodes = pd.DataFrame(data=nx.get_node_attributes(G, name="loc"), index=["lat", "lon"]).T
nodes['loc'] = list(zip(nodes['lat'], nodes['lon']))
# Node position for plotting: (lon, lat)
nodes['pos'] = list(zip(nodes['lon'], nodes['lat']))
table_name="yellow_tripdata_2016-05", where= "('2016-05-02 08:00' <= pickup_datetime) and (dropoff_datetime <= '2016-05-02 08:30')", limit=200, ) trips = a_effective_metric_manhattan.get_taxidata_trips(**sql) trips = trips.join(a_effective_metric_manhattan.project(trips, graph), how="inner") # Attach estimated trajectories of trips with a_effective_metric_manhattan.GraphPathDist(graph, edge_weight="met") as gpd: trips = trips.join( pd.DataFrame( data=parallel_map(gpd, progressbar(list(zip(trips.u, trips.v)))), index=trips.index, columns=["path", "dist"], ), how="inner", ) class SpaceTimeTraj: def __init__(self, graph: nx.DiGraph): self.nodes_loc = nx.get_node_attributes(graph, name="loc") self.edges_met = nx.get_edge_attributes(graph, name="met") self.pathdist = GraphPathDist(graph, edge_weight="met") self.approx_dist = ApproxGeodistance(graph, location_attr="loc") def length(self, path):
def experiment(graph_size=32, ntrips=1000, noise=0.2, num_rounds=64):
    """
    Synthetic run of `refine_effective_metric` on an 'odd king' graph.

    A `graph_size` x `graph_size` graph is built, and a hidden edge metric
    "met" is derived from the edge attribute "len" by a random relative
    inflation of at most `noise`. `ntrips` trips between distinct random
    nodes are routed under the hidden metric; their endpoints and distances
    are then handed to `refine_effective_metric` for `num_rounds` rounds.

    Returns a DataFrame indexed by edge with the columns 'secret' (the hidden
    metric), 0 (the initial guess "len") and one column for each round whose
    number is a power of two (the metric estimate reported in that round).
    """
    graph = odd_king_graph(xn=graph_size, yn=graph_size, scale=50)
    logger.debug(F"Constructed 'odd king' graph with {graph.number_of_nodes()} nodes")

    # Hidden metric: each edge length scaled by 1 + noise * (random number)
    rng = np.random.RandomState(1)
    edge_lengths = nx.get_edge_attributes(graph, name="len")
    secret_met = {}
    for (edge, length) in edge_lengths.items():
        secret_met[edge] = length * (1 + noise * rng.random())
    nx.set_edge_attributes(graph, name="met", values=secret_met)

    assert sorted(graph.nodes) == sorted(range(graph.number_of_nodes())), "Expect node labels to be 0, 1, ..."

    # Random trips between two distinct nodes
    rng = np.random.RandomState(2)
    endpoints = [
        rng.choice(graph.number_of_nodes(), size=2, replace=False)
        for __ in range(ntrips)
    ]
    trips = pd.DataFrame(data=endpoints, columns=["u", "v"])

    with Section(F"Collecting {len(trips)} secret trips", out=logger.debug):
        with GraphPathDist(graph, edge_weight="met") as pathdist:
            # Trajectories of the trips under the hidden metric
            od_pairs = list(zip(trips.u, trips.v))
            secret = pd.DataFrame(
                data=parallel_map(pathdist, progressbar(od_pairs)),
                index=trips.index,
                columns=["secret_path", "distance"],
            )
            trips = trips.join(secret)

    # How many trips cross each edge, then how many edges share each count
    edge_visits = Counter()
    for path in trips['secret_path']:
        edge_visits.update(pairwise(path))
    coverage = pd.Series(dict(edge_visits))

    histogram = sorted(Counter(coverage).items(), key=first)
    logger.debug([F"{nedges} edges x{cov}" for (cov, nedges) in histogram])

    # Initial metric guess is given by "len"
    history = pd.DataFrame({
        'secret': secret_met,
        0: pd.Series(nx.get_edge_attributes(graph, name="len")),
    })

    def record(info):
        # Keep the estimate of the rounds 1, 2, 4, 8, ...
        nearest_power = 2 ** round(log2(info.round))
        if info.round == nearest_power:
            history[info.round] = info.edges_met

    opt = options_refine_effective_metric()
    opt.min_trip_distance_m = 0.1
    opt.max_trip_distance_m = 1e8
    opt.num_rounds = num_rounds

    refine_effective_metric(graph, trips, callback=record, opt=opt)

    return history
def refine_once(graph: nx.DiGraph, trips: pd.DataFrame):
    """
    One correction step for the per-edge crossing times stored on `graph`.

    The crossing time of an edge lives in the edge attribute named by
    PARAM['edge_time_attr']; edges that do not have it yet start from
    `len / 3`. Trips are projected to the graph, routed by the current
    crossing times, and a damped least-squares problem (trip duration vs sum
    of crossing times along the estimated path) yields a correction. The
    update is limited to a factor of 1.1 up or down per call and is written
    back to the graph attribute. Returns nothing.
    """
    time_attr = PARAM['edge_time_attr']

    # Default values first, then overwrite using existing values
    initial = {(a, b): (length / 3) for (a, b, length) in graph.edges.data("len")}
    initial.update(nx.get_edge_attributes(graph, name=time_attr))
    edge_dt = pd.Series(initial)

    nx.set_edge_attributes(graph, name=time_attr, values=dict(edge_dt))

    trips = trips.join(trip_endpoints_on_graph(trips, graph), how="inner")

    with graphs.GraphPathDist(graph, edge_weight=time_attr) as pathdist:
        od_pairs = list(zip(trips.a, trips.b))
        estimated = pd.DataFrame(
            data=commons.parallel_map(pathdist, progressbar(od_pairs)),
            columns=["est_path", "est_path_dt"],
            index=trips.index,
        )
        trips = trips.join(estimated)

    # Only trips with a positive reported and a positive estimated duration
    trips = trips[(trips.duration > 0) & (trips.est_path_dt > 0)]
    trips['f'] = trips.duration / trips.est_path_dt

    def path_edge_matrix(
            paths, edges) -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
        # Sparse [path x edge] matrix counting how often a path uses an edge,
        # with the indices of the rows / columns that have any entry
        column_of = dict(zip(edges, itertools.count()))
        entries = []
        for (i, path) in enumerate(paths):
            for e in pairwise(path):
                entries.append((i, column_of[e]))
        (ii, jj) = np.array(entries).T
        M = sparse.csr_matrix((np.ones(len(ii)), (ii, jj)),
                              shape=(len(paths), len(edges)))
        return (M, np.unique(ii), np.unique(jj))

    def Vec(a) -> np.ndarray:
        # Flat float copy of `a`
        return np.array(a, dtype=float).reshape(-1)

    with commons.Section("Solving LSQR", out=print):
        # Trip x Edge incidence matrix
        (M, nzi, nzj) = path_edge_matrix(trips.est_path, edge_dt.index)

        # *Duration* for each trip
        trip_t = Vec(trips.duration)
        # *Time-to-cross* for each edge, current estimate
        edge_t = Vec(edge_dt)

        # Diagonal scaling of the unknowns by the edge attribute "len"
        S = sparse.diags([length for (__, __, length) in graph.edges.data("len")],
                         format='csc')

        # Not all trips/edges are involved: truncate the linear system
        M = M[nzi, :]
        trip_t = trip_t[nzi]
        M = M[:, nzj]
        edge_t = edge_t[nzj]
        S = S[nzj, :][:, nzj]

        assert (len(trip_t) == M.shape[0])
        assert (M.shape[1] == len(edge_t))

        # Compute the correction to the estimate (in scaled variables)
        residual = trip_t - M.dot(edge_t)
        s = linalg.lsmr(M.dot(S), residual, maxiter=100, damp=1e-1, show=True)[0]
        s = S.dot(s)

        # Update the time-to-cross estimate: a tenth of the correction,
        # staying within a factor 1.1 of the current value
        edge_t = np.clip(edge_t + 0.1 * s,
                         a_min=(edge_t / 1.1), a_max=(edge_t * 1.1))

        edge_dt.iloc[nzj] = edge_t
        nx.set_edge_attributes(graph, name=time_attr, values=dict(edge_dt))
def refine_effective_metric(
        graph: nx.DiGraph,
        trips: pd.DataFrame,
        opt=options_refine_effective_metric(),
        callback=None,
        edges_met=None,
        skip_rounds=0,
) -> pd.Series:
    """
    Iteratively estimate an effective length for every edge of the graph.

    In each round the trips are routed under the current metric, each
    estimated trajectory gets the factor f = (trip distance) / (trajectory
    length), and every edge is rescaled by a moderated geometric mean of the
    factors of a random half of the trajectories crossing it. The metric is
    kept within [len / 2, len * 4] and slowly pulled back to the edge
    attribute "len".

    Parameters:
        graph: road graph with the edge attribute "len".
        trips: table with the columns 'u', 'v' (graph nodes) and 'distance'.
            Trips with u == v or with a distance outside
            [opt.min_trip_distance_m, opt.max_trip_distance_m] are ignored.
        opt: options; the attributes read here are
            temp_graph_metric_attr_name, min_trip_distance_m,
            max_trip_distance_m, num_rounds, random_state and
            correction_factor_moderation.
            NOTE(review): the default is created once, when the function
            is defined, and is shared by all calls that omit `opt`.
        callback: if given, called at the end of each round with a
            SimpleNamespace holding graph, trips, edges_met, traj, round
            and correction.
        edges_met: optional initial guess (used but not modified);
            has to be given together with a nonzero `skip_rounds`.
        skip_rounds: number of initial rounds regarded as already done.

    Returns:
        A pandas series edges_met such that edges_met[E] is the effective
        length of edge E.

    Invalidates the edge attribute opt.temp_graph_metric_attr_name
    in the graph if present.
    """

    if nx.get_edge_attributes(graph, name=opt.temp_graph_metric_attr_name):
        logger.warning(
            F"Graph edge attributes '{opt.temp_graph_metric_attr_name}' will be invalidated"
        )

    # Only nontrivial trips that are not too short or too long
    trips = trips[trips['u'] != trips['v']]
    trips = trips[trips['distance'] >= opt.min_trip_distance_m]
    trips = trips[trips['distance'] <= opt.max_trip_distance_m]

    logger.debug(F"Trip pool has {len(trips)} trips")

    assert ((edges_met is not None) == bool(skip_rounds)
            ), "Both or none of (edges_met, skip_rounds) should be provided"

    # Geographic metric as initial guess / prior
    edges_len = pd.Series(data=nx.get_edge_attributes(graph, name="len"), name="len")

    # Effective metric, to be modified (a copy, so the caller's data is left alone)
    if edges_met is not None:
        edges_met = pd.Series(name="met", copy=True, data=edges_met)
    else:
        edges_met = pd.Series(name="met", copy=True, data=nx.get_edge_attributes(graph, name="len"))
        skip_rounds = 0

    def geometric_mean(factors):
        # Geometric mean of the factors; 1 if there are none
        return 2 ** np.mean(np.log2(factors if len(factors) else 1))

    # NOTE(review): `range[first, last]` is not valid for the builtin `range`;
    # this presumably relies on a file-level `range` object that supports
    # subscription (with `last` included?) as well as ordinary calls.
    # Confirm against the imports of this file.
    for r in range[1 + skip_rounds, opt.num_rounds]:
        logger.debug(F"Round {r}")

        if edges_met.isna().any():
            logger.warning("There are edges with 'n/a' metric")

        with Section("Computing trajectories", out=logger.debug):
            # Expose the current metric to the path finder
            nx.set_edge_attributes(graph, name=opt.temp_graph_metric_attr_name, values=dict(edges_met))

            with GraphPathDist(graph, edge_weight=opt.temp_graph_metric_attr_name) as gpd:
                # Estimated trajectories of trips
                traj = pd.DataFrame(
                    data=parallel_map(gpd, progressbar(list(zip(trips.u, trips.v)))),
                    index=trips.index,
                    columns=["path", "dist"],
                )

            # Per-trajectory correction factor
            traj['f'] = trips['distance'] / traj['dist']

            logger.debug(
                F"Weight correction using {sum(traj.f < 1)}(down) + {sum(traj.f > 1)}(up) trips"
            )

        with Section("Computing correction factors", out=logger.debug):
            with Section("Edges of trajectories"):
                # Position of each edge in edges_met, then the
                # positions of the edges along each trajectory
                edges_loci = dict(zip(edges_met.index, range(len(edges_met))))
                edges_of_traj = [
                    tuple(edges_loci[e] for e in pairwise(path))
                    for path in progressbar(traj.path)
                ]

            with Section("Incidence matrix [trips x edges]"):
                # Row t holds the factor of trajectory t at each of its edges
                M = dok_matrix((len(traj), len(edges_met)), dtype=float)
                for (t, edges, f) in zip(range(M.shape[0]), edges_of_traj, traj.f):
                    M[t, edges] = f
                del edges_of_traj

            with Section("Subsample trips"):
                I = pd.Series(range(M.shape[0])).sample(frac=0.5, random_state=opt.random_state)
                M = csr_matrix(M)[I, :]

            with Section("Compute correction"):
                # Column access is needed: one column per edge
                M = csc_matrix(M)
                correction = pd.Series(
                    index=edges_met.index,
                    data=[geometric_mean(M.getcol(j).data) for j in range(M.shape[1])],
                ).fillna(1)

                # Clip and moderate the correction factors
                correction = 2 ** (opt.correction_factor_moderation * np.log2(correction).clip(lower=-1, upper=+1))

        with Section("Applying correction factors", out=logger.debug):
            edges_met = edges_met * correction

            # Clip extremes, slow-revert to the prior
            edges_met = edges_met.clip(lower=(edges_len / 2), upper=(edges_len * 4))
            edges_met = edges_met * (2 ** (0.01 * np.log2(edges_len / edges_met)))

        if callback:
            callback(
                SimpleNamespace(graph=graph, trips=trips, edges_met=edges_met,
                                traj=traj, round=r, correction=correction))

    # Record the estimated metric
    nx.set_edge_attributes(graph, name=opt.temp_graph_metric_attr_name, values=dict(edges_met))

    logger.debug("Iteration done")

    return edges_met