def get_simulated_trips(graph: nx.DiGraph, ntrips=10000, edge_weight="len"):
	random_state = np.random.RandomState(1)

	node_ids = pd.Series(graph.nodes)
	trips = pd.DataFrame(
		data=list(tuple(node_ids.sample(2, random_state=random_state)) for __ in range(ntrips)),
		columns=["u", "v"],
	)

	with manhattan.GraphPathDist(graph, edge_weight=edge_weight) as pathdist:
		trips = trips.join(
			pd.DataFrame(
				data=parallel_map(pathdist, progressbar(list(zip(trips.u, trips.v)))),
				index=trips.index,
				columns=["path", "distance"],
			),
			how="inner",
		)

	# trips = trips.drop(columns=['path'])

	nodes_loc = nx.get_node_attributes(graph, name="loc")

	for (prefix, nn) in [("pickup", trips.u), ("dropoff", trips.v)]:
		trips = trips.join(
			pd.DataFrame(
				data=list(nodes_loc[n] for n in nn),
				index=trips.index,
				columns=[(prefix + "_" + postfix) for postfix in ["latitude", "longitude"]],
			),
			how="inner",
		)

	return trips
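
# A self-contained sketch of the same sampling idea using plain networkx instead of the
# project's manhattan.GraphPathDist helper. This is an illustration, not part of the original
# module; it assumes the sampled node pairs are connected under the given edge weight.
def simulate_trips_sketch(graph: nx.DiGraph, ntrips=100, edge_weight="len", seed=1):
	random_state = np.random.RandomState(seed)
	node_ids = pd.Series(list(graph.nodes))
	trips = pd.DataFrame(
		data=[tuple(node_ids.sample(2, random_state=random_state)) for __ in range(ntrips)],
		columns=["u", "v"],
	)
	# Shortest-path length under the chosen edge weight, one trip at a time
	trips["distance"] = [
		nx.shortest_path_length(graph, u, v, weight=edge_weight)
		for (u, v) in zip(trips.u, trips.v)
	]
	return trips
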
Example no. 2
def trip_distance_vs_shortest(table_name):
	mpl.use("Agg")

	graph = get_road_graph()
	trips = get_trip_data(table_name, graph)

	with Section("Computing shortest distances", out=logger.debug):
		trips = trips.join(
			pd.DataFrame(
				data=parallel_map(GraphPathDist(graph, edge_weight="len"), zip(trips.u, trips.v)),
				columns=['path', 'shortest'], index=trips.index,
			)
		)

	# On-graph distance vs reported distance [meters]
	df: pd.DataFrame
	df = pd.DataFrame(data=dict(
		reported=(trips['distance']),
		shortest=(trips['shortest']),
	))
	# Convert to [km] and stay below 10km
	df = df.applymap(lambda x: (x / 1e3))
	df = df.applymap(lambda km: (km if (km < 10) else np.nan)).dropna()

	# Hour of the day
	df['h'] = trips['pickup_datetime'].dt.hour

	with plt.style.context(PARAM['mpl_style']):
		with Axes() as ax1:
			ax1.set_aspect(aspect="equal", adjustable="box")
			ax1.grid()
			ax1.plot(*(2 * [[0, df[['reported', 'shortest']].values.max()]]), c='k', ls='--', lw=0.5, zorder=100)
			for (h, hdf) in df.groupby(df['h']):
				c = plt.get_cmap("twilight_shifted")([h / 24])
				ax1.scatter(
					hdf['reported'], hdf['shortest'],
					c=c, s=3, alpha=0.8, lw=0, zorder=10,
					label=(F"{len(hdf)} trips at {h}h")
				)
			ax1.set_xlabel("Reported distance, km")
			ax1.set_ylabel("Naive graph distance, km")
			ax1.set_xticks(range(11))
			ax1.set_yticks(range(11))
			ax1.legend()

			# Save to file
			fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
			ax1.figure.savefig(makedirs(fn))

			# Meta info
			json.dump({'number_of_datapoints': len(df)}, open((fn + ".txt"), 'w'))
Example no. 3
    def complete_all(presults) -> Generator:

        # Attach a relevant graph extract around the waypoints
        presults = map(mapmatch_prepare_subgraph, presults)

        # The previous step may have reduced the number of waypoints in subgroups
        presults = filter((lambda p: len(p['waypoints_used']) >= PARAM[
            'waypoints_min_number']), presults)

        # Mapmatch -- batch-parallel version
        # Note: 'Parallel' does not yield until all tasks are complete
        for presult_batch in commons.batchup(presults,
                                             5 * commons.PARALLEL_MAP_CPUS):
            yield from commons.parallel_map(mapmatch_complete_this,
                                            presult_batch)
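
# A self-contained sketch of the batch-then-parallel pattern above, using only the standard
# library (commons.batchup and commons.parallel_map are project helpers and are not reproduced
# here): results are yielded batch by batch instead of waiting for the whole input, at the
# cost of Pool.map blocking within each batch.
import itertools
from multiprocessing import Pool


def batchup_sketch(iterable, batch_size):
    it = iter(iterable)
    while True:
        batch = list(itertools.islice(it, batch_size))
        if not batch:
            return
        yield batch


def complete_all_sketch(items, worker, processes=4, batch_size=20):
    # 'worker' must be picklable, e.g. a module-level function
    with Pool(processes) as pool:
        for batch in batchup_sketch(items, batch_size):
            yield from pool.map(worker, batch)
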
Example no. 4
def trip_trajectories_ingraph(table_name):
	mpl.use("Agg")

	# Max number of trajectories to plot
	N = 1000

	graph = get_road_graph()
	nodes = pd.DataFrame(data=nx.get_node_attributes(graph, "loc"), index=["lat", "lon"]).T

	trips = get_trip_data(table_name, graph)

	trips = trips.sample(min(N, len(trips)))
	logger.debug(F"{len(trips)} trips")

	logger.debug("Computing trajectories")
	trajectories = parallel_map(GraphPathDist(graph).path_only, zip(trips.u, trips.v))

	with Section("Getting the background OSM map", out=logger.debug):
		extent = maps.ax4(nodes.lat, nodes.lon)
		osmap = maps.get_map_by_bbox(maps.ax2mb(*extent))

	with plt.style.context({**PARAM['mpl_style'], 'font.size': 5}):
		with Axes() as ax1:
			# The background map
			ax1.imshow(osmap, extent=extent, interpolation='quadric', zorder=-100)

			ax1.axis("off")

			ax1.set_xlim(extent[0:2])
			ax1.set_ylim(extent[2:4])

			c = 'b'
			if ("green" in table_name): c = "green"
			if ("yello" in table_name): c = "orange"

			logger.debug("Plotting trajectories")
			for traj in trajectories:
				(y, x) = nodes.loc[list(traj)].values.T
				ax1.plot(x, y, c=c, alpha=0.1, lw=0.3)

			# Save to file
			fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
			ax1.figure.savefig(makedirs(fn))

			# Meta info
			json.dump({'number_of_trajectories': len(trips)}, open((fn + ".txt"), 'w'))
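
# A self-contained sketch of the trajectory-plotting loop above: look up the (lat, lon) of
# every node along a path in a 'nodes' frame (indexed by node id, columns ["lat", "lon"], as
# built above) and draw the path as one faint line. The _sketch name is an assumption, not
# project code.
import pandas as pd


def plot_trajectories_sketch(ax, nodes: pd.DataFrame, trajectories, color="b"):
	for traj in trajectories:
		(y, x) = nodes.loc[list(traj)].values.T
		ax.plot(x, y, c=color, alpha=0.1, lw=0.3)
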
Example no. 5
def trip_trajectories_velocity(table_name):
	mpl.use("Agg")

	# Max number of trajectories to use
	N = 10000

	graph = get_road_graph()
	nodes = pd.DataFrame(data=nx.get_node_attributes(graph, "loc"), index=["lat", "lon"]).T

	edge_name = pd.Series(nx.get_edge_attributes(graph, name="name"))

	where = "('2016-05-02 08:00' <= pickup_datetime) and (pickup_datetime <= '2016-05-02 09:00')"
	trips = get_trip_data(table_name, graph, order="", limit=N, where=where)

	trips['velocity'] = trips['distance'] / trips['duration/s']
	trips = trips.sort_values(by='velocity', ascending=True)

	logger.debug(F"{len(trips)} trips")

	with Section("Computing estimated trajectories", out=logger.debug):
		trips['traj'] = parallel_map(GraphPathDist(graph).path_only, zip(trips.u, trips.v))

	with Section("Getting the background OSM map", out=logger.debug):
		extent = maps.ax4(nodes.lat, nodes.lon)
		osmap = maps.get_map_by_bbox(maps.ax2mb(*extent))

	with Section("Computing edge velocities", out=logger.debug):
		edge_vel = defaultdict(list)
		for (traj, v) in zip(trips.traj, trips.velocity):
			for e in pairwise(traj):
				edge_vel[e].append(v)
		edge_vel = pd.Series({e: np.mean(v or np.nan) for (e, v) in edge_vel.items()}, index=graph.edges)
		edge_vel = edge_vel.dropna()

	with plt.style.context({**PARAM['mpl_style'], 'font.size': 5}), Axes() as ax1:
		# The background map
		ax1.imshow(osmap, extent=extent, interpolation='quadric', zorder=-100)

		ax1.axis("off")

		ax1.set_xlim(extent[0:2])
		ax1.set_ylim(extent[2:4])

		cmap_velocity = LinearSegmentedColormap.from_list(name="noname", colors=["brown", "r", "orange", "g"])

		# marker = dict(markersize=0.5, markeredgewidth=0.1, markerfacecolor="None")
		# ax1.plot(trips['pickup_longitude'], trips['pickup_latitude'], 'og', **marker)
		# ax1.plot(trips['dropoff_longitude'], trips['dropoff_latitude'], 'xr', **marker)

		# for e in edge_name[edge_name == "65th Street Transverse"].index:
		# 	print(e, edge_vel[e])

		edge_vel: pd.Series
		# edge_vel = edge_vel.rank(pct=True)
		edge_vel = edge_vel.clip(lower=2, upper=6).round()
		edge_vel = (edge_vel - edge_vel.min()) / (edge_vel.max() - edge_vel.min())
		edge_vel = edge_vel.apply(cmap_velocity)

		nx.draw_networkx_edges(
			graph.edge_subgraph(edge_vel.index),
			ax=ax1,
			pos=nx.get_node_attributes(graph, name="pos"),
			edgelist=list(edge_vel.index),
			edge_color=list(edge_vel),
			# edge_cmap=cmap_velocity,
			# vmin=0, vmax=1,
			arrows=False, node_size=0, alpha=0.8, width=0.3,
		)

		# Save to file
		fn = os.path.join(PARAM['out_images_path'], F"{myname()}/{table_name}.png")
		ax1.figure.savefig(makedirs(fn))

		# Meta info
		json.dump({'number_of_trajectories': len(trips)}, open((fn + ".txt"), 'w'))
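
# A self-contained sketch of the per-edge velocity averaging above: every trip contributes
# its average velocity to each edge of its estimated trajectory, and each edge then gets the
# mean of those contributions. This is an illustration; edge_velocities_sketch is not part of
# the original module.
from collections import defaultdict

import numpy as np
import pandas as pd


def edge_velocities_sketch(trajectories, velocities) -> pd.Series:
	edge_vel = defaultdict(list)
	for (traj, v) in zip(trajectories, velocities):
		nodes = list(traj)
		for e in zip(nodes, nodes[1:]):
			edge_vel[e].append(v)
	return pd.Series({e: np.mean(vs) for (e, vs) in edge_vel.items()})
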
Example no. 6
    G = nx.DiGraph()

    for (osm_id, way) in ways.iterrows():
        G.add_edges_from(pairwise(way['nodes']), osm_id=osm_id, **way['tags'])
        if not ("yes" == str.lower(way['tags'].get('oneway', "no"))):
            # https://wiki.openstreetmap.org/wiki/Key:oneway
            G.add_edges_from(pairwise(reversed(way['nodes'])),
                             osm_id=osm_id,
                             **way['tags'])

    def edge_len(uv):
        return (uv, distance(nodes['loc'][uv[0]], nodes['loc'][uv[1]]).m)

    nx.set_edge_attributes(G,
                           name="len",
                           values=dict(parallel_map(edge_len, G.edges)))
    nx.set_node_attributes(G, name="loc", values=dict(nodes['loc']))
    nx.set_node_attributes(G, name="pos", values=dict(nodes['pos']))

with Section("Breaking down long edges", out=print):
    print(F"Before: {G.number_of_nodes()} nodes / {G.number_of_edges()} edges")
    break_long_edges(G, max_edge_len=PARAM['max_graph_edge_len'])
    print(F"After:  {G.number_of_nodes()} nodes / {G.number_of_edges()} edges")

    # Update node positions
    nodes = pd.DataFrame(data=nx.get_node_attributes(G, name="loc"),
                         index=["lat", "lon"]).T
    nodes['loc'] = list(zip(nodes['lat'], nodes['lon']))
    nodes['pos'] = list(zip(nodes['lon'], nodes['lat']))

    # Node position for plotting
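
# A self-contained sketch of the oneway handling above: each OSM way becomes a chain of
# directed edges, and the reversed chain is added as well unless the way is tagged oneway=yes
# (https://wiki.openstreetmap.org/wiki/Key:oneway). add_way_sketch is an illustration only.
import networkx as nx


def add_way_sketch(G: nx.DiGraph, node_chain, tags, osm_id=None):
    edges = list(zip(node_chain, node_chain[1:]))
    G.add_edges_from(edges, osm_id=osm_id, **tags)
    if str(tags.get('oneway', "no")).lower() != "yes":
        G.add_edges_from([(v, u) for (u, v) in edges], osm_id=osm_id, **tags)
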
Example no. 7
        table_name="yellow_tripdata_2016-05",
        where=
        "('2016-05-02 08:00' <= pickup_datetime) and (dropoff_datetime <= '2016-05-02 08:30')",
        limit=200,
    )

    trips = a_effective_metric_manhattan.get_taxidata_trips(**sql)
    trips = trips.join(a_effective_metric_manhattan.project(trips, graph),
                       how="inner")

# Attach estimated trajectories of trips
with a_effective_metric_manhattan.GraphPathDist(graph,
                                                edge_weight="met") as gpd:
    trips = trips.join(
        pd.DataFrame(
            data=parallel_map(gpd, progressbar(list(zip(trips.u, trips.v)))),
            index=trips.index,
            columns=["path", "dist"],
        ),
        how="inner",
    )


class SpaceTimeTraj:
    def __init__(self, graph: nx.DiGraph):
        self.nodes_loc = nx.get_node_attributes(graph, name="loc")
        self.edges_met = nx.get_edge_attributes(graph, name="met")
        self.pathdist = GraphPathDist(graph, edge_weight="met")
        self.approx_dist = ApproxGeodistance(graph, location_attr="loc")

    def length(self, path):
Example no. 8
def experiment(graph_size=32, ntrips=1000, noise=0.2, num_rounds=64):

	graph = odd_king_graph(xn=graph_size, yn=graph_size, scale=50)
	logger.debug(F"Constructed 'odd king' graph with {graph.number_of_nodes()} nodes")

	# nodes = pd.DataFrame(data=nx.get_node_attributes(graph, name="loc"), index=["lat", "lon"]).T

	random_state = np.random.RandomState(1)

	secret_met = {e: (v * (1 + noise * random_state.random())) for (e, v) in nx.get_edge_attributes(graph, name="len").items()}
	nx.set_edge_attributes(graph, name="met", values=secret_met)

	assert(sorted(list(graph.nodes)) == sorted(range(graph.number_of_nodes()))), "Expect node labels to be 0, 1, ..."

	random_state = np.random.RandomState(2)

	trips = pd.DataFrame(
		data=((random_state.choice(graph.number_of_nodes(), size=2, replace=False)) for __ in range(ntrips)),
		columns=["u", "v"],
	)

	# logger.warning("Invoking trips.drop_duplicates")
	# trips = trips.drop_duplicates()

	with Section(F"Collecting {len(trips)} secret trips", out=logger.debug):
		with GraphPathDist(graph, edge_weight="met") as pathdist:
			# Estimated trajectories of trips
			trips = trips.join(
				pd.DataFrame(
					data=parallel_map(pathdist, progressbar(list(zip(trips.u, trips.v)))),
					index=trips.index,
					columns=["secret_path", "distance"],
				)
			)

	coverage = pd.Series(dict(Counter(e for path in trips['secret_path'] for e in pairwise(path))))

	logger.debug([
		F"{nedges} edges x{cov}"
		for (cov, nedges) in sorted(Counter(coverage).items(), key=first)
	])

	# nx.draw(graph, pos=nx.get_node_attributes(graph, name="pos"))
	# for (__, trip) in trips.iterrows():
	# 	path = nx.shortest_path(graph, source=trip.u, target=trip.v, weight="met")
	# 	plt.plot(nodes.lon[path], nodes.lat[path], 'b-')
	# plt.show()

	# Initial metric guess is given by "len"
	history = pd.DataFrame({'secret': secret_met, 0: pd.Series(nx.get_edge_attributes(graph, name="len"))})

	def cb(info):
		if (info.round == (2 ** round(log2(info.round)))):
			history[info.round] = info.edges_met

	opt = options_refine_effective_metric()
	opt.min_trip_distance_m = 0.1
	opt.max_trip_distance_m = 1e8
	opt.num_rounds = num_rounds

	refine_effective_metric(graph, trips, callback=cb, opt=opt)

	return history
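
# A hedged post-processing sketch for the 'history' frame returned above (one column per
# recorded round plus the 'secret' column): the median relative error of the metric estimate
# per round, as a quick convergence check. convergence_curve_sketch is an assumption, not part
# of the original module.
import numpy as np
import pandas as pd


def convergence_curve_sketch(history: pd.DataFrame) -> pd.Series:
	secret = history['secret']
	rounds = [c for c in history.columns if c != 'secret']
	return pd.Series({r: float(np.median(np.abs(history[r] / secret - 1))) for r in rounds})
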
Example no. 9
def refine_once(graph: nx.DiGraph, trips: pd.DataFrame):
    edge_dt = pd.Series({
        # Default values: time-to-cross = edge length [m] / 3, i.e. roughly 3 m/s
        **{(u, v): (edge_len / 3)
           for (u, v, edge_len) in graph.edges.data("len")},
        # Overwrite using existing values
        **nx.get_edge_attributes(graph, name=PARAM['edge_time_attr'])
    })

    nx.set_edge_attributes(graph,
                           name=PARAM['edge_time_attr'],
                           values=dict(edge_dt))

    trips = trips.join(trip_endpoints_on_graph(trips, graph), how="inner")

    with graphs.GraphPathDist(graph,
                              edge_weight=PARAM['edge_time_attr']) as pathdist:
        trips = trips.join(
            pd.DataFrame(
                data=commons.parallel_map(
                    pathdist, progressbar(list(zip(trips.a, trips.b)))),
                columns=["est_path", "est_path_dt"],
                index=trips.index,
            ))

    trips = trips[(trips.duration > 0) & (trips.est_path_dt > 0)]

    trips['f'] = trips.duration / trips.est_path_dt

    def path_edge_matrix(
            paths, edges) -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
        edgej = dict(zip(edges, itertools.count()))
        (ii, jj) = np.array([(ii, edgej[e]) for (ii, path) in enumerate(paths)
                             for e in pairwise(path)]).T
        M = sparse.csr_matrix((np.ones(len(ii)), (ii, jj)),
                              shape=(len(paths), len(edges)))
        return (M, np.unique(ii), np.unique(jj))

    def Vec(a) -> np.ndarray:
        return np.array(a, dtype=float).reshape(-1)

    with commons.Section("Solving LSQR", out=print):
        # Trip x Edge incidence matrix
        (M, nzi, nzj) = path_edge_matrix(trips.est_path, edge_dt.index)

        # *Duration* for each trip
        trip_t = Vec(trips.duration)
        # *Time-to-cross* for each edge, current estimate
        edge_t = Vec(edge_dt)

        # Diagonal scaling by edge length ("len"): longer edges receive proportionally larger corrections
        S = sparse.diags([d for (u, v, d) in graph.edges.data("len")],
                         format='csc')

        # Not all trips/edges are involved: truncate the linear system
        (M, trip_t) = (M[nzi, :], trip_t[nzi])
        (M, edge_t) = (M[:, nzj], edge_t[nzj])
        S = S[nzj, :][:, nzj]

        assert (len(trip_t) == M.shape[0])
        assert (M.shape[1] == len(edge_t))

        # Compute the correction to the estimate
        # (s, lsqr_istop, lsqr_itn, lsqr_r1norm, lsqr_r2norm, *__) = linalg.lsqr(M, d - M.dot(t), damp=1e-3, show=True)
        (s, *__) = linalg.lsmr(M.dot(S),
                               trip_t - M.dot(edge_t),
                               maxiter=100,
                               damp=1e-1,
                               show=True)
        s = S.dot(s)
        # Update the time-to-cross estimate
        edge_t = np.clip(edge_t + 0.1 * s,
                         a_min=(edge_t / 1.1),
                         a_max=(edge_t * 1.1))

        edge_dt.iloc[nzj] = edge_t

    nx.set_edge_attributes(graph,
                           name=PARAM['edge_time_attr'],
                           values=dict(edge_dt))
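
# A toy, self-contained sketch of the scaled least-squares step above: solve (M S) s ~= r
# with LSMR and map the solution back through S, so the correction to each edge's
# time-to-cross is expressed relative to the edge's length. The numbers are made up.
import numpy as np
from scipy import sparse
from scipy.sparse import linalg

M = sparse.csr_matrix(np.array([[1., 1., 0.], [0., 1., 1.]]))  # trips x edges incidence
r = np.array([12.0, -7.0])                                     # duration residuals, trip_t - M.dot(edge_t)
S = sparse.diags([80.0, 120.0, 60.0], format='csc')            # edge lengths, as in the code above
(s, *__) = linalg.lsmr(M.dot(S), r, damp=1e-1, maxiter=100)
s = S.dot(s)                                                   # correction per edge, in seconds
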
Example no. 10
def refine_effective_metric(
    graph: nx.DiGraph,
    trips: pd.DataFrame,
    opt=options_refine_effective_metric(),
    callback=None,
    edges_met=None,
    skip_rounds=0,
) -> pd.Series:
    """
	Returns a pandas series edges_met such that edges_met[E] is the effective length of edge E.
	If edges_met is provided it is used as the initial guess (but not modified).
	Invalidates the edge attribute opt.temp_graph_metric_attr_name in the graph if present.
	"""

    if nx.get_edge_attributes(graph, name=opt.temp_graph_metric_attr_name):
        logger.warning(
            F"Graph edge attributes '{opt.temp_graph_metric_attr_name}' will be invalidates"
        )

    # Only nontrivial trips that are not too short or too long
    trips = trips[trips['u'] != trips['v']]
    trips = trips[trips['distance'] >= opt.min_trip_distance_m]
    trips = trips[trips['distance'] <= opt.max_trip_distance_m]

    logger.debug(F"Trip pool has {len(trips)} trips")

    assert ((edges_met is not None) == bool(skip_rounds)
            ), "Both or none of (edges_met, skip_rounds) should be provided"

    # Geographic metric as initial guess / prior
    edges_len = pd.Series(data=nx.get_edge_attributes(graph, name="len"),
                          name="len")

    # Effective metric, to be modified
    if edges_met is not None:
        edges_met = pd.Series(name="met", copy=True, data=edges_met)
        skip_rounds = skip_rounds
    else:
        edges_met = pd.Series(name="met",
                              copy=True,
                              data=nx.get_edge_attributes(graph, name="len"))
        skip_rounds = 0

    for r in range(1 + skip_rounds, opt.num_rounds):
        logger.debug(F"Round {r}")

        if edges_met.isna().any():
            logger.warning("There are edges with 'n/a' metric")

        with Section("Computing trajectories", out=logger.debug):

            nx.set_edge_attributes(graph,
                                   name=opt.temp_graph_metric_attr_name,
                                   values=dict(edges_met))

            with GraphPathDist(
                    graph, edge_weight=opt.temp_graph_metric_attr_name) as gpd:
                # Estimated trajectories of trips
                traj = pd.DataFrame(
                    data=parallel_map(gpd,
                                      progressbar(list(zip(trips.u,
                                                           trips.v)))),
                    index=trips.index,
                    columns=["path", "dist"],
                )

            # Per-trajectory correction factor
            traj['f'] = trips['distance'] / traj['dist']

            # # Accept trips/trajectories that are feasibly related
            # traj = traj[(0.8 < traj.f) & (traj.f < 1.2)]

            logger.debug(
                F"Weight correction using {sum(traj.f < 1)}(down) + {sum(traj.f > 1)}(up) trips"
            )

        with Section("Computing correction factors", out=logger.debug):

            with Section("Edges of trajectories"):
                edges_loci = dict(zip(edges_met.index, range(len(edges_met))))
                edges_of_traj = list(
                    tuple(edges_loci[e] for e in pairwise(path))
                    for path in progressbar(traj.path))

            with Section("Incidence matrix [trips x edges]"):
                M = dok_matrix((len(traj), len(edges_met)), dtype=float)
                for (t, edges, f) in zip(range(M.shape[0]), edges_of_traj,
                                         traj.f):
                    M[t, edges] = f
                del edges_of_traj

            with Section("Subsample trips"):
                I = pd.Series(range(M.shape[0])).sample(
                    frac=0.5, random_state=opt.random_state)
                M = csr_matrix(M)[I, :]

            with Section("Compute correction"):
                M = csc_matrix(M)

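                # Per-edge correction: the geometric mean of the factors f (reported
                # distance / estimated distance) over the trips whose estimated
                # trajectory uses the edge, defaulting to 1 for unused edges.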
                correction = pd.Series(
                    index=edges_met.index,
                    data=[(lambda L: (2**np.mean(np.log2(L
                                                         if len(L) else 1))))(
                                                             M.getcol(j).data)
                          for j in range(M.shape[1])]).fillna(1)

                # Clip and moderate the correction factors
                correction = 2**(opt.correction_factor_moderation *
                                 np.log2(correction).clip(lower=-1, upper=+1))

        with Section("Applying correction factors", out=logger.debug):

            edges_met = edges_met * correction

            # Clip extremes, slow-revert to the prior
            edges_met = edges_met.clip(lower=(edges_len / 2),
                                       upper=(edges_len * 4))
            edges_met = edges_met * (2
                                     **(0.01 * np.log2(edges_len / edges_met)))

        if callback:
            # # The edges of estimated trajectories
            # df = pd.DataFrame.sparse.from_spmatrix(
            # 	data=M,
            # 	index=pd.Series(traj.index, name="Estimated trajectory"),
            # 	columns=pd.Index(edges_met.index, name="Edges")
            # ).astype(pd.SparseDtype('float', np.nan))

            callback(
                SimpleNamespace(graph=graph,
                                trips=trips,
                                edges_met=edges_met,
                                traj=traj,
                                round=r,
                                correction=correction))

    # Record the estimated metric
    nx.set_edge_attributes(graph,
                           name=opt.temp_graph_metric_attr_name,
                           values=dict(edges_met))

    logger.debug(F"Iteration done")

    return edges_met
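
# A worked numeric sketch of the "clip extremes, slow-revert to the prior" step above, with
# made-up values: the effective metric is kept within [len/2, 4*len] and then nudged slightly
# back toward the geographic length "len".
import numpy as np
import pandas as pd

edges_len = pd.Series({("a", "b"): 100.0, ("b", "c"): 50.0})
edges_met = pd.Series({("a", "b"): 900.0, ("b", "c"): 10.0})   # exaggerated estimates

edges_met = edges_met.clip(lower=(edges_len / 2), upper=(edges_len * 4))        # -> 400.0, 25.0
edges_met = edges_met * (2 ** (0.01 * np.log2(edges_len / edges_met)))          # -> ~394.5, ~25.2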