def set_sample_points(self, interval, *args, **kwargs):
    """
    Lay sample points along the network and group them by grid square.

    Sample points are generated on ``self.graph`` at the given interval, then
    assigned to the rectangles in ``self.sample_units``. Rectangles that
    contain no sample point are dropped, together with their grid polygon.

    :param interval: Distance between consecutive sample points
    :param args: Accepted for interface compatibility; not used here
    :param kwargs: Accepted for interface compatibility; not used here
    :return: None. Updates ``sample_points``, ``sample_units``,
        ``grid_polys`` and ``n_sample_point_per_unit`` on the instance.
    """
    # the walker returns the points in network edge order
    all_points, _ = network_walker_uniform_sample_points(self.graph, interval)
    px, py = all_points.to_cartesian().separate

    kept_units = []
    kept_polys = []
    points_by_unit = []
    self.n_sample_point_per_unit = []

    for unit_idx, unit in enumerate(self.sample_units):
        xmin, ymin, xmax, ymax = unit
        # half-open box: lower bounds inclusive, upper bounds exclusive
        inside = (px >= xmin) & (px < xmax) & (py >= ymin) & (py < ymax)
        n_inside = sum(inside)
        if not n_inside:
            # empty grid squares are discarded
            continue
        points_by_unit.extend(all_points.getrows(inside).toarray(0))
        self.n_sample_point_per_unit.append(n_inside)
        kept_units.append((xmin, ymin, xmax, ymax))
        kept_polys.append(self.grid_polys[unit_idx])

    # sample points are now ordered by sample unit rather than by edge
    self.n_sample_point_per_unit = np.array(self.n_sample_point_per_unit)
    self.sample_points = NetworkData(points_by_unit)
    self.sample_units = kept_units
    self.grid_polys = kept_polys
def uniform_random_points_on_net(net, n=1):
    """
    Draw n NetPoints at random that lie on the supplied network.

    Edges are selected with probability weighted by their length, then a
    position is drawn uniformly along each selected edge.

    :param net: Network providing ``edges()``
    :param n: Number of points to draw
    :return: NetworkData array if n != 1, else a single NetPoint
    """
    edges = net.edges()
    edge_lengths = np.array([edge.length for edge in edges])

    # edge draw weighted by segment length; for n == 1 the selection is
    # treated as a single index rather than a sequence of indices
    drawn = weighted_random_selection(edge_lengths, n=n)
    if n == 1:
        chosen = [edges[drawn]]
    else:
        chosen = [edges[k] for k in drawn]

    # fractional position along each chosen edge
    fractions = np.random.rand(n)
    pts = [
        NetPoint(net, edge, {
            edge.orientation_neg: edge.length * frac,
            edge.orientation_pos: edge.length * (1 - frac),
        })
        for edge, frac in zip(chosen, fractions)
    ]

    if n == 1:
        return pts[0]
    return NetworkData(pts)
def network_walker_uniform_sample_points(net_obj, interval, source_node=None):
    """
    Generate NetPoints uniformly along the network with the supplied interval.

    The network is walked once (no repeated edges). Points are deposited so
    that the spacing continues across edge boundaries, based on the total
    distance walked so far. An edge too short to receive a point gets a single
    point at its centroid instead.

    :param net_obj: StreetNet instance
    :param interval: Distance between points
    :param source_node: Optionally specify the node to start at. This will affect the outcome.
    :return: Tuple ``(points, n_per_edge)``: a NetworkData of the generated
        points grouped in ``net_obj.edges()`` order, and a numpy array holding
        the number of points on each edge in the same order.
    :raises TypeError: if an edge is never reached by the walker, since its
        entry is still None when the points are flattened.
    """
    g = network_walker(net_obj, source_node=source_node, repeat_edges=False)

    # pre-populate in edge order so the output ordering follows net_obj.edges()
    # rather than the order in which the walker happens to visit edges
    points = OrderedDict()
    n_per_edge = OrderedDict()
    for e in net_obj.edges():
        points[e] = None
        n_per_edge[e] = None

    for path, edge in g:
        el = edge.length

        # distance from the entry node to the first point on this edge, chosen
        # so that spacing carries over from the distance already walked
        npl = interval - path.distance_total % interval

        # distances along the edge at which to deposit points
        point_dists = np.arange(npl, el, interval)

        if not point_dists.size:
            # this edge is too short - just place one point at the centroid
            points[edge] = [edge.centroid]
            n_per_edge[edge] = 1
            continue

        n_per_edge[edge] = point_dists.size

        # 'on' is the node the walker entered from, 'op' the node at the far end
        on = path.nodes[-1]
        op = get_next_node(edge, on)
        points[edge] = [
            NetPoint(net_obj, edge, {on: pd, op: el - pd})
            for pd in point_dists
        ]

    # flatten in linear time; repeated list addition is quadratic in the
    # number of points and fails outright on an empty network
    all_points = [pt for edge_points in points.values() for pt in edge_points]

    # list() keeps this correct on Python 3, where values() is a view
    return NetworkData(all_points), np.array(list(n_per_edge.values()))
def network_point_coverage(net, dx=None, include_nodes=True):
    '''
    Produce a series of semi-regularly-spaced points on the supplied network.

    Each edge receives ``ceil(length / dx)`` points, evenly spaced between the
    two ends of the edge (offset inwards by a small epsilon).

    :param net: Network
    :param dx: Optional spacing between points. A falsy value (None or 0)
        falls back to a spacing of 1.0.
    :param include_nodes: Accepted for interface compatibility; the current
        implementation does not use it.
    :return: Tuple of
        - NetworkData array of NetPoints, ordered by edge
        - list of length E giving the number of points on each edge
    '''
    # small delta keeps points strictly inside the edge, away from the nodes
    eps = 1e-6
    dx = dx or 1.

    net_points = []
    edge_count = []

    for edge in net.edges():
        length = edge['length']
        # np.ceil replaces np.math.ceil: np.math was only an alias of the
        # stdlib math module and is not available in recent NumPy releases
        n_pt = int(np.ceil(length / float(dx)))
        interp_lengths = np.linspace(eps, length - eps, n_pt)

        # only network coordinates are returned, so the points are built
        # directly from the distances to the edge's two end nodes
        for dist in interp_lengths:
            node_dist = {
                edge['orientation_neg']: dist,
                edge['orientation_pos']: length - dist,
            }
            net_points.append(NetPoint(net, edge, node_dist))

        edge_count.append(interp_lengths.size)

    return NetworkData(net_points), edge_count
def __init__(self, net_obj, targets, max_distance=None, max_split=None, repeat_edges=True, verbose=False, logger=None):
    """
    Store the walk configuration, set up logging and create empty caches.

    :param net_obj: Network to walk
    :param targets: Target points; wrapped in NetworkData
    :param max_distance: Stored as ``self.max_distance``
    :param max_split: Stored as ``self.max_split``
    :param repeat_edges: Stored as ``self.repeat_edges``
    :param verbose: If True, log at DEBUG level (and to a stream handler when
        no logger is supplied); otherwise log at INFO level
    :param logger: Optional logger to use instead of the "NetworkWalker" logger
    """
    self.net_obj = net_obj
    self.targets = NetworkData(targets)
    self.max_distance = max_distance
    self.max_split = max_split
    self.repeat_edges = repeat_edges

    # logging
    if logger:
        self.logger = logger
    else:
        own_logger = logging.getLogger("NetworkWalker")
        # start from a clean slate: drop any handlers left on the named logger
        own_logger.handlers = []
        handler_type = logging.StreamHandler if verbose else logging.NullHandler
        own_logger.addHandler(handler_type())
        self.logger = own_logger

    # the level is set on whichever logger is in use, including a supplied one
    self.verbose = True if verbose else False
    self.logger.setLevel(logging.DEBUG if verbose else logging.INFO)

    # these dictionaries keep track of the walks already carried out
    self.cached_walks = {}
    self.cached_source_target_paths = {}
def prep_input(x):
    """
    Return the input as a NetworkData instance.

    :param x: A NetworkData instance, or anything NetworkData can be built from
    :return: ``x`` itself if it is already NetworkData, otherwise ``NetworkData(x)``
    """
    return x if isinstance(x, NetworkData) else NetworkData(x)
class NetworkRocUniformSamplingGrid(NetworkRocSegments):
    """
    Place sample points uniformly over the network, then divide into sample units by imposing a regular grid
    """

    @staticmethod
    def generate_bounding_poly(data):
        """
        Build the bounding polygon of the data from its Cartesian coordinates.

        :param data: Network data exposing ``to_cartesian()``
        :return: Result of ``RocGrid.generate_bounding_poly``
        """
        return RocGrid.generate_bounding_poly(data.to_cartesian())

    def set_sample_units(self, side_length, interval, *args, **kwargs):
        """
        Set the ROC grid.
        :param side_length: side length of grid squares
        :param interval: The length interval between sample points
        :param args: Passed to set_sample_points
        :param kwargs: Passed to set_sample_points.
        :return: None
        """
        # reset prediction values
        self.prediction_values = None

        if not self.poly:
            # find minimal bounding rectangle
            self.poly = self.generate_bounding_poly(self.data)

        # set sample grid
        self.side_length = side_length
        self.grid_polys, self.sample_units, _ = create_spatial_grid(
            self.poly, self.side_length)

        # set network sampling points
        self.set_sample_points(interval, *args, **kwargs)

    def set_sample_points(self, interval, *args, **kwargs):
        """
        Lay sample points along the network and group them by grid square.

        Grid squares that contain no sample point are dropped, together with
        their grid polygon.

        :param interval: Distance between consecutive sample points
        :param args: Accepted for interface compatibility; not used here
        :param kwargs: Accepted for interface compatibility; not used here
        :return: None
        """
        # get sample points
        # these are initially in the same order as the network edges
        sample_points, _ = network_walker_uniform_sample_points(
            self.graph, interval)

        # divide into sample units and reorder the sample points
        # the sample units and grid polys are redefined at the end
        sample_units = []
        grid_polys = []
        reordered_sample_points = []
        self.n_sample_point_per_unit = []
        x, y = sample_points.to_cartesian().separate

        for i in range(len(self.sample_units)):
            xmin, ymin, xmax, ymax = self.sample_units[i]
            # half-open box: lower bounds inclusive, upper bounds exclusive
            in_su = (x >= xmin) & (x < xmax) & (y >= ymin) & (y < ymax)
            n = sum(in_su)
            if n:
                reordered_sample_points.extend(
                    sample_points.getrows(in_su).toarray(0))
                self.n_sample_point_per_unit.append(n)
                sample_units.append((xmin, ymin, xmax, ymax))
                grid_polys.append(self.grid_polys[i])

        self.n_sample_point_per_unit = np.array(self.n_sample_point_per_unit)
        self.sample_points = NetworkData(reordered_sample_points)
        self.sample_units = sample_units
        self.grid_polys = grid_polys

    def plot(self, show_sample_units=True, show_prediction=True, fmax=0.9, cmap='Reds', **kwargs):
        """
        Plot the network, optionally coloured by prediction, with the sample grid.

        :param show_sample_units: If True, draw the grid squares and the
            sample points they contain in alternating shades
        :param show_prediction: If True, colour the network by prediction value
        :param fmax: Fraction of the sorted prediction values at which the
            colour scale saturates
        :param cmap: Name of the matplotlib colourmap
        :param kwargs: Accepted for interface compatibility; not used here
        :return: None
        """
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_aspect('equal')

        if show_prediction:
            # create dictionary of segment colours for plotting
            # this requires creating a norm instance and using that to index a colourmap
            ordered_values = sorted(self.prediction_values)
            # clamp so that fmax == 1.0 selects the maximum instead of
            # indexing one past the end
            vmax_idx = min(int(self.n_sample_units * fmax), len(ordered_values) - 1)
            vmax = ordered_values[vmax_idx]
            cmap = cm.get_cmap(cmap)
            norm = mpl.colors.Normalize(vmin=0, vmax=vmax)
            colour_mapper = cm.ScalarMappable(norm=norm, cmap=cmap)
            edge_inner_col = {}
            # NOTE(review): set_sample_points stores sample units as
            # (xmin, ymin, xmax, ymax) tuples, so edge['fid'] looks like it
            # expects edge-based sample units - confirm before relying on it
            for pv, edge in zip(self.prediction_values, self.sample_units):
                edge_inner_col[edge['fid']] = colour_mapper.to_rgba(pv)
            self.graph.plot_network(ax=ax, edge_width=7, edge_inner_col=edge_inner_col)
        else:
            # plot standard network edge outlines without colour, on the same
            # axes as the prediction branch uses
            self.graph.plot_network(ax=ax, edge_width=10, edge_inner_col='w')

        if show_sample_units:
            # alternating grey - black grid squares / crosses
            xsp, ysp = self.sample_points.to_cartesian().separate
            mins = np.array(self.sample_units).min(axis=0)
            maxs = np.array(self.sample_units).max(axis=0)
            # lower-left corners of every second column / row of the grid
            xmin_group = np.arange(mins[0], maxs[2], 2 * self.side_length)
            ymin_group = np.arange(mins[1], maxs[3], 2 * self.side_length)
            count = 0
            for gp, su, n in zip(self.grid_polys, self.sample_units,
                                 self.n_sample_point_per_unit):
                # XOR of column and row membership gives a chequerboard
                a = np.any(np.abs(xmin_group - su[0]) < 1e-3) ^ np.any(
                    np.abs(ymin_group - su[1]) < 1e-3)
                fc = np.ones(3) * (0.5 if a else 0.8)
                mc = np.ones(3) * 0.5 if a else 'k'
                plot_shapely_geos(gp, facecolor=fc, alpha=0.4)
                # sample points are stored grouped by unit, so each unit owns
                # the next n entries
                plt.plot(
                    xsp[count:count + n],
                    ysp[count:count + n],
                    'o',
                    color=mc,
                    markersize=5,
                )
                count += n

        # remove x and y ticks as these rarely add anything
        ax.set_xticks([])
        ax.set_yticks([])
def network_paths_source_targets(net_obj, source, targets, max_search_distance, max_split=None, verbose=False, logger=None):
    """
    Find the network paths from a source point to each reachable target.

    The network is walked outwards from ``source``. Every time the walker
    reaches an edge holding a target, the distance to that target is computed
    and, if within ``max_search_distance``, a NetPath is recorded.

    :param net_obj: Network to walk
    :param source: NetPoint to start from
    :param targets: Target points; wrapped in NetworkData
    :param max_search_distance: Maximum path length to accept
    :param max_split: Passed to the network walker
    :param verbose: Passed to the network walker
    :param logger: Optional logger; a silent logger is used when None
    :return: dict mapping the index of a target (in ``targets``) to the list
        of NetPath objects that reach it
    """
    if logger is None:
        logger = logging.getLogger('null')
        logger.handlers = []
        logger.addHandler(logging.NullHandler())

    target_points = NetworkData(targets)
    paths = defaultdict(list)

    g = network_walker_from_net_point(net_obj,
                                      source,
                                      max_distance=max_search_distance,
                                      max_split=max_split,
                                      repeat_edges=True,
                                      verbose=verbose,
                                      logger=logger)

    # materialise the target array once and reuse it for all the filters
    all_targets = target_points.toarray(0)

    # Cartesian filtering by nodes
    target_nodes_pos = CartesianData([t.edge.node_pos_coords for t in all_targets])
    target_nodes_neg = CartesianData([t.edge.node_neg_coords for t in all_targets])

    # Find the targets on the source edge and include these explicitly.
    # This is required for longer edges, where neither of the edge nodes are within max_search distance
    on_this_edge = np.array([t.edge == source.edge for t in all_targets])
    logger.debug("Found %d points on the starting edge", on_this_edge.sum())

    source_xy_tiled = CartesianData([source.cartesian_coords] * target_points.ndata)
    target_distance_pos = target_nodes_pos.distance(source_xy_tiled)
    target_distance_neg = target_nodes_neg.distance(source_xy_tiled)

    # keep a target if either end node of its edge is within range of the
    # source, or if it shares the source edge
    reduced_target_idx = np.where(
        (target_distance_pos.toarray(0) <= max_search_distance) |
        (target_distance_neg.toarray(0) <= max_search_distance) |
        on_this_edge
    )[0]
    reduced_targets = target_points.getrows(reduced_target_idx)
    logger.debug("Initial filtering reduces number of targets from {0} to {1}".format(
        target_points.ndata, reduced_targets.ndata))

    # pair each surviving target with its original index once, rather than
    # rebuilding the array for every edge the walker yields
    candidates = list(zip(reduced_target_idx, reduced_targets.toarray(0)))

    for path, edge in g:
        # test whether any targets lie on the new edge
        for target_idx, t in candidates:
            if not (t.edge == edge):
                continue

            # get distance from current node to this point
            if not len(path.nodes):
                # this only happens at the starting edge
                dist_between = (t - source).length
            else:
                # all other situations
                dist_along = t.node_dist[path.nodes[-1]]
                dist_between = path.distance_total + dist_along

            # lazy %-args: the message is only built if DEBUG is enabled
            logger.debug("Target %d is on this edge at a distance of %.2f",
                         target_idx, dist_between)

            if dist_between <= max_search_distance:
                logger.debug("Adding target %d to paths", target_idx)
                this_path = NetPath(
                    net_obj,
                    start=path.start,
                    end=t,
                    nodes=list(path.nodes),
                    distance=dist_between,
                    split=path.splits_total)
                paths[target_idx].append(this_path)

    return dict(paths)
def test_network_binary_hotspot(self):
    """
    Check the fixed-radius network kernel: predictions are non-zero only for
    targets within the radius, and only for times after the source event.
    """
    from network.streetnet import NetPoint

    def point_on(edge, dist_from_neg):
        # NetPoint on the edge at the given distance from its negative node
        return NetPoint(itn_net, edge, {
            edge.orientation_neg: dist_from_neg,
            edge.orientation_pos: edge.length - dist_from_neg
        })

    def check_prediction(time, target_data, expected):
        # predict at the given time and compare elementwise within tolerance
        pred = stk.predict(time, target_data)
        self.assertTrue(np.all(np.abs(pred - expected) < self.tol))

    stk = hotspot.STNetworkFixedRadius(
        radius=50)  # value is zero when the net distance is > 50
    itn_net = load_test_network()
    source_edge = itn_net.edges()[40]
    stk.train(NetworkSpaceTimeData([[1.0, source_edge.centroid]]))

    # one edge away
    # distances from source: ~20m, ~49m, ~49m, ~54m, >> 50m
    neighbour_edge = itn_net.edges()[41]
    targets = NetworkData(
        [point_on(neighbour_edge, d) for d in (1, 30, 30, 35, 75)])

    # NB: times less than the source (1.0) will result in zeros
    check_prediction(0.99, targets, np.zeros(5))
    check_prediction(1.01, targets, np.array([np.exp(-.01)] * 3 + [0., 0.]))
    check_prediction(1.1, targets, np.array([np.exp(-.1)] * 3 + [0., 0.]))

    # two edges away
    # distances from source: ~40m, ~40m, ~60m
    distant_edge = itn_net.edges()[2]
    targets = NetworkData(
        [point_on(distant_edge, d) for d in (120, 120, 100)])

    check_prediction(1.01, targets, np.array([np.exp(-.01)] * 2 + [0.]))
    check_prediction(2.7, targets, np.array([np.exp(-1.7)] * 2 + [0.]))
def test_network_roc(self):
    """
    Exercise network validation / ROC with three set-ups: a Bowers kernel,
    a fixed-radius kernel on segment sample units, and the same fixed-radius
    kernel with the mean (sample point based) version of the ROC.
    """
    itn_net = load_test_network()

    # lay down a few events on the network, each midway along its edge
    edge_idx = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 100]
    net_point_array = NetworkData(
        [itn_net.edges()[i].centroid for i in edge_idx])

    # prepend times to get the full space-time dataset
    event_times = np.arange(net_point_array.ndata) / float(len(edge_idx))
    st_net_array = DataArray.from_args(event_times).adddim(
        net_point_array, type=NetworkSpaceTimeData)

    # test 1: Bowers kernel (decrease with time and distance)
    stk = hotspot.STNetworkBowers(a=10., b=1.)
    vb = validation.NetworkValidationBase(st_net_array, stk)
    vb.set_sample_units(None)  # the argument makes no difference here
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1)

    # on each step, the segment corresponding to the most recent event should have the highest prediction rank
    for step in range(5):
        top_ranked = res['prediction_rank'][step][:(step + 1)]
        most_recent_first = np.array(edge_idx[5:(step + 6)])[::-1]
        self.assertTrue(np.all(top_ranked == most_recent_first))

    # test 2: binary mask kernel in space, decr in time
    # exponential decrease with dt, zero when the distance exceeds the radius
    radius = 50.
    stk = hotspot.STNetworkFixedRadius(radius, 1.0)
    vb = validation.NetworkValidationBase(st_net_array, stk)
    vb.set_sample_units(None)  # the argument makes no difference here
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1, n_iter=1)
    pvals = vb.roc.prediction_values

    # check network distance from centroid to sources and verify non zero values
    pvals_expctd_nonzero = np.array([
        unit_idx for unit_idx, e in enumerate(vb.roc.sample_units)
        if np.any([(e.centroid - x).length <= radius
                   for x in vb.training.toarray(1)])
    ])
    self.assertTrue(
        np.all(pvals.nonzero()[0] == np.array(pvals_expctd_nonzero)))

    # test 3: repeat but with a mean version of the ROC
    vb = validation.NetworkValidationMean(st_net_array, stk)
    vb.set_sample_units(None, 10)  # points are spaced ~10m apart
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1, n_iter=1)  # just one run this time
    pvals2 = vb.roc.prediction_values

    # check network distance from each sample point to sources and verify non zero values
    pvals_expctd_nonzero2 = []
    for pt_idx, pt in enumerate(vb.sample_points.toarray(0)):
        within_radius = np.any([(pt - x).length <= radius
                                for x in vb.training.toarray(1)])
        if not within_radius:
            continue
        # find sample unit from sample point
        this_sample_unit = bisect.bisect_right(
            vb.roc.n_sample_point_per_unit.cumsum(), pt_idx)
        if this_sample_unit not in pvals_expctd_nonzero2:
            pvals_expctd_nonzero2.append(this_sample_unit)

    self.assertTrue(
        np.all(pvals2.nonzero()[0] == np.array(pvals_expctd_nonzero2)))
from data.models import CartesianData, DataArray, NetworkData
from matplotlib import pyplot as plt

# Demo script: evaluate a network kernel, centred on one snapped source point,
# at uniformly spaced target points on the test network.
itn_net = load_test_network()
# coordinates of all network nodes, read from each node's 'loc' attribute
nodes = np.array([t['loc'] for t in itn_net.g.node.values()])
xmin, ymin, xmax, ymax = itn_net.extent
# target points laid along the network at an interval of 10
targets, n_per_edge = utils.network_walker_uniform_sample_points(
    itn_net, 10)
# lay down some random points within that box
num_pts = 100
x_pts = np.random.rand(num_pts) * (xmax - xmin) + xmin
y_pts = np.random.rand(num_pts) * (ymax - ymin) + ymin
xy = CartesianData.from_args(x_pts, y_pts)
# snap the random Cartesian points onto the network
sources = NetworkData.from_cartesian(
    itn_net, xy, grid_size=50)  # grid_size defaults to 50
# not all points necessarily snap successfully
num_pts = sources.ndata
radius = 200.
# walker shared by the kernel to find paths from the source to the targets
nw = utils.NetworkWalker(itn_net,
                         targets,
                         max_distance=radius,
                         max_split=1e4)
# kernel centred on the first snapped source; the second argument (200.)
# equals radius above
k = NetworkKernelEqualSplitLinear(sources.getone(0), 200.)
k.set_walker(nw)
z = k.pdf()
# normalise by the maximum value
zn = z / max(z)
# plt.figure()
# itn_net.plot_network()
def network_paths_source_targets(net_obj, source, targets, max_search_distance, max_split=None, verbose=False, logger=None):
    """
    Find the network paths from a source point to each reachable target.

    The network is walked outwards from ``source``. Every time the walker
    reaches an edge holding a target, the distance to that target is computed
    and, if within ``max_search_distance``, a NetPath is recorded.

    :param net_obj: Network to walk
    :param source: NetPoint to start from
    :param targets: Target points; wrapped in NetworkData
    :param max_search_distance: Maximum path length to accept
    :param max_split: Passed to the network walker
    :param verbose: Passed to the network walker
    :param logger: Optional logger; a silent logger is used when None
    :return: dict mapping the index of a target (in ``targets``) to the list
        of NetPath objects that reach it
    """
    if logger is None:
        logger = logging.getLogger('null')
        logger.handlers = []
        logger.addHandler(logging.NullHandler())

    target_points = NetworkData(targets)
    paths = defaultdict(list)

    g = network_walker_from_net_point(net_obj,
                                      source,
                                      max_distance=max_search_distance,
                                      max_split=max_split,
                                      repeat_edges=True,
                                      verbose=verbose,
                                      logger=logger)

    # materialise the target array once and reuse it for all the filters
    all_targets = target_points.toarray(0)

    # Cartesian filtering by nodes
    target_nodes_pos = CartesianData(
        [t.edge.node_pos_coords for t in all_targets])
    target_nodes_neg = CartesianData(
        [t.edge.node_neg_coords for t in all_targets])

    # Find the targets on the source edge and include these explicitly.
    # This is required for longer edges, where neither of the edge nodes are within max_search distance
    on_this_edge = np.array([t.edge == source.edge for t in all_targets])
    logger.debug("Found %d points on the starting edge", on_this_edge.sum())

    source_xy_tiled = CartesianData([source.cartesian_coords] *
                                    target_points.ndata)
    target_distance_pos = target_nodes_pos.distance(source_xy_tiled)
    target_distance_neg = target_nodes_neg.distance(source_xy_tiled)

    # keep a target if either end node of its edge is within range of the
    # source, or if it shares the source edge
    reduced_target_idx = np.where(
        (target_distance_pos.toarray(0) <= max_search_distance)
        | (target_distance_neg.toarray(0) <= max_search_distance)
        | on_this_edge)[0]
    reduced_targets = target_points.getrows(reduced_target_idx)
    logger.debug(
        "Initial filtering reduces number of targets from {0} to {1}".format(
            target_points.ndata, reduced_targets.ndata))

    # pair each surviving target with its original index once, rather than
    # rebuilding the array for every edge the walker yields
    candidates = list(zip(reduced_target_idx, reduced_targets.toarray(0)))

    for path, edge in g:
        # test whether any targets lie on the new edge
        for target_idx, t in candidates:
            if not (t.edge == edge):
                continue

            # get distance from current node to this point
            if not len(path.nodes):
                # this only happens at the starting edge
                dist_between = (t - source).length
            else:
                # all other situations
                dist_along = t.node_dist[path.nodes[-1]]
                dist_between = path.distance_total + dist_along

            # lazy %-args: the message is only built if DEBUG is enabled
            logger.debug("Target %d is on this edge at a distance of %.2f",
                         target_idx, dist_between)

            if dist_between <= max_search_distance:
                logger.debug("Adding target %d to paths", target_idx)
                this_path = NetPath(net_obj,
                                    start=path.start,
                                    end=t,
                                    nodes=list(path.nodes),
                                    distance=dist_between,
                                    split=path.splits_total)
                paths[target_idx].append(this_path)

    return dict(paths)
for ct in crime_types: print "Started %s, getting data..." % ct data, t0, cid = cad.get_crimes_from_dump('monsuru_cad_%s' % ct) cid = np.array(cid) # filter by end date idx = np.where(data[:, 0] < (MAX_DAY_NUMBER + 1))[0] data = data[idx] cid = cid[idx] cids[ct] = cid jiggled = cad.jiggle_all_points_on_grid(data[:, 1], data[:, 2]) b_jiggled = [np.all(a != b) for a, b in zip(data[:, 1:], jiggled)] snapped, fail = NetworkData.from_cartesian(net, jiggled, return_failure_idx=True) keep_idx = [k for k in range(len(cid)) if k not in fail] cid = cid[keep_idx] times = data[keep_idx, 0] failed[ct] = fail print "Done. writing data to a file." fields = [ 'my_idx', 'original_idx', 'days_since_1_mar_2011', 'x', 'y', 'jiggled?' ] xy = snapped.to_cartesian() out_data = [] for k in range(cid.size): out_data.append(
def create_network_with_crime_counts(
        start_date=datetime.date(2011, 3, 1), domain_name='South'):
    """
    Load a street network, count crimes per edge in 6 month and 12 month
    windows, and save the network with those counts as a shapefile.

    Crimes are snapped to the network with a maximum distance cutoff; crimes
    that fail to snap are ignored. Each edge gets the integer attributes
    ``crimes_6mo`` and ``crimes_12mo``.

    :param start_date: First day of the 365 day period to load crimes for
    :param domain_name: Key into the Chicago side polygons, also used to
        locate the clipped network file
    :return: None. Writes '<region>_network_crime_counts' in 'shp' format.
    """
    domains = chicago.get_chicago_side_polys()
    domain = domains[domain_name]
    end_date = start_date + datetime.timedelta(days=365)
    crime_types = (
        'THEFT',
        'BURGLARY',
        'HOMICIDE',
        'BATTERY',
        'ARSON',
        'MOTOR VEHICLE THEFT',
        'ASSAULT',
        'ROBBERY',
    )
    # window name -> predicate applied to the time column of the crime data
    time_window_filters = {
        '6mo': lambda t: t <= 183,
        '12mo': lambda t: t <= 365,
    }

    # get crime data
    data, t0, cid = chicago.get_crimes_by_type(crime_type=crime_types,
                                               start_date=start_date,
                                               end_date=end_date,
                                               domain=domain)

    # get network
    region = consts.FILE_FRIENDLY_REGIONS[domain_name]
    osm_file = os.path.join(DATA_DIR, 'osm_chicago', '%s_clipped.net' % region)
    net = osm.OSMStreetNet.from_pickle(osm_file)

    # snap crime data to network with maximum distance cutoff
    netdata, failed = NetworkData.from_cartesian(net,
                                                 data[:, 1:],
                                                 radius=50,
                                                 return_failure_idx=True)

    # keep only the times of the crimes that snapped successfully, so times
    # and snapped points stay aligned
    idx = sorted(set(range(data.shape[0])) - set(failed))
    netdata = DataArray(data[idx, 0]).adddim(netdata,
                                             type=NetworkSpaceTimeData)

    # per-crime 0/1 indicator for each time window
    times = netdata.toarray(0)
    filters = {}
    for filt_name, filt_func in time_window_filters.items():
        filters[filt_name] = filt_func(times).astype(int)

    # attribute names are derived from the window names so the two cannot
    # drift apart: 'crimes_6mo' and 'crimes_12mo'
    attr_names = dict(
        (filt_name, 'crimes_%s' % filt_name) for filt_name in filters)

    # add zeroed count attributes to all edges
    for e in net.edges():
        for attr in attr_names.values():
            e.attrs[attr] = 0

    # accumulate each crime into the edge it was snapped to
    for i, t in enumerate(netdata.space.toarray()):
        for filt_name, attr in attr_names.items():
            t.edge.attrs[attr] += filters[filt_name][i]

    net.save(region + '_network_crime_counts', fmt='shp')