Beispiel #1
0
    def set_sample_points(self, interval, *args, **kwargs):
        """
        Lay sample points along the network and group them by grid square.

        The points come from network_walker_uniform_sample_points and are reordered so
        that the points belonging to one sample unit are contiguous. Grid squares that
        contain no sample point are dropped: self.sample_units and self.grid_polys are
        overwritten with the retained squares only.
        :param interval: The length interval between sample points
        :param args: Not used
        :param kwargs: Not used
        :return: None
        """
        # retained squares, their polygons and the regrouped points
        kept_units = []
        kept_polys = []
        grouped_points = []
        # the per-unit counts are accumulated directly on the instance attribute
        counts = self.n_sample_point_per_unit = []

        # the walker hands the points back in network edge order; the per-edge
        # counts it also returns are not needed here
        all_points, _ = network_walker_uniform_sample_points(
            self.graph, interval)
        px, py = all_points.to_cartesian().separate

        for idx, bounds in enumerate(self.sample_units):
            xmin, ymin, xmax, ymax = bounds
            # half-open square: lower edges inclusive, upper edges exclusive
            inside = (px >= xmin) & (px < xmax) & (py >= ymin) & (py < ymax)
            n_inside = sum(inside)
            if not n_inside:
                # empty square: it does not become a sample unit
                continue
            grouped_points.extend(all_points.getrows(inside).toarray(0))
            counts.append(n_inside)
            kept_units.append((xmin, ymin, xmax, ymax))
            kept_polys.append(self.grid_polys[idx])

        self.n_sample_point_per_unit = np.array(counts)
        self.sample_points = NetworkData(grouped_points)
        self.sample_units = kept_units
        self.grid_polys = kept_polys
Beispiel #2
0
def uniform_random_points_on_net(net, n=1):
    """
    Draw n NetPoints at random that lie on the supplied network.

    Edges are selected with a weight equal to their length, then a position is drawn
    uniformly along each selected edge.
    :param net: Network exposing edges(); every edge provides length, orientation_neg
        and orientation_pos
    :param n: Number of points to draw
    :return: NetworkData array if n>1, else NetPoint
    """
    edges = net.edges()
    edge_lengths = np.array([edge.length for edge in edges])

    # random edge draw weighted by segment length
    drawn = weighted_random_selection(edge_lengths, n=n)
    if n == 1:
        # with n == 1 the selection result is used directly as a single index
        chosen = [edges[drawn]]
    else:
        chosen = [edges[k] for k in drawn]

    # fractional position along each chosen edge, measured from the negative node
    fractions = np.random.rand(n)
    pts = [
        NetPoint(net, edge, {
            edge.orientation_neg: edge.length * frac,
            edge.orientation_pos: edge.length * (1 - frac),
        })
        for edge, frac in zip(chosen, fractions)
    ]

    return pts[0] if n == 1 else NetworkData(pts)
Beispiel #3
0
def network_walker_uniform_sample_points(net_obj, interval, source_node=None):
    """
    Generate NetPoints uniformly along the network with the supplied interval.

    The network is traversed with network_walker (repeat_edges=False). The position of
    the first point on an edge is derived from the distance already walked to reach it,
    so the spacing carries over from one edge to the next along a walked path. An edge
    on which no point falls that way receives a single point at its centroid.
    :param net_obj: StreetNet instance
    :param interval: Distance between points
    :param source_node: Optionally specify the node to start at. This will affect the outcome.
    :return: (points, n_per_edge). points is a NetworkData holding every NetPoint, grouped
        by edge in the order of net_obj.edges(); n_per_edge is an array with the number of
        points placed on each edge, in the same order.
    """
    g = network_walker(net_obj, source_node=source_node, repeat_edges=False)

    # pre-populate in edge order so that the output follows net_obj.edges()
    # rather than the order in which the walker happens to visit the edges
    points = OrderedDict()
    n_per_edge = OrderedDict()
    for e in net_obj.edges():
        points[e] = None
        n_per_edge[e] = None

    for path, edge in g:
        el = edge.length

        # distance along this edge of the first point, continuing the spacing
        # from the distance already covered by the path
        npl = interval - path.distance_total % interval

        # distances along to deposit points
        point_dists = np.arange(npl, el, interval)

        if not point_dists.size:
            # this edge is too short - just place one point at the centroid
            points[edge] = [edge.centroid]
            n_per_edge[edge] = 1
            continue

        n_per_edge[edge] = point_dists.size

        # distances are measured from the node the path arrived at (on);
        # op is the node at the other end of the edge
        on = path.nodes[-1]
        op = get_next_node(edge, on)

        points[edge] = [
            NetPoint(net_obj, edge, {on: pd, op: el - pd})
            for pd in point_dists
        ]

    # NOTE(review): an edge the walker never yields keeps its None placeholder, which
    # makes the flattening below raise TypeError - confirm the walker always covers
    # every edge of the network.
    # Flatten in one linear pass (repeated list addition is quadratic).
    all_points = [pt for edge_points in points.values() for pt in edge_points]
    points = NetworkData(all_points)
    # list(...) keeps this a 1D array when values() is a view rather than a list
    n_per_edge = np.array(list(n_per_edge.values()))

    return points, n_per_edge
Beispiel #4
0
def network_point_coverage(net, dx=None, include_nodes=True):
    '''
    Produce a series of semi-regularly-spaced points on the supplied network.

    Every edge receives ceil(length / dx) points, evenly spaced between a small offset
    from its negative node and the same offset from its positive node.
    :param net: Network exposing edges(); every edge is indexed by 'length',
        'orientation_neg' and 'orientation_pos'
    :param dx: Optional spacing between points. A falsy value (None or 0) selects 1.
    :param include_nodes: Accepted but not used by this implementation: no points are
        added at node locations
    :return: - NetworkData array of NetPoints, ordered by edge
             - list of length E. Each entry gives the number of points on that edge
    '''

    # small delta to avoid placing points exactly on the end nodes
    eps = 1e-6

    ## temp set dx with a constant
    dx = dx or 1.

    cd = []
    edge_count = []

    for edge in net.edges():
        length = edge['length']
        # np.ceil rather than np.math.ceil: np.math was an accidental alias of the
        # standard math module and is no longer available in recent NumPy releases
        n_pt = int(np.ceil(length / float(dx)))
        interp_lengths = np.linspace(eps, length - eps, n_pt)

        for dist in interp_lengths:
            node_dist = {
                edge['orientation_neg']: dist,
                edge['orientation_pos']: length - dist,
            }
            cd.append(NetPoint(net, edge, node_dist))
        edge_count.append(interp_lengths.size)

    return NetworkData(cd), edge_count
Beispiel #5
0
    def __init__(self,
                 net_obj,
                 targets,
                 max_distance=None,
                 max_split=None,
                 repeat_edges=True,
                 verbose=False,
                 logger=None):
        """
        Store the walk configuration and set up logging and the walk caches.
        :param net_obj: Network the walks are carried out on
        :param targets: Target points; wrapped in NetworkData
        :param max_distance: Stored as self.max_distance
        :param max_split: Stored as self.max_split
        :param repeat_edges: Stored as self.repeat_edges
        :param verbose: If truthy the logger level is DEBUG, otherwise INFO. When no
            logger is supplied it also selects a stream handler instead of a null handler.
        :param logger: Optional logger to use. Its level is set according to verbose.
        """
        self.net_obj = net_obj
        self.targets = NetworkData(targets)
        self.max_distance = max_distance
        self.max_split = max_split
        self.repeat_edges = repeat_edges

        # logging: fall back to the shared "NetworkWalker" logger
        if not logger:
            logger = logging.getLogger("NetworkWalker")
            # make sure logger has no handlers to begin with
            logger.handlers = []
            handler = logging.StreamHandler() if verbose else logging.NullHandler()
            logger.addHandler(handler)
        self.logger = logger

        self.verbose = True if verbose else False
        self.logger.setLevel(logging.DEBUG if verbose else logging.INFO)

        # these dictionaries keep track of the walks already carried out
        self.cached_walks = {}
        self.cached_source_target_paths = {}
Beispiel #6
0
 def prep_input(x):
     """Return x unchanged if it is already a NetworkData, otherwise wrap it in one."""
     return x if isinstance(x, NetworkData) else NetworkData(x)
Beispiel #7
0
class NetworkRocUniformSamplingGrid(NetworkRocSegments):
    """
    Place sample points uniformly over the network, then divide into sample units by imposing a regular grid
    """
    @staticmethod
    def generate_bounding_poly(data):
        """
        Bounding polygon of the data, computed from its Cartesian coordinates.
        :param data: Network data exposing to_cartesian()
        :return: The result of RocGrid.generate_bounding_poly for the converted data
        """
        return RocGrid.generate_bounding_poly(data.to_cartesian())

    def set_sample_units(self, side_length, interval, *args, **kwargs):
        """
        Set the ROC grid.
        :param side_length: side length of grid squares
        :param interval: The length interval between sample points
        :param args: Passed to set_sample_points
        :param kwargs: Passed to set_sample_points.
        :return: None
        """
        # reset prediction values
        self.prediction_values = None

        if not self.poly:
            # find minimal bounding rectangle
            self.poly = self.generate_bounding_poly(self.data)

        # set sample grid
        self.side_length = side_length
        self.grid_polys, self.sample_units, _ = create_spatial_grid(
            self.poly, self.side_length)

        # set network sampling points
        self.set_sample_points(interval, *args, **kwargs)

    def set_sample_points(self, interval, *args, **kwargs):
        """
        Lay sample points along the network and group them by grid square.

        The points are reordered so that the points belonging to one sample unit are
        contiguous. Grid squares that contain no sample point are dropped, so
        self.sample_units and self.grid_polys are overwritten with the retained squares.
        :param interval: The length interval between sample points
        :param args: Not used
        :param kwargs: Not used
        :return: None
        """
        # these are initially in the same order as the network edges
        sample_points, _ = network_walker_uniform_sample_points(
            self.graph, interval)

        sample_units = []
        grid_polys = []
        reordered_sample_points = []
        n_per_unit = []
        x, y = sample_points.to_cartesian().separate

        for i, (xmin, ymin, xmax, ymax) in enumerate(self.sample_units):
            # half-open square: lower edges inclusive, upper edges exclusive
            in_su = (x >= xmin) & (x < xmax) & (y >= ymin) & (y < ymax)
            n = sum(in_su)
            if not n:
                continue
            reordered_sample_points.extend(
                sample_points.getrows(in_su).toarray(0))
            n_per_unit.append(n)
            sample_units.append((xmin, ymin, xmax, ymax))
            grid_polys.append(self.grid_polys[i])

        self.n_sample_point_per_unit = np.array(n_per_unit)
        self.sample_points = NetworkData(reordered_sample_points)
        self.sample_units = sample_units
        self.grid_polys = grid_polys

    def plot(self,
             show_sample_units=True,
             show_prediction=True,
             fmax=0.9,
             cmap='Reds',
             **kwargs):
        """
        Plot the network outline with the sampling grid on top.

        A sample unit of this class is a grid square (xmin, ymin, xmax, ymax) rather than
        a network edge, so predictions are shown by filling each square instead of by
        colouring edges.
        :param show_sample_units: If True, draw the sample points of every grid square
        :param show_prediction: If True and prediction values are available, each grid
            square is filled with the colour of its prediction value. Otherwise the
            squares are drawn in alternating greys, and only if show_sample_units is set.
        :param fmax: Fraction of the way through the sorted prediction values at which
            the colour scale saturates
        :param cmap: Name of the matplotlib colourmap used for the prediction values
        :param kwargs: Not used
        :return: None
        """
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_aspect('equal')

        # plot standard network edge outlines without colour
        self.graph.plot_network(ax=ax, edge_width=10, edge_inner_col='w')

        unit_colours = None
        if show_prediction and self.prediction_values is not None:
            ranked = sorted(self.prediction_values)
            if ranked:
                # clamp so that fmax=1 selects the largest value rather than
                # indexing one past the end
                top = min(int(len(ranked) * fmax), len(ranked) - 1)
                norm = mpl.colors.Normalize(vmin=0, vmax=ranked[top])
                # the colourmap name is resolved by ScalarMappable itself
                colour_mapper = cm.ScalarMappable(norm=norm, cmap=cmap)
                # one colour per sample unit, in sample unit order
                unit_colours = [
                    colour_mapper.to_rgba(pv) for pv in self.prediction_values
                ]

        if show_sample_units or unit_colours is not None:
            xsp, ysp = self.sample_points.to_cartesian().separate
            unit_bounds = np.array(self.sample_units)
            mins = unit_bounds.min(axis=0)
            maxs = unit_bounds.max(axis=0)
            # lower-left corners of every second column / row, used to build a
            # chequerboard pattern
            xmin_group = np.arange(mins[0], maxs[2], 2 * self.side_length)
            ymin_group = np.arange(mins[1], maxs[3], 2 * self.side_length)

            # sample points are stored grouped by unit: count is the offset of
            # the first point of the current unit
            count = 0
            units = zip(self.grid_polys, self.sample_units,
                        self.n_sample_point_per_unit)
            for k, (gp, su, n) in enumerate(units):
                a = np.any(np.abs(xmin_group - su[0]) < 1e-3) ^ np.any(
                    np.abs(ymin_group - su[1]) < 1e-3)
                if unit_colours is not None:
                    fc = unit_colours[k]
                else:
                    # alternating grey grid squares
                    fc = np.ones(3) * (0.5 if a else 0.8)
                plot_shapely_geos(gp, facecolor=fc, alpha=0.4)
                if show_sample_units:
                    # alternating grey - black markers
                    mc = np.ones(3) * 0.5 if a else 'k'
                    ax.plot(
                        xsp[count:count + n],
                        ysp[count:count + n],
                        'o',
                        color=mc,
                        markersize=5,
                    )
                count += n

        # remove x and y ticks as these rarely add anything
        ax.set_xticks([])
        ax.set_yticks([])
Beispiel #8
0
def network_paths_source_targets(net_obj,
                                 source,
                                 targets,
                                 max_search_distance,
                                 max_split=None,
                                 verbose=False,
                                 logger=None):
    """
    Find the network paths from a source point to the targets within a maximum distance.

    The targets are first pre-filtered in Cartesian space: a target is retained if either
    end node of its edge lies within max_search_distance of the source, or if it sits on
    the same edge as the source. The network is then walked from the source and a NetPath
    is recorded every time a walked edge carries a retained target whose distance does not
    exceed max_search_distance.
    :param net_obj: Network to walk
    :param source: Network point to start from; must expose edge and cartesian_coords
    :param targets: Target points; wrapped in NetworkData
    :param max_search_distance: Maximum distance of a reported path. Also passed to the
        walker as max_distance.
    :param max_split: Passed to network_walker_from_net_point
    :param verbose: Passed to network_walker_from_net_point
    :param logger: Optional logger. When None, the logger named 'null' is used with a
        single NullHandler.
    :return: dict keyed by the index of a target in the input; each value is the list of
        NetPath objects found for that target. Targets without a path are absent.
    """
    if logger is None:
        logger = logging.getLogger('null')
        logger.handlers = []
        logger.addHandler(logging.NullHandler())
    target_points = NetworkData(targets)
    paths = defaultdict(list)

    g = network_walker_from_net_point(net_obj,
                                      source,
                                      max_distance=max_search_distance,
                                      max_split=max_split,
                                      repeat_edges=True,
                                      verbose=verbose,
                                      logger=logger)

    # materialise the targets once; every filter below reuses this array
    all_targets = target_points.toarray(0)

    # Cartesian filtering by nodes (the end nodes of each target's edge, not the
    # target's own coordinates)
    target_nodes_pos = CartesianData(
        [t.edge.node_pos_coords for t in all_targets])
    target_nodes_neg = CartesianData(
        [t.edge.node_neg_coords for t in all_targets])
    # Find the targets on the source edge and include these explicitly.
    # This is required for longer edges, where neither of the edge nodes are within max_search distance
    on_this_edge = np.array([t.edge == source.edge for t in all_targets])
    logger.debug("Found %d points on the starting edge", on_this_edge.sum())
    source_xy_tiled = CartesianData([source.cartesian_coords] *
                                    target_points.ndata)

    target_distance_pos = target_nodes_pos.distance(source_xy_tiled)
    target_distance_neg = target_nodes_neg.distance(source_xy_tiled)

    reduced_target_idx = np.where(
        (target_distance_pos.toarray(0) <= max_search_distance)
        | (target_distance_neg.toarray(0) <= max_search_distance)
        | on_this_edge)[0]
    reduced_targets = target_points.getrows(reduced_target_idx)
    logger.debug(
        "Initial filtering reduces number of targets from %s to %s",
        target_points.ndata, reduced_targets.ndata)

    # The retained targets do not change during the walk, so pair each one with
    # its original index once instead of rebuilding the array for every edge.
    candidates = list(zip(reduced_target_idx, reduced_targets.toarray(0)))

    for path, edge in g:
        # test whether any targets lie on the new edge
        for target_idx, t in candidates:
            if t.edge == edge:
                # get distance from current node to this point
                if not len(path.nodes):
                    # this only happens at the starting edge
                    dist_between = (t - source).length
                else:
                    # all other situations
                    dist_along = t.node_dist[path.nodes[-1]]
                    dist_between = path.distance_total + dist_along
                logger.debug(
                    "Target %d is on this edge at a distance of %.2f",
                    target_idx, dist_between)
                if dist_between <= max_search_distance:
                    logger.debug("Adding target %d to paths", target_idx)
                    this_path = NetPath(net_obj,
                                        start=path.start,
                                        end=t,
                                        nodes=list(path.nodes),
                                        distance=dist_between,
                                        split=path.splits_total)
                    paths[target_idx].append(this_path)

    return dict(paths)
Beispiel #9
0
    def test_network_binary_hotspot(self):
        """
        Check STNetworkFixedRadius predictions for targets one and two edges away
        from a single source, at times before and after the source time.
        """
        from network.streetnet import NetPoint

        def point_at(edge, dist_from_neg):
            # NetPoint on `edge` at the given distance from its negative node
            return NetPoint(itn_net, edge, {
                edge.orientation_neg: dist_from_neg,
                edge.orientation_pos: edge.length - dist_from_neg
            })

        def check_prediction(t, target_points, expected):
            # the prediction must agree with `expected` to within self.tol
            pred = stk.predict(t, target_points)
            self.assertTrue(np.all(np.abs(pred - expected) < self.tol))

        stk = hotspot.STNetworkFixedRadius(
            radius=50)  # value is zero when the net distance is > 50
        itn_net = load_test_network()
        source_edge = itn_net.edges()[40]
        source = NetworkSpaceTimeData([[1.0, source_edge.centroid]])
        stk.train(source)

        # one edge away
        # distances from the source: ~20m, ~49m, ~49m, ~54m, >> 50m
        near_edge = itn_net.edges()[41]
        targets = NetworkData(
            [point_at(near_edge, d) for d in (1, 30, 30, 35, 75)])

        # NB: times less than the source (1.0) will result in zeros
        check_prediction(0.99, targets, np.zeros(5))
        check_prediction(1.01, targets,
                         np.array([np.exp(-.01)] * 3 + [0., 0.]))
        check_prediction(1.1, targets,
                         np.array([np.exp(-.1)] * 3 + [0., 0.]))

        # two edges away
        # distances from the source: ~40m, ~40m, ~60m
        far_edge = itn_net.edges()[2]
        targets = NetworkData(
            [point_at(far_edge, d) for d in (120, 120, 100)])

        check_prediction(1.01, targets,
                         np.array([np.exp(-.01)] * 2 + [0.]))
        check_prediction(2.7, targets,
                         np.array([np.exp(-1.7)] * 2 + [0.]))
Beispiel #10
0
    def test_network_roc(self):
        """
        Run network validation with three kernel / ROC combinations and check the
        prediction ranks and the set of sample units with a non-zero prediction.
        """
        itn_net = load_test_network()

        # lay down a few events on the network, each midway along its edge
        edge_idx = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 100]
        net_point_array = NetworkData(
            [itn_net.edges()[i].centroid for i in edge_idx])

        # prepend evenly spaced times to get the full space-time dataset
        times = np.arange(net_point_array.ndata) / float(len(edge_idx))
        st_net_array = DataArray.from_args(times).adddim(
            net_point_array, type=NetworkSpaceTimeData)

        # test 1: Bowers kernel (decrease with time and distance)
        stk = hotspot.STNetworkBowers(a=10., b=1.)
        vb = validation.NetworkValidationBase(st_net_array, stk)
        vb.set_sample_units(None)  # the argument makes no difference here
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1)

        # on each step, the segment corresponding to the most recent event should have the highest prediction rank
        for step in range(5):
            observed_rank = res['prediction_rank'][step][:(step + 1)]
            expected_rank = np.array(edge_idx[5:(step + 6)])[::-1]
            self.assertTrue(np.all(observed_rank == expected_rank))

        # test 2: binary mask kernel in space, decr in time
        # exponential decrease with dt, zero when the distance exceeds radius
        radius = 50.
        stk = hotspot.STNetworkFixedRadius(radius, 1.0)
        vb = validation.NetworkValidationBase(st_net_array, stk)
        vb.set_sample_units(None)  # the argument makes no difference here
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1, n_iter=1)
        pvals = vb.roc.prediction_values

        # a sample unit is expected to be non-zero when its centroid is within
        # radius (network distance) of any training point
        pvals_expctd_nonzero = np.array([
            i for i, e in enumerate(vb.roc.sample_units)
            if np.any([(e.centroid - x).length <= radius
                       for x in vb.training.toarray(1)])
        ])
        self.assertTrue(
            np.all(pvals.nonzero()[0] == np.array(pvals_expctd_nonzero)))

        # test 3: repeat but with a mean version of the ROC

        vb = validation.NetworkValidationMean(st_net_array, stk)
        vb.set_sample_units(None, 10)  # points are spaced ~10m apart
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1, n_iter=1)  # just one run this time
        pvals2 = vb.roc.prediction_values

        # same check per sample point; each hit is mapped back to its sample unit
        pvals_expctd_nonzero2 = []
        for i, pt in enumerate(vb.sample_points.toarray(0)):
            within_radius = np.any([(pt - x).length <= radius
                                    for x in vb.training.toarray(1)])
            if not within_radius:
                continue
            # find sample unit from sample point
            this_sample_unit = bisect.bisect_right(
                vb.roc.n_sample_point_per_unit.cumsum(), i)
            if this_sample_unit not in pvals_expctd_nonzero2:
                pvals_expctd_nonzero2.append(this_sample_unit)

        self.assertTrue(
            np.all(pvals2.nonzero()[0] == np.array(pvals_expctd_nonzero2)))
Beispiel #11
0
    from data.models import CartesianData, DataArray, NetworkData
    from matplotlib import pyplot as plt

    # Script fragment: evaluate a NetworkKernelEqualSplitLinear kernel, centred on one
    # randomly placed source, at target points laid uniformly over the test network.
    itn_net = load_test_network()
    # node locations and network extent; `nodes` is not used in the visible part of the script
    nodes = np.array([t['loc'] for t in itn_net.g.node.values()])
    xmin, ymin, xmax, ymax = itn_net.extent
    # target points placed along the network with an interval of 10
    targets, n_per_edge = utils.network_walker_uniform_sample_points(
        itn_net, 10)

    # lay down some random points within that box
    num_pts = 100

    x_pts = np.random.rand(num_pts) * (xmax - xmin) + xmin
    y_pts = np.random.rand(num_pts) * (ymax - ymin) + ymin
    xy = CartesianData.from_args(x_pts, y_pts)
    # snap the random Cartesian points onto the network
    sources = NetworkData.from_cartesian(
        itn_net, xy, grid_size=50)  # grid_size defaults to 50
    # not all points necessarily snap successfully
    num_pts = sources.ndata

    radius = 200.

    # the walker is limited to `radius` and shared with the kernel below
    nw = utils.NetworkWalker(itn_net,
                             targets,
                             max_distance=radius,
                             max_split=1e4)
    # NOTE(review): the literal 200. duplicates `radius` - presumably both are meant to
    # be the same distance; confirm and pass `radius` if so
    k = NetworkKernelEqualSplitLinear(sources.getone(0), 200.)
    k.set_walker(nw)
    z = k.pdf()
    # rescale so that the largest value is 1
    zn = z / max(z)
    # plt.figure()
    # itn_net.plot_network()
Beispiel #12
0
def network_paths_source_targets(net_obj,
                                 source,
                                 targets,
                                 max_search_distance,
                                 max_split=None,
                                 verbose=False,
                                 logger=None):
    """
    Find the network paths from a source point to the targets within a maximum distance.

    The targets are first pre-filtered in Cartesian space: a target is retained if either
    end node of its edge lies within max_search_distance of the source, or if it sits on
    the same edge as the source. The network is then walked from the source and a NetPath
    is recorded every time a walked edge carries a retained target whose distance does not
    exceed max_search_distance.
    :param net_obj: Network to walk
    :param source: Network point to start from; must expose edge and cartesian_coords
    :param targets: Target points; wrapped in NetworkData
    :param max_search_distance: Maximum distance of a reported path. Also passed to the
        walker as max_distance.
    :param max_split: Passed to network_walker_from_net_point
    :param verbose: Passed to network_walker_from_net_point
    :param logger: Optional logger. When None, the logger named 'null' is used with a
        single NullHandler.
    :return: dict keyed by the index of a target in the input; each value is the list of
        NetPath objects found for that target. Targets without a path are absent.
    """
    if logger is None:
        logger = logging.getLogger('null')
        logger.handlers = []
        logger.addHandler(logging.NullHandler())
    target_points = NetworkData(targets)
    paths = defaultdict(list)

    g = network_walker_from_net_point(net_obj,
                                      source,
                                      max_distance=max_search_distance,
                                      max_split=max_split,
                                      repeat_edges=True,
                                      verbose=verbose,
                                      logger=logger)

    # materialise the targets once; every filter below reuses this array
    all_targets = target_points.toarray(0)

    # Cartesian filtering by nodes (the end nodes of each target's edge, not the
    # target's own coordinates)
    target_nodes_pos = CartesianData(
        [t.edge.node_pos_coords for t in all_targets])
    target_nodes_neg = CartesianData(
        [t.edge.node_neg_coords for t in all_targets])
    # Find the targets on the source edge and include these explicitly.
    # This is required for longer edges, where neither of the edge nodes are within max_search distance
    on_this_edge = np.array([t.edge == source.edge for t in all_targets])
    logger.debug("Found %d points on the starting edge", on_this_edge.sum())
    source_xy_tiled = CartesianData([source.cartesian_coords] *
                                    target_points.ndata)

    target_distance_pos = target_nodes_pos.distance(source_xy_tiled)
    target_distance_neg = target_nodes_neg.distance(source_xy_tiled)

    reduced_target_idx = np.where(
        (target_distance_pos.toarray(0) <= max_search_distance)
        | (target_distance_neg.toarray(0) <= max_search_distance)
        | on_this_edge)[0]
    reduced_targets = target_points.getrows(reduced_target_idx)
    logger.debug(
        "Initial filtering reduces number of targets from %s to %s",
        target_points.ndata, reduced_targets.ndata)

    # The retained targets do not change during the walk, so pair each one with
    # its original index once instead of rebuilding the array for every edge.
    candidates = list(zip(reduced_target_idx, reduced_targets.toarray(0)))

    for path, edge in g:
        # test whether any targets lie on the new edge
        for target_idx, t in candidates:
            if t.edge == edge:
                # get distance from current node to this point
                if not len(path.nodes):
                    # this only happens at the starting edge
                    dist_between = (t - source).length
                else:
                    # all other situations
                    dist_along = t.node_dist[path.nodes[-1]]
                    dist_between = path.distance_total + dist_along
                logger.debug(
                    "Target %d is on this edge at a distance of %.2f",
                    target_idx, dist_between)
                if dist_between <= max_search_distance:
                    logger.debug("Adding target %d to paths", target_idx)
                    this_path = NetPath(net_obj,
                                        start=path.start,
                                        end=t,
                                        nodes=list(path.nodes),
                                        distance=dist_between,
                                        split=path.splits_total)
                    paths[target_idx].append(this_path)

    return dict(paths)
Beispiel #13
0
    for ct in crime_types:
        print "Started %s, getting data..." % ct
        data, t0, cid = cad.get_crimes_from_dump('monsuru_cad_%s' % ct)
        cid = np.array(cid)
        # filter by end date
        idx = np.where(data[:, 0] < (MAX_DAY_NUMBER + 1))[0]
        data = data[idx]
        cid = cid[idx]
        cids[ct] = cid

        jiggled = cad.jiggle_all_points_on_grid(data[:, 1], data[:, 2])
        b_jiggled = [np.all(a != b) for a, b in zip(data[:, 1:], jiggled)]

        snapped, fail = NetworkData.from_cartesian(net,
                                                   jiggled,
                                                   return_failure_idx=True)
        keep_idx = [k for k in range(len(cid)) if k not in fail]
        cid = cid[keep_idx]
        times = data[keep_idx, 0]
        failed[ct] = fail

        print "Done. writing data to a file."
        fields = [
            'my_idx', 'original_idx', 'days_since_1_mar_2011', 'x', 'y',
            'jiggled?'
        ]
        xy = snapped.to_cartesian()
        out_data = []
        for k in range(cid.size):
            out_data.append(
Beispiel #14
0
def create_network_with_crime_counts(
        start_date=datetime.date(2011, 3, 1), domain_name='South'):
    """
    Count the crimes snapped to every network edge in two time windows and save the
    network as a shapefile.

    Crimes of a fixed list of types are loaded for the 365 days following start_date,
    snapped to the street network of the domain (maximum snapping radius 50) and counted
    per edge. The counts are stored in the edge attributes 'crimes_6mo' and 'crimes_12mo'.
    :param start_date: First day of the period for which crimes are loaded
    :param domain_name: Key into chicago.get_chicago_side_polys() and
        consts.FILE_FRIENDLY_REGIONS
    :return: None. The network is written with net.save(..., fmt='shp').
    """
    # load network, count crimes in 6mo and 12mo window, output shapefile
    domains = chicago.get_chicago_side_polys()
    domain = domains[domain_name]

    end_date = start_date + datetime.timedelta(days=365)
    crime_types = (
        'THEFT',
        'BURGLARY',
        'HOMICIDE',
        'BATTERY',
        'ARSON',
        'MOTOR VEHICLE THEFT',
        'ASSAULT',
        'ROBBERY',
    )

    # (window name, predicate on the time column). The name is also the suffix of the
    # edge attribute, so adding a window here is enough to have it counted and saved.
    # Kept as an ordered tuple so that the attributes are always created in this order.
    # NOTE(review): the thresholds presumably are days since start_date - confirm.
    time_windows = (
        ('6mo', lambda t: t <= 183),
        ('12mo', lambda t: t <= 365),
    )

    # get crime data
    data, t0, cid = chicago.get_crimes_by_type(crime_type=crime_types,
                                               start_date=start_date,
                                               end_date=end_date,
                                               domain=domain)

    # get network
    osm_file = os.path.join(
        DATA_DIR, 'osm_chicago',
        '%s_clipped.net' % consts.FILE_FRIENDLY_REGIONS[domain_name])
    net = osm.OSMStreetNet.from_pickle(osm_file)

    # snap crime data to network with maximum distance cutoff
    netdata, failed = NetworkData.from_cartesian(net,
                                                 data[:, 1:],
                                                 radius=50,
                                                 return_failure_idx=True)
    # get non-failed times
    idx = sorted(set(range(data.shape[0])) - set(failed))
    netdata = DataArray(data[idx, 0]).adddim(netdata,
                                             type=NetworkSpaceTimeData)

    # one 0/1 flag per crime and per window: 1 if the crime falls inside the window
    times = netdata.toarray(0)
    window_flags = [('crimes_%s' % name, in_window(times).astype(int))
                    for name, in_window in time_windows]

    # add count attributes to all edges
    for e in net.edges():
        for attr, _ in window_flags:
            e.attrs[attr] = 0

    # run over the snapped crimes and increment the counters of their edge
    for i, t in enumerate(netdata.space.toarray()):
        edge_attrs = t.edge.attrs
        for attr, flags in window_flags:
            edge_attrs[attr] += flags[i]

    net.save(consts.FILE_FRIENDLY_REGIONS[domain_name] +
             '_network_crime_counts',
             fmt='shp')