Beispiel #1
0
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad
    ones.

    `goods` and `bads` are each an `(ids, locations)` pair; `locations` is
    indexed as a 2-D array whose columns 0 and 1 are the x and y coordinates.
    `all_locs` must have exactly two columns: its per-column minimum and
    maximum define the extent of the `size` x `size` grid.

    Return three parallel lists with one entry per merged region: the convex
    hull of the region, the ids of the good points it contains and the ids of
    the bad points it contains.

    Side effect: `GRID_SIZE`, `TOP_K` and `index_to_rect` of the
    `spatial_scan` module are overwritten.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` evenly spaced edges per coordinate axis.
    xedges, yedges = [
        np.linspace(low, high, size + 1)
        for low, high in zip(np.min(all_locs, 0), np.max(all_locs, 0))
    ]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads
    # histogram2d puts x bins on the first axis; transposing before ravel()
    # makes the flat index equal to `y_bin * size + x_bin`, which is the
    # layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its
        `idx`"""
        i = idx % size
        # Floor division: `/` yields a float on Python 3, which cannot be
        # used to index the edge arrays.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i + 1], yedges[j + 1]]

    sps.index_to_rect = euc_index_to_rect

    # `//` keeps the last argument at the integer value (6) that Python 2's
    # `/` produced for these operands.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)), discrepancy,
                             sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] is the object providing `contains` and `convex_hull`
        # (presumably a shapely geometry -- confirm against merge_regions).
        shape = region[1]
        gcluster.append([
            id_ for id_, point in zip(good_ids, good_points)
            if shape.contains(point)
        ])
        bcluster.append([
            id_ for id_, point in zip(bad_ids, bad_points)
            if shape.contains(point)
        ])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
Beispiel #2
0
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad
    ones.

    `goods` and `bads` are each an `(ids, locations)` pair; `locations` is
    indexed as a 2-D array whose columns 0 and 1 are the x and y coordinates.
    `all_locs` must have exactly two columns: its per-column minimum and
    maximum define the extent of the `size` x `size` grid.

    Return three parallel lists with one entry per merged region: the convex
    hull of the region, the ids of the good points it contains and the ids of
    the bad points it contains.

    Side effect: `GRID_SIZE`, `TOP_K` and `index_to_rect` of the
    `spatial_scan` module are overwritten.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` evenly spaced edges per coordinate axis.
    xedges, yedges = [np.linspace(low, high, size+1)
                      for low, high in zip(np.min(all_locs, 0),
                                           np.max(all_locs, 0))]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads
    # histogram2d puts x bins on the first axis; transposing before ravel()
    # makes the flat index equal to `y_bin * size + x_bin`, which is the
    # layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its
        `idx`"""
        i = idx % size
        # Floor division: `/` yields a float on Python 3, which cannot be
        # used to index the edge arrays.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i+1], yedges[j+1]]
    sps.index_to_rect = euc_index_to_rect

    # `//` keeps the last argument at the integer value (6) that Python 2's
    # `/` produced for these operands.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K,
                             sps.GRID_SIZE//8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] is the object providing `contains` and `convex_hull`
        # (presumably a shapely geometry -- confirm against merge_regions).
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
Beispiel #3
0
def do_scan(client, city, k, photos_as_background=True):
    """Run the discrepancy scan for `city` on a `k` x `k` grid.

    The background and measured frequencies come from `load_frequency`.
    When the measured total is not positive nothing is scanned and `None`
    is returned; otherwise the result is the pair
    `(top_loc, compute_ratio(background, measured))`.
    """
    background, measured = load_frequency(client, city, k,
                                          photos_as_background)
    total_b = np.sum(background)
    total_m = np.sum(measured)
    if not total_m > 0:
        return

    # total_m is strictly positive from here on, so the three ranges
    # (0, 500], (500, 2000] and (2000, inf) reduce to a simple ladder.
    if total_m <= 500:
        support = 20
    elif total_m <= 2000:
        support = 40
    else:
        support = sps.MAX_SUPPORT

    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)
    shape = (k, k)

    # Echo the scan parameters before starting the search.
    template = u'g={}, s={}, k={}, w={}, h={}, max={}'
    summary = template.format(k, support, sps.TOP_K, sps.MIN_WIDTH,
                              sps.MIN_HEIGHT, sps.MAX_SIZE)
    print(summary)

    measured_grid = np.reshape(measured, shape)
    background_grid = np.reshape(background, shape)
    # NOTE(review): `/` is true division on Python 3 -- confirm that
    # exact_grid accepts a non-integer value here.
    size_bound = k / sps.MAX_SIZE
    top_loc = sps.exact_grid(measured_grid, background_grid, discrepancy,
                             sps.TOP_K, size_bound)
    return top_loc, compute_ratio(background, measured)
Beispiel #4
0
def do_scan(client, city, k, photos_as_background=True):
    """Run the discrepancy scan for `city` on a `k` x `k` grid.

    The background and measured frequencies come from `load_frequency`.
    When the measured total is not positive nothing is scanned and `None`
    is returned; otherwise the result is the pair
    `(top_loc, compute_ratio(background, measured))`.
    """
    background, measured = load_frequency(client, city, k,
                                          photos_as_background)
    total_b = np.sum(background)
    total_m = np.sum(measured)
    if not total_m > 0:
        return

    # total_m is strictly positive from here on, so the three ranges
    # (0, 500], (500, 2000] and (2000, inf) reduce to a simple ladder.
    if total_m <= 500:
        support = 20
    elif total_m <= 2000:
        support = 40
    else:
        support = sps.MAX_SUPPORT

    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)
    shape = (k, k)

    # Echo the scan parameters before starting the search.
    template = u'g={}, s={}, k={}, w={}, h={}, max={}'
    summary = template.format(k, support, sps.TOP_K, sps.MIN_WIDTH,
                              sps.MIN_HEIGHT, sps.MAX_SIZE)
    print(summary)

    measured_grid = np.reshape(measured, shape)
    background_grid = np.reshape(background, shape)
    # NOTE(review): `/` is true division on Python 3 -- confirm that
    # exact_grid accepts a non-integer value here.
    size_bound = k / sps.MAX_SIZE
    top_loc = sps.exact_grid(measured_grid, background_grid, discrepancy,
                             sps.TOP_K, size_bound)
    return top_loc, compute_ratio(background, measured)