Beispiel #1
0
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad
    ones.

    The bounding box of `all_locs` is cut into a 50x50 grid, good and bad
    points are counted per cell, and `spatial_scan` is asked for the top
    regions, which are then merged.

    Side effect: overwrites `GRID_SIZE`, `TOP_K` and `index_to_rect` on the
    `spatial_scan` module.

    Args:
        goods: pair `(ids, locs)`; `locs` is indexed as `locs[:, 0]` and
            `locs[:, 1]`, i.e. a 2-column array of coordinates.
        bads: pair `(ids, locs)` with the same layout as `goods`.
        all_locs: 2-column array whose per-column min/max define the grid
            extent.

    Returns:
        A tuple `(hulls, gcluster, bcluster)` with one entry per merged
        region: the region's convex hull, the ids of good points it
        contains and the ids of bad points it contains.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` cell edges per coordinate axis.
    xedges, yedges = [
        np.linspace(low, high, size + 1)
        for low, high in zip(np.min(all_locs, 0), np.max(all_locs, 0))
    ]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads
    # histogram2d returns counts indexed [x_bin, y_bin]; transposing before
    # ravel makes the flat index equal to y_bin * size + x_bin, which is the
    # layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its
        `idx`"""
        i = idx % size
        # Floor division: a plain `/` yields a float on Python 3, which is
        # not a valid index into `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i + 1], yedges[j + 1]]

    sps.index_to_rect = euc_index_to_rect

    # Keep the last argument an integer on Python 3 as well (50 // 8 == 6).
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)), discrepancy,
                             sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] is used as a geometry (`contains`, `convex_hull`);
        # presumably a shapely shape -- confirm against merge_regions.
        shape = region[1]
        gcluster.append([
            id_ for id_, point in zip(good_ids, good_points)
            if shape.contains(point)
        ])
        bcluster.append([
            id_ for id_, point in zip(bad_ids, bad_points)
            if shape.contains(point)
        ])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
Beispiel #2
0
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad
    ones.

    The bounding box of `all_locs` is cut into a 50x50 grid, good and bad
    points are counted per cell, and `spatial_scan` is asked for the top
    regions, which are then merged.

    Side effect: overwrites `GRID_SIZE`, `TOP_K` and `index_to_rect` on the
    `spatial_scan` module.

    Args:
        goods: pair `(ids, locs)`; `locs` is indexed as `locs[:, 0]` and
            `locs[:, 1]`, i.e. a 2-column array of coordinates.
        bads: pair `(ids, locs)` with the same layout as `goods`.
        all_locs: 2-column array whose per-column min/max define the grid
            extent.

    Returns:
        A tuple `(hulls, gcluster, bcluster)` with one entry per merged
        region: the region's convex hull, the ids of good points it
        contains and the ids of bad points it contains.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    # One array of `size + 1` cell edges per coordinate axis.
    xedges, yedges = [np.linspace(low, high, size+1)
                      for low, high in zip(np.min(all_locs, 0),
                                           np.max(all_locs, 0))]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads
    # histogram2d returns counts indexed [x_bin, y_bin]; transposing before
    # ravel makes the flat index equal to y_bin * size + x_bin, which is the
    # layout `euc_index_to_rect` decodes below.
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its
        `idx`"""
        i = idx % size
        # Floor division: a plain `/` yields a float on Python 3, which is
        # not a valid index into `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i+1], yedges[j+1]]
    sps.index_to_rect = euc_index_to_rect

    # Keep the last argument an integer on Python 3 as well (50 // 8 == 6).
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K,
                             sps.GRID_SIZE//8)
    merged = sps.merge_regions(top_loc)

    # Build each point geometry once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # region[1] is used as a geometry (`contains`, `convex_hull`);
        # presumably a shapely shape -- confirm against merge_regions.
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
Beispiel #3
0
    # Body of a definition whose header is outside this excerpt: configure
    # the `sps` module for one city, then run `do_scan` twice (photos as
    # background, then not) and emit each merged result via `output_json`.
    import cities
    import sys
    # NOTE(review): this unconditional exit raises SystemExit, so none of
    # the statements below are reached unless a caller catches it -- looks
    # like a leftover debugging switch; confirm before relying on this code.
    sys.exit()
    args = arguments.city_parser().parse_args()
    city = args.city
    _, client = cm.connect_to_db('foursquare', args.host, args.port)
    # client = None
    # NOTE(review): assigned but never read in the visible code.
    photos_in_background = True
    # `k` is used as sps.GRID_SIZE and passed to k_split_bbox and do_scan.
    k = 100
    sps.GRID_SIZE = k
    sps.MAX_SUPPORT = 200
    # Pick this city's bounding box from the combined US + EU collection.
    bbox = (cities.US+cities.EU)[cities.INDEX[city]]
    sps.BBOX = bbox
    # Only the third value returned by k_split_bbox is kept; it is installed
    # as the module-level `sps.index_to_rect`.
    _, _, sps.index_to_rect = sps.k_split_bbox(bbox, k)
    options = {'city': city, 'photos_background': True,
               'bbox': cities.bbox_to_polygon(bbox), 'only': False}
    # First pass: photos_background is True.
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)
    # Second pass: same scan with photos_background switched to False.
    options['photos_background'] = False
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)

    # options['only'] = True
    # for pb in [True, False]:
    #     options['photos_background'] = pb
        # top_loc = stand_alone(client, city, 100,
        #                       options['photos_background'])
    #     output_json(sps.merge_regions(top_loc, use_mean=False), options)
Beispiel #4
0
    # Body of a definition whose header is outside this excerpt: configure
    # the `sps` module for one city, then run `do_scan` twice (photos as
    # background, then not) and emit each merged result via `output_json`.
    # `args` is defined outside the visible lines.
    city = args.city
    _, client = cm.connect_to_db('foursquare', args.host, args.port)
    # client = None
    # NOTE(review): assigned but never read in the visible code.
    photos_in_background = True
    # `k` is used as sps.GRID_SIZE and passed to k_split_bbox and do_scan.
    k = 100
    sps.GRID_SIZE = k
    sps.MAX_SUPPORT = 200
    # Pick this city's bounding box from the combined US + EU collection.
    bbox = (cities.US + cities.EU)[cities.INDEX[city]]
    sps.BBOX = bbox
    # Only the third value returned by k_split_bbox is kept; it is installed
    # as the module-level `sps.index_to_rect`.
    _, _, sps.index_to_rect = sps.k_split_bbox(bbox, k)
    options = {
        'city': city,
        'photos_background': True,
        'bbox': cities.bbox_to_polygon(bbox),
        'only': False
    }
    # First pass: photos_background is True.
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)
    # Second pass: same scan with photos_background switched to False.
    options['photos_background'] = False
    top_loc, ratio = do_scan(client, city, k, options['photos_background'])
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)

    # options['only'] = True
    # for pb in [True, False]:
    #     options['photos_background'] = pb
    # top_loc = stand_alone(client, city, 100,
    #                       options['photos_background'])
    #     output_json(sps.merge_regions(top_loc, use_mean=False), options)