def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad ones.

    A `size` x `size` grid is laid over the extent of `all_locs`, good and
    bad points are counted per cell, and both count grids are handed to
    `spatial_scan` to extract and merge the most discrepant regions.

    Args:
        goods: pair `(ids, locations)`; `locations` is indexed as a 2D array
            whose columns 0 and 1 hold the x and y coordinates.
        bads: pair `(ids, locations)` laid out like `goods`.
        all_locs: 2D array of points; its per-column minimum and maximum
            define the edges of the grid.

    Returns:
        A tuple `(hulls, gcluster, bcluster)` of three lists with one entry
        per merged region: the convex hull of the region's geometry, the ids
        of the good points it contains and the ids of the bad points it
        contains.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    # spatial_scan is configured through module-level attributes.
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    xedges, yedges = [np.linspace(low, high, size + 1)
                      for low, high in zip(np.min(all_locs, 0),
                                           np.max(all_locs, 0))]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads

    # histogram2d puts x on the first axis; transpose before flattening so
    # that a flat index decomposes as idx = j * size + i (see below).
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its `idx`"""
        i = idx % size
        # Floor division: `/` would give a float under Python 3, which
        # cannot be used to index `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i + 1], yedges[j + 1]]

    sps.index_to_rect = euc_index_to_rect
    # `//` keeps the last argument an integer (6), as `/` did on Python 2.
    # NOTE(review): presumably a size expressed in grid cells -- confirm
    # against spatial_scan.exact_grid.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # The second item of a merged region is the geometry tested here.
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
def discrepancy_seeds(goods, bads, all_locs):
    """Find regions with concentration of good points compared with bad ones.

    A `size` x `size` grid is laid over the extent of `all_locs`, good and
    bad points are counted per cell, and both count grids are handed to
    `spatial_scan` to extract and merge the most discrepant regions.

    Args:
        goods: pair `(ids, locations)`; `locations` is indexed as a 2D array
            whose columns 0 and 1 hold the x and y coordinates.
        bads: pair `(ids, locations)` laid out like `goods`.
        all_locs: 2D array of points; its per-column minimum and maximum
            define the edges of the grid.

    Returns:
        A tuple `(hulls, gcluster, bcluster)` of three lists with one entry
        per merged region: the convex hull of the region's geometry, the ids
        of the good points it contains and the ids of the bad points it
        contains.
    """
    import spatial_scan as sps
    size = 50
    support = 8
    # spatial_scan is configured through module-level attributes.
    sps.GRID_SIZE = size
    sps.TOP_K = 500

    xedges, yedges = [np.linspace(low, high, size + 1)
                      for low, high in zip(np.min(all_locs, 0),
                                           np.max(all_locs, 0))]
    bins = (xedges, yedges)
    good_ids, good_loc = goods
    bad_ids, bad_loc = bads

    # histogram2d puts x on the first axis; transpose before flattening so
    # that a flat index decomposes as idx = j * size + i (see below).
    count, _, _ = np.histogram2d(good_loc[:, 0], good_loc[:, 1], bins=bins)
    measured = count.T.ravel()
    count, _, _ = np.histogram2d(bad_loc[:, 0], bad_loc[:, 1], bins=bins)
    background = count.T.ravel()
    total_b = np.sum(background)
    total_m = np.sum(measured)
    discrepancy = sps.get_discrepancy_function(total_m, total_b, support)

    def euc_index_to_rect(idx):
        """Return the bounding box of a grid's cell defined by its `idx`"""
        i = idx % size
        # Floor division: `/` would give a float under Python 3, which
        # cannot be used to index `yedges`.
        j = idx // size
        return [xedges[i], yedges[j], xedges[i + 1], yedges[j + 1]]

    sps.index_to_rect = euc_index_to_rect
    # `//` keeps the last argument an integer (6), as `/` did on Python 2.
    # NOTE(review): presumably a size expressed in grid cells -- confirm
    # against spatial_scan.exact_grid.
    top_loc = sps.exact_grid(np.reshape(measured, (size, size)),
                             np.reshape(background, (size, size)),
                             discrepancy, sps.TOP_K, sps.GRID_SIZE // 8)
    merged = sps.merge_regions(top_loc)

    # Build each point once instead of once per region.
    good_points = [sgeo.Point(loc) for loc in good_loc]
    bad_points = [sgeo.Point(loc) for loc in bad_loc]

    gcluster = []
    bcluster = []
    hulls = []
    for region in merged:
        # The second item of a merged region is the geometry tested here.
        shape = region[1]
        gcluster.append([id_ for id_, point in zip(good_ids, good_points)
                         if shape.contains(point)])
        bcluster.append([id_ for id_, point in zip(bad_ids, bad_points)
                         if shape.contains(point)])
        hulls.append(shape.convex_hull)
    return hulls, gcluster, bcluster
import cities
import sys
# The script stops here: nothing below runs while this call is in place.
sys.exit()
args = arguments.city_parser().parse_args()
city = args.city
_, client = cm.connect_to_db('foursquare', args.host, args.port)
# client = None
photos_in_background = True
k = 100
# spatial_scan is configured through module-level attributes.
sps.GRID_SIZE = k
sps.MAX_SUPPORT = 200
bbox = (cities.US+cities.EU)[cities.INDEX[city]]
sps.BBOX = bbox
_, _, sps.index_to_rect = sps.k_split_bbox(bbox, k)
options = {
    'city': city,
    'photos_background': True,
    'bbox': cities.bbox_to_polygon(bbox),
    'only': False,
}
# Scan twice, first with photos as background and then without, writing
# the merged regions of each pass together with the ratio it reported.
for use_photos in (True, False):
    options['photos_background'] = use_photos
    top_loc, ratio = do_scan(client, city, k, use_photos)
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)
# options['only'] = True
# for pb in [True, False]:
#     options['photos_background'] = pb
#     top_loc = stand_alone(client, city, 100,
#                           options['photos_background'])
#     output_json(sps.merge_regions(top_loc, use_mean=False), options)
# NOTE(review): `args` is not defined in the visible part of this fragment;
# it is expected to come from the argument parsing that precedes it.
city = args.city
_, client = cm.connect_to_db('foursquare', args.host, args.port)
# client = None
photos_in_background = True
k = 100
# spatial_scan is configured through module-level attributes.
sps.GRID_SIZE = k
sps.MAX_SUPPORT = 200
bbox = (cities.US + cities.EU)[cities.INDEX[city]]
sps.BBOX = bbox
_, _, sps.index_to_rect = sps.k_split_bbox(bbox, k)
options = {
    'city': city,
    'photos_background': True,
    'bbox': cities.bbox_to_polygon(bbox),
    'only': False,
}
# Run the scan once per background mode (photos first, then without) and
# write out the merged regions along with the ratio returned by the scan.
for use_photos in (True, False):
    options['photos_background'] = use_photos
    top_loc, ratio = do_scan(client, city, k, use_photos)
    options['ratio'] = ratio
    output_json(sps.merge_regions(top_loc), options)
# options['only'] = True
# for pb in [True, False]:
#     options['photos_background'] = pb
#     top_loc = stand_alone(client, city, 100,
#                           options['photos_background'])
#     output_json(sps.merge_regions(top_loc, use_mean=False), options)