def all_gold_dst(metrics=None):
    """Compute distances between gold regions and the query for all metrics.

    The query city is always 'paris'. For every (city, district) pair --
    every other city listed under the 'gold' key of the first GROUND_TRUTH
    entry, crossed with every district of GROUND_TRUTH -- the geometry of
    the first Paris gold region of that district is passed to
    `interpret_query`, and each region returned by `get_gold_desc` for the
    target city is compared to it with `generic_distance`.

    Args:
        metrics: optional iterable of metric names. Defaults to
            ['cluster', 'emd', 'emd-lmnn', 'jsd'], the list that was
            previously hard-coded. When 'leftover' is included, the value
            returned by `generic_distance` is replaced by
            `emd_leftover.collect_matlab_output(1)` and `clean_tmp_mats()`
            is called after each region.

    Returns:
        A dict mapping '<city>_<district>_<metric>' to the list of
        distances of the gold regions that could be evaluated. Combinations
        for which `get_gold_desc` returns nothing are reported on stdout
        and get no entry. For the 'cluster' metric, regions whose weights
        have fewer than 3 elements are reported and skipped, so the list
        may be shorter than the number of gold regions (possibly empty).

    Raises:
        AssertionError: if GROUND_TRUTH is empty or not loaded yet.
    """
    assert GROUND_TRUTH, 'load GROUND_TRUTH before calling'
    query_city = 'paris'
    if metrics is None:
        metrics = ['cluster', 'emd', 'emd-lmnn', 'jsd']
    else:
        # Materialise: the metrics are iterated once per (city, district).
        metrics = list(metrics)
    # list(...) instead of .keys()/.items()[0] so this also works when the
    # dict methods return views rather than lists.
    districts = list(GROUND_TRUTH)
    first_entry = GROUND_TRUTH[districts[0]]
    cities = [city for city in first_entry['gold'] if city != query_city]
    results = {}
    for city, district in i.product(cities, districts):
        geo = GROUND_TRUTH[district]['gold'][query_city][0]['geometry']
        for metric in metrics:
            name = '_'.join([city, district, metric])
            info = interpret_query(query_city, city, geo, metric)
            _, target_city, target_desc, regions_distance, _, _ = info
            support = target_desc[1]
            candidates = get_gold_desc(target_city, district)
            if not candidates:
                print(name + ' is empty')
                continue
            current_dsts = []
            for region in candidates:
                features, _, weights, _ = region
                if metric == 'cluster' and weights.size < 3:
                    print("{}: can't make three clusters".format(name))
                    continue
                dst = generic_distance(metric, regions_distance, features,
                                       weights, support)
                if metric == 'leftover':
                    # For this metric the actual value is read back from
                    # the Matlab output rather than taken from the call
                    # above.
                    dst = emd_leftover.collect_matlab_output(1)
                    clean_tmp_mats()
                current_dsts.append(dst)
            results[name] = current_dsts
    return results
def brute_search(city_desc, hsize, distance_function, threshold, metric='jsd'):
    """Slide a circle over the whole city and keep track of the best result.

    A regular grid of circle centers is laid over the city bounds (inset by
    `hsize` on each side) and every center is evaluated by `one_cell` in a
    pool of 4 worker processes.

    Args:
        city_desc: 4-tuple `(city_size, support, features, city_infos)`
            where `city_infos` is `(surroundings, bounds)` and `bounds` is
            `(minx, miny, maxx, maxy)`.
        hsize: radius of the sliding circle; also sets the grid density
            (about 3 * floor(city_size) / hsize + 1 steps per axis).
        distance_function: stored in the DISTANCE_FUNCTION global.
        threshold: stored in the THRESHOLD global.
        metric: metric name, stored in METRIC_NAME. With 'leftover', the
            third field of every non-empty cell is overwritten by the
            values of `emd_leftover.collect_matlab_output`.

    Yields:
        Exactly one tuple `(best, res_map, 1.0)`. `best` is
        `[distance, cell[3], [cell[0], cell[1]], RADIUS]` for the cell with
        the smallest third field (or `[1e20, [], [], RADIUS]` when no cell
        is usable); `res_map` holds the first three fields of every cell
        whose first field is not None.

    Side effects:
        Rebinds the module globals listed below and, when QUERY_NAME is
        set, saves all non-empty cells under OTMPDIR/QUERY_NAME.
    """
    # NOTE(review): this module defines `brute_search` a second time further
    # down; that later definition shadows this one at import time.
    global SURROUNDINGS, CITY_FEATURES, THRESHOLD, RADIUS
    global METRIC_NAME, CITY_SUPPORT, DISTANCE_FUNCTION
    import multiprocessing
    # The search parameters are published as module globals before the pool
    # is created -- presumably `one_cell` reads them in the workers (confirm).
    RADIUS = hsize
    THRESHOLD = threshold
    METRIC_NAME = metric
    city_size, CITY_SUPPORT, CITY_FEATURES, city_infos = city_desc
    SURROUNDINGS, bounds = city_infos
    DISTANCE_FUNCTION = distance_function
    minx, miny, maxx, maxy = bounds
    nb_x_step = int(3 * np.floor(city_size[0]) / hsize + 1)
    nb_y_step = int(3 * np.floor(city_size[1]) / hsize + 1)
    best = [1e20, [], [], RADIUS]
    x_steps = np.linspace(minx + hsize, maxx - hsize, nb_x_step)
    y_steps = np.linspace(miny + hsize, maxy - hsize, nb_y_step)
    x_vals, y_vals = np.meshgrid(x_steps, y_steps)

    def to_cell_arg(numbered_center):
        """Return the `one_cell` argument (x, y, x index, y index, id)."""
        cell_id, (center_x, center_y) = numbered_center
        # `//` keeps the y index an integer even under true division.
        return (float(center_x), float(center_y),
                cell_id % nb_x_step, cell_id // nb_x_step, cell_id)

    centers = i.izip(np.nditer(x_vals), np.nditer(y_vals))
    cells = i.imap(to_cell_arg, enumerate(centers))
    pool = multiprocessing.Pool(4)
    try:
        res = pool.map(one_cell, cells)
    finally:
        # Release the worker processes even when a cell evaluation fails.
        pool.close()
        pool.join()
    if metric == 'leftover':
        dsts = emd_leftover.collect_matlab_output(len(res))
        for cell, dst in i.izip(res, dsts):
            # None marks an empty cell; 0 is a legitimate first field.
            if cell[0] is not None:
                cell[2] = dst
        clean_tmp_mats()
    res_map = []
    for cell in res:
        if cell[0] is None:
            continue
        res_map.append(cell[:3])
        if cell[2] < best[0]:
            best = [cell[2], cell[3], [cell[0], cell[1]], RADIUS]
    if QUERY_NAME:
        import persistent as p
        out_path = os.path.join(OTMPDIR, QUERY_NAME)
        p.save_var(out_path,
                   [[cell[2], cell[3], [cell[0], cell[1]], RADIUS]
                    for cell in res if cell[0] is not None])
        # Log only once the file has actually been written.
        logging.info('wrote: ' + str(out_path))
    yield best, res_map, 1.0
def brute_search(city_desc, hsize, distance_function, threshold, metric='jsd'):
    """Move a sliding circle over the whole city and return the best match.

    Circle centers are taken on a regular grid covering the city bounds,
    shrunk by `hsize` on every side, and each center is scored by
    `one_cell` through a 4-process `multiprocessing.Pool`.

    Args:
        city_desc: `(city_size, support, features, (surroundings, bounds))`
            with `bounds = (minx, miny, maxx, maxy)`.
        hsize: circle radius; each axis gets
            `int(3 * floor(city_size[axis]) / hsize + 1)` grid steps.
        distance_function: published as the DISTANCE_FUNCTION global.
        threshold: published as the THRESHOLD global.
        metric: metric name, published as METRIC_NAME. For 'leftover' the
            distances come from `emd_leftover.collect_matlab_output` and
            replace the third field of every non-empty cell.

    Yields:
        A single `(best, res_map, 1.0)` tuple, where `best` is
        `[distance, cell[3], [cell[0], cell[1]], RADIUS]` of the cell with
        the lowest third field (`[1e20, [], [], RADIUS]` if none), and
        `res_map` lists `cell[:3]` for every cell whose first field is not
        None.

    Side effects:
        Overwrites the module globals declared below; if QUERY_NAME is set,
        stores every non-empty cell in OTMPDIR/QUERY_NAME.
    """
    # NOTE(review): `brute_search` is also defined earlier in this module;
    # being the later definition, this is the one callers actually get.
    global SURROUNDINGS, CITY_FEATURES, THRESHOLD, RADIUS
    global METRIC_NAME, CITY_SUPPORT, DISTANCE_FUNCTION
    import multiprocessing
    # Globals are set before the pool exists; `one_cell` presumably relies
    # on them inside the worker processes -- TODO confirm.
    city_size, CITY_SUPPORT, CITY_FEATURES, city_infos = city_desc
    SURROUNDINGS, bounds = city_infos
    RADIUS = hsize
    THRESHOLD = threshold
    METRIC_NAME = metric
    DISTANCE_FUNCTION = distance_function

    minx, miny, maxx, maxy = bounds
    nb_x_step = int(3 * np.floor(city_size[0]) / hsize + 1)
    nb_y_step = int(3 * np.floor(city_size[1]) / hsize + 1)
    x_steps = np.linspace(minx + hsize, maxx - hsize, nb_x_step)
    y_steps = np.linspace(miny + hsize, maxy - hsize, nb_y_step)
    x_vals, y_vals = np.meshgrid(x_steps, y_steps)

    # One argument tuple per grid point: (x, y, x index, y index, flat id).
    # Floor division keeps the y index integral whatever `/` means here.
    grid_points = enumerate(i.izip(np.nditer(x_vals), np.nditer(y_vals)))
    cells = ((float(x), float(y), idx % nb_x_step, idx // nb_x_step, idx)
             for idx, (x, y) in grid_points)

    pool = multiprocessing.Pool(4)
    try:
        res = pool.map(one_cell, cells)
    finally:
        # Do not leave worker processes behind if a cell evaluation raises.
        pool.close()
        pool.join()

    if metric == 'leftover':
        dsts = emd_leftover.collect_matlab_output(len(res))
        for cell, dst in i.izip(res, dsts):
            # Empty cells are marked with None; a first field of 0 is valid.
            if cell[0] is not None:
                cell[2] = dst
        clean_tmp_mats()

    best = [1e20, [], [], RADIUS]
    res_map = []
    for cell in res:
        if cell[0] is None:
            continue
        res_map.append(cell[:3])
        if cell[2] < best[0]:
            best = [cell[2], cell[3], [cell[0], cell[1]], RADIUS]

    if QUERY_NAME:
        import persistent as p
        destination = os.path.join(OTMPDIR, QUERY_NAME)
        non_empty = [[cell[2], cell[3], [cell[0], cell[1]], RADIUS]
                     for cell in res if cell[0] is not None]
        p.save_var(destination, non_empty)
        # Report the file only after it has been saved.
        logging.info('wrote: ' + str(destination))
    yield best, res_map, 1.0