def infer():
    """Compute a feature vector for every image listed in the SET csv.

    Image loading (CPU) and network inference (GPU) run in separate
    daemon threads communicating through the module-level queues, so the
    GPU stays busy while images are being decoded.  Results are gathered
    from GPU_OUT_QUEUE, re-ordered by original index, truncated back to
    the un-padded length and pickled to OUT_ROOT.

    Relies on module globals: CSV_ROOT, SET, IMAGES_PER_PASS,
    TUPLES_PER_BATCH, CPU_IN_QUEUE, GPU_IN_QUEUE, GPU_OUT_QUEUE,
    OUT_ROOT, OUT_NAME and the helpers build_inference_model,
    restore_weights, cpu_thread, gpu_thread, load_csv, save_pickle.
    """
    with tf.Graph().as_default() as graph:
        print("In Graph")
        ops, tuple_shape = build_inference_model()
        sess = restore_weights()

        # For better gpu utilization, loading processes and gpu inference
        # are done in separate threads.
        # Start CPU threads
        num_loader_threads = 6
        for _ in range(num_loader_threads):
            worker = Thread(target=cpu_thread)
            # setDaemon() is deprecated; assign the attribute instead.
            worker.daemon = True
            worker.start()

        # Start GPU thread
        worker = Thread(target=gpu_thread, args=(sess, ops))
        worker.daemon = True
        worker.start()

        csv_file = os.path.join(CSV_ROOT, '{}.csv'.format(SET))
        meta = load_csv(csv_file)
        num = len(meta['path'])

        # Pad the path list (repeating index 0) so it divides evenly into
        # full passes; padded entries are dropped again after inference.
        # (-num) % IMAGES_PER_PASS is 0 when num already divides evenly,
        # avoiding a pointless extra pass of pure padding.
        padding = [0 for _ in range((-num) % IMAGES_PER_PASS)]
        image_info = [(meta['path'][i])
                      for i in np.concatenate((np.arange(num),
                                               np.array(padding)))]
        padded_num = len(image_info)
        batched_indices = np.reshape(
            np.arange(padded_num), (-1, TUPLES_PER_BATCH * sum(tuple_shape)))
        batched_image_info = np.reshape(
            image_info, (-1, TUPLES_PER_BATCH * sum(tuple_shape)))
        for batch_indices, batch_image_info in zip(batched_indices,
                                                   batched_image_info):
            CPU_IN_QUEUE.put((batch_indices, batch_image_info))

        # Wait for completion & order output
        CPU_IN_QUEUE.join()
        GPU_IN_QUEUE.join()

        feature_pairs = list(GPU_OUT_QUEUE.queue)
        GPU_OUT_QUEUE.queue.clear()
        # Pre-size with None (not a shared [] object); every slot is
        # overwritten below, and a leftover None would fail loudly later.
        features = [None] * padded_num
        for pair in feature_pairs:
            for i, f in zip(pair[0], pair[1]):
                features[i] = f
        features = features[:num]
        save_pickle(
            features,
            os.path.join(OUT_ROOT, '{}_{}.pickle'.format(SET, OUT_NAME)))
def cluster(in_root, out_root, s, mode, r):
    """Greedily subsample the trajectory of fold *s* so consecutive kept
    reference points are more than *r* apart, then plot and export them.

    Results (ref_xy, date_meta, ref_idx) are cached as a pickle in
    *out_root*; on a second run the cache is loaded instead of recomputed.
    A scatter plot (png) and a csv with the kept rows are always written.

    Args:
        in_root: directory containing '{s}_{mode}_000.csv'.
        out_root: output directory for pickle/png/csv.
        s: fold name; also selects the '<s>_ref_date' module-level global.
        mode: split name used in the file names.
        r: minimum spacing between kept points (same unit as get_xy output).
    """
    out_file = os.path.join(out_root, '{}_{}_{}.pickle'.format(s, mode, r))
    meta_file = os.path.join(in_root, '{}_{}_000.csv'.format(s, mode))
    meta = load_csv(meta_file)
    if not os.path.exists(out_file):
        # Keep only rows whose date is in this fold's reference dates.
        date = getattr(sys.modules[__name__], '{}_ref_date'.format(s))
        temp_meta = dict()
        for key in meta.keys():
            temp_meta[key] = [
                e for e, d in zip(meta[key], meta['date']) if d in date
            ]
        # Sort every column by timestamp so the greedy pass below walks
        # the trajectory in temporal order.
        t_idx = np.argsort(temp_meta['t'])
        date_meta = dict()
        for key in meta.keys():
            date_meta[key] = [temp_meta[key][i] for i in t_idx]
        print(len(date_meta['t']))
        xy = get_xy(date_meta)
        # Greedy subsampling: keep a point once it is more than r away
        # from the last kept point.
        ref_xy = [xy[0, :]]
        ref_idx = [0]
        for i in tqdm(range(len(date_meta['t']))):
            if sum((xy[i, :] - ref_xy[-1])**2) > r**2:
                ref_xy.append(xy[i, :])
                ref_idx.append(i)
        ref_xy = np.array(ref_xy)
        save_pickle([ref_xy, date_meta, ref_idx], out_file)
    else:
        ref_xy, date_meta, ref_idx = load_pickle(out_file)
    print('{}: {}'.format(s, len(ref_idx)))

    out_img = os.path.join(out_root, '{}_{}_{}.png'.format(s, mode, r))
    # Original called plt.clf() twice in a row; once is sufficient.
    plt.clf()
    f, (ax1) = plt.subplots(1, 1, sharey=False)
    f.set_figheight(50)
    f.set_figwidth(50)
    ax1.scatter(ref_xy[:, 0], ref_xy[:, 1], c=np.arange(len(ref_xy)))
    plt.savefig(out_img)

    out_meta = dict()
    for key in meta.keys():
        out_meta[key] = [date_meta[key][i] for i in ref_idx]
    out_file = os.path.join(out_root, '{}_{}_{}.csv'.format(s, mode, r))
    save_csv(out_meta, out_file)
def clean_parametrization(in_root, folds, cols_to_keep, out_root):
    """Clean per-fold ref/query csv files and write combined outputs.

    For each fold: bins the reference trajectory by arc length 'l',
    computes a median xy per bin (both cached as pickles in *out_root*),
    then removes outlier rows via find_and_remove_errors for both the
    ref and query splits.  Writes per-fold cleaned csvs plus aggregated
    'full', 'full_ref', 'full_query' and 'meta' csvs.

    Args:
        in_root: directory containing '{fold}_ref.csv' / '{fold}_query.csv'.
        folds: iterable of fold names.
        cols_to_keep: column keys to aggregate into the combined outputs.
        out_root: directory for caches and output csvs.
    """
    full_data = dict()
    full_ref_data = dict()
    full_query_data = dict()
    for key in cols_to_keep:
        full_data[key] = []
        full_ref_data[key] = []
        full_query_data[key] = []
    meta = dict()
    for s in folds:
        ref_data = load_csv(os.path.join(in_root, '{}_ref.csv'.format(s)))
        query_data = load_csv(os.path.join(in_root, '{}_query.csv'.format(s)))
        # Not used to detect ref outliers
        for key in ['l', 'northing', 'easting']:
            ref_data[key] = np.array(ref_data[key], dtype=float)
            query_data[key] = np.array(query_data[key], dtype=float)
        l_max = max(ref_data['l'])
        num_bins = math.ceil(l_max)

        # Cache: indices of ref rows falling into each unit-length l-bin.
        ref_member_path = os.path.join(
            out_root, '{}_ref_bin_raw_members.pickle'.format(s))
        if not os.path.exists(ref_member_path):
            bin_members = [[i for i in range(len(ref_data['t']))
                            if math.floor(ref_data['l'][i]) == j]
                           for j in tqdm(range(num_bins))]
            save_pickle(bin_members, ref_member_path)
        else:
            bin_members = load_pickle(ref_member_path)

        # Cache: median (easting, northing) per bin; (-1, -1) marks an
        # empty bin.
        ref_bin_xy_path = os.path.join(
            out_root, '{}_ref_bin_raw_xy.pickle'.format(s))
        if not os.path.exists(ref_bin_xy_path):
            ref_bin_xy = [
                np.median(np.array([[ref_data['easting'][i],
                                     ref_data['northing'][i]]
                                    for i in bin_members[j]]), axis=0)
                if len(bin_members[j]) else np.array([-1, -1])
                for j in tqdm(range(num_bins))]
            save_pickle(ref_bin_xy, ref_bin_xy_path)
        else:
            ref_bin_xy = load_pickle(ref_bin_xy_path)

        meta['{}_ref'.format(s)], clean_ref_data = find_and_remove_errors(
            'ref', out_root, ref_bin_xy, ref_data, s)
        # Cleaning query files to allow for more efficient testing, should
        # not influence performance (other than possibly excluding faulty
        # gps/ins 'ground truth', which we don't want anyways)
        meta['{}_query'.format(s)], clean_query_data = find_and_remove_errors(
            'query', out_root, ref_bin_xy, query_data, s)

        fold_clean_data = dict()
        for key in clean_ref_data.keys():
            fold_clean_data[key] = []
            fold_clean_data[key].extend(clean_ref_data[key])
            fold_clean_data[key].extend(clean_query_data[key])
            full_data[key].extend(clean_ref_data[key])
            full_data[key].extend(clean_query_data[key])
            # BUG FIX: the original extended full_query_data with ref rows
            # and full_ref_data with query rows (swapped); aggregate each
            # split into its matching output.
            full_ref_data[key].extend(clean_ref_data[key])
            full_query_data[key].extend(clean_query_data[key])
        save_csv(fold_clean_data, os.path.join(out_root, '{}.csv'.format(s)))

    save_csv(full_data, os.path.join(out_root, 'full.csv'))
    save_csv(full_ref_data, os.path.join(out_root, 'full_ref.csv'))
    save_csv(full_query_data, os.path.join(out_root, 'full_query.csv'))
    save_csv(meta, os.path.join(out_root, 'meta.csv'))
def get_top_n():
    """Retrieve the top-N reference matches for every query descriptor.

    For each PCA dimensionality d and spacing l: subsample the reference
    set so consecutive kept points are at least l apart, build a KDTree
    over the PCA-reduced reference features and query it with the reduced
    query features.  Saves [top_i, top_g_dists, top_f_dists, gt_i,
    gt_g_dist, ref_idx] per (l, d) combination, skipping combinations
    whose output pickle already exists.

    Relies on module globals: OUT_ROOT, REF_CSV, QUERY_CSV, N,
    PCA_LV_PICKLE, REF_LV_PICKLE, QUERY_LV_PICKLE and the helpers
    get_checkpoints, load_csv, load_pickle, save_pickle, get_xy, mkdir.
    """
    # check if complete: skip all work when every output pickle exists.
    ld_checkpoints = get_checkpoints('obm')
    ld_cp_names = []
    for cp in ld_checkpoints:
        cp_name = cp.split('/')[-2]
        cp_name = ''.join(os.path.basename(cp_name).split('.'))  # Removing '.'
        # NOTE(review): cp[-1] is the last *character* of the checkpoint
        # path — presumably a single-digit epoch suffix; confirm.
        cp_name += '_e{}'.format(cp[-1])
        ld_cp_names.append(cp_name)
    # Full parameter sweep only for the checkpoint named in the pickle.
    if any([x in QUERY_LV_PICKLE for x in ld_cp_names]):
        L = [0.0, 0.3, 1.0, 5.0]
        D = [64, 128, 256, 512, 1024, 2048, 4096]
    else:
        L = [0.0]
        D = [256]
    complete = True
    for l in L:
        for d in D:
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if not os.path.exists(out_pickle):
                complete = False
                break
        if not complete:
            break
    if complete:
        print('Skipping complete {}'.format(QUERY_LV_PICKLE))
        return

    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]
    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    full_xy_dists = pairwise_distances(full_query_xy, full_ref_xy,
                                       metric='euclidean')
    for d in D:
        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)
        for l in L:
            print(l)
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue
            # Greedy spatial subsampling of the reference set.  Start the
            # scan at 1: index 0 is already in ref_idx, and with l == 0
            # the original re-appended it (0 >= 0), duplicating entry 0.
            ref_idx = [0]
            for i in range(1, len(full_ref_xy)):
                if sum((full_ref_xy[i, :] -
                        full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)
            if len(ref_idx) < N:
                continue
            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i]
                                 for i in ref_idx]).transpose()
            print('Building tree')
            ref_tree = KDTree(ref_f)
            print('Retrieving')
            # KDTree.query returns (distances, indices); unpack directly
            # instead of wrapping the tuple in np.array first.
            top_f_dists, top_i = ref_tree.query(
                pca_query_f, k=N, return_distance=True, sort_results=True)
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)
            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]
            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)
            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]
            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
def get_top_n():
    """Retrieve the top-N reference matches for every query descriptor.

    NOTE(review): a second definition of get_top_n exists in this source;
    if both live in one module this one shadows the first — confirm they
    belong to separate scripts.

    For each PCA dimensionality in DIMS and spacing in L: subsample the
    reference set so consecutive kept points are at least l apart, build
    a KDTree over the PCA-reduced reference features and query it with
    the reduced query features.  Saves [top_i, top_g_dists, top_f_dists,
    gt_i, gt_g_dist, ref_idx] per (l, d) combination, skipping existing
    output pickles.

    Relies on module globals: OUT_ROOT, REF_CSV, QUERY_CSV, N, L, DIMS,
    PCA_LV_PICKLE, REF_LV_PICKLE, QUERY_LV_PICKLE and the helpers
    load_csv, load_pickle, save_pickle, get_xy, mkdir.
    """
    ref_meta = load_csv(REF_CSV)
    query_meta = load_csv(QUERY_CSV)
    full_ref_xy = get_xy(ref_meta)
    full_query_xy = get_xy(query_meta)
    num_q = full_query_xy.shape[0]
    pca_f = np.array(load_pickle(PCA_LV_PICKLE))
    full_ref_f = np.array(load_pickle(REF_LV_PICKLE))
    full_query_f = np.array(load_pickle(QUERY_LV_PICKLE))
    full_xy_dists = pairwise_distances(full_query_xy, full_ref_xy,
                                       metric='euclidean')
    for d in DIMS:
        print(d)
        pca = PCA(whiten=True, n_components=d)
        pca = pca.fit(pca_f)
        pca_ref_f = pca.transform(full_ref_f)
        pca_query_f = pca.transform(full_query_f)
        for l in L:
            print(l)
            out_folder = os.path.join(OUT_ROOT, 'l{}_dim{}'.format(l, d))
            mkdir(out_folder)
            name = ''.join(os.path.basename(QUERY_LV_PICKLE).split('.')[:-1])
            out_pickle = os.path.join(out_folder, '{}.pickle'.format(name))
            if os.path.exists(out_pickle):
                print('{} already exists. Skipping.'.format(out_pickle))
                continue
            # Greedy spatial subsampling of the reference set.  Start the
            # scan at 1: index 0 is already in ref_idx, and with l == 0
            # the original re-appended it (0 >= 0), duplicating entry 0.
            ref_idx = [0]
            for i in range(1, len(full_ref_xy)):
                if sum((full_ref_xy[i, :] -
                        full_ref_xy[ref_idx[-1], :])**2) >= l**2:
                    ref_idx.append(i)
            if len(ref_idx) < N:
                continue
            ref_f = np.array([pca_ref_f[i, :] for i in ref_idx])
            xy_dists = np.array([full_xy_dists[:, i]
                                 for i in ref_idx]).transpose()
            print('Building tree')
            ref_tree = KDTree(ref_f)
            print('Retrieving')
            # KDTree.query returns (distances, indices); unpack directly
            # instead of wrapping the tuple in np.array first.
            top_f_dists, top_i = ref_tree.query(
                pca_query_f, k=N, return_distance=True, sort_results=True)
            top_f_dists = np.array(top_f_dists)
            top_i = np.array(top_i, dtype=int)
            top_g_dists = [[xy_dists[q, r] for r in top_i[q, :]]
                           for q in range(num_q)]
            gt_i = np.argmin(xy_dists, axis=1)
            gt_g_dist = np.min(xy_dists, axis=1)
            # Translate to original indices
            top_i = [[ref_idx[r] for r in top_i[q, :]] for q in range(num_q)]
            gt_i = [ref_idx[r] for r in gt_i]
            save_pickle(
                [top_i, top_g_dists, top_f_dists, gt_i, gt_g_dist, ref_idx],
                out_pickle)
image_info, features, xy = load_pickle(lv_file) tuple_info = load_pickle(tuple_file) xy = np.array(xy) f_dists = [] e_dists = [] for i in tqdm(range(len(xy))): for j in tuple_info['positives'][i]: if j < i: f_dist = np.sum((features[i] - features[j])**2) f_dists.append(f_dist) e_dist = np.sum((xy[i, :] - xy[j, :])**2) e_dists.append(e_dist) save_pickle([e_dists, f_dists], out_file) else: e_dists, f_dists = load_pickle(out_file) full_info = dict() full_info['f_mean'] = np.mean(f_dists) full_info['e_mean'] = np.mean(e_dists) full_info['f_med'] = np.median(f_dists) full_info['e_med'] = np.median(e_dists) full_info['f_max'] = np.max(f_dists) full_info['e_max'] = np.max(e_dists) save_csv(full_info, out_file_meta) plt.clf() f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)