def neighborhoods_from_aoi(aoi, source_scan, mode, scan_nums, config, logger=None):
    # TODO: this is pretty tightly coupled and needs to be refactored
    #   so that logging and checking overlap sizes can be more cohesive.
    igroup_bounds = get_igroup_bounds(config['igroup_size'])
    t_num, s_num = scan_nums

    # Build kd tree over the source scan
    kd = kdtree._build(source_scan[:, :3])

    # Filter out target points from the aoi whose neighborhoods in the
    # source scan have size < `max_n_size`
    aoi = filter_aoi(kd, aoi, config, pb_pos=2)
    overlap_size = aoi.shape[0]

    log_message(f"[{t_num}|{s_num}][{mode}] overlap size post-filter: {aoi.shape}",
                "INFO", logger)

    if overlap_size >= config['min_overlap_size']:
        bins = [i[0] for i in igroup_bounds] + [igroup_bounds[-1][1]]
        plot_hist(aoi, bins, mode + "-B", s_num, t_num, config['plots_path'])

        # Resample the aoi so intensity groups are evenly represented
        aoi = resample_aoi(aoi, igroup_bounds, config['igroup_sample_size'], config)

        log_message(f"[{t_num}|{s_num}][{mode}] overlap size post-resample: {aoi.shape}",
                    "INFO", logger)

        # Verify the resampling operation
        plot_hist(aoi, bins, mode + "-P", s_num, t_num, config['plots_path'])

        # Query neighborhoods from the filtered, resampled aoi
        query = kdtree._query(kd, aoi[:, :3], k=config['max_n_size'], dmax=1)
        query = np.array(query).astype(int)  # `np.int` is deprecated; use builtin int

        save_neighborhoods_hdf5(aoi, query, source_scan, config)

    return overlap_size
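# Usage sketch for `neighborhoods_from_aoi` (not part of the original module).
# The config keys mirror the lookups above; all values here are hypothetical
# placeholders.
def _example_neighborhoods_from_aoi(aoi, source_scan):
    config = {
        'igroup_size': 0.1,          # intensity-group bin width (assumed)
        'igroup_sample_size': 500,   # points kept per intensity group (assumed)
        'min_overlap_size': 20000,   # minimum usable overlap (assumed)
        'max_n_size': 150,           # neighborhood size for kd queries (assumed)
        'plots_path': 'plots/',      # placeholder output directory
    }
    # scan_nums is (target_scan_num, source_scan_num)
    overlap = neighborhoods_from_aoi(aoi, source_scan, "train", (1, 2), config)
    return overlap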
def create_big_tile_dataset(path, neighborhood_size=150):
    path = Path(path)
    save_path = path / "neighborhoods"
    save_path.mkdir(parents=True, exist_ok=True)

    ARF = ApplyResponseFunction("dorf.json", "mapping.npy")
    big_tile_gt = np.loadtxt(path / "gt.txt.gz")
    scan_num = int(big_tile_gt[0, 8])
    # big_tile_alt = np.load(path / "alt.npy")

    kd = kdtree._build(big_tile_gt[:, :3])
    query = kdtree._query(kd, big_tile_gt[:, :3], k=neighborhood_size)

    # Keep only points with a full neighborhood
    my_query = [i for i in query if len(i) == neighborhood_size]

    dropped_ratio = ((len(query) - len(my_query)) / len(query)) * 100
    print(f"Found {dropped_ratio} percent of points with not enough close neighbors!")
    query = my_query

    examples = [None] * len(query)
    fid = [None] * len(query)
    intensities = [None] * len(query)

    # get neighborhoods
    for i in trange(len(query), desc="querying neighborhoods"):
        gt_neighborhood = big_tile_gt[query[i]]
        alt_neighborhood = ARF(gt_neighborhood, scan_num, 512)

        # Keep parity with the training dataset - save each example as a
        # (neighborhood_size + 2, 9) point cloud, e.g. (152, 9): the center
        # point's ground truth at index 0 and its altered copy at index 1.
        my_example = np.concatenate((
            np.expand_dims(gt_neighborhood[0, :], 0),
            np.expand_dims(alt_neighborhood[0, :], 0),
            alt_neighborhood))

        np.savetxt(save_path / f"{i}.txt.gz", my_example)
        examples[i] = (save_path / f"{i}.txt.gz").absolute()
        fid[i] = my_example[0, 8]            # flight number
        intensities[i] = int(my_example[0, 4])

    # create a csv index of the dataset
    df = pd.DataFrame()
    df["examples"] = examples
    df["source_scan"] = fid
    df["target_intensity"] = intensities
    df.to_csv(path / "big_tile_dataset.csv")
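# Usage sketch for `create_big_tile_dataset` (not part of the original module).
# It assumes the directory contains a `gt.txt.gz` ground-truth tile and that
# `dorf.json` / `mapping.npy` are resolvable from the working directory; the
# path is a placeholder.
def _example_create_big_tile_dataset():
    create_big_tile_dataset("data/big_tile", neighborhood_size=150)
    df = pd.read_csv("data/big_tile/big_tile_dataset.csv")
    print(df.head())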
def create_eval_tile(config):
    scans_path = Path(config['scans_path'])
    intersecting_flight = config['eval_source_scan']
    flight = np.load(scans_path / (intersecting_flight + ".npy"))

    kd = kdtree._build(flight[:, :3])

    # Grab the eval tile: the `eval_tile_size` nearest points to the tile center
    q = kdtree._query(kd,
                      np.array([config['eval_tile_center']]),
                      k=config['eval_tile_size'])
    tile = flight[tuple(q)]

    # Query a neighborhood for every point in the tile
    q = kdtree._query(kd, tile[:, :3], k=config['max_n_size'])

    setup_eval_hdf5(config)
    save_neighborhoods_hdf5_eval(tile, np.array(q), flight, config, pb_pos=0)
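# Minimal config sketch for `create_eval_tile` (not part of the original
# module). Key names follow the lookups above; every value is a placeholder.
def _example_create_eval_tile():
    config = {
        'scans_path': 'data/scans',        # directory of <scan>.npy files
        'eval_source_scan': '1',           # scan to pull the tile from
        'eval_tile_center': [316120.0, 234707.0, 10.0],  # XYZ center (assumed)
        'eval_tile_size': 100000,          # points in the tile (assumed)
        'max_n_size': 150,                 # neighborhood size (assumed)
    }
    create_eval_tile(config)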
def pptk(self, points):
    if self.pptk_index is not None:
        self.delete_pptk()
    self.pptk_index = kdtree._build(points)
    self.pptk_n = len(points)
def harmonize(model, source_scan_path, target_scan_num, config,
              save=False, sample_size=None):
    harmonized_path = Path(config['dataset']['harmonized_path'])
    plots_path = harmonized_path / "plots"
    plots_path.mkdir(exist_ok=True, parents=True)

    n_size = config['train']['neighborhood_size']
    b_size = config['train']['batch_size']
    chunk_size = config['dataset']['dataloader_size']
    transforms = get_transforms(config)
    G = GlobalShift(**config["dataset"])

    source_scan = np.load(source_scan_path)
    if config['dataset']['shift']:
        source_scan = G(source_scan)
    source_scan_num = int(source_scan[0, 8])

    if sample_size is not None:
        sample = np.random.choice(source_scan.shape[0], sample_size)
    else:
        sample = np.arange(source_scan.shape[0])

    model = model.to(config['train']['device'])
    model.eval()

    kd = kdtree._build(source_scan[:, :3])
    query = kdtree._query(kd, source_scan[sample, :3], k=n_size)
    query = np.array(query)

    size = len(query)
    hz = torch.empty(size).double()  # harmonization predictions
    ip = torch.empty(size).double()  # interpolation predictions
    cr = torch.empty(size).double()  # corrupted (input) intensities

    running_loss = 0
    batch_count = 0
    pbar1 = get_pbar(
        range(0, len(query), chunk_size),
        int(np.ceil(len(query) / chunk_size)),
        f"Hzing Scan {source_scan_num}-->{target_scan_num}",
        0, leave=True, disable=config['dataset']['tqdm'])

    for i in pbar1:
        query_chunk = query[i:i + chunk_size, :]
        # Center points must come from the same sampled set the query was
        # built over, otherwise they drift out of sync when sample_size is set
        source_chunk = source_scan[sample[i:i + chunk_size]]
        source_chunk = np.expand_dims(source_chunk, 1)
        neighborhoods = np.concatenate(
            (source_chunk, source_scan[query_chunk]), axis=1)

        dataset = LidarDatasetNP(neighborhoods, transform=transforms)
        dataloader = DataLoader(dataset,
                                batch_size=b_size,
                                num_workers=config['train']['num_workers'])
        pbar2 = get_pbar(dataloader, len(dataloader),
                         "  Processing Chunk", 1,
                         disable=config['dataset']['tqdm'])

        with torch.no_grad():
            for j, batch in enumerate(pbar2):
                # specify the scan we wish to harmonize to (dublin specific?)
                batch[:, 0, -1] = target_scan_num
                batch = batch.to(config['train']['device'])

                h_target = batch[:, 0, 3].clone()
                i_target = batch[:, 1, 3].clone()

                harmonization, interpolation, _ = model(batch)

                ldx = i + (j * b_size)
                hdx = i + (j + 1) * b_size
                hz[ldx:hdx] = harmonization.cpu().squeeze()
                ip[ldx:hdx] = interpolation.cpu().squeeze()
                cr[ldx:hdx] = i_target.cpu()  # corruption

                loss = torch.mean(torch.abs(harmonization.squeeze() - h_target))
                running_loss += loss.item()
                batch_count += 1
                pbar2.set_postfix({"loss": f"{running_loss/batch_count:.3f}"})

    # visualize results
    hz = np.clip(hz.numpy(), 0, 1)
    ip = np.clip(ip.numpy(), 0, 1)
    cr = np.expand_dims(cr.numpy(), 1)

    if config['dataset']['name'] == "dublin" and sample_size is None:
        create_kde(source_scan[sample, 3],
                   hz.squeeze(),
                   xlabel="ground truth harmonization",
                   ylabel="predicted harmonization",
                   output_path=plots_path / f"{source_scan_num}-{target_scan_num}_harmonization.png")

        create_kde(cr.squeeze(),
                   ip.squeeze(),
                   xlabel="ground truth interpolation",
                   ylabel="predicted interpolation",
                   output_path=plots_path / f"{source_scan_num}-{target_scan_num}_interpolation.png")

        create_kde(source_scan[sample, 3],
                   cr.squeeze(),
                   xlabel="ground truth",
                   ylabel="corruption",
                   output_path=plots_path / f"{source_scan_num}-{target_scan_num}_corruption.png")

    # insert results into the original scan
    harmonized_scan = np.hstack(
        (source_scan[sample, :3],
         np.expand_dims(hz, 1),
         source_scan[sample, 4:]))

    if config['dataset']['name'] == "dublin":
        scan_error = np.mean(np.abs(source_scan[sample, 3] - hz.squeeze()))
        print(f"Scan {source_scan_num} Harmonize MAE: {scan_error}")

    if save:
        np.save(harmonized_path / f"{source_scan_num}.npy", harmonized_scan)

    return harmonized_scan
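# Usage sketch for `harmonize` (not part of the original module). The model
# object, checkpoint path, and scan path are hypothetical; `config` must
# follow the nested 'train'/'dataset' layout referenced above.
def _example_harmonize(model, config):
    model.load_state_dict(torch.load("harmonization.pt"))  # assumed checkpoint
    harmonized = harmonize(model,
                           "data/scans/1.npy",   # assumed source scan
                           target_scan_num=2,
                           config=config,
                           save=True,
                           sample_size=10000)    # subsample for a quick check
    print(harmonized.shape)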
def get_hist_overlap(pc1, pc2, sample_overlap_size=10000, hist_bin_length=25):
    # Params:
    #   pc1: point cloud 1 (np array with shape [m, k1])
    #   pc2: point cloud 2 (np array with shape [n, k2])
    #
    #   k1 and k2 must contain at least x, y, and z coordinates.
    #
    # Returns:
    #   ((hist, edges), pc1_sample_f): a 3D histogram over the sampled
    #   overlap points and the count-weighted sample itself.

    # define a data range over the combined point clouds
    pc_combined = np.concatenate((pc1, pc2))
    data_range = np.array(
        [[pc_combined[:, 0].min(), pc_combined[:, 0].max()],
         [pc_combined[:, 1].min(), pc_combined[:, 1].max()],
         [pc_combined[:, 2].min(), pc_combined[:, 2].max()]])

    bin_counts = [int((f[1] - f[0]) / hist_bin_length) for f in data_range]
    del pc_combined  # save some mem

    # define bins based on data_range
    x_bins = np.linspace(data_range[0][0], data_range[0][1], num=bin_counts[0])
    y_bins = np.linspace(data_range[1][0], data_range[1][1], num=bin_counts[1])
    z_bins = np.linspace(data_range[2][0], data_range[2][1], num=bin_counts[2])

    # Collect some number of points as overlap between these point clouds.
    # Build a kd tree so we can search for points in pc2.
    kd = kdtree._build(pc2[:, :3])

    # collect a sample of points in pc1 to query in pc2
    sample_overlap = np.random.choice(len(pc1), size=sample_overlap_size)
    pc1_sample = pc1[sample_overlap]

    # query the pc1 sample in pc2; note that we want lots of nearby neighbors
    query = kdtree._query(kd, pc1_sample[:, :3], k=150, dmax=1)

    # count the number of neighbors found at each query point
    counts = np.zeros((len(query), 1))
    for i in range(len(query)):
        counts[i][0] = len(query[i])

    # append the counts to our sample
    pc1_sample_with_counts = np.concatenate((pc1_sample[:, :3], counts), axis=1)

    # This needs to be transformed such that each point (X, Y, Z) occurs in
    # the array `count` times. This makes histogram creation easier.
    rows = []
    for i in range(len(pc1_sample_with_counts)):
        row = np.expand_dims(pc1_sample_with_counts[i, :3], 0)
        # column 3 holds the neighbor count (column 2 is the z coordinate)
        if pc1_sample_with_counts[i, 3]:
            duplication = np.repeat(row, int(pc1_sample_with_counts[i, 3]), axis=0)
            rows.append(duplication)
    pc1_sample_f = np.concatenate(rows, axis=0)

    # build histogram over the count-weighted sample
    hist, edges = np.histogramdd(
        pc1_sample_f[:, :3], bins=[x_bins, y_bins, z_bins])

    return (hist, edges), pc1_sample_f
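# Quick self-contained check for `get_hist_overlap` (not part of the original
# module) on two synthetic, mostly-overlapping clouds; the shapes and jitter
# here are arbitrary placeholders.
def _example_get_hist_overlap():
    rng = np.random.default_rng(0)
    pc1 = rng.uniform(0, 500, size=(50_000, 3))
    pc2 = pc1 + rng.normal(0, 0.1, size=pc1.shape)  # jittered copy of pc1
    (hist, edges), weighted_sample = get_hist_overlap(pc1, pc2)
    print(hist.shape, weighted_sample.shape)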