Example #1
def filter_aoi(kd, aoi, config, pb_pos=1):
    # Querying uses a large amount of memory; chunk the queries to keep
    #   the footprint small.
    keep = []
    max_chunk_size = config['max_chunk_size']

    max_idx = int(np.ceil(aoi.shape[0] / max_chunk_size))
    sub_pbar = get_pbar(
            range(0, aoi.shape[0], max_chunk_size),
            max_idx,
            "Filtering AOI",
            pb_pos, disable=config['tqdm']
            )

    for i in sub_pbar:
        current_chunk = aoi[i:i+max_chunk_size]
        query = kdtree._query(kd, 
                              current_chunk[:, :3], 
                              k=config['max_n_size'], dmax=1)

        for j in range(len(query)):
            if len(query[j]) == config['max_n_size']:
                keep.append(i+j)

    return aoi[keep]
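A minimal call sketch for filter_aoi. The kdtree module is assumed to be pptk's kd-tree binding (matching the _build/_query calls above), and get_pbar is a project helper, so this only illustrates the expected inputs; all data and config values are placeholders:

import numpy as np
from pptk import kdtree  # assumption: the same binding the examples use

# Illustrative data: random "scans" whose first three columns are x, y, z.
source_scan = np.random.rand(20000, 3) * 100
aoi = np.random.rand(5000, 3) * 100

config = {'max_chunk_size': 1000, 'max_n_size': 150, 'tqdm': False}
kd = kdtree._build(source_scan[:, :3])

# Keeps only AOI points with a full neighborhood (k neighbors within dmax=1)
aoi = filter_aoi(kd, aoi, config)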
Example #2
def neighborhoods_from_aoi(
        aoi,
        source_scan,
        mode,
        scan_nums,
        config,
        logger=None):

    # TODO: this is pretty tightly coupled and needs to be refactored
    #   so that logging and checking overlap sizes can be more cohesive.
    igroup_bounds = get_igroup_bounds(config['igroup_size'])
    t_num, s_num = scan_nums

    # Build kd tree over source scan
    kd = kdtree._build(source_scan[:, :3])

    # filter out target points from aoi with neighborhoods in source 
    #   with size < `max_n_size`
    aoi = filter_aoi(
        kd,
        aoi,
        config,
        pb_pos=2)

    overlap_size = aoi.shape[0]

    log_message(f"[{t_num}|{s_num}][{mode}] overlap size post-filter: {aoi.shape}", "INFO", logger)

    if overlap_size >= config['min_overlap_size']:
        bins = [i[0] for i in igroup_bounds] + [igroup_bounds[-1][1]]

        plot_hist(aoi, bins, mode+"-B", 
            s_num, t_num, config['plots_path'])

        # resample aoi
        aoi = resample_aoi(
            aoi, 
            igroup_bounds, 
            config['igroup_sample_size'],
            config)

        log_message(f"[{t_num}|{s_num}][{mode}] overlap size post-resample: {aoi.shape}", "INFO", logger)

        # Verify resampling operation
        plot_hist(aoi, bins, mode+"-P", 
            s_num, t_num, config['plots_path'])

        # Query neighborhoods from filtered resampled aoi
        query = kdtree._query(kd, 
                              aoi[:, :3], 
                              k=config['max_n_size'], dmax=1)

        query = np.array(query).astype(int)  # np.int is removed in NumPy >= 1.24

        save_neighborhoods_hdf5(aoi, query, source_scan, config)

    return overlap_size 
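A hedged call-shape sketch for neighborhoods_from_aoi. The helpers it reaches (get_igroup_bounds, resample_aoi, plot_hist, save_neighborhoods_hdf5, log_message) live in the surrounding project, so only the config keys read above and the argument order are shown; every value is a placeholder:

config = {
    'igroup_size': 0.1,           # intensity-bin width for resampling
    'igroup_sample_size': 500,    # points kept per intensity bin
    'min_overlap_size': 20000,    # pairs with less overlap are skipped
    'max_chunk_size': 5000,       # chunked querying, see filter_aoi above
    'max_n_size': 150,            # required neighborhood size
    'plots_path': 'plots/',
    'tqdm': False,
}

overlap_size = neighborhoods_from_aoi(
    aoi,                          # candidate overlap points (target scan)
    source_scan,                  # full source scan, columns 0:3 are x, y, z
    "ts",                         # mode tag used in plot filenames
    (target_num, source_num),     # scan numbers, used for logging only
    config)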
Example #3
def create_big_tile_dataset(path, neighborhood_size=150):

    path = Path(path)
    save_path = path / "neighborhoods"
    save_path.mkdir(parents=True, exist_ok=True) 
    ARF = ApplyResponseFunction("dorf.json", "mapping.npy")
    

    big_tile_gt = np.loadtxt(path / "gt.txt.gz")
    scan_num = int(big_tile_gt[0, 8])
    # big_tile_alt = np.load(path / "alt.npy")
    
    kd = kdtree._build(big_tile_gt[:, :3])

    query = kdtree._query(
            kd, 
            big_tile_gt[:, :3],
            k=neighborhood_size)

    my_query = []
    for i in query:
        if len(i) == neighborhood_size:
            my_query.append(i)

    rejected_ratio = ((len(query) - len(my_query)) / len(query)) * 100
    print(f"Found {rejected_ratio:.1f} percent of points with not enough close neighbors!")

    query = my_query
    examples = [None] * len(query)
    fid = [None] * len(query)
    intensities = [None] * len(query)

    # get neighborhoods
    for i in trange(len(query), desc="querying neighborhoods"):
        gt_neighborhood = big_tile_gt[query[i]]
        alt_neighborhood = ARF(gt_neighborhood, scan_num, 512)
        
        # Keep parity with the training dataset - save each example as a
        # (152, 9) point cloud: idx 0 is the ground-truth center point and
        # idx 1 is an extra copy of the altered center point.
        my_example = np.concatenate((
            np.expand_dims(gt_neighborhood[0, :], 0), 
            np.expand_dims(alt_neighborhood[0, :], 0),
            alt_neighborhood))
        
        np.savetxt(save_path / f"{i}.txt.gz", my_example)
        examples[i] = (save_path / f"{i}.txt.gz").absolute()
        fid[i] = my_example[0, 8]  # flight number
        intensities[i] = int(my_example[0, 4])

    # create csv
    df = pd.DataFrame()
    df["examples"] = examples
    df["source_scan"] = fid
    df["target_intensity"] = intensities

    df.to_csv(path / "big_tile_dataset.csv")
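A hedged invocation sketch; the directory layout is an assumption read off the loads inside the function:

# Assumes <path>/gt.txt.gz exists (gzipped point array with the scan number
# in column 8) and that dorf.json / mapping.npy resolve for ApplyResponseFunction.
create_big_tile_dataset("data/big_tile", neighborhood_size=150)
# Writes data/big_tile/neighborhoods/<i>.txt.gz and data/big_tile/big_tile_dataset.csv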
Example #4
    def neighbors(self,
                  query,
                  k=100,
                  radius=np.power(10, 10),
                  distances=False,
                  manhatten=False,
                  approx=0.0):
        if self.pptk_index is not None:
            try:
                neighbors = kdtree._query(self.pptk_index, query, k, radius)[0]
            except ValueError:
                neighbors = kdtree._query(self.pptk_index, np.array([query]),
                                          k, radius)[0]
            if distances:
                dists = np.zeros(len(neighbors))
        elif self.scipy_index is not None:
            if manhatten:
                p = 1.0
            else:
                p = 2.0
            dists, neighbors = self.scipy_index.query(
                query, k, eps=approx, p=p, distance_upper_bound=radius)
            mask = radius >= dists
            neighbors, dists = neighbors[mask], dists[mask]
        elif self.annoy_index is not None:
            neighbors, dists = self.annoy_index.get_nns_by_vector(
                query, k, include_distances=True)  # search_k = n_trees * n
            neighbors, dists = np.array(neighbors), np.array(dists)
            mask = radius >= dists
            neighbors, dists = neighbors[mask], dists[mask]
        elif self.voxel_index is not None:
            neighbors = self.voxel_index.neighbors(query, k)
            dists = self.voxel_index.distances(query, neighbors)
        else:
            neighbors, dists = [], []
            print('No query index built')

        if distances:
            return neighbors, dists
        else:
            return neighbors
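A usage sketch for this method, assuming a hypothetical PointIndex wrapper whose constructor builds exactly one of the backing indices (pptk kd-tree, SciPy cKDTree, Annoy, or a voxel grid); the method then dispatches to whichever index attribute is populated:

import numpy as np

points = np.random.rand(10000, 3)
index = PointIndex(points)  # hypothetical wrapper owning the index attributes

nbrs = index.neighbors(points[0], k=50)                     # indices only
nbrs, d = index.neighbors(points[0], k=50, distances=True)  # zeros on the pptk path
near = index.neighbors(points[0], k=50, radius=0.1)         # radius-limited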
Example #5
def create_eval_tile(config):

    scans_path = Path(config['scans_path'])
    intersecting_flight = config['eval_source_scan']
    flight = np.load(scans_path / (intersecting_flight+".npy"))

    kd = kdtree._build(flight[:, :3])
    q = kdtree._query(kd, 
                      np.array([config['eval_tile_center']]),
                      k=config['eval_tile_size'])
    
    tile = flight[tuple(q)]

    q = kdtree._query(kd, tile[:, :3], k=config['max_n_size'])

    setup_eval_hdf5(config)

    save_neighborhoods_hdf5_eval(tile, 
                                np.array(q), 
                                flight, 
                                config, 
                                pb_pos=0)
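An illustrative config for create_eval_tile; the key names mirror the lookups in the function body, while every value is a placeholder:

config = {
    'scans_path': 'data/scans',
    'eval_source_scan': '1',               # loads data/scans/1.npy
    'eval_tile_center': [0.0, 0.0, 0.0],   # x, y, z of the tile center
    'eval_tile_size': 100000,              # points pulled around the center
    'max_n_size': 150,                     # neighborhood size for the export
}
create_eval_tile(config)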
Example #6
def harmonize(model,
              source_scan_path,
              target_scan_num,
              config,
              save=False,
              sample_size=None):

    harmonized_path = Path(config['dataset']['harmonized_path'])
    plots_path = harmonized_path / "plots"
    plots_path.mkdir(exist_ok=True, parents=True)

    n_size = config['train']['neighborhood_size']
    b_size = config['train']['batch_size']
    chunk_size = config['dataset']['dataloader_size']
    transforms = get_transforms(config)
    G = GlobalShift(**config["dataset"])

    source_scan = np.load(source_scan_path)

    if config['dataset']['shift']:
        source_scan = G(source_scan)

    source_scan_num = int(source_scan[0, 8])

    if sample_size is not None:
        sample = np.random.choice(source_scan.shape[0], sample_size)
    else:
        sample = np.arange(source_scan.shape[0])

    model = model.to(config['train']['device'])
    model.eval()

    kd = kdtree._build(source_scan[:, :3])

    query = kdtree._query(kd, source_scan[sample, :3], k=n_size)

    query = np.array(query)
    size = len(query)

    hz = torch.empty(size).double()
    ip = torch.empty(size).double()
    cr = torch.empty(size).double()

    running_loss = 0

    pbar1 = get_pbar(range(0, len(query), chunk_size),
                     int(np.ceil(source_scan.shape[0] / chunk_size)),
                     f"Hzing Scan {source_scan_num}-->{target_scan_num}",
                     0,
                     leave=True,
                     disable=config['dataset']['tqdm'])

    for i in pbar1:
        query_chunk = query[i:i + chunk_size, :]
        source_chunk = source_scan[i:i + chunk_size, :]
        source_chunk = np.expand_dims(source_chunk, 1)

        neighborhoods = np.concatenate(
            (source_chunk, source_scan[query_chunk]), axis=1)

        dataset = LidarDatasetNP(neighborhoods, transform=transforms)

        dataloader = DataLoader(dataset,
                                batch_size=b_size,
                                num_workers=config['train']['num_workers'])

        pbar2 = get_pbar(dataloader,
                         len(dataloader),
                         "  Processing Chunk",
                         1,
                         disable=config['dataset']['tqdm'])

        with torch.no_grad():
            for j, batch in enumerate(pbar2):
                batch[:, 0, -1] = target_scan_num  # specify that we wish to harmonize

                batch = batch.to(config['train']['device'])

                # dublin specific?
                h_target = batch[:, 0, 3].clone()
                i_target = batch[:, 1, 3].clone()

                harmonization, interpolation, _ = model(batch)

                ldx = i + (j * b_size)
                hdx = i + (j + 1) * b_size

                hz[ldx:hdx] = harmonization.cpu().squeeze()
                ip[ldx:hdx] = interpolation.cpu().squeeze()
                cr[ldx:hdx] = i_target.cpu()  # corruption

                loss = torch.mean(torch.abs(harmonization.squeeze() -
                                            h_target))
                running_loss += loss.item()
                pbar2.set_postfix({"loss": f"{running_loss/(i+j+1):.3f}"})

    # visualize results
    hz = hz.numpy()
    hz = np.clip(hz, 0, 1)
    ip = ip.numpy()
    ip = np.clip(ip, 0, 1)
    cr = cr.numpy()
    cr = np.expand_dims(cr, 1)

    if config['dataset']['name'] == "dublin" and sample_size is None:
        create_kde(source_scan[sample, 3],
                   hz.squeeze(),
                   xlabel="ground truth harmonization",
                   ylabel="predicted harmonization",
                   output_path=plots_path /
                   f"{source_scan_num}-{target_scan_num}_harmonization.png")

        create_kde(cr.squeeze(),
                   ip.squeeze(),
                   xlabel="ground truth interpolation",
                   ylabel="predicted interpolation",
                   output_path=plots_path /
                   f"{source_scan_num}-{target_scan_num}_interpolation.png")

        create_kde(source_scan[sample, 3],
                   cr.squeeze(),
                   xlabel="ground truth",
                   ylabel="corruption",
                   output_path=plots_path /
                   f"{source_scan_num}-{target_scan_num}_corruption.png")

    # insert results into original scan
    harmonized_scan = np.hstack(
        (source_scan[sample, :3],
         np.expand_dims(hz, 1),
         source_scan[sample, 4:]))

    if config['dataset']['name'] == "dublin":
        scan_error = np.mean(np.abs((source_scan[sample, 3]) - hz.squeeze()))
        print(f"Scan {source_scan_num} Harmonize MAE: {scan_error}")

    if save:
        np.save((Path(config['dataset']['harmonized_path']) /
                 (str(source_scan_num) + ".npy")), harmonized_scan)

    return harmonized_scan
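A hypothetical call; model is the trained harmonization network, and the nested keys mirror the config lookups in the function body:

harmonized = harmonize(
    model,
    "data/scans/1.npy",      # source scan: intensity in column 3, scan id in column 8
    target_scan_num=39,
    config=config,
    save=True,               # also writes <harmonized_path>/<source_scan_num>.npy
    sample_size=None)        # None -> harmonize every point in the scan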
Example #7
def get_hist_overlap(pc1, pc2, sample_overlap_size=10000, hist_bin_length=25):
    # Params:
    #     pc1: point cloud 1 (np array with shape [m, k1])
    #     pc2: point cloud 2 (np array with shape [n, k2])
    #
    #     k1 and k2 must cover at least the x, y, z coordinates.
    #
    # Returns:
    #     ((hist, edges), pc1_sample_f): a 3D histogram over the sampled
    #     overlap region (with its bin edges), plus the neighbor-count-
    #     weighted sample points it was built from.

    # define a data range
    pc_combined = np.concatenate((pc1, pc2))
    data_range = np.array(
        [[pc_combined[:, 0].min(), pc_combined[:, 0].max()],
        [pc_combined[:, 1].min(), pc_combined[:, 1].max()],
        [pc_combined[:, 2].min(), pc_combined[:, 2].max()]])
    
    bin_counts = [int((f[1]-f[0])/hist_bin_length) for f in data_range]

    del pc_combined  # save some mem
    
    # define bins based on data_range:
    x_bins = np.linspace(data_range[0][0], data_range[0][1], num=bin_counts[0])
    y_bins = np.linspace(data_range[1][0], data_range[1][1], num=bin_counts[1])
    z_bins = np.linspace(data_range[2][0], data_range[2][1], num=bin_counts[2])
    
    # Collect some number of points as overlap between these point clouds
    # build kd tree so we can search for points in pc2
    kd = kdtree._build(pc2[:, :3])

    # collect a sample of points in pc1 to query in pc2
    sample_overlap = np.random.choice(len(pc1), size=sample_overlap_size)
    pc1_sample = pc1[sample_overlap]

    # query pc1 sample in pc2. note that we want lots of nearby neighbors
    query = kdtree._query(kd, pc1_sample[:, :3], k=150, dmax=1)
    
    # Count the number of neighbors found at each query point
    counts = np.zeros((len(query), 1))
    for i in range(len(query)):
        counts[i][0] = len(query[i])

    # Append this to our sample
    pc1_sample_with_counts = np.concatenate((pc1_sample[:, :3], counts), axis=1)

    # Each sampled point needs to occur in the array `count` times so that
    # the histogram below is weighted by neighbor count.
    rows = []
    for i in range(len(pc1_sample_with_counts)):
        row = pc1_sample_with_counts[i, :3]
        row = np.expand_dims(row, 0)
        count = int(pc1_sample_with_counts[i, 3])  # neighbor count lives in column 3
        if count:  # skip sample points with no neighbors in pc2
            rows.append(np.repeat(row, count, axis=0))
    
    pc1_sample_f = np.concatenate(rows, axis=0)
    
    # build histogram over data
    hist, edges = np.histogramdd(
        pc1_sample_f[:, :3], 
        bins=[x_bins, y_bins, z_bins])

    return (hist, edges), pc1_sample_f
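A minimal sketch, assuming the same pptk kdtree binding as above; random clouds stand in for real scans:

import numpy as np

# Two partially overlapping random clouds; only columns 0:3 (x, y, z) are used.
pc1 = np.random.rand(50000, 3) * 100
pc2 = np.random.rand(50000, 3) * 100 + 30

(hist, edges), sample = get_hist_overlap(pc1, pc2)
densest_bin = np.unravel_index(hist.argmax(), hist.shape)  # region of heaviest overlap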