Example #1
0
def main(args):
    """Select one seed point per substack and write seed/neighbor marker files.

    Reads the CSV at ``args.data_file``, groups its points with
    ``utils.points_to_substack`` and, for every group, takes the group point
    closest to the group mean.  That point is looked up in a KD-tree built
    over all points, and the resulting global index is kept as a seed.

    Output layout under ``args.outdir``:

    * ``nn/<n_neighbors>`` -- an empty file whose name is the value returned
      by ``graph_utils.compute_minimum_nearest_neighbors``.
    * ``seeds/<folder>/<seed>`` -- one empty file per seed, spread
      round-robin over ``parameters.jobs`` folders.

    Args:
        args: object exposing ``data_file`` and ``outdir`` attributes.
    """
    data_file = args.data_file
    outdir = utils.add_trailing_slash(args.outdir)

    outdir_seeds = outdir + 'seeds/'
    outdir_nn = outdir + 'nn/'

    utils.make_dir(outdir)
    utils.make_dir(outdir_seeds)
    utils.make_dir(outdir_nn)

    # ``range`` is valid on both Python 2 and 3 (``xrange`` is Python 2 only).
    for folder in range(parameters.jobs):
        utils.make_dir(outdir_seeds + repr(folder))

    data_frame = pd.read_csv(data_file)

    # ``DataFrame.as_matrix`` was deprecated and later removed from pandas;
    # selecting the columns and taking ``.values`` yields the same array.
    coord_cols = [parameters.x_col, parameters.y_col, parameters.z_col]
    points_matrix = data_frame[coord_cols].values
    name = data_frame[parameters.name_col]

    data_substacks = utils.points_to_substack(points_matrix, name)

    seeds = []
    global_kdtree = cKDTree(points_matrix)
    # Only the grouped points are needed; the substack key is not used.
    # NOTE(review): assumes ``data_substacks`` is a dict-like with
    # ``values()`` (the original iterated it with ``iteritems()``).
    for data in data_substacks.values():
        X = np.asarray(np.vstack(data), dtype=np.float64)
        kdtree = cKDTree(X)
        # Row of the group point nearest to the group mean.
        _, index = kdtree.query(np.mean(X, axis=0))
        # Index of that same point within the full point matrix.
        _, centroid = global_kdtree.query(X[index, :])
        seeds.append(centroid)

    print(len(seeds))
    n_neighbors = graph_utils.compute_minimum_nearest_neighbors(points_matrix)

    # The file name itself carries the information; the file stays empty.
    with open(outdir_nn + repr(n_neighbors), 'w'):
        pass

    # The last seed goes to folder 0, the one before it to folder 1, and so
    # on, wrapping around after ``parameters.jobs`` folders.
    for position, seed in enumerate(reversed(seeds)):
        folder = position % parameters.jobs
        # Write the index as a plain decimal integer so the file name can be
        # parsed back with ``int()`` however the index scalar would ``repr``.
        seed_path = outdir_seeds + repr(folder) + '/' + str(int(seed))
        with open(seed_path, 'w'):
            pass
Example #2
0
def main(args):
    """Compute per-substack seed indices and emit them as empty marker files.

    The point cloud is loaded from ``args.data_file`` and split into groups
    by ``utils.points_to_substack``.  For each group the point nearest to
    the group mean is located, then resolved to its index in a KD-tree built
    over the whole cloud; that index becomes a seed.

    Files created under ``args.outdir``:

    * ``nn/<n_neighbors>`` -- empty file named after the result of
      ``graph_utils.compute_minimum_nearest_neighbors``.
    * ``seeds/<folder>/<seed>`` -- empty file per seed, distributed
      round-robin across ``parameters.jobs`` folders.

    Args:
        args: object exposing ``data_file`` and ``outdir`` attributes.
    """
    data_file = args.data_file
    outdir = utils.add_trailing_slash(args.outdir)

    outdir_seeds = outdir + 'seeds/'
    outdir_nn = outdir + 'nn/'

    utils.make_dir(outdir)
    utils.make_dir(outdir_seeds)
    utils.make_dir(outdir_nn)

    # One sub-folder per job; ``range`` works on Python 2 and 3 alike.
    for folder in range(parameters.jobs):
        utils.make_dir(outdir_seeds + repr(folder))

    data_frame = pd.read_csv(data_file)

    # Column selection + ``.values`` replaces ``DataFrame.as_matrix``, which
    # newer pandas releases no longer provide.
    coord_cols = [parameters.x_col, parameters.y_col, parameters.z_col]
    points_matrix = data_frame[coord_cols].values
    name = data_frame[parameters.name_col]

    data_substacks = utils.points_to_substack(points_matrix, name)

    seeds = []
    global_kdtree = cKDTree(points_matrix)
    # The substack key is irrelevant here, so iterate the values only.
    # NOTE(review): assumes ``data_substacks`` is a dict-like with
    # ``values()`` (the original iterated it with ``iteritems()``).
    for data in data_substacks.values():
        X = np.asarray(np.vstack(data), dtype=np.float64)
        kdtree = cKDTree(X)
        # Nearest group point to the group's mean position.
        _, index = kdtree.query(np.mean(X, axis=0))
        # Translate that point into an index of the full point matrix.
        _, centroid = global_kdtree.query(X[index, :])
        seeds.append(centroid)

    print(len(seeds))
    n_neighbors = graph_utils.compute_minimum_nearest_neighbors(points_matrix)

    # Empty marker file: the value is encoded in the file name.
    with open(outdir_nn + repr(n_neighbors), 'w'):
        pass

    # Seeds are handed out starting from the end of the list: the last seed
    # lands in folder 0, the previous one in folder 1, wrapping around
    # after ``parameters.jobs`` folders.
    for position, seed in enumerate(reversed(seeds)):
        folder = position % parameters.jobs
        # A plain decimal name keeps the file parseable with ``int()``,
        # independent of the ``repr`` of the index scalar type.
        seed_path = outdir_seeds + repr(folder) + '/' + str(int(seed))
        with open(seed_path, 'w'):
            pass
Example #3
0
def main(args):
    """Process every seed in a folder and write one ``.marker`` CSV per patch.

    For each file name returned by ``utils.get_filenames(args.seeds_folder)``
    (interpreted as an integer seed) the function:

    1. builds a patch with ``PatchMaker`` and skips patches of one or two
       points;
    2. embeds the patch coordinates with ``IsomapEmbedder`` (patches raising
       ``ValueError`` are skipped);
    3. rebuilds 3D points from the embedding with ``Lowess``, using
       ``GaussianKernel`` weights whose diagonal is zeroed;
    4. stores the values from ``SurfaceCleaner.compute_distances`` in the
       ``parameters.distance_col`` column and writes the patch to
       ``<outdir><seed>.marker``.

    When ``args.debug`` is true, skipped patches and intermediate results
    are also saved in sub-folders of the output directory.

    Args:
        args: object exposing ``data_file``, ``outdir``, ``max_distance``,
            ``n_neighbors``, ``seeds_folder``, ``sigma`` and ``debug``.
    """
    data_file = args.data_file
    outdir = utils.add_trailing_slash(args.outdir)

    utils.make_dir(outdir)

    data_frame = pd.read_csv(data_file)

    max_distance = args.max_distance
    n_neighbors = args.n_neighbors
    seeds_folder = utils.add_trailing_slash(args.seeds_folder)
    sigma = args.sigma
    debug = args.debug

    if debug:
        outdir_embeddings = outdir + 'embeddings/'
        outdir_reconstructions = outdir + 'reconstructions/'
        outdir_csvs = outdir + 'csv_patches/'
        outdir_single_points = outdir + 'single_points/'
        outdir_faulty = outdir + 'faulty/'
        utils.make_dir(outdir_embeddings)
        utils.make_dir(outdir_reconstructions)
        utils.make_dir(outdir_csvs)
        utils.make_dir(outdir_single_points)
        utils.make_dir(outdir_faulty)

    coord_cols = [parameters.x_col, parameters.y_col, parameters.z_col]
    # The full coordinate matrix does not depend on the seed, so build it
    # once.  Column selection + ``.values`` replaces the removed
    # ``DataFrame.as_matrix``.
    # NOTE(review): assumes PatchMaker does not modify X in place -- confirm.
    X = data_frame[coord_cols].values

    # (headline prefix, skip notice, debug notice) keyed by patch size, for
    # the degenerate patches that are skipped.
    tiny_patch_messages = {
        1: ("There is one point in patch from seed ",
            "Most likely a false positive, skipping...",
            "Saving patch with one point for debug purposes..."),
        2: ("There are two points in patch from seed ",
            "Most likely two false positives, skipping...",
            "Saving patch with two points for debug purposes..."),
    }

    seeds = utils.get_filenames(seeds_folder)

    for seed in seeds:
        patch_maker = PatchMaker(X, int(seed), n_neighbors, max_distance)
        patch = patch_maker.patch_data()

        if len(patch) in tiny_patch_messages:
            headline, skip_notice, debug_notice = tiny_patch_messages[len(patch)]
            print(headline + seed + " with geodesic radius " + repr(max_distance))
            print(skip_notice)
            if debug:
                print(debug_notice)
                single_frame_patch = data_frame[data_frame.index.isin(patch)]
                single_frame_patch.to_csv(outdir_single_points + seed + '.csv', index=False)
            continue

        # Take an explicit copy: a column is added below, and assigning to a
        # boolean-indexed selection of ``data_frame`` is a chained
        # assignment (SettingWithCopyWarning).
        data_frame_patch = data_frame[data_frame.index.isin(patch)].copy()

        X_patch = data_frame_patch[coord_cols].values

        n_neighbors_patch = graph_utils.compute_minimum_nearest_neighbors(X_patch)

        iso = IsomapEmbedder(n_neighbors_patch)

        try:
            points_2d = iso.compute(X_patch)
        except ValueError:
            print("Processing seed " + seed + "...")
            print("Got a strange ValueError due to sparse representation, skipping the patch...")
            if debug:
                print("Saving faulty patch for debug purposes...")
                data_frame_patch.to_csv(outdir_faulty + seed + '.csv', index=False)
            continue

        metric = EuclideanMetric()
        kernel = GaussianKernel(sigma, metric)

        weights = kernel.compute_multiple(points_2d)
        # Zero the diagonal of the weight matrix before fitting.
        np.fill_diagonal(weights, 0)

        low = Lowess(metric, parameters.robust_iter)

        points_3d_rebuilt = low.fit_transform(points_2d, X_patch, weights)

        surface_cleaner = SurfaceCleaner(metric)
        surface_distance_penalty = surface_cleaner.compute_distances(X_patch, points_3d_rebuilt)

        data_frame_patch[parameters.distance_col] = surface_distance_penalty
        data_frame_patch.to_csv(outdir + seed + '.marker', index=False)

        if debug:
            embed_df = pd.DataFrame(data={
                parameters.x_col: points_2d[:, 0],
                parameters.y_col: points_2d[:, 1],
                parameters.distance_col: surface_distance_penalty,
            })
            embed_df.to_csv(outdir_embeddings + seed + '.csv', index=False)

            rebuild_df = pd.DataFrame(data={
                parameters.x_col: points_3d_rebuilt[:, 0],
                parameters.y_col: points_3d_rebuilt[:, 1],
                parameters.z_col: points_3d_rebuilt[:, 2],
                parameters.distance_col: surface_distance_penalty,
            })
            rebuild_df.to_csv(outdir_reconstructions + seed + '.csv', index=False)

            data_frame_patch.to_csv(outdir_csvs + seed + '.csv',
                                    columns=coord_cols + [parameters.distance_col],
                                    index=False)
Example #4
0
def main(args):
    """Turn each seed file into a patch CSV annotated with surface distances.

    Seeds are the file names returned by
    ``utils.get_filenames(args.seeds_folder)``; each is converted with
    ``int()`` and handed to ``PatchMaker`` together with the full coordinate
    matrix.  Per seed:

    * patches containing one or two points are reported and skipped;
    * the patch coordinates are embedded with ``IsomapEmbedder``; a
      ``ValueError`` from the embedding skips the patch;
    * ``Lowess`` rebuilds 3D points from the embedding, weighted by a
      ``GaussianKernel`` matrix with a zeroed diagonal;
    * the result of ``SurfaceCleaner.compute_distances`` is stored in the
      ``parameters.distance_col`` column and the patch is written to
      ``<outdir><seed>.marker``.

    With ``args.debug`` set, skipped patches, embeddings, reconstructions
    and reduced patch CSVs are saved in dedicated sub-folders.

    Args:
        args: object exposing ``data_file``, ``outdir``, ``max_distance``,
            ``n_neighbors``, ``seeds_folder``, ``sigma`` and ``debug``.
    """
    data_file = args.data_file
    outdir = utils.add_trailing_slash(args.outdir)

    utils.make_dir(outdir)

    data_frame = pd.read_csv(data_file)

    max_distance = args.max_distance
    n_neighbors = args.n_neighbors
    seeds_folder = utils.add_trailing_slash(args.seeds_folder)
    sigma = args.sigma
    debug = args.debug

    if debug:
        outdir_embeddings = outdir + 'embeddings/'
        outdir_reconstructions = outdir + 'reconstructions/'
        outdir_csvs = outdir + 'csv_patches/'
        outdir_single_points = outdir + 'single_points/'
        outdir_faulty = outdir + 'faulty/'
        utils.make_dir(outdir_embeddings)
        utils.make_dir(outdir_reconstructions)
        utils.make_dir(outdir_csvs)
        utils.make_dir(outdir_single_points)
        utils.make_dir(outdir_faulty)

    coord_cols = [parameters.x_col, parameters.y_col, parameters.z_col]
    # Seed-independent, so computed once instead of on every iteration.
    # ``[cols].values`` stands in for ``DataFrame.as_matrix``, which newer
    # pandas releases no longer provide.
    # NOTE(review): assumes PatchMaker does not modify X in place -- confirm.
    X = data_frame[coord_cols].values

    # Per patch size: (headline prefix, skip notice, debug notice) for the
    # degenerate patches that are not processed further.
    tiny_patch_messages = {
        1: ("There is one point in patch from seed ",
            "Most likely a false positive, skipping...",
            "Saving patch with one point for debug purposes..."),
        2: ("There are two points in patch from seed ",
            "Most likely two false positives, skipping...",
            "Saving patch with two points for debug purposes..."),
    }

    seeds = utils.get_filenames(seeds_folder)

    for seed in seeds:
        patch_maker = PatchMaker(X, int(seed), n_neighbors, max_distance)
        patch = patch_maker.patch_data()

        if len(patch) in tiny_patch_messages:
            headline, skip_notice, debug_notice = tiny_patch_messages[
                len(patch)]
            print(headline + seed + " with geodesic radius " +
                  repr(max_distance))
            print(skip_notice)
            if debug:
                print(debug_notice)
                single_frame_patch = data_frame[data_frame.index.isin(patch)]
                single_frame_patch.to_csv(outdir_single_points + seed + '.csv',
                                          index=False)
            continue

        # Copy explicitly: the distance column is added to this frame later,
        # and writing into a boolean-indexed selection of ``data_frame`` is
        # a chained assignment (SettingWithCopyWarning).
        data_frame_patch = data_frame[data_frame.index.isin(patch)].copy()

        X_patch = data_frame_patch[coord_cols].values

        n_neighbors_patch = graph_utils.compute_minimum_nearest_neighbors(
            X_patch)

        iso = IsomapEmbedder(n_neighbors_patch)

        try:
            points_2d = iso.compute(X_patch)
        except ValueError:
            print("Processing seed " + seed + "...")
            print("Got a strange ValueError due to sparse representation, skipping the patch...")
            if debug:
                print("Saving faulty patch for debug purposes...")
                data_frame_patch.to_csv(outdir_faulty + seed + '.csv',
                                        index=False)
            continue

        metric = EuclideanMetric()
        kernel = GaussianKernel(sigma, metric)

        weights = kernel.compute_multiple(points_2d)
        # Clear the diagonal of the weight matrix before the fit.
        np.fill_diagonal(weights, 0)

        low = Lowess(metric, parameters.robust_iter)

        points_3d_rebuilt = low.fit_transform(points_2d, X_patch, weights)

        surface_cleaner = SurfaceCleaner(metric)
        surface_distance_penalty = surface_cleaner.compute_distances(
            X_patch, points_3d_rebuilt)

        data_frame_patch[parameters.distance_col] = surface_distance_penalty
        data_frame_patch.to_csv(outdir + seed + '.marker', index=False)

        if debug:
            embed_df = pd.DataFrame(
                data={
                    parameters.x_col: points_2d[:, 0],
                    parameters.y_col: points_2d[:, 1],
                    parameters.distance_col: surface_distance_penalty,
                })
            embed_df.to_csv(outdir_embeddings + seed + '.csv', index=False)

            rebuild_df = pd.DataFrame(
                data={
                    parameters.x_col: points_3d_rebuilt[:, 0],
                    parameters.y_col: points_3d_rebuilt[:, 1],
                    parameters.z_col: points_3d_rebuilt[:, 2],
                    parameters.distance_col: surface_distance_penalty,
                })
            rebuild_df.to_csv(outdir_reconstructions + seed + '.csv',
                              index=False)

            data_frame_patch.to_csv(
                outdir_csvs + seed + '.csv',
                columns=coord_cols + [parameters.distance_col],
                index=False)