def get_merge_candidates(table_id,
                         save_dir=f"{HOME}/benchmarks/",
                         n_threads=1):
    cg = chunkedgraph.ChunkedGraph(table_id)

    bounds = np.array(cg.cv.bounds.to_list()).reshape(2, -1).T
    bounds -= bounds[:, 0:1]

    chunk_id_bounds = np.ceil(bounds / cg.chunk_size[:, None]).astype(int)

    chunk_coord_gen = itertools.product(*[range(*r) for r in chunk_id_bounds])
    chunk_coords = np.array(list(chunk_coord_gen), dtype=int)

    order = np.arange(len(chunk_coords))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    multi_args = []
    for block in blocks:
        multi_args.append([cg_serialized_info, chunk_coords[block]])

    if n_threads == 1:
        results = mu.multiprocess_func(_get_merge_candidates,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_merge_candidates,
                                          multi_args,
                                          n_threads=n_threads)
    merge_edges = []
    merge_edge_weights = []
    for result in results:
        merge_edges.extend(result[0])
        merge_edge_weights.extend(result[1])

    save_folder = f"{save_dir}/{table_id}/"

    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    with h5py.File(f"{save_folder}/merge_edge_stats.h5", "w") as f:
        f.create_dataset("merge_edges", data=merge_edges, compression="gzip")
        f.create_dataset("merge_edge_weights",
                         data=merge_edge_weights,
                         compression="gzip")
Example #2
def mesh_lvl2_previews(cg,
                       lvl2_node_ids,
                       cv_path=None,
                       cv_mesh_dir=None,
                       mip=2,
                       simplification_factor=999999,
                       max_err=40,
                       parallel_download=8,
                       verbose=True,
                       cache_control="no-cache",
                       n_threads=1):

    serialized_cg_info = cg.get_serialized_info()
    del serialized_cg_info["credentials"]

    if not isinstance(lvl2_node_ids, dict):
        lvl2_node_ids = dict(zip(lvl2_node_ids, [None] * len(lvl2_node_ids)))

    mesh_dir = cv_mesh_dir or cg._mesh_dir

    multi_args = []
    for lvl2_node_id in lvl2_node_ids.keys():
        multi_args.append([
            serialized_cg_info, lvl2_node_id, lvl2_node_ids[lvl2_node_id],
            cv_path, mesh_dir, mip, simplification_factor, max_err,
            parallel_download, verbose, cache_control
        ])

    # Run in parallel
    if n_threads == 1:
        mu.multiprocess_func(_mesh_lvl2_previews_threads,
                             multi_args,
                             n_threads=n_threads,
                             verbose=False,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_mesh_lvl2_previews_threads,
                                multi_args,
                                n_threads=n_threads)
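A hypothetical call, assuming cg is a live ChunkedGraph instance and the IDs (placeholder names) are valid level-2 nodes; passing a plain sequence lets the function build the {node_id: None} dict itself:

# Mesh two level-2 chunks with two worker threads.
mesh_lvl2_previews(cg,
                   [lvl2_node_id_a, lvl2_node_id_b],
                   mip=2,
                   n_threads=2)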
Example #3
def rewrite_segmentation(dataset_name, n_threads=64, n_units_per_thread=None):
    if dataset_name == "pinky":
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
        from_url = "gs://neuroglancer/pinky40_v11/watershed/"
        to_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
    elif dataset_name == "basil":
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
        from_url = "gs://neuroglancer/ranl/basil_4k_oldnet/ws/"
        to_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
    else:
        raise Exception("Dataset unknown")

    file_paths = np.sort(
        glob.glob(
            creator_utils.dir_from_layer_name(
                creator_utils.layer_name_from_cv_url(cv_url)) + "/*rg2cg*"))

    if n_units_per_thread is None:
        file_path_blocks = np.array_split(file_paths, n_threads * 3)
    else:
        n_blocks = int(np.ceil(len(file_paths) / n_units_per_thread))
        file_path_blocks = np.array_split(file_paths, n_blocks)

    multi_args = []
    for fp_block in file_path_blocks:
        multi_args.append([fp_block, from_url, to_url])

    # Run in parallel
    if n_threads == 1:
        mu.multiprocess_func(_rewrite_segmentation_thread,
                             multi_args,
                             n_threads=n_threads,
                             verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_rewrite_segmentation_thread,
                                multi_args,
                                n_threads=n_threads)
Example #4
def count_nodes_and_edges(table_id, n_threads=1):
    cg = chunkedgraph.ChunkedGraph(table_id)

    bounds = np.array(cg.cv.bounds.to_list()).reshape(2, -1).T
    bounds -= bounds[:, 0:1]

    chunk_id_bounds = np.ceil(bounds / cg.chunk_size[:, None]).astype(int)

    chunk_coord_gen = itertools.product(*[range(*r) for r in chunk_id_bounds])
    chunk_coords = np.array(list(chunk_coord_gen), dtype=int)

    order = np.arange(len(chunk_coords))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    multi_args = []
    for block in blocks:
        multi_args.append([cg_serialized_info, chunk_coords[block]])

    if n_threads == 1:
        results = mu.multiprocess_func(_count_nodes_and_edges,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_count_nodes_and_edges,
                                          multi_args,
                                          n_threads=n_threads)

    n_edges_per_chunk = []
    n_nodes_per_chunk = []
    for result in results:
        n_nodes_per_chunk.extend(result[0])
        n_edges_per_chunk.extend(result[1])

    return n_nodes_per_chunk, n_edges_per_chunk
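A short usage sketch; table_id is assumed to name an existing chunkedgraph table:

# Per-chunk counts come back as two flat lists; totals are plain sums.
n_nodes, n_edges = count_nodes_and_edges(table_id, n_threads=8)
print(f"nodes: {sum(n_nodes)}, edges: {sum(n_edges)}")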
Example #5
def get_latest_roots(cg,
                     time_stamp: Optional[datetime.datetime] = None,
                     n_threads: int = 1) -> Sequence[np.uint64]:

    # Create filters: time and id range
    max_seg_id = cg.get_max_seg_id(cg.root_chunk_id) + 1

    if n_threads == 1:
        n_blocks = 1
    else:
        n_blocks = int(np.min([n_threads * 3 + 1, max_seg_id]))

    seg_id_blocks = np.linspace(1, max_seg_id, n_blocks + 1, dtype=np.uint64)

    cg_serialized_info = cg.get_serialized_info()

    if n_threads > 1:
        del cg_serialized_info["credentials"]

    multi_args = []
    for i_id_block in range(0, len(seg_id_blocks) - 1):
        multi_args.append([
            seg_id_blocks[i_id_block], seg_id_blocks[i_id_block + 1],
            cg_serialized_info, time_stamp
        ])

    if n_threads == 1:
        results = mu.multiprocess_func(_read_root_rows_thread,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_read_root_rows_thread,
                                          multi_args,
                                          n_threads=n_threads)

    root_ids = []
    for result in results:
        root_ids.extend(result)

    return np.array(root_ids, dtype=np.uint64)
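A usage sketch; using the current UTC time is just one plausible choice of snapshot:

import datetime

# Root IDs valid now; an earlier time_stamp yields a historical snapshot.
root_ids = get_latest_roots(cg,
                            time_stamp=datetime.datetime.utcnow(),
                            n_threads=4)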
Example #6
def all_angle_weighted_distances(ds, angles, weights, rep_inds, inds):

    data = []
    real_inds, slice_bnds = np.unique(rep_inds, return_index=True)

    ind_map = []
    for ii in range(len(real_inds) - 1):
        row = slice(slice_bnds[ii], slice_bnds[ii + 1])
        data.append((ds[row], angles[row], weights[row]))
        ind_map.append(real_inds[ii])
    row = slice(slice_bnds[-1], len(ds))
    data.append((ds[row], angles[row], weights[row]))
    ind_map.append(real_inds[-1])

    rs = mu.multiprocess_func(_multi_angle_weighted_distance, data)

    rs_out = np.nan * np.zeros(len(inds))
    rs_out[np.array(ind_map)] = rs

    return rs_out
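The slicing above only partitions the data correctly when rep_inds is grouped (sorted), because np.unique(..., return_index=True) returns the index of each value's first occurrence. A minimal illustration:

import numpy as np

rep_inds = np.array([0, 0, 2, 2, 2, 5])  # must be grouped for the slices to work
real_inds, slice_bnds = np.unique(rep_inds, return_index=True)
# real_inds -> array([0, 2, 5]); slice_bnds -> array([0, 2, 5])
# consecutive slice_bnds entries delimit each group of rows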
Example #7
def oriented_vector_cones(center_vectors,
                          num_points,
                          widest_angle=np.pi / 3,
                          normalize=False):
    """Produces all ray cones
    """
    if normalize:
        cv_norm = center_vectors / \
            np.linalg.norm(center_vectors, axis=1)[:, np.newaxis]
    else:
        cv_norm = center_vectors

    thetas = np.arccos(cv_norm[:, 2])
    phis = np.arctan2(cv_norm[:, 1], cv_norm[:, 0])

    vs_raw = unit_vector_sampler(num_points, widest_angle=widest_angle)

    data = []
    for phi, theta in zip(phis, thetas):
        data.append((phi, theta, vs_raw))
    vector_cones = mu.multiprocess_func(_rotated_cone, data)
    return vector_cones
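A hypothetical call, assuming unit_vector_sampler and _rotated_cone are importable as in the module above:

import numpy as np

# One cone of 100 sampled rays per center direction.
centers = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
cones = oriented_vector_cones(centers, num_points=100, normalize=True)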
Example #8
def write_flat_segmentation(cg,
                            dataset_name,
                            bounding_box=None,
                            block_factor=2,
                            n_threads=1,
                            mip=0):
    """ Applies the mapping in the chunkedgraph to the supervoxels to create
        a flattened segmentation

    :param cg: chunkedgraph instance
    :param dataset_name: str
    :param bounding_box: np.array
    :param block_factor: int
    :param n_threads: int
    :param mip: int
    """

    if dataset_name == "pinky":
        from_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
        to_url = "gs://neuroglancer/svenmd/pinky40_v11/segmentation/"
    elif dataset_name == "basil":
        from_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
        to_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/segmentation/"
    else:
        raise Exception("Dataset unknown")

    from_cv = cloudvolume.CloudVolume(from_url, mip=mip)

    dataset_bounding_box = np.array(from_cv.bounds.to_list())

    block_bounding_box_cg = \
        [np.floor(dataset_bounding_box[:3] / cg.chunk_size).astype(int),
         np.ceil(dataset_bounding_box[3:] / cg.chunk_size).astype(int)]

    if bounding_box is not None:
        bounding_box_cg = \
            [np.floor(bounding_box[0] / cg.chunk_size).astype(int),
             np.ceil(bounding_box[1] / cg.chunk_size).astype(int)]

        m = block_bounding_box_cg[0] < bounding_box_cg[0]
        block_bounding_box_cg[0][m] = bounding_box_cg[0][m]

        m = block_bounding_box_cg[1] > bounding_box_cg[1]
        block_bounding_box_cg[1][m] = bounding_box_cg[1][m]

    block_iter = itertools.product(
        np.arange(block_bounding_box_cg[0][0], block_bounding_box_cg[1][0],
                  block_factor),
        np.arange(block_bounding_box_cg[0][1], block_bounding_box_cg[1][1],
                  block_factor),
        np.arange(block_bounding_box_cg[0][2], block_bounding_box_cg[1][2],
                  block_factor))
    blocks = np.array(list(block_iter))

    cg_info = cg.get_serialized_info()

    multi_args = []
    for start_block in blocks:
        end_block = start_block + block_factor
        m = end_block > block_bounding_box_cg[1]
        end_block[m] = block_bounding_box_cg[1][m]

        multi_args.append(
            [cg_info, start_block, end_block, from_url, to_url, mip])

    # Run in parallel
    if n_threads == 1:
        mu.multiprocess_func(_write_flat_segmentation_thread,
                             multi_args,
                             n_threads=n_threads,
                             verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_write_flat_segmentation_thread,
                                multi_args,
                                n_threads=n_threads)
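A usage sketch; the voxel ranges below are made-up and cg is assumed to be a live ChunkedGraph instance:

import numpy as np

# Flatten only a sub-volume; rows are [start_xyz, end_xyz] in voxels.
bbox = np.array([[0, 0, 0], [1024, 1024, 256]])
write_flat_segmentation(cg, "pinky", bounding_box=bbox, n_threads=16)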
Example #9
def generate_thumbnails(
    filename,
    batch_name,
    min_height,
    width,
    title_column,
    abstract_column,
    author_column_contains,
    twitter_column_contains,
    save_author_string,
    thumbnail_directory,
    use_oxford,
    n_threads,
):
    data = pd.read_csv(filename)

    author_columns = []
    for c in data.columns:
        if re.match(author_column_contains, c) is not None:
            author_columns.append(c)

    twitter_columns = []
    if twitter_column_contains is not False:
        for c in data.columns:
            if re.match(twitter_column_contains, c) is not None:
                twitter_columns.append(c)

    author_list = []
    author_list_with_handles = []
    for ii, row in data[author_columns].iterrows():
        auths = row[~pd.isna(row)].tolist()
        author_list.append(make_author_string(auths, use_oxford=use_oxford))
        try:
            twit_row = data.iloc[ii][twitter_columns]
            handles = twit_row[~pd.isna(row).values].tolist()
            author_list_with_handles.append(
                make_author_string(auths,
                                   twitter_list=handles,
                                   use_oxford=use_oxford))
        except Exception:
            print("Twitter handles failed!")
            author_list_with_handles = author_list

    title_list = data[title_column].tolist()
    abstract_list = data[abstract_column].tolist()

    if not os.path.exists(thumbnail_directory):
        os.mkdir(thumbnail_directory)

    if batch_name is None:
        batch_dir = f"batch_{str(datetime.date.today()).replace('-', '_')}"
    else:
        batch_dir = batch_name

    if not os.path.exists(f"{thumbnail_directory}/{batch_dir}"):
        os.mkdir(f"{thumbnail_directory}/{batch_dir}")

    if n_threads > 1:
        print(f"Making all images with {n_threads} processes...")
        all_args = []
        t0 = time.time()
        for title, authors, abstract in zip(title_list, author_list,
                                            abstract_list):
            all_args.append([
                title,
                authors,
                abstract,
                width,
                min_height,
                thumbnail_directory,
                batch_dir,
            ])
        mu.multiprocess_func(_save_data_multithreaded,
                             all_args,
                             n_threads=n_threads)
        print(f"\tImages produced in {time.time()-t0:.2f} s.")
    else:
        for title, authors, abstract in tqdm.tqdm(zip(title_list, author_list,
                                                      abstract_list),
                                                  total=len(title_list)):
            img = thumbnail_image(title,
                                  authors,
                                  abstract,
                                  image_width=width,
                                  min_height=min_height)
            fname = simple_filename(title,
                                    f"{thumbnail_directory}/{batch_dir}",
                                    max_words=8)
            img.save(
                fname,
                dpi=(150, 150),
            )

    if save_author_string:
        data["authors_with_handles"] = author_list_with_handles
        pure_filename = os.path.split(filename)[-1]
        fn = pure_filename.split(".")
        out_name = f"{thumbnail_directory}/{batch_dir}/{fn[-2].replace('/','')}_with_tweets.csv"
        data.to_csv(out_name)
        print(f"Data saved to {out_name}")
    return
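A hypothetical invocation; the CSV name, column names, and regex prefixes are assumptions about the input layout:

generate_thumbnails("submissions.csv",
                    batch_name=None,              # default: dated batch folder
                    min_height=300,
                    width=600,
                    title_column="title",
                    abstract_column="abstract",
                    author_column_contains="author_",
                    twitter_column_contains="twitter_",
                    save_author_string=True,
                    thumbnail_directory="thumbnails",
                    use_oxford=True,
                    n_threads=4)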
Example #10
def get_root_ids_and_sv_chunks(table_id,
                               save_dir=f"{HOME}/benchmarks/",
                               n_threads=1):
    cg = chunkedgraph.ChunkedGraph(table_id)

    save_folder = f"{save_dir}/{table_id}/"

    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    if not os.path.exists(f"{save_folder}/root_ids.h5"):
        root_ids = chunkedgraph_comp.get_latest_roots(cg, n_threads=n_threads)

        with h5py.File(f"{save_folder}/root_ids.h5", "w") as f:
            f.create_dataset("root_ids", data=root_ids)
    else:
        with h5py.File(f"{save_folder}/root_ids.h5", "r") as f:
            root_ids = f["root_ids"][()]

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    order = np.arange(len(root_ids))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    multi_args = []
    for block in blocks:
        multi_args.append([cg_serialized_info, root_ids[block]])

    if n_threads == 1:
        results = mu.multiprocess_func(_get_root_ids_and_sv_chunks,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_root_ids_and_sv_chunks,
                                          multi_args,
                                          n_threads=n_threads)

    root_ids = []
    n_l1_nodes_per_root = []
    rep_l1_nodes = []
    rep_l1_chunk_ids = []
    for result in results:
        root_ids.extend(result[0])
        n_l1_nodes_per_root.extend(result[1])
        rep_l1_nodes.extend(result[2])
        rep_l1_chunk_ids.extend(result[3])

    save_folder = f"{save_dir}/{table_id}/"

    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    with h5py.File(f"{save_folder}/root_stats.h5", "w") as f:
        f.create_dataset("root_ids", data=root_ids, compression="gzip")
        f.create_dataset("n_l1_nodes_per_root",
                         data=n_l1_nodes_per_root,
                         compression="gzip")
        f.create_dataset("rep_l1_nodes", data=rep_l1_nodes, compression="gzip")
        f.create_dataset("rep_l1_chunk_ids",
                         data=rep_l1_chunk_ids,
                         compression="gzip")
Example #11
def get_merge_split_timings(table_id,
                            save_dir=f"{HOME}/benchmarks/",
                            job_size=500,
                            n_threads=1):
    save_folder = f"{save_dir}/{table_id}/"

    merge_edges, merge_edge_weights = load_merge_stats(save_folder)

    probs = merge_edge_weights / np.sum(merge_edge_weights)

    n_jobs = n_threads * 3

    cg = chunkedgraph.ChunkedGraph(table_id)
    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    time_start = time.time()
    order = np.arange(len(merge_edges))

    np.random.seed(int(time.time()))

    replace = False

    blocks = np.random.choice(order,
                              job_size * n_jobs,
                              p=probs,
                              replace=replace).reshape(n_jobs, job_size)

    multi_args = []
    for block in blocks:
        multi_args.append([cg_serialized_info, merge_edges[block]])
    print(f"Building jobs took {time.time()-time_start}s")

    time_start = time.time()
    if n_threads == 1:
        results = mu.multiprocess_func(_get_merge_timings,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_merge_timings,
                                          multi_args,
                                          n_threads=n_threads)
    dt = time.time() - time_start

    timings = []
    for result in results:
        timings.extend(result[0])

    percentiles = [np.percentile(timings, k) for k in range(1, 100, 1)]
    mean = np.mean(timings)
    std = np.std(timings)
    median = np.median(timings)

    merge_results = {
        "percentiles": percentiles,
        "p01": percentiles[0],
        "p05": percentiles[4],
        "p95": percentiles[94],
        "p99": percentiles[98],
        "mean": mean,
        "std": std,
        "median": median,
        "total_time_s": dt,
        "job_size": job_size,
        "n_jobs": n_jobs,
        "n_threads": n_threads,
        "replace": replace,
        "requests_per_s": job_size * n_jobs / dt
    }

    timings = []
    for result in results:
        timings.extend(result[1])

    percentiles = [np.percentile(timings, k) for k in range(1, 100, 1)]
    mean = np.mean(timings)
    std = np.std(timings)
    median = np.median(timings)

    split_results = {
        "percentiles": percentiles,
        "p01": percentiles[0],
        "p05": percentiles[4],
        "p95": percentiles[94],
        "p99": percentiles[98],
        "mean": mean,
        "std": std,
        "median": median,
        "total_time_s": dt,
        "job_size": job_size,
        "n_jobs": n_jobs,
        "n_threads": n_threads,
        "replace": replace,
        "requests_per_s": job_size * n_jobs / dt
    }

    return merge_results, split_results
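A usage sketch; both returned dicts share the same keys, so reporting is symmetric:

merge_res, split_res = get_merge_split_timings(table_id,
                                               job_size=100,
                                               n_threads=4)
for name, res in [("merge", merge_res), ("split", split_res)]:
    print(f"{name}: median {res['median']:.3f}s, "
          f"p95 {res['p95']:.3f}s, {res['requests_per_s']:.1f} req/s")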
Example #12
    def mesh_single_layer(self, layer, bounding_box=None, block_factor=2,
                          n_threads=128):
        assert layer <= self.highest_mesh_layer

        dataset_bounding_box = np.array(self.cv.bounds.to_list())

        block_bounding_box_cg = \
            [np.floor(dataset_bounding_box[:3] /
                      self.cg.chunk_size).astype(int),
             np.ceil(dataset_bounding_box[3:] /
                     self.cg.chunk_size).astype(int)]

        if bounding_box is not None:
            bounding_box_cg = \
                [np.floor(bounding_box[0] /
                          self.cg.chunk_size).astype(int),
                 np.ceil(bounding_box[1] /
                         self.cg.chunk_size).astype(int)]

            m = block_bounding_box_cg[0] < bounding_box_cg[0]
            block_bounding_box_cg[0][m] = bounding_box_cg[0][m]

            m = block_bounding_box_cg[1] > bounding_box_cg[1]
            block_bounding_box_cg[1][m] = bounding_box_cg[1][m]

        block_bounding_box_cg = np.ceil(
            np.array(block_bounding_box_cg) / 2 ** max(0, layer - 2))

        n_jobs = np.prod(block_bounding_box_cg[1] -
                         block_bounding_box_cg[0]) / block_factor ** 2

        while n_jobs < n_threads and block_factor > 1:
            block_factor -= 1

            n_jobs = np.prod(block_bounding_box_cg[1] -
                             block_bounding_box_cg[0]) / block_factor ** 2

        block_iter = itertools.product(np.arange(block_bounding_box_cg[0][0],
                                                 block_bounding_box_cg[1][0],
                                                 block_factor),
                                       np.arange(block_bounding_box_cg[0][1],
                                                 block_bounding_box_cg[1][1],
                                                 block_factor),
                                       np.arange(block_bounding_box_cg[0][2],
                                                 block_bounding_box_cg[1][2],
                                                 block_factor))

        blocks = np.array(list(block_iter), dtype=int)

        cg_info = self.cg.get_serialized_info()
        del cg_info['credentials']

        multi_args = []
        for start_block in blocks:
            end_block = start_block + block_factor
            m = end_block > block_bounding_box_cg[1]
            end_block[m] = block_bounding_box_cg[1][m]

            multi_args.append([cg_info, start_block, end_block, self.cg.cv_path,
                               self.cv_mesh_dir, self.mesh_mip, layer])
        random.shuffle(multi_args)

        # Run in parallel
        if n_threads == 1:
            mu.multiprocess_func(meshgen._mesh_layer_thread, multi_args,
                                 n_threads=n_threads, verbose=True,
                                 debug=n_threads == 1)
        else:
            mu.multisubprocess_func(meshgen._mesh_layer_thread, multi_args,
                                    n_threads=n_threads,
                                    suffix="%s_%d" % (self.table_id, layer))
Example #13
def create_chunked_graph(table_id=None,
                         cv_url=None,
                         ws_url=None,
                         fan_out=2,
                         bbox=None,
                         chunk_size=(512, 512, 128),
                         verbose=False,
                         n_threads=1):
    """ Creates chunked graph from downloaded files

    :param table_id: str
    :param cv_url: str
    :param ws_url: str
    :param fan_out: int
    :param bbox: [[x_, y_, z_], [_x, _y, _z]]
    :param chunk_size: tuple
    :param verbose: bool
    :param n_threads: int
    """
    if cv_url is None or ws_url is None:
        if "basil" in table_id:
            cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
            ws_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
        elif "pinky40" in table_id:
            cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
            ws_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
        elif "pinky100" in table_id:
            cv_url = "gs://nkem/pinky100_v0/region_graph/"
            ws_url = "gs://neuroglancer/nkem/pinky100_v0/ws/lost_no-random/bbox1_0/"
        else:
            raise Exception("Could not identify region graph ressource")

    times = []
    time_start = time.time()

    chunk_size = np.array(list(chunk_size))

    file_paths = np.sort(
        glob.glob(
            creator_utils.dir_from_layer_name(
                creator_utils.layer_name_from_cv_url(cv_url)) + "/*"))

    file_path_blocks = np.array_split(file_paths, n_threads * 3)

    multi_args = []
    for fp_block in file_path_blocks:
        multi_args.append([fp_block, table_id, chunk_size, bbox])

    if n_threads == 1:
        results = mu.multiprocess_func(_preprocess_chunkedgraph_data_thread,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=True,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_preprocess_chunkedgraph_data_thread,
                                          multi_args,
                                          n_threads=n_threads)

    in_chunk_connected_paths = np.array([])
    in_chunk_connected_ids = np.array([], dtype=np.uint64).reshape(-1, 3)
    in_chunk_disconnected_paths = np.array([])
    in_chunk_disconnected_ids = np.array([], dtype=np.uint64).reshape(-1, 3)
    between_chunk_paths = np.array([])
    between_chunk_ids = np.array([], dtype=np.uint64).reshape(-1, 2, 3)
    isolated_paths = np.array([])
    isolated_ids = np.array([], dtype=np.uint64).reshape(-1, 3)

    for result in results:
        in_chunk_connected_paths = np.concatenate(
            [in_chunk_connected_paths, result[0]])
        in_chunk_connected_ids = np.concatenate(
            [in_chunk_connected_ids, result[1]])
        in_chunk_disconnected_paths = np.concatenate(
            [in_chunk_disconnected_paths, result[2]])
        in_chunk_disconnected_ids = np.concatenate(
            [in_chunk_disconnected_ids, result[3]])
        between_chunk_paths = np.concatenate([between_chunk_paths, result[4]])
        between_chunk_ids = np.concatenate([between_chunk_ids, result[5]])
        isolated_paths = np.concatenate([isolated_paths, result[6]])
        isolated_ids = np.concatenate([isolated_ids, result[7]])

    assert len(in_chunk_connected_ids) == len(in_chunk_connected_paths) == \
           len(in_chunk_disconnected_ids) == len(in_chunk_disconnected_paths) == \
           len(isolated_ids) == len(isolated_paths)

    in_chunk_connected_ids, in_chunk_connected_paths = \
        _sort_arrays(in_chunk_connected_ids, in_chunk_connected_paths)

    in_chunk_disconnected_ids, in_chunk_disconnected_paths = \
        _sort_arrays(in_chunk_disconnected_ids, in_chunk_disconnected_paths)

    isolated_ids, isolated_paths = \
        _sort_arrays(isolated_ids, isolated_paths)

    times.append(["Preprocessing", time.time() - time_start])

    print("Preprocessing took %.3fs = %.2fh" %
          (times[-1][1], times[-1][1] / 3600))

    time_start = time.time()

    multi_args = []

    in_chunk_id_blocks = np.array_split(in_chunk_connected_ids,
                                        max(1, n_threads))
    cumsum = 0

    for in_chunk_id_block in in_chunk_id_blocks:
        multi_args.append([
            between_chunk_ids, between_chunk_paths, in_chunk_id_block, cumsum
        ])
        cumsum += len(in_chunk_id_block)

    # Run in parallel
    if n_threads == 1:
        results = mu.multiprocess_func(_between_chunk_masks_thread,
                                       multi_args,
                                       n_threads=n_threads,
                                       verbose=True,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_between_chunk_masks_thread,
                                          multi_args,
                                          n_threads=n_threads)

    times.append(["Data sorting", time.time() - time_start])

    print("Data sorting took %.3fs = %.2fh" %
          (times[-1][1], times[-1][1] / 3600))

    time_start = time.time()

    n_layers = int(
        np.ceil(
            pychunkedgraph.backend.chunkedgraph_utils.log_n(
                np.max(in_chunk_connected_ids) + 1, fan_out))) + 2

    print("N layers: %d" % n_layers)

    cg = chunkedgraph.ChunkedGraph(table_id=table_id,
                                   n_layers=np.uint64(n_layers),
                                   fan_out=np.uint64(fan_out),
                                   chunk_size=np.array(chunk_size,
                                                       dtype=np.uint64),
                                   cv_path=ws_url,
                                   is_new=True)

    # Fill lowest layer and create first abstraction layer
    # Create arguments for the parallel jobs

    multi_args = []
    for result in results:
        offset, between_chunk_paths_out_masked, between_chunk_paths_in_masked = result

        for i_chunk in range(len(between_chunk_paths_out_masked)):
            multi_args.append([
                table_id, in_chunk_connected_paths[offset + i_chunk],
                in_chunk_disconnected_paths[offset + i_chunk],
                isolated_paths[offset + i_chunk],
                between_chunk_paths_in_masked[i_chunk],
                between_chunk_paths_out_masked[i_chunk], verbose
            ])

    random.shuffle(multi_args)

    print("%d jobs for creating layer 1 + 2" % len(multi_args))

    # Run in parallel
    if n_threads == 1:
        mu.multiprocess_func(_create_atomic_layer_thread,
                             multi_args,
                             n_threads=n_threads,
                             verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_create_atomic_layer_thread,
                                multi_args,
                                n_threads=n_threads)

    times.append(["Layers 1 + 2", time.time() - time_start])

    # Fill higher abstraction layers
    child_chunk_ids = in_chunk_connected_ids.copy()
    for layer_id in range(3, n_layers + 1):

        time_start = time.time()

        print("\n\n\n --- LAYER %d --- \n\n\n" % layer_id)

        parent_chunk_ids = child_chunk_ids // cg.fan_out
        parent_chunk_ids = parent_chunk_ids.astype(int)

        u_pcids, inds = np.unique(parent_chunk_ids,
                                  axis=0,
                                  return_inverse=True)

        if len(u_pcids) > n_threads:
            n_threads_per_process = 1
        else:
            n_threads_per_process = int(np.ceil(n_threads / len(u_pcids)))

        multi_args = []
        for ind in range(len(u_pcids)):
            multi_args.append([
                table_id, layer_id,
                child_chunk_ids[inds == ind].astype(int),
                n_threads_per_process
            ])

        child_chunk_ids = u_pcids

        # Run in parallel
        if n_threads == 1:
            mu.multiprocess_func(_add_layer_thread,
                                 multi_args,
                                 n_threads=n_threads,
                                 verbose=True,
                                 debug=n_threads == 1)
        else:
            mu.multisubprocess_func(_add_layer_thread,
                                    multi_args,
                                    n_threads=n_threads,
                                    suffix=str(layer_id))

        times.append(["Layer %d" % layer_id, time.time() - time_start])

    for time_entry in times:
        print("%s: %.2fs = %.2fmin = %.2fh" %
              (time_entry[0], time_entry[1], time_entry[1] / 60,
               time_entry[1] / 3600))
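A usage sketch; the table name is a made-up example and must contain one of the dataset keywords matched above ("basil", "pinky40", or "pinky100"):

create_chunked_graph(table_id="pinky100_benchmark_v0",
                     fan_out=2,
                     chunk_size=(512, 512, 128),
                     n_threads=16)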