def get_merge_candidates(table_id, save_dir=f"{HOME}/benchmarks/", n_threads=1):
    """ Collects merge edge candidates from all chunks and stores them as HDF5

    The chunk grid is derived from the bounds of the graph's volume. Chunk
    coordinates are shuffled, split into blocks and handed to
    ``_get_merge_candidates``. The collected edges and weights are written to
    ``{save_dir}/{table_id}/merge_edge_stats.h5``.

    :param table_id: str
    :param save_dir: str
    :param n_threads: int
    """
    cg = chunkedgraph.ChunkedGraph(table_id)

    # One row per axis; subtracting the first column makes each row start at 0
    bounds = np.array(cg.cv.bounds.to_list()).reshape(2, -1).T
    bounds -= bounds[:, 0:1]

    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent
    chunk_id_bounds = np.ceil((bounds / cg.chunk_size[:, None])).astype(int)

    chunk_coord_gen = itertools.product(*[range(*r) for r in chunk_id_bounds])
    chunk_coords = np.array(list(chunk_coord_gen), dtype=int)

    # Randomize the chunk order before splitting it into blocks
    order = np.arange(len(chunk_coords))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    multi_args = [[cg_serialized_info, chunk_coords[block]]
                  for block in blocks]

    if n_threads == 1:
        results = mu.multiprocess_func(_get_merge_candidates, multi_args,
                                       n_threads=n_threads, verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_merge_candidates, multi_args,
                                          n_threads=n_threads)

    merge_edges = []
    merge_edge_weights = []
    for result in results:
        merge_edges.extend(result[0])
        merge_edge_weights.extend(result[1])

    save_folder = f"{save_dir}/{table_id}/"
    os.makedirs(save_folder, exist_ok=True)

    with h5py.File(f"{save_folder}/merge_edge_stats.h5", "w") as f:
        f.create_dataset("merge_edges", data=merge_edges,
                         compression="gzip")
        f.create_dataset("merge_edge_weights", data=merge_edge_weights,
                         compression="gzip")
def mesh_lvl2_previews(cg, lvl2_node_ids, cv_path=None, cv_mesh_dir=None,
                       mip=2, simplification_factor=999999, max_err=40,
                       parallel_download=8, verbose=True,
                       cache_control="no-cache", n_threads=1):
    """ Dispatches one ``_mesh_lvl2_previews_threads`` job per level 2 node

    :param cg: chunkedgraph instance
    :param lvl2_node_ids: dict or iterable; a non-dict input is turned into a
        dict that maps every id to None
    :param cv_path: forwarded to the worker
    :param cv_mesh_dir: mesh directory; falls back to ``cg._mesh_dir``
    :param mip: forwarded to the worker
    :param simplification_factor: forwarded to the worker
    :param max_err: forwarded to the worker
    :param parallel_download: forwarded to the worker
    :param verbose: forwarded to the worker
    :param cache_control: forwarded to the worker
    :param n_threads: int
    """
    # Credentials are always stripped from the serialized info here
    serialized_cg_info = cg.get_serialized_info()
    del serialized_cg_info["credentials"]

    if not isinstance(lvl2_node_ids, dict):
        placeholders = [None] * len(lvl2_node_ids)
        lvl2_node_ids = dict(zip(lvl2_node_ids, placeholders))

    mesh_dir = cv_mesh_dir or cg._mesh_dir

    multi_args = [
        [serialized_cg_info, node_id, node_value, cv_path, mesh_dir, mip,
         simplification_factor, max_err, parallel_download, verbose,
         cache_control]
        for node_id, node_value in lvl2_node_ids.items()
    ]

    # Run parallelizing
    if n_threads != 1:
        mu.multisubprocess_func(_mesh_lvl2_previews_threads, multi_args,
                                n_threads=n_threads)
        return

    mu.multiprocess_func(_mesh_lvl2_previews_threads, multi_args,
                         n_threads=n_threads, verbose=False,
                         debug=n_threads == 1)
def rewrite_segmentation(dataset_name, n_threads=64, n_units_per_thread=None):
    """ Dispatches ``_rewrite_segmentation_thread`` jobs over ``*rg2cg*`` files

    :param dataset_name: str, "pinky" or "basil"
    :param n_threads: int
    :param n_units_per_thread: int or None
        number of files per job; if None the files are split into
        ``n_threads * 3`` blocks
    :raises Exception: if dataset_name is unknown
    """
    if dataset_name not in ("pinky", "basil"):
        raise Exception("Dataset unknown")

    if dataset_name == "pinky":
        cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
        from_url = "gs://neuroglancer/pinky40_v11/watershed/"
        to_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
    else:
        cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
        from_url = "gs://neuroglancer/ranl/basil_4k_oldnet/ws/"
        to_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"

    layer_name = creator_utils.layer_name_from_cv_url(cv_url)
    layer_dir = creator_utils.dir_from_layer_name(layer_name)
    file_paths = np.sort(glob.glob(layer_dir + "/*rg2cg*"))

    n_blocks = (n_threads * 3 if n_units_per_thread is None
                else int(np.ceil(len(file_paths) / n_units_per_thread)))
    file_path_blocks = np.array_split(file_paths, n_blocks)

    multi_args = [[fp_block, from_url, to_url]
                  for fp_block in file_path_blocks]

    # Run parallelizing
    if n_threads != 1:
        mu.multisubprocess_func(_rewrite_segmentation_thread, multi_args,
                                n_threads=n_threads)
        return

    mu.multiprocess_func(_rewrite_segmentation_thread, multi_args,
                         n_threads=n_threads, verbose=True,
                         debug=n_threads == 1)
def count_nodes_and_edges(table_id, n_threads=1):
    """ Collects per-chunk node and edge counts over the whole chunk grid

    The chunk grid is derived from the bounds of the graph's volume. Chunk
    coordinates are shuffled, split into blocks and handed to
    ``_count_nodes_and_edges``.

    :param table_id: str
    :param n_threads: int
    :return: list, list
        n_nodes_per_chunk, n_edges_per_chunk
    """
    cg = chunkedgraph.ChunkedGraph(table_id)

    # One row per axis; subtracting the first column makes each row start at 0
    bounds = np.array(cg.cv.bounds.to_list()).reshape(2, -1).T
    bounds -= bounds[:, 0:1]

    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent
    chunk_id_bounds = np.ceil((bounds / cg.chunk_size[:, None])).astype(int)

    chunk_coord_gen = itertools.product(*[range(*r) for r in chunk_id_bounds])
    chunk_coords = np.array(list(chunk_coord_gen), dtype=int)

    # Randomize the chunk order before splitting it into blocks
    order = np.arange(len(chunk_coords))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    multi_args = [[cg_serialized_info, chunk_coords[block]]
                  for block in blocks]

    if n_threads == 1:
        results = mu.multiprocess_func(_count_nodes_and_edges, multi_args,
                                       n_threads=n_threads, verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_count_nodes_and_edges, multi_args,
                                          n_threads=n_threads)

    n_edges_per_chunk = []
    n_nodes_per_chunk = []
    for result in results:
        # Each worker result carries node counts first, edge counts second
        n_nodes_per_chunk.extend(result[0])
        n_edges_per_chunk.extend(result[1])

    return n_nodes_per_chunk, n_edges_per_chunk
def get_latest_roots(cg, time_stamp: Optional[datetime.datetime] = None,
                     n_threads: int = 1) -> Sequence[np.uint64]:
    """ Reads root ids by scanning the root chunk's segment id range in blocks

    The range ``[1, max_seg_id + 1]`` is split into contiguous blocks that are
    handed to ``_read_root_rows_thread`` together with the time stamp.

    :param cg: chunkedgraph instance
    :param time_stamp: datetime.datetime or None
    :param n_threads: int
    :return: np.ndarray of np.uint64
    """
    # Create filters: time and id range
    max_seg_id = cg.get_max_seg_id(cg.root_chunk_id) + 1

    n_blocks = (1 if n_threads == 1
                else int(np.min([n_threads * 3 + 1, max_seg_id])))

    seg_id_blocks = np.linspace(1, max_seg_id, n_blocks + 1, dtype=np.uint64)

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    # Consecutive block borders form the (start, end) id range of each job
    multi_args = [
        [block_start, block_end, cg_serialized_info, time_stamp]
        for block_start, block_end in zip(seg_id_blocks[:-1],
                                          seg_id_blocks[1:])
    ]

    if n_threads != 1:
        results = mu.multisubprocess_func(_read_root_rows_thread, multi_args,
                                          n_threads=n_threads)
    else:
        results = mu.multiprocess_func(_read_root_rows_thread, multi_args,
                                       n_threads=n_threads, verbose=False,
                                       debug=n_threads == 1)

    root_ids = [root_id for result in results for root_id in result]

    return np.array(root_ids, dtype=np.uint64)
def all_angle_weighted_distances(ds, angles, weights, rep_inds, inds):
    """Compute one angle-weighted distance per unique value in ``rep_inds``.

    ``ds``, ``angles`` and ``weights`` are sliced into one segment per unique
    value of ``rep_inds``; each segment is passed to
    ``_multi_angle_weighted_distance``. The results are written into an array
    of length ``len(inds)`` at the positions given by the unique values;
    all other entries stay NaN.

    NOTE(review): the slicing uses first-occurrence positions, so it assumes
    equal values of ``rep_inds`` are stored contiguously in ascending order --
    confirm against the caller.
    """
    unique_inds, starts = np.unique(rep_inds, return_index=True)

    jobs = []
    targets = []
    # Every group but the last ends where the next one starts
    for group_ind, lo, hi in zip(unique_inds[:-1], starts[:-1], starts[1:]):
        segment = slice(lo, hi)
        jobs.append((ds[segment], angles[segment], weights[segment]))
        targets.append(group_ind)

    # The last group runs to the end of the data
    tail = slice(starts[-1], len(ds))
    jobs.append((ds[tail], angles[tail], weights[tail]))
    targets.append(unique_inds[-1])

    rs = mu.multiprocess_func(_multi_angle_weighted_distance, jobs)

    rs_out = np.full(len(inds), np.nan)
    rs_out[np.array(targets)] = rs
    return rs_out
def oriented_vector_cones(center_vectors, num_points, widest_angle=np.pi / 3,
                          normalize=False):
    """Produces all ray cones

    One cone of sample vectors is generated with ``unit_vector_sampler`` and
    handed, together with the angles of each center vector, to
    ``_rotated_cone``.

    :param center_vectors: array indexed as ``[:, 0..2]``, one vector per row
    :param num_points: forwarded to ``unit_vector_sampler``
    :param widest_angle: forwarded to ``unit_vector_sampler``
    :param normalize: if True, rows of ``center_vectors`` are divided by their
        Euclidean norm first; otherwise they are used as given
        (assumes they already have unit length -- TODO confirm)
    :return: result of ``mu.multiprocess_func(_rotated_cone, ...)``, one job
        per center vector
    """
    if normalize:
        # Fixed: the original referenced the undefined name `center_vector`
        row_norms = np.linalg.norm(center_vectors, axis=1)
        cv_norm = center_vectors / row_norms[:, np.newaxis]
    else:
        cv_norm = center_vectors

    # Angles of each vector: arccos of the third component and arctan2 of
    # the second over the first
    thetas = np.arccos(cv_norm[:, 2])
    phis = np.arctan2(cv_norm[:, 1], cv_norm[:, 0])

    vs_raw = unit_vector_sampler(num_points, widest_angle=widest_angle)

    data = [(phi, theta, vs_raw) for phi, theta in zip(phis, thetas)]
    vector_cones = mu.multiprocess_func(_rotated_cone, data)
    return vector_cones
def write_flat_segmentation(cg, dataset_name, bounding_box=None,
                            block_factor=2, n_threads=1, mip=0):
    """ Applies the mapping in the chunkedgraph to the supervoxels to create
        a flattened segmentation

    The dataset bounds are converted to chunk coordinates, optionally clipped
    to ``bounding_box``, and tiled into blocks of ``block_factor`` chunks per
    axis. Each block becomes one ``_write_flat_segmentation_thread`` job.

    :param cg: chunkedgraph instance
    :param dataset_name: str
    :param bounding_box: np.array
    :param block_factor: int
    :param n_threads: int
    :param mip: int
    :return: None
    :raises Exception: if dataset_name is unknown
    """
    if dataset_name == "pinky":
        from_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
        to_url = "gs://neuroglancer/svenmd/pinky40_v11/segmentation/"
    elif dataset_name == "basil":
        from_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
        to_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/segmentation/"
    else:
        raise Exception("Dataset unknown")

    from_cv = cloudvolume.CloudVolume(from_url, mip=mip)

    dataset_bounding_box = np.array(from_cv.bounds.to_list())

    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent
    block_bounding_box_cg = [
        np.floor(dataset_bounding_box[:3] / cg.chunk_size).astype(int),
        np.ceil(dataset_bounding_box[3:] / cg.chunk_size).astype(int)]

    if bounding_box is not None:
        bounding_box_cg = [
            np.floor(bounding_box[0] / cg.chunk_size).astype(int),
            np.ceil(bounding_box[1] / cg.chunk_size).astype(int)]

        # Intersect the dataset extent with the requested bounding box
        block_bounding_box_cg[0] = np.maximum(block_bounding_box_cg[0],
                                              bounding_box_cg[0])
        block_bounding_box_cg[1] = np.minimum(block_bounding_box_cg[1],
                                              bounding_box_cg[1])

    block_iter = itertools.product(
        np.arange(block_bounding_box_cg[0][0], block_bounding_box_cg[1][0],
                  block_factor),
        np.arange(block_bounding_box_cg[0][1], block_bounding_box_cg[1][1],
                  block_factor),
        np.arange(block_bounding_box_cg[0][2], block_bounding_box_cg[1][2],
                  block_factor))
    blocks = np.array(list(block_iter))

    cg_info = cg.get_serialized_info()

    multi_args = []
    for start_block in blocks:
        # The last block along an axis is cut off at the upper bound
        end_block = np.minimum(start_block + block_factor,
                               block_bounding_box_cg[1])

        multi_args.append(
            [cg_info, start_block, end_block, from_url, to_url, mip])

    # Run parallelizing
    if n_threads == 1:
        mu.multiprocess_func(_write_flat_segmentation_thread, multi_args,
                             n_threads=n_threads, verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_write_flat_segmentation_thread, multi_args,
                                n_threads=n_threads)
def generate_thumbnails(
    filename,
    batch_name,
    min_height,
    width,
    title_column,
    abstract_column,
    author_column_contains,
    twitter_column_contains,
    save_author_string,
    thumbnail_directory,
    use_oxford,
    n_threads,
):
    """Render one thumbnail image per row of a CSV file.

    :param filename: path of the CSV file to read
    :param batch_name: name of the output sub-directory; if None a name of the
        form ``batch_YYYY_MM_DD`` built from today's date is used
    :param min_height: forwarded to the image generation
    :param width: forwarded to the image generation
    :param title_column: name of the column holding titles
    :param abstract_column: name of the column holding abstracts
    :param author_column_contains: regex; columns whose name matches it (via
        ``re.match``) are treated as author columns
    :param twitter_column_contains: regex selecting the twitter handle columns
        the same way, or False to skip the column search
    :param save_author_string: if truthy, the data is written back as
        ``*_with_tweets.csv`` with an added ``authors_with_handles`` column
    :param thumbnail_directory: root output directory
    :param use_oxford: forwarded to ``make_author_string``
    :param n_threads: number of processes; values above 1 use
        ``mu.multiprocess_func``, otherwise images are rendered in a loop
    :return: None
    """
    data = pd.read_csv(filename)

    author_columns = [
        c for c in data.columns
        if re.match(author_column_contains, c) is not None
    ]

    twitter_columns = []
    if twitter_column_contains is not False:
        twitter_columns = [
            c for c in data.columns
            if re.match(twitter_column_contains, c) is not None
        ]

    author_list = []
    author_list_with_handles = []
    handles_ok = True
    for ii, row in data[author_columns].iterrows():
        auths = row[~pd.isna(row)].tolist()
        author_list.append(make_author_string(auths, use_oxford=use_oxford))

        if not handles_ok:
            continue
        try:
            twit_row = data.iloc[ii][twitter_columns]
            # Positional mask taken from the author row: keeps the handle
            # entries that sit at the same positions as the present authors.
            handles = twit_row[~pd.isna(row).values].tolist()
            author_list_with_handles.append(
                make_author_string(auths,
                                   twitter_list=handles,
                                   use_oxford=use_oxford))
        except Exception:
            print("Twitter handles failed!")
            handles_ok = False

    if not handles_ok:
        # Fall back to the plain author strings for every row. The original
        # aliased the two lists inside the loop, so later successful appends
        # corrupted `author_list`; a copy taken after the loop avoids that.
        author_list_with_handles = list(author_list)

    title_list = data[title_column].tolist()
    abstract_list = data[abstract_column].tolist()

    if batch_name is None:
        batch_dir = f"batch_{str(datetime.date.today()).replace('-', '_')}"
    else:
        batch_dir = batch_name

    # Creates the thumbnail directory and the batch sub-directory as needed
    os.makedirs(f"{thumbnail_directory}/{batch_dir}", exist_ok=True)

    if n_threads > 1:
        print(f"Making all images with {n_threads} processes...")
        t0 = time.time()
        all_args = [
            [
                title,
                authors,
                abstract,
                width,
                min_height,
                thumbnail_directory,
                batch_dir,
            ]
            for title, authors, abstract in zip(title_list, author_list,
                                                abstract_list)
        ]
        mu.multiprocess_func(_save_data_multithreaded,
                             all_args,
                             n_threads=n_threads)
        print(f"\tImages produced in {time.time()-t0:.2f} s.")
    else:
        for title, authors, abstract in tqdm.tqdm(zip(title_list, author_list,
                                                      abstract_list),
                                                  total=len(title_list)):
            img = thumbnail_image(title,
                                  authors,
                                  abstract,
                                  image_width=width,
                                  min_height=min_height)
            fname = simple_filename(title,
                                    f"{thumbnail_directory}/{batch_dir}",
                                    max_words=8)
            img.save(
                fname,
                dpi=(150, 150),
            )

    if save_author_string:
        data["authors_with_handles"] = author_list_with_handles
        pure_filename = os.path.split(filename)[-1]
        fn = pure_filename.split(".")
        out_name = f"{thumbnail_directory}/{batch_dir}/{fn[-2].replace('/','')}_with_tweets.csv"
        data.to_csv(out_name)
        print(f"Data saved to {out_name}")
def get_root_ids_and_sv_chunks(table_id, save_dir=f"{HOME}/benchmarks/",
                               n_threads=1):
    """ Collects per-root statistics and stores them as HDF5

    Root ids are read from ``{save_dir}/{table_id}/root_ids.h5`` if that file
    exists; otherwise they are fetched with
    ``chunkedgraph_comp.get_latest_roots`` and cached there. The ids are
    shuffled, split into blocks and handed to ``_get_root_ids_and_sv_chunks``.
    The results are written to ``{save_dir}/{table_id}/root_stats.h5``.

    :param table_id: str
    :param save_dir: str
    :param n_threads: int
    """
    cg = chunkedgraph.ChunkedGraph(table_id)

    save_folder = f"{save_dir}/{table_id}/"
    os.makedirs(save_folder, exist_ok=True)

    if not os.path.exists(f"{save_folder}/root_ids.h5"):
        root_ids = chunkedgraph_comp.get_latest_roots(cg, n_threads=n_threads)

        with h5py.File(f"{save_folder}/root_ids.h5", "w") as f:
            f.create_dataset("root_ids", data=root_ids)
    else:
        with h5py.File(f"{save_folder}/root_ids.h5", "r") as f:
            # `Dataset.value` was removed in h5py 3.0; `[()]` reads the
            # whole dataset into memory the same way
            root_ids = f["root_ids"][()]

    cg_serialized_info = cg.get_serialized_info()
    if n_threads > 1:
        del cg_serialized_info["credentials"]

    # Randomize the root order before splitting it into blocks
    order = np.arange(len(root_ids))
    np.random.shuffle(order)

    n_blocks = np.min([len(order), n_threads * 3])
    blocks = np.array_split(order, n_blocks)

    multi_args = [[cg_serialized_info, root_ids[block]] for block in blocks]

    if n_threads == 1:
        results = mu.multiprocess_func(_get_root_ids_and_sv_chunks,
                                       multi_args, n_threads=n_threads,
                                       verbose=False, debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_root_ids_and_sv_chunks,
                                          multi_args, n_threads=n_threads)

    root_ids = []
    n_l1_nodes_per_root = []
    rep_l1_nodes = []
    rep_l1_chunk_ids = []
    for result in results:
        root_ids.extend(result[0])
        n_l1_nodes_per_root.extend(result[1])
        rep_l1_nodes.extend(result[2])
        rep_l1_chunk_ids.extend(result[3])

    with h5py.File(f"{save_folder}/root_stats.h5", "w") as f:
        f.create_dataset("root_ids", data=root_ids, compression="gzip")
        f.create_dataset("n_l1_nodes_per_root", data=n_l1_nodes_per_root,
                         compression="gzip")
        f.create_dataset("rep_l1_nodes", data=rep_l1_nodes,
                         compression="gzip")
        f.create_dataset("rep_l1_chunk_ids", data=rep_l1_chunk_ids,
                         compression="gzip")
def get_merge_split_timings(table_id, save_dir=f"{HOME}/benchmarks/",
                            job_size=500, n_threads=1):
    """ Benchmarks merge and split timings on sampled merge edges

    Merge edges are loaded with ``load_merge_stats`` and sampled without
    replacement, with probabilities proportional to their weights, into
    ``n_threads * 3`` jobs of ``job_size`` edges. The jobs are run with
    ``_get_merge_timings``.

    :param table_id: str
    :param save_dir: str
    :param job_size: int
    :param n_threads: int
    :return: dict, dict
        merge_results, split_results; both hold the 1st..99th percentiles,
        mean, std and median of the timings plus the run configuration
    """
    save_folder = f"{save_dir}/{table_id}/"

    merge_edges, merge_edge_weights = load_merge_stats(save_folder)

    probs = merge_edge_weights / np.sum(merge_edge_weights)

    n_jobs = n_threads * 3

    cg = chunkedgraph.ChunkedGraph(table_id)
    cg_serialized_info = cg.get_serialized_info()

    # NOTE(review): credentials are dropped for every positive n_threads,
    # i.e. also on the single-process path -- confirm this is intended.
    if n_threads > 0:
        del cg_serialized_info["credentials"]

    time_start = time.time()

    order = np.arange(len(merge_edges))

    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent
    np.random.seed(int(time.time()))

    replace = False

    blocks = np.random.choice(order, job_size * n_jobs, p=probs,
                              replace=replace).reshape(n_jobs, job_size)

    multi_args = [[cg_serialized_info, merge_edges[block]]
                  for block in blocks]

    print(f"Building jobs took {time.time()-time_start}s")

    time_start = time.time()
    if n_threads == 1:
        results = mu.multiprocess_func(_get_merge_timings, multi_args,
                                       n_threads=n_threads, verbose=False,
                                       debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_get_merge_timings, multi_args,
                                          n_threads=n_threads)
    dt = time.time() - time_start

    def _summarize(result_index):
        """Build the statistics dict for one entry of the worker results."""
        timings = [t for result in results for t in result[result_index]]

        percentiles = [np.percentile(timings, k) for k in range(1, 100, 1)]

        return {
            "percentiles": percentiles,
            "p01": percentiles[0],
            "p05": percentiles[4],
            "p95": percentiles[94],
            "p99": percentiles[98],
            "mean": np.mean(timings),
            "std": np.std(timings),
            "median": np.median(timings),
            "total_time_s": dt,
            "job_size": job_size,
            "n_jobs": n_jobs,
            "n_threads": n_threads,
            "replace": replace,
            "requests_per_s": job_size * n_jobs / dt
        }

    # Entry 0 of each worker result feeds the merge statistics, entry 1 the
    # split statistics
    merge_results = _summarize(0)
    split_results = _summarize(1)

    return merge_results, split_results
def mesh_single_layer(self, layer, bounding_box=None, block_factor=2,
                      n_threads=128):
    """ Dispatches ``meshgen._mesh_layer_thread`` jobs covering one layer

    The volume bounds are converted to chunk coordinates, optionally clipped
    to ``bounding_box``, scaled down by ``2 ** max(0, layer - 2)`` and tiled
    into blocks of ``block_factor`` per axis. Each block becomes one job.

    :param layer: int
        must not exceed ``self.highest_mesh_layer``
    :param bounding_box: [[x_, y_, z_], [_x, _y, _z]] or None
    :param block_factor: int
        reduced step by step (never below 1) while the estimated job count
        is lower than ``n_threads``
    :param n_threads: int
    """
    assert layer <= self.highest_mesh_layer

    dataset_bounding_box = np.array(self.cv.bounds.to_list())

    # `np.int` was removed from NumPy (1.24); the builtin `int` is equivalent
    block_bounding_box_cg = [
        np.floor(dataset_bounding_box[:3] / self.cg.chunk_size).astype(int),
        np.ceil(dataset_bounding_box[3:] / self.cg.chunk_size).astype(int)]

    if bounding_box is not None:
        bounding_box_cg = [
            np.floor(bounding_box[0] / self.cg.chunk_size).astype(int),
            np.ceil(bounding_box[1] / self.cg.chunk_size).astype(int)]

        # Intersect the dataset extent with the requested bounding box
        m = block_bounding_box_cg[0] < bounding_box_cg[0]
        block_bounding_box_cg[0][m] = bounding_box_cg[0][m]

        m = block_bounding_box_cg[1] > bounding_box_cg[1]
        block_bounding_box_cg[1][m] = bounding_box_cg[1][m]

    # Scale the bounds to this layer's grid. The result is a float array
    # with the lower bound in row 0 and the upper bound in row 1.
    # (presumably the chunk extent doubles per layer above 2 -- TODO confirm)
    block_bounding_box_cg = np.ceil(
        np.array(block_bounding_box_cg) / 2 ** max(0, layer - 2))

    # Fixed: the original stored the *comparison* `... < n_threads` in
    # n_jobs, so the loop below always ran block_factor down to 1. np.prod
    # replaces np.product, which was removed in NumPy 2.0.
    # NOTE(review): the estimate divides by block_factor ** 2 although the
    # blocks span three axes -- kept as in the original, confirm the intent.
    n_chunks = np.prod(block_bounding_box_cg[1] - block_bounding_box_cg[0])
    while n_chunks / block_factor ** 2 < n_threads and block_factor > 1:
        block_factor -= 1

    block_iter = itertools.product(
        np.arange(block_bounding_box_cg[0][0], block_bounding_box_cg[1][0],
                  block_factor),
        np.arange(block_bounding_box_cg[0][1], block_bounding_box_cg[1][1],
                  block_factor),
        np.arange(block_bounding_box_cg[0][2], block_bounding_box_cg[1][2],
                  block_factor))

    blocks = np.array(list(block_iter), dtype=int)

    cg_info = self.cg.get_serialized_info()
    del cg_info['credentials']

    multi_args = []
    for start_block in blocks:
        # The last block along an axis is cut off at the upper bound
        end_block = start_block + block_factor
        m = end_block > block_bounding_box_cg[1]
        end_block[m] = block_bounding_box_cg[1][m]

        multi_args.append([cg_info, start_block, end_block, self.cg.cv_path,
                           self.cv_mesh_dir, self.mesh_mip, layer])

    random.shuffle(multi_args)

    # Run parallelizing
    if n_threads == 1:
        mu.multiprocess_func(meshgen._mesh_layer_thread, multi_args,
                             n_threads=n_threads, verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(meshgen._mesh_layer_thread, multi_args,
                                n_threads=n_threads,
                                suffix="%s_%d" % (self.table_id, layer))
def create_chunked_graph(table_id=None, cv_url=None, ws_url=None, fan_out=2,
                         bbox=None, chunk_size=(512, 512, 128), verbose=False,
                         n_threads=1):
    """ Creates chunked graph from downloaded files

    Steps, each timed and reported at the end:
    1. preprocess the region graph files (``_preprocess_chunkedgraph_data_thread``)
    2. match between-chunk files to chunks (``_between_chunk_masks_thread``)
    3. create layers 1 + 2 (``_create_atomic_layer_thread``)
    4. add every layer from 3 up to ``n_layers`` (``_add_layer_thread``)

    :param table_id: str
    :param cv_url: str
    :param ws_url: str
    :param fan_out: int
    :param bbox: [[x_, y_, z_], [_x, _y, _z]]
    :param chunk_size: tuple
    :param verbose: bool
    :param n_threads: int
    :raises Exception: if cv_url or ws_url is missing and table_id contains
        none of the known dataset names
    """
    if cv_url is None or ws_url is None:
        if "basil" in table_id:
            cv_url = "gs://nkem/basil_4k_oldnet/region_graph/"
            ws_url = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
        elif "pinky40" in table_id:
            cv_url = "gs://nkem/pinky40_v11/mst_trimmed_sem_remap/region_graph/"
            ws_url = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
        elif "pinky100" in table_id:
            cv_url = "gs://nkem/pinky100_v0/region_graph/"
            ws_url = "gs://neuroglancer/nkem/pinky100_v0/ws/lost_no-random/bbox1_0/"
        else:
            raise Exception("Could not identify region graph ressource")

    times = []
    time_start = time.time()

    chunk_size = np.array(list(chunk_size))

    file_paths = np.sort(
        glob.glob(
            creator_utils.dir_from_layer_name(
                creator_utils.layer_name_from_cv_url(cv_url)) + "/*"))

    file_path_blocks = np.array_split(file_paths, n_threads * 3)

    multi_args = []
    for fp_block in file_path_blocks:
        multi_args.append([fp_block, table_id, chunk_size, bbox])

    if n_threads == 1:
        results = mu.multiprocess_func(_preprocess_chunkedgraph_data_thread,
                                       multi_args, n_threads=n_threads,
                                       verbose=True, debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(
            _preprocess_chunkedgraph_data_thread, multi_args,
            n_threads=n_threads)

    # Accumulators for the eight arrays every preprocessing job returns
    in_chunk_connected_paths = np.array([])
    in_chunk_connected_ids = np.array([], dtype=np.uint64).reshape(-1, 3)
    in_chunk_disconnected_paths = np.array([])
    in_chunk_disconnected_ids = np.array([], dtype=np.uint64).reshape(-1, 3)
    between_chunk_paths = np.array([])
    between_chunk_ids = np.array([], dtype=np.uint64).reshape(-1, 2, 3)
    isolated_paths = np.array([])
    isolated_ids = np.array([], dtype=np.uint64).reshape(-1, 3)

    for result in results:
        in_chunk_connected_paths = np.concatenate(
            [in_chunk_connected_paths, result[0]])
        in_chunk_connected_ids = np.concatenate(
            [in_chunk_connected_ids, result[1]])
        in_chunk_disconnected_paths = np.concatenate(
            [in_chunk_disconnected_paths, result[2]])
        in_chunk_disconnected_ids = np.concatenate(
            [in_chunk_disconnected_ids, result[3]])
        between_chunk_paths = np.concatenate([between_chunk_paths, result[4]])
        between_chunk_ids = np.concatenate([between_chunk_ids, result[5]])
        isolated_paths = np.concatenate([isolated_paths, result[6]])
        isolated_ids = np.concatenate([isolated_ids, result[7]])

    assert len(in_chunk_connected_ids) == len(in_chunk_connected_paths) == \
        len(in_chunk_disconnected_ids) == len(in_chunk_disconnected_paths) == \
        len(isolated_ids) == len(isolated_paths)

    in_chunk_connected_ids, in_chunk_connected_paths = \
        _sort_arrays(in_chunk_connected_ids, in_chunk_connected_paths)

    in_chunk_disconnected_ids, in_chunk_disconnected_paths = \
        _sort_arrays(in_chunk_disconnected_ids, in_chunk_disconnected_paths)

    isolated_ids, isolated_paths = \
        _sort_arrays(isolated_ids, isolated_paths)

    times.append(["Preprocessing", time.time() - time_start])

    print("Preprocessing took %.3fs = %.2fh" % (times[-1][1],
                                                times[-1][1] / 3600))

    time_start = time.time()

    multi_args = []

    in_chunk_id_blocks = np.array_split(in_chunk_connected_ids,
                                        max(1, n_threads))
    # Running offset of each block within in_chunk_connected_ids
    cumsum = 0

    for in_chunk_id_block in in_chunk_id_blocks:
        multi_args.append([
            between_chunk_ids, between_chunk_paths, in_chunk_id_block, cumsum
        ])
        cumsum += len(in_chunk_id_block)

    # Run parallelizing
    if n_threads == 1:
        results = mu.multiprocess_func(_between_chunk_masks_thread,
                                       multi_args, n_threads=n_threads,
                                       verbose=True, debug=n_threads == 1)
    else:
        results = mu.multisubprocess_func(_between_chunk_masks_thread,
                                          multi_args, n_threads=n_threads)

    times.append(["Data sorting", time.time() - time_start])

    print("Data sorting took %.3fs = %.2fh" % (times[-1][1],
                                               times[-1][1] / 3600))

    time_start = time.time()

    n_layers = int(
        np.ceil(
            pychunkedgraph.backend.chunkedgraph_utils.log_n(
                np.max(in_chunk_connected_ids) + 1, fan_out))) + 2

    print("N layers: %d" % n_layers)

    cg = chunkedgraph.ChunkedGraph(table_id=table_id,
                                   n_layers=np.uint64(n_layers),
                                   fan_out=np.uint64(fan_out),
                                   chunk_size=np.array(chunk_size,
                                                       dtype=np.uint64),
                                   cv_path=ws_url,
                                   is_new=True)

    # Fill lowest layer and create first abstraction layer
    # Create arguments for parallelizing
    multi_args = []
    for result in results:
        offset, between_chunk_paths_out_masked, between_chunk_paths_in_masked = result

        for i_chunk in range(len(between_chunk_paths_out_masked)):
            multi_args.append([
                table_id, in_chunk_connected_paths[offset + i_chunk],
                in_chunk_disconnected_paths[offset + i_chunk],
                isolated_paths[offset + i_chunk],
                between_chunk_paths_in_masked[i_chunk],
                between_chunk_paths_out_masked[i_chunk], verbose
            ])

    random.shuffle(multi_args)

    print("%d jobs for creating layer 1 + 2" % len(multi_args))

    # Run parallelizing
    if n_threads == 1:
        mu.multiprocess_func(_create_atomic_layer_thread, multi_args,
                             n_threads=n_threads, verbose=True,
                             debug=n_threads == 1)
    else:
        mu.multisubprocess_func(_create_atomic_layer_thread, multi_args,
                                n_threads=n_threads)

    times.append(["Layers 1 + 2", time.time() - time_start])

    # Fill higher abstraction layers
    child_chunk_ids = in_chunk_connected_ids.copy()
    for layer_id in range(3, n_layers + 1):
        time_start = time.time()

        print("\n\n\n --- LAYER %d --- \n\n\n" % layer_id)

        # `np.int` was removed from NumPy (1.24); the builtin `int` is
        # equivalent
        parent_chunk_ids = child_chunk_ids // cg.fan_out
        parent_chunk_ids = parent_chunk_ids.astype(int)

        u_pcids, inds = np.unique(parent_chunk_ids, axis=0,
                                  return_inverse=True)

        # Spare threads are handed down to the jobs when there are fewer
        # parent chunks than threads
        if len(u_pcids) > n_threads:
            n_threads_per_process = 1
        else:
            n_threads_per_process = int(np.ceil(n_threads / len(u_pcids)))

        multi_args = []
        for ind in range(len(u_pcids)):
            multi_args.append([
                table_id, layer_id,
                child_chunk_ids[inds == ind].astype(int),
                n_threads_per_process
            ])

        # The parents of this layer are the children of the next one
        child_chunk_ids = u_pcids

        # Run parallelizing
        if n_threads == 1:
            mu.multiprocess_func(_add_layer_thread, multi_args,
                                 n_threads=n_threads, verbose=True,
                                 debug=n_threads == 1)
        else:
            mu.multisubprocess_func(_add_layer_thread, multi_args,
                                    n_threads=n_threads,
                                    suffix=str(layer_id))

        times.append(["Layer %d" % layer_id, time.time() - time_start])

    for time_entry in times:
        print("%s: %.2fs = %.2fmin = %.2fh" % (time_entry[0], time_entry[1],
                                               time_entry[1] / 60,
                                               time_entry[1] / 3600))