def _read_agg_files(filenames, base_path):
    with cloudvolume.Storage(base_path, n_threads=10) as stor:
        files = stor.get_files(filenames)

    edge_list = []
    for file in files:
        if file["error"] or file["content"] is None:
            continue
        content = zstd.ZstdDecompressor().decompressobj().decompress(
            file["content"])
        edge_list.append(
            np.frombuffer(content, dtype=basetypes.NODE_ID).reshape(-1, 2))
    return edge_list
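

# Minimal usage sketch for `_read_agg_files` (not part of the pipeline): the
# bucket path and filename pattern below are hypothetical placeholders chosen
# only to illustrate the call shape and the (N, 2) edge arrays it returns.
def _example_read_agg_files():
    base_path = "gs://my-bucket/agglomeration/"  # hypothetical bucket path
    filenames = [f"agg_edges_{i}.data" for i in range(4)]  # hypothetical names
    edge_list = _read_agg_files(filenames, base_path)
    # Each entry is an (N, 2) array of node id pairs; concatenate them into a
    # single edge array for downstream use.
    if edge_list:
        return np.concatenate(edge_list)
    return np.zeros((0, 2), dtype=basetypes.NODE_ID)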
def handlerino_periodically_write_to_cloud(*args, **kwargs):
    global num_messages
    num_messages = num_messages + 1
    print(num_messages, args[0]['data'])
    messages.append(args[0]['data'])
    with open('output.txt', 'a') as f:
        f.write(str(args[0]['data']) + '\n')
    if num_messages % 1000 == 0:
        print('Writing result data to cloud')
        cv_path = cg._cv_path
        filename = f'{datetime.now()}_meshes_{num_messages}'
        with cloudvolume.Storage(cv_path) as storage:
            storage.put_file(file_path=f'meshing_run_data/{filename}',
                             content=','.join(map(str, messages)),
                             compress=False,
                             cache_control='no-cache')
def handlerino_write_to_cloud(*args, **kwargs):
    global num_messages
    num_messages = num_messages + 1
    print(num_messages, args[0]['data'])
    messages.append(args[0]['data'])
    with open('output.txt', 'a') as f:
        f.write(str(args[0]['data']) + '\n')
    if num_messages == 1000:
        print('DONE')
        cv_path = cg._cv_path
        with cloudvolume.Storage(cv_path) as storage:
            storage.put_file(
                file_path='frag_test/frag_test_summary_no_dust_threshold',
                content=','.join(map(str, messages)),
                compress=False,
                cache_control='no-cache')
def write_mesh_to_cv(cv, cv_folder, mesh, mesh_id):
    mesh_dir = os.path.join(cv_folder)
    if not os.path.isdir(mesh_dir):
        os.makedirs(mesh_dir)

    n_vertices = mesh.vertices.shape[0]
    vertices = np.array(mesh.vertices, dtype=np.float32)

    # Precomputed mesh fragment layout: vertex count, vertex coordinates
    # (3 x float32 each), then triangle vertex indices.
    vertex_index_format = [
        np.uint32(n_vertices),                 # number of vertices
        vertices,                              # vertex coordinates
        np.array(mesh.faces, dtype=np.uint32)  # triangle vertex indices
    ]
    outs = b''.join([array.tobytes() for array in vertex_index_format])

    with cloudvolume.Storage(cv.layer_cloudpath, progress=cv.progress) as stor:
        fname_man = os.path.join(cv.info['mesh'], f'{mesh_id}:0')
        frag_id = f'9{mesh_id}:0'
        fname = os.path.join(cv.info['mesh'], frag_id)
        d_man = {'fragments': [frag_id]}
        stor.put_json(fname_man, json.dumps(d_man))
        stor.put_file(file_path=fname, content=outs, compress=True)
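

# Sketch of calling `write_mesh_to_cv` with a toy single-triangle mesh. The
# CloudVolume path is a hypothetical placeholder and the dataset is assumed to
# have a 'mesh' entry in its info file; `mesh` only needs `.vertices` and
# `.faces` attributes (e.g. a trimesh.Trimesh), which is all the function uses.
def _example_write_mesh_to_cv():
    class _ToyMesh:
        vertices = np.array([[0., 0., 0.],
                             [100., 0., 0.],
                             [0., 100., 0.]])  # one triangle, in nm
        faces = np.array([[0, 1, 2]])

    cv = cloudvolume.CloudVolume("gs://my-bucket/segmentation")  # hypothetical
    write_mesh_to_cv(cv, cv_folder="meshes", mesh=_ToyMesh(), mesh_id=123)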
def collect_edge_data(im, chunk_coord, aff_dtype=np.float32):
    """ Loads edge data for a single chunk

    :param im: IngestionManager
    :param chunk_coord: np.ndarray
        array of three ints
    :param aff_dtype: np.dtype
    :return: dict of np.ndarrays
    """
    subfolder = "chunked_rg"
    base_path = f"{im.storage_path}/{subfolder}/"

    chunk_coord = np.array(chunk_coord)
    chunk_id = im.cg.get_chunk_id(layer=1, x=chunk_coord[0],
                                  y=chunk_coord[1], z=chunk_coord[2])

    filenames = collections.defaultdict(list)
    swap = collections.defaultdict(list)
    for x in [chunk_coord[0] - 1, chunk_coord[0]]:
        for y in [chunk_coord[1] - 1, chunk_coord[1]]:
            for z in [chunk_coord[2] - 1, chunk_coord[2]]:
                if im.is_out_of_bounce(np.array([x, y, z])):
                    continue

                # EDGES WITHIN CHUNKS
                filename = f"in_chunk_0_{x}_{y}_{z}_{chunk_id}.data"
                filenames["in"].append(filename)

    for d in [-1, 1]:
        for dim in range(3):
            diff = np.zeros([3], dtype=int)
            diff[dim] = d
            adjacent_chunk_coord = chunk_coord + diff
            adjacent_chunk_id = im.cg.get_chunk_id(
                layer=1, x=adjacent_chunk_coord[0],
                y=adjacent_chunk_coord[1], z=adjacent_chunk_coord[2])

            if im.is_out_of_bounce(adjacent_chunk_coord):
                continue

            c_chunk_coords = _get_cont_chunk_coords(im, chunk_coord,
                                                    adjacent_chunk_coord)

            larger_id = np.max([chunk_id, adjacent_chunk_id])
            smaller_id = np.min([chunk_id, adjacent_chunk_id])
            chunk_id_string = f"{smaller_id}_{larger_id}"

            for c_chunk_coord in c_chunk_coords:
                x, y, z = c_chunk_coord

                # EDGES BETWEEN CHUNKS
                filename = f"between_chunks_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames["between"].append(filename)
                swap[filename] = larger_id == chunk_id

                # EDGES FROM CUTS OF SVS
                filename = f"fake_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames["cross"].append(filename)
                swap[filename] = larger_id == chunk_id

    edge_data = {}
    read_counter = collections.Counter()
    dtype = [("sv1", np.uint64), ("sv2", np.uint64), ("aff", aff_dtype),
             ("area", np.uint64)]
    for k in filenames:
        # print(k, len(filenames[k]))
        with cloudvolume.Storage(base_path, n_threads=10) as stor:
            files = stor.get_files(filenames[k])

        data = []
        for file in files:
            if file["content"] is None:
                # print(f"{file['filename']} not created or empty")
                continue

            if file["error"] is not None:
                # print(f"error reading {file['filename']}")
                continue

            if swap[file["filename"]]:
                this_dtype = [dtype[1], dtype[0], dtype[2], dtype[3]]
                content = np.frombuffer(file["content"], dtype=this_dtype)
            else:
                content = np.frombuffer(file["content"], dtype=dtype)

            data.append(content)
            read_counter[k] += 1

        try:
            edge_data[k] = rfn.stack_arrays(data, usemask=False)
        except:
            raise

    # # TEST
    # with cloudvolume.Storage(base_path, n_threads=10) as stor:
    #     files = list(stor.list_files())
    #
    # true_counter = collections.Counter()
    # for file in files:
    #     if str(chunk_id) in file:
    #         true_counter[file.split("_")[0]] += 1
    #
    # print("Truth", true_counter)
    # print("Reality", read_counter)

    return edge_data
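

# The `swap` bookkeeping above relabels the first two columns of the structured
# edge records at read time: reading the same bytes with the "sv1"/"sv2" fields
# swapped flips which supervoxel is treated as belonging to this chunk, without
# copying any data. Self-contained illustration with toy values:
def _example_swapped_dtype_read():
    dtype = [("sv1", np.uint64), ("sv2", np.uint64),
             ("aff", np.float32), ("area", np.uint64)]
    buf = np.array([(1, 2, 0.5, 10)], dtype=dtype).tobytes()

    normal = np.frombuffer(buf, dtype=dtype)
    swapped = np.frombuffer(buf, dtype=[dtype[1], dtype[0], dtype[2], dtype[3]])
    # normal["sv1"] == 1, swapped["sv1"] == 2 -- same bytes, relabeled columns
    return normal, swapped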
def _collect_edge_data(imanager, chunk_coord):
    """ Loads edge data for a single chunk

    :param imanager: IngestionManager
    :param chunk_coord: np.ndarray
        array of three ints
    :return: dict of np.ndarrays
    """
    subfolder = "chunked_rg"
    base_path = f"{imanager.cg_meta.data_source.agglomeration}/{subfolder}/"

    chunk_coord = np.array(chunk_coord)
    x, y, z = chunk_coord
    chunk_id = compute_chunk_id(layer=1, x=x, y=y, z=z)

    filenames = defaultdict(list)
    swap = defaultdict(list)
    for _x, _y, _z in product([x - 1, x], [y - 1, y], [z - 1, z]):
        if imanager.cg_meta.is_out_of_bounds(np.array([_x, _y, _z])):
            continue

        # EDGES WITHIN CHUNKS
        filename = f"in_chunk_0_{_x}_{_y}_{_z}_{chunk_id}.data"
        filenames[EDGE_TYPES.in_chunk].append(filename)

    for d in [-1, 1]:
        for dim in range(3):
            diff = np.zeros([3], dtype=int)
            diff[dim] = d
            adjacent_chunk_coord = chunk_coord + diff
            x, y, z = adjacent_chunk_coord
            adjacent_chunk_id = compute_chunk_id(layer=1, x=x, y=y, z=z)

            if imanager.cg_meta.is_out_of_bounds(adjacent_chunk_coord):
                continue

            c_chunk_coords = _get_cont_chunk_coords(imanager, chunk_coord,
                                                    adjacent_chunk_coord)

            larger_id = np.max([chunk_id, adjacent_chunk_id])
            smaller_id = np.min([chunk_id, adjacent_chunk_id])
            chunk_id_string = f"{smaller_id}_{larger_id}"

            for c_chunk_coord in c_chunk_coords:
                x, y, z = c_chunk_coord

                # EDGES BETWEEN CHUNKS
                filename = f"between_chunks_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames[EDGE_TYPES.between_chunk].append(filename)
                swap[filename] = larger_id == chunk_id

                # EDGES FROM CUTS OF SVS
                filename = f"fake_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames[EDGE_TYPES.cross_chunk].append(filename)
                swap[filename] = larger_id == chunk_id

    edge_data = {}
    read_counter = Counter()
    for k in filenames:
        with cloudvolume.Storage(base_path, n_threads=10) as stor:
            files = stor.get_files(filenames[k])

        data = []
        for file in files:
            if file["error"] or file["content"] is None:
                continue

            edge_dtype = imanager.cg_meta.edge_dtype
            if swap[file["filename"]]:
                this_dtype = [edge_dtype[1], edge_dtype[0]] + edge_dtype[2:]
                content = np.frombuffer(file["content"], dtype=this_dtype)
            else:
                content = np.frombuffer(file["content"], dtype=edge_dtype)

            data.append(content)
            read_counter[k] += 1

        try:
            edge_data[k] = rfn.stack_arrays(data, usemask=False)
        except Exception:
            raise ValueError()

        # merge duplicate (sv1, sv2) edge rows by summing their columns
        edge_data_df = pd.DataFrame(edge_data[k])
        edge_data_dfg = (edge_data_df.groupby(["sv1", "sv2"]).aggregate(
            np.sum).reset_index())
        edge_data[k] = edge_data_dfg.to_records()
    return edge_data
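

# Toy illustration of the final groupby step above, which merges duplicate
# (sv1, sv2) edge rows (the same pair can appear in several files) by summing
# the remaining columns. The dtype here is a simplified stand-in; the real
# field layout comes from `imanager.cg_meta.edge_dtype`.
def _example_merge_duplicate_edges():
    edges = np.array(
        [(1, 2, 0.5, 10), (1, 2, 0.25, 5), (3, 4, 1.0, 7)],
        dtype=[("sv1", np.uint64), ("sv2", np.uint64),
               ("aff", np.float32), ("area", np.uint64)])
    df = pd.DataFrame(edges)
    merged = df.groupby(["sv1", "sv2"]).aggregate(np.sum).reset_index()
    # -> two rows: (1, 2, 0.75, 15) and (3, 4, 1.0, 7)
    return merged.to_records()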
def ingest_from_precomputed(cv_path, table_id):
    cv = cloudvolume.CloudVolume(cv_path)
    mesh_dir = f"{cv.cloudpath}/{cv.info['mesh']}"
    cv_stor = cloudvolume.Storage(mesh_dir)