def _read_agg_files(filenames, base_path):
    with cloudvolume.Storage(base_path, n_threads=10) as stor:
        files = stor.get_files(filenames)

    edge_list = []
    for file in files:
        if file["error"] or file["content"] is None:
            continue
        content = zstd.ZstdDecompressor().decompressobj().decompress(
            file["content"])
        edge_list.append(
            np.frombuffer(content, dtype=basetypes.NODE_ID).reshape(-1, 2))
    return edge_list
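
For orientation, a minimal usage sketch (the bucket path and file names below are hypothetical): each successfully read file yields one (N, 2) array of node IDs, which callers typically concatenate into a single edge array.

import numpy as np

# Hypothetical agglomeration bucket and chunk files; adjust to the real layout.
base_path = "gs://example-bucket/chunked_rg/"
filenames = ["in_chunk_0_0_0_0_123.data", "in_chunk_0_1_0_0_123.data"]

edge_list = _read_agg_files(filenames, base_path)
if edge_list:
    all_edges = np.concatenate(edge_list, axis=0)  # one (M, 2) array of node IDs
    print(all_edges.shape)
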
Example #2
def handlerino_periodically_write_to_cloud(*args, **kwargs):
    global num_messages
    num_messages = num_messages + 1
    print(num_messages, args[0]['data'])
    messages.append(args[0]['data'])
    with open('output.txt', 'a') as f:
        f.write(str(args[0]['data']) + '\n')
    if num_messages % 1000 == 0:
        print('Writing result data to cloud')
        cv_path = cg._cv_path
        filename = f'{datetime.now()}_meshes_{num_messages}'
        with cloudvolume.Storage(cv_path) as storage:
            storage.put_file(file_path=f'meshing_run_data/{filename}',
                             content=','.join(map(str, messages)),
                             compress=False,
                             cache_control='no-cache')
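
The handler above is written as a generic message callback: it expects its first positional argument to be a dict with a 'data' key, buffers every message, and uploads the accumulated list on each 1000th call. A toy driver under that assumption (in practice the handler would be registered as a subscription callback rather than called in a loop):

num_messages = 0
messages = []

for i in range(5):
    # The message shape (a dict with a 'data' key) mirrors how args[0] is used above.
    handlerino_periodically_write_to_cloud({'data': f'mesh_chunk_{i}'})
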
Example #3
def handlerino_write_to_cloud(*args, **kwargs):
    global num_messages
    num_messages = num_messages + 1
    print(num_messages, args[0]['data'])
    messages.append(args[0]['data'])
    with open('output.txt', 'a') as f:
        f.write(str(args[0]['data']) + '\n')
    if num_messages == 1000:
        print('DONE')
        cv_path = cg._cv_path
        with cloudvolume.Storage(cv_path) as storage:
            storage.put_file(
                file_path='frag_test/frag_test_summary_no_dust_threshold',
                content=','.join(map(str, messages)),
                compress=False,
                cache_control='no-cache')
Example #4
def write_mesh_to_cv(cv, cv_folder, mesh, mesh_id):
    mesh_dir = os.path.join(cv_folder, )
    if not os.path.isdir(mesh_dir):
        os.makedirs(mesh_dir)
    n_vertices = mesh.vertices.shape[0]

    vertices = np.array(mesh.vertices, dtype=np.float32)

    vertex_index_format = [
        np.uint32(n_vertices),  # number of vertices (each vertex has 3 float32 coordinates)
        vertices,
        np.array(mesh.faces, dtype=np.uint32)
    ]
    outs = b''.join([array.tobytes() for array in vertex_index_format])

    with cloudvolume.Storage(cv.layer_cloudpath, progress=cv.progress) as stor:
        fname_man = os.path.join(cv.info['mesh'], f'{mesh_id}:0')
        frag_id = f'9{mesh_id}:0'
        fname = os.path.join(cv.info['mesh'], frag_id)
        d_man = {'fragments': [frag_id]}
        stor.put_json(fname_man, json.dumps(d_man))
        stor.put_file(file_path=fname, content=outs, compress=True)
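
The binary layout written above is the legacy Neuroglancer "precomputed" mesh fragment format: a uint32 vertex count, the float32 vertex coordinates, then uint32 triangle indices, plus a small JSON manifest listing the fragment names. A minimal sketch of decoding such a fragment back (decode_mesh_fragment is a hypothetical helper, not a cloudvolume API):

import numpy as np

def decode_mesh_fragment(content):
    # First 4 bytes: uint32 vertex count, matching the layout written above.
    n_vertices = int(np.frombuffer(content[:4], dtype=np.uint32)[0])
    # Next n_vertices * 3 float32 values: the vertex coordinates.
    vertex_end = 4 + n_vertices * 3 * 4
    vertices = np.frombuffer(content[4:vertex_end], dtype=np.float32).reshape(-1, 3)
    # Remaining bytes: uint32 vertex indices, three per triangle face.
    faces = np.frombuffer(content[vertex_end:], dtype=np.uint32).reshape(-1, 3)
    return vertices, faces
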
Example #5
def collect_edge_data(im, chunk_coord, aff_dtype=np.float32):
    """ Loads edge for single chunk

    :param im: IngestionManager
    :param chunk_coord: np.ndarray
        array of three ints
    :param aff_dtype: np.dtype
    :return: dict of np.ndarrays
    """
    subfolder = "chunked_rg"

    base_path = f"{im.storage_path}/{subfolder}/"

    chunk_coord = np.array(chunk_coord)

    chunk_id = im.cg.get_chunk_id(layer=1,
                                  x=chunk_coord[0],
                                  y=chunk_coord[1],
                                  z=chunk_coord[2])

    filenames = collections.defaultdict(list)
    swap = collections.defaultdict(list)
    for x in [chunk_coord[0] - 1, chunk_coord[0]]:
        for y in [chunk_coord[1] - 1, chunk_coord[1]]:
            for z in [chunk_coord[2] - 1, chunk_coord[2]]:

                if im.is_out_of_bounce(np.array([x, y, z])):
                    continue

                # EDGES WITHIN CHUNKS
                filename = f"in_chunk_0_{x}_{y}_{z}_{chunk_id}.data"
                filenames["in"].append(filename)

    for d in [-1, 1]:
        for dim in range(3):
            diff = np.zeros([3], dtype=int)
            diff[dim] = d

            adjacent_chunk_coord = chunk_coord + diff
            adjacent_chunk_id = im.cg.get_chunk_id(layer=1,
                                                   x=adjacent_chunk_coord[0],
                                                   y=adjacent_chunk_coord[1],
                                                   z=adjacent_chunk_coord[2])

            if im.is_out_of_bounce(adjacent_chunk_coord):
                continue

            c_chunk_coords = _get_cont_chunk_coords(im, chunk_coord,
                                                    adjacent_chunk_coord)

            larger_id = np.max([chunk_id, adjacent_chunk_id])
            smaller_id = np.min([chunk_id, adjacent_chunk_id])
            chunk_id_string = f"{smaller_id}_{larger_id}"

            for c_chunk_coord in c_chunk_coords:
                x, y, z = c_chunk_coord

                # EDGES BETWEEN CHUNKS
                filename = f"between_chunks_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames["between"].append(filename)

                swap[filename] = larger_id == chunk_id

                # EDGES FROM CUTS OF SVS
                filename = f"fake_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames["cross"].append(filename)

                swap[filename] = larger_id == chunk_id

    edge_data = {}
    read_counter = collections.Counter()

    dtype = [("sv1", np.uint64), ("sv2", np.uint64), ("aff", aff_dtype),
             ("area", np.uint64)]
    for k in filenames:
        # print(k, len(filenames[k]))

        with cloudvolume.Storage(base_path, n_threads=10) as stor:
            files = stor.get_files(filenames[k])

        data = []
        for file in files:
            if file["content"] is None:
                # print(f"{file['filename']} not created or empty")
                continue

            if file["error"] is not None:
                # print(f"error reading {file['filename']}")
                continue

            if swap[file["filename"]]:
                this_dtype = [dtype[1], dtype[0], dtype[2], dtype[3]]
                content = np.frombuffer(file["content"], dtype=this_dtype)
            else:
                content = np.frombuffer(file["content"], dtype=dtype)

            data.append(content)

            read_counter[k] += 1

        try:
            edge_data[k] = rfn.stack_arrays(data, usemask=False)
        except Exception:
            raise

    # # TEST
    # with cloudvolume.Storage(base_path, n_threads=10) as stor:
    #     files = list(stor.list_files())
    #
    # true_counter = collections.Counter()
    # for file in files:
    #     if str(chunk_id) in file:
    #         true_counter[file.split("_")[0]] += 1
    #
    # print("Truth", true_counter)
    # print("Reality", read_counter)

    return edge_data
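
The swap bookkeeping above exploits a property of numpy structured dtypes: parsing the same bytes with the first two field names exchanged flips which column is labeled "sv1" and which "sv2", without copying or reordering anything. A self-contained toy illustration:

import numpy as np

dtype = [("sv1", np.uint64), ("sv2", np.uint64), ("aff", np.float32),
         ("area", np.uint64)]
record = np.array([(1, 2, 0.5, 10)], dtype=dtype)
raw = record.tobytes()

# Same bytes, but the first two field names are exchanged.
swapped_dtype = [dtype[1], dtype[0], dtype[2], dtype[3]]
swapped = np.frombuffer(raw, dtype=swapped_dtype)
print(swapped["sv1"][0], swapped["sv2"][0])  # 2 1 -- the labels trade places
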
Example #6
def _collect_edge_data(imanager, chunk_coord):
    """ Loads edge for single chunk
    :param imanager: IngestionManager
    :param chunk_coord: np.ndarray
        array of three ints
    :param aff_dtype: np.dtype
    :param v3_data: bool
    :return: dict of np.ndarrays
    """
    subfolder = "chunked_rg"
    base_path = f"{imanager.cg_meta.data_source.agglomeration}/{subfolder}/"
    chunk_coord = np.array(chunk_coord)
    x, y, z = chunk_coord
    chunk_id = compute_chunk_id(layer=1, x=x, y=y, z=z)

    filenames = defaultdict(list)
    swap = defaultdict(list)
    x, y, z = chunk_coord
    for _x, _y, _z in product([x - 1, x], [y - 1, y], [z - 1, z]):
        if imanager.cg_meta.is_out_of_bounds(np.array([_x, _y, _z])):
            continue
        filename = f"in_chunk_0_{_x}_{_y}_{_z}_{chunk_id}.data"
        filenames[EDGE_TYPES.in_chunk].append(filename)

    for d in [-1, 1]:
        for dim in range(3):
            diff = np.zeros([3], dtype=int)
            diff[dim] = d
            adjacent_chunk_coord = chunk_coord + diff
            x, y, z = adjacent_chunk_coord
            adjacent_chunk_id = compute_chunk_id(layer=1, x=x, y=y, z=z)

            if imanager.cg_meta.is_out_of_bounds(adjacent_chunk_coord):
                continue
            c_chunk_coords = _get_cont_chunk_coords(imanager, chunk_coord,
                                                    adjacent_chunk_coord)

            larger_id = np.max([chunk_id, adjacent_chunk_id])
            smaller_id = np.min([chunk_id, adjacent_chunk_id])
            chunk_id_string = f"{smaller_id}_{larger_id}"

            for c_chunk_coord in c_chunk_coords:
                x, y, z = c_chunk_coord
                filename = f"between_chunks_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames[EDGE_TYPES.between_chunk].append(filename)
                swap[filename] = larger_id == chunk_id

                # EDGES FROM CUTS OF SVS
                filename = f"fake_0_{x}_{y}_{z}_{chunk_id_string}.data"
                filenames[EDGE_TYPES.cross_chunk].append(filename)
                swap[filename] = larger_id == chunk_id

    edge_data = {}
    read_counter = Counter()
    for k in filenames:
        with cloudvolume.Storage(base_path, n_threads=10) as stor:
            files = stor.get_files(filenames[k])
        data = []
        for file in files:
            if file["error"] or file["content"] is None:
                continue

            edge_dtype = imanager.cg_meta.edge_dtype
            if swap[file["filename"]]:
                this_dtype = [edge_dtype[1], edge_dtype[0]] + edge_dtype[2:]
                content = np.frombuffer(file["content"], dtype=this_dtype)
            else:
                content = np.frombuffer(file["content"], dtype=edge_dtype)

            data.append(content)
            read_counter[k] += 1
        try:
            edge_data[k] = rfn.stack_arrays(data, usemask=False)
        except Exception as err:
            raise ValueError(f"Could not stack edge data for {k}.") from err

        edge_data_df = pd.DataFrame(edge_data[k])
        edge_data_dfg = (edge_data_df.groupby(["sv1", "sv2"]).aggregate(
            np.sum).reset_index())
        edge_data[k] = edge_data_dfg.to_records()
    return edge_data
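
The pandas step at the end collapses duplicate edges: rows sharing (sv1, sv2) have their remaining columns summed before being converted back to records. A toy illustration of the same aggregation (using "sum" as the aggregator):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "sv1": np.array([1, 1, 3], dtype=np.uint64),
    "sv2": np.array([2, 2, 4], dtype=np.uint64),
    "aff": np.array([0.5, 0.25, 0.9], dtype=np.float32),
    "area": np.array([10, 5, 7], dtype=np.uint64),
})
# The duplicate (1, 2) edge is merged; aff and area are summed.
deduped = df.groupby(["sv1", "sv2"]).aggregate("sum").reset_index()
print(deduped.to_records(index=False))
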
Example #7
def ingest_from_precomputed(cv_path, table_id):
    cv = cloudvolume.CloudVolume(cv_path)

    mesh_dir = f"{cv.cloudpath}/{cv.info['mesh']}"

    cv_stor = cloudvolume.Storage(mesh_dir)