Example #1
def voxel_down_dataset(input_path: str,
                       output_path: str,
                       voxel_size: Union[dict, int] = 500):
    """ Applies voxel down-sampling to all CloudEnsembles saved as pickle files at input_path and saves
        the results under the same names at output_path.
    """
    files = glob.glob(input_path + '*.pkl')
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    print("Starting to voxel down dataset...")
    for file in tqdm(files):
        # strip the directory prefix and the '.pkl' extension to get the object name
        slashs = [pos for pos, char in enumerate(file) if char == '/']
        name = file[slashs[-1] + 1:-4]

        ce = ensembles.ensemble_from_pkl(file)
        ce = voxel_down(ce, voxel_size=voxel_size)
        ce.save2pkl(output_path + name + '.pkl')
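A minimal usage sketch for the function above; the paths are hypothetical and must end with '/' because the function appends '*.pkl' directly:

# Hypothetical input/output folders with pickled CloudEnsembles.
voxel_down_dataset(input_path='/data/cells_raw/',
                   output_path='/data/cells_voxeled/',
                   voxel_size=500)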
Example #2
def load_obj(
        data_type: str, file: str
) -> Union[HybridMesh, HybridCloud, PointCloud, CloudEnsemble]:
    """ Loads a pickled object of the given type: 'obj' for a generic pickle, 'ce' for a CloudEnsemble,
        'hc' for a HybridCloud, 'hm' for a HybridMesh. Any other data_type falls back to a PointCloud.
    """
    if data_type == 'obj':
        return basics.load_pkl(file)
    if data_type == 'ce':
        return ensembles.ensemble_from_pkl(file)
    if data_type == 'hc':
        hc = HybridCloud()
        return hc.load_from_pkl(file)
    if data_type == 'hm':
        hm = HybridMesh()
        return hm.load_from_pkl(file)
    # fallback: treat everything else as a plain PointCloud
    pc = PointCloud()
    return pc.load_from_pkl(file)
Example #3
def dataspecs2csv(set_paths: dict, out_path: str, ssds: dict):
    """ Writes per-cell statistics (total edge length, size, node and vertex counts per label) of all
        CloudEnsembles found at the given set paths into a csv file at out_path.
    """
    out_path = os.path.expanduser(out_path)
    out_file = open(out_path, 'w')
    spec_writer = csv.writer(out_file, delimiter=',')
    for key in set_paths:
        ssd = ssds[key]
        spec_writer.writerow([
            key, 'edge_length', 'size', 'n_dendrite', 'n_axon', 'n_soma',
            'n_bouton', 'n_terminal', 'n_neck', 'n_head', 'v_dendrite',
            'v_axon', 'v_soma', 'v_bouton', 'v_terminal', 'v_neck', 'v_head'
        ])
        set_path = os.path.expanduser(set_paths[key])
        files = glob.glob(set_path + '*.pkl')
        for file in tqdm(files):
            sso_id = int(re.findall(r"/sso_(\d+).", file)[0])
            sso = ssd.get_super_segmentation_object(sso_id)
            ce = ensembles.ensemble_from_pkl(file)
            # count how often each label occurs on skeleton nodes and on vertices
            n_unique = np.unique(ce.node_labels, return_counts=True)
            v_unique = np.unique(ce.labels, return_counts=True)
            n_dict = dict(zip(n_unique[0], n_unique[1]))
            v_dict = dict(zip(v_unique[0], v_unique[1]))
            # labels 0..6 correspond to the 7 n_/v_ columns of the header above
            n_list = [n_dict.get(i, 0) for i in range(7)]
            v_list = [v_dict.get(i, 0) for i in range(7)]
            spec_writer.writerow([
                sso_id,
                int(sso.total_edge_length()), sso.size, *n_list, *v_list
            ])
        out_file.write('\n\n\n\n')
    out_file.close()
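A hedged usage sketch; the SyConn import, working directory and set paths are assumptions, and the pickle files are expected to be named 'sso_<id>...pkl' so the id can be parsed:

# Assumed SyConn dataset handle and hypothetical paths.
from syconn.reps.super_segmentation import SuperSegmentationDataset

ssd = SuperSegmentationDataset(working_dir='/path/to/syconn_workdir/')
dataspecs2csv(set_paths={'train': '/data/train/', 'val': '/data/val/'},
              out_path='~/dataspecs.csv',
              ssds={'train': ssd, 'val': ssd})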
Example #4
    def load_cmp(self, idx: int, seed: int):
        """ Method for comparing ground truth with processed data. Ground truth must be given in first file list,
            processed files in the second file list.
        """

        while idx < len(self.files1):
            gt_file = self.files1[idx]
            pred_file = self.files2[idx]

            slashs = [pos for pos, char in enumerate(gt_file) if char == '/']
            filename = gt_file[slashs[-1] + 1:-4]

            gt = ensembles.ensemble_from_pkl(gt_file)
            pred = PointCloud().load_from_pkl(pred_file)

            res = self.core_next(gt, pred, filename, seed=seed)
            if res is None:
                return
            else:
                idx += res

def poissonize_dataset(input_path: str, output_path: str, tech_density: int, obj_factor: float):
    """ Converts all objects, saved as pickle files at input_path, into Poisson disk sampled HybridClouds
        and saves them at output_path under the same names with the suffix '_poisson'.

    Args:
        input_path: Path to pickle files with HybridMeshes.
        output_path: Path to folder in which results should be stored.
        tech_density: Poisson sampling density in points/um².
    """
    files = glob.glob(input_path + '*.pkl')
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    print("Starting to transform mesh dataset into poisson dataset...")
    for file in tqdm(files):
        slashs = [pos for pos, char in enumerate(file) if char == '/']
        name = file[slashs[-1] + 1:-4]
        print(name)
        ce = None
        try:
            # the file may contain a HybridMesh directly ...
            hm = HybridMesh()
            hm.load_from_pkl(file)
        except TypeError:
            # ... or a CloudEnsemble whose HybridCloud holds the mesh
            ce = ensembles.ensemble_from_pkl(file)
            hm = ce.hc
        if not isinstance(hm, HybridMesh) or hm.faces is None:
            raise ValueError("Poisson sampling requires existing faces.")
        result = hybridmesh2poisson(hm, tech_density, obj_factor)
        if ce is None:
            result.save2pkl(output_path + name + '_poisson.pkl')
        else:
            ce.change_hybrid(result)
            ce.save2pkl(output_path + name + '_poisson.pkl')
            for key in ce.clouds:
                cloud = ce.clouds[key]
                print(f"\nProcessing {key}")
                ce.clouds[key] = hybridmesh2poisson(cloud, tech_density, obj_factor)
                ce.save2pkl(output_path + name + '_poisson.pkl')
            # the final reset and save only apply to ensembles; running them when ce is None would fail
            ce.reset_ensemble()
            ce.hc.set_verts2node(None)
            ce.save2pkl(output_path + name + '_poisson.pkl')
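A minimal usage sketch; the paths, density and factor values are placeholder assumptions:

# Hypothetical folders with pickled HybridMeshes / CloudEnsembles; trailing '/' is required
# because the function appends '*.pkl' directly.
poissonize_dataset(input_path='/data/meshes/',
                   output_path='/data/poisson/',
                   tech_density=1500,
                   obj_factor=2.0)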
def split(data_path: str,
          filename: str,
          bio_density: float = None,
          capacity: int = None,
          tech_density: int = None,
          density_splitting: bool = True,
          chunk_size: int = None,
          splitted_hcs: dict = None,
          redundancy: int = 1,
          label_remove: List[int] = None,
          split_jitter: int = 0,
          node_label_ignore_sampling: List[int] = None):
    """
    Splits HybridClouds given as pickle files at data_path into multiple subgraphs and saves that chunking information
    in the new folder 'splitted' as a pickled dict. The dict has filenames of the HybridClouds as keys and lists of
    subgraphs as values. The subgraphs are saved as numpy arrays of the indices of skeleton nodes which belong to the
    respective chunk.

    Splitting is done by drawing a random node as the base node for a subgraph extraction. All nodes included in
    the extracted subgraph get removed from the total nodes. The algorithm continues the splitting by drawing new
    random nodes from the remaining nodes as the new base nodes until all nodes have been included in at least one
    subgraph.

    Args:
        data_path: Path to HybridClouds saved as pickle files.
        filename: the file in which splitting information should get saved.
        tech_density: poisson sampling density with which data set was preprocessed in point/um²
        bio_density: chunk sampling density in point/um²
        capacity: number of points which can get processed with given network architecture
        density_splitting: Flag for switching between density and context modes
        chunk_size: Is only used in context mode. Here, the subgraphs get generated by a certain size.
        splitted_hcs: Existing version of splitting information for updates
        redundancy: Indicates how many iterations of base nodes should get used. 1 means, that base nodes get randomly
            drawn from the remaining nodes until all nodes have been included in at least one subgraph. redundancy = n
            means, that base nodes get randomly drawn until all nodes have been included in subgraphs at least n times.
        label_remove: List of labels indicating which nodes should get removed.
        split_jitter: Adds jitter to the context size of the generated chunks.
        node_label_ignore_sampling: Skeleton node with this label will not be used as base node for context
            generation.
    """
    # check validity of method call
    if density_splitting:
        if bio_density is None or tech_density is None or capacity is None:
            raise ValueError(
                "density-based splitting requires bio_density, tech_density and capacity."
            )
        # calculate number of vertices for extracting max surface area (only used for density-based splitting)
        vert_num = int(capacity * tech_density / bio_density)
    else:
        if chunk_size is None:
            raise ValueError("context-based splitting requires chunk_size.")
    # gather all files at given path
    data_path = os.path.expanduser(data_path)
    files = glob.glob(data_path + '*.pkl')
    if splitted_hcs is None:
        splitted_hcs = {}
    # iterate all files at data_path
    for file in tqdm(files):
        slashs = [pos for pos, char in enumerate(file) if char == '/']
        name = file[slashs[-1] + 1:-4]
        # apply splitting algorithm to each file which does not have existing splitting information
        if name in splitted_hcs.keys():
            continue
        print(f"No splitting information found for {name}. Splitting it now...")
        obj = ensembles.ensemble_from_pkl(file)
        # remove labels
        if label_remove is not None:
            obj.remove_nodes(labels=label_remove)
        nodes = np.array(obj.graph().nodes)
        base_points = []
        if node_label_ignore_sampling is not None:
            for ignore_l in node_label_ignore_sampling:
                base_points.extend(
                    np.transpose(np.nonzero(obj.node_labels == ignore_l)[0]))
        subgraphs = []
        for i in range(redundancy):
            # prepare mask for filtering subgraph nodes
            mask = np.ones(len(nodes), dtype=bool)
            # existing base nodes should not get chosen as a base node again
            mask[np.isin(nodes, base_points)] = False
            # identify remaining nodes
            remaining_nodes = nodes[mask]
            while len(remaining_nodes) != 0:
                # choose random base node from the remaining nodes
                choice = np.random.choice(remaining_nodes, 1)
                base_points.append(choice[0])
                # extract subgraph around the chosen base node using the specified splitting method
                if density_splitting:
                    subgraph = objects.density_splitting(
                        obj, choice[0], vert_num)
                else:
                    jitter = random.randint(0, split_jitter)
                    subgraph = objects.context_splitting_kdt(
                        obj, choice[0], chunk_size + jitter, radius=800)
                subgraphs.append(subgraph)
                # remove nodes of the extracted subgraph from the remaining nodes
                mask[np.isin(nodes, subgraph)] = False
                remaining_nodes = nodes[mask]
        # update splitting dict with new subgraphs for current object
        splitted_hc = list(zip(obj.nodes[base_points], subgraphs))
        splitted_hcs[name] = splitted_hc
        with open(filename, 'wb') as f:
            pickle.dump(splitted_hcs, f)
        # save the base points separately for later inspection
        base_points = np.array(base_points)
        slashs = [pos for pos, char in enumerate(filename) if char == '/']
        identifier = filename[slashs[-1] + 1:-4]
        basefile = f'{filename[:slashs[-1]]}/base_points/{identifier}/'
        if not os.path.exists(basefile):
            os.makedirs(basefile)
        with open(f'{basefile}{name}_basepoints.pkl', 'wb') as f:
            pickle.dump(base_points, f)
    return splitted_hcs
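A hedged usage sketch for the density-based mode; the paths, densities and the capacity are assumptions that depend on the preprocessing and the network used:

# Hypothetical parameters; data_path needs a trailing '/' and filename must contain a directory part
# so the base_points folder can be derived from it.
splitted = split(data_path='/data/poisson/',
                 filename='/data/splitted/splits.pkl',
                 bio_density=80,
                 capacity=5000,
                 tech_density=1500,
                 density_splitting=True,
                 redundancy=2)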