def remove_masked_objects(
    src: DataURI,
    dst: DataURI,
    feature_id: DataURI,
    object_id: DataURI,
) -> "OBJECTS":
    """Remove entities that fall within the zero-valued region of a feature mask."""
    # Load the objects dataset and read the metadata needed to rebuild the entity table.
    src = DataModel.g.dataset_uri(ntpath.basename(object_id), group="objects")
    logger.debug(f"Getting objects {src}")

    with DatasetManager(src, out=None, dtype="float32", fillvalue=0) as DM:
        ds_objects = DM.sources[0]
        objects_fullname = ds_objects.get_metadata("fullname")
        objects_scale = ds_objects.get_metadata("scale")
        objects_offset = ds_objects.get_metadata("offset")
        objects_crop_start = ds_objects.get_metadata("crop_start")
        objects_crop_end = ds_objects.get_metadata("crop_end")

    print(f"Scaling objects by: {objects_scale}")
    logger.debug(f"Getting objects from {src} and file {objects_fullname}")

    from survos2.frontend.components.entity import make_entity_df, setup_entity_table

    tabledata, entities_df = setup_entity_table(
        objects_fullname,
        scale=objects_scale,
        offset=objects_offset,
        crop_start=objects_crop_start,
        crop_end=objects_crop_end,
    )
    entities = np.array(make_entity_df(np.array(entities_df), flipxy=False))

    # Load the feature volume whose zero-valued voxels define the mask.
    logger.debug(f"Removing entities using feature as mask: {feature_id}")
    src = DataModel.g.dataset_uri(ntpath.basename(feature_id), group="features")
    with DatasetManager(src, out=None, dtype="float32", fillvalue=0) as DM:
        mask = DM.sources[0][:]

    logger.debug(f"Initial number of objects: {len(entities_df)}")
    refined_entity_df = make_entity_df(
        remove_masked_entities((mask == 0) * 1.0, np.array(entities_df))
    )
    logger.debug(f"Removing entities using mask with shape {mask.shape}")

    # Return the surviving entities as [class_code, z, y, x] rows.
    result_list = []
    for i in range(len(refined_entity_df)):
        result_list.append(
            [
                refined_entity_df.iloc[i]["class_code"],
                refined_entity_df.iloc[i]["z"],
                refined_entity_df.iloc[i]["y"],
                refined_entity_df.iloc[i]["x"],
            ]
        )
    return result_list
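
# Hedged usage sketch (illustrative only, not part of the module API): the function is
# driven by workspace dataset URIs, and only the basenames of object_id and feature_id
# are used to look up the objects and features datasets. The dataset names below
# ("points_1", "mask_feature") are hypothetical.
def _example_remove_masked_objects():
    objects_uri = DataModel.g.dataset_uri("points_1", group="objects")
    feature_uri = DataModel.g.dataset_uri("mask_feature", group="features")
    return remove_masked_objects(
        src=objects_uri,
        dst=objects_uri,
        feature_id=feature_uri,
        object_id=objects_uri,
    )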

def organize_entities(img_vol, clustered_pts, entity_meta, flipxy=False, plot_all=False):
    """Group clustered points by class code and attach each group to the entity metadata."""
    class_idxs = entity_meta.keys()
    classwise_entities = []

    for c in class_idxs:
        # Select the points whose class code (column 3) matches this class.
        pt_idxs = clustered_pts[:, 3] == int(c)
        classwise_pts = clustered_pts[pt_idxs]
        clustered_df = make_entity_df(classwise_pts, flipxy=flipxy)
        classwise_pts = np.array(clustered_df)
        classwise_entities.append(classwise_pts)
        entity_meta[c]["entities"] = classwise_pts

        if plot_all:
            plt.figure(figsize=(9, 9))
            plt.imshow(img_vol[img_vol.shape[0] // 4, :], cmap="gray")
            plt.scatter(classwise_pts[:, 1], classwise_pts[:, 2], c="cyan")
            plt.title(
                str(entity_meta[c]["name"])
                + " Clustered Locations: "
                + str(len(classwise_pts))
            )

    combined_clustered_pts = np.concatenate(classwise_entities)

    return combined_clustered_pts, entity_meta
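
# Hedged usage sketch (illustrative only): shows the dict layout organize_entities
# expects for entity_meta -- keys are class codes and each value carries at least a
# "name" entry -- and an (N, 4) points array with the class code in the last column.
# The volume, points and class names below are made up.
def _example_organize_entities():
    img_vol = np.random.rand(32, 64, 64)
    clustered_pts = np.array(
        [[10, 20, 30, 0], [12, 22, 33, 0], [20, 40, 50, 1]], dtype=np.float32
    )
    entity_meta = {
        "0": {"name": "class_a"},
        "1": {"name": "class_b"},
    }
    return organize_entities(img_vol, clustered_pts, entity_meta, plot_all=False)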

def load_entities(entities_arr, flipxy=True):
    """Write an entity array to a temporary CSV and register it as an objects dataset
    in the current workspace."""
    entities_df = make_entity_df(entities_arr, flipxy=flipxy)
    tmp_fullpath = os.path.abspath(
        os.path.join(tempfile.gettempdir(), os.urandom(24).hex() + ".csv")
    )
    print(entities_df)
    print(f"Creating temp file: {tmp_fullpath}")
    entities_df.to_csv(tmp_fullpath)

    object_scale = 1.0
    object_offset = (0.0, 0.0, 0.0)
    object_crop_start = (0.0, 0.0, 0.0)
    object_crop_end = (1e9, 1e9, 1e9)

    objects_type = __objects_names__[0]
    ds = ws.auto_create_dataset(
        DataModel.g.current_session + "@" + DataModel.g.current_workspace,
        objects_type,
        __objects_group__,
        __objects_dtype__,
        fill=__objects_fill__,
    )
    ds.set_attr("kind", objects_type)
    ds.set_attr("fullname", tmp_fullpath)

    # Size the dataset to match the workspace's __data__ volume.
    src = DataModel.g.dataset_uri("__data__")
    with DatasetManager(src, out=None, dtype="float32", fillvalue=0) as DM:
        src_dataset = DM.sources[0]
        img_volume = src_dataset[:]
    logger.info(f"Got __data__ volume of size {img_volume.shape}")

    ds[:] = np.zeros_like(img_volume)
    ds.set_attr("scale", object_scale)
    ds.set_attr("offset", list(object_offset))
    ds.set_attr("crop_start", list(object_crop_start))
    ds.set_attr("crop_end", list(object_crop_end))

    # Copy the CSV into the workspace, point the dataset at the stored copy,
    # and clean up the temporary file.
    csv_saved_fullname = ds.save_file(tmp_fullpath)
    logger.info(f"Saving {tmp_fullpath} to {csv_saved_fullname}")
    ds.set_attr("fullname", csv_saved_fullname)
    os.remove(tmp_fullpath)
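
# Hedged usage sketch (illustrative only): load_entities expects an (N, 4) array of
# point coordinates with the class code in the last column, and assumes a current
# workspace/session is already configured on DataModel.g. The coordinates below are
# made up.
def _example_load_entities():
    entities_arr = np.array([[10, 20, 30, 0], [15, 25, 35, 1]], dtype=np.float32)
    load_entities(entities_arr, flipxy=True)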

def precrop(img_volume, entities_df, precrop_coord, precrop_vol_size):
    """View a ROI from a big volume by creating a temp dataset from a crop.

    Crops both the volume and the associated entities. Used for big volumes that
    never get loaded into the viewer.
    """
    logger.info(f"Preprocess cropping at {precrop_coord} to {precrop_vol_size}")

    img_volume, precropped_pts = crop_vol_and_pts_centered(
        img_volume,
        np.array(entities_df),
        location=precrop_coord,
        patch_size=precrop_vol_size,
        debug_verbose=True,
        offset=True,
    )

    entities_df = make_entity_df(precropped_pts, flipxy=False)

    return img_volume, entities_df
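
# Hedged usage sketch (illustrative only): the crop location and patch size are passed
# straight through to crop_vol_and_pts_centered; the (z, y, x) ordering and the sizes
# below are assumptions for illustration.
def _example_precrop(img_volume, entities_df):
    return precrop(
        img_volume,
        entities_df,
        precrop_coord=(32, 128, 128),
        precrop_vol_size=(64, 256, 256),
    )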

def test_make_entity_df():
    points = np.array(
        [
            [10, 10, 10, 0],
            [10, 20, 20, 0],
            [10, 30, 30, 0],
            [10, 40, 40, 0],
            [10, 50, 50, 0],
        ]
    )
    result = make_entity_df(points)
    assert isinstance(result, pd.DataFrame)
    assert result.shape == (5, 4)

def setup_entity_table(
    entities_fullname,
    entities_df=None,
    scale=1.0,
    offset=(0, 0, 0),
    crop_start=(0, 0, 0),
    crop_end=(MAX_SIZE, MAX_SIZE, MAX_SIZE),
    flipxy=True,
):
    """Load an entity CSV (unless a DataFrame is provided), apply scale and offset,
    and build the structured table used by the entity table widget."""
    if entities_df is None:
        print(f"Reading entity csv: {entities_fullname}")
        entities_df = pd.read_csv(entities_fullname)
        print(entities_df)
    # Otherwise the provided DataFrame is used and the filename is ignored.

    index_column = any("index" in col for col in entities_df.columns)
    print(index_column)

    # Drop pandas "Unnamed: N" columns left over from CSV round-trips.
    entities_df.drop(
        entities_df.columns[entities_df.columns.str.contains("unnamed", case=False)],
        axis=1,
        inplace=True,
    )

    entities_df = make_entity_df(np.array(entities_df), flipxy=flipxy)
    logger.debug(
        f"Loaded entities {entities_df.shape} applying scale {scale} and offset {offset} "
        f"and crop start {crop_start}, crop_end {crop_end}"
    )

    tabledata = []
    entities_df["z"] = (entities_df["z"] * scale) + offset[0]
    entities_df["x"] = (entities_df["x"] * scale) + offset[1]
    entities_df["y"] = (entities_df["y"] * scale) + offset[2]

    print("-" * 100)

    if index_column:
        logger.debug("Loading pts")
        for i in range(len(entities_df)):
            entry = (
                i,
                entities_df.iloc[i]["z"],
                entities_df.iloc[i]["x"],
                entities_df.iloc[i]["y"],
                0,
            )
            tabledata.append(entry)
    else:
        logger.debug("Loading entities")
        for i in range(len(entities_df)):
            entry = (
                i,
                entities_df.iloc[i]["z"],
                entities_df.iloc[i]["x"],
                entities_df.iloc[i]["y"],
                entities_df.iloc[i]["class_code"],
            )
            tabledata.append(entry)

    tabledata = np.array(
        tabledata,
        dtype=[
            ("index", int),
            ("z", float),
            ("x", float),
            ("y", float),
            ("class_code", int),
        ],
    )

    logger.debug(f"Loaded {len(tabledata)} entities.")
    return tabledata, entities_df
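
# Hedged usage sketch (illustrative only): setup_entity_table reads a CSV of entity
# point coordinates and returns a structured numpy table alongside the scaled
# DataFrame. The CSV path and scale/offset values below are hypothetical.
def _example_setup_entity_table():
    tabledata, entities_df = setup_entity_table(
        "/tmp/example_entities.csv",
        scale=0.5,
        offset=(0, 0, 0),
        flipxy=True,
    )
    print(tabledata["z"][:5])
    return tabledata, entities_df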

def aggregate(
    entity_df,
    img_shape,
    outlier_score_thresh=0.9,
    min_cluster_size=2,
    min_samples=1,
    params={"algorithm": "HDBSCAN", "min_cluster_size": 2, "min_samples": 1},
):
    """Cluster entity locations (HDBSCAN or DBSCAN), aggregate each small cluster to a
    single class-aware centroid, and return the refined entities as a DataFrame."""
    entity_df = make_entity_df(np.array(entity_df))
    X_rescaled, scaling = normalized_coordinates(entity_df, img_shape)

    if params["algorithm"] == "HDBSCAN":
        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=min_cluster_size, min_samples=min_samples
        ).fit(X_rescaled)
        labels = clusterer.labels_
        num_clusters_found = len(np.unique(labels))
        print(f"Number of clusters found: {num_clusters_found}")

        # Points with an outlier score below the threshold are treated as core samples.
        core_samples_mask = clusterer.outlier_scores_ < outlier_score_thresh
        print(clusterer.outlier_scores_.shape, core_samples_mask.shape)
    else:
        clusterer = DBSCAN(eps=params["eps"], min_samples=params["min_samples"]).fit(
            X_rescaled
        )
        labels = clusterer.labels_
        num_clusters_found = len(np.unique(labels))
        print(f"Number of clusters found: {num_clusters_found}")

    # Split clusters by size: small clusters are aggregated, the rest are set aside.
    cluster_coords = []
    cluster_sizes = []
    other_coords = []
    for lab in np.unique(labels):
        if np.sum(labels == lab) < 34:
            cluster_coords.append(X_rescaled[labels == lab])
            cluster_sizes.append(np.sum(labels == lab))
        else:
            other_coords.append(X_rescaled[labels == lab])

    cluster_sizes = np.array(cluster_sizes)
    print(f"Mean cluster size: {np.mean(cluster_sizes)}")

    refined_ent = np.concatenate(cluster_coords)
    print(f"Refined entity array shape {refined_ent.shape}")

    # Collapse each cluster to a single centroid with a class code.
    agg = aggregate_cluster_votes(cluster_coords)
    refined_ent = np.array([centroid_3d_with_class(c) for c in agg])

    # Undo the coordinate normalization.
    refined_ent[:, 0] = refined_ent[:, 0] / scaling[0]
    refined_ent[:, 1] = refined_ent[:, 1] / scaling[2]
    refined_ent[:, 2] = refined_ent[:, 2] / scaling[1]

    refined_entity_df = make_entity_df(refined_ent, flipxy=False)
    print(f"Aggregated entity length {len(agg)}")

    return refined_entity_df
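
# Hedged usage sketch (illustrative only): aggregate can also be driven with DBSCAN,
# in which case params must supply "eps" and "min_samples". The image shape and
# parameter values below are made up.
def _example_aggregate(entity_df):
    return aggregate(
        entity_df,
        img_shape=(64, 512, 512),
        params={"algorithm": "DBSCAN", "eps": 0.01, "min_samples": 2},
    )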