def update_data_source_targets(
        dataset: WkwData,
        target_index_tuple_list: Sequence[Tuple[int, float]]):
    """Create an updated list of data sources with new target classes.

    Args:
        dataset: WkwData dataset whose existing data sources serve as
            templates for the copies.
        target_index_tuple_list: Pairs of (sample_idx, target_class). For
            each pair, the data source backing that sample is copied with
            its target_class replaced.

    Returns:
        List of DataSource entries mirroring the originals except for the
        updated target_class field.
    """
    # Resolve the source index of every sample up front, in list order.
    list_source_idx = [
        dataset.get_source_idx_from_sample_idx(sample_idx)
        for (sample_idx, _) in target_index_tuple_list
    ]
    source_list = []
    # Pair each (sample_idx, target) with its source index POSITIONALLY.
    # BUG FIX: the original did list_source_idx[sample_idx], indexing the
    # positionally-built list by the sample index itself — only correct
    # when the sample indices happen to be exactly 0..n-1 in order.
    for (_, cur_target), s_index in zip(target_index_tuple_list,
                                        list_source_idx):
        s = dataset.data_sources[s_index]
        source_list.append(
            DataSource(id=s.id, input_path=s.input_path,
                       input_bbox=s.input_bbox, input_mean=s.input_mean,
                       input_std=s.input_std, target_path=s.target_path,
                       target_bbox=s.target_bbox, target_class=cur_target,
                       target_binary=s.target_binary))
    return source_list
def update_data_source_bbox(dataset: WkwData,
                            bbox_list: Sequence[Tuple[int, Sequence[int]]]):
    """Create an updated list of datasources from a wkwdataset and a list of
    index and bounding box tuples
    """
    # One (source_idx, bbox) pair is expected per sample in the dataset.
    assert len(bbox_list) == len(dataset)
    updated_sources = []
    for new_id, (src_idx, new_bbox) in enumerate(bbox_list):
        template = dataset.data_sources[src_idx]
        # Copy the template source, re-id it by position and swap in the
        # new bounding box for both input and target.
        updated_sources.append(
            DataSource(id=str(new_id),
                       input_path=template.input_path,
                       input_bbox=new_bbox,
                       input_mean=template.input_mean,
                       input_std=template.input_std,
                       target_path=template.target_path,
                       target_bbox=new_bbox,
                       target_class=template.target_class,
                       target_binary=template.target_binary))
    return updated_sources
# Read the skeletons and build one DataSource per bounding box, then dump
# them all to a json file for the three-bbox test dataset.
skeletons = [Skeleton(skel_dir) for skel_dir in skel_dirs]
print(f'Time to read skeleton: {time.time() - start}')
# Read the coordinates and target class of all three skeletons into the
# volume data frame
volume_df = get_volume_df(skeletons=skeletons)
# Get the ingredients for making the datasources
bboxes = bboxesFromArray(volume_df[['x', 'y', 'z']].values)
input_dir = '/tmpscratch/webknossos/Connectomics_Department/2018-11-13_scMS109_1to7199_v01_l4_06_24_fixed_mag8_artifact_pred/color/1'
# FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin float is the documented replacement and still yields float64.
target_class = volume_df['class'].values.astype(float)
target_binary = 1
target_dir = input_dir
input_mean = 148.0
input_std = 36.0
# Create a list of data sources
source_list = []
for i, cur_bbox in enumerate(bboxes):
    cur_target = target_class[i]
    # tolist() converts numpy ints to plain ints so the bbox is
    # json-serializable.
    source_list.append(
        DataSource(id=str(i), input_path=input_dir,
                   input_bbox=cur_bbox.tolist(), input_mean=input_mean,
                   input_std=input_std, target_path=target_dir,
                   target_bbox=cur_bbox.tolist(), target_class=cur_target,
                   target_binary=target_binary))
# Json name
json_name = os.path.join(get_data_dir(), 'test_data_three_bboxes.json')
# Write to json file
WkwData.datasources_to_json(source_list, json_name)
# Load a previously annotated widget and convert its dataset into data
# sources carrying two binary targets (Debris, Myelin), then write json.
file_name = '/u/alik/code/genEM3/playground/AK/classifier/.log/10X_9_9_1_um_with_myelin_Final.pkl'
w_loaded = Widget.load(file_name=file_name)
# Gather the data sources held by the widget's dataset
sources_fromWidget = w_loaded.dataset.data_sources
source_list = []
for idx, src in enumerate(sources_fromWidget):
    # Shrink each bbox back to its original extent (fix shape) by removing
    # the margin that was added around it
    trimmed_input_bbox = remove_bbox_margin(src.input_bbox, margin=35)
    trimmed_target_bbox = remove_bbox_margin(src.target_bbox, margin=35)
    # Two binary decisions per sample: presence of image artefacts
    # (Debris) and presence of Myelin
    annotations = w_loaded.annotation_list[idx][1]
    paired_targets = [annotations.get('Debris'), annotations.get('Myelin')]
    source_list.append(
        DataSource(id=src.id,
                   input_path=getMag8DatasetDir(),
                   input_bbox=trimmed_input_bbox,
                   input_mean=src.input_mean,
                   input_std=src.input_std,
                   target_path=getMag8DatasetDir(),
                   target_bbox=trimmed_target_bbox,
                   target_class=paired_targets,
                   target_binary=src.target_binary))
# Json name
json_name = os.path.join(get_data_dir(), '10x_test_bboxes',
                         '10X_9_9_1_um_double_binary_v01.json')
# Write to json file
WkwData.datasources_to_json(source_list, json_name)
[27500, 22000, 3969, 560, 560, 16], [27500, 22000, 4021, 560, 560, 9], [27500, 22000, 4065, 560, 560, 12], [27500, 22000, 4163, 560, 560, 9], [27500, 22000, 4255, 560, 560, 11]] num_samples = sum([bbox[5] for bbox in bboxes_add]) * 560 * 560 / 140 / 140 target_binary_add = 1 target_class_add = 0.0 input_mean_add = 148.0 input_std_add = 36.0 path_add = "/tmpscratch/webknossos/Connectomics_Department/2018-11-13_scMS109_1to7199_v01_l4_06_24_fixed_mag8/color/1" data_sources = WkwData.datasources_from_json(datasources_json_path) data_sources_max_id = max( [int(data_source.id) for data_source in data_sources]) data_sources_out = data_sources for bbox_idx, bbox_add in enumerate(bboxes_add): data_source_out = DataSource(id=str(data_sources_max_id + bbox_idx + 1), input_path=path_add, input_bbox=bbox_add, input_mean=input_mean_add, input_std=input_std_add, target_path=path_add, target_bbox=bbox_add, target_class=target_class_add, target_binary=target_binary_add) data_sources_out.append(data_source_out) WkwData.datasources_to_json(data_sources_out, datasources_json_path_out)
run_root, '../../data/debris_clean_added_bboxes2_datasource.json') datasources_json_path_out = os.path.join( run_root, '../../data/debris_clean_added_bboxes2_wiggle_datasource.json') wiggles = [[-35, 0], [35, 0], [0, -35], [0, 35]] data_sources = WkwData.datasources_from_json(datasources_json_path) data_sources_out = [] for data_source in data_sources: data_sources_out.append(data_source) id = data_source.id bbox = data_source.input_bbox if (data_source.target_class == 1) & (bbox[3:] == [140, 140, 1]): for wiggle_idx, wiggle in enumerate(wiggles): id_out = '{:05.0f}'.format(int(id) + (wiggle_idx + 1) * 1E4) bbox_out = [bbox[0] + wiggle[0], bbox[1] + wiggle[1], *bbox[2:]] data_source_out = DataSource( id=id_out, input_path=data_source.input_path, input_bbox=bbox_out, input_mean=data_source.input_mean, input_std=data_source.input_std, target_path=data_source.target_path, target_bbox=bbox_out, target_class=data_source.target_class, target_binary=data_source.target_binary) data_sources_out.append(data_source_out) WkwData.datasources_to_json(data_sources_out, datasources_json_path_out)
[24568, 15582, 3365, 140, 140, 50], [24761, 15838, 3341, 140, 140, 50], [29011, 18583, 4956, 140, 140, 50], [29723, 22197, 5076, 140, 140, 50], [29948, 16404, 6123, 140, 140, 50]] # create a list of the data sources dataSources = [] # Append debris locations for idx, curBbox in enumerate(bboxes_debris): # convert bbox to normal python list and integer. numpy arrays are not serializable curBbox = [int(num) for num in curBbox] curSource = DataSource(id=str(idx), input_path=getMag8DatasetDir(), input_bbox=curBbox, input_mean=148.0, input_std=36.0, target_path=getMag8DatasetDir(), target_bbox=curBbox, target_class=1.0, target_binary=1) dataSources.append(curSource) # Append clean locations for idx, curBbox in enumerate(bboxes_clean): # The initial 600 Indices are taken by the debris locations idx = idx + numTrainingExamples curSource = DataSource(id=str(idx), input_path=getMag8DatasetDir(), input_bbox=curBbox, input_mean=148.0, input_std=36.0, target_path=getMag8DatasetDir(),
# make sure it is only a single slice (3rd dim size = 0) return np.hstack([topLeft, dims[0:2], np.ones(1)]).astype(int) # Get the bounding boxes of each debris location curDims = np.array([140, 140, 0]) bboxFromCenterFixedDim = lambda coord: bboxFromCenter(coord, curDims) bboxes_positive = np.apply_along_axis(bboxFromCenterFixedDim, 1, coordArray) datasources = [] for id in range(bboxes_positive.shape[0]): input_bbox = [int(el) for el in list(bboxes_positive[id, :])] datasource = DataSource(id=str(id), input_path=input_path, input_bbox=input_bbox, input_mean=input_mean, input_std=input_std, target_path=target_path, target_bbox=input_bbox, target_class=1, target_binary=1) datasources.append(datasource) ####################################### # Get Negative Examples from random sampling ####################################### sample_pos_x = np.random.randint(wkw_lims[0], wkw_lims[0] + wkw_lims[3] - sample_dims[0], num_samples) sample_pos_y = np.random.randint(wkw_lims[1], wkw_lims[1] + wkw_lims[4] - sample_dims[1],
input_std = 36.0
input_path = wkw_path
target_path = wkw_path
json_path = 'datasources_distributed.json'
# Uniformly sample top-left corners so every bbox fits inside the wkw limits
sample_pos_x = np.random.randint(wkw_lims[0],
                                 wkw_lims[0] + wkw_lims[3] - sample_dims[0],
                                 num_samples)
sample_pos_y = np.random.randint(wkw_lims[1],
                                 wkw_lims[1] + wkw_lims[4] - sample_dims[1],
                                 num_samples)
sample_pos_z = np.random.randint(wkw_lims[2],
                                 wkw_lims[2] + wkw_lims[5] - sample_dims[2],
                                 num_samples)
datasources = []
# Loop variable renamed from `id` to avoid shadowing the builtin
for sample_id in range(num_samples):
    # Cast numpy ints to plain ints so the bbox is json-serializable
    input_bbox = [
        int(sample_pos_x[sample_id]), int(sample_pos_y[sample_id]),
        int(sample_pos_z[sample_id]), sample_dims[0], sample_dims[1],
        sample_dims[2]
    ]
    target_bbox = input_bbox
    # Keyword arguments for consistency with the other DataSource call
    # sites in this project (values unchanged).
    # NOTE(review): id is an int here while the sibling scripts pass
    # str(i), and target_class/target_binary are omitted — confirm
    # DataSource provides defaults and json serialization accepts int ids.
    datasource = DataSource(id=sample_id,
                            input_path=input_path,
                            input_bbox=input_bbox,
                            input_mean=input_mean,
                            input_std=input_std,
                            target_path=target_path,
                            target_bbox=target_bbox)
    datasources.append(datasource)
WkwData.datasources_to_json(datasources, json_path)