def bin_data(cnn_output, training_data_src, size = (3, 12, 12), cutoff = 15, pad = True, verbose = False, collect_cnn = False, maxip=0):
    '''Collect connected components from CNN output and bin them into true positives and false positives based on labels.

    cnn_output = pth/list of folders containing CNN output
    training_data_src = pth/list of folders containing numpy arrays [c,z,y,x] c = 0: raw data, c=1: nonzeropixels representing ground truth centers, requiring same naming as src_cnn
    size = used in get_pixels_around_center; distance from point in zyx. Note: this is effectively a radius (NOT DIAMETER).
    cutoff = allowable distance (IN PIXELS) for a True positive when considering ground truth centers with centers of mass of cnn-labelled connected components
    pad = (optional) important for edge cases in training set (i.e. points that don't have sufficient border around them)
        True if pnt is on edge of image, function pads evenly
        False if pnt is on edge of image, drop
    verbose = (optional) if True print progress
    collect_cnn (optional): if true keep these data for training as well
    maxip = int, number of maxips to ravel into data

    A volume whose pairing or pixel collection raises is reported and skipped; the remaining volumes are still processed.

    return:
        {'true_positives': tps, 'false_positives': fps, 'ground_truths': gts}
        each value is a list with one entry per successfully processed volume
    '''
    cnn_output = listdirfull(cnn_output) if isinstance(cnn_output, str) else cnn_output
    training_data_src = listdirfull(training_data_src) if isinstance(training_data_src, str) else training_data_src

    #names present in both inputs; the last 4 characters (the '.npy' extension) of each training file are dropped before matching
    training_names = set(os.path.basename(xx[:-4]) for xx in training_data_src)
    cnn_names = set(os.path.basename(xx) for xx in cnn_output)
    intersection = list(training_names.intersection(cnn_names))

    if verbose: print('Collect cnn == {}'.format(collect_cnn))
    tps = []; fps = []; gts = []
    for pth in intersection:
        if verbose: sys.stdout.write('Starting {}'.format(pth))

        #load raw and gts
        data = load_np(os.path.join(os.path.dirname(training_data_src[0]), pth+'.npy'))
        raw = data[0]
        ground_truth = data[1]
        gt = np.asarray(np.nonzero(ground_truth)).T

        #get labels and pair based on distance
        centers = probabilitymap_to_centers_thresh(os.path.join(os.path.dirname(cnn_output[0]), pth), threshold = 1,
                                                   numZSlicesPerSplit=250, overlapping_planes = 40, cores = 4, verbose = verbose)
        try:
            paired, tp, fp, fn = pairwise_distance_metrics_multiple_cutoffs(gt, centers, verbose=False, return_paired=True, cutoffs=[cutoff])[0]

            #optional
            cnn_src = os.path.join(os.path.dirname(cnn_output[0]), pth) if collect_cnn == True else False

            #centers that were paired with a ground truth point are true positives, every other center is a false positive
            TP = [centers[xx[1]] for xx in paired]
            TPS = get_pixels_around_center(np.asarray(TP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)
            FP = np.asarray(list(set(centers).difference(set(TP))))
            FPS = get_pixels_around_center(np.asarray(FP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)

            #append
            tps.append(TPS); fps.append(FPS); gts.append(gt)
        except Exception as e:
            #best effort: report the failing volume and carry on with the next one
            #(previously a `break` placed before this message made it unreachable and silently stopped the whole loop)
            print('\n\n\nSkipping {}, due to error: {}\n\n\n'.format(pth, e))
            continue

    return {'true_positives': tps, 'false_positives': fps, 'ground_truths': gts}
#zmd added 20190312 - these should be in order of points inputted from raw space np.save(os.path.join(dst, "annotation_pixel_value_coordinates.npy"), point_lst) df = count_structure_lister(id_table, *point_lst).fillna(0) df.to_csv( os.path.join( dst, os.path.basename(id_table).replace('.xlsx', '') + '_with_anatomical_assignment_of_cell_counts.csv')) #load and convert to single voxel loc zyx = np.asarray([ str((int(xx[0]), int(xx[1]), int(xx[2]))) for xx in load_np(converted_points) ]) zyx_cnt = Counter(zyx) #manually call transformix.. transformed_dst = os.path.join(dst1, 'transformed_points') makedir(transformed_dst) if qc_overlay_transform_type == 'all': tp0 = [ xx for xx in listall(os.path.dirname(cellvol.ch_to_reg_to_atlas), 'TransformParameters.0.txt') if 'sig_to_reg' in xx and 'regtoatlas' not in xx ][0] tp1 = [ xx
def apply_random_forest(classifier, raw_src, cnn_src, collect_cnn = False, size = (3,12,12), pad=False, cores=4, numZSlicesPerSplit=300, overlapping_planes = 20, chunks=10, maxip=0):
    '''Find centers in CNN output and keep only the ones a pretrained random forest predicts as 1.

    THIS IS MEMORY INEFFICIENT - SEE random_forest.py for better functions

    classifier = pretrained random forest or path to pretrained random forest
    raw_src = raw input data, one of:
        path ending in '.npy': loaded with load_np
        path ending in '.tif': loaded with tifffile.imread
        any other path (folder of tiffs): processed with get_pixels_around_center_mem_eff
        numpy array / memmap: used directly
    cnn_src = folder of tiffs from cnn output
    collect_cnn = optional to include cnn data for random forest input
    size = passed to get_pixels_around_center
    pad = if True, pad the edges of objects determined. False: remove edge cases, usually better since they will be few in number
    cores = number of cores for parallelization, larger the number the less memory efficient
    numZSlicesPerSplit: chunk of zplanes to process at once. Adjust this and cores based on memory constraints.
    overlapping_planes: number of planes on each side to overlap by, this should be a comfortable amount larger than the maximum z distances of a single object
    chunks = number of chunks to divide connected components by. The larger the number the more memory efficiency, but a bit more IO required
    maxip = passed to get_pixels_around_center

    Returns a dictionary consisting of k=centers, v=corresponding pixel indices determine by CNN
    (empty dictionary if no centers were detected)

    Raises TypeError if raw_src is neither a path nor a numpy array.

    TO DO - MAKE SURE MAPPING STAYS THE SAME AND ORDER IS NOT LOST

    classifier = '/home/wanglab/wang/pisano/Python/lightsheet/supp_files/h129_rf_classifier.pkl'
    cnn_src = '/home/wanglab/wang/pisano/conv_net/annotations/better_res/h129_memmap_arrays_cnn_output/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00_z200-400_y1350-1700_x3100-3450'
    raw_src = load_np('/home/wanglab/wang/pisano/conv_net/annotations/better_res/h129_memmap_arrays/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00_z200-400_y1350-1700_x3100-3450.npy')[0]
    '''
    rf = joblib.load(classifier) if isinstance(classifier, str) else classifier

    #load and find centers from cnn
    center_pixels_dct = probabilitymap_to_centers_thresh(cnn_src, threshold = 1, numZSlicesPerSplit=numZSlicesPerSplit,
                                                         overlapping_planes = overlapping_planes, cores = cores, return_pixels = True, verbose = False)

    #nothing detected, so there is nothing to classify
    if len(center_pixels_dct) == 0: return {}

    #optional
    if collect_cnn == False: cnn_src = False

    #materialize the keys once so the helpers get a real list on both python 2 and 3
    pnts = list(center_pixels_dct.keys())

    #load and collect pixels - if path to folder of tiffs will be done in memory efficient way
    if isinstance(raw_src, str):
        #compare the file extension itself; the previous raw_src[:-4] slice dropped the extension, so these branches did not match such paths
        if raw_src.endswith('.npy'):
            inn = get_pixels_around_center(pnts=pnts, src=load_np(raw_src), cnn_src=cnn_src, size = size, pad=pad, return_pairs=True, maxip=maxip)
        elif raw_src.endswith('.tif'):
            inn = get_pixels_around_center(pnts=pnts, src=tifffile.imread(raw_src), cnn_src=cnn_src, size = size, pad=pad, return_pairs=True, maxip=maxip)
        else:
            inn = get_pixels_around_center_mem_eff(pnts=pnts, src=raw_src, cnn_src=cnn_src, size = size, pad=pad, return_pairs=True, cores=cores, chunks=chunks, maxip=maxip)
    elif isinstance(raw_src, np.ndarray):
        #np.memmap is a subclass of np.ndarray, so this covers both in-memory and memory-mapped arrays
        inn = get_pixels_around_center(pnts=pnts, src=raw_src, cnn_src=cnn_src, size = size, pad=pad, return_pairs=True, maxip=maxip)
    else:
        raise TypeError('raw_src must be a path or a numpy array, got {}'.format(type(raw_src)))

    #predict - keys and values of the same unmodified dict iterate in matching order
    out = rf.predict(list(inn.values()))

    #remove centers that are considered false positives
    centers = [xx for xx, label in zip(inn.keys(), out) if label == 1]

    #remove non determine centers from above
    center_pixels_dct = {tuple(c):center_pixels_dct[tuple(c)] for c in centers}

    return center_pixels_dct
#Exploratory walk-through: bin CNN detections, train a random forest, plot its ROC, apply it to the first volume and build dilated masks for visual comparison.

#parse
dct = bin_data(cnn_output, training_data_src, size=size, cutoff = cutoff, pad = pad, verbose=verbose, maxip=maxip)
tps = dct['true_positives']; fps = dct['false_positives']; gts = dct['ground_truths']

#train
kwargs = train_random_forest(tps, fps, n_estimator = n_estimator, max_depth = max_depth, balance = balance, cores = cores, kfold_splits = kfold_splits, dst = dst, average=precision_score)

#plot
#%matplotlib inline
save = '/home/wanglab/Downloads/rf'
plot_roc(save=save, **kwargs)

#apply - use the first (sorted) cnn output folder and the first (sorted) training array
cnn_src = listdirfull(cnn_output); cnn_src.sort(); cnn_src = cnn_src[0]
inn = listdirfull(training_data_src); inn.sort()
first_volume = load_np(inn[0]) #load once; index 0 is used as raw data and index 1 as ground truth
raw_src = first_volume[0]
gt = first_volume[1]
#NOTE(review): size here is hard-coded and may differ from the `size` used for training above - confirm the classifier expects this input length
centers = apply_random_forest(kwargs['classifier'], raw_src, cnn_src, size = (7,25,25))

#show
from tools.conv_net.functions.dilation import dilate_with_element, ball
gt = dilate_with_element(gt, ball(5))
src = np.zeros_like(gt)
#apply_random_forest returns a dict keyed by center, so build the coordinate array from its keys (a dict has no .astype)
for c in np.asarray(list(centers.keys())).astype('int'):
    src[c[0],c[1],c[2]] = 1
src = dilate_with_element(src, ball(5))

#Sweep: <-- usually performance is not affected that much by this
for n_estimator in (10,20,50,100):
    for max_depth in (5,10,20,50,100):
        print('\n\n n_estimator--{}, max_depth--{}'.format(n_estimator, max_depth))
dst1 = os.path.join(dst0, 'elastix'); makedir(dst1)

#####check cell detection (modeled from lightsheet/tools/registration/transform_cell_counts)
#3dunet cell dataframe
dataframe = pd.read_csv(listdirfull(os.path.join(fld, '3dunet_output/pooled_cell_measures'), '.csv')[0])

#####generate a downsized version######
if generate_downsized_overlay:
    cellvolloaded = tifffile.imread(cellvol.resampled_for_elastix_vol)
    cnn_cellvolloaded = np.zeros_like(cellvolloaded)
    zyx = dataframe[['z','y','x']].values

    #adjust for reorientation THEN rescaling, remember full size data needs dimension change releative to resample
    fullsizedimensions = get_fullsizedims_from_kwargs(kwargs) #don't get from kwargs['volumes'][0].fullsizedimensions it's bad! use this instead
    zyx = fix_contour_orientation(zyx, verbose=verbose, **kwargs) #now in orientation of resample
    #the resampled volume is already in memory, so reuse its shape instead of reading the tiff a second time
    zyx = points_resample(zyx, original_dims = fix_dimension_orientation(fullsizedimensions, **kwargs), resample_dims = cellvolloaded.shape, verbose = verbose)[:, :3]

    #collapse to single voxel locations; string keys let Counter tally how many cells land on each voxel
    #NOTE(review): load_np is handed an in-memory array here - confirm it passes arrays through unchanged
    zyx = np.asarray([str((int(xx[0]), int(xx[1]), int(xx[2]))) for xx in load_np(zyx)])
    from collections import Counter
    zyx_cnt = Counter(zyx)

    #now overlay
    #NOTE: the loop variable reuses the name zyx, so after the loop zyx holds the last string key rather than the coordinate array
    for zyx,v in zyx_cnt.items():
        z,y,x = [int(xx) for xx in zyx.replace('(','',).replace(')','').split(',')]
        try:
            cnn_cellvolloaded[z,y,x] = v*100
        except Exception as e:
            #best effort: report a voxel that could not be written and keep going
            print(e)

    #stack detections, raw volume and an empty volume along a new last axis
    merged = np.stack([cnn_cellvolloaded, cellvolloaded, np.zeros_like(cellvolloaded)], -1)
    merged = np.swapaxes(merged, 0,2)#reorient to horizontal
    tifffile.imsave(os.path.join(dst, '{}_points_merged_resampled_for_elastix.tif'.format(os.path.basename(fld))), merged)

#EXAMPLE USING LIGHTSHEET - assumes marking centers in the 'raw' full sized cell channel. This will transform those centers into "atlas" space (in this case the moving image)
#in this case the "inverse transform has the atlas as the moving image in the first step, and the autofluorescence channel as the moving image in the second step
#make 200,350,350 zrng = range(200, 600, 200) #at least a delta of 100 yrng = range(1000, 5000, 350) xrng = range(1000, 5000, 350) src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170204_tp_bl6_cri_1000r_02/full_sizedatafld/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00' src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170116_tp_bl6_lob7_500r_09/full_sizedatafld/20170116_tp_bl6_lob7_500r_09_647_010na_z7d5um_75msec_10povlp_ch00' src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170130_tp_bl6_sim_1750r_03/full_sizedatafld/20170130_tp_bl6_sim_1750r_03_647_010na_1hfds_z7d5um_50msec_10povlp_ch00' src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_10/full_sizedatafld/20180215_jg_bl6f_prv_10_647_010na_z7d5um_250msec_10povlap_ch00' src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_07/full_sizedatafld/20180215_jg_bl6f_prv_07_647_010na_z7d5um_250msec_10povlap_ch00' dst = os.path.join('/home/wanglab/Downloads/', os.path.basename(src)) makedir(dst) lst = listdirfull(src, keyword='.tif') lst.sort() make_memmap_from_tiff_list(lst, dst + '.npy') arr = load_np(dst + '.npy') makedir(dst) dst = os.path.join( '/home/wanglab/wang/pisano/conv_net/annotations/better_res', os.path.basename(src)) makedir(dst) for i in range(len(zrng) - 1): for ii in range(len(yrng) - 1): for iii in range(len(xrng) - 1): z, zz = zrng[i], zrng[i + 1] y, yy = yrng[ii], yrng[ii + 1] x, xx = xrng[iii], xrng[iii + 1] tifffile.imsave(os.path.join( dst, '{}_z{}-{}_y{}-{}_x{}-{}.tif'.format( os.path.basename(src), z, zz, y, yy, x, xx)), arr[z:zz, y:yy, x:xx],