def get_fullsizedims_from_kwargs(kwargs):
    """Return the (z, y, x) dimensions of the full-size cell-channel volume.

    The ``fullsizedimensions`` stored on the volumes is incorrect when
    terastitcher was used, so the dimensions are measured from disk instead:
    z is the number of tiff planes found in the volume's
    ``full_sizedatafld_vol`` folder and (y, x) is the shape of one plane.

    Parameters
    ----------
    kwargs : dict
        Parameter dictionary; ``kwargs['volumes']`` must hold at least one
        volume whose ``ch_type`` is ``'cellch'``.

    Returns
    -------
    tuple
        ``(zf, yf, xf)``.

    Raises
    ------
    IndexError
        If no ``'cellch'`` volume exists or the folder holds no tiff planes.
    """
    vol = [xx for xx in kwargs['volumes'] if xx.ch_type == 'cellch'][0]

    # List the folder once. The previous code listed it twice with two
    # different keywords ('.tif' for the count, 'tif' for the plane that was
    # read), so the plane measured was not guaranteed to be a counted one.
    planes = listdirfull(vol.full_sizedatafld_vol, '.tif')
    zf = len(planes)
    yf, xf = tifffile.imread(planes[0]).shape
    return (zf, yf, xf)
def apply_classifier(classifier, raw_src, cnn_src, collect_cnn = False, size = (3,12,12), pad=False, cores=10, numZSlicesPerSplit=50, overlapping_planes = 15, verbose=True, save=True, maxip=0):
    """Threshold CNN output, find connected components and classify their centers.

    The z range of ``cnn_src`` is split into chunks of ``numZSlicesPerSplit``
    planes; every chunk is handed to ``apply_classifier_helper``, either
    through a multiprocessing pool (``cores > 1``) or serially.

    Parameters
    ----------
    classifier : pretrained random forest or path to a pretrained random forest
    raw_src : folder of tiffs of raw input data
    cnn_src : folder of tiffs from cnn output
    collect_cnn : optional, include cnn data for random forest input
    size : forwarded unchanged to ``apply_classifier_helper``
    pad : if True, pad the edges of objects determined. False: remove edge
        cases, usually better since they will be few in number
    cores : number of cores for parallelization; the larger the number the
        less memory efficient
    numZSlicesPerSplit : chunk of z planes to process at once. Adjust this and
        ``cores`` based on memory constraints.
    overlapping_planes : number of planes on each side to overlap by; this
        should be a comfortable amount larger than the maximum z distance of
        a single object
    verbose : print progress and timing
    save : optional save to prevent rerun of jobs. When truthy, a temporary
        folder ``cnn_src + '_apply_classifier_tmp'`` is created and passed to
        every job; afterwards the results are reloaded from that folder and
        the folder is deleted.
    maxip : forwarded unchanged to ``apply_classifier_helper``

    Returns
    -------
    dict
        k=centers, v=[corresponding pixel indices determined by CNN, maximum
        intensity, list of maximum radius/plane]
    """
    # handle inputs
    threshold = 1
    zyx_search_range = (2, 10, 10)
    zdim = len(listdirfull(cnn_src, keyword='.tif'))

    # optional save to prevent rerun of jobs
    if save:
        save = cnn_src + '_apply_classifier_tmp'
        makedir(save)

    # run
    if verbose:
        sys.stdout.write('\n Thesholding, determining connected pixels, identifying center of masses, applying classifier\n\n')
        sys.stdout.flush()
    st = time.time()

    rng = range(0, zdim, numZSlicesPerSplit)
    jobs = len(rng)
    iterlst = [(cnn_src, raw_src, collect_cnn, z, zdim, numZSlicesPerSplit,
                overlapping_planes, threshold, classifier, size,
                zyx_search_range, pad, job, jobs, verbose, save, maxip)
               for job, z in enumerate(rng)]

    # par vs not par
    if cores > 1:
        # Leaving the ``with`` block terminates the pool, as the explicit
        # ``terminate()`` call did before.
        with mp.Pool(cores) as pool:
            results = pool.starmap(apply_classifier_helper, iterlst)
    else:
        # starmap unpacks each tuple into positional arguments, so the serial
        # path has to unpack as well to call the helper the same way.
        results = [apply_classifier_helper(*job_args) for job_args in iterlst]

    # optional reload
    if save:
        results = [load_dictionary(xx) for xx in listdirfull(save)]
        shutil.rmtree(save)

    # unpack
    if verbose:
        sys.stdout.write('\n...finished, formatting dictionary...')
        sys.stdout.flush()
    center_pixels_intensity_radius_dct = {}
    for chunk_dct in results:
        center_pixels_intensity_radius_dct.update(chunk_dct)
    # drop the 'None' placeholder entry if any chunk produced one
    center_pixels_intensity_radius_dct.pop('None', None)

    if verbose:
        print('Total time {} minutes'.format(round((time.time() - st) / 60)))
        print('{} centers found.'.format(len(center_pixels_intensity_radius_dct)))

    return center_pixels_intensity_radius_dct
def bin_data(cnn_output, training_data_src, size = (3, 12, 12), cutoff = 15, pad = True, verbose = False, collect_cnn = False, maxip=0):
    """Bin CNN connected components into true and false positives.

    Connected components are collected from the CNN output and paired with
    the ground-truth labels of the matching training volume.

    Parameters
    ----------
    cnn_output : pth/list of folders containing CNN output
    training_data_src : pth/list of folders containing numpy arrays [c,z,y,x];
        c=0: raw data, c=1: nonzero pixels representing ground truth centers.
        Requires the same naming as the cnn output.
    size : used in get_pixels_around_center; distance from point in zyx.
        Note: this is effectively a radius (NOT DIAMETER).
    cutoff : allowable distance (IN PIXELS) for a true positive when comparing
        ground truth centers with the centers of mass of cnn-labelled
        connected components
    pad : (optional) important for edge cases in the training set (i.e.
        points that don't have sufficient border around them).
        True: if the point is on the edge of the image, pad evenly.
        False: if the point is on the edge of the image, drop it.
    verbose : print progress
    collect_cnn : (optional) if true keep these data for training as well
    maxip : int, number of maxips to ravel into data

    Returns
    -------
    dict
        ``{'true_positives': tps, 'false_positives': fps, 'ground_truths': gts}``
        where each value is a list with one entry per processed volume.
    """
    if isinstance(cnn_output, str):
        cnn_output = listdirfull(cnn_output)
    if isinstance(training_data_src, str):
        training_data_src = listdirfull(training_data_src)

    # volumes present in both sets; training files carry a 4-character
    # extension that is stripped before comparing names
    training_names = {os.path.basename(xx[:-4]) for xx in training_data_src}
    cnn_names = {os.path.basename(xx) for xx in cnn_output}
    intersection = list(training_names.intersection(cnn_names))

    if verbose:
        print('Collect cnn == {}'.format(collect_cnn))

    tps = []
    fps = []
    gts = []
    for pth in intersection:
        if verbose:
            sys.stdout.write('Starting {}'.format(pth))

        # load raw and gts
        data = load_np(os.path.join(os.path.dirname(training_data_src[0]), pth + '.npy'))
        raw = data[0]
        ground_truth = data[1]
        gt = np.asarray(np.nonzero(ground_truth)).T

        # get labels and pair based on distance
        cnn_fld = os.path.join(os.path.dirname(cnn_output[0]), pth)
        centers = probabilitymap_to_centers_thresh(cnn_fld, threshold = 1, numZSlicesPerSplit=250, overlapping_planes = 40, cores = 4, verbose = verbose)

        try:
            paired, tp, fp, fn = pairwise_distance_metrics_multiple_cutoffs(gt, centers, verbose=False, return_paired=True, cutoffs=[cutoff])[0]

            # optional
            cnn_src = cnn_fld if collect_cnn == True else False

            TP = [centers[xx[1]] for xx in paired]
            TPS = get_pixels_around_center(np.asarray(TP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)
            FP = np.asarray(list(set(centers).difference(set(TP))))
            FPS = get_pixels_around_center(np.asarray(FP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)

            # append
            tps.append(TPS)
            fps.append(FPS)
            gts.append(gt)
        except Exception as e:
            # The old handler used Python 2 ``except Exception, e`` syntax and
            # put ``break`` in front of this message, so the message was never
            # printed and one bad volume silently dropped all remaining ones.
            # Report the volume and carry on, as the message describes.
            print('\n\n\nSkipping {}, due to error: {}\n\n\n'.format(pth, e))
            continue

    return {'true_positives': tps, 'false_positives': fps, 'ground_truths': gts}
def get_fullsizedimensions(dct):
    """Return the (z, y, x) dimensions of the full-size cell-channel volume.

    Works around the parameter dictionary in case paths were mis-saved: the
    folder recorded for the ``"cellch"`` volume is tried first, and if
    anything about that fails the newest-sorting directory inside
    ``<dirname(dct)>/full_sizedatafld`` is measured instead.

    Parameters
    ----------
    dct : str
        Path handed to ``load_kwargs``; its parent directory is also where
        the ``full_sizedatafld`` fallback is looked up.

    Returns
    -------
    tuple
        ``(zf, yf, xf)``: number of tiff planes and the shape of one plane.
    """
    try:
        kwargs = load_kwargs(dct)
        vol = [xx for xx in kwargs["volumes"] if xx.ch_type == "cellch"][0]
        src = vol.full_sizedatafld_vol
        planes = listdirfull(src, ".tif")
        zf = len(planes)
        yf, xf = tifffile.imread(planes[0]).shape
    except Exception:
        # The param dict is messed up. ``Exception`` rather than a bare
        # ``except`` so Ctrl-C / SystemExit are not swallowed by the fallback.
        fsz = os.path.join(os.path.dirname(dct), "full_sizedatafld")
        vols = sorted(os.listdir(fsz))
        src = os.path.join(fsz, vols[-1])  # hack - try to load param_dict instead?
        if not os.path.isdir(src):
            # the last entry was a file, fall back to the one before it
            src = os.path.join(fsz, vols[-2])
        planes = listdirfull(src, ".tif")
        zf = len(planes)
        yf, xf = tifffile.imread(planes[0]).shape

    return (zf, yf, xf)
def get_resampledvol_n_dimensions(dct):
    """Locate the resampled cell-channel volume and return its shape.

    Works around the parameter dictionary in case paths were mis-saved. The
    path stored on the ``"cellch"`` volume (``resampled_for_elastix_vol``) is
    tried first; if that raises ``FileNotFoundError`` the last ``.tif`` (in
    sorted order) next to ``dct`` is used instead.

    Parameters
    ----------
    dct : str
        Path handed to ``load_kwargs``; its parent directory is searched for
        the fallback tiff.

    Returns
    -------
    tuple
        ``(resampled_dims, resampled_vol)``: the array shape and the path of
        the file it was read from.
    """
    try:
        params = load_kwargs(dct)
        cell_vols = [v for v in params["volumes"] if v.ch_type == "cellch"]
        resampled_vol = cell_vols[0].resampled_for_elastix_vol
        resampled_dims = tifffile.imread(resampled_vol).shape
    except FileNotFoundError:
        candidates = sorted(listdirfull(os.path.dirname(dct), ".tif"))
        # will be the last one, because of the 647 channel
        resampled_vol = candidates[-1]
        resampled_dims = tifffile.imread(resampled_vol).shape

    return resampled_dims, resampled_vol
def check_registration_cross_sections(out):
    """Write tiled QC images for the brain folders in the module-level ``lst``.

    Two sets of PNGs are saved into ``out``:

    * ``cell_ch_zNNNN.png``: for each of five fixed z planes, the
      gamma-adjusted full-size cell-channel plane of every brain.
    * ``regqc_zNNNN.png``: for each of eight fixed z planes, the
      histogram-equalized ``result.1.tif`` of every brain that has one under
      ``clearmap_cluster_output/elastix_auto_to_atlas``.

    Parameters
    ----------
    out : str
        Destination folder for the PNG files.
    """
    # raw cell channel cross sections
    for z in (100, 200, 300, 400, 500):
        print(z)
        plane_tag = str(z).zfill(4)
        plane_by_brain = {}
        for fld in lst:
            params = load_kwargs(fld)
            cell_vol = [v for v in params["volumes"] if v.ch_type == "cellch"][0]
            # first file whose full path contains the zero-padded plane number
            matches = [pth for pth in listdirfull(cell_vol.full_sizedatafld_vol)
                       if plane_tag in pth]
            plane_by_brain[os.path.basename(fld)] = matches[0]

        target = os.path.join(out, "cell_ch_z{}.png".format(plane_tag))
        adjusted = [adjust_gamma(tifffile.imread(pth), gamma=.6, gain=3)
                    for pth in plane_by_brain.values()]
        tile(src=adjusted, subtitles=list(plane_by_brain.keys()), dst=target)

    # check reg
    result_by_brain = {}
    for fld in lst:
        # result is unused; the call is kept so an unreadable folder still
        # fails here exactly as before
        load_kwargs(fld)
        candidate = os.path.join(fld, "clearmap_cluster_output",
                                 "elastix_auto_to_atlas", "result.1.tif")
        if os.path.exists(candidate):
            result_by_brain[os.path.basename(fld)] = candidate

    # read once
    volumes = [equalize_hist(tifffile.imread(pth))
               for pth in result_by_brain.values()]
    for z in (50, 100, 150, 200, 250, 300, 350, 400):
        print(z)
        target = os.path.join(out, "regqc_z{}.png".format(str(z).zfill(4)))
        tile(src=[volume[z] for volume in volumes],
             subtitles=list(result_by_brain.keys()), dst=target)

    print("saved in : {}".format(out))
f1 = 2*( (precision*recall)/(precision+recall) ) #calculating f1 score except Exception as e: print(e) f1 = np.nan #if tp, fn, etc. are 0 df.loc[df.parameters == os.path.basename(fl), "f1"] = f1 df.loc[df.parameters == os.path.basename(fl), "tp"] = tp df.loc[df.parameters == os.path.basename(fl), "fp"] = fp df.loc[df.parameters == os.path.basename(fl), "fn"] = fn #export csv per brain/volume df.to_csv(os.path.join(dst, "%s.csv" % os.path.basename(brain))) #%% dfs = listdirfull(dst) dfs = [pd.read_csv(xx).sort_values(by = ["parameters"]) for xx in dfs if ".csv" in xx] tps = np.array([df.tp.values for df in dfs]).sum(axis = 0) fps = np.array([df.fp.values for df in dfs]).sum(axis = 0) fns = np.array([df.fn.values for df in dfs]).sum(axis = 0) precisions_cm = [(tp/(tp+fps[i])) for i, tp in enumerate(tps)] #add 1 for plot precisions_cm.append(1); precisions_cm = np.array(precisions_cm) recalls_cm = [(tp/(tp+fns[i])) for i, tp in enumerate(tps)] #add 0 for plot recalls_cm.append(0); recalls_cm = np.array(recalls_cm) src = "/jukebox/wang/zahra/conv_net/training/h129/experiment_dirs/20181115_zd_train/precision_recall_curve_295590.csv" df = pd.read_csv(src) precisions_cn = df["precision"].values recalls_cn = df["recall"].values
def _paint_voxels(volume, coords, values):
    """Write ``values[i]`` at ``coords[i]`` in ``volume``; return the coords that fall outside it.

    Bounds are checked explicitly because NumPy accepts negative indices and
    would otherwise write a point that overshot below zero onto the opposite
    face of the volume instead of reporting it.
    """
    zmax, ymax, xmax = volume.shape[:3]
    out_of_bounds = []
    for (z, y, x), value in zip(coords, values):
        if 0 <= z < zmax and 0 <= y < ymax and 0 <= x < xmax:
            volume[z, y, x] = value
        else:
            out_of_bounds.append([z, y, x])
    return out_of_bounds


def overlay_qc(args):
    """Transform detected cell centers to atlas space and optionally write QC overlays.

    Parameters
    ----------
    args : tuple
        ``(fld, folder_suffix, output_folder, verbose, doubletransform,
        make_volumes)``, packed into one argument for multiprocessing.

        * fld: processed brain folder.
        * folder_suffix: sub-folder of ``fld`` holding exactly one cell csv.
        * output_folder: results go to ``output_folder/<basename(fld)>``.
        * verbose: forwarded to the helper functions.
        * doubletransform: if true apply reg2sig followed by atlas2reg
          transforms (cfos->auto->atlas), otherwise atlas2reg only.
        * make_volumes: if true also run transformix on the resampled cell
          volume and save ``merged.tif`` (registered space) and
          ``resampled_merged.tif`` (resampled space) overlays.

    Returns
    -------
    None. Any exception is printed and appended to ``error_file``.
    NOTE(review): ``error_file`` is not defined in this function; it is
    presumably a module-level path - confirm.
    """
    from collections import Counter

    # unpacking this way for multiprocessing
    fld, folder_suffix, output_folder, verbose, doubletransform, make_volumes = args
    try:
        # get 3dunet cell dataframe csv file
        csv_fld = os.path.join(fld, folder_suffix)
        input_csv = listdirfull(csv_fld, ".csv")
        if len(input_csv) != 1:
            # the old assert said "multiple csv files" even when none existed
            raise ValueError("expected exactly one csv file in {}, found {}".format(csv_fld, len(input_csv)))
        dataframe = pd.read_csv(input_csv[0])

        # location to save out
        dst = os.path.join(output_folder, os.path.basename(fld))
        makedir(dst)

        # EXAMPLE USING LIGHTSHEET - assumes marking centers in the "raw" full
        # sized cell channel. This will transform those centers into "atlas"
        # space (in this case the moving image). The "inverse transform" has
        # the atlas as the moving image in the first step and the
        # autofluorescence channel as the moving image in the second step.
        # NOTE - it seems that the registration of cell to auto is failing on
        # occasion....thus get new files...
        cell_inverse_folder = listdirfull(os.path.join(fld, "elastix_inverse_transform"), "cellch")[0]
        a2r = sorted(listall(cell_inverse_folder, "atlas2reg_TransformParameters"))
        r2s = sorted(listall(cell_inverse_folder, "reg2sig_TransformParameters"))  # possibly remove

        # IMPORTANT. the idea is to apply cfos->auto->atlas
        transformfiles = r2s + a2r if doubletransform else a2r  # might get rid of r2s

        lightsheet_parameter_dictionary = os.path.join(fld, "param_dict.p")
        converted_points = generate_transformed_cellcount(dataframe, dst, transformfiles, lightsheet_parameter_dictionary, verbose=verbose)

        # load and convert to single voxel locations; count cells per voxel
        points = np.nan_to_num(np.load(converted_points))
        zyx_cnt = Counter((int(pt[0]), int(pt[1]), int(pt[2])) for pt in points)

        if make_volumes:
            # manually call transformix
            kwargs = load_dictionary(lightsheet_parameter_dictionary)
            cell_vol = [xx for xx in kwargs["volumes"] if xx.ch_type == "cellch"][0]
            resampled_path = cell_vol.resampled_for_elastix_vol
            transformed_vol = os.path.join(dst, "transformed_volume")
            makedir(transformed_vol)

            # reg to atlas
            reg_to_atlas = [os.path.join(fld, "elastix/TransformParameters.0.txt"),
                            os.path.join(fld, "elastix/TransformParameters.1.txt")]
            if doubletransform:
                # sig to reg
                sig_fld = os.path.dirname(cell_vol.ch_to_reg_to_atlas)
                sig_to_reg = [listall(sig_fld, "/TransformParameters.0")[0],
                              listall(sig_fld, "/TransformParameters.1")[0]]
                transformfiles = sig_to_reg + reg_to_atlas
            else:
                transformfiles = reg_to_atlas
            transformfiles = modify_transform_files(transformfiles, transformed_vol)  # copy over elastix files
            transformix_command_line_call(resampled_path, transformed_vol, transformfiles[-1])

            # cell_registered channel; rewritten in place with compression
            result_path = os.path.join(transformed_vol, "result.tif")
            cell_reg = tifffile.imread(result_path)
            tifffile.imsave(result_path, cell_reg, compress=1)

            cell_cnn = np.zeros_like(cell_reg)
            coords = list(zyx_cnt.keys())
            # Saturate at 255: the array is cast to uint8 below and unclipped
            # counts of 3 or more (>= 300) would wrap around to dim values.
            values = [min(count * 100, 255) for count in zyx_cnt.values()]
            badlist = _paint_voxels(cell_cnn, coords, values)

            # apply x y dilation
            r = 2
            selem = ball(r)[int(r / 2)]
            cell_cnn = cell_cnn.astype("uint8")
            cell_cnn = np.asarray([cv2.dilate(plane, selem, iterations=1) for plane in cell_cnn])

            if badlist:
                print("{} errors in mapping with cell_cnn shape {}, each max dim {}, \npossibly due to a registration overshoot or not using double transform\n\n{}".format(
                    len(badlist), cell_cnn.shape, np.max(np.asarray(coords), 0), badlist))
            merged = np.stack([cell_cnn, cell_reg, np.zeros_like(cell_reg)], -1)
            tifffile.imsave(os.path.join(transformed_vol, "merged.tif"), merged)

            # check at the resampled-for-elastix phase before transform...
            # this mapping looks good...
            zyx = dataframe[["z", "y", "x"]].values
            # don't take kwargs["volumes"][0].fullsizedimensions, it's bad!
            fullsizedimensions = get_fullsizedims_from_kwargs(kwargs)
            zyx = fix_contour_orientation(zyx, verbose=verbose, **kwargs)  # now in orientation of resample

            # cell channel
            cell_ch = tifffile.imread(resampled_path)
            zyx = points_resample(zyx,
                                  original_dims=fix_dimension_orientation(fullsizedimensions, **kwargs),
                                  resample_dims=cell_ch.shape,
                                  verbose=verbose)[:, :3]

            cell_cnn = np.zeros_like(cell_ch)
            resampled_coords = [tuple(int(xx) for xx in pt) for pt in zyx]
            _paint_voxels(cell_cnn, resampled_coords, [100] * len(resampled_coords))
            merged = np.stack([cell_cnn, cell_ch, np.zeros_like(cell_ch)], -1)
            tifffile.imsave(os.path.join(transformed_vol, "resampled_merged.tif"), merged)

    except Exception as e:
        print(e)
        with open(error_file, "a") as err_fl:
            err_fl.write("\n\n{} {}\n\n".format(fld, e))
training_data_src = '/home/wanglab/wang/pisano/conv_net/annotations/better_res/h129_memmap_arrays' #parse dct = bin_data(cnn_output, training_data_src, size=size, cutoff = cutoff, pad = pad, verbose=verbose, maxip=maxip) tps = dct['true_positives']; fps = dct['false_positives']; gts = dct['ground_truths'] #train kwargs = train_random_forest(tps, fps, n_estimator = n_estimator, max_depth = max_depth, balance = balance, cores = cores, kfold_splits = kfold_splits, dst = dst, average=precision_score) #plot #%matplotlib inline save = '/home/wanglab/Downloads/rf' plot_roc(save=save, **kwargs) #apply cnn_src = listdirfull(cnn_output); cnn_src.sort(); cnn_src = cnn_src[0] inn = listdirfull(training_data_src); inn.sort(); raw_src = load_np(inn[0])[0] gt = load_np(inn[0])[1] centers = apply_random_forest(kwargs['classifier'], raw_src, cnn_src, size = (7,25,25)) #show from tools.conv_net.functions.dilation import dilate_with_element, ball gt = dilate_with_element(gt, ball(5)) src = np.zeros_like(gt) for c in centers.astype('int'): src[c[0],c[1],c[2]] = 1 src = dilate_with_element(src, ball(5)) #Sweep: <-- usually performance is not affected that much by this for n_estimator in (10,20,50,100):
from skimage.external import tifffile import skimage from tools.utils.directorydeterminer import directorydeterminer from scipy.ndimage.interpolation import zoom from tools.registration.transform_cell_counts import generate_transformed_cellcount, get_fullsizedims_from_kwargs, points_resample from tools.registration.transform_list_of_points import modify_transform_files from tools.imageprocessing.orientation import fix_contour_orientation, fix_dimension_orientation import matplotlib.gridspec as gridspec from tools.conv_net.functions.dilation import dilate_with_element from skimage.morphology import ball if __name__ == '__main__': #set up dst = '/home/wanglab/wang/pisano/tracing_output/qc/antero_no_jg'; makedir(dst) lst = [xx for xx in listdirfull('/home/wanglab/wang/pisano/tracing_output/antero_4x') if 'jg' not in os.path.basename(xx)] dst = '/home/wanglab/wang/pisano/tracing_output/qc/antero_only_jg'; makedir(dst) lst = [xx for xx in listdirfull('/home/wanglab/wang/pisano/tracing_output/antero_4x') if 'jg' in os.path.basename(xx)] cnn_transform_type = 'affine_only_reg_to_sig';#both for regwatlas, and only affine for sig adn reg #'all', 'single': don't consider reg with sig at all volume_transform_type = 'single';#both for regwatlas, and only affine for sig adn reg #'all', 'single': don't consider reg with sig at all verbose = True generate_registered_overlay = False #this looks bad generate_downsized_overlay = True #this looks better #fld = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170130_tp_bl6_sim_1750r_03' #loop for fld in lst: try: print fld kwargs = load_kwargs(fld)
transparent=True) plt.close() return parent_list #%% if __name__ == "__main__": ann_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/annotation_2017_25um_sagittal_forDVscans_16bit.tif" atl_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/average_template_25_sagittal_forDVscans.tif" pth = "/jukebox/LightSheetTransfer/kelly/201908_cfos" subdir = "cell_region_assignment_99percentile_no_erosion_20190909" #combine to get all paths pths = listdirfull(pth, "647") pths.sort() #%% #make clearmap style heatmaps for pth in pths: dst = make_heatmaps( pth, subdir) #subdir = directory in which cell coordinates are stored print("made heat map for: {}".format(pth)) #%% #analysis for dorsal up brains ##############################################################################MAKE P-VALUE MAPS############################################################ #make destination directory dst = "/jukebox/wang/seagravesk/lightsheet/cfos_raw_images/pooled_analysis/2019" pvaldst = "/jukebox/wang/seagravesk/lightsheet/cfos_raw_images/pooled_analysis/2019/pvalue_maps/"
@author: wanglab """ import os, subprocess as sp, tifffile, numpy as np, shutil, matplotlib.pyplot as plt, matplotlib as mpl from tools.analysis.analyze_injection_inverse_transform import pool_injections_inversetransform from tools.utils.io import makedir, load_kwargs, listdirfull from tools.imageprocessing.orientation import fix_orientation data = "/jukebox/wang/pisano/tracing_output/eaat4" src = "/jukebox/wang/zahra/eaat4_screening/201910_analysis/transformed_volumes" atl_pth = "/jukebox/LightSheetTransfer/atlas/sagittal_atlas_20um_iso.tif" ann_pth = "/jukebox/LightSheetTransfer/atlas/annotation_sagittal_atlas_20um_iso.tif" dst = "/jukebox/wang/zahra/eaat4_screening/201910_analysis/merged_volumes" makedir(dst) imgs = listdirfull(src, "trnsfm2atl") imgs.sort() sites = np.array([tifffile.imread(xx) for xx in imgs]) #the y-axis cutoff for visualization ann_raw = tifffile.imread(ann_pth) #apparent cutoff anns = np.unique(ann_raw).astype(int) print(ann_raw.shape) #annotation IDs of the cerebellum ONLY that are actually represented in annotation file iids = { "Lingula (I)": 912, "Lobule II": 976, "Lobule III": 984, "Lobule IV-V": 1091,
from skimage.util import view_as_windows from skimage.external import tifffile if __name__ == '__main__': #make 200,350,350 zrng = range(200, 600, 200) #at least a delta of 100 yrng = range(1000, 5000, 350) xrng = range(1000, 5000, 350) src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170204_tp_bl6_cri_1000r_02/full_sizedatafld/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00' src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170116_tp_bl6_lob7_500r_09/full_sizedatafld/20170116_tp_bl6_lob7_500r_09_647_010na_z7d5um_75msec_10povlp_ch00' src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170130_tp_bl6_sim_1750r_03/full_sizedatafld/20170130_tp_bl6_sim_1750r_03_647_010na_1hfds_z7d5um_50msec_10povlp_ch00' src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_10/full_sizedatafld/20180215_jg_bl6f_prv_10_647_010na_z7d5um_250msec_10povlap_ch00' src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_07/full_sizedatafld/20180215_jg_bl6f_prv_07_647_010na_z7d5um_250msec_10povlap_ch00' dst = os.path.join('/home/wanglab/Downloads/', os.path.basename(src)) makedir(dst) lst = listdirfull(src, keyword='.tif') lst.sort() make_memmap_from_tiff_list(lst, dst + '.npy') arr = load_np(dst + '.npy') makedir(dst) dst = os.path.join( '/home/wanglab/wang/pisano/conv_net/annotations/better_res', os.path.basename(src)) makedir(dst) for i in range(len(zrng) - 1): for ii in range(len(yrng) - 1): for iii in range(len(xrng) - 1): z, zz = zrng[i], zrng[i + 1] y, yy = yrng[ii], yrng[ii + 1] x, xx = xrng[iii], xrng[iii + 1]
for i, comp in enumerate(comparisons): print(conds[i]) ttest_stats( comp[0], comp[1], os.path.join(final_dst, conds[i] + "/multiple_comparisons_output"), conds[i]) #%% #2019 dataset ann_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/annotation_2017_25um_sagittal_forDVscans_16bit.tif" atl_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/average_template_25_sagittal_forDVscans.tif" pth = "/jukebox/LightSheetTransfer/kelly/201908_cfos" subdir = "cell_region_assignment_99percentile_no_erosion_20190909" #combine to get all paths pths = listdirfull(pth, "647") pths.sort() #analysis for dorsal up brains #make destination directory dst = "/jukebox/wang/seagravesk/lightsheet/cfos_raw_images/pooled_analysis/2019" pvaldst = "/jukebox/wang/seagravesk/lightsheet/cfos_raw_images/pooled_analysis/2019/pvalue_maps/" if not os.path.exists(dst): os.mkdir(dst) if not os.path.exists(pvaldst): os.mkdir(pvaldst) if not os.path.exists(pvaldst + "/dorsal_up"): os.mkdir(pvaldst + "/dorsal_up") ctrl_du_heatmaps = [ os.path.join(pth, os.path.join(xx, subdir + "/cells_heatmap.tif")) for xx in os.listdir(pth) if "647" in xx and "mouse" in xx
def pool_injections_inversetransform(**kwargs):
    """Pool several injection sites into one voxel-count table and heat map.

    Assumes that the basic registration AND inverse transform using elastix
    has been run. If not, runs the inverse transform. Additions to
    analyze_injection.py and pool_injections_for_analysis().

    Inputs
    -----------
    kwargs:
      "inputlist": list of folders generated previously from software
      "annotation": path to the annotation volume (read with SimpleITK)
      "filter_kernel": (5,5,5), gaussian blur in pixels (if registered to ABA
          then 1px likely is 25um)
      "threshold": 10 (int, value to use for thresholding; this value
          represents the number of std devs above the mean of the gblurred
          image)
      "num_sites_to_keep": int, number of injection sites to keep, useful if
          multiple distinct sites (default 1)
      "injectionscale": 45000, use to increase intensity of injection site
          visualizations generated - DOES NOT AFFECT DATA (default 1)
      "imagescale": 2, use to increase intensity of background site
          visualizations generated - DOES NOT AFFECT DATA (default 1)
      "reorientation": ("2","0","1"), use to change image orientation for
          visualization only (default ("0","1","2"))
      "crop": use to crop volume, values below assume horizontal imaging and
          sagittal atlas (default False)
              False
              cerebellum: "[:,390:,:]"
              caudal midbrain: "[:,300:415,:]"
              midbrain: "[:,215:415,:]"
              thalamus: "[:,215:345,:]"
              anterior cortex: "[:,:250,:]"
      "dst": "/home/wanglab/Downloads/test", save location
      "save_individual": True, optional to save individual images, useful to
          inspect brains, which you can then remove bad brains from the list
          and rerun the function (default False)
      "colormap": "plasma"
      "atlas": path to the whole brain atlas tif

    Optional:
    ----------
      "save_array": save a numpy array per brain of the binarized detected site
      "save_tif": save a tif volume per brain of the binarized detected site
      "dpi": dots per square inch to save the heat map at (default 300)

    Returns
    ----------------
    The pooled count table (pandas DataFrame), one column per brain. Also
    written to ``dst``: ``voxel_counts.csv``, ``heatmap.pdf`` (max IP of the
    pooled sites over the atlas) and, if requested, per-brain images/arrays.

    Raises
    ----------------
    RuntimeError if no brain in "inputlist" could be processed.
    """
    inputlist = kwargs["inputlist"]
    dst = kwargs["dst"]
    makedir(dst)
    injscale = kwargs.get("injectionscale", 1)
    imagescale = kwargs.get("imagescale", 1)
    axes = kwargs.get("reorientation", ("0", "1", "2"))
    cmap = kwargs.get("colormap", "plasma")
    save_array = kwargs.get("save_array", False)
    save_tif = kwargs.get("save_tif", False)
    num_sites_to_keep = kwargs.get("num_sites_to_keep", 1)
    # Read once, outside the per-brain try: a missing key used to be swallowed
    # there and misreported as an unrecoverable injection site for every brain.
    crop = kwargs.get("crop", False)
    save_individual = kwargs.get("save_individual", False)
    threshold = kwargs["threshold"]
    filter_kernel = kwargs["filter_kernel"]
    ann = sitk.GetArrayFromImage(sitk.ReadImage(kwargs["annotation"]))

    nonzeros = []     # per brain: index arrays of the nonzero site voxels
    df = None         # pooled table, created from the first brain that succeeds
    countcol = None
    vol_shape = None  # shape of the (cropped) resampled volume

    for pth in inputlist:  # iterate through brains
        print(" loading:\n {}".format(pth))
        dct = load_kwargs(pth)  # load kwargs of brain as dct
        name = os.path.basename(pth)
        try:
            inj_vol = [xx for xx in dct["volumes"] if xx.ch_type == "injch"][0]
            im = tifffile.imread(inj_vol.resampled_for_elastix_vol)
            if crop:
                # NOTE(security): eval of a caller-supplied slice string such
                # as "[:,390:,:]"; never pass untrusted input as "crop".
                im = eval("im{}".format(crop))
            vol_shape = im.shape

            # segment the injection site on the non-registered
            # resampled-for-elastix volume
            array = find_site(im, thresh=threshold, filter_kernel=filter_kernel,
                              num_sites_to_keep=num_sites_to_keep) * injscale
            if save_array:
                np.save(os.path.join(dst, name + ".npy"), array.astype("uint16"))
            if save_tif:
                tifffile.imsave(os.path.join(dst, name + ".tif"), array.astype("uint16"))

            # optional "save_individual"
            if save_individual:
                im = im * imagescale
                a = np.concatenate((np.max(im, axis=0),
                                    np.max(array.astype("uint16"), axis=0)), axis=1)
                b = np.concatenate(
                    (np.fliplr(np.rot90(np.max(fix_orientation(im, axes=axes), axis=0), k=3)),
                     np.fliplr(np.rot90(np.max(fix_orientation(array.astype("uint16"), axes=axes), axis=0), k=3))),
                    axis=1)
                plt.figure()
                plt.imshow(np.concatenate((b, a), axis=0), cmap=cmap, alpha=1)
                plt.axis("off")
                plt.savefig(os.path.join(dst, name + ".pdf"), dpi=300, transparent=True)
                plt.close()

            # find all nonzero pixels in resampled for elastix volume
            print(" finding nonzero pixels for voxel counts...\n")
            nonzeros.append(np.nonzero(array))  # <- for pooled image

            # find transform file
            inverse_fld = inj_vol.inverse_elastixfld
            inj_fld = listdirfull(inverse_fld, "inj")[0]
            atlas2reg2sig_fld = listdirfull(inj_fld, "atlas2reg2sig")[0]
            transformfile = os.path.join(atlas2reg2sig_fld, "reg2sig_TransformParameters.1.txt")
            if not os.path.exists(transformfile):
                print("Transform file file not found. Running elastix inverse transform... \n")
                transformfile = make_inverse_transform(inj_vol, cores=6, **dct)
            else:
                print("Inverse transform exists. \n")

            # apply resizing point transform
            txtflnm = point_transform_due_to_resizing(array, chtype="injch", **dct)
            # run transformix on points
            points_file = point_transformix(txtflnm, transformfile)
            # map to allen atlas
            tdf = transformed_pnts_to_allen(points_file, ann, ch_type="injch",
                                            point_or_index=None, **dct)

            # The table used to be created only when the FIRST brain succeeded;
            # if it failed, every later brain hit a swallowed NameError.
            if df is None:
                df = tdf.copy()
                countcol = "count" if "count" in df.columns else "cell_count"
                df.drop([countcol], axis=1, inplace=True)
            df[name] = tdf[countcol]
        except Exception as e:
            print("could not recover injection site, inspect manually for parameter dictionary errors or missing inj channel \n\n")
            print(" {}: {}".format(pth, e))

    if df is None:
        raise RuntimeError("no injection site could be recovered from any brain in inputlist")

    # cell counts to csv
    df.to_csv(os.path.join(dst, "voxel_counts.csv"))
    print("\n\nCSV file of cell counts, saved as {}\n\n\n".format(
        os.path.join(dst, "voxel_counts.csv")))

    # Pooled array where voxel value = number of brains with that voxel
    # positive. np.nonzero yields each coordinate once per brain, so adding 1
    # per brain counts brains. This replaces a str(tuple) round trip that
    # cannot be parsed back once NumPy scalars print as "np.int64(3)".
    print("Collecting nonzero pixels for pooled image...")
    arr = np.zeros(vol_shape)
    for nz in nonzeros:
        arr[nz] += 1

    # load atlas and generate final figure
    print("Generating final figure...")
    atlas = tifffile.imread(kwargs["atlas"])
    # necessary to have a representative heat map as these segmentations are
    # done from the resized volume, which has different dimensions than atlas
    print("Zooming in atlas...")
    zoomed_atlas = zoom(atlas, 1.3)  # different than original analyze_injection.py
    sites = fix_orientation(arr, axes=axes)

    # cropping
    if crop:
        # NOTE(security): same caller-supplied slice string as above
        zoomed_atlas = eval("zoomed_atlas{}".format(crop))
    zoomed_atlas = fix_orientation(zoomed_atlas, axes=axes)

    # colormap with a fully transparent first entry so zero counts do not
    # hide the atlas underneath
    my_cmap = getattr(plt.cm, cmap)(np.arange(plt.cm.RdBu.N))
    my_cmap[:1, :4] = 0.0
    my_cmap = mpl.colors.ListedColormap(my_cmap)
    my_cmap.set_under("w")

    plt.figure()
    plt.imshow(np.max(zoomed_atlas, axis=0), cmap="gray")
    plt.imshow(np.max(sites, axis=0), alpha=0.99, cmap=my_cmap)
    plt.colorbar()
    plt.axis("off")
    dpi = int(kwargs["dpi"]) if "dpi" in kwargs else 300
    plt.savefig(os.path.join(dst, "heatmap.pdf"), dpi=dpi, transparent=True)
    plt.close()

    print("Saved as {}".format(os.path.join(dst, "heatmap.pdf")))

    return df
sp.call( ["transformix", "-in", invol, "-out", outpth, "-tp", transformfile]) print(svlc, "\n Transformix File Generated: {}".format(outpth)) return outpth if __name__ == "__main__": print(sys.argv) jobid = int(os.environ["SLURM_ARRAY_TASK_ID"]) src = "/jukebox/wang/pisano/tracing_output/eaat4" dst = "/jukebox/wang/zahra/eaat4_screening/201910_analysis" brains = listdirfull(src) brain = brains[jobid] kwargs = load_kwargs(brain) cellvol = [ vol for vol in kwargs["volumes"] if vol.ch_type == "cellch" or vol.ch_type == "injch" ][0] fullszfld = cellvol.full_sizedatafld_vol imgs = [os.path.join(fullszfld, xx) for xx in os.listdir(fullszfld)] imgs.sort() stk = np.array([tif.imread(img) for img in imgs])[:, 1700:, :] #stk = tif.imread(src).astype("uint16")
def make_folder_heirarchy(image_dictionary, dst=False, transfertype='move', scalefactor=(1.63, 1.63, 7.5), percent_overlap=0.1, cores=False, **kwargs):
    """Make folders for compatibility with Terastitcher and populate them.

    A ``dst/<channel>/<y>/<y>_<x>/`` hierarchy is created and each tile image
    is moved or copied to ``<y>_<x>_<z>.tif`` inside it. Folder and file names
    are the tile offsets (pixels * scalefactor * 10, zero-padded to 6 digits).

    Inputs:
    --------------
    image_dictionary: dictionary generated from make_image_dictionary; the
        keys read here are 'sourcefolder', 'xtile', 'ytile', 'channels' and
        'zchanneldct'
    dst (optional): to make folder structure somewhere else; defaults to
        image_dictionary['sourcefolder']
    transfertype (optional):
        'move': move files from current location to dst
        'copy': copy files from current location to dst
    scalefactor: multipliers applied to the (x, y, z) offsets when naming
    percent_overlap: fraction of tile overlap removed from the tile size
        before computing offsets
    cores (optional): if >= 2 the files are transferred with a
        multiprocessing pool of that size, otherwise serially
    **kwargs: accepted for call compatibility, not used here

    Returns:
    -----------------
    None
    """
    # inputs
    if not dst:
        dst = image_dictionary['sourcefolder']
    makedir(dst)

    # image dims
    ypx, xpx = tifffile.imread(listdirfull(image_dictionary['sourcefolder'])[0]).shape

    # factor in percent overlap
    ypx = ypx * (1 - percent_overlap)
    xpx = xpx * (1 - percent_overlap)

    # tiles
    xtile = image_dictionary['xtile']
    ytile = image_dictionary['ytile']

    sys.stdout.write('\nMaking Folders,')
    sys.stdout.flush()

    iterlst = []
    for ch in image_dictionary['channels']:
        chdst = dst + '/' + ch
        makedir(chdst)
        for y in range(ytile):
            ynm = str(int(ypx * y * scalefactor[1]) * 10).zfill(6)
            ydst = chdst + '/' + ynm
            makedir(ydst)
            for x in range(xtile):
                xnm = str(int(xpx * x * scalefactor[0]) * 10).zfill(6)
                xdst = ydst + '/' + ynm + '_' + xnm
                makedir(xdst)
                # Row-major tile position. This was ``y*ytile + x``, which is
                # the same on square grids but maps two tiles to one source
                # file (and skips others) when ytile != xtile.
                tile_index = y * xtile + x
                for z in image_dictionary['zchanneldct']:
                    znm = str(int(int(z) * scalefactor[2]) * 10).zfill(6)
                    lst = image_dictionary['zchanneldct'][str(z).zfill(4)][ch]
                    lst.sort()
                    iterlst.append((lst[tile_index],
                                    xdst + '/' + ynm + '_' + xnm + '_' + znm + '.tif',
                                    transfertype))

    # move/copy files
    if cores and cores >= 2:
        sys.stdout.write(' populating folders: {} files using {} cores...\n'.format(len(iterlst), cores))
        sys.stdout.flush()
        # leaving the ``with`` block terminates the pool
        with mp.Pool(cores) as pool:
            pool.starmap(make_folder_heirarchy_helper, iterlst)
    else:
        sys.stdout.write(' populating folders...')
        sys.stdout.flush()
        # starmap unpacks each (src, dst, transfertype) tuple, so the serial
        # path unpacks too and calls the helper the same way.
        for job_args in iterlst:
            make_folder_heirarchy_helper(*job_args)

    sys.stdout.write('finished.\n')
    sys.stdout.flush()
    return