def reference_registration():
    """
    This script registers the stitched reference channels of the processed
    hybridizations. The comparison is sequential (from Hyb1 --> HybN) and
    not all the hybridization steps are required.
    The output is a set of pickle files with the corners recalculated
    according to the registration.
    The input parameters are entered via argparse.

    Parameters:
    -----------

    path: string
        Exact path to the folder with the stitched .sf.hdf5 files.
    reference_gene: string
        Reference gene used for stitching.
    fraction: float
        Fraction of the image to use for the registration. The selection
        starts from the center of the image. Default 0.2.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Register the stitched images \
                                    of the reference channels')
    parser.add_argument('-path', help='path to the folder with the stitched \
                        XX.sf.hdf5 and XX_data_reg.pkl files')
    parser.add_argument('-reference_gene', help='Reference gene used for the \
                        stitching')
    parser.add_argument('-fraction', default=0.2, type=float,
                        help='fraction of the picture to use for registration')

    args = parser.parse_args()

    # Retrieve the parameters
    processing_directory = args.path
    reference_gene = args.reference_gene
    fraction = args.fraction

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    hr.register_final_images_reg_data_only(processing_directory, gene=reference_gene,
                                           sub_pic_frac=fraction, use_MPI=False,
                                           apply_to_corners=True, apply_warping=False,
                                           region=None, compare_in_seq=False)

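# Hedged usage example (assumes this function is exposed as a console script
# named `reference_registration`; the path and gene name are hypothetical):
#
#   reference_registration -path /data/exp1/stitched_reference_files/ \
#       -reference_gene Nuclei -fraction 0.2
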
def add_coords_to_experimental_metadata():
    """
    Script used to add the coords to the Experimental_metadata.yaml file.

    In order to work:

    - The Experimental_metadata.yaml file needs to have the processing
      hybridization and the coords at point 0 initialized, e.g.:

      TilesPositions:
        Hybridization1:
          0:

    - The coords file extracted from the Nikon microscope needs to be a
      text file, e.g. Hyb1_Coords.txt. The Coords tag is necessary and Hyb
      is the hybridization_key.
    - The input parameters are passed via argparse.

    Parameters:
    -----------

    hybridization_number: string
        Hybridization number (ex. 3) describing the hybridization to process.
    path: string
        Exact path to the folder containing the text file with the coordinates.
    hybridization_key: string
        Possible values 'Hyb' or 'Strip'. To add coordinates for stripping
        if necessary.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Add microscope coords to \
                                     experimental_metadata.yaml file')
    parser.add_argument('-hybridization_number',
                        help='hybridization to add the coords to')
    parser.add_argument('-path',
                        help='path to the folder with the experimental_metadata.yaml file')
    parser.add_argument('-hybridization_key', default='Hyb',
                        help='String to identify the specific coords file')

    args = parser.parse_args()

    # Retrieve the parameters
    processing_directory = args.path
    hyb_nr = args.hybridization_number
    hyb_key = args.hybridization_key

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    utils.add_coords_to_yaml(processing_directory, hyb_nr, hyb_key)

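# Hedged usage example (assumes this function is exposed as a console script
# named `add_coords_to_experimental_metadata`; the path is hypothetical).
# Per the naming convention in the docstring, these arguments would read the
# coordinates from Hyb3_Coords.txt:
#
#   add_coords_to_experimental_metadata -path /data/exp1/ \
#       -hybridization_number 3 -hybridization_key Hyb
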
def preprocessing_script():
    """
    This script will process all the hybridization folders contained in a
    processing folder. The input parameters are passed using argparse.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). Default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the processing directory.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')

    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that need to be skipped
    # for the analysis
    blocked_directories = ['_logs']

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list
                           if el not in blocked_directories]

    for processing_hyb in processing_hyb_list:

        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash

        # Parse the experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(hyb_dir)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

        # ----------------- .nd2 FILE CONVERSION ------------------------------

        # Create the temporary subdirectory tree (serial)
        tmp_dir_path, tmp_gene_dirs = utils.create_subdirectory_tree(hyb_dir,
                        hybridization, hybridizations_infos, processing_hyb,
                        suffix='tmp', add_slash=add_slash)

        # Get the list of the nd2 files to process inside the directory
        files_list = glob.glob(hyb_dir + processing_hyb + '_raw_data' + add_slash + '*.nd2')

        # Get the list of genes that are analyzed in the current hybridization
        gene_list = list(hybridizations_infos[hybridization].keys())

        # Organize the files to process in lists whose order matches the
        # gene_list, for parallel processing
        organized_files_list = [f for gene in gene_list for f in files_list if gene + '.nd2' in f]
        organized_tmp_dir_list = [f for gene in gene_list for f in tmp_gene_dirs if gene in f]

        # Each .nd2 file will be processed in a worker on a different node.
        # Get the addresses of one process/node to use for the conversion.
        node_addresses = utils.identify_nodes(client)
        workers_conversion = [list(el.items())[0][1] for key, el in node_addresses.items()]

        # Run the conversion
        futures_processes = client.map(io.nd2_to_npy, gene_list,
                        organized_files_list, tmp_gene_dirs,
                        processing_hyb=processing_hyb,
                        use_ram=flt_rawcnt_config['use_ram'],
                        max_ram=flt_rawcnt_config['max_ram'],
                        workers=workers_conversion)

        client.gather(futures_processes)

        # ---------------------------------------------------------------------

        # ----------------- FILTERING AND RAW COUNTING ------------------------

        # Create the directory where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        flt_rawcnt_config['skip_tags_counting'],
                        flt_rawcnt_config['skip_genes_counting'],
                        analysis_name=flt_rawcnt_config['analysis_name'])

        if flt_rawcnt_config['illumination_correction']:

            # Create the directory where to save the illumination functions
            suffix = 'illumination_funcs'
            illumination_func_dir_path, illumination_func_gene_dirs = \
                utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                            processing_hyb, suffix, add_slash,
                            analysis_name=flt_rawcnt_config['analysis_name'])

            # Loop through the channels and calculate the illumination
            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')

                logger.debug('Create average image for gene %s', gene)

                # Chunk the image list
                num_chunks = sum(list(client.ncores().values()))
                chunked_list = utils.list_chunking(flist_img_to_filter, num_chunks)

                # Scatter the image sublists to process in parallel
                futures = client.scatter(chunked_list)

                # Create the dask processing graph
                output = []
                for future in futures:
                    ImgMean = delayed(utils.partial_image_mean)(future)
                    output.append(ImgMean)
                ImgMean_all = delayed(sum)(output)
                ImgMean_all = ImgMean_all / float(len(futures))

                # Compute the graph
                ImgMean = ImgMean_all.compute()

                logger.debug('Create illumination function for gene %s', gene)

                # Create the illumination function
                Illumination = filters.gaussian(ImgMean, sigma=(20, 300, 300))

                # Normalization of the illumination
                Illumination_flat = np.amax(Illumination, axis=0)
                Illumination_norm = Illumination_flat / np.amax(Illumination_flat)

                logger.debug('Save illumination function for gene %s', gene)

                # Save the illumination function
                illumination_path = [ill_path for ill_path in illumination_func_gene_dirs
                                     if gene in ill_path][0]
                illumination_fname = illumination_path + gene + '_illumination_func.npy'
                np.save(illumination_fname, Illumination_norm, allow_pickle=False)

                # Broadcast the illumination function to all the cores
                client.scatter(Illumination_norm, broadcast=True)

                logger.debug('Filtering %s', gene)

                # Filtering and counting
                futures_processes = client.map(counting.filtering_and_counting_ill_correction,
                            flist_img_to_filter,
                            illumination_function=Illumination_norm,
                            filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                            filtered_img_gene_dirs=filtered_img_gene_dirs,
                            counting_gene_dirs=counting_gene_dirs,
                            plane_keep=flt_rawcnt_config['plane_keep'],
                            min_distance=flt_rawcnt_config['min_distance'],
                            stringency=flt_rawcnt_config['stringency'],
                            skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                            skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

                client.gather(futures_processes)

        else:
            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')
                # Filtering
                logger.debug('Filtering without illumination correction %s', gene)

                futures_processes = client.map(counting.filtering_and_counting,
                            flist_img_to_filter,
                            filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                            filtered_img_gene_dirs=filtered_img_gene_dirs,
                            counting_gene_dirs=counting_gene_dirs,
                            plane_keep=flt_rawcnt_config['plane_keep'],
                            min_distance=flt_rawcnt_config['min_distance'],
                            stringency=flt_rawcnt_config['stringency'],
                            skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                            skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

                client.gather(futures_processes)

        # ---------------------------------------------------------------------

        # # ----------------- COMBINE THE FILTERED DATA IN .ppf.hdf5 ------------
        # # Combine the filtered data in one single .ppf file for each
        # # hybridization. This step will run in serial mode and will not need
        # # to shuffle data between cores because everything is on the common
        # # file system.

        # logger.debug('Create .ppf.hdf5 file')

        # # Create the ppf.hdf5 file that contains the filtered data in uint16
        # preprocessing_file_path = hdf5_utils.hdf5_create_preprocessing_file(
        #                 hybridizations_infos, processing_hyb, hybridization,
        #                 flt_rawcnt_config['analysis_name'], hyb_dir,
        #                 converted_positions, image_properties)

        # logger.debug('Write the .npy filtered files into the .ppf file')

        # # Load and write the .npy tmp images into the hdf5 file
        # # Open the hdf5 file
        # with h5py.File(preprocessing_file_path) as f_hdl:
        #     # Loop through each gene
        #     for gene in hybridizations_infos[hybridization].keys():
        #         logger.debug('Writing %s images in .ppf.hdf5', gene)
        #         # List of the files to transfer
        #         filtered_gene_dir = [fdir for fdir in filtered_img_gene_dirs if gene in fdir][0]
        #         filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')
        #         # Loop through the list of files
        #         for f_file in filtered_files_list:
        #             pos = f_file.split('/')[-1].split('_')[-1].split('.')[0]
        #             f_hdl[gene]['FilteredData'][pos][:] = np.load(f_file)
        #             f_hdl.flush()

        # # ---------------------------------------------------------------------

        # # ----------------- STITCHING ------------------------

        # # Load the stitching parameters from the .yaml file

        # # Stitch the image in 2D or 3D (3D needs more work/testing)
        # nr_dim = flt_rawcnt_config['nr_dim']

        # # Estimated overlap between images according to the Nikon software
        # est_overlap = image_properties['Overlapping_percentage']

        # # Number of peaks to use for the alignment
        # nr_peaks = flt_rawcnt_config['nr_peaks']

        # # Determine if the coords need to be flipped
        # y_flip = flt_rawcnt_config['y_flip']

        # # Method to use for blending, can be 'linear' or 'non linear'.
        # # The method that performs best is 'non linear'.
        # blend = flt_rawcnt_config['blend']

        # # Reference gene for stitching
        # reference_gene = flt_rawcnt_config['reference_gene']

        # pixel_size = image_properties['PixelSize']

        # # Get the list of the filtered files of the reference gene
        # filtered_gene_dir = [gene_dir for gene_dir in filtered_img_gene_dirs if reference_gene in gene_dir][0]
        # filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')

        # # Create the pointer of the hdf5 file that will store the stitched
        # # reference image for the current hybridization
        # # Writing
        # tile_file_base_name = flt_rawcnt_config['analysis_name'] + '_' + processing_hyb
        # data_name = (tile_file_base_name
        #              + '_' + reference_gene
        #              + '_stitching_data')
        # stitching_file_name = tile_file_base_name + '.sf.hdf5'
        # stitching_file = h5py.File(hyb_dir + stitching_file_name, 'w', libver='latest')  # replace with 'a' as soon as you fix the error

        # # Determine the tiles organization
        # tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(
        #                 image_properties, converted_positions, hybridization,
        #                 est_overlap=est_overlap, y_flip=False, nr_dim=2)

        # # Align the tiles
        # futures_processes = client.map(pairwisesingle.align_single_pair_npy, contig_tuples,
        #                 filtered_files_list=filtered_files_list,
        #                 micData=micData, nr_peaks=nr_peaks)

        # # Gather the futures
        # data = client.gather(futures_processes)

        # # In this case the order of the returned tuples matches the order
        # # of the input contig_tuples
        # # P_all = [el for data_single in data for el in data_single[0]]
        # P_all = [data_single[0] for data_single in data]
        # P_all = np.array(P_all)
        # P_all = P_all.flat[:]
        # covs_all = [data_single[1] for data_single in data]
        # alignment = {'P': P_all,
        #              'covs': covs_all}

        # # Calculate a shift in global coordinates for each tile (global
        # # alignment), then apply these shifts to the corner coordinates of
        # # each tile; the shifted corner coordinates are returned and saved.
        # joining = stitching.get_place_tile_input(hyb_dir, tiles, contig_tuples,
        #                                          micData, nr_pixels, z_count,
        #                                          alignment, data_name,
        #                                          nr_dim=nr_dim)

        # # Create the hdf5 file structure
        # stitched_group, linear_blending, blend = hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
        #                 stitching_file, joining, nr_pixels, reference_gene, blend='non linear')

        # # Fill the hdf5 containing the stitched image with empty data and
        # # create the blending mask
        # stitched_group['final_image'][:] = np.zeros(joining['final_image_shape'], dtype=np.float64)
        # if blend is not None:
        #     # Make the mask
        #     stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:], dtype=np.float64)
        #     tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

        # # Create the subdirectory used to save the blended tiles
        # suffix = 'blended_tiles'
        # blended_tiles_directory = utils.create_single_directory(hyb_dir, reference_gene,
        #                 hybridization, processing_hyb, suffix, add_slash,
        #                 analysis_name=flt_rawcnt_config['analysis_name'])

        # # Get the directory with the filtered npy images of the
        # # reference_gene to use for the stitching
        # stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if reference_gene in npy_dir][0]

        # # Create the tmp directory where to save the masks
        # suffix = 'masks'
        # masked_tiles_directory = utils.create_single_directory(hyb_dir, reference_gene,
        #                 hybridization, processing_hyb, suffix, add_slash,
        #                 analysis_name=flt_rawcnt_config['analysis_name'])

        # # Create and save the mask files
        # for corn_value, corner_coords in joining['corner_list']:
        #     if not (np.isnan(corner_coords[0])):
        #         cur_mask = stitched_group['blending_mask'][
        #                 int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
        #                 int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]
        #         fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] + '_' + processing_hyb + '_' + reference_gene + '_masks_joining_pos_' + str(corn_value)
        #         np.save(fname, cur_mask)

        # # Blend all the tiles and save them in a directory
        # futures_processes = client.map(tilejoining.generate_blended_tile_npy,
        #                 joining['corner_list'],
        #                 stitching_files_dir=stitching_files_dir,
        #                 blended_tiles_directory=blended_tiles_directory,
        #                 masked_tiles_directory=masked_tiles_directory,
        #                 analysis_name=flt_rawcnt_config['analysis_name'],
        #                 processing_hyb=processing_hyb, reference_gene=reference_gene,
        #                 micData=micData, tiles=tiles, nr_pixels=nr_pixels,
        #                 linear_blending=linear_blending)

        # _ = client.gather(futures_processes)

        # # Write the stitched image
        # tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory,
        #                                  tiles, reference_gene, nr_pixels)

        # # Close the hdf5 file
        # stitching_file.close()

        # # Delete the directories with the blended tiles and masks
        # shutil.rmtree(blended_tiles_directory)
        # shutil.rmtree(masked_tiles_directory)

        # ----------------- DELETE FILES ------------------------
        # Don't delete the *.npy files here because they can be used to
        # create the final images with the apply-stitching related function

    client.close()

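# The conversion and filtering steps above all follow the same
# dask.distributed map/gather pattern. Below is a minimal, self-contained
# sketch of that pattern, not part of the pipeline; `square` is a
# hypothetical stand-in for the real tasks such as io.nd2_to_npy or
# counting.filtering_and_counting.
def _example_map_gather():
    from dask.distributed import Client, LocalCluster

    def square(x, offset=0):
        return x * x + offset

    cluster = LocalCluster(n_workers=2)
    client = Client(cluster)
    # Keyword arguments are broadcast to every task; one future per element
    futures = client.map(square, range(10), offset=1)
    results = client.gather(futures)  # blocks until all tasks finish
    client.close()
    cluster.close()
    return results
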
def filtering_speed():
    """
    This script will filter and run the raw counting on all the
    hybridization folders contained in a processing folder. The input
    parameters are passed using argparse.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). Default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the processing directory.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')

    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that need to be skipped
    # for the analysis
    blocked_directories = ['_logs']

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [el for el in processing_hyb_list
                           if el not in blocked_directories]

    for processing_hyb in processing_hyb_list:

        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash

        # Parse the experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(hyb_dir)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

        # ----------------- FILTERING AND RAW COUNTING ------------------------

        # Create the directory where to save the filtered images
        suffix = 'filtered_png'
        filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        filtered_img_dir_path, filtered_img_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        flt_rawcnt_config['skip_tags_counting'],
                        flt_rawcnt_config['skip_genes_counting'],
                        analysis_name=flt_rawcnt_config['analysis_name'])

        for gene in hybridizations_infos[hybridization].keys():
            flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                            + processing_hyb + '_' + gene + '_tmp/*.npy')
            # Filtering
            logger.debug('Filtering without illumination correction %s', gene)

            futures_processes = client.map(counting.filtering_and_counting,
                        flist_img_to_filter,
                        filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                        filtered_img_gene_dirs=filtered_img_gene_dirs,
                        counting_gene_dirs=counting_gene_dirs,
                        plane_keep=flt_rawcnt_config['plane_keep'],
                        min_distance=flt_rawcnt_config['min_distance'],
                        stringency=flt_rawcnt_config['stringency'],
                        skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                        skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

            client.gather(futures_processes)

        # ----------------- RAW COUNTING ONLY ------------------------

        skip_genes_counting = flt_rawcnt_config['skip_genes_counting']
        skip_tags_counting = flt_rawcnt_config['skip_tags_counting']

        # Create the directory where to save the counting
        suffix = 'counting'
        counting_dir_path, counting_gene_dirs = \
            utils.create_subdirectory_tree(hyb_dir, hybridization, hybridizations_infos,
                        processing_hyb, suffix, add_slash,
                        flt_rawcnt_config['skip_tags_counting'],
                        flt_rawcnt_config['skip_genes_counting'],
                        analysis_name=flt_rawcnt_config['analysis_name'])

        suffix = 'filtered_npy'
        gene_list = list(hybridizations_infos[hybridization].keys())
        analysis_name = flt_rawcnt_config['analysis_name']
        sufx_dir_path = hyb_dir + analysis_name + '_' + processing_hyb + '_' + suffix + add_slash

        for gene in hybridizations_infos[hybridization].keys():
            # Count only the genes that are not excluded from the counting
            if gene not in skip_genes_counting or [tag for tag in skip_tags_counting if tag not in gene]:
                if analysis_name:
                    filtered_images_directory = sufx_dir_path + analysis_name + '_' + processing_hyb + '_' + gene + '_' + suffix + add_slash
                else:
                    filtered_images_directory = sufx_dir_path + processing_hyb + '_' + gene + '_' + suffix + add_slash

                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/'
                                                + processing_hyb + '_' + gene + '_tmp/*.npy')
                # Counting only
                logger.debug('Raw counting only %s', gene)

                futures_processes = client.map(counting.counting_only,
                            flist_img_to_filter,
                            counting_gene_dirs=counting_gene_dirs,
                            min_distance=flt_rawcnt_config['min_distance'],
                            stringency=flt_rawcnt_config['stringency'])

                client.gather(futures_processes)

    client.close()

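# Hedged toy illustration (hypothetical names, not part of the pipeline) of
# the gene-skipping rule applied in the raw-counting loop above: a gene is
# counted unless it is listed in skip_genes_counting or its name carries one
# of the skip_tags_counting tags. Written here with an explicit any() test
# rather than the inline list-comprehension check used above.
def _example_skip_rule():
    skip_genes_counting = ['Dapi']
    skip_tags_counting = ['_st']
    counted = [gene for gene in ['Actb', 'Dapi', 'Polyt_st']
               if gene not in skip_genes_counting
               and not any(tag in gene for tag in skip_tags_counting)]
    return counted  # ['Actb']
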
def staining_segmentation():
    """
    This script will segment the selected staining and output the identified
    objects. All the parameters are entered via argparse.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). Default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the processing directory.
    processing_file: string
        Path to the hdf5 file with the staining to process.
    segmentation_staining: string
        Staining to be segmented.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Segmentation script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    parser.add_argument('-processing_file',
                        help='path to the file with the staining to process')
    parser.add_argument('-segmentation_staining',
                        help='staining to be segmented')

    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # File to process
    processing_file = args.processing_file
    # Staining to segment
    segmentation_staining = args.segmentation_staining
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    segmentation_parameters = utils.general_yaml_parser(
        processing_directory + 'Staining_segmentation.config.yaml')

    # Chunking parameters
    chunk_size = segmentation_parameters[segmentation_staining][
        'image_chunking_parameters']['chunk_size']
    percent_padding = segmentation_parameters[segmentation_staining][
        'image_chunking_parameters']['percent_padding']

    # Segmentation parameters
    trimming = segmentation_parameters[segmentation_staining][
        'segmentation_parameters']['trimming']
    min_object_size = segmentation_parameters[segmentation_staining][
        'segmentation_parameters']['min_object_size']
    disk_radium_rank_filer = segmentation_parameters[segmentation_staining][
        'segmentation_parameters']['disk_radium_rank_filer']
    min_distance = segmentation_parameters[segmentation_staining][
        'segmentation_parameters']['min_distance']
    threshold_rel = segmentation_parameters[segmentation_staining][
        'segmentation_parameters']['threshold_rel']

    # Load the image (will be modified after the change to hdf5 input)
    img = io.imread(processing_file)

    # Image chunking
    nr_chunks, nc_chunks, Coords_Chunks_list, Coords_Padded_Chunks_list, \
        r_coords_tl_all_padded, c_coords_tl_all_padded, \
        r_coords_br_all_padded, c_coords_br_all_padded = \
        object_based_segmentation.image_chunking(img, chunk_size, percent_padding)

    # Create the chunk idxs
    Chunks_idxs_linear = np.arange(len(Coords_Padded_Chunks_list), dtype='int32')

    # Distribute the chunk idxs in an array matching the position in the
    # chunked image
    Chunks_idxs = Chunks_idxs_linear.reshape(nr_chunks, nc_chunks)

    # Flatten the array to make the creation of the coords combinations easier
    Chunks_idxs_rows = np.ravel(Chunks_idxs)
    Chunks_idxs_cols = np.ravel(Chunks_idxs, order='F')

    # Calculate the coords of the overlapping chunks
    Overlapping_chunks_coords = list()

    counter = 0
    left_pos = Chunks_idxs_rows[0]
    for el in Chunks_idxs_rows[1:]:
        if counter < nc_chunks - 1:
            Coords_left = Coords_Padded_Chunks_list[left_pos]
            Coords_right = Coords_Padded_Chunks_list[el]
            row_tl = Coords_left[0]
            row_br = Coords_left[1]
            col_tl = Coords_right[2]
            col_br = Coords_left[3]
            Overlapping_chunks_coords.append((row_tl, row_br, col_tl, col_br))
            left_pos = el
            counter += 1
        else:
            left_pos = el
            counter = 0

    counter = 0
    top_pos = Chunks_idxs_cols[0]
    for el in Chunks_idxs_cols[1:]:
        if counter < nr_chunks - 1:
            Coords_top = Coords_Padded_Chunks_list[top_pos]
            Coords_bottom = Coords_Padded_Chunks_list[el]
            row_tl = Coords_bottom[0]
            row_br = Coords_top[1]
            col_tl = Coords_top[2]
            col_br = Coords_top[3]
            Overlapping_chunks_coords.append((row_tl, row_br, col_tl, col_br))
            counter += 1
            top_pos = el
        else:
            top_pos = el
            counter = 0

    # This approach is currently used for testing. If the image gets too big
    # to fit in RAM, save the files and load them separately in each node.
    chunked_image_seq = list()
    for coords in Coords_Padded_Chunks_list:
        chunked_image_seq.append(img[coords[0]:coords[1], coords[2]:coords[3]])

    # Run the segmentation
    futures_processes = client.map(object_based_segmentation.polyT_segmentation,
                                   chunked_image_seq,
                                   min_object_size=min_object_size,
                                   min_distance=min_distance,
                                   disk_radium_rank_filer=disk_radium_rank_filer,
                                   threshold_rel=threshold_rel,
                                   trimming=trimming)

    Objects_list = client.gather(futures_processes)

    # Recalculate the labels and the coords
    processed_images_data = dict()
    max_starting_label = 0
    total_data_dict = dict()

    for idx, objs_chunk in enumerate(Objects_list):
        for label, cvalues in objs_chunk.items():
            new_label = max_starting_label + 1
            coords = Coords_Padded_Chunks_list[idx][0::2]
            total_data_dict[new_label] = cvalues + coords
            max_starting_label = new_label

    # Calculate all the intersecting objects
    futures_processes = client.map(object_based_segmentation.OverlappingCouples,
                                   Overlapping_chunks_coords,
                                   TotalDataDict=total_data_dict)

    All_intersecting = client.gather(futures_processes)

    # Put together the couples with the same label for multiple intersections
    # (labels of objects that intersect multiple regions), then scatter all
    # of them and calculate the intersections.

    # Combine the results from the parallel processing
    flatten_couple = [el for grp in All_intersecting for el in grp]

    # Remove duplicates
    flatten_couple = list(set(flatten_couple))

    # Create a list of the labels (removing the repeats)
    singles = list()
    [singles.append(x) for cpl in flatten_couple for x in cpl]
    singles = list(set(singles))

    # Identify the couples containing singles
    Combined_all_singles = list()
    for item in singles:
        Combined_single = list()
        for couple in flatten_couple:
            if item in couple:
                Combined_single.append(couple)
        Combined_all_singles.append(Combined_single)

    if Combined_all_singles:
        # Combine all the intersecting labels
        start = Combined_all_singles[0]
        ComparisonList = Combined_all_singles[1:].copy()
        # merged = start.copy()
        merged = list()
        SavedCombinations = list()
        tmp_list = ComparisonList.copy()
        KeepGoing = True
        Loop = 0
        while KeepGoing:
            Loop += 1
            for idx, el in enumerate(ComparisonList):
                if set(start).intersection(set(el)):
                    # merged = list(set(merged) | set(el))
                    [merged.append(x) for x in el]
                    tmp_list = [e for e in tmp_list if e != el]

            intersection = list(set.intersection(set(merged), set(start)))
            if intersection:
                merged = list(set.union(set(merged), set(start)))
                # merged = list(set(merged))
                start = merged.copy()
                merged = list()
                ComparisonList = tmp_list.copy()
                # tmp_list.append(merged)
            else:
                SavedCombinations.append(start)
                start = tmp_list[0]
                tmp_list = tmp_list[1:]
                ComparisonList = tmp_list.copy()

            if len(tmp_list) < 1:
                [SavedCombinations.append(x) for x in tmp_list]
                KeepGoing = False

        # Remove all the duplicated labels that intersect; in this case the
        # labels are merged. It would be nice to run an extra segmentation on
        # the merged objects. If this step is too slow it can be parallelised.
        SavedLab_list = list()
        CleanedDict = total_data_dict.copy()

        for couple in SavedCombinations:
            SaveLab, RemoveLabs, NewCoords = object_based_segmentation.IntersectionCouples(
                couple, total_data_dict)
            SavedLab_list.append(SaveLab)
            for lab in RemoveLabs:
                del CleanedDict[lab]
            CleanedDict[SaveLab] = NewCoords
    else:
        CleanedDict = total_data_dict

    # Calculate all the object properties
    all_objects_list = [(key, coords) for key, coords in CleanedDict.items()]

    futures_processes = client.map(
        object_based_segmentation.obj_properties_calculator, all_objects_list)

    all_objects_properties_list = client.gather(futures_processes)

    # Convert the list to a dictionary
    all_objects_properties_dict = {
        k: v
        for d in all_objects_properties_list for k, v in d.items()
    }

    # Save all the objects
    segmented_objs_fname = processing_directory + 'segmented_' + segmentation_staining + '_all_objs_properties.pkl'
    pickle.dump(all_objects_properties_dict, open(segmented_objs_fname, 'wb'))

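# Hedged toy illustration (hypothetical values, not part of the pipeline) of
# the overlap-strip geometry computed above, assuming padded chunk coords are
# (row_tl, row_br, col_tl, col_br) tuples as in Coords_Padded_Chunks_list.
# For two horizontally adjacent padded chunks the shared strip takes its rows
# from the left chunk, its left edge from the right chunk's col_tl and its
# right edge from the left chunk's col_br.
def _example_overlap_strip():
    left = (0, 100, 0, 60)     # hypothetical padded chunk
    right = (0, 100, 40, 100)  # its right-hand neighbour; cols 40-60 overlap
    row_tl, row_br = left[0], left[1]
    col_tl, col_br = right[2], left[3]
    return (row_tl, row_br, col_tl, col_br)  # (0, 100, 40, 60): the shared strip
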
def dots_coords_correction():
    """
    This script is used to collect all the raw countings from the different
    hybridizations, correct the coords according to the registration of the
    reference gene and remove the dots that overlap in the overlapping
    regions between the images. It saves the aggregate coords and also the
    coords after dots processing.
    Input via argparse.

    Parameters:
    -----------

    path: string
        Exact path to the experiment folder.
    pxl: int
        Radius in pixels used to create the neighbourhood (nhood) used to
        define when two dots are the same.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Dots coords consolidation \
                                    and correction')
    parser.add_argument('-path', help='path to the experiment folder')
    parser.add_argument('-pixel_radius', type=int,
                        help='radius in pixels used to create the nhood that is \
                        used to define that two pixels are the same')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')

    args = parser.parse_args()

    # Retrieve the parameters
    processing_experiment_directory = args.path
    pxl = args.pixel_radius

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the experiment directory
    processing_experiment_directory = utils.check_trailing_slash(processing_experiment_directory, os_windows)

    stitched_reference_files_dir = processing_experiment_directory + 'stitched_reference_files'

    # Check trailing slash in the stitched reference directory
    stitched_reference_files_dir = utils.check_trailing_slash(stitched_reference_files_dir, os_windows)

    # Collect the infos of the experiment and the processing
    # Parse the experimental metadata file (serial)
    experiment_infos, image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters = \
        utils.experimental_metadata_parser(processing_experiment_directory)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)

    # Get the reference gene
    reference_gene = flt_rawcnt_config['reference_gene']

    # Get the overlapping percentage and the image size;
    # a square image is assumed
    overlapping_percentage = image_properties['Overlapping_percentage']
    image_size = image_properties['HybImageSize']['columns']

    # Combine all the counts
    all_raw_counts = dots_coords_calculations.combine_raw_counting_results(
                    flt_rawcnt_config, hybridizations_infos, experiment_infos,
                    processing_experiment_directory, stitched_reference_files_dir,
                    reference_gene, add_slash)

    # Create a dictionary with only the selected peaks coords after alignment
    aligned_peaks_dict = all_raw_counts['selected_peaks_coords_aligned']

    # Create a list of tuples to process each hybridization/gene on a
    # different core
    combinations = dots_coords_calculations.processing_combinations(
                    list(hybridizations_infos.keys()), aligned_peaks_dict)

    # Add the corresponding registration_data and coords files to the tuple
    # in order to reduce the size of the info transferred over the network
    added_combinations = list()
    for idx, combination in enumerate(combinations):
        hybridization = combination[0]
        gene = combination[1]
        reg_data_combination = all_raw_counts['registration_data'][hybridization]
        aligned_peaks_dict_gene = all_raw_counts['selected_peaks_coords_aligned'][hybridization][gene]
        combination_dict = {
            'hybridization': hybridization,
            'gene': gene,
            'reg_data_combination': reg_data_combination,
            'aligned_peaks_dict_gene': aligned_peaks_dict_gene
        }
        added_combinations.append(combination_dict)

    # Process each gene in parallel
    futures_processes = client.map(dots_coords_calculations.function_to_run_dots_removal_parallel,
                                   added_combinations,
                                   overlapping_percentage=overlapping_percentage,
                                   image_size=image_size, pxl=pxl)

    cleaned_dots_list = client.gather(futures_processes)

    # Convert the list of dictionaries into one single dictionary.
    # The saved dictionary contains all the dots; the reference to the tile
    # position has been removed during the overlapping-dots removal step.
    all_countings = dict()
    all_countings['all_coords_cleaned'] = dict()
    all_countings['all_coords'] = dict()
    all_countings['removed_coords'] = dict()

    for el in cleaned_dots_list:
        hybridization = list(el.keys())[0]
        gene = list(el[hybridization].keys())[0]
        renamed_gene = gene + '_' + hybridization
        all_countings['all_coords_cleaned'][renamed_gene] = el[hybridization][gene]['all_coords_cleaned']
        all_countings['all_coords'][renamed_gene] = el[hybridization][gene]['all_coords']
        all_countings['removed_coords'][renamed_gene] = el[hybridization][gene]['removed_coords']

    # Save all the data
    counting_data_name = processing_experiment_directory + experiment_infos['ExperimentName'] + '_all_cleaned_raw_counting_data.pkl'
    pickle.dump(all_countings, open(counting_data_name, 'wb'))

    client.close()

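# Hedged example of reading back the pickle written above. The file name is
# hypothetical; the real name is
# <ExperimentName>_all_cleaned_raw_counting_data.pkl.
def _example_load_countings(fname='EXP1_all_cleaned_raw_counting_data.pkl'):
    import pickle
    with open(fname, 'rb') as f:
        all_countings = pickle.load(f)
    # Top-level keys: 'all_coords_cleaned', 'all_coords', 'removed_coords';
    # each maps '<gene>_<Hybridization>' to the corresponding dot coords.
    return list(all_countings['all_coords_cleaned'].keys())
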
def apply_stitching():
    """
    Script to apply the registration to all the osmFISH channels. It will
    create a stitched image in an hdf5 file.
    All the parameters are entered via argparse.

    Parameters:
    -----------

    experiment_path: string
        Path to the folder with the hybridizations.
    reference_files_path: string
        Path to the folder with the _reg_data.pkl files.
    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). Default = False. If False the process
        will run on the local computer using nCPUs - 1.
    """

    parser = argparse.ArgumentParser(description='Create the stitched images \
                                    after registration')
    parser.add_argument('-experiment_path',
                        help='path to the folder with the hybridizations')
    parser.add_argument('-reference_files_path',
                        help='path to the folder with the _reg_data.pkl files')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')

    args = parser.parse_args()

    processing_experiment_directory = args.experiment_path
    stitched_reference_files_dir = args.reference_files_path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directories
    processing_experiment_directory = utils.check_trailing_slash(processing_experiment_directory, os_windows)
    stitched_reference_files_dir = utils.check_trailing_slash(stitched_reference_files_dir, os_windows)

    # Starting logger
    utils.init_file_logger(processing_experiment_directory)
    logger = logging.getLogger()

    # Collect the infos of the experiment and the processing
    # Parse the experimental metadata file (serial)
    experiment_infos, image_properties, hybridizations_infos, \
        converted_positions, microscope_parameters = \
        utils.experimental_metadata_parser(processing_experiment_directory)

    # Parse the configuration file
    flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)

    # Get the reference gene used
    reference_gene = flt_rawcnt_config['reference_gene']

    # Stitch the image in 2D or 3D (3D needs more work/testing)
    nr_dim = flt_rawcnt_config['nr_dim']

    # Determine the hybridizations to process
    if isinstance(flt_rawcnt_config['hybs_to_stitch'], list):
        hybridizations_to_process = flt_rawcnt_config['hybs_to_stitch']
    else:
        if flt_rawcnt_config['hybs_to_stitch'] == 'All':
            hybridizations_to_process = list(hybridizations_infos.keys())
        else:
            raise ValueError('Error in the hybridizations to stitch')

    for hybridization in hybridizations_to_process:

        # Determine the genes to stitch in the processing hybridization
        genes_processing = list(hybridizations_infos[hybridization].keys())

        hyb_short = re.sub('Hybridization', 'hyb', hybridization)
        processing_hyb = experiment_infos['ExperimentName'] + '_' + hyb_short
        hyb_dir = processing_experiment_directory + processing_hyb + add_slash

        # Create the pointer of the hdf5 file that will store the stitched
        # images for the current hybridization
        tile_file_base_name = flt_rawcnt_config['analysis_name'] + '_' + experiment_infos['ExperimentName'] + '_' + hyb_short
        stitching_file_name = tile_file_base_name + '.reg.sf.hdf5'
        data_name = (tile_file_base_name
                     + '_' + reference_gene
                     + '_stitching_data_reg')

        stitching_file = h5py.File(stitched_reference_files_dir + stitching_file_name,
                                   'w', libver='latest')  # replace with 'a' as soon as you fix the error

        # Determine the tiles organization
        joining, tiles, nr_pixels, z_count, micData = stitching.get_place_tile_input_apply_npy(
                        hyb_dir, stitched_reference_files_dir, data_name,
                        image_properties, nr_dim)

        for gene in genes_processing:

            # Create the hdf5 file structure
            stitched_group, linear_blending, blend = hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                            stitching_file, joining, nr_pixels, gene, blend='non linear')

            # Fill the hdf5 containing the stitched image with empty data and
            # create the blending mask
            stitched_group['final_image'][:] = np.zeros(joining['final_image_shape'], dtype=np.uint16)
            if blend is not None:
                # Make the mask
                stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:], dtype=np.uint16)
                tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

            filtered_img_gene_dirs_path = hyb_dir + flt_rawcnt_config['analysis_name'] + '_' + processing_hyb + '_filtered_npy' + add_slash
            filtered_img_gene_dirs = glob.glob(filtered_img_gene_dirs_path + '*')

            # Create the subdirectory used to save the blended tiles
            suffix = 'blended_tiles'
            blended_tiles_directory = utils.create_single_directory(hyb_dir, gene,
                            hybridization, processing_hyb, suffix, add_slash,
                            analysis_name=flt_rawcnt_config['analysis_name'])

            # Get the directory with the filtered npy images of the gene to
            # use for the stitching
            stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if gene in npy_dir][0]
            stitching_files_dir = stitching_files_dir + add_slash

            # Create the tmp directory where to save the masks
            suffix = 'masks'
            masked_tiles_directory = utils.create_single_directory(hyb_dir, gene,
                            hybridization, processing_hyb, suffix, add_slash,
                            analysis_name=flt_rawcnt_config['analysis_name'])

            # Create and save the mask files
            for corn_value, corner_coords in joining['corner_list']:
                if not (np.isnan(corner_coords[0])):
                    cur_mask = stitched_group['blending_mask'][
                            int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
                            int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]
                    fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] + '_' + processing_hyb + '_' + gene + '_masks_joining_pos_' + str(corn_value)
                    np.save(fname, cur_mask)

            # Blend all the tiles and save them in a directory
            futures_processes = client.map(tilejoining.generate_blended_tile_npy,
                            joining['corner_list'],
                            stitching_files_dir=stitching_files_dir,
                            blended_tiles_directory=blended_tiles_directory,
                            masked_tiles_directory=masked_tiles_directory,
                            analysis_name=flt_rawcnt_config['analysis_name'],
                            processing_hyb=processing_hyb, reference_gene=gene,
                            micData=micData, tiles=tiles, nr_pixels=nr_pixels,
                            linear_blending=linear_blending)

            _ = client.gather(futures_processes)

            # Write the stitched image
            tilejoining.make_final_image_npy(joining, stitching_file,
                                             blended_tiles_directory, tiles,
                                             gene, nr_pixels)

            stitching_file.flush()

            # Remove the directories with the blended tiles and masks
            shutil.rmtree(blended_tiles_directory)
            shutil.rmtree(masked_tiles_directory)

        stitching_file.close()

    client.close()

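# Hedged usage example (console-script name and paths are hypothetical):
#
#   apply_stitching -experiment_path /data/exp1/ \
#       -reference_files_path /data/exp1/stitched_reference_files/ \
#       -scheduler tcp://192.168.0.4:7003
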
def run_stitching_reference_only():
    """
    This script will stitch the filtered data of the hybridizations in the
    experiment directory. The input parameters are passed using argparse.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler
        (ex. tcp://192.168.0.4:7003). Default = False. If False the process
        will run on the local computer using nCPUs - 1.
    path: string
        Path to the experiment directory.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Stitching script')
    parser.add_argument('-scheduler', default=False,
                        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')

    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on the local machine. It will use all the
        # available cores - 1.
        ncores = multiprocessing.cpu_count() - 1
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # Subdirectories of the processing_directory that need to be skipped
    # for the analysis
    blocked_directories = ['_logs']

    # Starting logger
    # utils.init_file_logger(processing_directory)
    # logger = logging.getLogger()

    # Determine the operating system running the code
    os_windows, add_slash = utils.determine_os()

    # Check trailing slash in the processing directory
    processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

    # Get a list of the hybridizations to process
    processing_hyb_list = next(os.walk(processing_directory))[1]

    # Remove the blocked directories from the directories to process
    processing_hyb_list = [
        el for el in processing_hyb_list if el not in blocked_directories
    ]

    for processing_hyb in processing_hyb_list:

        # Determine the hyb number from the name
        hybridization_number = processing_hyb.split('_hyb')[-1]
        hybridization = 'Hybridization' + hybridization_number
        hyb_dir = processing_directory + processing_hyb + add_slash

        # Parse the experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(hyb_dir)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

        # ----------------- STITCHING ------------------------

        # Determine the directory of the filtered images
        suffix = 'filtered_npy'
        analysis_name = flt_rawcnt_config['analysis_name']
        sufx_dir_path = hyb_dir + analysis_name + '_' + processing_hyb + '_' + suffix + add_slash

        # Reference gene for stitching
        reference_gene = flt_rawcnt_config['reference_gene']
        filtered_gene_dir = sufx_dir_path + analysis_name + '_' + processing_hyb + '_' + reference_gene + '_' + suffix + add_slash

        # Load the stitching parameters from the .yaml file

        # Stitch the image in 2D or 3D (3D needs more work/testing)
        nr_dim = flt_rawcnt_config['nr_dim']

        # Estimated overlap between images according to the Nikon software
        est_overlap = np.float_(image_properties['Overlapping_percentage'])

        # Number of peaks to use for the alignment
        nr_peaks = flt_rawcnt_config['nr_peaks']

        # Determine if the coords need to be flipped
        y_flip = flt_rawcnt_config['y_flip']

        # Method to use for blending, can be 'linear' or 'non linear'.
        # The method that performs best is 'non linear'.
        blend = flt_rawcnt_config['blend']

        pixel_size = image_properties['PixelSize']

        # Get the list of the filtered files of the reference gene
        filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')

        # Create the pointer of the hdf5 file that will store the stitched
        # reference image for the current hybridization
        # Writing
        tile_file_base_name = flt_rawcnt_config['analysis_name'] + '_' + processing_hyb
        data_name = (tile_file_base_name
                     + '_' + reference_gene
                     + '_stitching_data')
        stitching_file_name = tile_file_base_name + '.sf.hdf5'
        stitching_file = h5py.File(hyb_dir + stitching_file_name, 'w',
                                   libver='latest')  # replace with 'a' as soon as you fix the error

        # Determine the tiles organization
        tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(
                        image_properties, converted_positions, hybridization,
                        est_overlap, y_flip=False, nr_dim=2)

        # Align the tiles
        futures_processes = client.map(pairwisesingle.align_single_pair_npy,
                        contig_tuples,
                        filtered_files_list=filtered_files_list,
                        micData=micData, nr_peaks=nr_peaks)

        # Gather the futures
        data = client.gather(futures_processes)

        # In this case the order of the returned tuples matches the order of
        # the input contig_tuples
        # P_all = [el for data_single in data for el in data_single[0]]
        P_all = [data_single[0] for data_single in data]
        P_all = np.array(P_all)
        P_all = P_all.flat[:]
        covs_all = [data_single[1] for data_single in data]
        alignment = {'P': P_all, 'covs': covs_all}

        # Calculate a shift in global coordinates for each tile (global
        # alignment), then apply these shifts to the corner coordinates of
        # each tile; the shifted corner coordinates are returned and saved.
        joining = stitching.get_place_tile_input(hyb_dir, tiles, contig_tuples,
                                                 micData, nr_pixels, z_count,
                                                 alignment, data_name,
                                                 nr_dim=nr_dim)

        # Create the hdf5 file structure
        stitched_group, linear_blending, blend = hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                        stitching_file, joining, nr_pixels, reference_gene,
                        blend='non linear')

        # Fill the hdf5 containing the stitched image with empty data and
        # create the blending mask
        stitched_group['final_image'][:] = np.zeros(
            joining['final_image_shape'], dtype=np.float64)
        if blend is not None:
            # Make the mask
            stitched_group['blending_mask'][:] = np.zeros(
                joining['final_image_shape'][-2:], dtype=np.float64)
            tilejoining.make_mask(joining, nr_pixels,
                                  stitched_group['blending_mask'])

        # Create the subdirectory used to save the blended tiles
        suffix = 'blended_tiles'
        blended_tiles_directory = utils.create_single_directory(
                        hyb_dir, reference_gene, hybridization, processing_hyb,
                        suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        # Create the tmp directory where to save the masks
        suffix = 'masks'
        masked_tiles_directory = utils.create_single_directory(
                        hyb_dir, reference_gene, hybridization, processing_hyb,
                        suffix, add_slash,
                        analysis_name=flt_rawcnt_config['analysis_name'])

        # Create and save the mask files
        for corn_value, corner_coords in joining['corner_list']:
            if not (np.isnan(corner_coords[0])):
                cur_mask = stitched_group['blending_mask'][
                        int(corner_coords[0]):int(corner_coords[0]) + int(nr_pixels),
                        int(corner_coords[1]):int(corner_coords[1]) + int(nr_pixels)]
                fname = masked_tiles_directory + flt_rawcnt_config['analysis_name'] + '_' + processing_hyb + '_' + reference_gene + '_masks_joining_pos_' + str(corn_value)
                np.save(fname, cur_mask)

        # Blend all the tiles and save them in a directory
        futures_processes = client.map(tilejoining.generate_blended_tile_npy,
                        joining['corner_list'],
                        stitching_files_dir=filtered_gene_dir,
                        blended_tiles_directory=blended_tiles_directory,
                        masked_tiles_directory=masked_tiles_directory,
                        analysis_name=flt_rawcnt_config['analysis_name'],
                        processing_hyb=processing_hyb,
                        reference_gene=reference_gene,
                        micData=micData, tiles=tiles, nr_pixels=nr_pixels,
                        linear_blending=linear_blending)

        _ = client.gather(futures_processes)

        # Write the stitched image
        tilejoining.make_final_image_npy(joining, stitching_file,
                                         blended_tiles_directory, tiles,
                                         reference_gene, nr_pixels)

        # Close the hdf5 file
        stitching_file.close()

        # Delete the directories with the blended tiles and masks
        shutil.rmtree(blended_tiles_directory)
        shutil.rmtree(masked_tiles_directory)

    client.close()

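# Hedged toy illustration (hypothetical values, not part of the pipeline) of
# how the gathered pairwise results are packed into the alignment dict above:
# each future returns a (P, cov) pair for one contiguous tile pair, and the
# pairwise translations are flattened into a single vector.
def _example_pack_alignment():
    import numpy as np
    data = [(np.array([2, -1]), 0.9), (np.array([0, 3]), 0.8)]
    P_all = np.array([d[0] for d in data]).flat[:]  # array([ 2, -1,  0,  3])
    covs_all = [d[1] for d in data]
    return {'P': P_all, 'covs': covs_all}
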
def process_standalone_experiment():
    """
    Script to run conversion, filtering and raw counting on a small set of
    images. The analysis runs locally.
    All the parameters are entered with argparse.

    Parameters:
    -----------

    path: string
        Path to the experiment to process.
    analysis_name: string
        Name of the analysis.
    stringency: int
        Value of the stringency to use in the threshold selection. Default=0.
    min_distance: int
        Min distance between two peaks. Default=5.
    min_plane: int
        Min plane for z-stack cropping. Default=None.
    max_plane: int
        Max plane for z-stack cropping. Default=None.
    ncores: int
        Number of cores to use for the processing. Default=1.
    """

    # Inputs of the function
    parser = argparse.ArgumentParser(
        description='Counting and filtering experiment')
    parser.add_argument('-path', help='path to experiment to analyze')
    parser.add_argument('-analysis_name', help='analysis name')
    parser.add_argument('-stringency', help='stringency', default=0, type=int)
    parser.add_argument('-min_distance', help='min distance between peaks',
                        default=5, type=int)
    parser.add_argument('-min_plane', help='starting plane to consider',
                        default=None, type=int)
    parser.add_argument('-max_plane', help='ending plane to consider',
                        default=None, type=int)
    parser.add_argument('-ncores', help='number of cores to use',
                        default=1, type=int)

    # Parse the input args
    args = parser.parse_args()

    processing_directory = args.path
    analysis_name = args.analysis_name
    stringency = args.stringency
    min_distance = args.min_distance
    min_plane = args.min_plane
    max_plane = args.max_plane
    ncores = args.ncores

    if min_plane is not None and max_plane is not None:
        plane_keep = [min_plane, max_plane]
    else:
        plane_keep = None

    # Determine the os type
    os_windows, add_slash = utils.determine_os()

    # Starting logger
    utils.init_file_logger(processing_directory)
    logger = logging.getLogger()

    logger.debug('min_plane %s', min_plane)
    logger.debug('max_plane %s', max_plane)
    logger.debug('keep_planes value %s', plane_keep)

    # Start the distributed client
    client = Client(n_workers=ncores, threads_per_worker=1)

    logger.debug('client %s', client)
    logger.debug('check that workers are on the same directory %s',
                 client.run(os.getcwd))

    # Check trailing slash
    processing_directory = utils.check_trailing_slash(processing_directory,
                                                      os_windows)

    # Determine the experiment name
    exp_name = processing_directory.split(add_slash)[-2]
    logger.debug('Experiment name: %s', exp_name)

    # Create the directories where to save the output
    tmp_dir_path = processing_directory + analysis_name + '_' + exp_name + '_tmp' + add_slash
    filtered_dir_path = processing_directory + analysis_name + '_' + exp_name + '_filtered' + add_slash
    counting_dir_path = processing_directory + analysis_name + '_' + exp_name + '_counting_pkl' + add_slash

    try:
        os.stat(tmp_dir_path)
    except FileNotFoundError:
        os.mkdir(tmp_dir_path)
        os.chmod(tmp_dir_path, 0o777)

    try:
        os.stat(filtered_dir_path)
    except FileNotFoundError:
        os.mkdir(filtered_dir_path)
        os.chmod(filtered_dir_path, 0o777)

    try:
        os.stat(counting_dir_path)
    except FileNotFoundError:
        os.mkdir(counting_dir_path)
        os.chmod(counting_dir_path, 0o777)

    # Get the list of the nd2 files to process inside the directory
    files_list = glob.glob(processing_directory + '*.nd2')
    logger.debug('files to process %s', files_list)

    # Convert the .nd2 data
    for raw_data_gene_fname in files_list:
        fname = raw_data_gene_fname.split(add_slash)[-1][:-4]
        logger.debug('fname %s', fname)

        with nd2.Nd2(raw_data_gene_fname) as nd2file:
            for channel in nd2file.channels:
                for fov in nd2file.fields_of_view:
                    img_stack = np.empty(
                        [len(nd2file.z_levels), nd2file.height, nd2file.width],
                        dtype='uint16')
                    images = nd2file.select(channels=channel,
                                            fields_of_view=fov,
                                            z_levels=nd2file.z_levels)
                    for idx, im in enumerate(images):
                        img_stack[idx, :, :] = im
                    converted_fname = tmp_dir_path + exp_name + '_' + fname + '_' + channel + '_fov_' + str(fov) + '.npy'
                    np.save(converted_fname, img_stack, allow_pickle=False)

    logger.debug('Finished .nd2 file conversion')

    # Filter all the data
    # Get the list of the files to process
    flist_img_to_filter = glob.glob(tmp_dir_path + '*.npy')
    # logger.debug('files to filter %s', flist_img_to_filter)

    # Process all the data in parallel
    futures_processes = client.map(filtering_and_counting_experiment,
                                   flist_img_to_filter,
                                   filtered_dir_path=filtered_dir_path,
                                   counting_dir_path=counting_dir_path,
                                   exp_name=exp_name, plane_keep=plane_keep,
                                   add_slash=add_slash,
                                   min_distance=min_distance,
                                   stringency=stringency)

    client.gather(futures_processes)
    client.close()

    logger.debug('Finished filtering and counting')

    # Delete the tmp folders
    shutil.rmtree(tmp_dir_path)

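# Hedged usage example (console-script name and path are hypothetical);
# converts, filters and counts the .nd2 files in the folder on 4 local
# cores, keeping z-planes 5 to 25:
#
#   process_standalone_experiment -path /data/test_run/ -analysis_name quick \
#       -min_plane 5 -max_plane 25 -ncores 4
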