예제 #1
0
def preprocessing_script():
    """
    Convert, filter and count all the hybridization folders combined in a
    processing folder. The input parameters are passed using argparse.

    For every subdirectory of the processing directory (except the ones
    listed in ``blocked_directories``) the script:

    1. parses the experimental metadata file and the filtering/counting
       configuration file found in the hybridization folder;
    2. converts the ``.nd2`` files of the ``<hyb>_raw_data`` folder with
       ``io.nd2_to_npy``, restricted to the workers selected from
       ``utils.identify_nodes``;
    3. runs filtering and raw counting on the ``.npy`` files of the
       ``<hyb>_tmp`` folder. When ``illumination_correction`` is set in the
       configuration, a per-gene illumination function is computed, saved
       as ``<gene>_illumination_func.npy`` and used for the filtering.

    The dask client (and the local cluster, when one is started) is closed
    on exit, also when a processing step raises.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer
        using nCPUs-1 workers (at least one).

    path: string
        Path to the processing directory (required)

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    # Every later step needs the directory: fail at parse time instead of
    # after the dask cluster has been started.
    parser.add_argument('-path', required=True, help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    client = None
    cluster = None
    try:
        if scheduler_address:
            # Start dask client on server or cluster
            client = Client(scheduler_address)
        else:
            # Start dask client on local machine using all the available
            # cores - 1. Never ask for fewer than one worker: on a
            # single-core machine cpu_count() - 1 is 0, which would start a
            # cluster without workers.
            ncores = max(1, multiprocessing.cpu_count() - 1)
            cluster = LocalCluster(n_workers=ncores)
            client = Client(cluster)

        # Subdirectories of the processing_directory that need to be skipped
        # for the analysis
        blocked_directories = ['_logs']

        # Starting logger
        utils.init_file_logger(processing_directory)
        logger = logging.getLogger()

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

        # Get a list of the hybridizations to process: the first level
        # subdirectories minus the blocked ones
        processing_hyb_list = [
            el for el in next(os.walk(processing_directory))[1]
            if el not in blocked_directories
        ]

        for processing_hyb in processing_hyb_list:

            # Determine the hyb number from the name
            hybridization_number = processing_hyb.split('_hyb')[-1]
            hybridization = 'Hybridization' + hybridization_number
            hyb_dir = processing_directory + processing_hyb + add_slash

            # Parse the Experimental metadata file (serial)
            experiment_infos, image_properties, hybridizations_infos, \
                converted_positions, microscope_parameters = \
                utils.experimental_metadata_parser(hyb_dir)

            # Parse the configuration file
            flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)
            analysis_name = flt_rawcnt_config['analysis_name']

            # ----------------- .nd2 FILE CONVERSION --------------------------

            # Create the temporary subdirectory tree (serial)
            tmp_dir_path, tmp_gene_dirs = utils.create_subdirectory_tree(
                hyb_dir, hybridization, hybridizations_infos, processing_hyb,
                suffix='tmp', add_slash=add_slash)

            # Get the list of the nd2 files to process inside the directory
            files_list = glob.glob(hyb_dir + processing_hyb + '_raw_data' + add_slash + '*.nd2')

            # Get the list of genes that are analyzed in the current hybridization
            gene_list = list(hybridizations_infos[hybridization].keys())

            # Organize the files to process in a list whose order matches
            # gene_list, for parallel processing
            organized_files_list = [f for gene in gene_list for f in files_list if gene + '.nd2' in f]

            # NOTE(review): tmp_gene_dirs is passed below in the order returned
            # by create_subdirectory_tree. The original code also built a
            # gene-ordered copy of it that was never used -- confirm that the
            # returned order matches gene_list.

            # Each .nd2 file will be processed in a worker part of a different
            # node: get the address of one process/node to use for conversion
            node_addresses = utils.identify_nodes(client)
            workers_conversion = [list(el.items())[0][1] for el in node_addresses.values()]

            # Run the conversion
            futures_processes = client.map(
                io.nd2_to_npy, gene_list, organized_files_list, tmp_gene_dirs,
                processing_hyb=processing_hyb,
                use_ram=flt_rawcnt_config['use_ram'],
                max_ram=flt_rawcnt_config['max_ram'],
                workers=workers_conversion)
            client.gather(futures_processes)

            # -----------------------------------------------------------------

            # ----------------- FILTERING AND RAW COUNTING --------------------

            # Create the directories where to save the filtered images
            suffix = 'filtered_png'
            filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos,
                    processing_hyb, suffix, add_slash, analysis_name=analysis_name)

            suffix = 'filtered_npy'
            filtered_img_dir_path, filtered_img_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos,
                    processing_hyb, suffix, add_slash, analysis_name=analysis_name)

            # Create the directory where to save the counting
            suffix = 'counting'
            counting_dir_path, counting_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos, processing_hyb,
                    suffix, add_slash, flt_rawcnt_config['skip_tags_counting'],
                    flt_rawcnt_config['skip_genes_counting'],
                    analysis_name=analysis_name)

            if flt_rawcnt_config['illumination_correction']:

                # Create the directory where to save the illumination functions
                suffix = 'illumination_funcs'
                illumination_func_dir_path, illumination_func_gene_dirs = \
                    utils.create_subdirectory_tree(
                        hyb_dir, hybridization, hybridizations_infos, processing_hyb,
                        suffix, add_slash, analysis_name=analysis_name)

                # Loop through channels and calculate illumination
                for gene in gene_list:

                    flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/' + processing_hyb + '_' + gene + '_tmp/*.npy')

                    logger.debug('Create average image for gene %s', gene)

                    # Chunking the image list: one chunk per available core
                    num_chunks = sum(client.ncores().values())
                    chunked_list = utils.list_chunking(flist_img_to_filter, num_chunks)

                    # Scatter the images sublists to process in parallel
                    futures = client.scatter(chunked_list)

                    # Create dask processing graph: partial mean of every
                    # chunk, then the average of the partial means
                    partial_means = [delayed(utils.partial_image_mean)(future) for future in futures]
                    img_mean_all = delayed(sum)(partial_means)
                    img_mean_all = img_mean_all / float(len(futures))

                    # Compute the graph
                    img_mean = img_mean_all.compute()

                    logger.debug('Create illumination function for gene %s', gene)
                    # Create illumination function
                    illumination = filters.gaussian(img_mean, sigma=(20, 300, 300))

                    # Normalization of the illumination: maximum along the
                    # first axis, scaled so that the brightest value is 1
                    illumination_flat = np.amax(illumination, axis=0)
                    illumination_norm = illumination_flat / np.amax(illumination_flat)

                    logger.debug('Save illumination function for gene %s', gene)
                    # Save the illumination function
                    illumination_path = [ill_path for ill_path in illumination_func_gene_dirs if gene in ill_path][0]
                    illumination_fname = illumination_path + gene + '_illumination_func.npy'
                    np.save(illumination_fname, illumination_norm, allow_pickle=False)

                    # Broadcast the illumination function to all the cores
                    # NOTE(review): the future returned by scatter is discarded
                    # and the array itself is passed to client.map below --
                    # confirm whether the scattered future was meant to be
                    # passed instead.
                    client.scatter(illumination_norm, broadcast=True)

                    logger.debug('Filtering %s', gene)
                    # Filtering and counting
                    futures_processes = client.map(
                        counting.filtering_and_counting_ill_correction,
                        flist_img_to_filter,
                        illumination_function=illumination_norm,
                        filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                        filtered_img_gene_dirs=filtered_img_gene_dirs,
                        counting_gene_dirs=counting_gene_dirs,
                        plane_keep=flt_rawcnt_config['plane_keep'],
                        min_distance=flt_rawcnt_config['min_distance'],
                        stringency=flt_rawcnt_config['stringency'],
                        skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                        skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])
                    client.gather(futures_processes)

            else:
                for gene in gene_list:
                    flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/' + processing_hyb + '_' + gene + '_tmp/*.npy')
                    # filtering
                    logger.debug('Filtering without illumination correction %s', gene)

                    futures_processes = client.map(
                        counting.filtering_and_counting,
                        flist_img_to_filter,
                        filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                        filtered_img_gene_dirs=filtered_img_gene_dirs,
                        counting_gene_dirs=counting_gene_dirs,
                        plane_keep=flt_rawcnt_config['plane_keep'],
                        min_distance=flt_rawcnt_config['min_distance'],
                        stringency=flt_rawcnt_config['stringency'],
                        skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                        skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

                    client.gather(futures_processes)

            # -----------------------------------------------------------------

            # The steps that used to follow here (combining the filtered data
            # in a .ppf.hdf5 file and stitching the reference gene) were
            # disabled and kept only as commented-out code; they are not run
            # by this script.

            # ----------------- DELETE FILES ------------------------
            # Don't delete the *.npy files here because they can be used to
            # create the final images using the apply stitching related function

    finally:
        # Release the dask resources also when a step above fails
        if client is not None:
            client.close()
        if cluster is not None:
            cluster.close()
예제 #2
0
def filtering_speed():
    """
    Run the filtering + raw counting step, followed by a counting-only step,
    on all the hybridization folders combined in a processing folder.
    The input parameters are passed using argparse.

    Unlike preprocessing_script no .nd2 conversion and no illumination
    correction are run: the ``.npy`` images are read from the existing
    ``<hyb>_tmp`` folders of every hybridization.

    The dask client (and the local cluster, when one is started) is closed
    on exit, also when a processing step raises.

    Parameters:
    -----------

    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer
        using nCPUs-1 workers (at least one).

    path: string
        Path to the processing directory (required)

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    # Every later step needs the directory: fail at parse time instead of
    # after the dask cluster has been started.
    parser.add_argument('-path', required=True, help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    client = None
    cluster = None
    try:
        if scheduler_address:
            # Start dask client on server or cluster
            client = Client(scheduler_address)
        else:
            # Start dask client on local machine using all the available
            # cores - 1. Never ask for fewer than one worker: on a
            # single-core machine cpu_count() - 1 is 0, which would start a
            # cluster without workers.
            ncores = max(1, multiprocessing.cpu_count() - 1)
            cluster = LocalCluster(n_workers=ncores)
            client = Client(cluster)

        # Subdirectories of the processing_directory that need to be skipped
        # for the analysis
        blocked_directories = ['_logs']

        # Starting logger
        utils.init_file_logger(processing_directory)
        logger = logging.getLogger()

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory, os_windows)

        # Get a list of the hybridizations to process: the first level
        # subdirectories minus the blocked ones
        processing_hyb_list = [
            el for el in next(os.walk(processing_directory))[1]
            if el not in blocked_directories
        ]

        for processing_hyb in processing_hyb_list:

            # Determine the hyb number from the name
            hybridization_number = processing_hyb.split('_hyb')[-1]
            hybridization = 'Hybridization' + hybridization_number
            hyb_dir = processing_directory + processing_hyb + add_slash

            # Parse the Experimental metadata file (serial)
            experiment_infos, image_properties, hybridizations_infos, \
                converted_positions, microscope_parameters = \
                utils.experimental_metadata_parser(hyb_dir)

            # Parse the configuration file
            flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)
            analysis_name = flt_rawcnt_config['analysis_name']

            # ----------------- FILTERING AND RAW COUNTING --------------------

            # Create the directories where to save the filtered images
            suffix = 'filtered_png'
            filtered_png_img_dir_path, filtered_png_img_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos,
                    processing_hyb, suffix, add_slash, analysis_name=analysis_name)

            suffix = 'filtered_npy'
            filtered_img_dir_path, filtered_img_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos,
                    processing_hyb, suffix, add_slash, analysis_name=analysis_name)

            # Create the directory where to save the counting
            suffix = 'counting'
            counting_dir_path, counting_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos, processing_hyb,
                    suffix, add_slash, flt_rawcnt_config['skip_tags_counting'],
                    flt_rawcnt_config['skip_genes_counting'],
                    analysis_name=analysis_name)

            for gene in hybridizations_infos[hybridization].keys():
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/' + processing_hyb + '_' + gene + '_tmp/*.npy')
                # filtering
                logger.debug('Filtering without illumination correction %s', gene)

                futures_processes = client.map(
                    counting.filtering_and_counting,
                    flist_img_to_filter,
                    filtered_png_img_gene_dirs=filtered_png_img_gene_dirs,
                    filtered_img_gene_dirs=filtered_img_gene_dirs,
                    counting_gene_dirs=counting_gene_dirs,
                    plane_keep=flt_rawcnt_config['plane_keep'],
                    min_distance=flt_rawcnt_config['min_distance'],
                    stringency=flt_rawcnt_config['stringency'],
                    skip_genes_counting=flt_rawcnt_config['skip_genes_counting'],
                    skip_tags_counting=flt_rawcnt_config['skip_tags_counting'])

                client.gather(futures_processes)

            # ----------------- RAW COUNTING ONLY------------------------

            # A missing/empty entry in the configuration means "skip nothing"
            skip_genes_counting = flt_rawcnt_config['skip_genes_counting'] or []
            skip_tags_counting = flt_rawcnt_config['skip_tags_counting'] or []

            # Create the directory where to save the counting
            suffix = 'counting'
            counting_dir_path, counting_gene_dirs = \
                utils.create_subdirectory_tree(
                    hyb_dir, hybridization, hybridizations_infos, processing_hyb,
                    suffix, add_slash, flt_rawcnt_config['skip_tags_counting'],
                    flt_rawcnt_config['skip_genes_counting'],
                    analysis_name=analysis_name)

            for gene in hybridizations_infos[hybridization].keys():

                # Skip the gene when it is explicitly listed or when its name
                # contains one of the skip tags. The previous test
                # (`gene not in skip_genes or [tags not in gene]`) still
                # processed a listed gene as soon as one tag was absent from
                # its name.
                if gene in skip_genes_counting or any(tag in gene for tag in skip_tags_counting):
                    continue

                # NOTE(review): counting runs on the .npy files of the `_tmp`
                # folder. The original code also built the path of the gene
                # `filtered_npy` directory but never used it -- confirm which
                # images counting_only is expected to read.
                flist_img_to_filter = glob.glob(hyb_dir + processing_hyb + '_tmp/' + processing_hyb + '_' + gene + '_tmp/*.npy')

                logger.debug('Counting only %s', gene)

                futures_processes = client.map(
                    counting.counting_only,
                    flist_img_to_filter,
                    counting_gene_dirs=counting_gene_dirs,
                    min_distance=flt_rawcnt_config['min_distance'],
                    stringency=flt_rawcnt_config['stringency'])

                client.gather(futures_processes)

    finally:
        # Release the dask resources also when a step above fails
        if client is not None:
            client.close()
        if cluster is not None:
            cluster.close()
예제 #3
0
def apply_stitching():

    """
    Script to apply the registration to all the osmFISH channels. It will create
    a stitched image in an hdf5 file

    For every hybridization to process (the list given by ``hybs_to_stitch``
    in the configuration file, or all of them when it is 'All') a
    ``<analysis_name>_<ExperimentName>_<hyb>.reg.sf.hdf5`` file is written in
    the reference files folder. For each gene the blending mask tiles are
    saved, the tiles are blended in parallel on the dask cluster and the
    final image is written; the temporary blended tiles and masks
    directories are then removed.

    The hdf5 file is closed and the dask client (and the local cluster, when
    one is started) is shut down also when a processing step raises.

    All the parameters are entered via argparse

    Parameters:
    -----------

    experiment_path: string
        Path to the folder with the hybridizations (required)
    reference_files_path: string
        Path to the folder with the _reg_data.pkl files (required)
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer
        using nCPUs-1 workers (at least one).

    Raises:
    -------

    ValueError
        If ``hybs_to_stitch`` in the configuration is neither a list nor 'All'.

    """

    parser = argparse.ArgumentParser(description='Create the stitched images \
                                    after registration')

    # Both paths are needed by every later step: fail at parse time instead
    # of after the dask cluster has been started.
    parser.add_argument('-experiment_path', required=True, help='path to the folder with the hybridizations')
    parser.add_argument('-reference_files_path', required=True, help='path to the folder with the \
                        _reg_data.pkl files')
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')
    args = parser.parse_args()

    processing_experiment_directory = args.experiment_path
    stitched_reference_files_dir = args.reference_files_path
    # Dask scheduler address
    scheduler_address = args.scheduler

    client = None
    cluster = None
    try:
        if scheduler_address:
            # Start dask client on server or cluster
            client = Client(scheduler_address)
        else:
            # Start dask client on local machine using all the available
            # cores - 1. Never ask for fewer than one worker: on a
            # single-core machine cpu_count() - 1 is 0, which would start a
            # cluster without workers.
            ncores = max(1, multiprocessing.cpu_count() - 1)
            cluster = LocalCluster(n_workers=ncores)
            client = Client(cluster)

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directories
        processing_experiment_directory = utils.check_trailing_slash(processing_experiment_directory, os_windows)
        stitched_reference_files_dir = utils.check_trailing_slash(stitched_reference_files_dir, os_windows)

        # Starting logger
        utils.init_file_logger(processing_experiment_directory)
        logger = logging.getLogger()

        # Collect the infos of the experiment and the processing
        # Parse the Experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(processing_experiment_directory)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(processing_experiment_directory)
        analysis_name = flt_rawcnt_config['analysis_name']

        # Get the reference gene used
        reference_gene = flt_rawcnt_config['reference_gene']

        # Stitch the image in 2D or 3D (3D need more work/testing)
        nr_dim = flt_rawcnt_config['nr_dim']

        # Determine the hybridizations to process
        hybs_to_stitch = flt_rawcnt_config['hybs_to_stitch']
        if isinstance(hybs_to_stitch, list):
            hybridizations_to_process = hybs_to_stitch
        elif hybs_to_stitch == 'All':
            hybridizations_to_process = list(hybridizations_infos.keys())
        else:
            raise ValueError('Error in the hybridizations to stitch')

        for hybridization in hybridizations_to_process:

            # Determine the genes to stitch in the processing hybridization
            genes_processing = list(hybridizations_infos[hybridization].keys())

            hyb_short = re.sub('Hybridization', 'hyb', hybridization)
            processing_hyb = experiment_infos['ExperimentName'] + '_' + hyb_short
            hyb_dir = processing_experiment_directory + processing_hyb + add_slash

            # Name of the hdf5 file that will store the stitched images
            # for the current hybridization
            tile_file_base_name = analysis_name + '_' + experiment_infos['ExperimentName'] + '_' + hyb_short
            stitching_file_name = tile_file_base_name + '.reg.sf.hdf5'

            data_name = (tile_file_base_name
                         + '_' + reference_gene
                         + '_stitching_data_reg')

            # The context manager closes the hdf5 file also when one of the
            # steps below raises.
            # replace 'w' with 'a' as soon as you fix the error
            with h5py.File(stitched_reference_files_dir + stitching_file_name, 'w', libver='latest') as stitching_file:

                # Determine the tiles organization
                joining, tiles, nr_pixels, z_count, micData = stitching.get_place_tile_input_apply_npy(
                    hyb_dir, stitched_reference_files_dir, data_name, image_properties, nr_dim)

                for gene in genes_processing:

                    # Create the hdf5 file structure
                    stitched_group, linear_blending, blend = \
                        hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                            stitching_file, joining, nr_pixels, gene, blend='non linear')

                    # Fill the hdf5 containing the stitched image with empty
                    # data and create the blending mask
                    stitched_group['final_image'][:] = np.zeros(joining['final_image_shape'], dtype=np.uint16)
                    if blend is not None:
                        # make mask
                        stitched_group['blending_mask'][:] = np.zeros(joining['final_image_shape'][-2:], dtype=np.uint16)
                        tilejoining.make_mask(joining, nr_pixels, stitched_group['blending_mask'])

                    filtered_img_gene_dirs_path = hyb_dir + analysis_name + '_' + processing_hyb + '_filtered_npy' + add_slash
                    filtered_img_gene_dirs = glob.glob(filtered_img_gene_dirs_path + '*')

                    # Create the subdirectory used to save the blended tiles
                    suffix = 'blended_tiles'
                    blended_tiles_directory = utils.create_single_directory(
                        hyb_dir, gene, hybridization, processing_hyb, suffix, add_slash,
                        analysis_name=analysis_name)

                    # Get the directory with the filtered npy images of the
                    # gene to use for stitching (first directory whose path
                    # contains the gene name)
                    stitching_files_dir = [npy_dir for npy_dir in filtered_img_gene_dirs if gene in npy_dir][0]
                    stitching_files_dir = stitching_files_dir + add_slash

                    # Create the tmp directory where to save the masks
                    suffix = 'masks'
                    masked_tiles_directory = utils.create_single_directory(
                        hyb_dir, gene, hybridization, processing_hyb, suffix, add_slash,
                        analysis_name=analysis_name)

                    # Create and save the mask files: one nr_pixels x nr_pixels
                    # crop of the blending mask per tile with a valid corner
                    for corn_value, corner_coords in joining['corner_list']:
                        if not np.isnan(corner_coords[0]):
                            row_start = int(corner_coords[0])
                            col_start = int(corner_coords[1])
                            cur_mask = stitched_group['blending_mask'][row_start:row_start + int(nr_pixels),
                                                                       col_start:col_start + int(nr_pixels)]

                            fname = masked_tiles_directory + analysis_name + '_' + processing_hyb + '_' + gene + '_masks_joining_pos_' + str(corn_value)
                            np.save(fname, cur_mask)

                    # Blend all the tiles and save them in a directory
                    futures_processes = client.map(
                        tilejoining.generate_blended_tile_npy, joining['corner_list'],
                        stitching_files_dir=stitching_files_dir,
                        blended_tiles_directory=blended_tiles_directory,
                        masked_tiles_directory=masked_tiles_directory,
                        analysis_name=analysis_name,
                        processing_hyb=processing_hyb, reference_gene=gene,
                        micData=micData, tiles=tiles, nr_pixels=nr_pixels,
                        linear_blending=linear_blending)

                    _ = client.gather(futures_processes)

                    # Write the stitched image
                    tilejoining.make_final_image_npy(joining, stitching_file, blended_tiles_directory, tiles, gene, nr_pixels)
                    stitching_file.flush()

                    # Remove directories with blended tiles and masks
                    shutil.rmtree(blended_tiles_directory)
                    shutil.rmtree(masked_tiles_directory)

    finally:
        # Release the dask resources also when a step above fails
        if client is not None:
            client.close()
        if cluster is not None:
            cluster.close()
예제 #4
0
def dots_coords_correction():
    """
    Collect the raw countings from all the hybridizations of an experiment,
    correct the coords according to the registration of the reference gene
    and remove the dots that are duplicated in the overlapping regions
    between the images.

    The result is pickled in the experiment folder as
    <ExperimentName>_all_cleaned_raw_counting_data.pkl. The saved dictionary
    has three keys ('all_coords_cleaned', 'all_coords', 'removed_coords'),
    each one mapping '<gene>_<hybridization>' to the corresponding coords.

    Input via argparse

    Parameters:
    -----------

    path: string
        Exact path to the experiment folder
    pixel_radius: int
        Radius of pixel used to create the neighbourhood (nhood) used to define
        when two dots are the same
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003).
        default = False. If False the process will run on the local computer using nCPUs-1

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Dots coords consolidation \
                                    and correction')

    parser.add_argument('-path', help='path to the experiment folder')
    parser.add_argument('-pixel_radius', help='radius of pixel used to create the nhood \
                            that is used to define that two pixels are the same', 
                            type=int)
    parser.add_argument('-scheduler', default=False, help='dask scheduler address ex. tcp://192.168.0.4:7003')

    args = parser.parse_args()

    # retrieve the parameters
    processing_experiment_directory = args.path
    pxl = args.pixel_radius

    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on local machine using all the available cores -1.
        # Always request at least one worker: on a single core machine
        # cpu_count()-1 is 0 and the cluster would have no worker to run on.
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # The client is released even if one of the processing steps fails
    try:
        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the experiment directory
        processing_experiment_directory = utils.check_trailing_slash(
            processing_experiment_directory, os_windows)

        stitched_reference_files_dir = processing_experiment_directory + 'stitched_reference_files'

        # Check trailing slash in the stitched reference directory
        stitched_reference_files_dir = utils.check_trailing_slash(
            stitched_reference_files_dir, os_windows)

        # Collect the infos of the experiment and the processing
        # Parse the Experimental metadata file (serial)
        experiment_infos, image_properties, hybridizations_infos, \
            converted_positions, microscope_parameters = \
            utils.experimental_metadata_parser(processing_experiment_directory)

        # Parse the configuration file
        flt_rawcnt_config = utils.filtering_raw_counting_config_parser(
            processing_experiment_directory)

        # get the reference gene
        reference_gene = flt_rawcnt_config['reference_gene']

        # get the overlapping percentage and image_size
        overlapping_percentage = image_properties['Overlapping_percentage']

        # Consider a square image
        image_size = image_properties['HybImageSize']['columns']

        # Combine all counts
        all_raw_counts = dots_coords_calculations.combine_raw_counting_results(
            flt_rawcnt_config,
            hybridizations_infos, experiment_infos,
            processing_experiment_directory, stitched_reference_files_dir,
            reference_gene, add_slash)

        # Dictionary with only the selected peaks coords after alignment
        aligned_peaks_dict = all_raw_counts['selected_peaks_coords_aligned']

        # Create list of tuples to process each hybridization/gene on a different core
        combinations = dots_coords_calculations.processing_combinations(
            list(hybridizations_infos.keys()), aligned_peaks_dict)

        # Attach to each combination only its registration data and coords
        # in order to reduce the size of the info transferred in the network
        added_combinations = []
        for combination in combinations:
            hybridization = combination[0]
            gene = combination[1]
            added_combinations.append({
                'hybridization': hybridization,
                'gene': gene,
                'reg_data_combination': all_raw_counts['registration_data'][hybridization],
                'aligned_peaks_dict_gene': aligned_peaks_dict[hybridization][gene],
            })

        # Process each gene in parallel. The submission is done once, after
        # all the combinations have been collected: submitting from inside
        # the loop above reprocesses every combination at each iteration.
        if added_combinations:
            futures_processes = client.map(
                dots_coords_calculations.function_to_run_dots_removal_parallel,
                added_combinations,
                overlapping_percentage=overlapping_percentage,
                image_size=image_size, pxl=pxl)
            cleaned_dots_list = client.gather(futures_processes)
        else:
            # Nothing to process: the saved dictionary will be empty
            cleaned_dots_list = []

        # Convert the list of dictionaries in one single dictionary
        # The saved dictionary contains all the dots, the reference to the tile pos
        # has been removed during the overlapping dots removal step
        all_countings = {
            'all_coords_cleaned': {},
            'all_coords': {},
            'removed_coords': {},
        }

        for el in cleaned_dots_list:
            # Each element is read as {hybridization: {gene: {...}}}; only
            # the first hybridization and the first gene are used
            hybridization = next(iter(el))
            gene = next(iter(el[hybridization]))
            gene_data = el[hybridization][gene]

            renamed_gene = gene + '_' + hybridization

            all_countings['all_coords_cleaned'][renamed_gene] = gene_data['all_coords_cleaned']
            all_countings['all_coords'][renamed_gene] = gene_data['all_coords']
            all_countings['removed_coords'][renamed_gene] = gene_data['removed_coords']

        # Save all the data
        counting_data_name = processing_experiment_directory + experiment_infos['ExperimentName'] + '_all_cleaned_raw_counting_data.pkl'
        with open(counting_data_name, 'wb') as counting_data_file:
            pickle.dump(all_countings, counting_data_file)
    finally:
        client.close()
예제 #5
0
def run_stitching_reference_only():
    """
    This script will stitch the filtered images of the reference gene for
    each hybridization folder found in the processing directory.
    The inputs parameters are passed using argparse.

    For each hybridization the stitched image is written in the
    hybridization folder in a file named
    <analysis_name>_<hybridization folder name>.sf.hdf5. The directories
    with the blended tiles and the masks created during the processing
    are removed at the end of the stitching.

    Parameters:
    -----------
    
    scheduler: string
        tcp address of the dask.distributed scheduler (ex. tcp://192.168.0.4:7003). 
        default = False. If False the process will run on the local computer using nCPUs-1

    path: string
        Path to the experiment directory

    """

    # Inputs of the function
    parser = argparse.ArgumentParser(description='Preprocessing script')
    parser.add_argument(
        '-scheduler',
        default=False,
        help='dask scheduler address ex. tcp://192.168.0.4:7003')
    parser.add_argument('-path', help='processing directory')
    args = parser.parse_args()

    # Directory to process
    processing_directory = args.path
    # Dask scheduler address
    scheduler_address = args.scheduler

    if scheduler_address:
        # Start dask client on server or cluster
        client = Client(scheduler_address)
    else:
        # Start dask client on local machine using all the available cores -1.
        # Always request at least one worker: on a single core machine
        # cpu_count()-1 is 0 and the cluster would have no worker to run on.
        ncores = max(1, multiprocessing.cpu_count() - 1)
        cluster = LocalCluster(n_workers=ncores)
        client = Client(cluster)

    # The client is released even if the stitching of one hybridization fails
    try:
        # Subdirectories of the processing_directory that need to be skipped
        # for the analysis
        blocked_directories = ['_logs']

        # Determine the operating system running the code
        os_windows, add_slash = utils.determine_os()

        # Check trailing slash in the processing directory
        processing_directory = utils.check_trailing_slash(processing_directory,
                                                          os_windows)

        # Get a list of the hybridization to process (first level
        # subdirectories only) without the blocked directories
        processing_hyb_list = [
            el for el in next(os.walk(processing_directory))[1]
            if el not in blocked_directories
        ]

        for processing_hyb in processing_hyb_list:

            # Determine the hyb number from the name
            hybridization_number = processing_hyb.split('_hyb')[-1]
            hybridization = 'Hybridization' + hybridization_number
            hyb_dir = processing_directory + processing_hyb + add_slash

            # Parse the Experimental metadata file (serial)
            experiment_infos, image_properties, hybridizations_infos, \
                converted_positions, microscope_parameters = \
                utils.experimental_metadata_parser(hyb_dir)

            # Parse the configuration file
            flt_rawcnt_config = utils.filtering_raw_counting_config_parser(hyb_dir)

            # ----------------- STITCHING ------------------------

            # Determine the directory of the filtered images
            suffix = 'filtered_npy'
            analysis_name = flt_rawcnt_config['analysis_name']
            sufx_dir_path = hyb_dir + analysis_name + '_' + processing_hyb + '_' + suffix + add_slash

            # Reference gene for stitching
            reference_gene = flt_rawcnt_config['reference_gene']

            filtered_gene_dir = sufx_dir_path + analysis_name + '_' + processing_hyb + '_' + reference_gene + '_' + suffix + add_slash

            # Stitch the image in 2D or 3D (3D need more work/testing)
            nr_dim = flt_rawcnt_config['nr_dim']

            # Estimated overlapping between images according to the Nikon
            # software. np.float64 is used because the np.float_ alias is
            # not available anymore in NumPy 2.0.
            est_overlap = np.float64(image_properties['Overlapping_percentage'])

            # Number of peaks to use for the alignment
            nr_peaks = flt_rawcnt_config['nr_peaks']

            # NOTE(review): the 'y_flip' and 'blend' values of the
            # configuration file are not used by this script: the calls below
            # are hard-coded to y_flip=False, nr_dim=2 (tiles organization)
            # and blend='non linear'. Confirm that this is intended.

            # Get the list of the filtered files of the reference gene
            filtered_files_list = glob.glob(filtered_gene_dir + '*.npy')

            # Names of the hdf5 file that will store the stitched reference
            # image of the current hybridization and of the stitching data
            tile_file_base_name = analysis_name + '_' + processing_hyb
            data_name = (tile_file_base_name + '_' + reference_gene +
                         '_stitching_data')
            stitching_file_name = tile_file_base_name + '.sf.hdf5'

            # The context manager closes the hdf5 file also when one of the
            # stitching steps raises.
            # Opened in 'w' mode: replace with 'a' as soon as you fix the error
            with h5py.File(hyb_dir + stitching_file_name, 'w',
                           libver='latest') as stitching_file:

                # Determine the tiles organization
                tiles, contig_tuples, nr_pixels, z_count, micData = stitching.get_pairwise_input_npy(
                    image_properties,
                    converted_positions,
                    hybridization,
                    est_overlap,
                    y_flip=False,
                    nr_dim=2)

                # Align the tiles
                futures_processes = client.map(
                    pairwisesingle.align_single_pair_npy,
                    contig_tuples,
                    filtered_files_list=filtered_files_list,
                    micData=micData,
                    nr_peaks=nr_peaks)

                # Gather the futures
                data = client.gather(futures_processes)

                # In this case the order of the returned contingency tuples
                # is with the order of the input contig_tuples
                P_all = np.array([data_single[0] for data_single in data])
                P_all = P_all.flat[:]
                covs_all = [data_single[1] for data_single in data]
                alignment = {'P': P_all, 'covs': covs_all}

                # Calculates a shift in global coordinates for each tile
                # (global alignment) and then applies these shifts to the
                # corner coordinates of each tile and returns and saves these
                # shifted corner coordinates.
                joining = stitching.get_place_tile_input(hyb_dir,
                                                         tiles,
                                                         contig_tuples,
                                                         micData,
                                                         nr_pixels,
                                                         z_count,
                                                         alignment,
                                                         data_name,
                                                         nr_dim=nr_dim)

                # Create the hdf5 file structure
                stitched_group, linear_blending, blend = hdf5preparation.create_structures_hdf5_stitched_ref_gene_file_npy(
                    stitching_file,
                    joining,
                    nr_pixels,
                    reference_gene,
                    blend='non linear')

                # Fill the hdf5 containing the stitched image with empty data
                # and create the blending mask
                stitched_group['final_image'][:] = np.zeros(
                    joining['final_image_shape'], dtype=np.float64)
                if blend is not None:
                    # make mask
                    stitched_group['blending_mask'][:] = np.zeros(
                        joining['final_image_shape'][-2:], dtype=np.float64)
                    tilejoining.make_mask(joining, nr_pixels,
                                          stitched_group['blending_mask'])

                # Create the subdirectory used to save the blended tiles
                blended_tiles_directory = utils.create_single_directory(
                    hyb_dir,
                    reference_gene,
                    hybridization,
                    processing_hyb,
                    'blended_tiles',
                    add_slash,
                    analysis_name=analysis_name)

                # Create the tmp directory where to save the masks
                masked_tiles_directory = utils.create_single_directory(
                    hyb_dir,
                    reference_gene,
                    hybridization,
                    processing_hyb,
                    'masks',
                    add_slash,
                    analysis_name=analysis_name)

                # Create and save the mask files: one file for each tile
                # that has a valid (not NaN) corner
                tile_size = int(nr_pixels)
                mask_name_prefix = (masked_tiles_directory + analysis_name +
                                    '_' + processing_hyb + '_' +
                                    reference_gene + '_masks_joining_pos_')
                for corn_value, corner_coords in joining['corner_list']:
                    if np.isnan(corner_coords[0]):
                        continue
                    row_start = int(corner_coords[0])
                    col_start = int(corner_coords[1])
                    cur_mask = stitched_group['blending_mask'][
                        row_start:row_start + tile_size,
                        col_start:col_start + tile_size]
                    np.save(mask_name_prefix + str(corn_value), cur_mask)

                # Blend all the tiles and save them in a directory
                futures_processes = client.map(
                    tilejoining.generate_blended_tile_npy,
                    joining['corner_list'],
                    stitching_files_dir=filtered_gene_dir,
                    blended_tiles_directory=blended_tiles_directory,
                    masked_tiles_directory=masked_tiles_directory,
                    analysis_name=analysis_name,
                    processing_hyb=processing_hyb,
                    reference_gene=reference_gene,
                    micData=micData,
                    tiles=tiles,
                    nr_pixels=nr_pixels,
                    linear_blending=linear_blending)

                # Wait for all the tiles to be blended
                _ = client.gather(futures_processes)

                # Write the stitched image
                tilejoining.make_final_image_npy(joining, stitching_file,
                                                 blended_tiles_directory,
                                                 tiles, reference_gene,
                                                 nr_pixels)

            # Delete the directories with blended tiles and masks (only
            # reached when the stitching of the hybridization succeeded)
            shutil.rmtree(blended_tiles_directory)
            shutil.rmtree(masked_tiles_directory)
    finally:
        client.close()