def combine_with_track_data_batch(inputfolder, trackfolder, outputfolder):
    """
    Add track IDs to the extracted coordinates.

    Parameters
    ----------
    inputfolder : str
        Path to a directory with coordinate files.
    trackfolder : str
        Path to a directory with track files.
    outputfolder : str
        Path to the output directory.
    """
    files = filelib.list_subfolders(inputfolder, extensions=['csv'])
    trackfiles = filelib.list_subfolders(trackfolder, extensions=['xls', 'xlsx'])

    for fn in files:
        parts = fn.split('/')[-1].split('_')
        stem = parts[0] + '_' + parts[1]
        for trf in trackfiles:
            # Match track files from the same subfolder whose name contains the stem.
            if trf.split('/')[0] == fn.split('/')[0] and len(trf.split(stem)) > 1:
                combine_with_track_data(inputfile=inputfolder + fn,
                                        trackfile=trackfolder + trf,
                                        outputfile=outputfolder + fn)
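
# A minimal usage sketch (the folder paths below are hypothetical placeholders,
# not part of the original code):
# combine_with_track_data_batch(inputfolder='output/coordinates/',
#                               trackfolder='data/tracks/',
#                               outputfolder='output/coordinates_with_tracks/')
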
def convolve_batch(inputfolder, psffolder, outputfolder, **kwargs):
    """
    Convolve all cell images in a given input directory with all PSFs
    in a given PSF directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with cell images to convolve.
    psffolder : str
        Directory with PSF images to use for convolution.
    outputfolder : str
        Output directory to save the convolved images.

    Keyword arguments
    -----------------
    max_threads : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    print_progress : bool, optional
        If True, the progress of the computation will be printed.
        Default is True.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if not psffolder.endswith('/'):
        psffolder += '/'
    if not outputfolder.endswith('/'):
        outputfolder += '/'
    if os.path.exists(inputfolder):
        inputfiles = filelib.list_subfolders(inputfolder)
    else:
        inputfiles = []
        warnings.warn('Input directory ' + inputfolder +
                      ' does not exist! Run the "generate_cells" step to create input cells')
    if os.path.exists(psffolder):
        psffiles = filelib.list_subfolders(psffolder)
    else:
        psffiles = []
        warnings.warn('PSF directory ' + psffolder +
                      ' does not exist! Run the "generate_psfs" step to create PSF images')

    # Pair each input image with each PSF.
    items = [(inputfile, psffile) for inputfile in inputfiles for psffile in psffiles]
    kwargs['items'] = items
    kwargs['outputfolder'] = outputfolder
    kwargs['inputfolder'] = inputfolder
    kwargs['psffolder'] = psffolder
    run_parallel(process=__convolve_batch_helper, process_name='Convolution', **kwargs)
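
# A minimal usage sketch (hypothetical paths; keyword arguments as documented above):
# convolve_batch('data/cells/', 'data/psfs/', 'data/convolved/',
#                max_threads=4, print_progress=True)
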
def accuracy_batch(inputfolder, outputfolder, combine_stat=True, **kwargs):
    """
    Compare all images in a given input directory to the corresponding ground truth
    images in a given reference directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with cell images to compare.
    outputfolder : str
        Output directory to save the computed accuracy values.
    combine_stat : bool, optional
        If True, the statistics for all cells will be combined into one csv file.
        Default is True.

    Keyword arguments
    -----------------
    reffolder : str
        Reference directory with ground truth cell images.
    max_threads : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    print_progress : bool, optional
        If True, the progress of the computation will be printed.
        Default is True.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if not outputfolder.endswith('/'):
        outputfolder += '/'
    if os.path.exists(inputfolder):
        kwargs['items'] = filelib.list_subfolders(inputfolder)
    else:
        kwargs['items'] = []
        warnings.warn('Input directory ' + inputfolder + ' does not exist!')
    if not os.path.exists(kwargs['reffolder']):
        # Without ground truth images no accuracy can be computed.
        kwargs['items'] = []
        warnings.warn('Reference directory ' + kwargs['reffolder'] + ' does not exist!')
    kwargs['inputfolder'] = inputfolder
    kwargs['outputfolder'] = outputfolder
    run_parallel(process=__compute_accuracy_measures_batch_helper,
                 process_name='Compute accuracy measures', **kwargs)

    if os.path.exists(outputfolder) and combine_stat is True:
        filelib.combine_statistics(outputfolder)
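
# A minimal usage sketch (hypothetical paths; `reffolder` must point to the
# ground truth images):
# accuracy_batch('data/deconvolved/', 'data/accuracy/',
#                reffolder='data/cells/', combine_stat=True)
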
def split_to_surfaces_batch(inputfolder, outputfolder, combine_tracks=False,
                            adjust_frame_rate=False, metadata_file=None):
    """
    Split surface files located in a given folder into separate files
    for surfaces of individual cells.

    Parameters
    ----------
    inputfolder : str
        Input directory.
    outputfolder : str
        Output directory.
    combine_tracks : bool, optional
        If True, connected time points will be combined into one file.
        Default is False.
    adjust_frame_rate : bool, optional
        If True, the frame rate will be adjusted using the information
        from `metadata_file`.
        Default is False.
    metadata_file : str, optional
        Path to a file with metadata.
        Default is None.
    """
    files = filelib.list_subfolders(inputfolder, extensions=['csv'])

    for fn in files:
        print(fn)
        split_to_surfaces(inputfolder + fn, outputfolder + fn[:-4] + '/',
                          combine_tracks=combine_tracks,
                          adjust_frame_rate=adjust_frame_rate,
                          metadata_file=metadata_file)
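
# A minimal usage sketch (hypothetical paths; `metadata_file` is only needed
# when `adjust_frame_rate` is True):
# split_to_surfaces_batch('data/surfaces/', 'data/single_cells/',
#                         combine_tracks=True,
#                         adjust_frame_rate=False, metadata_file=None)
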
def plot_3D_surfaces(inputfolder, outputfolder, points=True, gridsize=100):
    """
    Plot 3D views of surfaces located in a given directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with surfaces.
    outputfolder : str
        Output directory to save the plots.
    points : bool, optional
        If True, surface points will be displayed.
        Default is True.
    gridsize : int, optional
        Dimension of the square grid to interpolate the surface points.
        Default is 100.
    """
    files = filelib.list_subfolders(inputfolder, extensions=['csv'])

    for fn in files:
        s = Surface(filename=inputfolder + fn)
        s.centrate()
        s.to_spherical()
        s.Rgrid = s.interpolate(grid_size=gridsize)
        mesh = s.plot_surface(points=points)
        mesh.magnification = 3
        filelib.make_folders([os.path.dirname(outputfolder + fn[:-4])])
        mesh.save(outputfolder + fn[:-4] + '.png', size=(200, 200))
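
# A minimal usage sketch (hypothetical paths):
# plot_3D_surfaces('data/single_cells/', 'plots/3D_views/', points=True, gridsize=100)
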
def plot_confusion_matrix(inputfolder, outputfolder, text_to_replace):
    """
    Plot confusion matrices for classification results stored in a given directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with csv files containing actual and predicted classes.
    outputfolder : str
        Output directory to save the confusion matrix plots.
    text_to_replace : list of str
        Substrings to remove from the class names before plotting.
    """
    filelib.make_folders([outputfolder])
    files = filelib.list_subfolders(inputfolder, extensions=['csv'])
    for fn in files:
        stat = pd.read_csv(inputfolder + fn, sep='\t', index_col=0)
        classes = stat['Group'].unique()
        cl_frame = pd.DataFrame({'Class name': classes})
        for i in range(len(cl_frame)):
            cl_frame.at[i, 'Class code'] = \
                stat[stat['Group'] == cl_frame.iloc[i]['Class name']]['Actual class'].iloc[0]
            cl_frame.at[i, 'Class name'] = cl_frame.iloc[i]['Class name'].replace('FB', 'FR')
            for text in text_to_replace:
                cl_frame.at[i, 'Class name'] = cl_frame.iloc[i]['Class name'].replace(text, '')
        cl_frame = cl_frame.sort_values('Class name')
        cl_frame['New class code'] = np.arange(len(cl_frame))
        for i in range(len(cl_frame)):
            # Replace the numeric class codes by the cleaned class names
            # (`.loc` with a boolean mask; `.at` only supports scalar access).
            stat.loc[stat['Actual class'] == cl_frame.iloc[i]['Class code'],
                     'Actual class'] = cl_frame.iloc[i]['Class name']
            stat.loc[stat['Predicted class'] == cl_frame.iloc[i]['Class code'],
                     'Predicted class'] = cl_frame.iloc[i]['Class name']
        plot_confusion_matrix_from_data(stat['Actual class'], stat['Predicted class'],
                                        columns=cl_frame['Class name'],
                                        outputfile=outputfolder + fn[:-4] + '.png')
        plt.close()
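
# A minimal usage sketch (hypothetical paths; the substring to strip from the
# class names is an example value):
# plot_confusion_matrix('output/predictions/', 'plots/confusion/',
#                       text_to_replace=['_isotropic'])
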
def run_parallel(**kwargs):
    """
    Run a given function in a parallel manner.

    Parameters
    ----------
    kwargs : key, value pairings
        Arbitrary keyword arguments.

    Keyword arguments
    -----------------
    *items* : list
        List of items. For each item, the `process` will be called.
        The value of the `item` parameter of `process` will be set to the value
        of the current item from the list.
        Remaining keyword arguments will be passed to the `process`.
    *max_threads* : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    *process* : callable
        The function that will be applied to each item of `kwargs.items`.
        The function should accept the argument `item`, which corresponds to one item
        from `kwargs.items`. An `item` is usually the name of a file that has to be
        processed, or a list of files that have to be combined / convolved / analyzed
        together.
        The function should not return any output; instead, the output should be saved
        in a specified directory.
    *inputfolder* : str
        Input directory with files to process.
    *outputfolder* : str
        Output directory to save the results.
    """
    files = filelib.list_subfolders(kwargs.get('inputfolder'),
                                    extensions=kwargs.get('extensions'))
    channelcodes = kwargs.get('channels', None)
    exclude = kwargs.get('exclude', None)
    if channelcodes is not None:
        files = list_of_files_to_combine(files, channelcodes)
    if exclude is not None:
        # Filter out files whose names end with any of the excluded suffixes.
        nfiles = []
        for fn in files:
            cellfile = True
            for excl in exclude:
                if fn[-len(excl):] == excl:
                    cellfile = False
            if cellfile:
                nfiles.append(fn)
        files = nfiles

    if kwargs.get('debug'):
        # In debug mode, process only the first file in a single thread.
        kwargs['item'] = files[0]
        kwargs.get('process')(**kwargs)
    else:
        kwargs['items'] = files
        prl.run_parallel(**kwargs)

    if kwargs.get('combine', True) and os.path.exists(kwargs.get('outputfolder', 'no_folder')):
        filelib.combine_statistics(kwargs.get('outputfolder'))
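
# A minimal usage sketch (hypothetical process function and paths; `my_process`
# must accept the `item` keyword argument as described above):
# run_parallel(process=my_process, inputfolder='data/in/', outputfolder='data/out/',
#              extensions=['csv'], max_threads=8)
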
def resize_batch(inputfolder, outputfolder, voxel_sizes_for_resizing, **kwargs):
    """
    Resize all cell images in a given input directory in a parallel mode
    and save them in a given output directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with cell images to resize.
    outputfolder : str
        Output directory to save the resized images.
    voxel_sizes_for_resizing : list
        List of new voxel sizes to which the input images should be resized.
        Each item of the list is a scalar (for the same voxel size along all axes)
        or a sequence of scalars (voxel size in z, y and x).

    Keyword arguments
    -----------------
    order : int, optional
        The order of the spline interpolation used for resizing.
        The order has to be in the range 0-5.
        Default is 1.
    max_threads : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    print_progress : bool, optional
        If True, the progress of the computation will be printed.
        Default is True.
    append_resolution_to_filename : bool, optional
        If True, the information about the new voxel size will be added to the
        subdirectory name if the image is stored in a subdirectory, or to the image
        file name if the image is not stored in a subdirectory but in the root directory.
        If False, a new directory will be created for the corresponding voxel size,
        and all file and subdirectory names will be kept as they are.
        Default is True.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if not outputfolder.endswith('/'):
        outputfolder += '/'
    if os.path.exists(inputfolder):
        inputfiles = filelib.list_subfolders(inputfolder)
    else:
        inputfiles = []
        warnings.warn('Input directory ' + inputfolder + ' does not exist!')

    # Pair each input image with each target voxel size.
    items = [(inputfile, resolution) for inputfile in inputfiles
             for resolution in voxel_sizes_for_resizing]
    kwargs['items'] = items
    kwargs['outputfolder'] = outputfolder
    kwargs['inputfolder'] = inputfolder
    run_parallel(process=__resize_batch_helper, process_name='Resize', **kwargs)
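
# A minimal usage sketch (hypothetical paths; the voxel sizes are example values,
# given either as a scalar or as a (z, y, x) sequence):
# resize_batch('data/cells/', 'data/resized/',
#              voxel_sizes_for_resizing=[0.5, [1, 0.3, 0.3]], order=1)
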
def analyze_parallel(debug=False, **kwargs):
    """
    Analyze all images in the input directory in a parallel mode.

    Parameters
    ----------
    debug : bool, optional
        If True, only the first file is processed in a single thread.
        Default is False.
    kwargs : key, value pairings
        Keyword arguments passed to the `analyze` function,
        including `inputfolder` and `outputfolder`.
    """
    files = filelib.list_subfolders(kwargs['inputfolder'])
    if debug:
        kwargs['item'] = files[0]
        analyze(**kwargs)
    else:
        kwargs['items'] = files
        parallel.run_parallel(process=analyze, **kwargs)
    filelib.combine_statistics(kwargs.get('outputfolder') + 'image_statistics/')
    filelib.combine_statistics(kwargs.get('outputfolder') + 'roi_statistics/')
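
# A minimal usage sketch (hypothetical paths; remaining keyword arguments are
# forwarded to `analyze`):
# analyze_parallel(inputfolder='data/images/', outputfolder='output/', max_threads=8)
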
def extract_coordinates_batch(inputfolder, outputfolder):
    """
    Extract cell coordinates from VRML files located in a given directory.

    Parameters
    ----------
    inputfolder : str
        Path to the input directory.
    outputfolder : str
        Path to the output directory.
    """
    files = filelib.list_subfolders(inputfolder, extensions=['wrl', 'vrml'])
    for fn in files:
        # Strip the extension ('wrl' or 'vrml') and its dot before appending '.csv'.
        ext = fn.split('.')[-1]
        extract_coordinates(inputfolder + fn,
                            outputfolder + fn[:-len(ext) - 1] + '.csv')
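
# A minimal usage sketch (hypothetical paths):
# extract_coordinates_batch('data/vrml/', 'output/coordinates/')
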
def plot_spectra(inputfolder, outputfolder, **kwargs):
    """
    Plot bar plots for individual frequency spectra located in a given directory.

    Parameters
    ----------
    inputfolder : str
        Input directory with spectra to plot.
    outputfolder : str
        Output directory to save the bar plots.
    kwargs : key, value pairings
        Arbitrary keyword arguments to pass to the Spectrum.frequency_plot function.
    """
    files = filelib.list_subfolders(inputfolder, extensions=['csv'])
    for fn in files:
        s = Spectrum(filename=inputfolder + fn)
        pl = s.frequency_plot(title=fn[:-4], **kwargs)
        filelib.make_folders([os.path.dirname(outputfolder + fn[:-4])])
        pl.savefig(outputfolder + fn[:-4] + '.png')
        pl.clf()
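
# A minimal usage sketch (hypothetical paths; remaining keyword arguments are
# forwarded to Spectrum.frequency_plot):
# plot_spectra('data/spectra/', 'plots/spectra/')
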
def combine_log(inputfolder):
    """
    Combine the computing-time log files stored in a given directory.

    Parameters
    ----------
    inputfolder : str
        Directory with the computing time logs.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if os.path.exists(inputfolder):
        subfolders = filelib.list_subfolders(inputfolder, extensions=['csv'])
        if len(subfolders) > 0:
            array = []
            for sf in subfolders:
                data = pd.read_csv(inputfolder + sf, sep='\t', index_col=0)
                array.append(data)
            data = pd.concat(array, ignore_index=True, sort=True)
            # Save the combined log next to the input directory.
            data.to_csv(inputfolder[:-1] + '.csv', sep='\t')
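
# A minimal usage sketch (hypothetical path):
# combine_log('output/log/')
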
def combine_surfaces(inputfolder, outputfolder):
    """
    Combine surface files located in the same subfolder of a given input folder.

    Parameters
    ----------
    inputfolder : str
        Input directory with files to combine.
    outputfolder : str
        Output directory to save the combined files.
    """
    filelib.make_folders([outputfolder])
    folders = os.listdir(inputfolder)
    p = re.compile(r'\d*\.*\d+')  # matches numbers in file names
    for folder in folders:
        files = filelib.list_subfolders(inputfolder + folder + '/', extensions=['csv'])
        stat = pd.DataFrame()
        for fn in files:
            curstat = pd.read_csv(inputfolder + folder + '/' + fn, sep='\t')
            # The second-to-last number in the file name encodes the time point.
            curstat['Time'] = p.findall(fn.split('/')[-1])[-2]
            stat = pd.concat([stat, curstat], ignore_index=True)
        stat.to_csv(outputfolder + folder + '.csv', sep='\t')
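
# A minimal usage sketch (hypothetical paths; each subfolder of the input
# directory is combined into one csv file):
# combine_surfaces('data/single_surfaces/', 'data/combined_surfaces/')
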
def deconvolve_batch(inputfolder, outputfolder, deconvolution_algorithm, **kwargs):
    """
    Deconvolve all cell images in a given input directory with multiple algorithms
    and settings.

    Parameters
    ----------
    inputfolder : str
        Input directory with cell images to deconvolve.
    outputfolder : str
        Output directory to save the deconvolved images.
    deconvolution_algorithm : string or sequence of strings
        Name of the deconvolution algorithm from the set of
        {deconvolution_lab_rif, deconvolution_lab_rltv, iterative_deconvolve_3d}.
        If a sequence is provided, all algorithms from the sequence will be tested.

    Keyword arguments
    -----------------
    <deconvolution_algorithm>_<parameter> : scalar or sequence
        Values of the parameters for the deconvolution algorithms to be tested.
        <deconvolution_algorithm> is the name of the algorithm from the set of
        {deconvolution_lab_rif, deconvolution_lab_rltv, iterative_deconvolve_3d}
        to which the parameter values refer.
        <parameter> is the name of the parameter of the specified algorithm.
        For instance, 'deconvolution_lab_rltv_iterations' specifies the value(s)
        for the number of iterations of the 'deconvolution_lab_rltv' algorithm.
        If a sequence of parameter values is provided, all values from the sequence
        will be tested.
    log_computing_time : bool, optional
        If True, the computing time spent on deconvolution will be recorded
        and stored in a given folder.
        Default is False.
    logfolder : str, optional
        Directory to store the computing time when `log_computing_time` is set to True.
        If None, the logfolder will be set to `outputfolder` + "../log/".
        Default is None.
    max_threads : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    print_progress : bool, optional
        If True, the progress of the computation will be printed.
        Default is True.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if not outputfolder.endswith('/'):
        outputfolder += '/'
    if os.path.exists(inputfolder):
        inputfiles = filelib.list_subfolders(inputfolder)
    else:
        inputfiles = []
        warnings.warn('Input directory ' + inputfolder + ' does not exist!')

    algorithm = np.array([deconvolution_algorithm]).flatten()
    items = []
    for alg in algorithm:
        # Collect all parameter values provided for the current algorithm
        # and generate all combinations of these values.
        alg_params = []
        alg_param_names = []
        for kw in kwargs:
            if kw.startswith(alg):
                alg_param_names.append(kw)
                alg_params.append(np.array([kwargs[kw]]).flatten())
        alg_params = list(itertools.product(*alg_params))
        for cur_params in alg_params:
            param_args = dict()
            for i in range(len(alg_param_names)):
                # Strip the algorithm prefix and the separating underscore
                # to recover the plain parameter name.
                param_args[alg_param_names[i].split(alg)[-1][1:]] = cur_params[i]
            items.append((alg, param_args))
    kwargs['items'] = [(inputfile,) + item for inputfile in inputfiles for item in items]
    kwargs['outputfolder'] = outputfolder
    kwargs['inputfolder'] = inputfolder
    kwargs['imagej_path'] = deconvolution.get_fiji_path()
    if deconvolution.get_fiji_path() is None:
        raise TypeError("Fiji path is not specified! "
                        "Run `python setup.py install` and specify the Fiji path")
    run_parallel(process=__deconvolve_batch_helper, process_name='Deconvolve', **kwargs)
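
# A minimal usage sketch (hypothetical paths and parameter values; the algorithm
# and parameter names follow the scheme documented above):
# deconvolve_batch('data/convolved/', 'data/deconvolved/',
#                  deconvolution_algorithm='deconvolution_lab_rltv',
#                  deconvolution_lab_rltv_iterations=[5, 10],
#                  log_computing_time=True)
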
def add_noise_batch(inputfolder, outputfolder, noise_kind, snr,
                    test_snr_combinations=False, **kwargs):
    """
    Add synthetic noise to all images in a given input directory in a parallel mode
    and save them in a given output directory.
    All combinations of the given Gaussian and Poisson SNRs are generated by first
    adding the Gaussian noise and then adding the Poisson noise.

    Parameters
    ----------
    inputfolder : str
        Input directory with cell images to which noise should be added.
    outputfolder : str
        Output directory to save the noisy images.
    noise_kind : string, sequence of strings or None
        Name of the method to generate noise from the set of {gaussian, poisson}.
        If a sequence is provided, several noise types will be added.
        If None, no noise will be added.
    snr : float or sequence of floats
        Target signal-to-noise ratio(s) (SNR) for each noise type.
        If None, no noise is added.
    test_snr_combinations : bool, optional
        If True and several noise types are provided in the `noise_kind` argument,
        all combinations of the values provided in `snr` will be tested for each
        noise type.
        Default is False.

    Keyword arguments
    -----------------
    max_threads : int, optional
        The maximal number of processes to run in parallel.
        Default is 8.
    print_progress : bool, optional
        If True, the progress of the computation will be printed.
        Default is True.
    """
    if not inputfolder.endswith('/'):
        inputfolder += '/'
    if not outputfolder.endswith('/'):
        outputfolder += '/'
    if os.path.exists(inputfolder):
        inputfiles = filelib.list_subfolders(inputfolder)
    else:
        inputfiles = []
        warnings.warn('Input directory ' + inputfolder + ' does not exist!')

    kind = np.array([noise_kind]).flatten()
    snr = np.array([snr]).flatten()
    if len(kind) > 1 and test_snr_combinations:
        # Test each combination of SNR values across the noise types.
        snr_items = list(itertools.product(*[snr] * len(kind)))
        items = [(inputfile, kind, list(snr_item))
                 for inputfile in inputfiles for snr_item in snr_items]
    else:
        items = [(inputfile, kind, snr1) for inputfile in inputfiles for snr1 in snr]
    kwargs['items'] = items
    kwargs['outputfolder'] = outputfolder
    kwargs['inputfolder'] = inputfolder
    run_parallel(process=__add_noise_batch_helper, process_name='Add noise', **kwargs)
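
# A minimal usage sketch (hypothetical paths; the SNR values are example values):
# add_noise_batch('data/convolved/', 'data/noisy/',
#                 noise_kind=['gaussian', 'poisson'], snr=[2, 5],
#                 test_snr_combinations=True)
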