def get_mps(self, mp_name: str, use_mp: bool, datadir: pl.Path) -> pd.Series:
    """Collect the sample's measurement point (MP) for anchoring.

    When `use_mp` is true, the first channel path containing
    ``_<mp_name>_`` is removed from ``self.channelpaths``, its
    ``*Position.csv`` file is read, and the X/Y positions are projected
    with ``self.project_mps``. The resulting bin codes are written to
    ``MPs.csv`` in the sample directory.

    When `use_mp` is false, a zero-valued Series named after the sample is
    saved to both `datadir` and the sample directory instead.

    Returns:
        The projected MP bin, the zero Series when MPs are unused, or
        None when no usable MP position data could be found.
    """
    if not use_mp:
        # Sets measurement point values to zero when MP's are not used
        mp_bin = pd.Series(0, name=self.name)
        system.save_to_file(mp_bin, datadir, "MPs.csv")
        system.save_to_file(mp_bin, self.sampledir, "MPs.csv", append=False)
        return mp_bin

    # Start from "not found" so that empty or unreadable data can never
    # leave mp_bin unbound at the return statement.
    mp_bin = None
    try:  # Get measurement point for anchoring
        mp_dir_path = next(
            self.channelpaths.pop(i)
            for i, s in enumerate(self.channelpaths)
            if str('_' + mp_name + '_') in str(s))
        mp_path = next(mp_dir_path.glob("*Position.csv"))
        mp_data = system.read_data(mp_path, header=Sett.header_row,
                                   test=False)
        # read_data may hand back None for a missing or empty file
        if mp_data is not None:
            mp_data = mp_data.loc[:, ['Position X', 'Position Y']]
            if not mp_data.empty:
                mp_bin = self.project_mps(mp_data, datadir,
                                          filename="MPs.csv")
                mp_df = pd.DataFrame({'MP': mp_bin.values.codes})
                mp_df.to_csv(self.sampledir.joinpath("MPs.csv"),
                             index=False)
    except (StopIteration, ValueError, KeyError):
        # KeyError: the position columns are absent from the file
        mp_bin = None

    if mp_bin is None:
        msg = f'could not find MP position for {self.name}'
        lg.logprint(LAM_logger, msg, 'e')
        print(" -> Failed to find MP position data.")
    return mp_bin
def __init__(self, paths=None, child=False):
    """Populate class-level variables shared by all sample groups.

    Child instances (`child=True`) skip the set-up entirely. Otherwise
    the group names, data file listings, sample directories, MP table,
    optional anchoring bin and group colour palette are stored on the
    ``Samplegroups`` class.
    """
    if child:
        return

    # Variables related to all samples; later passed on to child classes.
    Samplegroups._groups = sorted(Store.samplegroups)
    data_dir = paths.datadir
    Samplegroups._chanPaths = list(data_dir.glob('Norm_*'))
    Samplegroups.sample_paths = [
        entry for entry in paths.samplesdir.iterdir() if entry.is_dir()
    ]
    Samplegroups._addData = list(paths.datadir.glob('Avg_*'))

    # Data and other usable directories
    Samplegroups.paths = paths

    # Total length of needed data matrix of all anchored samples
    Samplegroups.bin_length = Store.totalLength

    # Get MPs of all samples
    Samplegroups.sample_mps = system.read_data(
        paths.datadir.joinpath('MPs.csv'), header=0, test=False)

    # If anchor point index is defined, find the start index of samples
    if Store.center is not None:
        Samplegroups.center_bin = Store.center

    # Assign color for each sample group
    colors = sns.xkcd_palette(Sett.palette_colors)
    for idx, group in enumerate(Samplegroups._groups):
        Samplegroups.grp_palette[group] = colors[idx]

    lg.logprint(LAM_logger, 'Sample groups established.', 'i')
def ask_new_channel(border_channel):
    """Ask the user whether to switch the border detection channel.

    Returns True when a new channel was given and applied, False when the
    user declines or when dialogs are suppressed by ``Sett.force_dialog``.
    """
    # If forcing no user input
    if Sett.force_dialog:
        msg = "Border detection data not found for all samples."
        lg.logprint(LAM_logger, msg, 'i')
        return False

    print('\a')
    question = (
        f'Border detection data not found.\nCurrent border detection channel is {border_channel}.\n'
        f'Change channel? [y/n]'
    )
    # Ask input until satisfied
    while True:
        reply = system.ask_user(question)  # Ask whether to change channel
        if reply in ('N', 'n'):
            return False
        if reply not in ('Y', 'y'):
            print('Command not understood.\n')
            continue

        # Ask channel name
        new_channel = system.ask_user("Give name of new border detection channel: ")
        change_keys(border_channel, new_channel)  # Change variables
        Sett.border_channel = new_channel
        msg = f'Border detection channel changed from {border_channel} to {new_channel}.'
        print('\n' + msg)
        lg.logprint(LAM_logger, msg, 'i')
        return True
def clustering(self, dist=10):
    """Handle data for finding clusters of cells.

    Every channel path of the sample whose stem matches (case-insensitive)
    a name in ``Sett.cluster_channels`` is read, stripped of any earlier
    ``ClusterID`` columns, and passed to ``self.find_distances`` with
    ``clusters=True``.

    Args:
        dist: Maximum distance for considering cells to be clustered.
    """
    kws = {'Dist': dist}  # Maximum distance for considering clustering
    # Listing of paths of channels on which clusters are to be found
    cluster_chans = [
        p for p in self.channelPaths for t in Sett.cluster_channels
        if t.lower() == p.stem.lower()
    ]
    for path in cluster_chans:  # Loop paths, read file, and find clusters
        try:
            data = system.read_data(path, header=0)
        except (FileNotFoundError, AttributeError):
            msg = "No file for channel {}".format(path.stem)
            lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
            print("-> {}".format(msg))
            # Skip this channel; never fall through with the previous
            # channel's data still bound to `data`.
            continue
        if data is None:  # unreadable file, nothing to cluster
            continue
        # Discard earlier versions of found clusters, if present
        data = data.loc[:, ~data.columns.str.contains('ClusterID')]
        data.name = path.stem  # The name of the clustering channel
        # Find clusters
        self.find_distances(data, vol_incl=Sett.cl_inclusion,
                            compare=Sett.cl_incl_type, clusters=True, **kws)
def subset_data(data, compare, vol_incl, sample):
    """Return the index of cells that pass a threshold on the filter column.

    The filter column is located by matching ``Sett.incl_col``
    (case-insensitive) against the column labels. With
    ``compare == 'greater'`` rows with values >= `vol_incl` are kept; any
    other value of `compare` keeps rows with values <= `vol_incl`.
    Returns None if `data` is not a DataFrame.
    """
    if not isinstance(data, pd.DataFrame):
        lg.logprint(LAM_logger, 'Wrong data type for subset_data()', 'e')
        msg = 'Wrong datatype for find_distance, Has to be pandas DataFrame.'
        print(msg)
        return None

    # Search for the filtering column:
    col_mask = data.columns.str.match(re.compile(Sett.incl_col, re.I))

    # Report missing or ambiguous filter columns:
    if not col_mask.any():
        e_msg = f"Column '{Sett.incl_col}' not found for {sample} {data.name}."
        print(f"ERROR: {e_msg}\n")
        lg.logprint(LAM_logger, e_msg, 'e')
    elif sum(col_mask) > 1:
        id_str = f"{sample} {data.name}"
        msg = f"Multiple columns with '{Sett.incl_col}' found for " + id_str
        print(f"WARNING: {msg}. Give specific name for filtering column.\n")

    # Find indices of data to retain:
    keep_greater = compare.lower() == 'greater'
    values = data.loc[:, col_mask].values
    if keep_greater:
        keep = values >= vol_incl  # cells of greater value
    else:
        keep = values <= vol_incl  # cells of lesser value
    return data.loc[keep, :].index
def project(paths):
    """Project the features of every sample's channels onto its vector.

    For each sample directory in the work directory (excluding
    'Analysis Data') the MP is collected, every non-MP channel is
    projected, widths are defined for the vector channel when requested,
    and the per-bin counts are computed.
    """
    lg.logprint(LAM_logger, 'Begin channel projection and counting.', 'i')
    print("\n---Projecting and counting channels---")

    # All sample directories in the root directory
    sample_dirs = [
        d for d in Sett.workdir.iterdir()
        if d.is_dir() and d.stem != 'Analysis Data'
    ]
    for sample_dir in sample_dirs:
        # Initialize sample variables
        sample = GetSample(sample_dir, paths, process=False, projection=True)
        print(f" {sample.name} ...")

        # Find anchoring point of the sample
        sample.MP = sample.get_mps(Sett.MPname, Sett.useMP, paths.datadir)

        # Channel paths other than the MP channel; listed after get_mps,
        # which has already been called on the sample at this point.
        channel_dirs = [
            c for c in sample.channelpaths
            if Sett.MPname.lower() != str(c).split('_')[-2].lower()
        ]
        for channel_dir in channel_dirs:
            channel = GetChannel(channel_dir, sample, Sett.AddData,
                                 paths.datadir)

            # If no variance in found additional data, it is discarded.
            if channel.datafail:
                datatypes = ', '.join(channel.datafail)
                info = "Invariant data discarded"
                print(f" -> {info} - {channel.name}: {datatypes}")

            # Project features of channel onto vector
            sample.data = sample.project_channel(channel)
            if channel.name == Sett.vectChannel and Sett.measure_width:
                DefineWidths(sample.data, sample.vector, sample.sampledir,
                             paths.datadir)

            # Count occurrences in each bin
            if channel.name != "MPs":
                sample.find_counts(channel.name, paths.datadir)

    lg.logprint(LAM_logger, 'All channels projected and counted.', 'i')
def __call__(self, func, *args, **kws):
    """Create a plot with `func`, adjust its layout, decorate and save it.

    Args:
        func: Plotting function; called as ``func(self, **plot_kws)``.
        *args: Flags; 'no_grid' skips canvas creation, and all flags are
            forwarded to ``self.add_elements``.
        **kws: Plot keywords merged over ``MakePlot.base_kws``. An optional
            'adjust' mapping provides values for ``fig.subplots_adjust``.
    """
    plot_kws = merge_kws(MakePlot.base_kws, kws)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=UserWarning)
        # Make canvas if needed:
        if 'no_grid' not in args:
            self.g = self.get_facet(**plot_kws)
        # Plot data
        self.g = func(self, **plot_kws)

    if self.plot_error:
        msg = "Plot not saved"
        print("INFO: {}".format(msg))
        lg.logprint(LAM_logger, msg, 'w')
        return

    # Adjust plot sizes so that everything fits properly
    fig = plt.gcf()
    if 'adjust' not in kws:
        fig.subplots_adjust(top=0.85, bottom=0.2, hspace=0.75)
    else:
        adjust = kws['adjust']
        sides = ('top', 'bottom', 'right', 'left', 'wspace', 'hspace')
        fig.subplots_adjust(**{side: adjust.get(side) for side in sides})
        if 'hspace' in kws['adjust']:
            fig.subplots_adjust(hspace=kws['adjust'].get('hspace'))

    self.add_elements(*args, **plot_kws)
    self.save_plot()
def test_count_projection(counts, name):
    """Warn when more than a third of the bins of a projection are empty."""
    empty_bins = (counts == 0).sum()
    if empty_bins > counts.size / 3:
        for line in ("\n",
                     'WARNING: Uneven projection <- vector may be faulty!',
                     "\n",
                     '\a'):
            print(line)
        lg.logprint(LAM_logger, f'Uneven projection for {name}. Check vector quality.', 'w')
def save_analysis_info(self, samples, groups, channels):
    """Write the analysed groups, samples and channels to Analysis_info.txt."""
    info_path = self.outputdir.joinpath('Analysis_info.txt')
    pieces = [
        'GROUPS:\t', ', '.join(groups),
        '\nSAMPLES:\t', ', '.join(samples),
        '\nCHANNELS:\t', ', '.join(channels),
    ]
    with open(str(info_path), 'w') as f:
        for piece in pieces:
            f.write(piece)
    lg.logprint(LAM_logger, 'Analysis info successfully saved.', 'i')
def read_widths(datadir):
    """Read 'Sample_widths_norm.csv' from `datadir`; None if it is missing."""
    width_file = datadir.joinpath('Sample_widths_norm.csv')
    try:
        return pd.read_csv(width_file, index_col=False)
    except FileNotFoundError:
        msg = 'Width data not found. Perform analysis with measure_width.'
        print(f'ERROR: {msg}')
        lg.logprint(LAM_logger, f'-> {msg}', 'e')
    return None
def check_resize_step(resize, log=True):
    """Ensure the skeleton resize setting is a multiple of 0.1.

    Only acts when ``Sett.SkeletonVector`` is set. An off-step `resize` is
    rounded down to the nearest 0.1 and stored in ``Sett.SkeletonResize``;
    the change is printed and, if `log` is true, logged.
    """
    if not Sett.SkeletonVector:
        return
    remainder = Decimal(str(resize)) % Decimal(str(0.10))
    if remainder == Decimal('0.0'):
        return

    msg = 'Resizing not in step of 0.1'
    print("WARNING: {}".format(msg))
    # Round setting down to nearest 0.1.
    Sett.SkeletonResize = math.floor(resize * 10) / 10
    msg2 = 'SkeletonResize changed to {}'.format(Sett.SkeletonResize)
    print("-> {}".format(msg2))
    if not log:
        return
    lg.logprint(LAM_logger, msg, 'w')
    lg.logprint(LAM_logger, msg2, 'i')
def read_channel(self, path):
    """Read the channel's data file and register the channel name.

    The name is appended to ``Store.channels`` unless it is already
    listed (case-insensitive) or equals the MP channel name. Returns the
    data read; a ValueError is logged and results in None.
    """
    try:
        frame = system.read_data(str(path), header=Sett.header_row)
        lowered = self.name.lower()
        known = [existing.lower() for existing in Store.channels]
        if lowered not in known and lowered != Sett.MPname.lower():
            Store.channels.append(self.name)
        return frame
    except ValueError:
        lg.logprint(LAM_logger, 'Cannot read channel path {}'.format(path), 'ex')
def main_catch_exit(LAM_logger=None, gui_root=None):
    """Run main() and log the ways in which the analysis may stop.

    A logger is set up when none is given. Keyboard interrupts, system
    exits and ``process.VectorError`` are caught, reported and logged.
    """
    # If no logger given, get one
    if LAM_logger is None:
        LAM_logger = lg.setup_logger(__name__, new=True)
        lg.print_settings()  # print settings of analysis to log
        lg.create_loggers()

    try:
        print("START ANALYSIS")
        main(gui_root=gui_root)  # run analysis
        lg.logprint(LAM_logger, 'Completed', 'i')
        lg.close_loggers()
        print('\nCOMPLETED\n')
    # Catch and log possible exits from the analysis:
    except KeyboardInterrupt:
        lg.logprint(LAM_logger, 'STOPPED: keyboard interrupt', 'e')
        print("STOPPED: Keyboard interrupt by user.\n")
        lg.close_loggers()
    except SystemExit:
        lg.logprint(LAM_logger, 'EXIT\n\n', 'ex')
        print("STOPPED\n")
        lg.log_shutdown()
    except process.VectorError as vector_err:
        print(vector_err.message + '\n')
        missing = ", ".join(vector_err.samples)
        print(f'Missing: {missing}')
        lg.logprint(LAM_logger, vector_err.message, 'ex')
        lg.log_shutdown()
def get_data(self, *args, **kws):
    """Collect data from all paths of the handler into one DataFrame.

    Flags in `args`:
        'path_id': take the melted value name from the file stem, split
            on '_' and indexed by ``kws['id_sep']``.
        'merge': outer-merge the files on ``kws['merge_on']`` instead of
            concatenating them.
        'drop_outlier': drop outliers when ``Sett.Drop_Outliers`` is set.

    Keywords:
        'IDs': identifiers passed to ``plot.identifiers``.
        'melt': dict of melt arguments; when absent, data is transposed.
    """
    melted = False
    use_merge = 'merge' in args
    # DataFrame for holding data of all samples
    collected = pd.DataFrame()

    for path in self.paths:
        frame = read_data(path, header=0, test=False)
        if 'IDs' in kws:  # Identifiers
            frame = plot.identifiers(frame, path, kws.get('IDs'))

        if 'melt' not in kws:
            frame = frame.T
        else:  # Data to long format
            melt_kws = kws.get('melt')
            if 'path_id' in args:  # Get ID from filepath
                id_sep = kws.get('id_sep')
                try:
                    id_var = path.stem.split('_')[id_sep]
                    melt_kws.update({'value_name': id_var})
                except IndexError:
                    msg = 'Faulty list index. Incorrect file names?'
                    print('ERROR: {}'.format(msg))
                    lg.logprint(LAM_logger, msg, 'e')
            frame = frame.T.melt(id_vars=melt_kws.get('id_vars'),
                                 value_vars=melt_kws.get('value_vars'),
                                 var_name=melt_kws.get('var_name'),
                                 value_name=melt_kws.get('value_name'))
            frame = frame.dropna(subset=[melt_kws.get('value_name')])
            melted = True

        if not use_merge:
            # If not merging, concatenate the data with others
            collected = pd.concat([collected, frame], sort=True)
        elif collected.empty:
            collected = frame
        else:  # Merge data with data from other paths
            collected = collected.merge(frame, how='outer', copy=False,
                                        on=kws.get('merge_on'))

    # Give proper index to full data
    collected.index = pd.RangeIndex(stop=collected.shape[0])
    # Drop outliers
    if 'drop_outlier' in args and Sett.Drop_Outliers:
        collected = drop_outliers(collected, melted, **kws)
    # Determine column data types
    return collected.infer_objects()
def peak_selection(datadir, gui_root=None):
    """Load detected border peaks and store them for plotting.

    The peaks are stored directly in ``Store.border_peaks`` unless
    ``Sett.select_peaks`` is set and dialogs are not suppressed by
    ``Sett.force_dialog``; in that case they are passed to ``ask_peaks``.
    """
    peak_file = datadir.joinpath('Borders_peaks.csv')
    try:
        peaks = pd.read_csv(peak_file)
    except FileNotFoundError:
        msg = 'Borders NOT added to plots - missing Border_peaks.csv'
        print(f'\nINFO: {msg}')
        lg.logprint(LAM_logger, msg, 'i')
        return

    if not Sett.force_dialog and Sett.select_peaks:
        # Ask for subset of peaks if needed
        ask_peaks(peaks, gui_root)
    else:
        Store.border_peaks = peaks
def find_sample_vector(self, path):  # path = data directory
    """Read the sample's vector file and save the vector length.

    The vector and its length are stored on the sample and the length is
    appended to 'Length.csv' in `path`. Missing or faulty vector files are
    printed and logged.
    """
    try:
        # Find sample's vector file
        candidates = list(self.sampledir.glob('Vector.*'))
        self.vector = system.read_vector(candidates)
        self.vector_length = self.vector.length
        system.save_to_file(pd.Series(self.vector_length, name=self.name),
                            path, 'Length.csv')
    except (FileNotFoundError, IndexError):
        # If vector file not found
        msg = f'Vector-file NOT found for {self.name}'
        lg.logprint(LAM_logger, msg, 'e')
        print(f'ERROR: {msg}')
    except (AttributeError, ValueError):
        # If vector file is faulty
        msg = f'Faulty vector for {self.name}'
        lg.logprint(LAM_logger, msg, 'c')
        print(f'CRITICAL: {msg}')
def distance_mean(self, dist=25):
    """Prepare and handle data for cell-to-cell distances.

    Every channel path whose stem matches (case-insensitive) a name in
    ``Sett.distance_channels`` is read and passed to
    ``self.find_distances``. When ``Sett.use_target`` is set, the data of
    ``Sett.target_chan`` is read first and supplied as ``test_data``.

    The method returns early, after logging, as soon as the target data or
    a channel's data cannot be read.

    Args:
        dist: Maximum distance used to find cells.
    """
    kws = {'Dist': dist}  # Maximum distance used to find cells
    # List paths of channels where distances are to be found
    dist_chans = [
        p for p in self.channelPaths for t in Sett.distance_channels
        if t.lower() == p.stem.lower()
    ]

    if Sett.use_target:  # If distances are found against other channel:
        target = Sett.target_chan  # Get the name of the target channel
        try:  # Find target's data file and read it
            file = '{}.csv'.format(target)
            # Escape the name: it is a literal file name, not a pattern
            test_namer = re.compile(re.escape(file), re.I)
            target_path = [
                p for p in self.channelPaths
                if test_namer.fullmatch(str(p.name))
            ]
            test_data = system.read_data(target_path[0], header=0)
        except (FileNotFoundError, IndexError):
            test_data = None
        # read_data may also signal an unreadable file by returning None
        if test_data is None:
            msg = "No file for channel {}".format(target)
            lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
            print("-> {}".format(msg))
            return
        kws.update({'test_data': test_data})

    # Loop through the channels, read, and find distances
    for path in dist_chans:
        try:
            data = system.read_data(path, header=0)
        except FileNotFoundError:
            data = None
        if data is None:
            msg = "No file for channel {}".format(path.stem)
            lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
            print("-> {}".format(msg))
            return
        # Discard earlier versions of calculated distances, if present
        data = data.loc[:, ~data.columns.str.startswith('Nearest_')]
        # Find distances
        data.name = path.stem
        self.find_distances(data, vol_incl=Sett.inclusion,
                            compare=Sett.incl_type, **kws)
def width(self):
    """Create line plots of sample group widths.

    Reads 'Sample_widths_norm.csv' from the data directory, halves the
    'Linear Position' column and draws the line plot. Returns without
    plotting when the width file is absent.
    """
    width_files = list(self.sgroups.paths.datadir.glob('Sample_widths_norm.csv'))
    if not width_files:
        print(" No width file found. Perform 'Count' with measure_width")
        lg.logprint(LAM_logger, 'No width file found', 'w')
        return

    # Collect data:
    handle = system.DataHandler(self.sgroups, width_files)
    all_data = handle.get_data('drop_outlier', **self.kws)
    position = 'Linear Position'
    halved = all_data.loc[:, position].divide(2, fill_value=0)
    all_data.loc[:, position] = halved

    # Make plot:
    overrides = {'row': None, 'col': None,
                 'ylabel': 'Units (coord system)',
                 'gridspec': {'bottom': 0.2}}
    p_kws = merge_kws(self.kws, overrides)
    plotter = MakePlot(all_data, handle, 'Widths - All')
    plotter(pfunc.lines, 'centerline', 'ticks', 'title', 'legend', 'labels',
            'peaks', **p_kws)
def get_vect_data(self, channel):
    """Get channel data that is used for vector creation.

    The first path in ``self.channelpaths`` containing ``_<channel>_``
    (case-insensitive) is searched for a ``*Position.csv`` file, which is
    then read.

    Returns:
        The data read, or None when no matching directory or position
        file exists (a warning is printed and logged).
    """
    vect_data = None
    try:
        # Search string; the channel is a literal name, so escape it
        namerreg = re.compile(re.escape("_{}_".format(channel)), re.I)
        # Search found paths with string
        dir_path = [s for s in self.channelpaths
                    if namerreg.search(str(s))][0]
        # next() raises StopIteration when the directory has no position
        # file; it is handled like a missing data file below.
        vect_path = next(dir_path.glob('*Position.csv'))
        vect_data = system.read_data(vect_path, header=Sett.header_row)
    except (FileNotFoundError, IndexError, StopIteration):
        # If data file not found
        msg = 'No valid datafile for vector creation.'
        if LAM_logger is not None:
            lg.logprint(LAM_logger, msg, 'w')
        print('-> {}'.format(msg))
        vect_data = None
    return vect_data
def test_control():
    """Check that the control group exists among the sample groups.

    A control name that differs only in capitalization is replaced by the
    matching group. If no group matches, statistics are disabled when
    dialogs are suppressed (returns False); otherwise the user is asked
    for a new control group. Returns True when a control group is set.
    """
    if Sett.cntrlGroup in Store.samplegroups:
        return True
    lg.logprint(LAM_logger, 'Set control group not found', 'c')

    # Test if entry is due to capitalization error:
    namer = re.compile(r"{}$".format(re.escape(Sett.cntrlGroup)), re.I)
    for candidate in Store.samplegroups:
        if not namer.match(candidate):
            continue
        # Different capitalization:
        msg = "Control group-setting is case-sensitive!"
        print(f"WARNING: {msg}")
        # Change control to found group
        Sett.cntrlGroup = candidate
        msg = "Control group has been changed to"
        print("{} '{}'\n".format(msg, candidate))
        lg.logprint(LAM_logger, f"-> Changed to {candidate}", 'i')
        return True

    # If control not found at all:
    msg = "Control group NOT found in sample groups!"
    print("\nWARNING: {}\n".format(msg))
    if not Sett.force_dialog:
        ask_control()
        return True
    lg.logprint(LAM_logger, msg, 'e')
    Sett.statistics = False
    return False
def read_data(filepath, header=Sett.header_row, test=True, index_col=False):
    """Read csv-data into a DataFrame.

    Columns whose label starts with 'Unnamed' are dropped. With `test`
    enabled, a missing 'ID' column is reported but the data is still
    returned.

    Args:
        filepath: Path of the csv-file, as ``pathlib.Path`` or ``str``.
        header: Row index of the column labels, passed to ``pd.read_csv``.
        test: Whether to check that an 'ID' column is present.
        index_col: Passed to ``pd.read_csv``.

    Returns:
        The DataFrame read, or None when the file is missing, empty or
        cannot be parsed.
    """
    from pathlib import Path

    # The error messages use `.name` and `.parent`; callers also pass
    # plain strings, which would fail inside the handlers.
    if isinstance(filepath, str):
        filepath = Path(filepath)

    data = None
    try:  # Read data
        data = pd.read_csv(filepath, header=header, index_col=index_col)
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]
        if test:  # Test that the data contains ID columns
            try:
                data.loc[:, 'ID']
            except KeyError:
                msg = f'Column label test failed: ID not present at {filepath}'
                lg.logprint(LAM_logger, msg, 'ex')
                caller = inspect.stack()[1]
                print(f'WARNING: read_data() call from {caller[1]} line {caller[2]}')
                print("Key 'ID' not found. Verify header row setting.\n\n")
                print(f"Path: {filepath}\n")
    except FileNotFoundError:
        lg.logprint(LAM_logger, 'File not found at {}'.format(filepath), 'e')
        caller = inspect.stack()[1]
        print(f'WARNING: read_data() call from {caller[1]} line {caller[2]}')
        print(f'File {filepath.name} not found at {str(filepath.parent)}')
        return None
    except pd.errors.EmptyDataError:
        msg = "{} is empty. Skipped.".format(filepath.name)
        print("ERROR: {}".format(msg))
        lg.logprint(LAM_logger, msg, 'e')
        return None
    except AttributeError:
        msg = f"Data or columns may be faulty in {filepath.name}"
        print("WARNING: {}".format(msg))
        lg.logprint(LAM_logger, msg, 'w')
        return data
    except pd.errors.ParserError:
        msg = f"{filepath} cannot be read."
        print(f"ERROR: {msg}")
        print("\nWrong header row?")
        lg.logprint(LAM_logger, msg, 'ex')
    return data
def relate_data(data, mp_bin=0, center=50, total_length=100):
    """Place sample data in context of all data, i.e. anchoring.

    The input is inserted into a NaN-filled array of `total_length` so
    that the sample's `mp_bin` lands on index `center`. NaN values inside
    the input are set to zero; bins outside the input stay NaN.

    Args:
        data: One-dimensional, numeric sample data.
        mp_bin: Bin of the sample's measurement point.
        center: Index of the anchoring point in the full array.
        total_length: Length of the full array.

    Returns:
        Tuple of the full-length array and the insertion start index.

    Raises:
        SystemExit: If `mp_bin` is NaN or the data does not fit into the
            slot computed for it.
    """
    try:
        length = data.shape[0]
    except AttributeError:
        length = len(data)
    if np.isnan(mp_bin):
        msg = "Missing MP-projection(s). See 'Analysis Data/MPs.csv'."
        print(f"CRITICAL: {msg}")
        lg.logprint(LAM_logger, msg, 'c')
        raise SystemExit
    # Insert smaller input data into larger DF defined by TotalLength
    insx = int(center - mp_bin)
    end = int(insx + length)
    insert = np.full(total_length, np.nan)  # Bins outside input data are NaN
    try:
        # Set all NaN in input to 0. NaN never compares equal to itself,
        # so an equality test cannot find it; np.isnan must be used.
        values = np.asarray(data, dtype=float)
        insert[insx:end] = np.where(np.isnan(values), 0, values)  # Insertion
    except ValueError:
        msg = "relate_data() call from {} line {}".format(
            inspect.stack()[1][1], inspect.stack()[1][2])
        print('ERROR: {}'.format(msg))
        lg.logprint(LAM_logger, f'Failed {msg}\n', 'ex')
        msg = "If not using MPs, remove MPs.csv from 'Data Files'."
        if insert[insx:end].size - length == mp_bin:
            lg.logprint(LAM_logger, msg, 'i')
        raise SystemExit
    return insert, insx
def find_existing(paths: system.Paths):
    """Get MPs and count old projections when not projecting during 'Count'.

    For every sample in ``Store.samples`` the stored MP bin is read (or
    set to zero when MPs are unused), and the 'DistBin' column of each
    channel csv in the sample directory is counted into
    ``All_<channel>.csv`` in the data directory. The collected MPs are
    saved to 'MPs.csv' and ``Store.samplegroups`` is rebuilt from the
    sample names.

    Raises:
        SystemExit: If MPs are in use and a sample has no 'MPs.csv'.
    """
    msg = 'Collecting pre-existing data.'
    print(msg)
    lg.logprint(LAM_logger, msg, 'i')
    mps = pd.DataFrame(columns=Store.samples)
    skipped_files = ['Vector', 'MPs', Sett.MPname]
    for smpl in Store.samples:
        smplpath = paths.samplesdir.joinpath(smpl)
        # FIND MP
        if Sett.useMP:
            try:
                mp_df = pd.read_csv(smplpath.joinpath('MPs.csv'))
                mp_bin = mp_df.iat[0, 0]
            except FileNotFoundError:
                msg = "MP-data not found."
                add = "Provide MP-data or set useMP to False."
                print(f"ERROR: {msg}\n{add}")
                raise SystemExit
        else:
            mp_bin = 0
        mps.loc[0, smpl] = mp_bin

        # FIND CHANNEL COUNTS
        channel_files = [
            p for p in smplpath.iterdir()
            if p.suffix == '.csv' and p.stem not in skipped_files
        ]
        for path in channel_files:
            data = pd.read_csv(path)
            try:
                counts = np.bincount(data['DistBin'], minlength=Sett.projBins)
                counts = pd.Series(np.nan_to_num(counts), name=smpl)
                channel_string = str(f'All_{path.stem}.csv')
                system.save_to_file(counts, paths.datadir, channel_string)
            except (KeyError, TypeError, ValueError):
                # If channel has not been projected: the 'DistBin' column
                # is missing (KeyError) or holds values that cannot be
                # counted as bins (TypeError/ValueError).
                print(f"Missing projection data: {path.stem} - {smpl}")
                print("-> Set project=True and perform Count")

    mps.to_csv(paths.datadir.joinpath('MPs.csv'))
    samples = mps.columns.tolist()
    groups = set({s.casefold(): s.split('_')[0] for s in samples}.values())
    Store.samplegroups = sorted(groups)
def start(test_vectors=True, only_vectors=False):
    """Verify the settings at the start of a run and create the paths.

    Exits when all primary settings are False, or when vector channel data
    is required but missing for some sample. With `test_vectors` false the
    paths are returned right after creation. Otherwise the sample names
    are stored in ``Store.samples`` before returning the paths.
    """
    # If workdir variable isn't pathlib.Path, make it so
    if not isinstance(Sett.workdir, pl.Path):
        Sett.workdir = pl.Path(Sett.workdir)

    # Check that at least one primary setting is True
    primary = [Sett.process_samples, Sett.process_counts, Sett.Create_Plots,
               Sett.process_dists, Sett.statistics]
    if not any(primary):
        lg.logprint(LAM_logger, 'All primary settings are False', 'e')
        print("\nAll primary settings are set to False.\n\nExiting ...")
        raise SystemExit

    # Otherwise create paths and directories
    paths = Paths(Sett.workdir, only_vectors=only_vectors)
    if not test_vectors:
        return paths

    # Check that vector channel data are found
    if Sett.process_samples or (Sett.measure_width and Sett.process_counts):
        sample_dirs = [d for d in Sett.workdir.iterdir()
                       if d.is_dir() and d.name != 'Analysis Data']
        failed = []
        for sample_dir in sample_dirs:
            matches = sample_dir.glob(f'*_{Sett.vectChannel}_*')
            try:
                next(matches)
            except StopIteration:
                failed.append(sample_dir.name)
        if failed:
            msg = f"Vector channel data not found for {', '.join(failed)}"
            print(f'ERROR: {msg}')
            print('Check vector channel setting or data.')
            lg.logprint(LAM_logger, msg, 'e')
            raise SystemExit

    # Find and store all sample names
    Store.samples = [d.name for d in paths.samplesdir.iterdir() if d.is_dir()]
    return paths
def __init__(self, workdir: pl.Path, only_vectors: bool = False):
    """Define the output directories under `workdir` and create them.

    Unless `only_vectors` is set, ``self.clear_analysis`` is called before
    the directories are created so that earlier output does not interfere
    with the analysis.
    """
    # Path-variables necessary for the analysis
    self.outputdir = workdir.joinpath('Analysis Data')
    self.datadir = pl.Path(self.outputdir.joinpath('Data Files'))
    self.plotdir = pl.Path(self.outputdir.joinpath('Plots'))
    self.samplesdir = pl.Path(self.outputdir.joinpath('Samples'))
    self.statsdir = pl.Path(self.outputdir.joinpath('Statistics'))

    if not only_vectors:
        self.clear_analysis()

    # Create output directories, the parent first
    for folder in (self.outputdir, self.plotdir, self.samplesdir,
                   self.datadir, self.statsdir):
        folder.mkdir(exist_ok=True)

    if LAM_logger is not None:
        lg.logprint(LAM_logger, 'Directories successfully created.', 'i')
def create_samples(paths: system.Paths):
    """Create a vector for every sample and plot the vectors.

    A skeleton vector is created when ``Sett.SkeletonVector`` is set,
    otherwise a median vector.
    """
    lg.logprint(LAM_logger, 'Begin vector creation.', 'i')
    print("---Processing samples---")

    # Test that resize-setting is in step of 0.1:
    if Sett.SkeletonVector:
        check_resize_step(Sett.SkeletonResize)

    # Loop through samples to create vectors
    source_dirs = [d for d in Sett.workdir.iterdir()
                   if d.is_dir() and d.stem != 'Analysis Data']
    for source_dir in source_dirs:
        sample = GetSample(source_dir, paths)
        print("{} ...".format(sample.name))
        sample.vect_data = sample.get_vect_data(Sett.vectChannel)
        # Creation of vector for projection
        make_vector = (sample.create_skeleton if Sett.SkeletonVector
                       else sample.create_median)
        make_vector()

    sample_dirs = [d for d in paths.samplesdir.iterdir() if d.is_dir()]
    pfunc.create_vector_plots(Sett.workdir, paths.samplesdir, sample_dirs)
    lg.logprint(LAM_logger, 'Vectors created.', 'i')
def get_widths(samplesdir, datadir):
    """Find widths of samples along their vectors.

    For each sample directory in `samplesdir`, the vector file and the
    vector channel's data file are read and passed to
    ``process.DefineWidths``. Samples for which either file cannot be read
    are reported, logged and skipped.
    """
    msg = "Necessary files for width approximation not found for "
    # Search terms
    vreg = re.compile('^vector.', re.I)  # vector
    dreg = re.compile(f'^{Sett.vectChannel}.csv', re.I)  # channel data

    for path in [p for p in samplesdir.iterdir() if p.is_dir()]:
        # Reset per sample so that a failed read can never fall through
        # with the previous sample's data.
        data, vector_data = None, None
        # Find necessary data files:
        files = [p for p in path.iterdir() if p.is_file()]
        try:
            # Match terms to found paths
            vect_paths = [p for p in files if vreg.match(p.name)]
            data_paths = [p for p in files if dreg.match(p.name)]
            # Read found paths
            vector_data = system.read_vector(vect_paths)
            data = system.read_data(data_paths[0], header=0)
        except (StopIteration, IndexError):
            pass  # reported below through the None checks

        if vector_data is None or data is None:
            full_msg = msg + path.name
            print(f"WARNING: {full_msg}")
            if vector_data is None:  # if vector not found
                print("-> Could not read vector data.")
            else:  # if channel data not found
                print("Could not read channel data")
                print("Make sure channel is set right (vector channel)\n")
            lg.logprint(LAM_logger, full_msg, 'w')
            continue

        # Compute widths
        process.DefineWidths(data, vector_data, path, datadir)
def ask_control():
    """Ask the user for a new control group when the set one is not found.

    The sorted sample groups are listed with their index and the user is
    prompted until a valid index is given. The selection is stored in
    ``Sett.cntrlGroup`` and logged.

    Raises:
        KeyboardInterrupt: If the dialog returns None.
    """
    groups = sorted(Store.samplegroups)
    # Print groups and demand input for control:
    while True:
        print('Found groups:')
        for i, grp in enumerate(groups):
            print('{}: {}'.format(i, grp))
        msg = "Select the number of control group: "
        print('\a')
        ans = system.ask_user(msg, dlgtype='integer')
        if ans is None:
            raise KeyboardInterrupt
        # Valid indices end at len(groups) - 1; accepting len(groups)
        # would raise IndexError on the lookup below.
        if 0 <= ans < len(groups):
            # Change control based on input
            Sett.cntrlGroup = groups[ans]
            print(f"Control group set as '{Sett.cntrlGroup}'.\n")
            break
        print('Command not understood.')
    msg = f"-> Changed to group '{Sett.cntrlGroup}' by user"
    lg.logprint(LAM_logger, msg, 'i')
def detect_borders(paths, all_samples, palette, anchor, variables, scoring,
                   threshold=0.5, channel='DAPI'):
    """
    Midgut border detection by weighted scoring of binned variables.

    Args:
    ----
        paths - LAM system.Paths-object that contains directory paths
        all_samples - Paths to sample folders
        palette - Color palette dict with sample groups as keys
        anchor - Anchoring bin of the samples in the full data matrix
        threshold - Minimum score for peak detection, i.e. borders
        variables - List of column names to collect from sample's channel data
        scoring - Dict of variable names with their scoring weight
        channel - The name of the data channel that is used, e.g. 'DAPI' data

    The scores and the detected peaks are saved to 'Borders_scores.csv'
    and 'Borders_peaks.csv' in the data directory. Detection is cancelled
    when the width data or all of the score data is missing.
    """
    print('\n---Finding border regions---')
    lg.logprint(LAM_logger, 'Finding border regions.', 'i')
    plot_dir = plotting_directory(paths.plotdir)

    # Get widths and if not found, abort
    widths = read_widths(paths.datadir)
    if widths is None:
        return

    # Establish object to store scores of individual samples
    border_data = FullBorders(all_samples, widths, anchor, palette)
    print(' Scoring samples ...')

    # Collect and score variables for each sample in the sample list
    for sample_path in all_samples:
        sample = GetSampleBorders(sample_path, channel, scoring, anchor,
                                  variables)
        if sample.error:
            continue
        # Expected variables were found, calculate sample scores
        sample(border_data, plot_dir)

    # If no data, return without finding borders
    if border_data.scores.isnull().values.all():
        print('\nERROR: Missing data, border detection cancelled.')
        lg.logprint(LAM_logger, 'Border detection variables not found.', 'e')
        return

    # Once sample scores have been collected, find peaks
    print(' Finding peaks ...')
    flat, peaks = border_data(plot_dir, threshold)

    # Add the locations of border peaks in each sample's individual binning
    binned_peaks = append_binning(border_data.sample_starts, peaks)

    # Save data
    score_file = paths.datadir.joinpath('Borders_scores.csv')
    peak_file = paths.datadir.joinpath('Borders_peaks.csv')
    flat.T.to_csv(score_file, index=False)
    binned_peaks.to_csv(peak_file, index=False)
    lg.logprint(LAM_logger, 'Border detection done.', 'i')
def get_clusters(self):
    """Compute cell clusters for every sample of every sample group."""
    print('\n---Finding clusters---')
    lg.logprint(LAM_logger, 'Finding clusters', 'i')
    for group_name in self._groups:  # One sample group at a time
        lg.logprint(LAM_logger, '-> group {}'.format(group_name), 'i')
        print(' {} ...'.format(group_name))
        group = Group(group_name)
        for sample_path in group.groupPaths:  # One sample of the group
            # Find clusters
            Sample(sample_path, group).clustering(Sett.cl_max_dist)
    lg.logprint(LAM_logger, 'Clusters calculated', 'i')