Example #1
    def get_mps(self, mp_name: str, use_mp: bool,
                datadir: pl.Path) -> pd.Series:
        """Collect MPs for sample anchoring."""
        mp_bin = None  # Remains None if MP data cannot be found
        if use_mp:
            try:  # Get measurement point for anchoring
                mp_dir_path = next(
                    self.channelpaths.pop(i)
                    for i, s in enumerate(self.channelpaths)
                    if f'_{mp_name}_' in str(s))
                mp_path = next(mp_dir_path.glob("*Position.csv"))
                mp_data = system.read_data(mp_path,
                                           header=Sett.header_row,
                                           test=False)
                mp_data = mp_data.loc[:, ['Position X', 'Position Y']]
                if not mp_data.empty:
                    mp_bin = self.project_mps(mp_data,
                                              datadir,
                                              filename="MPs.csv")
                    mp_df = pd.DataFrame({'MP': mp_bin.values.codes})
                    mp_df.to_csv(self.sampledir.joinpath("MPs.csv"),
                                 index=False)
            except (StopIteration, ValueError, UnboundLocalError):
                msg = f'could not find MP position for {self.name}'
                lg.logprint(LAM_logger, msg, 'e')
                print("    -> Failed to find MP position data.")
        else:  # Set measurement point values to zero when MPs are not used
            mp_bin = pd.Series(0, name=self.name)
            system.save_to_file(mp_bin, datadir, "MPs.csv")
            system.save_to_file(mp_bin,
                                self.sampledir,
                                "MPs.csv",
                                append=False)
        return mp_bin
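The call to project_mps is not shown here; as a rough, hypothetical sketch of what the returned bin looks like, the following maps an MP coordinate onto equal-width bins with pd.cut and reads the categorical codes used above. The real projection follows the sample's vector, so treat this binning as an assumption.

import numpy as np
import pandas as pd

# Hypothetical stand-in for project_mps: bin the MP x-coordinate into ten
# equal-width segments and expose the bin index via categorical codes.
mp_data = pd.DataFrame({'Position X': [42.0], 'Position Y': [7.5]})
mp_bin = pd.cut(mp_data['Position X'], bins=np.linspace(0, 100, num=11))
print(mp_bin.values.codes)  # -> [4]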
Example #2
    def __init__(self, paths=None, child=False):
        if child:
            return

        # Create variables related to all samples; these are later passed
        # on to child classes.
        Samplegroups._groups = sorted(Store.samplegroups)
        Samplegroups._chanPaths = list(paths.datadir.glob('Norm_*'))
        Samplegroups.sample_paths = [
            p for p in paths.samplesdir.iterdir() if p.is_dir()
        ]
        Samplegroups._addData = list(paths.datadir.glob('Avg_*'))

        # Data and other usable directories
        Samplegroups.paths = paths

        # Total length of needed data matrix of all anchored samples
        Samplegroups.bin_length = Store.totalLength

        # Get MPs of all samples
        mp_path = paths.datadir.joinpath('MPs.csv')
        Samplegroups.sample_mps = system.read_data(mp_path,
                                                   header=0,
                                                   test=False)

        # If an anchor point index is defined, store it as the center bin
        if Store.center is not None:
            Samplegroups.center_bin = Store.center

        # Assign color for each sample group
        groupcolors = sns.xkcd_palette(Sett.palette_colors)
        for i, grp in enumerate(Samplegroups._groups):
            Samplegroups.grp_palette.update({grp: groupcolors[i]})

        lg.logprint(LAM_logger, 'Sample groups established.', 'i')
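For reference, the palette assignment at the end amounts to zipping group names with seaborn's xkcd colors; a minimal stand-alone sketch (the group and color names here are placeholders, not Sett.palette_colors):

import seaborn as sns

groups = ['ctrl', 'exp1', 'exp2']                     # placeholder groups
colors = sns.xkcd_palette(['red', 'green', 'blue'])   # placeholder colors
grp_palette = dict(zip(groups, colors))
print(grp_palette['ctrl'])  # -> an (r, g, b) tuple

Note that the loop above indexes groupcolors[i] directly, so Sett.palette_colors must list at least as many colors as there are sample groups.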
Example #3
def ask_new_channel(border_channel):
    """Ask user input to determine new border detection channel."""

    if Sett.force_dialog:  # If forcing no user input
        msg = "Border detection data not found for all samples."
        lg.logprint(LAM_logger, msg, 'i')
        return False

    print('\a')

    while True:  # Ask input until satisfied
        dlg = f'Border detection data not found.\nCurrent border detection channel is {border_channel}.\n'\
              f'Change channel? [y/n]'
        ans = system.ask_user(dlg)  # Ask whether to change channel
        if ans in ('Y', 'y'):
            dlg = "Give name of new border detection channel: "
            new_channel = system.ask_user(dlg)  # Ask channel name
            change_keys(border_channel, new_channel)  # Change variables
            Sett.border_channel = new_channel
            msg = f'Border detection channel changed from {border_channel} to {new_channel}.'
            print('\n' + msg)
            lg.logprint(LAM_logger, msg, 'i')
            return True
        if ans in ('N', 'n'):
            return False
        print('Command not understood.\n')
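The prompt-loop pattern above, reduced to a self-contained form (assuming system.ask_user is essentially a wrapper around input()):

def ask_yes_no(question: str) -> bool:
    """Ask until the user answers y/n; a minimal analogue of the loop above."""
    while True:
        ans = input(question + ' [y/n] ').strip().lower()
        if ans == 'y':
            return True
        if ans == 'n':
            return False
        print('Command not understood.\n')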
Example #4
    def clustering(self, dist=10):
        """Handle data for finding clusters of cells."""
        kws = {'Dist': dist}  # Maximum distance for considering clustering
        data = None

        # Listing of paths of channels on which clusters are to be found
        cluster_chans = [
            p for p in self.channelPaths for t in Sett.cluster_channels
            if t.lower() == p.stem.lower()
        ]
        for path in cluster_chans:  # Loop paths, read file, and find clusters
            try:
                data = system.read_data(path, header=0)
            except (FileNotFoundError, AttributeError):
                msg = "No file for channel {}".format(path.stem)
                lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
                print("-> {}".format(msg))
                continue  # Avoid reusing data from an earlier iteration

            # Discard earlier versions of found clusters, if present
            if data is not None:
                data = data.loc[:, ~data.columns.str.contains('ClusterID')]
                data.name = path.stem  # The name of the clustering channel

                # Find clusters
                self.find_distances(data,
                                    vol_incl=Sett.cl_inclusion,
                                    compare=Sett.cl_incl_type,
                                    clusters=True,
                                    **kws)
Example #5
def subset_data(data, compare, vol_incl, sample):
    """Get indexes of cells based on values in a column."""

    if not isinstance(data, pd.DataFrame):
        lg.logprint(LAM_logger, 'Wrong data type for subset_data()', 'e')
        msg = 'Wrong data type for subset_data(); must be a pandas DataFrame.'
        print(msg)
        return None

    # Search for the filtering column:
    match_str = re.compile(Sett.incl_col, re.I)
    cols = data.columns.str.match(match_str)

    # If no columns or multiple found:
    if not cols.any():
        e_msg = f"Column '{Sett.incl_col}' not found for {sample} {data.name}."
        print(f"ERROR: {e_msg}\n")
        lg.logprint(LAM_logger, e_msg, 'e')
        return None
    elif sum(cols) > 1:
        id_str = f"{sample} {data.name}"
        msg = f"Multiple columns with '{Sett.incl_col}' found for " + id_str
        print(f"WARNING: {msg}. Give specific name for filtering column.\n")

    # Find indices of data to retain:
    if compare.lower() == 'greater':  # Get only cells that are greater value
        sub_ind = data.loc[(data.loc[:, cols].values >= vol_incl), :].index
    else:  # Get only cells that are of lesser value
        sub_ind = data.loc[(data.loc[:, cols].values <= vol_incl), :].index
    return sub_ind
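A self-contained run of the filtering step, with a made-up frame and 'volume' standing in for Sett.incl_col; squeeze() flattens the single matched column before the comparison:

import re
import pandas as pd

data = pd.DataFrame({'Volume': [5.0, 20.0, 35.0], 'ID': [1, 2, 3]})
cols = data.columns.str.match(re.compile('volume', re.I))
sub_ind = data.loc[data.loc[:, cols].squeeze() >= 10, :].index
print(list(sub_ind))  # -> [1, 2]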
Example #6
def project(paths):
    """Project features onto the vector."""
    lg.logprint(LAM_logger, 'Begin channel projection and counting.', 'i')
    print("\n---Projecting and counting channels---")
    # Loop through all directories in the root directory
    for path in [
            p for p in Sett.workdir.iterdir()
            if p.is_dir() and p.stem != 'Analysis Data'
    ]:
        # Initialize sample variables
        sample = GetSample(path, paths, process=False, projection=True)
        print(f"  {sample.name}  ...")
        # Find anchoring point of the sample
        sample.MP = sample.get_mps(Sett.MPname, Sett.useMP, paths.datadir)
        # Collection of data for each channel of the sample
        for path2 in [
                p for p in sample.channelpaths
                if Sett.MPname.lower() != str(p).split('_')[-2].lower()
        ]:
            channel = GetChannel(path2, sample, Sett.AddData, paths.datadir)
            # Additional data found to have no variance is discarded.
            if channel.datafail:
                datatypes = ', '.join(channel.datafail)
                info = "Invariant data discarded"
                msg = f"   -> {info} - {channel.name}: {datatypes}"
                print(msg)
            # Project features of channel onto vector
            sample.data = sample.project_channel(channel)
            if channel.name == Sett.vectChannel and Sett.measure_width:
                DefineWidths(sample.data, sample.vector, sample.sampledir,
                             paths.datadir)
            # Count occurrences in each bin
            if channel.name not in ["MPs"]:
                sample.find_counts(channel.name, paths.datadir)
    lg.logprint(LAM_logger, 'All channels projected and counted.', 'i')
Example #7
    def __call__(self, func, *args, **kws):
        plot_kws = merge_kws(MakePlot.base_kws, kws)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=UserWarning)
            # Make canvas if needed:
            if 'no_grid' not in args:
                self.g = self.get_facet(**plot_kws)
            # Plot data
            self.g = func(self, **plot_kws)
        if self.plot_error:
            msg = "Plot not saved"
            print("INFO: {}".format(msg))
            lg.logprint(LAM_logger, msg, 'w')
            return

        # Adjust plot sizes so that everything fits properly
        fig = plt.gcf()
        if 'adjust' in kws:
            adjust = kws['adjust']
            fig.subplots_adjust(top=adjust.get('top'), bottom=adjust.get('bottom'),
                                right=adjust.get('right'), left=adjust.get('left'),
                                wspace=adjust.get('wspace'), hspace=adjust.get('hspace'))
        else:
            fig.subplots_adjust(top=0.85, bottom=0.2, hspace=0.75)
        self.add_elements(*args, **plot_kws)
        self.save_plot()
Example #8
def test_count_projection(counts, name):
    """Warn when over a third of a sample's bins contain no projected cells."""
    if (counts == 0).sum() > counts.size / 3:
        print("\n")
        print('WARNING: Uneven projection <- vector may be faulty!')
        print("\n")
        print('\a')
        lg.logprint(LAM_logger,
                    f'Uneven projection for {name}. Check vector quality.',
                    'w')
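The heuristic in numbers: a sketch with made-up counts where four of ten bins are empty, which is enough to trip the warning:

import numpy as np

counts = np.array([0, 0, 0, 0, 5, 8, 9, 4, 2, 1])  # made-up bin counts
print((counts == 0).sum() > counts.size / 3)  # -> True, projection uneven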
Example #9
    def save_analysis_info(self, samples, groups, channels):
        """Save information of all analyzed samples."""
        with open(self.outputdir.joinpath('Analysis_info.txt'), 'w') as f:
            f.write('GROUPS:\t')
            f.write(', '.join(groups))
            f.write('\nSAMPLES:\t')
            f.write(', '.join(samples))
            f.write('\nCHANNELS:\t')
            f.write(', '.join(channels))
        lg.logprint(LAM_logger, 'Analysis info successfully saved.', 'i')
Example #10
def read_widths(datadir):
    """Find and read width datafile"""
    filepath = datadir.joinpath('Sample_widths_norm.csv')
    try:
        widths = pd.read_csv(filepath, index_col=False)
    except FileNotFoundError:
        msg = 'Width data not found. Perform analysis with measure_width.'
        print(f'ERROR: {msg}')
        lg.logprint(LAM_logger, f'-> {msg}', 'e')
        return None
    return widths
Example #11
def check_resize_step(resize, log=True):
    """Ensure that the skeleton resize setting is in steps of 0.1."""
    if Sett.SkeletonVector and Decimal(str(resize)) % Decimal('0.1') != Decimal('0.0'):
        msg = 'Resizing not in step of 0.1'
        print("WARNING: {}".format(msg))
        # Round setting down to nearest 0.1.
        Sett.SkeletonResize = math.floor(resize * 10) / 10
        msg2 = 'SkeletonResize changed to {}'.format(Sett.SkeletonResize)
        print("-> {}".format(msg2))
        if log:
            lg.logprint(LAM_logger, msg, 'w')
            lg.logprint(LAM_logger, msg2, 'i')
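The Decimal round-trip through str() is deliberate: float arithmetic cannot express the step test exactly, as this short comparison shows:

from decimal import Decimal

print(0.3 % 0.1)                            # ~0.09999..., float error
print(Decimal('0.3') % Decimal('0.1'))      # -> 0.0, exact
print(Decimal(str(0.25)) % Decimal('0.1'))  # -> 0.05, not in step of 0.1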
Example #12
    def read_channel(self, path):
        """Read channel data into a dataframe."""
        try:
            data = system.read_data(str(path), header=Sett.header_row)
            channel = self.name
            if channel.lower() not in [
                    c.lower() for c in Store.channels
            ] and channel.lower() != Sett.MPname.lower():
                Store.channels.append(self.name)
            return data
        except ValueError:
            lg.logprint(LAM_logger, 'Cannot read channel path {}'.format(path),
                        'ex')
            return None
Example #13
def main_catch_exit(LAM_logger=None, gui_root=None):
    """Run main() while catching exceptions for logging."""
    if LAM_logger is None:  # If no logger given, get one
        LAM_logger = lg.setup_logger(__name__, new=True)
        lg.print_settings()  # print settings of analysis to log
        lg.create_loggers()

    try:
        print("START ANALYSIS")
        main(gui_root=gui_root)  # run analysis
        lg.logprint(LAM_logger, 'Completed', 'i')
        lg.close_loggers()
        print('\nCOMPLETED\n')

    # Catch and log possible exits from the analysis:
    except KeyboardInterrupt:
        lg.logprint(LAM_logger, 'STOPPED: keyboard interrupt', 'e')
        print("STOPPED: Keyboard interrupt by user.\n")
        lg.close_loggers()

    except SystemExit:
        lg.logprint(LAM_logger, 'EXIT\n\n', 'ex')
        print("STOPPED\n")
        lg.log_shutdown()

    except process.VectorError as e:
        print(e.message + '\n')
        print(f'Missing: {", ".join(e.samples)}')
        lg.logprint(LAM_logger, e.message, 'ex')
        lg.log_shutdown()
Example #14
    def get_data(self, *args, **kws):
        """Collect data from files and modify."""

        melt = False

        # Create a DF for holding data of all samples, add identifiers, format:
        all_data = pd.DataFrame()
        for path in self.paths:
            data = read_data(path, header=0, test=False)
            if 'IDs' in kws:  # Identifiers
                data = plot.identifiers(data, path, kws.get('IDs'))
            if 'melt' in kws:  # Data to long format
                m_kws = kws.get('melt')
                if 'path_id' in args:  # Get ID from filepath
                    id_sep = kws.get('id_sep')
                    try:
                        id_var = path.stem.split('_')[id_sep]
                        m_kws.update({'value_name': id_var})
                    except IndexError:
                        msg = 'Faulty list index. Incorrect file names?'
                        print('ERROR: {}'.format(msg))
                        lg.logprint(LAM_logger, msg, 'e')
                data = data.T.melt(id_vars=m_kws.get('id_vars'), value_vars=m_kws.get('value_vars'),
                                   var_name=m_kws.get('var_name'), value_name=m_kws.get('value_name'))
                data = data.dropna(subset=[m_kws.get('value_name')])
                melt = True
            else:
                data = data.T
            if 'merge' in args:  # Merge data with data from other paths
                if all_data.empty:
                    all_data = data
                else:
                    all_data = all_data.merge(data, how='outer', copy=False, on=kws.get('merge_on'))
                continue

            # If not merging, concatenate the data with others
            all_data = pd.concat([all_data, data], sort=True)

        # Give proper index to full data
        all_data.index = pd.RangeIndex(stop=all_data.shape[0])

        # Drop outliers
        if 'drop_outlier' in args and Sett.Drop_Outliers:
            all_data = drop_outliers(all_data, melt, **kws)

        # Determine column data types
        all_data = all_data.infer_objects()
        return all_data
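The transpose-and-melt step in isolation, on a tiny made-up frame (columns as samples, rows as bins; the id/value names below are placeholders, not the keywords LAM passes in):

import pandas as pd

data = pd.DataFrame({'ctrl_1': [1.0, 2.0], 'exp_1': [3.0, 4.0]})
long = (data.T.reset_index()
        .melt(id_vars='index', var_name='bin', value_name='value'))
print(long)  # four rows: one per sample-bin pair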
Example #15
def peak_selection(datadir, gui_root=None):
    """Collect detected peaks for plotting."""

    try:
        peaks = pd.read_csv(datadir.joinpath('Borders_peaks.csv'))
    except FileNotFoundError:
        msg = 'Borders NOT added to plots - missing Borders_peaks.csv'
        print(f'\nINFO: {msg}')
        lg.logprint(LAM_logger, msg, 'i')
        return

    if Sett.select_peaks and not Sett.force_dialog:
        ask_peaks(peaks, gui_root)  # Ask for subset of peaks if needed
    else:
        Store.border_peaks = peaks
Example #16
    def find_sample_vector(self, path):  # path = data directory
        """Find sample's vector data."""
        try:  # Find sample's vector file
            paths = list(self.sampledir.glob('Vector.*'))
            self.vector = system.read_vector(paths)
            self.vector_length = self.vector.length
            length_series = pd.Series(self.vector_length, name=self.name)
            system.save_to_file(length_series, path, 'Length.csv')

        # If vector file not found
        except (FileNotFoundError, IndexError):
            msg = f'Vector-file NOT found for {self.name}'
            lg.logprint(LAM_logger, msg, 'e')
            print(f'ERROR: {msg}')
        except (AttributeError, ValueError):  # If vector file is faulty
            msg = f'Faulty vector for {self.name}'
            lg.logprint(LAM_logger, msg, 'c')
            print(f'CRITICAL: {msg}')
Example #17
    def distance_mean(self, dist=25):
        """Prepare and handle data for cell-to-cell distances."""
        kws = {'Dist': dist}  # Maximum distance used to find cells

        # List paths of channels where distances are to be found
        dist_chans = [
            p for p in self.channelPaths for t in Sett.distance_channels
            if t.lower() == p.stem.lower()
        ]

        if Sett.use_target:  # If distances are found against other channel:
            target = Sett.target_chan  # Get the name of the target channel
            try:  # Find target's data file, read, and update data to keywords
                file = '{}.csv'.format(target)
                test_namer = re.compile(file, re.I)
                target_path = [
                    p for p in self.channelPaths
                    if test_namer.fullmatch(str(p.name))
                ]
                test_data = system.read_data(target_path[0], header=0)
                kws.update({'test_data': test_data})
            except (FileNotFoundError, IndexError):
                msg = "No file for channel {}".format(target)
                lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
                print("-> {}".format(msg))
                return

        # Loop through the channels, read, and find distances
        for path in dist_chans:
            try:
                data = system.read_data(path, header=0)
            except FileNotFoundError:
                msg = "No file for channel {}".format(path.stem)
                lg.logprint(LAM_logger, "{}: {}".format(self.name, msg), 'w')
                print("-> {}".format(msg))
                return
            # Discard earlier versions of calculated distances, if present
            data = data.loc[:, ~data.columns.str.startswith('Nearest_')]
            # Find distances
            data.name = path.stem
            self.find_distances(data,
                                vol_incl=Sett.inclusion,
                                compare=Sett.incl_type,
                                **kws)
Example #18
    def width(self):
        """Create line plots of sample group widths."""
        name = 'Sample_widths_norm.csv'
        filepath = list(self.sgroups.paths.datadir.glob(name))
        if not filepath:
            print("   No width file found. Perform 'Count' with measure_width")
            lg.logprint(LAM_logger, 'No width file found', 'w')
            return
        # Collect data:
        handle = system.DataHandler(self.sgroups, filepath)
        all_data = handle.get_data('drop_outlier', **self.kws)
        var = 'Linear Position'
        all_data.loc[:, var] = all_data.loc[:, var].divide(2, fill_value=0)

        # Make plot:
        plotter = MakePlot(all_data, handle, 'Widths - All')
        p_kws = merge_kws(self.kws, {'row': None, 'col': None, 'ylabel': 'Units (coord system)',
                                     'gridspec': {'bottom': 0.2}})
        plotter(pfunc.lines, 'centerline', 'ticks', 'title', 'legend', 'labels', 'peaks', **p_kws)
Example #19
    def get_vect_data(self, channel):
        """Get channel data that is used for vector creation."""
        try:
            # Search string:
            namer = "_{}_".format(channel)
            namerreg = re.compile(namer, re.I)
            # Search found paths with string
            dir_path = [
                self.channelpaths[i] for i, s in enumerate(self.channelpaths)
                if namerreg.search(str(s))
            ][0]
            vect_path = next(dir_path.glob('*Position.csv'))
            vect_data = system.read_data(vect_path,
                                         header=Sett.header_row)  # Read data
        except (FileNotFoundError, IndexError, StopIteration):
            # If channel directory or data file is not found
            msg = 'No valid datafile for vector creation.'
            if LAM_logger is not None:
                lg.logprint(LAM_logger, msg, 'w')
            print('-> {}'.format(msg))
            vect_data = None
        return vect_data
Example #20
def test_control():
    """Assert that control group exists, and if not, handle it."""
    # If the control group is found, nothing needs to be done:
    if Sett.cntrlGroup in Store.samplegroups:
        return True
    lg.logprint(LAM_logger, 'Set control group not found', 'c')

    # Test if entry is due to capitalization error:
    namer = re.compile(r"{}$".format(re.escape(Sett.cntrlGroup)), re.I)
    for group in Store.samplegroups:
        if re.match(namer, group):  # If different capitalization:
            msg = "Control group-setting is case-sensitive!"
            print(f"WARNING: {msg}")

            # Change control to found group
            Sett.cntrlGroup = group
            msg = "Control group has been changed to"
            print("{} '{}'\n".format(msg, group))
            lg.logprint(LAM_logger, f"-> Changed to {group}", 'i')
            return True

    # If control not found at all:
    msg = "Control group NOT found in sample groups!"
    print("\nWARNING: {}\n".format(msg))
    if Sett.force_dialog:
        lg.logprint(LAM_logger, msg, 'e')
        Sett.statistics = False
        return False
    ask_control()
    return True
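The case-insensitive fallback match in a stand-alone form ('ctrl' is a placeholder control-group name):

import re

namer = re.compile(r"{}$".format(re.escape('ctrl')), re.I)
print(bool(namer.match('CTRL')))   # True: differs only in capitalization
print(bool(namer.match('ctrl2')))  # False: '$' anchors the end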
Example #21
def read_data(filepath, header=Sett.header_row, test=True, index_col=False):
    """Read csv-data."""
    data = None
    try:  # Read data
        data = pd.read_csv(filepath, header=header, index_col=index_col)
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

        if test and 'ID' not in data.columns:  # Test that data contains an ID column
            msg = f'Column label test failed: ID not present at {filepath}'
            lg.logprint(LAM_logger, msg, 'ex')
            print(f'WARNING: read_data() call from {inspect.stack()[1][1]} line {inspect.stack()[1][2]}')
            print("Key 'ID' not found. Verify header row setting.\n\n")
            print(f"Path: {filepath}\n")

    except FileNotFoundError:
        lg.logprint(LAM_logger, 'File not found at {}'.format(filepath), 'e')
        print(f'WARNING: read_data() call from {inspect.stack()[1][1]} line {inspect.stack()[1][2]}')
        print(f'File {filepath.name} not found at {str(filepath.parent)}')
        return None

    except pd.errors.EmptyDataError:
        msg = "{} is empty. Skipped.".format(filepath.name)
        print("ERROR: {}".format(msg))
        lg.logprint(LAM_logger, msg, 'e')
        return None

    except AttributeError:
        msg = f"Data or columns may be faulty in {filepath.name}"
        print("WARNING: {}".format(msg))
        lg.logprint(LAM_logger, msg, 'w')
        return data

    except pd.errors.ParserError:
        msg = f"{filepath} cannot be read."
        print(f"ERROR: {msg}")
        print("\nWrong header row?")
        lg.logprint(LAM_logger, msg, 'ex')
    return data
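What the '^Unnamed' filter removes, shown stand-alone: the artificial index column pandas creates when a CSV was saved with its index:

import pandas as pd

df = pd.DataFrame({'Unnamed: 0': [0, 1], 'ID': [10, 11], 'Area': [1.5, 2.5]})
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
print(df.columns.tolist())  # -> ['ID', 'Area']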
Example #22
def relate_data(data, mp_bin=0, center=50, total_length=100):
    """Place sample data in context of all data, i.e. anchoring."""
    try:
        length = data.shape[0]
    except AttributeError:
        length = len(data)
    if np.isnan(mp_bin):
        msg = "Missing MP-projection(s). See 'Analysis Data/MPs.csv'."
        print(f"CRITICAL: {msg}")
        lg.logprint(LAM_logger, msg, 'c')
        raise SystemExit
    # Insert smaller input data into larger DF defined by TotalLength
    insx = int(center - mp_bin)
    end = int(insx + length)
    insert = np.full(total_length, np.nan)  # Bins outside input data are NaN
    data = np.nan_to_num(data)  # Set all NaN in input to 0 ('== np.nan' is always False)
    try:  # Insertion
        insert[insx:end] = data
    except ValueError:
        msg = "relate_data() call from {} line {}".format(
            inspect.stack()[1][1],
            inspect.stack()[1][2])
        print('ERROR: {}'.format(msg))
        lg.logprint(LAM_logger, f'Failed {msg}\n', 'ex')
        msg = "If not using MPs, remove MPs.csv from 'Data Files'."
        if insert[insx:end].size - length == mp_bin:
            lg.logprint(LAM_logger, msg, 'i')
        raise SystemExit
    return insert, insx
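The anchoring arithmetic on its own: a four-bin sample whose MP falls in bin 2 is centred on bin 5 of a ten-bin matrix (stand-alone, with made-up values):

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0])   # made-up per-bin values
center, mp_bin, total_length = 5, 2, 10
insx = int(center - mp_bin)              # sample starts at matrix bin 3
insert = np.full(total_length, np.nan)
insert[insx:insx + len(data)] = data
print(insert)  # -> [nan nan nan 1. 2. 3. 4. nan nan nan]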
Example #23
def find_existing(paths: system.Paths):
    """Get MPs and count old projections when not projecting during 'Count'."""
    msg = 'Collecting pre-existing data.'
    print(msg)
    lg.logprint(LAM_logger, msg, 'i')
    mps = pd.DataFrame(columns=Store.samples)
    for smpl in Store.samples:
        smplpath = paths.samplesdir.joinpath(smpl)
        # FIND MP
        if Sett.useMP:
            try:
                mp_df = pd.read_csv(smplpath.joinpath('MPs.csv'))
                mp_bin = mp_df.iat[0, 0]
            except FileNotFoundError:
                msg = "MP-data not found."
                add = "Provide MP-data or set useMP to False."
                print(f"ERROR: {msg}\n{add}")
                raise SystemExit
        else:
            mp_bin = 0
        mps.loc[0, smpl] = mp_bin
        # FIND CHANNEL COUNTS
        for path in [
                p for p in smplpath.iterdir() if p.suffix == '.csv'
                and p.stem not in ['Vector', 'MPs', Sett.MPname]
        ]:
            data = pd.read_csv(path)
            try:
                counts = np.bincount(data['DistBin'], minlength=Sett.projBins)
                counts = pd.Series(np.nan_to_num(counts), name=smpl)
                channel_string = str(f'All_{path.stem}.csv')
                system.save_to_file(counts, paths.datadir, channel_string)
            except (KeyError, ValueError):  # If channel has not been projected
                print(f"Missing projection data: {path.stem} - {smpl}")
                print("-> Set project=True and perform Count")
                continue
    mps.to_csv(paths.datadir.joinpath('MPs.csv'))
    samples = mps.columns.tolist()
    groups = set({s.casefold(): s.split('_')[0] for s in samples}.values())
    Store.samplegroups = sorted(groups)
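The count-recovery step relies on np.bincount's minlength padding; in isolation, with a made-up 'DistBin' column:

import numpy as np

dist_bin = np.array([0, 1, 1, 3])            # bin index of each cell
counts = np.bincount(dist_bin, minlength=6)  # pad empty trailing bins
print(counts)  # -> [1 2 0 1 0 0]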
Example #24
def start(test_vectors=True, only_vectors=False):
    """Check that everything is OK when starting a run."""

    # If workdir variable isn't pathlib.Path, make it so
    if not isinstance(Sett.workdir, pl.Path):
        Sett.workdir = pl.Path(Sett.workdir)

    # Check that at least one primary setting is True
    if not any([Sett.process_samples, Sett.process_counts, Sett.Create_Plots, Sett.process_dists, Sett.statistics]):
        lg.logprint(LAM_logger, 'All primary settings are False', 'e')
        print("\nAll primary settings are set to False.\n\nExiting ...")
        raise SystemExit

    # Otherwise create paths and directories
    paths = Paths(Sett.workdir, only_vectors=only_vectors)

    if not test_vectors:
        return paths

    # Check that vector channel data are found
    if Sett.process_samples or (Sett.measure_width and Sett.process_counts):
        samples = [p for p in Sett.workdir.iterdir() if p.is_dir() and p.name != 'Analysis Data']
        failed = []
        for sample in samples:
            try:
                next(sample.glob(f'*_{Sett.vectChannel}_*'))
            except StopIteration:
                failed.append(sample.name)
        if failed:
            msg = f"Vector channel data not found for {', '.join(failed)}"
            print(f'ERROR: {msg}')
            print('Check vector channel setting or data.')
            lg.logprint(LAM_logger, msg, 'e')
            raise SystemExit

    # Find and store all sample names
    Store.samples = [p.name for p in paths.samplesdir.iterdir() if p.is_dir()]
    return paths
Example #25
    def __init__(self, workdir: pl.Path, only_vectors: bool = False):
        """Creation of output folders."""
        # Create path-variables necessary for the analysis
        self.outputdir = workdir.joinpath('Analysis Data')
        self.datadir = self.outputdir / 'Data Files'
        self.plotdir = self.outputdir / 'Plots'
        self.samplesdir = self.outputdir / 'Samples'
        self.statsdir = self.outputdir / 'Statistics'

        # If samples are to be processed and the output data directory
        # exists, the directory and all its files are removed so as not
        # to interfere with the analysis.
        if not only_vectors:
            self.clear_analysis()

        # Create output directories
        pl.Path.mkdir(self.outputdir, exist_ok=True)
        pl.Path.mkdir(self.plotdir, exist_ok=True)
        pl.Path.mkdir(self.samplesdir, exist_ok=True)
        pl.Path.mkdir(self.datadir, exist_ok=True)
        pl.Path.mkdir(self.statsdir, exist_ok=True)
        if LAM_logger is not None:
            lg.logprint(LAM_logger, 'Directories successfully created.', 'i')
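An equivalent scaffold as a four-line sketch: with parents=True, each subdirectory call also creates the 'Analysis Data' root, so the explicit outputdir mkdir becomes unnecessary.

import pathlib as pl

outputdir = pl.Path('Analysis Data')  # placeholder root
for sub in ('Data Files', 'Plots', 'Samples', 'Statistics'):
    # parents=True creates 'Analysis Data' on the first pass as well
    (outputdir / sub).mkdir(parents=True, exist_ok=True)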
Example #26
def create_samples(paths: system.Paths):
    """Create vectors for the samples."""
    lg.logprint(LAM_logger, 'Begin vector creation.', 'i')
    print("---Processing samples---")
    # Test that resize-setting is in step of 0.1:
    if Sett.SkeletonVector:
        check_resize_step(Sett.SkeletonResize)
    # Loop through samples to create vectors
    for path in [
            p for p in Sett.workdir.iterdir()
            if p.is_dir() and p.stem != 'Analysis Data'
    ]:
        sample = GetSample(path, paths)
        print("{}  ...".format(sample.name))
        sample.vect_data = sample.get_vect_data(Sett.vectChannel)
        # Creation of vector for projection
        if Sett.SkeletonVector:
            sample.create_skeleton()
        else:
            sample.create_median()
    sample_dirs = [p for p in paths.samplesdir.iterdir() if p.is_dir()]
    pfunc.create_vector_plots(Sett.workdir, paths.samplesdir, sample_dirs)
    lg.logprint(LAM_logger, 'Vectors created.', 'i')
Example #27
def get_widths(samplesdir, datadir):
    """Find widths of samples along their vectors."""
    msg = "Necessary files for width approximation not found for "
    for path in [p for p in samplesdir.iterdir() if p.is_dir()]:
        data, vector_data = None, None  # Reset for each sample
        # Find necessary data files:
        files = [p for p in path.iterdir() if p.is_file()]

        # Search terms (dots escaped so that e.g. 'vectors.csv' cannot match)
        vreg = re.compile(r'^vector\.', re.I)  # vector
        dreg = re.compile(rf'^{Sett.vectChannel}\.csv', re.I)  # channel data

        try:  # Match terms to found paths
            vect_paths = [p for p in files if vreg.match(p.name)]
            data_paths = [p for p in files if dreg.match(p.name)]
            # Read found paths
            vector_data = system.read_vector(vect_paths)
            data = system.read_data(data_paths[0], header=0)

        # Error handling
        except (StopIteration, IndexError):
            full_msg = msg + path.name
            print(f"WARNING: {full_msg}")
            lg.logprint(LAM_logger, full_msg, 'w')
            if vector_data is None:  # if vector was not found
                print("-> Could not read vector data.")
            else:  # if channel data was not found
                print("Could not read channel data")
                print("Make sure channel is set right (vector channel)\n")
            continue

        # Compute widths
        process.DefineWidths(data, vector_data, path, datadir)
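The reason for escaping the dots in the search terms, in isolation (an unescaped '.' matches any character):

import re

print(bool(re.match(r'^vector.', 'vectors.csv', re.I)))   # True, too loose
print(bool(re.match(r'^vector\.', 'vectors.csv', re.I)))  # False
print(bool(re.match(r'^vector\.', 'Vector.csv', re.I)))   # True, intended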
Example #28
def ask_control():
    """Ask new control group if one not found."""
    # Print groups and demand input for control:
    while True:
        print('Found groups:')
        for i, grp in enumerate(sorted(Store.samplegroups)):
            print('{}: {}'.format(i, grp))
        msg = "Select the number of control group: "
        print('\a')
        ans = system.ask_user(msg, dlgtype='integer')
        if ans is None:
            raise KeyboardInterrupt
        if 0 <= ans < len(Store.samplegroups):
            # Change control based on input
            Sett.cntrlGroup = sorted(Store.samplegroups)[ans]
            print(f"Control group set as '{Sett.cntrlGroup}'.\n")
            break
        else:
            print('Command not understood.')

    msg = f"-> Changed to group '{Sett.cntrlGroup}' by user"
    lg.logprint(LAM_logger, msg, 'i')
Example #29
def detect_borders(paths, all_samples, palette, anchor, variables, scoring, threshold=0.5, channel='DAPI'):
    """
    Midgut border detection by weighted scoring of binned variables.

    Args:
    ----
        paths - LAM system.Paths-object that contains directory paths
        all_samples - Paths to sample folders
        palette - Color palette dict with sample groups as keys
        anchor - Anchoring bin of the samples in the full data matrix
        variables - List of column names to collect from sample's channel data
        scoring - Dict of variable names with their scoring weight
        threshold - Minimum score for peak detection, i.e. borders
        channel - The name of the data channel that is used, e.g. 'DAPI' data
    """
    print('\n---Finding border regions---')
    lg.logprint(LAM_logger, 'Finding border regions.', 'i')
    b_dirpath = plotting_directory(paths.plotdir)

    # Get widths and if not found, abort
    widths = read_widths(paths.datadir)
    if widths is None:
        return

    # Establish object to store scores of individual samples
    border_data = FullBorders(all_samples, widths, anchor, palette)
    print('  Scoring samples  ...')

    # Collect and score variables for each sample in the sample list
    for path in all_samples:
        sample = GetSampleBorders(path, channel, scoring, anchor, variables)
        # If expected variables are found, calculate sample scores
        if not sample.error:
            sample(border_data, b_dirpath)

    # If no data, return without finding borders
    if border_data.scores.isnull().values.all():
        print('\nERROR: Missing data, border detection cancelled.')
        lg.logprint(LAM_logger, 'Border detection variables not found.', 'e')
        return

    # Once sample scores have been collected, find peaks
    print('  Finding peaks  ...')
    flat, peaks = border_data(b_dirpath, threshold)

    # Add the locations of border peaks in each sample's individual binning
    binned_peaks = append_binning(border_data.sample_starts, peaks)

    # Save data
    flat.T.to_csv(paths.datadir.joinpath('Borders_scores.csv'), index=False)
    binned_peaks.to_csv(paths.datadir.joinpath('Borders_peaks.csv'), index=False)
    lg.logprint(LAM_logger, 'Border detection done.', 'i')
Example #30
    def get_clusters(self):
        """Gather sample data to compute clusters of cells."""
        print('\n---Finding clusters---')
        lg.logprint(LAM_logger, 'Finding clusters', 'i')

        for grp in self._groups:  # Get one sample group
            lg.logprint(LAM_logger, '-> group {}'.format(grp), 'i')
            print('  {}  ...'.format(grp))
            samplegroup = Group(grp)

            for path in samplegroup.groupPaths:  # Get one sample of the group
                test_sample = Sample(path, samplegroup)
                test_sample.clustering(Sett.cl_max_dist)  # Find clusters

        lg.logprint(LAM_logger, 'Clusters calculated', 'i')