Example #1
0
    def prepRegressionPlate(self):
        '''
        Builds a GrowthPlate() from the object's master data and mapping, transforms it,
            writes its key to file, and stores the plate on the object.

        Reads:
            self.master_data, self.master_mapping: passed positionally to GrowthPlate().
            self.non_time_varbs: converted to a list and used as the groupby for baseline subtraction.
            self.subtract_control (boolean): forwarded to GrowthPlate.subtractControl().
            self.paths_dict['key'] (str): where the plate key is written as a tab-separated file.

        Action:
            sets self.plate to the prepared plate and self.ntimepoints to the number
            of rows in plate.time.
        '''

        growth_plate = GrowthPlate(self.master_data, self.master_mapping)

        # convert time to the configured output units, then log-transform the data
        time_units = {
            'input': getTimeUnits('input'),
            'output': getTimeUnits('output'),
        }
        growth_plate.convertTimeUnits(**time_units)
        growth_plate.logData()

        # baseline is always subtracted here; control subtraction is optional
        baseline_options = {
            'to_do': True,
            'poly': getValue('PolyFit'),
            'groupby': list(self.non_time_varbs),
        }
        growth_plate.subtractBaseline(**baseline_options)
        growth_plate.subtractControl(to_do=self.subtract_control, drop=True)

        # save the plate key (index included) as a tab-separated file
        key_table = growth_plate.key
        key_table.to_csv(self.paths_dict['key'],
                         sep='\t',
                         header=True,
                         index=True)

        self.plate = growth_plate
        self.ntimepoints = growth_plate.time.shape[0]
Example #2
0
def prepDataForFitting(data, mapping, subtract_baseline=True):
    '''
    Wraps a data set in a growth.GrowthPlate() object and applies the transformations
        that precede GP fitting.

    Args:
        data (pandas.DataFrame): growth measurements; Time is expected as a column
            rather than as the index.
        mapping (pandas.DataFrame): number of wells/samples (n) x number of variables (p)
        subtract_baseline (boolean): forwarded to both computeFoldChange() and subtractBaseline().

    Returns:
        plate (growth.GrowthPlate() object)
    '''

    gplate = GrowthPlate(data=data, key=mapping)

    # summaries are computed before any transformation of the measurements
    gplate.computeBasicSummary()
    gplate.computeFoldChange(subtract_baseline=subtract_baseline)

    time_units = {
        'input': getTimeUnits('input'),
        'output': getTimeUnits('output'),
    }
    gplate.convertTimeUnits(**time_units)

    # replace non-positive values, necessary prior to log-transformation
    gplate.raiseData()

    # natural-log transform
    gplate.logData()

    # subtract first T0 (or rather divide by first T0)
    gplate.subtractBaseline(subtract_baseline, poly=True)

    return gplate
Example #3
0
def plotDeltaOD(ax, df, ylabel, xlabel, fontsize=20):
    '''
    Plots Delta OD: its mean (Avg key in df) and credible intervals (Low and Upp keys),
        plus a horizontal reference line at zero.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        df (pandas.DataFrame): must provide Time, Avg, Low, and Upp columns.
        ylabel: if truthy, the y-axis is labelled with the Delta(ln OD) label; the value
            itself is not used as the label text.
        xlabel: if truthy, the x-axis is labelled 'Time (<output time units>)'; the value
            itself is not used as the label text.
        fontsize (float): font size for tick labels and axis labels.

    Returns: 
        ax (matplotlib.axes._subplots.AxesSubplot)    
    '''

    ax.plot(df.Time, df.Avg, lw=3.0)
    ax.fill_between(df.Time, df.Low, df.Upp, alpha=0.1)

    # axhline's xmin/xmax are fractions of the axes width (0 to 1), not data values;
    # passing the largest time point would cut the line short whenever it is below 1.
    ax.axhline(y=0, xmin=0, xmax=1, lw=3.0, color=(0, 0, 0, 1))

    ax = largeTickLabels(ax, fontsize=fontsize)

    if xlabel:
        ax.set_xlabel('Time ({})'.format(getTimeUnits('output')),
                      fontsize=fontsize)
    if ylabel:
        ax.set_ylabel(r'${\Delta}$(ln OD)', fontsize=fontsize)

    return ax
Example #4
0
def setAxesLabels(ax, subtract_control, plot_params, fontsize=20):
    '''
    Given an axis and analysis parameters, determine appropriate labels 
        for axes and adjust them accordingly. 

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot) 
        subtract_control (boolean): if True, the y-label is prefixed with 'Normalized'.
        plot_params (dictionary): must provide 'plot_linear_od'; may provide 'fontsize'.
        fontsize (float): used only when plot_params has no 'fontsize' entry.

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot) 
    '''

    if plot_params['plot_linear_od']:
        base = getValue('hypo_plot_y_label')
    else:
        base = 'ln {}'.format(getValue('hypo_plot_y_label'))

    # plot aesthetics
    if subtract_control:
        ylabel = 'Normalized {}'.format(base)
    else:
        ylabel = base

    # plot_params takes precedence; the fontsize argument is the fallback so that
    # it is no longer silently ignored (and a missing entry no longer raises KeyError)
    label_size = plot_params.get('fontsize', fontsize)

    ax.set_xlabel('Time ({})'.format(getTimeUnits('output')),
                  fontsize=label_size)
    ax.set_ylabel(ylabel, fontsize=label_size)

    return ax
Example #5
0
def setAxesLabels(ax, subtract_control, plot_params, logged=True, fontsize=20):
    '''
    Given an axis and analysis parameters, determine appropriate labels 
        for axes and adjust them accordingly. 

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot) 
        subtract_control (boolean): if True, the y-label is prefixed with 'Normalized'.
        plot_params (dictionary): may provide 'fontsize'.
        logged (boolean): if True, the y-label is prefixed with 'ln'.
        fontsize (float): used only when plot_params has no 'fontsize' entry.

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot) 

    Action:
        updates matplotlib's global rcParams (math text and font family), which
        affects any figure drawn afterwards in the same session.
    '''
    import matplotlib as mpl
    mpl.rcParams["mathtext.default"] = 'regular'
    mpl.rcParams["font.family"] = 'sans-serif'
    mpl.rcParams["font.sans-serif"] = 'Arial'

    if logged:
        base = 'ln {}'.format(getValue('hypo_plot_y_label'))
    else:
        base = getValue('hypo_plot_y_label')

    # plot aesthetics
    if subtract_control:
        ylabel = 'Normalized {}'.format(base)
    else:
        ylabel = base

    # plot_params takes precedence; the fontsize argument is the fallback so that
    # it is no longer silently ignored (and a missing entry no longer raises KeyError)
    label_size = plot_params.get('fontsize', fontsize)

    ax.set_xlabel('Time ({})'.format(getTimeUnits('output')),
                  fontsize=label_size)
    ax.set_ylabel(ylabel, fontsize=label_size)

    return ax
Example #6
0
def basicSummaryOnly(data, mapping, directory, args, verbose=False):
    '''
    When the user asked for the basic summary only (args['obs']), summarize and plot every
        plate, write the summaries to file, and terminate the program with sys.exit().
        Otherwise do nothing and return None.

    Args:
        data (dictionary): keys are plate IDs and values are pandas.DataFrames with size t x (n+1)
            where t is the number of time-points and n is number of wells (i.e. samples),
            the additional 1 is due to the explicit 'Time' column, index is uninformative.
        mapping (dictionary): keys are plate IDs and values are pandas.DataFrames with size n x (p)
            where n is the number of wells (or samples) in plate, and p is the number of
            variables or parameters described in the dataframe.
        directory (dictionary): keys are folder names, values are their paths;
            'summary' and 'figures' are read here.
        args (dictionary): 'obs', 'dp', 'merges', and (when merging) 'fout' are read here.
        verbose (boolean)

    Returns:
        None: if args['obs'] is falsy. Otherwise the function does not return (sys.exit()).
    '''

    # nothing to do unless the basic summary was requested
    if not args['obs']:
        return None

    print(tidyMessage('AMiGA is summarizing and plotting data files'))

    collected_keys = []

    for plate_id, plate_data in data.items():

        # where this plate's summary and figure will be saved
        summary_file = assemblePath(directory['summary'], plate_id, '.txt')
        figure_file = assemblePath(directory['figures'], plate_id, '.pdf')

        # plate-specific samples: index should be well IDs but a column Well should also exist
        #   in main.py, annotateMappings() is called which ensures the above is the case
        plate_mapping = mapping[plate_id]
        plate_mapping = resetNameIndex(plate_mapping, 'Well', False)

        # keep only the mapped wells, then relabel them with the mapping's index values
        well_ids = list(plate_mapping.Well.values)
        plate_data = plate_data.loc[:, ['Time'] + well_ids]
        plate_data.columns = ['Time'] + list(plate_mapping.index.values)

        # build the GrowthPlate object and run the basic summary
        plate = GrowthPlate(data=plate_data, key=plate_mapping)
        time_units = {
            'input': getTimeUnits('input'),
            'output': getTimeUnits('output'),
        }
        plate.convertTimeUnits(**time_units)
        plate.computeBasicSummary()
        plate.computeFoldChange(subtract_baseline=True)

        # plotting is skipped when args['dp'] is set
        if not args['dp']:
            plate.plot(figure_file)

        # either write this plate's key now, or hold on to it for the merged summary
        if not args['merges']:
            plate.key.to_csv(summary_file,
                             sep='\t',
                             header=True,
                             index=False)
        else:
            collected_keys.append(plate.key)

        smartPrint(plate_id, verbose=verbose)

    if args['merges']:
        # NOTE(review): pd.concat raises on an empty list, i.e. when data has no plates
        merged_name = selectFileName(args['fout'])
        merged_path = assembleFullName(directory['summary'], 'summary',
                                       merged_name, '_basic', '.txt')
        merged_df = pd.concat(collected_keys, sort=False)
        merged_df.to_csv(merged_path, sep='\t', header=True, index=False)

    summary_note = '\nSee {} for summary text file(s).'.format(
        directory['summary'])
    smartPrint(summary_note, verbose)

    figure_note = 'See {} for figure PDF(s).\n'.format(directory['figures'])
    smartPrint(figure_note, verbose)

    farewell = ('AMiGA completed your request and '
                'wishes you good luck with the analysis!')
    print(tidyMessage(farewell))

    sys.exit()
Example #7
0
    def plot(self,
             save_path='',
             plot_fit=False,
             plot_derivative=False,
             plot_raw_with_fit=False):
        '''
        Creates a 8x12 grid plot (for 96-well plate) that shows the growth curves in each well.
            Plot aesthetics require several parameters that are saved in config.py and pulled using 
            functions in misc.py. Plot will be saved as a PDF to location passed via argument. Index
            column for object's key should be Well IDs but object's key should also have a Well column.

        Args:
            save_path (str): file path: if empty, plot will not be saved at all.
            plot_fit (boolean): plot 'OD_Growth_Data' from self.gp_data with 'OD_Growth_Fit'
                overlaid; the y-axis then starts at zero.
            plot_derivative (boolean): if True, plot only 'GP_Derivative' from self.gp_data;
                takes precedence over plot_fit and plot_raw_with_fit, and no fit is overlaid.
            plot_raw_with_fit (boolean): plot 'OD_Data' from self.gp_data with 'OD_Fit' overlaid.
            If none of the three flags is set, self.data is plotted.

        Returns:
            fig,axes: figure and axis handles (the figure is closed before returning),
                or None if self.isSingleMultiWellPlate() is False.

        Action:
            if user passes save_path argument, plot will be saved as PDF in desired location.
            'Row' and 'Column' columns are dropped from self.key before returning.
        '''

        def _pivot_gp(values):
            # Time x Sample_ID table of the requested self.gp_data column
            return self.gp_data.pivot(columns='Sample_ID',
                                      index='Time',
                                      values=values)

        sns.set_style('whitegrid')

        self.addLocation()

        time = self.time

        cols = [
            'Sample_ID', 'Plate_ID', 'Well', 'Row', 'Column', 'Fold_Change',
            'OD_Max', 'OD_Baseline'
        ]
        key = self.key.reindex(
            cols,
            axis='columns',
        )
        key = key.dropna(axis=1, how='all')
        if 'Sample_ID' in key.columns:
            key = key.drop_duplicates().set_index('Sample_ID')

        # make sure plate is 96-well version, otherwise skip plotting
        if not self.isSingleMultiWellPlate():
            msg = 'WARNING: GrowthPlate() object for {} is not a 96-well plate. '.format(
                self.key.Plate_ID.iloc[0])
            msg += 'AMiGA can not plot it.\n'
            print(msg)
            # mirror the clean-up done on the normal exit path so self.key is left in the
            # same state either way; errors='ignore' because the columns are presumably
            # added by addLocation() but that is not verifiable from here
            self.key.drop(['Row', 'Column'],
                          axis=1,
                          inplace=True,
                          errors='ignore')
            return None

        # choose what to draw; only the two fit modes have a second (overlay) curve.
        # overlay_y stays None otherwise, so combining plot_derivative with a fit flag
        # no longer hits an unbound variable further down.
        overlay_y = None
        if plot_derivative:
            base_y = _pivot_gp('GP_Derivative')
        elif plot_fit:
            base_y = _pivot_gp('OD_Growth_Data')
            overlay_y = _pivot_gp('OD_Growth_Fit')
        elif plot_raw_with_fit:
            base_y = _pivot_gp('OD_Data')
            overlay_y = _pivot_gp('OD_Fit')
        else:
            base_y = self.data

        fig, axes = plt.subplots(8, 12, figsize=[12, 8])

        # define window axis limits
        ymax = np.ceil(base_y.max(axis=1).max())
        ymin = np.floor(base_y.min(axis=1).min())

        # growth fits start at zero; derivatives can be negative, so leave them alone
        if plot_fit and not plot_derivative:
            ymin = 0

        xmin = 0
        xmax = time.values[-1]
        xmax_up = int(np.ceil(xmax))  # round up to nearest integer

        # x-data points are shared by every well
        x = np.ravel(time.values)

        for well in base_y.columns:

            # select proper sub-plot (Row and Column are 1-based in the key)
            r, c = key.loc[well, ['Row', 'Column']] - 1
            ax = axes[r, c]

            # get colors based on fold-change and configuration parameters
            if 'Fold_Change' in key.keys():
                color_l, color_f = getPlotColors(key.loc[well, 'Fold_Change'])
            else:
                color_l = getValue('fcn_line_color')
                color_f = getValue('fcn_face_color')

            # set window axis limits
            ax.set_xlim([xmin, xmax])
            ax.set_ylim([ymin, ymax])

            # define y-data points
            y = base_y.loc[:, well].values

            # plot line and fill_between, if plotting OD estimate
            ax.plot(x, y, color=color_l, lw=1.5, zorder=10)
            if not plot_derivative:
                ax.fill_between(x=x,
                                y1=[ax.get_ylim()[0]] * len(y),
                                y2=y,
                                color=color_f,
                                zorder=7)

            # add fit lines, if this mode has them
            if overlay_y is not None:
                y_fit = overlay_y.loc[:, well].values
                ax.plot(x,
                        y_fit,
                        color='yellow',
                        alpha=0.65,
                        ls='--',
                        lw=1.5,
                        zorder=10)

            # show tick labels for bottom left subplot only, so by default no labels
            if plot_derivative:
                # zero derivative indicates no instantaneous growth
                plt.setp(ax, yticks=[ymin, 0, ymax], yticklabels=[])
            else:
                plt.setp(ax, yticks=[ymin, ymax], yticklabels=[])
            plt.setp(ax, xticks=[xmin, xmax], xticklabels=[])

            # add well identifier on top left of each sub-plot
            well_color = getTextColors('Well_ID')
            ax.text(0.,
                    1.,
                    key.loc[well, 'Well'],
                    color=well_color,
                    ha='left',
                    va='top',
                    transform=ax.transAxes)

            # add Max OD value on top right of each sub-plot
            if self.mods.floored:
                od_max = key.loc[well, 'OD_Max'] - key.loc[well, 'OD_Baseline']
            else:
                od_max = key.loc[well, 'OD_Max']
            ax.text(1.,
                    1.,
                    "{0:.2f}".format(od_max),
                    color=getTextColors('OD_Max'),
                    ha='right',
                    va='top',
                    transform=ax.transAxes)

        # show tick labels for bottom left sub-plot only
        plt.setp(axes[7, 0], xticks=[0, xmax], xticklabels=[0, xmax_up])
        plt.setp(axes[7, 0], yticks=[ymin, ymax], yticklabels=[ymin, ymax])

        # add x- and y-labels and title
        ylabel_base = getValue('grid_plot_y_label')
        ylabel_mod = 'ln ' if self.mods.logged else ''

        if plot_derivative:
            ylabel_text = 'd[ln{}]/dt'.format(ylabel_base)
        else:
            ylabel_text = ylabel_mod + ylabel_base

        # add labels and title
        fig.text(0.512,
                 0.07,
                 'Time ({})'.format(getTimeUnits('output')),
                 fontsize=15,
                 ha='center',
                 va='bottom')
        fig.text(0.100,
                 0.50,
                 ylabel_text,
                 fontsize=15,
                 ha='right',
                 va='center',
                 rotation='vertical')
        # title uses the Plate_ID of the last well drawn in the loop above
        fig.suptitle(x=0.512,
                     y=0.93,
                     t=key.loc[well, 'Plate_ID'],
                     fontsize=15,
                     ha='center',
                     va='center')

        # if no file path passed, do not save; act on this figure explicitly rather
        # than on whatever pyplot currently considers the active figure
        if save_path != '':
            fig.savefig(save_path, bbox_inches='tight')

        self.key.drop(['Row', 'Column'], axis=1, inplace=True)

        plt.close(fig)

        return fig, axes