def prepRegressionPlate(self):
    '''
    Builds a GrowthPlate() object from the master data and mapping held by this
    object, runs a fixed sequence of transformations on it, writes the plate key
    to a tab-separated file, and stores the plate on this object.

    Reads:
        self.master_data, self.master_mapping: passed positionally to GrowthPlate().
        self.non_time_varbs: converted to a list and passed as the groupby argument
            of subtractBaseline().
        self.subtract_control: passed as the to_do argument of subtractControl().
        self.paths_dict['key']: file path where the plate key is written.

    Sets:
        self.plate: the processed GrowthPlate() object.
        self.ntimepoints: number of rows in plate.time.
    '''

    growth_plate = GrowthPlate(self.master_data, self.master_mapping)

    # convert time from the configured input units to the configured output units
    units_in = getTimeUnits('input')
    units_out = getTimeUnits('output')
    growth_plate.convertTimeUnits(input=units_in, output=units_out)

    growth_plate.logData()

    # baseline is always subtracted here (to_do=True)
    poly_fit = getValue('PolyFit')
    grouping = list(self.non_time_varbs)
    growth_plate.subtractBaseline(to_do=True, poly=poly_fit, groupby=grouping)

    growth_plate.subtractControl(to_do=self.subtract_control, drop=True)

    # save the plate key (index included) as tab-separated text
    key_path = self.paths_dict['key']
    growth_plate.key.to_csv(key_path, sep='\t', header=True, index=True)

    self.plate = growth_plate
    self.ntimepoints = growth_plate.time.shape[0]
def prepDataForFitting(data, mapping, subtract_baseline=True):
    '''
    Wraps a data set in a GrowthPlate() object and applies, in order, the
    summary and transformation steps that precede GP fitting.

    Args:
        data (pandas.DataFrame): number of time points (t) x number of variables
            plus-one (p+1); plus-one because Time is a column, not the index.
        mapping (pandas.DataFrame): number of wells/samples (n) x number of variables (p)
        subtract_baseline (boolean): forwarded to computeFoldChange() and to
            subtractBaseline().

    Returns:
        growth_plate (GrowthPlate() object)
    '''

    growth_plate = GrowthPlate(data=data, key=mapping)

    # basic summaries are computed before any transformation of the data
    growth_plate.computeBasicSummary()
    growth_plate.computeFoldChange(subtract_baseline=subtract_baseline)

    units_in = getTimeUnits('input')
    units_out = getTimeUnits('output')
    growth_plate.convertTimeUnits(input=units_in, output=units_out)

    # replace non-positive values first, which is necessary prior to log-transformation
    growth_plate.raiseData()

    # natural-log transform
    growth_plate.logData()

    # baseline subtraction on the log-transformed data
    growth_plate.subtractBaseline(subtract_baseline, poly=True)

    return growth_plate
def plotDeltaOD(ax, df, ylabel, xlabel, fontsize=20):
    '''
    Plots Delta OD: its mean (Avg column in df) as a line, its credible interval
    (Low and Upp columns) as a shaded band, and a horizontal reference line at zero.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        df (pandas.DataFrame): must have columns Time, Avg, Low, and Upp.
        ylabel: only tested for truthiness; if truthy, the y-axis label is set
            to a fixed Delta(ln OD) label.
        xlabel: only tested for truthiness; if truthy, the x-axis label is set
            to 'Time (<output time units>)'.
        fontsize (float): used for tick labels and for both axis labels.

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)
    '''

    ax.plot(df.Time, df.Avg, lw=3.0)
    ax.fill_between(df.Time, df.Low, df.Upp, alpha=0.1)

    # xmin/xmax of axhline are fractions of the axes width (0 to 1), not data
    # coordinates. Passing the maximum time value would truncate the line
    # whenever that value is below 1, so span the whole axes explicitly.
    ax.axhline(y=0, xmin=0, xmax=1, lw=3.0, color=(0, 0, 0, 1))

    ax = largeTickLabels(ax, fontsize=fontsize)

    if xlabel:
        ax.set_xlabel('Time ({})'.format(getTimeUnits('output')), fontsize=fontsize)

    if ylabel:
        ax.set_ylabel(r'${\Delta}$(ln OD)', fontsize=fontsize)

    return ax
def setAxesLabels(ax, subtract_control, plot_params, fontsize=20):
    '''
    Chooses the x- and y-axis labels that match the analysis parameters and
    applies them to the given axis.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        subtract_control (boolean): if True, the y-label is prefixed with 'Normalized '.
        plot_params (dictionary): must have keys 'plot_linear_od' and 'fontsize'.
        fontsize (float): accepted but not used; the font size of both labels is
            taken from plot_params['fontsize'].

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)
    '''

    # linear OD uses the configured label as is, otherwise mark it as log-transformed
    is_linear = plot_params['plot_linear_od']
    od_label = getValue('hypo_plot_y_label')
    base = od_label if is_linear else 'ln {}'.format(od_label)

    # plot aesthetics
    ylabel = 'Normalized {}'.format(base) if subtract_control else base
    xlabel = 'Time ({})'.format(getTimeUnits('output'))

    ax.set_xlabel(xlabel, fontsize=plot_params['fontsize'])
    ax.set_ylabel(ylabel, fontsize=plot_params['fontsize'])

    return ax
def setAxesLabels(ax, subtract_control, plot_params, logged=True, fontsize=20):
    '''
    Chooses the x- and y-axis labels that match the analysis parameters and
    applies them to the given axis. Also overrides three matplotlib rcParams
    (math text and font family settings); these changes are global and are
    not restored afterwards.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        subtract_control (boolean): if True, the y-label is prefixed with 'Normalized '.
        plot_params (dictionary): must have key 'fontsize'.
        logged (boolean): if True, the y-label is prefixed with 'ln '.
        fontsize (float): accepted but not used; the font size of both labels is
            taken from plot_params['fontsize'].

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)
    '''

    import matplotlib as mpl

    # global font settings
    mpl.rcParams["mathtext.default"] = 'regular'
    mpl.rcParams["font.family"] = 'sans-serif'
    mpl.rcParams["font.sans-serif"] = 'Arial'

    od_label = getValue('hypo_plot_y_label')
    base = 'ln {}'.format(od_label) if logged else od_label

    # plot aesthetics
    ylabel = 'Normalized {}'.format(base) if subtract_control else base
    xlabel = 'Time ({})'.format(getTimeUnits('output'))

    ax.set_xlabel(xlabel, fontsize=plot_params['fontsize'])
    ax.set_ylabel(ylabel, fontsize=plot_params['fontsize'])

    return ax
def basicSummaryOnly(data, mapping, directory, args, verbose=False):
    '''
    If args['obs'] is set, performs a basic algebraic summary of each data file,
    optionally plots it, saves the summaries, and then exits the interpreter via
    sys.exit(). If args['obs'] is not set, returns None without doing anything.

    Args:
        data (dictionary): keys are plate IDs and values are pandas.DataFrames with
            size t x (n+1) where t is the number of time-points and n is number of
            wells (i.e. samples), the additional 1 is due to the explicit 'Time'
            column, index is uninformative.
        mapping (dictionary): keys are plate IDs and values are pandas.DataFrames with
            size n x (p) where n is the number of wells (or samples) in plate, and
            p is the number of variables or parameters described in dataframe.
        directory (dictionary): keys are folder names, values are their paths;
            'summary' and 'figures' are used here.
        args (dictionary): keys 'obs', 'dp', 'merges' are read, and 'fout' as well
            when 'merges' is set.
        verbose (boolean): forwarded to smartPrint().

    Returns:
        None: only if args['obs'] is falsy; otherwise the function does not return.
    '''

    # nothing to do unless the basic-summary-only mode was requested
    if not args['obs']:
        return None

    print(tidyMessage('AMiGA is summarizing and plotting data files'))

    merged_keys = []

    for pid, plate_data in data.items():

        # where the summary and the plot of this plate will be saved
        summary_txt = assemblePath(directory['summary'], pid, '.txt')
        figure_pdf = assemblePath(directory['figures'], pid, '.pdf')

        # plate-specific samples: the code below relies on both an informative
        # index and a 'Well' column (the original author notes that
        # annotateMappings() in main.py guarantees this)
        plate_mapping = resetNameIndex(mapping[pid], 'Well', False)

        # keep only Time and the wells described in the mapping
        well_ids = list(plate_mapping.Well.values)
        plate_data = plate_data.loc[:, ['Time'] + well_ids]

        # relabel the well columns with the index values of the mapping
        plate_data.columns = ['Time'] + list(plate_mapping.index.values)

        # create GrowthPlate object, perform basic summary
        plate = GrowthPlate(data=plate_data, key=plate_mapping)
        plate.convertTimeUnits(input=getTimeUnits('input'),
                               output=getTimeUnits('output'))
        plate.computeBasicSummary()
        plate.computeFoldChange(subtract_baseline=True)

        # plot and save as PDF unless args['dp'] is set
        # (presumably 'dp' stands for "don't plot"; verify against the argument parser)
        if not args['dp']:
            plate.plot(figure_pdf)

        # either write this plate's key now, or hold on to it for the merged summary
        if not args['merges']:
            plate.key.to_csv(summary_txt, sep='\t', header=True, index=False)
        else:
            merged_keys.append(plate.key)

        smartPrint(pid, verbose=verbose)

    # a single summary file for all plates
    if args['merges']:
        filename = selectFileName(args['fout'])
        merged_path = assembleFullName(directory['summary'], 'summary',
                                       filename, '_basic', '.txt')
        merged_df = pd.concat(merged_keys, sort=False)
        merged_df.to_csv(merged_path, sep='\t', header=True, index=False)

    smartPrint('\nSee {} for summary text file(s).'.format(directory['summary']),
               verbose)
    smartPrint('See {} for figure PDF(s).\n'.format(directory['figures']),
               verbose)

    farewell = ('AMiGA completed your request and '
                'wishes you good luck with the analysis!')
    print(tidyMessage(farewell))

    sys.exit()
def plot(self, save_path='', plot_fit=False, plot_derivative=False, plot_raw_with_fit=False):
    '''
    Creates a 8x12 grid plot (for 96-well plate) that shows the growth curve in each
    well. Plot aesthetics are pulled via getValue(), getPlotColors(), getTextColors(),
    and getTimeUnits(). If a save path is passed, the figure is also saved there.

    Which data are plotted is decided by the first flag that is True, in this order:
    plot_derivative, plot_fit, plot_raw_with_fit. If none is True, self.data is plotted.

    Args:
        save_path (str): file path; if empty, plot will not be saved at all.
        plot_fit (boolean): plot 'OD_Growth_Data' from self.gp_data with
            'OD_Growth_Fit' overlaid as a dashed line; lower y-limit is forced to 0.
        plot_derivative (boolean): if True, plot only 'GP_Derivative' from
            self.gp_data, without fill and without overlay. Takes precedence over
            the other two flags.
        plot_raw_with_fit (boolean): plot 'OD_Data' from self.gp_data with 'OD_Fit'
            overlaid as a dashed line.

    Returns:
        fig, axes: figure and axis handles (the figure is closed before returning),
        or None if self.isSingleMultiWellPlate() is falsy.

    Action:
        calls self.addLocation() and, unless returning early, drops the 'Row' and
        'Column' columns from self.key in place. If user passes save_path argument,
        the figure is saved with plt.savefig().
    '''

    sns.set_style('whitegrid')

    self.addLocation()

    time = self.time

    cols = [
        'Sample_ID', 'Plate_ID', 'Well', 'Row', 'Column',
        'Fold_Change', 'OD_Max', 'OD_Baseline'
    ]
    # keep only the columns needed for plotting and that hold any values at all
    key = self.key.reindex(cols, axis='columns')
    key = key.dropna(axis=1, how='all')
    if 'Sample_ID' in key.columns:
        key = key.drop_duplicates().set_index('Sample_ID')

    # make sure plate is 96-well version, otherwise skip plotting
    # NOTE(review): this early return skips the drop of 'Row'/'Column' from
    # self.key done at the end of the method -- confirm that this is intended
    if not self.isSingleMultiWellPlate():
        msg = 'WARNING: GrowthPlate() object for {} is not a 96-well plate. '.format(
            self.key.Plate_ID.iloc[0])
        msg += 'AMiGA can not plot it.\n'
        print(msg)
        return None

    # Select the data to plot. The overlay and the zero floor are tied to the
    # branch actually taken: previously, plot_derivative=True combined with
    # plot_fit or plot_raw_with_fit raised a NameError because the overlay was
    # requested further below without ever having been defined.
    overlay_y = None
    zero_floor = False
    if plot_derivative:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time',
                                    values='GP_Derivative')
    elif plot_fit:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time',
                                    values='OD_Growth_Data')
        overlay_y = self.gp_data.pivot(columns='Sample_ID', index='Time',
                                       values='OD_Growth_Fit')
        zero_floor = True
    elif plot_raw_with_fit:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time',
                                    values='OD_Data')
        overlay_y = self.gp_data.pivot(columns='Sample_ID', index='Time',
                                       values='OD_Fit')
    else:
        base_y = self.data

    fig, axes = plt.subplots(8, 12, figsize=[12, 8])

    # define window axis limits, shared by all sub-plots
    ymax = np.ceil(base_y.max(1).max())
    ymin = np.floor(base_y.min(1).min())
    if zero_floor:
        ymin = 0

    xmin = 0
    xmax = time.values[-1]
    xmax_up = int(np.ceil(xmax))  # round up to nearest integer

    for well in base_y.columns:

        # select proper sub-plot; Row and Column are 1-based in the key
        r, c = key.loc[well, ['Row', 'Column']] - 1
        ax = axes[r, c]

        # get colors based on fold-change if available, else use default colors
        if 'Fold_Change' in key.keys():
            color_l, color_f = getPlotColors(key.loc[well, 'Fold_Change'])
        else:
            color_l = getValue('fcn_line_color')
            color_f = getValue('fcn_face_color')

        # set window axis limits
        ax.set_xlim([xmin, xmax])
        ax.set_ylim([ymin, ymax])

        # define x-data and y-data points
        x = np.ravel(time.values)
        y = base_y.loc[:, well].values

        # plot line, and fill the area below it unless plotting the derivative
        ax.plot(x, y, color=color_l, lw=1.5, zorder=10)
        if not plot_derivative:
            ax.fill_between(x=x, y1=[ax.get_ylim()[0]] * len(y), y2=y,
                            color=color_f, zorder=7)

        # add fit lines, if desired
        if overlay_y is not None:
            y_fit = overlay_y.loc[:, well].values
            ax.plot(x, y_fit, color='yellow', alpha=0.65, ls='--', lw=1.5, zorder=10)

        # by default no tick labels; they are shown for bottom left sub-plot only
        if plot_derivative:
            # zero derivative indicates no instantaneous growth
            plt.setp(ax, yticks=[ymin, 0, ymax], yticklabels=[])
        else:
            plt.setp(ax, yticks=[ymin, ymax], yticklabels=[])
        plt.setp(ax, xticks=[xmin, xmax], xticklabels=[])

        # add well identifier on top left of each sub-plot
        well_color = getTextColors('Well_ID')
        ax.text(0., 1., key.loc[well, 'Well'], color=well_color,
                ha='left', va='top', transform=ax.transAxes)

        # add Max OD value on top right of each sub-plot
        if self.mods.floored:
            od_max = key.loc[well, 'OD_Max'] - key.loc[well, 'OD_Baseline']
        else:
            od_max = key.loc[well, 'OD_Max']
        ax.text(1., 1., "{0:.2f}".format(od_max), color=getTextColors('OD_Max'),
                ha='right', va='top', transform=ax.transAxes)

    # show tick labels for bottom left sub-plot only
    plt.setp(axes[7, 0], xticks=[0, xmax], xticklabels=[0, xmax_up])
    plt.setp(axes[7, 0], yticks=[ymin, ymax], yticklabels=[ymin, ymax])

    # add x- and y-labels and title
    ylabel_base = getValue('grid_plot_y_label')
    ylabel_mod = 'ln ' if self.mods.logged else ''

    if plot_derivative:
        ylabel_text = 'd[ln{}]/dt'.format(ylabel_base)
    else:
        ylabel_text = ylabel_mod + ylabel_base

    # add labels and title
    fig.text(0.512, 0.07, 'Time ({})'.format(getTimeUnits('output')),
             fontsize=15, ha='center', va='bottom')
    fig.text(0.100, 0.50, ylabel_text,
             fontsize=15, ha='right', va='center', rotation='vertical')

    # title is the Plate_ID of the last well that was plotted
    fig.suptitle(x=0.512, y=0.93, t=key.loc[well, 'Plate_ID'],
                 fontsize=15, ha='center', va='center')

    # if no file path passed, do not save
    if save_path != '':
        plt.savefig(save_path, bbox_inches='tight')

    # remove the location columns from the object's key
    self.key.drop(['Row', 'Column'], axis=1, inplace=True)

    plt.close()

    return fig, axes