def prepRegressionPlate(self):
    '''
    Packages the master data and mapping into a growth.GrowthPlate() object, runs a
        fixed sequence of its transformations, and stores the result on the instance.

    All inputs are read from instance attributes:
        self.master_data and self.master_mapping are passed to GrowthPlate()
        self.non_time_varbs is used as the grouping for subtractBaseline()
        self.subtract_control toggles subtractControl()
        self.paths_dict['key'] is where the plate key is written

    Returns:
        None

    Action:
        writes the plate key as a tab-separated file, then sets self.plate and
            self.ntimepoints (number of rows in plate.time).
    '''

    regression_plate = GrowthPlate(self.master_data, self.master_mapping)

    # convert time from the input units to the output units
    units_in = getTimeUnits('input')
    units_out = getTimeUnits('output')
    regression_plate.convertTimeUnits(input=units_in, output=units_out)

    regression_plate.logData()

    # baseline is subtracted within groups defined by the non-time variables
    poly_fit = getValue('PolyFit')
    baseline_groups = list(self.non_time_varbs)
    regression_plate.subtractBaseline(to_do=True, poly=poly_fit, groupby=baseline_groups)

    regression_plate.subtractControl(to_do=self.subtract_control, drop=True)

    # save the plate key alongside the model results
    key_path = self.paths_dict['key']
    regression_plate.key.to_csv(key_path, sep='\t', header=True, index=True)

    self.plate = regression_plate
    self.ntimepoints = regression_plate.time.shape[0]
def prepDataForFitting(data, mapping, subtract_baseline=True):
    '''
    Wraps a data set in a growth.GrowthPlate() object and transforms the data in
        preparation for GP fitting.

    Args:
        data (pandas.DataFrame): number of time points (t) x number of variables plus-one (p+1)
            plus-one because Time is not an index but rather a column.
        mapping (pandas.DataFrame): number of wells/samples (n) x number of variables (p)
        subtract_baseline (boolean): forwarded to both computeFoldChange() and
            subtractBaseline().

    Returns:
        plate (growth.GrowthPlate() object)
    '''

    growth_plate = GrowthPlate(data=data, key=mapping)

    # summaries are computed first, before any of the transformations below
    growth_plate.computeBasicSummary()
    growth_plate.computeFoldChange(subtract_baseline=subtract_baseline)

    units_in = getTimeUnits('input')
    units_out = getTimeUnits('output')
    growth_plate.convertTimeUnits(input=units_in, output=units_out)

    # non-positive values must be replaced prior to the log-transformation
    growth_plate.raiseData()

    # natural-log transform
    growth_plate.logData()

    # subtract first T0 (or rather divide by first T0)
    growth_plate.subtractBaseline(subtract_baseline, poly=True)

    return growth_plate
def plotDeltaOD(ax, df, ylabel, xlabel, fontsize=20):
    '''
    Plots Delta OD: its mean (Avg key in df) and credible intervals (Low and Upp keys).

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        df (pandas.DataFrame): must provide the Time, Avg, Low, and Upp columns.
        ylabel (str or boolean): the y-axis label is drawn only if this is truthy; the
            label text itself is fixed and does not depend on the value passed.
        xlabel (str or boolean): the x-axis label is drawn only if this is truthy; the
            label text is always 'Time (<output time units>)'.
        fontsize (float): used for the tick labels and both axis labels.

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)
    '''

    ax.plot(df.Time, df.Avg, lw=3.0)
    ax.fill_between(df.Time, df.Low, df.Upp, alpha=0.1)

    # axhline() interprets xmin/xmax as fractions of the axes width (0 to 1), not as
    # data coordinates, so passing df.Time.max() would truncate the zero line whenever
    # the largest time is below 1; the defaults already span the full axes width.
    ax.axhline(y=0, lw=3.0, color=(0, 0, 0, 1))

    ax = largeTickLabels(ax, fontsize=fontsize)

    if xlabel:
        ax.set_xlabel('Time ({})'.format(getTimeUnits('output')), fontsize=fontsize)

    if ylabel:
        ax.set_ylabel(r'${\Delta}$(ln OD)', fontsize=fontsize)

    return ax
def setAxesLabels(ax, subtract_control, plot_params, fontsize=20):
    '''
    Given an axis and analysis parameters, determine appropriate labels for the axes
        and adjust them accordingly.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        subtract_control (boolean): if True, the y-axis label is prefixed with 'Normalized'.
        plot_params (dictionary): must have the keys 'plot_linear_od' and 'fontsize'.
        fontsize (float): accepted but not used here; label size is taken
            from plot_params['fontsize'].

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)
    '''

    # the y-axis label is prefixed with 'ln ' unless linear OD is being plotted
    linear_od = plot_params['plot_linear_od']
    od_label = getValue('hypo_plot_y_label')
    y_text = od_label if linear_od else 'ln {}'.format(od_label)

    # plot aesthetics
    if subtract_control:
        y_text = 'Normalized {}'.format(y_text)

    x_text = 'Time ({})'.format(getTimeUnits('output'))
    label_size = plot_params['fontsize']

    ax.set_xlabel(x_text, fontsize=label_size)
    ax.set_ylabel(y_text, fontsize=label_size)

    return ax
def setAxesLabels(ax, subtract_control, plot_params, logged=True, fontsize=20):
    '''
    Given an axis and analysis parameters, determine appropriate labels for the axes
        and adjust them accordingly.

    Args:
        ax (matplotlib.axes._subplots.AxesSubplot)
        subtract_control (boolean): if True, the y-axis label is prefixed with 'Normalized'.
        plot_params (dictionary): must have the key 'fontsize', used for both axis labels.
        logged (boolean): if True, the y-axis label is prefixed with 'ln '.
        fontsize (float): accepted but not used here; label size is taken
            from plot_params['fontsize'].

    Returns:
        ax (matplotlib.axes._subplots.AxesSubplot)

    Action:
        updates the global matplotlib rcParams (math text and font settings), which
            affects any figure drawn afterwards in the same session.
    '''

    import matplotlib as mpl

    mpl.rcParams["mathtext.default"] = 'regular'
    # an empty rcParams key is rejected by matplotlib with a KeyError; 'font.family'
    # is the setting that selects the generic 'sans-serif' family, whose concrete
    # typeface is then chosen by 'font.sans-serif' below
    mpl.rcParams["font.family"] = 'sans-serif'
    mpl.rcParams["font.sans-serif"] = 'Arial'

    if logged:
        base = 'ln {}'.format(getValue('hypo_plot_y_label'))
    else:
        base = getValue('hypo_plot_y_label')

    # plot aesthetics
    ylabel = 'Normalized {}'.format(base) if subtract_control else base

    ax.set_xlabel('Time ({})'.format(getTimeUnits('output')), fontsize=plot_params['fontsize'])
    ax.set_ylabel(ylabel, fontsize=plot_params['fontsize'])

    return ax
def basicSummaryOnly(data, mapping, directory, args, verbose=False):
    '''
    If the user only requested a basic summary, then for each data file compute a
        basic algebraic summary, plot the data, save the results, and exit the
        program. Otherwise, do nothing and return None.

    Args:
        data (dictionary): keys are plate IDs and values are pandas.DataFrames with size
            t x (n+1) where t is the number of time-points and n is number of wells
            (i.e. samples), the additional 1 is due to the explicit 'Time' column,
            index is uninformative.
        mapping (dictionary): keys are plate IDs and values are pandas.DataFrames with size
            n x (p) where n is the number of wells (or samples) in plate, and p is the
            number of variables or parameters described in dataframe.
        directory (dictionary): keys are folder names, values are their paths; the
            'summary' and 'figures' entries are used here.
        args (dictionary): the entries 'obs', 'dp', 'merges', and 'fout' are read here.
        verbose (boolean)

    Returns:
        None: if args['obs'] is falsy. Otherwise the function does not return,
            because it ends with sys.exit().
    '''

    # only proceed if a basic summary is all that was requested
    if not args['obs']:
        return None

    print(tidyMessage('AMiGA is summarizing and plotting data files'))

    merged_keys = []

    for plate_id, plate_data in data.items():

        # paths where this plate's summary and figure would be saved
        txt_path = assemblePath(directory['summary'], plate_id, '.txt')
        pdf_path = assemblePath(directory['figures'], plate_id, '.pdf')

        # plate-specific samples: the index should be well IDs but a Well column
        # should also exist; annotateMappings() is expected to have ensured this
        plate_mapping = resetNameIndex(mapping[plate_id], 'Well', False)

        # keep Time plus only the wells that are listed in the mapping
        well_ids = list(plate_mapping.Well.values)
        plate_data = plate_data.loc[:, ['Time'] + well_ids]

        # relabel the well columns with the unique sample identifiers
        plate_data.columns = ['Time'] + list(plate_mapping.index.values)

        # build the GrowthPlate object and compute the basic summary
        summary_plate = GrowthPlate(data=plate_data, key=plate_mapping)
        units_in = getTimeUnits('input')
        units_out = getTimeUnits('output')
        summary_plate.convertTimeUnits(input=units_in, output=units_out)
        summary_plate.computeBasicSummary()
        summary_plate.computeFoldChange(subtract_baseline=True)

        # figure is saved as PDF unless plotting was disabled
        if not args['dp']:
            summary_plate.plot(pdf_path)

        # key is either collected for one merged file or saved on its own as TXT
        if args['merges']:
            merged_keys.append(summary_plate.key)
        else:
            summary_plate.key.to_csv(txt_path, sep='\t', header=True, index=False)

        smartPrint(plate_id, verbose=verbose)

    if args['merges']:
        out_name = selectFileName(args['fout'])
        merged_path = assembleFullName(directory['summary'], 'summary', out_name, '_basic', '.txt')
        merged_df = pd.concat(merged_keys, sort=False)
        merged_df.to_csv(merged_path, sep='\t', header=True, index=False)

    smartPrint('\nSee {} for summary text file(s).'.format(directory['summary']), verbose)
    smartPrint('See {} for figure PDF(s).\n'.format(directory['figures']), verbose)

    farewell = ('AMiGA completed your request and '
                'wishes you good luck with the analysis!')
    print(tidyMessage(farewell))

    sys.exit()
def plot(self, save_path='', plot_fit=False, plot_derivative=False, plot_raw_with_fit=False):
    '''
    Creates a 8x12 grid plot (for 96-well plate) that shows the growth curves in each well.
        Plot aesthetics are pulled with the helper functions getValue(), getPlotColors(),
        getTextColors(), and getTimeUnits(). Index column for object's key should be
        Well IDs but object's key should also have a Well column.

    The three plot_* flags are checked in this order and the first one that is True
        decides what is drawn: plot_derivative, plot_fit, plot_raw_with_fit.

    Args:
        save_path (str): file path: if empty, plot will not be saved at all.
        plot_fit (boolean): plot the 'OD_Growth_Data' column of self.gp_data and overlay
            the 'OD_Growth_Fit' column; the lower y-axis limit is set to zero.
        plot_derivative (boolean): if True, plot only the derivative of GP fit instead
            (the 'GP_Derivative' column of self.gp_data), without fill or overlay.
        plot_raw_with_fit (boolean): plot the 'OD_Data' column of self.gp_data and
            overlay the 'OD_Fit' column.

    Returns:
        fig,axes: figure and axis handles, or None if the object is not a single
            96-well plate (a warning is printed in that case).

    Action:
        if user passes save_path argument, plot will be saved in desired location.
        The 'Row' and 'Column' columns are dropped from self.key before returning.
    '''

    sns.set_style('whitegrid')

    self.addLocation()

    time = self.time

    cols = ['Sample_ID', 'Plate_ID', 'Well', 'Row', 'Column',
            'Fold_Change', 'OD_Max', 'OD_Baseline']
    key = self.key.reindex(cols, axis='columns')
    key = key.dropna(axis=1, how='all')
    if 'Sample_ID' in key.columns:
        key = key.drop_duplicates().set_index('Sample_ID')

    # make sure plate is 96-well version, otherwise skip plotting
    if not self.isSingleMultiWellPlate():
        msg = 'WARNING: GrowthPlate() object for {} is not a 96-well plate. '.format(
            self.key.Plate_ID.iloc[0])
        msg += 'AMiGA can not plot it.\n'
        print(msg)
        # same clean-up as the normal exit path, so that the location columns do not
        # linger in self.key when plotting is skipped
        self.key.drop(['Row', 'Column'], axis=1, inplace=True, errors='ignore')
        return None

    # pick the curves to draw; overlay_y stays None when there is no fit to overlay,
    # which also covers plot_derivative being combined with one of the fit flags
    overlay_y = None
    if plot_derivative:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time', values='GP_Derivative')
    elif plot_fit:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time', values='OD_Growth_Data')
        overlay_y = self.gp_data.pivot(columns='Sample_ID', index='Time', values='OD_Growth_Fit')
    elif plot_raw_with_fit:
        base_y = self.gp_data.pivot(columns='Sample_ID', index='Time', values='OD_Data')
        overlay_y = self.gp_data.pivot(columns='Sample_ID', index='Time', values='OD_Fit')
    else:
        # NOTE(review): the right-hand side of this assignment was missing in the
        # source. self.data is assumed to be the time-by-sample table of OD values
        # (one column per Sample_ID) -- confirm against GrowthPlate.
        base_y = self.data

    fig, axes = plt.subplots(8, 12, figsize=[12, 8])

    # x-data is shared by all wells; flatten it once so that xmax below is a scalar
    x = np.ravel(time.values)

    # define window axis limits
    ymax = np.ceil(base_y.max(1).max())
    ymin = np.floor(base_y.min(1).min())
    if plot_fit and not plot_derivative:
        ymin = 0
    xmin = 0
    xmax = x[-1]
    xmax_up = int(np.ceil(xmax))  # round up to nearest integer

    has_fold_change = 'Fold_Change' in key.columns
    well_color = getTextColors('Well_ID')
    od_max_color = getTextColors('OD_Max')

    for well in base_y.columns:

        # select proper sub-plot; Row and Column are 1-based
        r, c = key.loc[well, ['Row', 'Column']] - 1
        ax = axes[int(r), int(c)]

        # get colors based on fold-change and configuration parameters
        if has_fold_change:
            color_l, color_f = getPlotColors(key.loc[well, 'Fold_Change'])
        else:
            color_l = getValue('fcn_line_color')
            color_f = getValue('fcn_face_color')

        # set window axis limits
        ax.set_xlim([xmin, xmax])
        ax.set_ylim([ymin, ymax])

        # define y-data points
        y = base_y.loc[:, well].values

        # plot line and fill_between, if plotting OD estimate
        ax.plot(x, y, color=color_l, lw=1.5, zorder=10)
        if not plot_derivative:
            ax.fill_between(x=x, y1=[ax.get_ylim()[0]] * len(y), y2=y,
                            color=color_f, zorder=7)

        # add fit lines, if desired
        if overlay_y is not None:
            y_fit = overlay_y.loc[:, well].values
            ax.plot(x, y_fit, color='yellow', alpha=0.65, ls='--', lw=1.5, zorder=10)

        # show tick labels for bottom left subplot only, so by default no labels
        if plot_derivative:
            # zero derivative indicates no instantaneous growth
            plt.setp(ax, yticks=[ymin, 0, ymax], yticklabels=[])
        else:
            plt.setp(ax, yticks=[ymin, ymax], yticklabels=[])
        plt.setp(ax, xticks=[xmin, xmax], xticklabels=[])

        # add well identifier on top left of each sub-plot
        ax.text(0., 1., key.loc[well, 'Well'], color=well_color,
                ha='left', va='top', transform=ax.transAxes)

        # add Max OD value on top right of each sub-plot
        if self.mods.floored:
            od_max = key.loc[well, 'OD_Max'] - key.loc[well, 'OD_Baseline']
        else:
            od_max = key.loc[well, 'OD_Max']
        ax.text(1., 1., "{0:.2f}".format(od_max), color=od_max_color,
                ha='right', va='top', transform=ax.transAxes)

    # show tick labels for bottom left sub-plot only
    plt.setp(axes[7, 0], xticks=[0, xmax], xticklabels=[0, xmax_up])
    plt.setp(axes[7, 0], yticks=[ymin, ymax], yticklabels=[ymin, ymax])

    # add x- and y-labels and title
    ylabel_base = getValue('grid_plot_y_label')
    ylabel_mod = 'ln ' if self.mods.logged else ''
    if plot_derivative:
        ylabel_text = 'd[ln{}]/dt'.format(ylabel_base)
    else:
        ylabel_text = ylabel_mod + ylabel_base

    # add labels and title; the title uses the Plate_ID of the last well drawn
    fig.text(0.512, 0.07, 'Time ({})'.format(getTimeUnits('output')),
             fontsize=15, ha='center', va='bottom')
    fig.text(0.100, 0.50, ylabel_text,
             fontsize=15, ha='right', va='center', rotation='vertical')
    fig.suptitle(x=0.512, y=0.93, t=key.loc[well, 'Plate_ID'],
                 fontsize=15, ha='center', va='center')

    # if no file path passed, do not save
    if save_path != '':
        plt.savefig(save_path, bbox_inches='tight')

    self.key.drop(['Row', 'Column'], axis=1, inplace=True)

    plt.close()

    return fig, axes