def execute_normalization(self, analysis_id_I, concentration_units_I=[], plot_values_I=False, r_calc_I=None):
    '''glog normalize concentration values using R

    INPUT:
    analysis_id_I = id of the pre-processing analysis
    concentration_units_I = restrict the normalization to these units
        (queried from the imputation tables when empty)
    plot_values_I = when True, density-plot the original and glog values
    r_calc_I = optional pre-built r_calculate instance (a new one is created when None)
    '''
    print('execute_normalization...')
    # re-use the caller's R interface when one was supplied
    r_calc = r_calc_I if r_calc_I else r_calculate()
    mplot = matplot()
    # get the analysis information
    analysis_info = self.get_rows_analysisID_dataPreProcessingAnalysis(analysis_id_I)
    data_transformed = []
    # determine which concentration units to process
    if concentration_units_I:
        concentration_units = concentration_units_I
    else:
        concentration_units = []
        for row in analysis_info:
            concentration_units.extend(
                self.get_concentrationUnits_experimentID_dataPreProcessingImputation(row['experiment_id']))
        concentration_units = list(set(concentration_units))
    for cu in concentration_units:
        print('calculating normalization for concentration_units ' + cu)
        # gather all of the samples in the analysis for these units
        data = []
        for row in analysis_info:
            data.extend(
                self.get_RExpressionData_AnalysisIDAndExperimentIDAndTimePointAndUnitsAndSampleNameShort_dataPreProcessingImputation(
                    analysis_id_I, row['experiment_id'], row['time_point'], cu, row['sample_name_short']))
        # call R to glog-normalize the concentrations
        data_glog, concentrations, concentrations_glog = r_calc.calculate_normalization(data)
        for i, d in enumerate(data_glog):
            data_glog[i]['calculated_concentration_units'] = cu + '_glog_normalization'
        # plot original and transformed distributions
        if plot_values_I:
            mplot.densityPlot(concentrations)
            mplot.densityPlot(concentrations_glog)
        # upload the rows
        # NOTE(review): the un-transformed values in `data` are inserted here
        # (tagged with the glog unit suffix) and the glog values are written
        # afterwards via update_concentrations_dataPreProcessingNormalization —
        # confirm this two-step insert/update is intentional
        for d in data:
            row = data_dataPreProcessing_glogNormalization(
                analysis_id_I, d['experiment_id'], d['sample_name_short'],
                d['time_point'], d['component_group_name'], d['component_name'],
                d['calculated_concentration'],
                d['calculated_concentration_units'] + '_glog_normalization',
                True, None)
            self.session.add(row)
        data_transformed.extend(data_glog)
        # commit data to the session every timepoint
        self.session.commit()
    self.update_concentrations_dataPreProcessingNormalization(analysis_id_I, data_transformed)
def export_boxAndWhiskersPlot_peakInformation_matplot(self, experiment_id_I, peakInfo_parameter_I=['height', 'retention_time', 'width_at_50', 'signal_2_noise'], component_names_I=[], filename_O='tmp', figure_format_O='.png'):
    '''generate a boxAndWhiskers plot from peakInformation table

    INPUT:
    experiment_id_I
    peakInfo_parameter_I = peak parameters to plot (queried when empty)
    component_names_I = restrict to these components (queried when empty)
    filename_O = output file prefix
    figure_format_O = output figure extension (including the dot)
    '''
    #TODO: remove after refactor
    mplot = matplot()
    print('export_boxAndWhiskersPlot...')
    if peakInfo_parameter_I:
        peakInfo_parameter = peakInfo_parameter_I
    else:
        peakInfo_parameter = self.get_peakInfoParameter_experimentID_dataStage01PeakInformation(experiment_id_I)
    for parameter in peakInfo_parameter:
        # accumulators for one parameter across all components
        data_plot_mean = []
        data_plot_cv = []
        data_plot_ci = []
        data_plot_parameters = []
        data_plot_component_names = []
        data_plot_data = []
        data_plot_units = []
        if component_names_I:
            component_names = component_names_I
        else:
            component_names = self.get_componentNames_experimentIDAndPeakInfoParameter_dataStage01PeakInformation(experiment_id_I, parameter)
        for cn in component_names:
            print('generating boxAndWhiskersPlot for component_name ' + cn)
            # get the data
            data = self.get_row_experimentIDAndPeakInfoParameterComponentName_dataStage01PeakInformation(experiment_id_I, parameter, cn)
            if data and data['peakInfo_ave']:
                # record data for plotting
                data_plot_mean.append(data['peakInfo_ave'])
                data_plot_cv.append(data['peakInfo_cv'])
                data_plot_ci.append([data['peakInfo_lb'], data['peakInfo_ub']])
                data_plot_data.append(data['peakInfo_data'])
                data_plot_parameters.append(parameter)
                data_plot_component_names.append(data['component_group_name'])
                # NOTE(review): unit label is hard-coded even for non-RT
                # parameters (height, s/n, ...) — confirm intended
                data_plot_units.append('Retention_time [min]')
        # FIX: skip parameters with no data; the original indexed
        # data_plot_parameters[0] unconditionally and would raise IndexError
        if not data_plot_parameters:
            continue
        # visualize the stats (dead `data_plot_se` computation removed)
        filename = filename_O + '_' + experiment_id_I + '_' + parameter + figure_format_O
        mplot.boxAndWhiskersPlot(data_plot_parameters[0], data_plot_component_names, data_plot_units[0], 'samples', data_plot_data, data_plot_mean, data_plot_ci, filename_I=filename, show_plot_I=False)
def export_boxAndWhiskersPlot_physiologicalRatios_matplot(self, experiment_id_I, sample_name_abbreviations_I=[], ratio_ids_I=[]):
    '''generate a boxAndWhiskers plot from physiological ratios table

    INPUT:
    experiment_id_I
    sample_name_abbreviations_I = restrict to these samples (queried when empty)
    ratio_ids_I = restrict to these ratio ids (defaults to self.ratios keys)
    '''
    mplot = matplot()
    print('execute_boxAndWhiskersPlot...')
    # get time points
    time_points = self.get_timePoint_experimentID_dataStage01PhysiologicalRatiosAverages(experiment_id_I)
    for tp in time_points:
        print('generating boxAndWhiskersPlot for time_point ' + tp)
        ratio_ids = ratio_ids_I if ratio_ids_I else list(self.ratios.keys())
        for k in ratio_ids:
            print('generating boxAndWhiskersPlot for ratio ' + k)
            # accumulators for one ratio across all samples
            data_plot_mean = []
            data_plot_var = []
            data_plot_ci = []
            data_plot_sna = []
            data_plot_ratio_ids = []
            data_plot_data = []
            data_plot_ratio_units = []
            # get sample_name_abbreviations
            if sample_name_abbreviations_I:
                sample_name_abbreviations = sample_name_abbreviations_I
            else:
                sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentIDAndTimePointAndRatioID_dataStage01PhysiologicalRatiosAverages(experiment_id_I, tp, k)
            for sna in sample_name_abbreviations:
                print('generating boxAndWhiskersPlot for sample_name_abbreviation ' + sna)
                # averaged ratio statistics and the per-replicate values
                data = self.get_data_experimentIDAndTimePointAndRatioIDAndSampleNameAbbreviation_dataStage01PhysiologicalRatiosAverages(experiment_id_I, tp, k, sna)
                ratio_values = self.get_ratios_experimentIDAndSampleNameAbbreviationAndTimePointAndRatioID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I, sna, tp, k)
                # record data for plotting
                data_plot_mean.append(data['physiologicalratio_value_ave'])
                data_plot_var.append(data['physiologicalratio_value_cv'])
                data_plot_ci.append([data['physiologicalratio_value_lb'], data['physiologicalratio_value_ub']])
                data_plot_data.append(ratio_values)
                data_plot_sna.append(sna)
                data_plot_ratio_ids.append(k)
                data_plot_ratio_units.append('')
            # visualize the stats
            mplot.boxAndWhiskersPlot(data_plot_ratio_ids[0], data_plot_sna, data_plot_ratio_units[0], 'samples', data_plot_data, data_plot_mean, data_plot_ci)
def export_scatterLinePlot_physiologicalRatios_matplot(self, experiment_id_I, sample_name_abbreviations_I=[], ratio_ids_I=[]):
    '''Generate a scatter line plot for physiological ratios averages

    INPUT:
    experiment_id_I
    sample_name_abbreviations_I = restrict to these samples (queried when empty)
    ratio_ids_I = restrict to these ratio ids (all when empty)
    '''
    mplot = matplot()
    print('Generating scatterLinePlot for physiologicalRatios')
    # get time points
    time_points = self.get_timePoint_experimentID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I)
    for tp in time_points:
        print('Generating scatterLinePlot for physiologicalRatios for time_point ' + tp)
        # get physiological ratio_ids
        ratios = self.get_ratioIDs_experimentIDAndTimePoint_dataStage01PhysiologicalRatiosReplicates(experiment_id_I, tp)
        for k, v in ratios.items():
            # optionally restrict to the requested ratio ids
            if ratio_ids_I and k not in ratio_ids_I:
                continue
            print('Generating scatterLinePlot for physiologicalRatios for ratio ' + k)
            # get sample_names
            if sample_name_abbreviations_I:
                sample_name_abbreviations = sample_name_abbreviations_I
            else:
                sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentIDAndTimePointAndRatioID_dataStage01PhysiologicalRatiosAverages(experiment_id_I, tp, k)
            ratios_num = []
            ratios_den = []
            for sna_cnt, sna in enumerate(sample_name_abbreviations):
                print('Generating scatterLinePlot for physiologicalRatios for sample name abbreviation ' + sna)
                # numerator ratio for this sample
                ratio_num = self.get_ratio_experimentIDAndTimePointAndRatioIDAndSampleNameAbbreviation_dataStage01PhysiologicalRatiosAverages(experiment_id_I, tp, k + '_numerator', sna)
                if not ratio_num:
                    continue
                ratios_num.append(ratio_num)
                # denominator ratio for this sample
                # NOTE(review): if the denominator query comes back empty the
                # numerator has already been appended, leaving the two lists
                # out of step — confirm intended
                ratio_den = self.get_ratio_experimentIDAndTimePointAndRatioIDAndSampleNameAbbreviation_dataStage01PhysiologicalRatiosAverages(experiment_id_I, tp, k + '_denominator', sna)
                if not ratio_den:
                    continue
                ratios_den.append(ratio_den)
            # plot the data
            mplot.scatterLinePlot(k, k + '_denominator', k + '_numerator', ratios_den, ratios_num, sample_name_abbreviations)
def plot_averageSpectrumNormSum(self, experiment_id_I, time_points_I=None, sample_name_abbreviations_I=None, met_ids_I=None, scan_types_I=None):
    '''calculate the average normalized intensity for all samples and scan types

    Assumptions:
    only a single fragment:spectrum is used_ per sample name abbreviation,
    time-point, replicate, scan_type
    (i.e. there are no multiple dilutions of the same precursor:spectrum
    that are used_)

    INPUT:
    experiment_id_I
    time_points_I / sample_name_abbreviations_I / met_ids_I / scan_types_I =
        optional filters; each is queried from the averagesNormSum tables
        when not supplied
    '''
    # NOTE(review): `mids` is never used in this method — candidate for removal
    mids = mass_isotopomer_distributions()
    print('plot_averagesNormSum...')
    plot = matplot()
    # get time points
    if time_points_I:
        time_points = time_points_I
    else:
        time_points = self.get_timePoint_experimentID_dataStage01AveragesNormSum(experiment_id_I)
    for tp in time_points:
        print('Plotting product and precursor for time-point ' + str(tp))
        # get sample names and sample name abbreviations
        if sample_name_abbreviations_I:
            sample_abbreviations = sample_name_abbreviations_I
            sample_types_lst = ['Unknown' for x in sample_abbreviations]
        else:
            sample_abbreviations = []
            sample_types = ['Unknown']
            sample_types_lst = []
            for st in sample_types:
                sample_abbreviations_tmp = self.get_sampleNameAbbreviations_experimentIDAndSampleTypeAndTimePoint_dataStage01AveragesNormSum(experiment_id_I, st, tp)
                sample_abbreviations.extend(sample_abbreviations_tmp)
                sample_types_lst.extend([st for i in range(len(sample_abbreviations_tmp))])
        for sna_cnt, sna in enumerate(sample_abbreviations):
            print('Plotting product and precursor for sample name abbreviation ' + sna)
            # get the scan_types, optionally filtered by the input
            scan_types = self.get_scanTypes_experimentIDAndTimePointAndSampleAbbreviationsAndSampleType_dataStage01AveragesNormSum(experiment_id_I, tp, sna, sample_types_lst[sna_cnt])
            if scan_types_I:
                scan_types = [st for st in scan_types if st in scan_types_I]
            for scan_type in scan_types:
                print('Plotting product and precursor for scan type ' + scan_type)
                # met_ids
                if not met_ids_I:
                    met_ids = self.get_metIDs_experimentIDAndSampleAbbreviationAndTimePointAndSampleTypeAndScanType_dataStage01AveragesNormSum(
                        experiment_id_I, sna, tp, sample_types_lst[sna_cnt], scan_type)
                else:
                    met_ids = met_ids_I
                if not met_ids:
                    continue  # no component information was found
                for met in met_ids:
                    print('Plotting product and precursor for metabolite ' + met)
                    # fragments
                    fragment_formulas = self.get_fragmentFormula_experimentIDAndSampleAbbreviationAndTimePointAndSampleTypeAndScanTypeAndMetID_dataStage01AveragesNormSum(
                        experiment_id_I, sna, tp, sample_types_lst[sna_cnt], scan_type, met)
                    for frag in fragment_formulas:
                        print('Plotting product and precursor for fragment ' + frag)
                        # averaged spectrum, CV, and masses
                        data_mat, data_mat_cv, data_masses = self.get_spectrum_experimentIDAndSampleAbbreviationAndTimePointAndSampleTypeAndScanTypeAndMetIDAndFragmentFormula_dataStage01AveragesNormSum(
                            experiment_id_I, sna, tp, sample_types_lst[sna_cnt], scan_type, met, frag)
                        # stdev recovered from CV [%]; dead `stderr` local removed
                        data_stdev = []
                        for i, d in enumerate(data_mat):
                            stdev = 0.0
                            if data_mat_cv[i]:
                                stdev = data_mat[i] * data_mat_cv[i] / 100
                            data_stdev.append(stdev)
                        title = sna + '_' + met + '_' + frag
                        plot.barPlot(title, data_masses, 'intensity', 'm/z', data_mat, var_I=None, se_I=data_stdev, add_labels_I=True)
def plot_normalizedSpectrumNormSum(self, experiment_id_I, sample_names_I=None, sample_name_abbreviations_I=None, met_ids_I=None, scan_types_I=None):
    '''calculate the average normalized intensity for all samples and scan types

    Assumptions:
    only a single fragment:spectrum is used_ per sample name abbreviation,
    time-point, replicate, scan_type
    (i.e. there are no multiple dilutions of the same precursor:spectrum
    that are used_)

    INPUT:
    experiment_id_I
    sample_names_I / sample_name_abbreviations_I / met_ids_I / scan_types_I =
        optional filters; queried from the normalized tables when not supplied
    '''
    mids = mass_isotopomer_distributions()
    print('plot_normalizedSpectrumNormSum...')
    plot = matplot()
    # get time points
    time_points = self.get_timePoint_experimentID_dataStage01Normalized(experiment_id_I)
    for tp in time_points:
        print('Plotting precursor and product spectrum from isotopomer normalized for time-point ' + str(tp))
        if sample_names_I:
            sample_abbreviations = []
            sample_types = ['Unknown', 'QC']
            sample_types_lst = []
            for sn in sample_names_I:
                for st in sample_types:
                    sample_abbreviations_tmp = self.get_sampleNameAbbreviations_experimentIDAndSampleTypeAndTimePointAndSampleName_dataStage01Normalized(experiment_id_I, st, tp, sn)
                    sample_abbreviations.extend(sample_abbreviations_tmp)
                    # FIX: original referenced undefined `sample_names_tmp` here
                    # (NameError whenever sample_names_I was supplied)
                    sample_types_lst.extend([st for i in range(len(sample_abbreviations_tmp))])
        elif sample_name_abbreviations_I:
            sample_abbreviations = sample_name_abbreviations_I
            # query sample types from sample name abbreviations and time-point
            # from data_stage01_isotopomer_normalized
            sample_types_lst = ['Unknown' for x in sample_abbreviations]
        else:
            # get sample names and sample name abbreviations
            sample_abbreviations = []
            sample_types = ['Unknown', 'QC']
            sample_types_lst = []
            for st in sample_types:
                sample_abbreviations_tmp = self.get_sampleNameAbbreviations_experimentIDAndSampleTypeAndTimePoint_dataStage01Normalized(experiment_id_I, st, tp)
                sample_abbreviations.extend(sample_abbreviations_tmp)
                sample_types_lst.extend([st for i in range(len(sample_abbreviations_tmp))])
        for sna_cnt, sna in enumerate(sample_abbreviations):
            print('Plotting precursor and product spectrum from isotopomer normalized for sample name abbreviation ' + sna)
            # get the scan_types, optionally filtered by the input
            scan_types = self.get_scanTypes_experimentIDAndTimePointAndSampleAbbreviationsAndSampleType_dataStage01Normalized(experiment_id_I, tp, sna, sample_types_lst[sna_cnt])
            if scan_types_I:
                scan_types = [st for st in scan_types if st in scan_types_I]
            for scan_type in scan_types:
                print('Plotting precursor and product spectrum for scan type ' + scan_type)
                # met_ids
                if not met_ids_I:
                    met_ids = self.get_metIDs_experimentIDAndSampleAbbreviationAndTimePointAndSampleTypeAndScanType_dataStage01Normalized(
                        experiment_id_I, sna, tp, sample_types_lst[sna_cnt], scan_type)
                else:
                    met_ids = met_ids_I
                if not met_ids:
                    continue  # no component information was found
                for met in met_ids:
                    print('Plotting precursor and product spectrum for metabolite ' + met)
                    # get replicates
                    replicate_numbers = self.get_replicateNumbers_experimentIDAndSampleAbbreviationAndTimePointAndScanTypeAndMetID_dataStage01Normalized(
                        experiment_id_I, sna, tp, scan_type, met)
                    peakSpectrum_normalized_lst = []
                    # FIX: accumulate fragment formulas across replicates; the
                    # original plotted from an undefined `fragment_formulas_lst`
                    # (NameError as soon as any replicates were found)
                    fragment_formulas_lst = []
                    for rep in replicate_numbers:
                        print('Plotting precursor and product spectrum for replicate_number ' + str(rep))
                        # get data
                        peakData_I = self.get_dataNormalized_experimentIDAndSampleAbbreviationAndTimePointAndScanTypeAndMetIDAndReplicateNumber_dataStage01Normalized(
                            experiment_id_I, sna, tp, scan_type, met, rep)
                        fragment_formulas = list(peakData_I.keys())
                        fragment_formulas_lst.extend(fragment_formulas)
                        peakSpectrum_corrected, peakSpectrum_normalized = mids.extract_peakList_normSum(
                            peakData_I, fragment_formulas, True)
                        peakSpectrum_normalized_lst.append(peakSpectrum_normalized)
                    # plot spectrum data for all replicates and fragments
                    fragment_formulas_unique = list(set(fragment_formulas_lst))
                    for fragment in fragment_formulas_unique:
                        panelLabels = []
                        xticklabels = []
                        mean = []
                        xlabel = 'm/z'
                        ylabel = 'intensity'
                        for rep, spectrum in enumerate(peakSpectrum_normalized_lst):
                            # FIX: a replicate may lack this fragment; skip it
                            # instead of raising KeyError
                            if fragment not in spectrum:
                                continue
                            panelLabels_tmp = sna + '_' + met + '_' + fragment + '_' + str(rep + 1)
                            xticklabels_tmp = []
                            mean_tmp = []
                            for mass, intensity in spectrum[fragment].items():
                                # treat missing/None intensities as zero
                                mean_tmp.append(intensity if intensity else 0.0)
                                xticklabels_tmp.append(mass)
                            panelLabels.append(panelLabels_tmp)
                            xticklabels.append(xticklabels_tmp)
                            mean.append(mean_tmp)
                        plot.multiPanelBarPlot('', xticklabels, xlabel, ylabel, panelLabels, mean)
def export_scatterLinePlot_peakResolution_matplot(self, experiment_id_I, sample_names_I=[], sample_types_I=['Standard'], component_name_pairs_I=[], peakInfo_I=['rt_dif', 'resolution'], acquisition_date_and_time_I=[None, None], x_title_I='Time [hrs]', y_title_I='Retention Time [min]', y_data_type_I='acquisition_date_and_time', plot_type_I='single'):
    '''Analyze resolution for critical pairs

    INPUT:
    experiment_id_I
    sample_names_I
    sample_types_I
    component_name_pairs_I = [[component_name_1, component_name_2], ...]
    peakInfo_I = parameters derived per pair ('rt_dif', 'resolution')
    acquisition_date_and_time_I = ['%m/%d/%Y %H:%M', '%m/%d/%Y %H:%M']
    y_data_type_I = 'acquisition_date_and_time' or 'count'
    plot_type_I = 'single' or 'multiple'
    '''
    #TODO: remove after refactor
    mplot = matplot()
    print('export_peakInformation_resolution...')

    def _to_hrs(dtv):
        # crude datetime -> hours conversion used for relative ordering only
        # NOTE(review): the day term uses *365.242 rather than *24 — looks like
        # a bug inherited from the original conversion, but only relative
        # values within a run are plotted; confirm before changing
        return (dtv.year * 8765.81277 + dtv.month * 730.484 + dtv.day * 365.242
                + dtv.hour + dtv.minute / 60. + dtv.second / 3600.)

    # convert string date-times to datetime objects
    # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
    acquisition_date_and_time = []
    if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
        for dateandtime in acquisition_date_and_time_I:
            time_struct = strptime(dateandtime, '%m/%d/%Y %H:%M')
            dt = datetime.fromtimestamp(mktime(time_struct))
            acquisition_date_and_time.append(dt)
    else:
        acquisition_date_and_time = [None, None]
    data_O = []
    # get sample names
    if sample_names_I and sample_types_I and len(sample_types_I) == 1:
        sample_names = sample_names_I
        sample_types = [sample_types_I[0] for sn in sample_names]
    else:
        sample_names = []
        sample_types = []
        for st in sample_types_I:
            sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I, st)
            sample_names.extend(sample_names_tmp)
            sample_types.extend([st for sn in sample_names_tmp])
    for sn in sample_names:
        print('analyzing peakInformation for sample_name ' + sn)
        for component_name_pair in component_name_pairs_I:
            # get critical pair data
            cpd1 = self.get_peakInfo_sampleNameAndComponentName(sn, component_name_pair[0], acquisition_date_and_time)
            cpd2 = self.get_peakInfo_sampleNameAndComponentName(sn, component_name_pair[1], acquisition_date_and_time)
            # calculate the RT difference and resolution
            rt_dif = abs(cpd1['retention_time'] - cpd2['retention_time'])
            resolution = rt_dif / (0.5 * (cpd1['width_at_50'] + cpd2['width_at_50']))
            # record data
            data_O.append({
                'component_name_pair': component_name_pair,
                'rt_dif': rt_dif,
                'resolution': resolution,
                'component_group_name_pair': [cpd1['component_group_name'], cpd2['component_group_name']],
                'sample_name': sn,
                'acquisition_date_and_time': cpd1['acquisition_date_and_time']})
    if plot_type_I == 'single':
        for cnp in component_name_pairs_I:
            # FIX: the original built title/filename from an undefined name
            # `cn` (NameError); use a label derived from the pair instead
            pair_label = '_'.join(cnp)
            data_parameters = {}
            for parameter in peakInfo_I:
                data_parameters[parameter] = []
                acquisition_date_and_times = []
                acquisition_date_and_times_hrs = []
                sample_names_parameter = []
                sample_types_parameter = []
                for sn_cnt, sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name_pair'] == cnp and d[parameter]:
                            data_parameters[parameter].append(d[parameter])
                            acquisition_date_and_times.append(d['acquisition_date_and_time'])
                            acquisition_date_and_times_hrs.append(_to_hrs(d['acquisition_date_and_time']))
                            sample_names_parameter.append(sn)
                            sample_types_parameter.append(sample_types[sn_cnt])
                # FIX: guard against no matching data (min() of empty raises)
                if not acquisition_date_and_times_hrs:
                    continue
                # normalize time
                acquisition_date_and_times_hrs.sort()
                t_start = min(acquisition_date_and_times_hrs)
                for t_cnt, t in enumerate(acquisition_date_and_times_hrs):
                    if y_data_type_I == 'acquisition_date_and_time':
                        acquisition_date_and_times_hrs[t_cnt] = t - t_start
                    elif y_data_type_I == 'count':
                        acquisition_date_and_times_hrs[t_cnt] = t_cnt
                title = pair_label + '\n' + parameter
                filename = 'data/_output/' + experiment_id_I + '_' + pair_label + '_' + parameter + '.png'
                mplot.scatterLinePlot(title, x_title_I, y_title_I, acquisition_date_and_times_hrs, data_parameters[parameter], fit_func_I='lowess', show_eqn_I=False, show_r2_I=False, filename_I=filename, show_plot_I=False)
    if plot_type_I == 'multiple':
        for parameter in peakInfo_I:
            data_parameters = []
            acquisition_date_and_times = []
            acquisition_date_and_times_hrs = []
            sample_names_parameter = []
            sample_types_parameter = []
            component_group_names_pair = []
            component_names_pair = []
            for cnp_cnt, cnp in enumerate(component_name_pairs_I):
                data = []
                acquisition_date_and_time = []
                acquisition_date_and_time_hrs = []
                sample_name_parameter = []
                sample_type_parameter = []
                for sn_cnt, sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name_pair'] == cnp and d[parameter]:
                            data.append(d[parameter])
                            acquisition_date_and_time.append(d['acquisition_date_and_time'])
                            acquisition_date_and_time_hrs.append(_to_hrs(d['acquisition_date_and_time']))
                            sample_name_parameter.append(sn)
                            sample_type_parameter.append(sample_types[sn_cnt])
                            # record the pair labels once per pair
                            if sn_cnt == 0:
                                component_group_names_pair.append(d['component_group_name_pair'])
                                component_names_pair.append(d['component_name_pair'])
                # FIX: guard against no matching data (min() of empty raises)
                if not acquisition_date_and_time_hrs:
                    continue
                # normalize time
                acquisition_date_and_time_hrs.sort()
                t_start = min(acquisition_date_and_time_hrs)
                for t_cnt, t in enumerate(acquisition_date_and_time_hrs):
                    if y_data_type_I == 'acquisition_date_and_time':
                        acquisition_date_and_time_hrs[t_cnt] = t - t_start
                    elif y_data_type_I == 'count':
                        acquisition_date_and_time_hrs[t_cnt] = t_cnt
                data_parameters.append(data)
                acquisition_date_and_times.append(acquisition_date_and_time)
                acquisition_date_and_times_hrs.append(acquisition_date_and_time_hrs)
                sample_names_parameter.append(sample_name_parameter)
                sample_types_parameter.append(sample_type_parameter)
            # create data labels
            data_labels = []
            for component_group_names in component_group_names_pair:
                data_labels.append(component_group_names[0] + '/' + component_group_names[1])
            title = parameter
            filename = 'data/_output/' + experiment_id_I + '_' + parameter + '.eps'
            mplot.multiScatterLinePlot(title, x_title_I, y_title_I, acquisition_date_and_times_hrs, data_parameters, data_labels_I=data_labels, fit_func_I=None, show_eqn_I=False, show_r2_I=False, filename_I=filename, show_plot_I=False)
def export_scatterLinePlot_peakInformation_matplot(self, experiment_id_I, sample_names_I=[], sample_types_I=['Standard'], component_names_I=[], peakInfo_I=['retention_time'], acquisition_date_and_time_I=[None, None], x_title_I='Time [hrs]', y_title_I='Retention Time [min]', y_data_type_I='acquisition_date_and_time', plot_type_I='single', filename_O='tmp', figure_format_O='png'):
    '''Analyze retention-time, height, s/n, and assymetry

    INPUT:
    experiment_id_I
    sample_names_I
    sample_types_I
    component_names_I
    peakInfo_I
    acquisition_date_and_time_I = ['%m/%d/%Y %H:%M', '%m/%d/%Y %H:%M']
    y_data_type_I = 'acquisition_date_and_time' or 'count'
    plot_type_I = 'single', 'multiple', or 'sub'
    filename_O = output file prefix
    figure_format_O = output figure extension (with or without leading dot)
    '''
    print('export_peakInformation...')
    #TODO: remove after refactor
    mplot = matplot()
    # FIX: the default 'png' produced filenames with no extension dot
    # (inconsistent with the '.png' default used by the boxAndWhiskers export);
    # normalize so both 'png' and '.png' yield '.png'
    figure_format = figure_format_O if figure_format_O.startswith('.') else '.' + figure_format_O

    def _to_hrs(dtv):
        # crude datetime -> hours conversion used for relative ordering only
        # NOTE(review): the day term uses *365.242 rather than *24 — looks like
        # a bug inherited from the original conversion, but only relative
        # values within a run are plotted; confirm before changing
        return (dtv.year * 8765.81277 + dtv.month * 730.484 + dtv.day * 365.242
                + dtv.hour + dtv.minute / 60. + dtv.second / 3600.)

    # convert string date-times to datetime objects
    # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
    acquisition_date_and_time = []
    if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
        for dateandtime in acquisition_date_and_time_I:
            time_struct = strptime(dateandtime, '%m/%d/%Y %H:%M')
            dt = datetime.fromtimestamp(mktime(time_struct))
            acquisition_date_and_time.append(dt)
    else:
        acquisition_date_and_time = [None, None]
    data_O = []
    component_names_all = []
    # get sample names
    if sample_names_I and sample_types_I and len(sample_types_I) == 1:
        sample_names = sample_names_I
        sample_types = [sample_types_I[0] for sn in sample_names]
    else:
        sample_names = []
        sample_types = []
        for st in sample_types_I:
            sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I, st)
            sample_names.extend(sample_names_tmp)
            sample_types.extend([st for sn in sample_names_tmp])
    for sn in sample_names:
        print('analyzing peakInformation for sample_name ' + sn)
        # get sample description
        desc = self.get_description_experimentIDAndSampleID_sampleDescription(experiment_id_I, sn)
        # get component names
        if component_names_I:
            component_names = component_names_I
        else:
            component_names = self.get_componentsNames_experimentIDAndSampleName(experiment_id_I, sn)
        component_names_all.extend(component_names)
        for cn in component_names:
            # get rt, height, s/n
            sst_data = self.get_peakInfo_sampleNameAndComponentName(sn, cn, acquisition_date_and_time)
            if sst_data:
                tmp = {}
                tmp.update(sst_data)
                tmp.update(desc)
                tmp.update({'sample_name': sn})
                data_O.append(tmp)
    # Plot data over time
    if component_names_I:
        # use input order
        component_names_unique = component_names_I
    else:
        # use alphabetical order
        component_names_unique = list(set(component_names_all))
        component_names_unique.sort()
    if plot_type_I == 'single':
        for cn in component_names_unique:
            data_parameters = {}
            for parameter in peakInfo_I:
                data_parameters[parameter] = []
                acquisition_date_and_times = []
                acquisition_date_and_times_hrs = []
                sample_names_parameter = []
                sample_types_parameter = []
                for sn_cnt, sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name'] == cn and d[parameter]:
                            data_parameters[parameter].append(d[parameter])
                            acquisition_date_and_times.append(d['acquisition_date_and_time'])
                            acquisition_date_and_times_hrs.append(_to_hrs(d['acquisition_date_and_time']))
                            sample_names_parameter.append(sn)
                            sample_types_parameter.append(sample_types[sn_cnt])
                # FIX: guard against no matching data (min() of empty raises)
                if not acquisition_date_and_times_hrs:
                    continue
                # normalize time
                acquisition_date_and_times_hrs.sort()
                t_start = min(acquisition_date_and_times_hrs)
                for t_cnt, t in enumerate(acquisition_date_and_times_hrs):
                    if y_data_type_I == 'acquisition_date_and_time':
                        acquisition_date_and_times_hrs[t_cnt] = t - t_start
                    elif y_data_type_I == 'count':
                        acquisition_date_and_times_hrs[t_cnt] = t_cnt
                title = cn + '\n' + parameter
                filename = filename_O + '_' + experiment_id_I + '_' + cn + '_' + parameter + figure_format
                mplot.scatterLinePlot(title, x_title_I, y_title_I, acquisition_date_and_times_hrs, data_parameters[parameter], fit_func_I='lowess', show_eqn_I=False, show_r2_I=False, filename_I=filename, show_plot_I=False)
    if plot_type_I == 'multiple':
        for parameter in peakInfo_I:
            data_parameters = []
            acquisition_date_and_times = []
            acquisition_date_and_times_hrs = []
            sample_names_parameter = []
            sample_types_parameter = []
            component_group_names = []
            component_names = []
            for cn_cnt, cn in enumerate(component_names_unique):
                data = []
                acquisition_date_and_time = []
                acquisition_date_and_time_hrs = []
                sample_name_parameter = []
                sample_type_parameter = []
                for sn_cnt, sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name'] == cn and d[parameter]:
                            data.append(d[parameter])
                            acquisition_date_and_time.append(d['acquisition_date_and_time'])
                            acquisition_date_and_time_hrs.append(_to_hrs(d['acquisition_date_and_time']))
                            sample_name_parameter.append(sn)
                            sample_type_parameter.append(sample_types[sn_cnt])
                            # record the component labels once per component
                            if sn_cnt == 0:
                                component_group_names.append(d['component_group_name'])
                                component_names.append(d['component_name'])
                # FIX: guard against no matching data (min() of empty raises)
                if not acquisition_date_and_time_hrs:
                    continue
                # normalize time
                # NOTE(review): the hour values are sorted independently of
                # `data`, so x and y may be re-paired — confirm intended
                acquisition_date_and_time_hrs.sort()
                t_start = min(acquisition_date_and_time_hrs)
                for t_cnt, t in enumerate(acquisition_date_and_time_hrs):
                    if y_data_type_I == 'acquisition_date_and_time':
                        acquisition_date_and_time_hrs[t_cnt] = t - t_start
                    elif y_data_type_I == 'count':
                        acquisition_date_and_time_hrs[t_cnt] = t_cnt
                data_parameters.append(data)
                acquisition_date_and_times.append(acquisition_date_and_time)
                acquisition_date_and_times_hrs.append(acquisition_date_and_time_hrs)
                sample_names_parameter.append(sample_name_parameter)
                sample_types_parameter.append(sample_type_parameter)
            title = parameter
            filename = filename_O + '_' + experiment_id_I + '_' + parameter + figure_format
            mplot.multiScatterLinePlot(title, x_title_I, y_title_I, acquisition_date_and_times_hrs, data_parameters, data_labels_I=component_group_names, fit_func_I=None, show_eqn_I=False, show_r2_I=False, filename_I=filename, show_plot_I=False)