def execute_calculateMissingValues_replicates(self, experiment_id_I, sample_name_abbreviations_I=None, r_calc_I=None):
    '''Calculate estimates for missing replicate values using AmeliaII from R.

    INPUT:
    experiment_id_I = experiment id to process
    sample_name_abbreviations_I = optional list restricting the sample name
        abbreviations processed; if empty/None, all abbreviations found for
        the experiment are used (default changed from mutable [] to None;
        falsy behavior is identical for existing callers)
    r_calc_I = optional pre-initialized r_interface, so a caller can share
        one R session across calls; a new one is created when omitted
    '''
    r_calc = r_calc_I if r_calc_I else r_interface()
    print('execute_calculateMissingValues_replicates...')
    # resolve the sample name abbreviations to process
    if sample_name_abbreviations_I:
        sample_names_abbreviation = sample_name_abbreviations_I
    else:
        sample_names_abbreviation = self.get_sampleNameAbbreviations_experimentID_dataStage01Replicates(experiment_id_I)
    for sna in sample_names_abbreviation:
        print('calculating missing values for sample_name_abbreviation ' + sna)
        # each (abbreviation, time point) pair is imputed independently
        time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Replicates(experiment_id_I, sna)
        for tp in time_points:
            print('calculating missing values for time_point ' + tp)
            sample_names_short = self.get_SampleNameShort_experimentIDAndSampleNameAbbreviationAndTimePoint_dataStage01Replicates(experiment_id_I, sna, tp)
            # gather all replicate rows (sample, component, concentration)
            # for this group into a single data set for the imputation
            data = []
            for sns in sample_names_short:
                # fixed log message: this loop walks sample names SHORT
                # (previously mislabeled as sample_name_abbreviation)
                print('calculating missing values for sample_name_short ' + sns)
                data_tmp = self.get_data_experimentIDAndSampleNameShortAndTimePoint_dataStage01Replicates(experiment_id_I, sns, tp)
                data.extend(data_tmp)
            # AmeliaII multiple imputation; returns parallel lists of
            # sample names, component names, and imputed concentrations
            sns_NA, cn_NA, cc_NA = r_calc.calculate_missingValues(data)
            for n in range(len(sns_NA)):
                component_group_name, calculated_concentration_units = \
                    self.get_componentGroupNameAndConcUnits_experimentIDAndComponentNameAndSampleNameAbbreviationAndTimePoint_dataStage01Replicates(experiment_id_I, cn_NA[n], sna, tp)
                # persist the imputed value into
                # data_stage01_quantification_replicatesMI
                # (NOTE: commits per row, preserving original behavior)
                row = data_stage01_quantification_replicatesMI(experiment_id_I, sns_NA[n], tp, component_group_name, cn_NA[n], "AmeliaII", None, cc_NA[n], calculated_concentration_units, True, None)
                self.session.add(row)
                self.session.commit()
def execute_calculateMissingValues_ameliaII(self, analysis_id_I, imputation_method_I = 'ameliaII', imputation_options_I = {'n_imputations':1000}, calculated_concentration_units_I=[], experiment_ids_I=[], sample_name_abbreviations_I=[], time_points_I=[], r_calc_I=None):
    '''Calculate estimates for missing replicate values using AmeliaII from R.

    INPUT:
    analysis_id_I = analysis id to process
    imputation_method_I = imputation method name recorded with the results
    imputation_options_I = options dict; only 'n_imputations' is read and
        forwarded to the R call
    calculated_concentration_units_I / experiment_ids_I /
        sample_name_abbreviations_I / time_points_I = optional filters
        applied by the unique-groups query
    r_calc_I = optional pre-initialized r_interface; a new one is created
        when omitted
    '''
    r_calc = r_calc_I if r_calc_I else r_interface()
    print('execute_calculateMissingValues_ameliaII...')
    data_O = []
    data_imputations = []
    # unique (units, experiment, abbreviation, time point) groups; filtering
    # is delegated to the query (the old commented-out in-Python filter was
    # superseded by the query parameters and has been removed).
    # NOTE(review): the original called the two getters below without
    # `self.`; every sibling getter in this file is an instance method, so
    # `self.` was added — confirm against the class definition.
    unique_groups = self.get_calculatedConcentrationUnitsAndExperimentIDsAndSampleNameAbbreviationsAndTimePoints_analysisID_dataPreProcessingReplicates(
        analysis_id_I,
        calculated_concentration_units_I=calculated_concentration_units_I,
        experiment_ids_I=experiment_ids_I,
        sample_name_abbreviations_I=sample_name_abbreviations_I,
        time_points_I=time_points_I,
    )
    for group in unique_groups:
        # rows for this group form one data set for the imputation
        data = self.get_rows_analysisIDAndCalculatedConcentrationUnitsAndExperimentIDsAndSampleNameAbbreviationsAndTimePoints_dataPreProcessingReplicates(
            analysis_id_I,
            group['calculated_concentration_units'],
            group['experiment_id'],
            group['sample_name_abbreviation'],
            group['time_point'],
        )
        # AmeliaII multiple imputation; parallel lists of sample names,
        # component names, and imputed concentrations
        sns_NA, cn_NA, cc_NA = r_calc.calculate_missingValues(
            data, imputation_options_I['n_imputations']
        )
        for n in range(len(sns_NA)):
            component_group_name = None
            # BUGFIX: the original referenced undefined names
            # (experiment_id_I, tp, calculated_concentration_units) left
            # over from the per-experiment variant, and discarded the
            # constructed row. Use the current group's values and collect
            # the row so it is actually written below.
            row = data_stage01_quantification_replicatesMI(
                group['experiment_id'], sns_NA[n], group['time_point'],
                component_group_name, cn_NA[n], "AmeliaII", None, cc_NA[n],
                group['calculated_concentration_units'], True, None)
            data_O.append(row)
        # record the imputation method once per group (the original appended
        # this once per imputed value and referenced an undefined `cu`)
        tmp = {
            "analysis_id": analysis_id_I,
            "imputation_method": imputation_method_I,
            "imputation_options": imputation_options_I,
            "normalization_method": None,
            "normalization_options": None,
            'calculated_concentration_units': group['calculated_concentration_units'],
            "used_": True,
            'comment_I': None,
        }
        data_imputations.append(tmp)
    self.add_rows_table('data_preProcessing_replicates', data_O)
    self.add_rows_table('data_preProcessing_replicates_imputationAndNormalizations', data_imputations)
##import the analysis #analysis01.import_rows_table_add_csv( # 'data_preProcessing_analysis', # pg_settings.datadir_settings['workspace_data']+'/_input/160309_RNASequencing_dataPreProcessing_ALEsKOs01_analysis01.csv' # ); #make the dataPreProcessing tables from SBaaS_dataPreProcessing.dataPreProcessing_replicates_execute import dataPreProcessing_replicates_execute dpprep01 = dataPreProcessing_replicates_execute(session,engine,pg_settings.datadir_settings); dpprep01.initialize_supportedTables(); #dpprep01.drop_tables(); dpprep01.initialize_tables(); # Load R once from r_statistics.r_interface import r_interface r_calc = r_interface(); #get RNAsequencing data snsPreProcessing2snRNASequencing_I={ 'OxicEvo04EcoliGlcM9_Broth-4':'140818_0_OxicEvo04EcoliGlcM9_Broth-4', 'OxicEvo04EcoliGlcM9_Broth-5':'140818_0_OxicEvo04EcoliGlcM9_Broth-5', 'OxicEvo04Evo01EPEcoliGlcM9_Broth-1':'140815_11_OxicEvo04Evo01EPEcoliGlcM9_Broth-1', 'OxicEvo04Evo01EPEcoliGlcM9_Broth-2':'140815_11_OxicEvo04Evo01EPEcoliGlcM9_Broth-2', 'OxicEvo04Evo02EPEcoliGlcM9_Broth-1':'140815_11_OxicEvo04Evo02EPEcoliGlcM9_Broth-1', 'OxicEvo04Evo02EPEcoliGlcM9_Broth-2':'140815_11_OxicEvo04Evo02EPEcoliGlcM9_Broth-2', 'OxicEvo04gndEcoliGlcM9_Broth-1':'140715_0_OxicEvo04gndEcoliGlcM9_Broth-1', 'OxicEvo04gndEcoliGlcM9_Broth-2':'140715_0_OxicEvo04gndEcoliGlcM9_Broth-2', 'OxicEvo04gndEvo01EPEcoliGlcM9_Broth-1':'140812_11_OxicEvo04gndEvo01EPEcoliGlcM9_Broth-1', 'OxicEvo04gndEvo01EPEcoliGlcM9_Broth-2':'140812_11_OxicEvo04gndEvo01EPEcoliGlcM9_Broth-2', 'OxicEvo04gndEvo02EPEcoliGlcM9_Broth-1':'140812_11_OxicEvo04gndEvo02EPEcoliGlcM9_Broth-1', 'OxicEvo04gndEvo02EPEcoliGlcM9_Broth-2':'140812_11_OxicEvo04gndEvo02EPEcoliGlcM9_Broth-2',
def execute_normalization_dataSet(self, analysis_id_I, imputation_methods_I=[], normalization_methods_I=[], calculated_concentration_units_I=[], normalization_method_I='gLog', normalization_options_I={'mult':"TRUE",'lowessnorm':"FALSE"}, r_calc_I=None):
    '''Normalize the full data set for an analysis.

    INPUT:
    analysis_id_I = analysis id to process
    imputation_methods_I / normalization_methods_I = accepted for interface
        compatibility; not read by this implementation (reserved filters)
    calculated_concentration_units_I = optional list restricting the
        concentration units processed; if empty, all units found for the
        analysis are used
    normalization_method_I = 'gLog' (delegated to R) or one of the scalar
        transforms: log2, log10, ln, abs, exp, exp2, ^10, ^2, sqrt
    normalization_options_I = options for the gLog call ('mult',
        'lowessnorm', passed as R string literals)
    r_calc_I = optional pre-initialized r_interface; a new one is created
        when omitted
    '''
    print('execute_glogNormalization...')
    r_calc = r_calc_I if r_calc_I else r_interface()
    python_calc = calculate_statisticsDescriptive()
    data_normalized = []
    data_normalizations = []
    # resolve the concentration units to process (unused local data_O removed)
    if calculated_concentration_units_I:
        calculated_concentration_units = calculated_concentration_units_I
    else:
        calculated_concentration_units = self.get_calculatedConcentrationUnits_analysisID_dataPreProcessingReplicates(analysis_id_I)
    for cu in calculated_concentration_units:
        print('calculating normalization for concentration_units ' + cu)
        # full data set for these units
        data = self.get_rows_analysisIDAndCalculatedConcentrationUnits_dataPreProcessingReplicates(
            analysis_id_I, cu,
            query_I={}, output_O='listDict', dictColumn_I=None)
        if normalization_method_I == 'gLog':
            # generalized-log normalization is delegated to R
            data_glog, concentrations, concentrations_glog = r_calc.calculate_glogNormalization(
                data,
                normalization_options_I['mult'],
                normalization_options_I['lowessnorm'])
            data_normalized.extend(data_glog)
        elif normalization_method_I in ["log2","log10","ln","abs","exp","exp2","^10","^2","sqrt"]:
            # simple per-value scaling computed in Python, mutating the rows
            for d in data:
                normalized_value = python_calc.scale_values(d['calculated_concentration'], normalization_method_I)
                normalized_units = ('%s_%s_%s' %(d['calculated_concentration_units'], normalization_method_I, 'normalized'))
                d['calculated_concentration'] = normalized_value
                d['calculated_concentration_units'] = normalized_units
                d['imputation_method'] = None
            # BUGFIX: extend once after scaling all rows; the original had
            # this inside the loop, duplicating the data set len(data) times
            data_normalized.extend(data)
        else:
            print('normalization_method_I not recognized')
            continue
        # record data normalization method for these units
        tmp = {
            "analysis_id": analysis_id_I,
            "imputation_method": None,
            "imputation_options": None,
            "normalization_method": normalization_method_I,
            "normalization_options": normalization_options_I,
            'calculated_concentration_units': cu,
            "used_": True,
            'comment_I': None,
        }
        data_normalizations.append(tmp)
    self.add_rows_table('data_preProcessing_replicates', data_normalized)
    self.add_rows_table('data_preProcessing_replicates_imputationAndNormalizations', data_normalizations)