def predict_metagenome_variances(otu_table, genome_table,
                                 gene_variances, verbose=False):
    """Predict metagenome gene counts with variances and 95% CIs.

    otu_table -- BIOM Table object of OTUs (OTU ids are its ObservationIds)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids are its SampleIds, gene ids are its ObservationIds)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count; OTU ids are looked up as SampleIds, as in genome_table
    verbose -- if True, print progress messages

    Returns a 4-tuple of tables built with table_from_template:
    (prediction, variance, lower 95% CI, upper 95% CI).

    Raises ValueError if no OTU id is present in all three tables.

    Note that OTU counts are treated as constants (exactly known) rather
    than random variables for now.  If a good method for getting variance
    for OTU counts becomes available, this should be updated to treat them
    as random variables as well.
    """
    # OTUs are SampleIds in the genome (and variance) table, but
    # ObservationIds in the OTU table.
    genome_table_otu_ids = "SampleIds"
    otu_table_otu_ids = "ObservationIds"

    # OTUs shared by the OTU table and the genome table
    otus_with_genomes = set(get_overlapping_ids(
        otu_table, genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))
    # OTUs shared by the OTU table and the variance table
    otus_with_variances = get_overlapping_ids(
        otu_table, gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)

    # Only OTUs found in *all three* tables can be processed.  Previously the
    # second lookup simply replaced the first, so an OTU missing from the
    # genome table could still reach genome_table.sampleData() below.
    # Iteration order follows the second lookup, so the accumulation order is
    # unchanged whenever the genome and variance tables agree.
    overlapping_otus = [otu_id for otu_id in otus_with_variances
                        if otu_id in otus_with_genomes]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "gene variance table, so can't predict metagenome variances.")

    # Set copy gives constant-time membership tests inside the filters
    retained_otus = set(overlapping_otus)

    # Filter OTU and Genome Table to contain only overlapping IDs
    # NOTE(review): the return values are discarded, exactly as before.  If
    # these methods return a new table instead of filtering in place, the two
    # calls have no effect -- confirm against the BIOM version in use.  The
    # accumulation loop below fetches data per overlapping OTU id either way.
    otu_table.filterObservations(
        lambda val, otu_id, metadata: otu_id in retained_otus)
    genome_table.filterSamples(
        lambda val, otu_id, metadata: otu_id in retained_otus)

    # TODO: handle gene_variances=None by substituting an all-zero variance
    # table (test whether that is faster than filling numpy.zeros followed by
    # table construction).

    metagenome_data = None
    metagenome_variance_data = None

    if verbose:
        # Parenthesized single-argument print emits the same text under the
        # Python 2 print statement.
        print("Calculating the variance of the estimated metagenome for %i OTUs."
              % len(overlapping_otus))

    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.observationData(otu_id)
        otu_across_genes = genome_table.sampleData(otu_id)
        otu_variance_across_genes = gene_variances.sampleData(otu_id)

        # One row per sample: that sample's OTU count times the gene counts
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        # Matching rows of variances, scaled by the same per-sample OTU count
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            # First OTU seeds the running totals
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    # Rows were built per sample; transpose before wrapping into tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if verbose:
        print("Calculating metagenomic confidene intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=True, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction,variance,upper confidence interval and lower confidence interval.")

    # Wrap results into BIOM Tables
    def _wrap(values, **extra):
        # Shared template arguments for all four result tables
        return table_from_template(
            values, otu_table.SampleIds, genome_table.ObservationIds,
            sample_metadata_source=otu_table,
            observation_metadata_source=genome_table,
            constructor=SparseGeneTable, **extra)

    # The prediction table is built without the verbose flag, as before
    result_data_table = _wrap(data_result)
    result_variance_table = _wrap(variance_result, verbose=verbose)
    result_lower_CI_table = _wrap(lower_95_CI, verbose=verbose)
    result_upper_CI_table = _wrap(upper_95_CI, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
        result_upper_CI_table
def predict_metagenome_variances(otu_table, genome_table,
                                 gene_variances, verbose=False):
    """Predict metagenome gene counts with variances and 95% CIs.

    otu_table -- BIOM Table object of OTUs (OTU ids are its ObservationIds)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids are its SampleIds, gene ids are its ObservationIds)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count; OTU ids are looked up as SampleIds, as in genome_table
    verbose -- if True, print progress messages

    Returns a 4-tuple of tables built with table_from_template:
    (prediction, variance, lower 95% CI, upper 95% CI).

    Raises ValueError if no OTU id is present in all three tables.

    Note that OTU counts are treated as constants (exactly known) rather
    than random variables for now.  If a good method for getting variance
    for OTU counts becomes available, this should be updated to treat them
    as random variables as well.
    """
    # OTUs are SampleIds in the genome (and variance) table, but
    # ObservationIds in the OTU table.
    genome_table_otu_ids = "SampleIds"
    otu_table_otu_ids = "ObservationIds"

    # OTUs shared by the OTU table and the genome table
    otus_with_genomes = set(get_overlapping_ids(
        otu_table, genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))
    # OTUs shared by the OTU table and the variance table
    otus_with_variances = get_overlapping_ids(
        otu_table, gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)

    # Only OTUs found in *all three* tables can be processed.  Previously the
    # second lookup simply replaced the first, so an OTU missing from the
    # genome table could still reach genome_table.sampleData() below.
    # Iteration order follows the second lookup, so the accumulation order is
    # unchanged whenever the genome and variance tables agree.
    overlapping_otus = [otu_id for otu_id in otus_with_variances
                        if otu_id in otus_with_genomes]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "gene variance table, so can't predict metagenome variances.")

    # Set copy gives constant-time membership tests inside the filters
    retained_otus = set(overlapping_otus)

    # Filter OTU and Genome Table to contain only overlapping IDs
    # NOTE(review): the return values are discarded, exactly as before.  If
    # these methods return a new table instead of filtering in place, the two
    # calls have no effect -- confirm against the BIOM version in use.  The
    # accumulation loop below fetches data per overlapping OTU id either way.
    otu_table.filterObservations(
        lambda val, otu_id, metadata: otu_id in retained_otus)
    genome_table.filterSamples(
        lambda val, otu_id, metadata: otu_id in retained_otus)

    # TODO: handle gene_variances=None by substituting an all-zero variance
    # table (test whether that is faster than filling numpy.zeros followed by
    # table construction).

    metagenome_data = None
    metagenome_variance_data = None

    if verbose:
        # Parenthesized single-argument print emits the same text under the
        # Python 2 print statement.
        print("Calculating the variance of the estimated metagenome for %i OTUs."
              % len(overlapping_otus))

    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.observationData(otu_id)
        otu_across_genes = genome_table.sampleData(otu_id)
        otu_variance_across_genes = gene_variances.sampleData(otu_id)

        # One row per sample: that sample's OTU count times the gene counts
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        # Matching rows of variances, scaled by the same per-sample OTU count
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            # First OTU seeds the running totals
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    # Rows were built per sample; transpose before wrapping into tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if verbose:
        print("Calculating metagenomic confidene intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=True, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction,variance,upper confidence interval and lower confidence interval.")

    # Wrap results into BIOM Tables
    def _wrap(values, **extra):
        # Shared template arguments for all four result tables
        return table_from_template(
            values, otu_table.SampleIds, genome_table.ObservationIds,
            sample_metadata_source=otu_table,
            observation_metadata_source=genome_table,
            constructor=SparseGeneTable, **extra)

    # The prediction table is built without the verbose flag, as before
    result_data_table = _wrap(data_result)
    result_variance_table = _wrap(variance_result, verbose=verbose)
    result_lower_CI_table = _wrap(lower_95_CI, verbose=verbose)
    result_upper_CI_table = _wrap(upper_95_CI, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
        result_upper_CI_table
def predict_metagenome_variances(otu_table, genome_table, gene_variances,
                                 verbose=False, whole_round=True):
    """Predict metagenome gene counts with variances and 95% CIs.

    otu_table -- BIOM Table object of OTUs (OTU ids on its observation axis)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids on its sample axis, gene ids on its observation axis)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count; OTU ids are looked up on the sample axis, as in genome_table
    verbose -- if True, print progress messages
    whole_round -- if True, round the predicted counts to the nearest whole
      numbers and request rounded confidence intervals (round_CI)

    Returns a 4-tuple of tables built with table_from_template:
    (prediction, variance, lower 95% CI, upper 95% CI).

    Raises ValueError if no OTU id is present in all three tables.

    Note that OTU counts are treated as constants (exactly known) rather
    than random variables for now.  If a good method for getting variance
    for OTU counts becomes available, this should be updated to treat them
    as random variables as well.
    """
    # OTUs are samples in the genome (and variance) table, but observations
    # in the OTU table.
    genome_table_otu_ids = "sample"
    otu_table_otu_ids = "observation"

    # OTUs shared by the OTU table and the genome table
    otus_with_genomes = set(get_overlapping_ids(
        otu_table, genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))
    # OTUs shared by the OTU table and the variance table
    otus_with_variances = get_overlapping_ids(
        otu_table, gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)

    # Only OTUs found in *all three* tables can be processed.  Previously the
    # second lookup simply replaced the first, so an OTU missing from the
    # genome table could still reach genome_table.data() below.
    # Iteration order follows the second lookup, so the accumulation order is
    # unchanged whenever the genome and variance tables agree.
    overlapping_otus = [otu_id for otu_id in otus_with_variances
                        if otu_id in otus_with_genomes]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "gene variance table, so can't predict metagenome variances.")

    # Set copy gives constant-time membership tests inside the filter
    retained_otus = set(overlapping_otus)

    # Filter OTU and Genome Table to contain only overlapping IDs; the
    # caller's tables are left untouched (inplace=False).
    filter_f = lambda v, id_, m: id_ in retained_otus
    otu_table = otu_table.filter(filter_f, axis='observation', inplace=False)
    genome_table = genome_table.filter(filter_f, inplace=False)

    metagenome_data = None
    metagenome_variance_data = None

    if verbose:
        # Parenthesized single-argument print emits the same text under the
        # Python 2 print statement.
        print("Calculating the variance of the estimated metagenome for %i OTUs."
              % len(overlapping_otus))

    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.data(otu_id, axis='observation')
        otu_across_genes = genome_table.data(otu_id)
        otu_variance_across_genes = gene_variances.data(otu_id)

        # One row per sample: that sample's OTU count times the gene counts
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        # Matching rows of variances, scaled by the same per-sample OTU count
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            # First OTU seeds the running totals
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    # Rows were built per sample; transpose before wrapping into tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if whole_round:
        # Round counts to nearest whole numbers
        data_result = around(data_result)

    if verbose:
        print("Calculating metagenomic confidence intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=whole_round, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction, variance, upper confidence interval and lower confidence interval.")

    # Wrap results into BIOM Tables
    def _wrap(values, **extra):
        # Shared template arguments for all four result tables
        return table_from_template(
            values, otu_table.ids(), genome_table.ids(axis='observation'),
            sample_metadata_source=otu_table,
            observation_metadata_source=genome_table, **extra)

    # The prediction table is built without the verbose flag, as before
    result_data_table = _wrap(data_result)
    result_variance_table = _wrap(variance_result, verbose=verbose)
    result_lower_CI_table = _wrap(lower_95_CI, verbose=verbose)
    result_upper_CI_table = _wrap(upper_95_CI, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
        result_upper_CI_table
def predict_metagenome_variances(otu_table, genome_table, gene_variances,
                                 verbose=False, whole_round=True):
    """Predict metagenome gene counts with variances and 95% CIs.

    otu_table -- BIOM Table object of OTUs (OTU ids on its observation axis)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids on its sample axis, gene ids on its observation axis)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count; OTU ids are looked up on the sample axis, as in genome_table
    verbose -- if True, print progress messages
    whole_round -- if True, round the predicted counts to the nearest whole
      numbers and request rounded confidence intervals (round_CI)

    Returns a 4-tuple of tables built with table_from_template:
    (prediction, variance, lower 95% CI, upper 95% CI).

    Raises ValueError if no OTU id is present in all three tables.

    Note that OTU counts are treated as constants (exactly known) rather
    than random variables for now.  If a good method for getting variance
    for OTU counts becomes available, this should be updated to treat them
    as random variables as well.
    """
    # OTUs are samples in the genome (and variance) table, but observations
    # in the OTU table.
    genome_table_otu_ids = "sample"
    otu_table_otu_ids = "observation"

    # OTUs shared by the OTU table and the genome table
    otus_with_genomes = set(get_overlapping_ids(
        otu_table, genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))
    # OTUs shared by the OTU table and the variance table
    otus_with_variances = get_overlapping_ids(
        otu_table, gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)

    # Only OTUs found in *all three* tables can be processed.  Previously the
    # second lookup simply replaced the first, so an OTU missing from the
    # genome table could still reach genome_table.data() below.
    # Iteration order follows the second lookup, so the accumulation order is
    # unchanged whenever the genome and variance tables agree.
    overlapping_otus = [otu_id for otu_id in otus_with_variances
                        if otu_id in otus_with_genomes]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "gene variance table, so can't predict metagenome variances.")

    # Set copy gives constant-time membership tests inside the filter
    retained_otus = set(overlapping_otus)

    # Filter OTU and Genome Table to contain only overlapping IDs; the
    # caller's tables are left untouched (inplace=False).
    filter_f = lambda v, id_, m: id_ in retained_otus
    otu_table = otu_table.filter(filter_f, axis='observation', inplace=False)
    genome_table = genome_table.filter(filter_f, inplace=False)

    metagenome_data = None
    metagenome_variance_data = None

    if verbose:
        # Parenthesized single-argument print emits the same text under the
        # Python 2 print statement.
        print("Calculating the variance of the estimated metagenome for %i OTUs."
              % len(overlapping_otus))

    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.data(otu_id, axis='observation')
        otu_across_genes = genome_table.data(otu_id)
        otu_variance_across_genes = gene_variances.data(otu_id)

        # One row per sample: that sample's OTU count times the gene counts
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        # Matching rows of variances, scaled by the same per-sample OTU count
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            # First OTU seeds the running totals
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    # Rows were built per sample; transpose before wrapping into tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if whole_round:
        # Round counts to nearest whole numbers
        data_result = around(data_result)

    if verbose:
        print("Calculating metagenomic confidence intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=whole_round, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction, variance, upper confidence interval and lower confidence interval.")

    # Wrap results into BIOM Tables
    def _wrap(values, **extra):
        # Shared template arguments for all four result tables
        return table_from_template(
            values, otu_table.ids(), genome_table.ids(axis='observation'),
            sample_metadata_source=otu_table,
            observation_metadata_source=genome_table, **extra)

    # The prediction table is built without the verbose flag, as before
    result_data_table = _wrap(data_result)
    result_variance_table = _wrap(variance_result, verbose=verbose)
    result_lower_CI_table = _wrap(lower_95_CI, verbose=verbose)
    result_upper_CI_table = _wrap(upper_95_CI, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
        result_upper_CI_table