def test_predict_metagenome_variances_propagates_variance(self):
    """predict_metagenome_variances propagates gene family variance in a simple example.

    Runs the prediction with the ``variance_table1_one_gene_one_otu``
    fixture and checks the predicted metagenome, the predicted variances
    and the upper/lower CI_95 tables against the expected fixtures.
    """
    curr_otu_table = self.otu_table1
    curr_genome_table = self.genome_table1
    curr_variance_table = self.variance_table1_one_gene_one_otu
    curr_exp_metagenome_table = self.predicted_metagenome_table1
    curr_exp_metagenome_variance_table = \
        self.predicted_metagenome_variance_table1_one_gene_one_otu
    curr_exp_upper_CI_95 = \
        self.predicted_metagenome_table1_one_gene_one_otu_upper_CI
    curr_exp_lower_CI_95 = \
        self.predicted_metagenome_table1_one_gene_one_otu_lower_CI

    obs_prediction, obs_variances, obs_lower_CI_95, obs_upper_CI_95 = \
        predict_metagenome_variances(curr_otu_table, curr_genome_table,
                                     gene_variances=curr_variance_table)

    # Supplying variances must not alter the point prediction itself.
    self.assertEqual(obs_prediction.delimitedSelf(),
                     curr_exp_metagenome_table.delimitedSelf())

    # Expect no variance in f1 or f2 in any sample, and no variance in
    # OTU 1 or 3. OTU 2 occurs in all samples except sample 3, so all
    # samples except 3 should have variance. The exact values follow from
    # the variance of scaled random variables or the sum of random
    # variables.
    # The tables are compared directly here (not via delimitedSelf()).
    self.assertEqual(obs_variances, curr_exp_metagenome_variance_table)

    # Check CIs against hand calculated CIs
    self.assertEqual(obs_upper_CI_95.delimitedSelf(),
                     curr_exp_upper_CI_95.delimitedSelf())
    self.assertEqual(obs_lower_CI_95.delimitedSelf(),
                     curr_exp_lower_CI_95.delimitedSelf())
def test_predict_metagenome_variances_returns_zero_variance_from_zero_variance(
        self):
    """predict_metagenome_variances gives zero variance for zero variance input.

    With the ``zero_variance_table1`` fixture as gene variances, the
    prediction must match the expected metagenome, the variances must match
    the expected zero-variance table, and both CI tables must equal the
    prediction.
    """
    expected_prediction = self.predicted_metagenome_table1
    expected_variances = self.predicted_metagenome_table1_zero_variance

    prediction, variances, lower_ci, upper_ci = predict_metagenome_variances(
        self.otu_table1,
        self.genome_table1,
        gene_variances=self.zero_variance_table1)

    # The point prediction is as expected.
    self.assertEqual(prediction.delimitedSelf(),
                     expected_prediction.delimitedSelf())

    # The predicted variances are all zeros, as expected.
    self.assertEqual(variances.delimitedSelf(),
                     expected_variances.delimitedSelf())

    # With zero variance, the lower and then the upper CI collapse onto the
    # expected value (i.e. the prediction).
    for ci_table in (lower_ci, upper_ci):
        self.assertEqual(ci_table.delimitedSelf(),
                         expected_prediction.delimitedSelf())
def test_predict_metagenome_variances_returns_zero_variance_from_zero_variance(self):
    """predict_metagenome_variances gives zero variance for zero variance input.

    With the ``zero_variance_table1`` fixture as gene variances, the string
    form of the prediction must match the expected metagenome, the variances
    must match the expected zero-variance table, and both CI tables must
    equal the prediction.
    """
    expected_prediction = self.predicted_metagenome_table1
    expected_variances = self.predicted_metagenome_table1_zero_variance

    prediction, variances, lower_ci, upper_ci = predict_metagenome_variances(
        self.otu_table1,
        self.genome_table1,
        gene_variances=self.zero_variance_table1)

    # The point prediction is as expected.
    self.assertEqual(str(prediction), str(expected_prediction))

    # The predicted variances are all zeros, as expected.
    self.assertEqual(str(variances), str(expected_variances))

    # With zero variance, the lower and then the upper CI collapse onto the
    # expected value (i.e. the prediction).
    for ci_table in (lower_ci, upper_ci):
        self.assertEqual(str(ci_table), str(expected_prediction))
def test_predict_metagenome_variances_propagates_variance(self):
    """predict_metagenome_variances propagates gene family variance in a simple example.

    Runs the prediction with the ``variance_table1_one_gene_one_otu``
    fixture and checks the predicted metagenome, the predicted variances
    and the upper/lower CI_95 tables against the expected fixtures.
    """
    curr_otu_table = self.otu_table1
    curr_genome_table = self.genome_table1
    curr_variance_table = self.variance_table1_one_gene_one_otu
    curr_exp_metagenome_table = self.predicted_metagenome_table1
    curr_exp_metagenome_variance_table = \
        self.predicted_metagenome_variance_table1_one_gene_one_otu
    curr_exp_upper_CI_95 = \
        self.predicted_metagenome_table1_one_gene_one_otu_upper_CI
    curr_exp_lower_CI_95 = \
        self.predicted_metagenome_table1_one_gene_one_otu_lower_CI

    obs_prediction, obs_variances, obs_lower_CI_95, obs_upper_CI_95 = \
        predict_metagenome_variances(curr_otu_table, curr_genome_table,
                                     gene_variances=curr_variance_table)

    # Supplying variances must not alter the point prediction itself.
    self.assertEqual(str(obs_prediction), str(curr_exp_metagenome_table))

    # Expect no variance in f1 or f2 in any sample, and no variance in
    # OTU 1 or 3. OTU 2 occurs in all samples except sample 3, so all
    # samples except 3 should have variance. The exact values follow from
    # the variance of scaled random variables or the sum of random
    # variables.
    # The tables are compared directly here (not via str()).
    self.assertEqual(obs_variances, curr_exp_metagenome_variance_table)

    # Check CIs against hand calculated CIs
    self.assertEqual(str(obs_upper_CI_95), str(curr_exp_upper_CI_95))
    self.assertEqual(str(obs_lower_CI_95), str(curr_exp_lower_CI_95))
def test_predict_metagenome_variances_propagates_variance_in_gene_categories(self):
    """predict_metagenome_variances with per-gene variances keeps the prediction intact.

    Uses the ``variance_table1_var_by_gene`` fixture as gene variances.
    Only the point prediction is asserted; the returned variance and CI
    tables are unpacked but not checked.
    """
    results = predict_metagenome_variances(
        self.otu_table1,
        self.genome_table1,
        gene_variances=self.variance_table1_var_by_gene)
    # Four tables are returned: prediction, variances, lower CI, upper CI.
    prediction, _variances, _lower_ci, _upper_ci = results

    # Check that the metagenome prediction hasn't changed
    expected_prediction = self.predicted_metagenome_table1
    self.assertEqual(prediction.delimitedSelf(),
                     expected_prediction.delimitedSelf())
def test_predict_metagenome_variances_propagates_variance_in_gene_categories(
        self):
    """predict_metagenome_variances with per-gene variances keeps the prediction intact.

    Uses the ``variance_table1_var_by_gene`` fixture as gene variances.
    Only the point prediction is asserted; the returned variance and CI
    tables are unpacked but not checked.
    """
    results = predict_metagenome_variances(
        self.otu_table1,
        self.genome_table1,
        gene_variances=self.variance_table1_var_by_gene)
    # Four tables are returned: prediction, variances, lower CI, upper CI.
    prediction, _variances, _lower_ci, _upper_ci = results

    # Check that the metagenome prediction hasn't changed
    expected_prediction = self.predicted_metagenome_table1
    self.assertEqual(str(prediction), str(expected_prediction))
# Fragment of the prediction script's main flow (the enclosing function header
# is not visible here). In order, the statements below:
#   1. open opts.accuracy_metrics for writing and emit a
#      "#Sample<TAB>Metric<TAB>Value" header followed by one "Weighted NSTI"
#      row per (sample, nsti) pair in samples_and_nstis;
#   2. predict the metagenome: via predict_metagenome_variances (which also
#      yields the variance and lower/upper CI_95 tables) when
#      opts.with_confidence is set, otherwise via predict_metagenomes;
#      whole_round=round_flag is forwarded in both cases;
#   3. when opts.normalize_by_otu is set, build scaling_factors mapping
#      otu_table.ids() to 1 / (the corresponding otu_table.sum(axis='sample')).
# NOTE(review): accuracy_output_fh is not closed in the visible lines --
# confirm it is closed later (or use a `with` block) so the rows are flushed.
# NOTE(review): under Python 2, 1 / x is integer division when x is an int,
# and is ill-defined for a zero sum -- presumably the sums are non-zero
# floats; verify.
accuracy_output_fh = open(opts.accuracy_metrics, 'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample, nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process if opts.verbose: print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..." predicted_metagenomes,predicted_metagenome_variances,\ predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\ predict_metagenome_variances(otu_table,genome_table,variance_table,whole_round=round_flag) else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table, genome_table, whole_round=round_flag) if opts.normalize_by_otu: #normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1 / x for x in otu_table.sum(axis='sample')] scaling_factors = dict(zip(otu_table.ids(), inverse_otu_sums))
# Fragment of the prediction script's main flow (the enclosing function header
# is not visible here). In order, the statements below:
#   1. open opts.accuracy_metrics for writing and emit a
#      "#Sample<TAB>Metric<TAB>Value" header followed by one "Weighted NSTI"
#      row per (sample, nsti) pair in samples_and_nstis;
#   2. predict the metagenome: via predict_metagenome_variances (which also
#      yields the variance and lower/upper CI_95 tables) when
#      opts.with_confidence is set, otherwise via predict_metagenomes;
#   3. when opts.normalize_by_otu is set, build scaling_factors mapping
#      otu_table.SampleIds to 1 / (the corresponding
#      otu_table.sum(axis='sample')) and pass them with the prediction to
#      scale_metagenomes, rebinding predicted_metagenomes to the result.
# NOTE(review): accuracy_output_fh is not closed in the visible lines --
# confirm it is closed later (or use a `with` block) so the rows are flushed.
# NOTE(review): under Python 2, 1/x is integer division when x is an int,
# and is ill-defined for a zero sum -- presumably the sums are non-zero
# floats; verify.
accuracy_output_fh = open(opts.accuracy_metrics,'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample,nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process if opts.verbose: print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..." predicted_metagenomes,predicted_metagenome_variances,\ predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\ predict_metagenome_variances(otu_table,genome_table,variance_table) else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table,genome_table) if opts.normalize_by_otu: #normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')] scaling_factors = dict(zip(otu_table.SampleIds,inverse_otu_sums)) predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors)
# Fragment of the prediction script's main flow (the enclosing function header
# is not visible here). In order, the statements below:
#   1. open opts.accuracy_metrics for writing and emit a
#      "#Sample<TAB>Metric<TAB>Value" header followed by one "Weighted NSTI"
#      row per (sample, nsti) pair in samples_and_nstis;
#   2. predict the metagenome: via predict_metagenome_variances (which also
#      yields the variance and lower/upper CI_95 tables) when
#      opts.with_confidence is set, otherwise via predict_metagenomes;
#   3. hand the prediction to write_metagenome_to_file together with
#      opts.output_metagenome_table, opts.format_tab_delimited, the label
#      "metagenome prediction" and verbose=opts.verbose;
#   4. when opts.with_confidence is set, derive variance_output_fp by
#      inserting "_variances" between the base name and the extension of
#      opts.output_metagenome_table, in the same directory.
# NOTE(review): split / splitext / join are presumably the os.path helpers --
# confirm against the file's imports.
# NOTE(review): accuracy_output_fh is not closed in the visible lines --
# confirm it is closed later (or use a `with` block) so the rows are flushed.
accuracy_output_fh = open(opts.accuracy_metrics, 'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample, nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process if opts.verbose: print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..." predicted_metagenomes,predicted_metagenome_variances,\ predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\ predict_metagenome_variances(otu_table,genome_table,variance_table) else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table, genome_table) write_metagenome_to_file(predicted_metagenomes,opts.output_metagenome_table,\ opts.format_tab_delimited,"metagenome prediction",verbose=opts.verbose) if opts.with_confidence: output_path, output_filename = split(opts.output_metagenome_table) base_output_filename, ext = splitext(output_filename) variance_output_fp =\ join(output_path,"%s_variances%s" %(base_output_filename,ext))
# Fragment of the prediction script's main flow (the enclosing function header
# is not visible here). In order, the statements below:
#   1. open opts.accuracy_metrics for writing and emit a
#      "#Sample<TAB>Metric<TAB>Value" header followed by one "Weighted NSTI"
#      row per (sample, nsti) pair in samples_and_nstis;
#   2. predict the metagenome: via predict_metagenome_variances (which also
#      yields the variance and lower/upper CI_95 tables) when
#      opts.with_confidence is set, otherwise via predict_metagenomes;
#      whole_round=round_flag is forwarded in both cases;
#   3. when opts.normalize_by_otu is set, build scaling_factors mapping
#      otu_table.ids() to 1 / (the corresponding
#      otu_table.sum(axis='sample')) and pass them with the prediction to
#      scale_metagenomes, rebinding predicted_metagenomes to the result.
# NOTE(review): accuracy_output_fh is not closed in the visible lines --
# confirm it is closed later (or use a `with` block) so the rows are flushed.
# NOTE(review): under Python 2, 1/x is integer division when x is an int,
# and is ill-defined for a zero sum -- presumably the sums are non-zero
# floats; verify.
accuracy_output_fh = open(opts.accuracy_metrics,'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample,nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process if opts.verbose: print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..." predicted_metagenomes,predicted_metagenome_variances,\ predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\ predict_metagenome_variances(otu_table,genome_table,variance_table,whole_round=round_flag) else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table,genome_table,whole_round=round_flag) if opts.normalize_by_otu: #normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')] scaling_factors = dict(zip(otu_table.ids(),inverse_otu_sums)) predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors)
# Fragment of the prediction script's main flow (the enclosing function header
# is not visible here). In order, the statements below:
#   1. print a message naming opts.accuracy_metrics, then open that path for
#      writing and emit a "#Sample<TAB>Metric<TAB>Value" header followed by
#      one "Weighted NSTI" row per (sample, nsti) pair in samples_and_nstis;
#   2. predict the metagenome: via predict_metagenome_variances (which also
#      yields the variance and lower/upper CI_95 tables) when
#      opts.with_confidence is set, otherwise via predict_metagenomes;
#   3. when opts.normalize_by_otu is set, build scaling_factors mapping
#      otu_table.ids() to 1 / (the corresponding
#      otu_table.sum(axis="sample")) and pass them with the prediction to
#      scale_metagenomes, rebinding predicted_metagenomes to the result.
# NOTE(review): accuracy_output_fh is not closed in the visible lines --
# confirm it is closed later (or use a `with` block) so the rows are flushed.
# NOTE(review): under Python 2, 1 / x is integer division when x is an int,
# and is ill-defined for a zero sum -- presumably the sums are non-zero
# floats; verify.
print "Writing NSTI information to file:", opts.accuracy_metrics accuracy_output_fh = open(opts.accuracy_metrics, "w") accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample, nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: # If we are calculating variance, we get the prediction as part # of the process if opts.verbose: print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..." predicted_metagenomes, predicted_metagenome_variances, predicted_metagenomes_lower_CI_95, predicted_metagenomes_upper_CI_95 = predict_metagenome_variances( otu_table, genome_table, variance_table ) else: # If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table, genome_table) if opts.normalize_by_otu: # normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1 / x for x in otu_table.sum(axis="sample")] scaling_factors = dict(zip(otu_table.ids(), inverse_otu_sums)) predicted_metagenomes = scale_metagenomes(predicted_metagenomes, scaling_factors)