Example #1
0
    def test_predict_metagenome_variances_propagates_variance(self):
        """predict_metagenome_variances propagates gene family variance as expected in a simple example

        Runs predict_metagenome_variances on the table1 fixtures with the
        'one_gene_one_otu' variance table and compares the prediction, the
        variance table and both 95% confidence interval tables against the
        corresponding expected fixtures.
        """

        curr_otu_table = self.otu_table1
        curr_genome_table = self.genome_table1
        curr_variance_table = self.variance_table1_one_gene_one_otu
        curr_exp_metagenome_table = self.predicted_metagenome_table1
        curr_exp_metagenome_variance_table = \
            self.predicted_metagenome_variance_table1_one_gene_one_otu
        curr_exp_upper_CI_95 = self.predicted_metagenome_table1_one_gene_one_otu_upper_CI
        curr_exp_lower_CI_95 = self.predicted_metagenome_table1_one_gene_one_otu_lower_CI

        obs_prediction, obs_variances, obs_lower_CI_95, obs_upper_CI_95 = \
            predict_metagenome_variances(curr_otu_table, curr_genome_table,
                                         gene_variances=curr_variance_table)

        #The point prediction is compared via its delimited text form
        self.assertEqual(obs_prediction.delimitedSelf(),
                         curr_exp_metagenome_table.delimitedSelf())
        #Expect no variance in f1 or f2 in any sample, and no variance in OTU 1 or 3.
        #Otu 2 occurs in all samples except sample 3, so all samples except 3 should
        #have variance.   The exact values follow from variance of scaled random variables or
        #The sum of random variables
        #NOTE: the variance tables are compared as objects, not as text
        self.assertEqual(obs_variances, curr_exp_metagenome_variance_table)

        #Check CIs against hand calculated CIs
        self.assertEqual(obs_upper_CI_95.delimitedSelf(),
                         curr_exp_upper_CI_95.delimitedSelf())
        self.assertEqual(obs_lower_CI_95.delimitedSelf(),
                         curr_exp_lower_CI_95.delimitedSelf())
Example #2
0
    def test_predict_metagenome_variances_returns_zero_variance_from_zero_variance(
            self):
        """Zero-variance input: prediction, variances and both CIs match the fixtures

        The lower and upper 95% CI tables are each compared against the
        expected prediction table itself.
        """
        otus = self.otu_table1
        genomes = self.genome_table1
        zero_variances = self.zero_variance_table1
        expected_prediction = self.predicted_metagenome_table1
        expected_variances = self.predicted_metagenome_table1_zero_variance

        results = predict_metagenome_variances(
            otus, genomes, gene_variances=zero_variances)
        prediction, variances, lower_ci_95, upper_ci_95 = results

        # The point prediction must match the expected table
        self.assertEqual(
            prediction.delimitedSelf(),
            expected_prediction.delimitedSelf())

        # The predicted variances must match the zero-variance fixture
        self.assertEqual(
            variances.delimitedSelf(),
            expected_variances.delimitedSelf())

        # With zero variance both CI bounds coincide with the prediction
        for ci_bound in (lower_ci_95, upper_ci_95):
            self.assertEqual(
                ci_bound.delimitedSelf(),
                expected_prediction.delimitedSelf())
    def test_predict_metagenome_variances_returns_zero_variance_from_zero_variance(self):
        """Zero-variance input: prediction, variances and both CIs match the fixtures

        All tables are compared through their str() representation; the lower
        and upper 95% CI tables are each compared against the expected
        prediction table itself.
        """
        otus = self.otu_table1
        genomes = self.genome_table1
        zero_variances = self.zero_variance_table1
        expected_prediction = self.predicted_metagenome_table1
        expected_variances = self.predicted_metagenome_table1_zero_variance

        results = predict_metagenome_variances(
            otus, genomes, gene_variances=zero_variances)
        prediction, variances, lower_ci_95, upper_ci_95 = results

        # The point prediction must match the expected table
        self.assertEqual(str(prediction), str(expected_prediction))

        # The predicted variances must match the zero-variance fixture
        self.assertEqual(str(variances), str(expected_variances))

        # With zero variance both CI bounds coincide with the prediction
        for ci_bound in (lower_ci_95, upper_ci_95):
            self.assertEqual(str(ci_bound), str(expected_prediction))
    def test_predict_metagenome_variances_propagates_variance(self):
        """predict_metagenome_variances propagates gene family variance as expected in a simple example

        Runs predict_metagenome_variances on the table1 fixtures with the
        'one_gene_one_otu' variance table and compares the prediction, the
        variance table and both 95% confidence interval tables against the
        corresponding expected fixtures.
        """

        curr_otu_table = self.otu_table1
        curr_genome_table = self.genome_table1
        curr_variance_table = self.variance_table1_one_gene_one_otu
        curr_exp_metagenome_table = self.predicted_metagenome_table1
        curr_exp_metagenome_variance_table = \
            self.predicted_metagenome_variance_table1_one_gene_one_otu
        curr_exp_upper_CI_95 = self.predicted_metagenome_table1_one_gene_one_otu_upper_CI
        curr_exp_lower_CI_95 = self.predicted_metagenome_table1_one_gene_one_otu_lower_CI

        obs_prediction, obs_variances, obs_lower_CI_95, obs_upper_CI_95 = \
            predict_metagenome_variances(curr_otu_table, curr_genome_table,
                                         gene_variances=curr_variance_table)

        #The point prediction is compared via its str() form
        self.assertEqual(str(obs_prediction),
                         str(curr_exp_metagenome_table))
        #Expect no variance in f1 or f2 in any sample, and no variance in OTU 1 or 3.
        #Otu 2 occurs in all samples except sample 3, so all samples except 3 should
        #have variance.   The exact values follow from variance of scaled random variables or
        #The sum of random variables
        #NOTE: the variance tables are compared as objects, not as text
        self.assertEqual(obs_variances, curr_exp_metagenome_variance_table)

        #Check CIs against hand calculated CIs
        self.assertEqual(str(obs_upper_CI_95),
                         str(curr_exp_upper_CI_95))
        self.assertEqual(str(obs_lower_CI_95),
                         str(curr_exp_lower_CI_95))
 def test_predict_metagenome_variances_propagates_variance_in_gene_categories(self):
     """The metagenome prediction is unchanged when the 'var_by_gene' variance table is supplied

     Only the point prediction is asserted here; the returned variance and
     confidence interval tables are unpacked but not checked.
     """
     otus = self.otu_table1
     genomes = self.genome_table1
     per_gene_variances = self.variance_table1_var_by_gene
     expected_prediction = self.predicted_metagenome_table1

     results = predict_metagenome_variances(
         otus, genomes, gene_variances=per_gene_variances)
     prediction, _variances, _lower_ci_95, _upper_ci_95 = results

     # The point prediction must still match the expected table
     observed_text = prediction.delimitedSelf()
     expected_text = expected_prediction.delimitedSelf()
     self.assertEqual(observed_text, expected_text)
    def test_predict_metagenome_variances_propagates_variance_in_gene_categories(
            self):
        """The metagenome prediction is unchanged when the 'var_by_gene' variance table is supplied

        Only the point prediction is asserted here; the returned variance and
        confidence interval tables are unpacked but not checked.
        """
        otus = self.otu_table1
        genomes = self.genome_table1
        per_gene_variances = self.variance_table1_var_by_gene
        expected_prediction = self.predicted_metagenome_table1

        results = predict_metagenome_variances(
            otus, genomes, gene_variances=per_gene_variances)
        prediction, _variances, _lower_ci_95, _upper_ci_95 = results

        # The point prediction must still match the expected table
        observed_text = str(prediction)
        expected_text = str(expected_prediction)
        self.assertEqual(observed_text, expected_text)
Example #7
0
        accuracy_output_fh = open(opts.accuracy_metrics, 'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample, nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process

        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."

        predicted_metagenomes,predicted_metagenome_variances,\
        predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\
          predict_metagenome_variances(otu_table,genome_table,variance_table,whole_round=round_flag)
    else:
        #If we don't need confidence intervals, we can do a faster pure numpy prediction

        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table,
                                                    genome_table,
                                                    whole_round=round_flag)

    if opts.normalize_by_otu:
        #normalize (e.g. divide) the abundances by the sum of the OTUs per sample
        if opts.verbose:
            print "Normalizing functional abundances by sum of OTUs per sample"
        inverse_otu_sums = [1 / x for x in otu_table.sum(axis='sample')]
        scaling_factors = dict(zip(otu_table.ids(), inverse_otu_sums))
Example #8
0
        accuracy_output_fh = open(opts.accuracy_metrics,'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process
        
        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."
        
        predicted_metagenomes,predicted_metagenome_variances,\
        predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\
          predict_metagenome_variances(otu_table,genome_table,variance_table)
    else:
        #If we don't need confidence intervals, we can do a faster pure numpy prediction
        
        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table,genome_table)

    if opts.normalize_by_otu:
        #normalize (e.g. divide) the abundances by the sum of the OTUs per sample
        if opts.verbose:
            print "Normalizing functional abundances by sum of OTUs per sample"
        inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')] 
        scaling_factors = dict(zip(otu_table.SampleIds,inverse_otu_sums))
        predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors)
Example #9
0
        accuracy_output_fh = open(opts.accuracy_metrics, 'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample, nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process

        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."

        predicted_metagenomes,predicted_metagenome_variances,\
        predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\
          predict_metagenome_variances(otu_table,genome_table,variance_table)
    else:
        #If we don't need confidence intervals, we can do a faster pure numpy prediction

        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table, genome_table)

    write_metagenome_to_file(predicted_metagenomes,opts.output_metagenome_table,\
        opts.format_tab_delimited,"metagenome prediction",verbose=opts.verbose)

    if opts.with_confidence:
        output_path, output_filename = split(opts.output_metagenome_table)
        base_output_filename, ext = splitext(output_filename)
        variance_output_fp =\
          join(output_path,"%s_variances%s" %(base_output_filename,ext))
Example #10
0
        accuracy_output_fh = open(opts.accuracy_metrics,'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process

        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."

        predicted_metagenomes,predicted_metagenome_variances,\
        predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\
          predict_metagenome_variances(otu_table,genome_table,variance_table,whole_round=round_flag)
    else:
        #If we don't need confidence intervals, we can do a faster pure numpy prediction

        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table,genome_table,whole_round=round_flag)

    if opts.normalize_by_otu:
        #normalize (e.g. divide) the abundances by the sum of the OTUs per sample
        if opts.verbose:
            print "Normalizing functional abundances by sum of OTUs per sample"
        inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')]
        scaling_factors = dict(zip(otu_table.ids(),inverse_otu_sums))
        predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors)
Example #11
0
            print "Writing NSTI information to file:", opts.accuracy_metrics
        accuracy_output_fh = open(opts.accuracy_metrics, "w")
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample, nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        # If we are calculating variance, we get the prediction as part
        # of the process

        if opts.verbose:
            print "Predicting the metagenome, metagenome variance and confidence intervals for the metagenome..."

        predicted_metagenomes, predicted_metagenome_variances, predicted_metagenomes_lower_CI_95, predicted_metagenomes_upper_CI_95 = predict_metagenome_variances(
            otu_table, genome_table, variance_table
        )
    else:
        # If we don't need confidence intervals, we can do a faster pure numpy prediction

        if opts.verbose:
            print "Predicting the metagenome..."
        predicted_metagenomes = predict_metagenomes(otu_table, genome_table)

    if opts.normalize_by_otu:
        # normalize (e.g. divide) the abundances by the sum of the OTUs per sample
        if opts.verbose:
            print "Normalizing functional abundances by sum of OTUs per sample"
        inverse_otu_sums = [1 / x for x in otu_table.sum(axis="sample")]
        scaling_factors = dict(zip(otu_table.ids(), inverse_otu_sums))
        predicted_metagenomes = scale_metagenomes(predicted_metagenomes, scaling_factors)