def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
  
    if opts.limit_to_function:
        limit_to_functions = opts.limit_to_function.split(',')
        if opts.verbose:
            print "Limiting output to only functions:",limit_to_functions
    else:
        limit_to_functions = []

    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ext=path.splitext(opts.input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))
    if opts.verbose:
        print "Predicting the metagenome..."
    
    partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions)
    output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes])
    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    open(opts.output_metagenome_table,'w').write(output_text)
Beispiel #2
0
    def test_partition_metagenome_contributions_with_taxonomy(self):
        """partition_metagenome_contributions output matches the expected table when the OTU table has taxonomy"""
        observed_rows = partition_metagenome_contributions(
            self.otu_table_with_taxonomy, self.genome_table1)
        obs_text = "\n".join(
            "\t".join(str(field) for field in observed_row)
            for observed_row in observed_rows)

        expected_lines = self.predicted_gene_partition_table_with_taxonomy.split('\n')
        expected_rows = [[str(field) for field in line.split()]
                         for line in expected_lines]

        #BIOM adds spaces to metadata fields (not sure why), so prepend a
        #space to the taxonomy fields (columns 9 through 14) of every row
        #after the header
        for expected_row in expected_rows[1:]:
            for column in range(9, 15):
                expected_row[column] = ' ' + expected_row[column]

        exp_text = "\n".join("\t".join(expected_row)
                             for expected_row in expected_rows)

        self.assertEqual(obs_text, exp_text)
    def test_partition_metagenome_contributions_with_taxonomy(self):
        """partition_metagenome_contributions output matches the expected table when the OTU table has taxonomy"""
        observed_rows = partition_metagenome_contributions(
            self.otu_table_with_taxonomy, self.genome_table1)
        obs_text = "\n".join(
            "\t".join(str(field) for field in observed_row)
            for observed_row in observed_rows)

        expected_lines = self.predicted_gene_partition_table_with_taxonomy.split('\n')
        expected_rows = [[str(field) for field in line.split()]
                         for line in expected_lines]

        #BIOM adds spaces to metadata fields (not sure why), so prepend a
        #space to the taxonomy fields (columns 9 through 14) of every row
        #after the header
        for expected_row in expected_rows[1:]:
            for column in range(9, 15):
                expected_row[column] = ' ' + expected_row[column]

        exp_text = "\n".join("\t".join(expected_row)
                             for expected_row in expected_rows)

        self.assertEqual(obs_text, exp_text)
Beispiel #4
0
    def test_partition_metagenome_contributions(self):
        """partition_metagenome_contributions functions with valid input"""
        #Reference data for this test.
        #
        #The OTU table should look like this:
        ##OTU ID Sample1 Sample2 Sample3 Sample4
        #GG_OTU_1    1.0 2.0 3.0 5.0
        #GG_OTU_2    5.0 1.0 0.0 2.0
        #GG_OTU_3    0.0 0.0 1.0 4.0
        #
        #The genome table should look like this:
        ##OTU ID GG_OTU_1    GG_OTU_3    GG_OTU_2
        #f1  1.0 2.0 3.0
        #f2  0.0 1.0 0.0
        #f3  0.0 0.0 1.0
        #
        #predict_metagenomes should then produce a table like this:
        ##OTU ID    Sample1 Sample2 Sample3 Sample4
        #f1  16.0    5.0 5.0 19.0
        #f2  0.0 0.0 1.0 4.0
        #f3  5.0 1.0 0.0 2.0

        def assert_contributes_nothing(count, percent, percent_all):
            #All three contribution measures of a row must be zero
            self.assertFloatEqual(count, 0.0)
            self.assertFloatEqual(percent, 0.0)
            self.assertFloatEqual(percent_all, 0.0)

        #The prediction itself is run but its result is not inspected here
        predict_metagenomes(self.otu_table1, self.genome_table1)

        #The contributions are expected to be broken down by OTU
        obs = partition_metagenome_contributions(self.otu_table1, self.genome_table1)

        obs_text = "\n".join("\t".join(str(field) for field in row) for row in obs)
        expected_lines = self.predicted_gene_partition_table.split('\n')
        exp_text = "\n".join("\t".join(str(field) for field in line.split())
                             for line in expected_lines)

        #Everything after the header row
        data_rows = obs[1:]

        #Sanity check: the last column (percent of all samples) never
        #exceeds the one before it (percent of the current sample)
        for row in data_rows:
            self.assertTrue(row[-1] <= row[-2])

        #Sanity check: the contributions (column 5) summed per gene for
        #Sample1 equal the metagenome table value
        expected_sample1_totals = (("f1", 16.0), ("f2", 0.0), ("f3", 5.0))
        for gene_id, expected_total in expected_sample1_totals:
            observed_total = sum(
                row[5] for row in data_rows
                if row[0] == gene_id and row[1] == "Sample1")
            self.assertFloatEqual(observed_total, expected_total)

        for row in data_rows:
            (gene, sample, OTU, gene_count_per_genome,
             otu_abundance_in_sample, count, percent, percent_all) = row

            #Genomes without a gene don't contribute to it:
            #only GG_OTU_3 has f2 ...
            if gene == "f2" and OTU != "GG_OTU_3":
                assert_contributes_nothing(count, percent, percent_all)
            #... and only GG_OTU_2 has f3
            if gene == "f3" and OTU != "GG_OTU_2":
                assert_contributes_nothing(count, percent, percent_all)

            #OTUs absent from a sample don't contribute
            if sample == "Sample1" and OTU == "GG_OTU_3":
                assert_contributes_nothing(count, percent, percent_all)

        #With the sanity checks passed, compare against the
        #hand-checked result
        self.assertEqual(obs_text, exp_text)

        #"limit to functions" must only return rows for the requested function
        obs_limited = partition_metagenome_contributions(
            self.otu_table1, self.genome_table1, limit_to_functions=["f2"])
        for row in obs_limited[1:]:
            (gene, sample, OTU, gene_count_per_genome,
             otu_abundance_in_sample, count, percent, percent_all) = row
            self.assertEqual(gene, "f2")
    def test_partition_metagenome_contributions(self):
        """partition_metagenome_contributions functions with valid input"""
        #Reference data for this test.
        #
        #The OTU table should look like this:
        ##OTU ID Sample1 Sample2 Sample3 Sample4
        #GG_OTU_1    1.0 2.0 3.0 5.0
        #GG_OTU_2    5.0 1.0 0.0 2.0
        #GG_OTU_3    0.0 0.0 1.0 4.0
        #
        #The genome table should look like this:
        ##OTU ID GG_OTU_1    GG_OTU_3    GG_OTU_2
        #f1  1.0 2.0 3.0
        #f2  0.0 1.0 0.0
        #f3  0.0 0.0 1.0
        #
        #predict_metagenomes should then produce a table like this:
        ##OTU ID    Sample1 Sample2 Sample3 Sample4
        #f1  16.0    5.0 5.0 19.0
        #f2  0.0 0.0 1.0 4.0
        #f3  5.0 1.0 0.0 2.0

        def assert_contributes_nothing(count, percent, percent_all):
            #All three contribution measures of a row must be zero
            self.assertFloatEqual(count, 0.0)
            self.assertFloatEqual(percent, 0.0)
            self.assertFloatEqual(percent_all, 0.0)

        #The prediction itself is run but its result is not inspected here
        predict_metagenomes(self.otu_table1, self.genome_table1)

        #The contributions are expected to be broken down by OTU
        obs = partition_metagenome_contributions(self.otu_table1, self.genome_table1)

        obs_text = "\n".join("\t".join(str(field) for field in row) for row in obs)
        expected_lines = self.predicted_gene_partition_table.split('\n')
        exp_text = "\n".join("\t".join(str(field) for field in line.split())
                             for line in expected_lines)

        #Everything after the header row
        data_rows = obs[1:]

        #Sanity check: the last column (percent of all samples) never
        #exceeds the one before it (percent of the current sample)
        for row in data_rows:
            self.assertTrue(row[-1] <= row[-2])

        #Sanity check: the contributions (column 5) summed per gene for
        #Sample1 equal the metagenome table value
        expected_sample1_totals = (("f1", 16.0), ("f2", 0.0), ("f3", 5.0))
        for gene_id, expected_total in expected_sample1_totals:
            observed_total = sum(
                row[5] for row in data_rows
                if row[0] == gene_id and row[1] == "Sample1")
            self.assertFloatEqual(observed_total, expected_total)

        for row in data_rows:
            (gene, sample, OTU, gene_count_per_genome,
             otu_abundance_in_sample, count, percent, percent_all) = row

            #Genomes without a gene don't contribute to it:
            #only GG_OTU_3 has f2 ...
            if gene == "f2" and OTU != "GG_OTU_3":
                assert_contributes_nothing(count, percent, percent_all)
            #... and only GG_OTU_2 has f3
            if gene == "f3" and OTU != "GG_OTU_2":
                assert_contributes_nothing(count, percent, percent_all)

            #OTUs absent from a sample don't contribute
            if sample == "Sample1" and OTU == "GG_OTU_3":
                assert_contributes_nothing(count, percent, percent_all)

        #With the sanity checks passed, compare against the
        #hand-checked result
        self.assertEqual(obs_text, exp_text)

        #"limit to functions" must only return rows for the requested function
        obs_limited = partition_metagenome_contributions(
            self.otu_table1, self.genome_table1, limit_to_functions=["f2"])
        for row in obs_limited[1:]:
            (gene, sample, OTU, gene_count_per_genome,
             otu_abundance_in_sample, count, percent, percent_all) = row
            self.assertEqual(gene, "f2")
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)


    if opts.limit_to_function:
        limit_to_functions = opts.limit_to_function.split(',')
        if opts.verbose:
            print "Limiting output to only functions:",limit_to_functions
    else:
        limit_to_functions = []

    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = load_table(opts.input_otu_table)
    ids_to_load = otu_table.ids(axis='observation')

    if(opts.input_count_table is None):
        #precalc file has specific name (e.g. ko_13_5_precalculated.tab.gz)
        precalc_file_name='_'.join([opts.type_of_prediction,opts.gg_version,'precalculated.tab.gz'])
        input_count_table=join(get_picrust_project_dir(),'picrust','data',precalc_file_name)
    else:
        input_count_table=opts.input_count_table

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    ext=path.splitext(input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", input_count_table

    if (ext == '.gz'):
        genome_table_fh = gzip.open(input_count_table,'rb')
    else:
        genome_table_fh = open(input_count_table,'U')

    #In the genome/trait table genomes are the samples and
    #genes are the observations


    if opts.load_precalc_file_in_biom:
        if not opts.suppress_subset_loading:
            #Now we want to use the OTU table information
            #to load only rows in the count table corresponding
            #to relevant OTUs

            if opts.verbose:
                print "Loading traits for %i organisms from the trait table" %len(ids_to_load)

            genome_table = load_subset_from_biom_str(genome_table_fh.read(),ids_to_load,axis='samples')
        else:
            if opts.verbose:
                print "Loading *full* count table because --suppress_subset_loading was passed. This may result in high memory usage"
            genome_table = load_table(genome_table_fh)
    else:
        genome_table = convert_precalc_to_biom(genome_table_fh,ids_to_load)
    ok_functional_categories = None

    metadata_type = None
    if opts.limit_to_functional_categories:
        ok_functional_categories = opts.limit_to_functional_categories.split("|")
        if opts.verbose:
            print "Limiting to functional categories: %s" %(str(ok_functional_categories))

        # Either KEGG_Pathways or COG_Category needs
        # to be assigned to metadata_key to limit to
        # functional categories (not needed for 
        # individual functions) 

        if opts.type_of_prediction == "ko":
            metadata_type = "KEGG_Pathways"
        elif opts.type_of_prediction == "cog":
            metadata_type = "COG_Category"
        elif opts.type_of_prediction == "rfam":
            exit("Stopping program: when type of prediction is set to rfam you can only limit to individual functions (-l) rather than to functional categories (-f)")
              
    partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions,\
      limit_to_functional_categories = ok_functional_categories ,  metadata_key = metadata_type )

    output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes])
    if opts.verbose:
        print "Writing results to output file: ",opts.output_fp

    make_output_dir_for_file(opts.output_fp)
    open(opts.output_fp,'w').write(output_text)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
  
    if opts.limit_to_function:
        limit_to_functions = opts.limit_to_function.split(',')
        if opts.verbose:
            print "Limiting output to only functions:",limit_to_functions
    else:
        limit_to_functions = []

    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ids_to_load = otu_table.ObservationIds

    if(opts.input_count_table is None):
        #precalc file has specific name (e.g. ko_13_5_precalculated.tab.gz)
        precalc_file_name='_'.join([opts.type_of_prediction,opts.gg_version,'precalculated.tab.gz'])
        input_count_table=join(get_picrust_project_dir(),'picrust','data',precalc_file_name)
    else:
        input_count_table=opts.input_count_table

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    ext=path.splitext(input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", input_count_table
    
    if (ext == '.gz'):
        genome_table_fh = gzip.open(input_count_table,'rb')
    else:
        genome_table_fh = open(input_count_table,'U')
    
    #In the genome/trait table genomes are the samples and 
    #genes are the observations

    
    if opts.load_precalc_file_in_biom:
        if not opts.suppress_subset_loading:
            #Now we want to use the OTU table information
            #to load only rows in the count table corresponding
            #to relevant OTUs
           
            if opts.verbose:
                print "Loading traits for %i organisms from the trait table" %len(ids_to_load)

            genome_table = load_subset_from_biom_str(genome_table_fh.read(),ids_to_load,axis='samples')
        else:
            if opts.verbose:
                print "Loading *full* count table because --suppress_subset_loading was passed. This may result in high memory usage"
            genome_table = parse_biom_table(genome_table_fh.read())
    else:
        genome_table = convert_precalc_to_biom(genome_table_fh,ids_to_load)
    
    partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions)
    output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes])
    if opts.verbose:
        print "Writing results to output file: ",opts.output_fp
        
    make_output_dir_for_file(opts.output_fp)
    open(opts.output_fp,'w').write(output_text)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)


    if opts.limit_to_function:
        limit_to_functions = opts.limit_to_function.split(',')
        if opts.verbose:
            print "Limiting output to only functions:",limit_to_functions
    else:
        limit_to_functions = []

    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = load_table(opts.input_otu_table)
    ids_to_load = otu_table.ids(axis='observation')

    if(opts.input_count_table is None):
        #precalc file has specific name (e.g. ko_13_5_precalculated.tab.gz)
        precalc_file_name='_'.join([opts.type_of_prediction,opts.gg_version,'precalculated.tab.gz'])
        input_count_table=join(get_picrust_project_dir(),'picrust','data',precalc_file_name)
    else:
        input_count_table=opts.input_count_table

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    ext=path.splitext(input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", input_count_table

    if (ext == '.gz'):
        genome_table_fh = gzip.open(input_count_table,'rb')
    else:
        genome_table_fh = open(input_count_table,'U')

    #In the genome/trait table genomes are the samples and
    #genes are the observations


    if opts.load_precalc_file_in_biom:
        if not opts.suppress_subset_loading:
            #Now we want to use the OTU table information
            #to load only rows in the count table corresponding
            #to relevant OTUs

            if opts.verbose:
                print "Loading traits for %i organisms from the trait table" %len(ids_to_load)

            genome_table = load_subset_from_biom_str(genome_table_fh.read(),ids_to_load,axis='samples')
        else:
            if opts.verbose:
                print "Loading *full* count table because --suppress_subset_loading was passed. This may result in high memory usage"
            genome_table = load_table(genome_table_fh)
    else:
        genome_table = convert_precalc_to_biom(genome_table_fh,ids_to_load)
    ok_functional_categories = None

    metadata_type = None
    if opts.limit_to_functional_categories:
        ok_functional_categories = opts.limit_to_functional_categories.split("|")
        if opts.verbose:
            print "Limiting to functional categories: %s" %(str(ok_functional_categories))

        # Either KEGG_Pathways or COG_Category needs
        # to be assigned to metadata_key to limit to
        # functional categories (not needed for 
        # individual functions) 

        if opts.type_of_prediction == "ko":
            metadata_type = "KEGG_Pathways"
        elif opts.type_of_prediction == "cog":
            metadata_type = "COG_Category"
        elif opts.type_of_prediction == "rfam":
            exit("Stopping program: when type of prediction is set to rfam you can only limit to individual functions (-l) rather than to functional categories (-f)")
              
    partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions,\
      limit_to_functional_categories = ok_functional_categories ,  metadata_key = metadata_type )

    output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes])
    if opts.verbose:
        print "Writing results to output file: ",opts.output_fp

    make_output_dir_for_file(opts.output_fp)
    open(opts.output_fp,'w').write(output_text)