Exemple #1
0
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ext=path.splitext(opts.input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False)
        #print "Unweighted NSTI:", unweighted_nsti
        
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        #print "Samples:",samples
        #print "NSTIs:",nstis
        samples_and_nstis = zip(samples,nstis)
        #print "Samples and NSTIs:",samples_and_nstis
        lines = ["#Sample\tMetric\tValue\n"]
        #print weighted_nsti
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            lines.append(line)

        if opts.verbose:
            for l in sorted(lines):
                print l
        if opts.verbose:
            print "Writing accuracy information to file:", opts.accuracy_metrics
        open(opts.accuracy_metrics,'w').writelines(sorted(lines))

    if opts.verbose:
        print "Predicting the metagenome..."
        
    predicted_metagenomes = predict_metagenomes(otu_table,genome_table)

    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    if(opts.format_tab_delimited):
        open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf())
    else:
        open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))
Exemple #2
0
                if var_id not in genome_table.ids():
                    print "Variance table SampleId %s not in genome_table SampleIds" % var_id
            raise AssertionError(
                "Variance table and genome table contain different OTU ids")

        #sort the ObservationIds and SampleIds to be in the same order
        variance_table = variance_table.sort_order(
            genome_table.ids(axis='observation'), axis='observation')
        variance_table = variance_table.sort_order(genome_table.ids(),
                                                   axis='sample')

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        weighted_nsti = calc_nsti(otu_table, genome_table, weighted=True)
        samples = weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        samples_and_nstis = zip(samples, nstis)
        if opts.verbose:
            print "Writing NSTI information to file:", opts.accuracy_metrics
        accuracy_output_fh = open(opts.accuracy_metrics, 'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample, nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process
            assert set(variance_table.SampleIds) == set(genome_table.SampleIds)
        except AssertionError,e:
            for var_id in variance_table.SampleIds:
                if var_id not in genome_table.SampleIds:
                    print "Variance table SampleId %s not in genome_table SampleIds" %var_id
            raise AssertionError("Variance table and genome table contain different OTU ids")

        #sort the ObservationIds and SampleIds to be in the same order
        variance_table=variance_table.sortObservationOrder(genome_table.ObservationIds)
        variance_table=variance_table.sortSampleOrder(genome_table.SampleIds)

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        samples_and_nstis = zip(samples,nstis)
        if opts.verbose:
            print "Writing NSTI information to file:", opts.accuracy_metrics
        accuracy_output_fh = open(opts.accuracy_metrics,'w')
        accuracy_output_fh.write("#Sample\tMetric\tValue\n")
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            accuracy_output_fh.write(line)

    if opts.with_confidence:
        #If we are calculating variance, we get the prediction as part
        #of the process
        
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    if opts.verbose:
        print "Loading OTU table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))

    if opts.verbose:
        print "Done loading OTU table containing %i samples and %i OTUs." %(len(otu_table.SampleIds),len(otu_table.ObservationIds))
    if(opts.input_count_table is None):
        if(opts.type_of_prediction == 'KO'):
            input_count_table=join(get_picrust_project_dir(),'picrust','data','ko_precalculated.biom.gz')
        elif(opts.type_of_prediction == 'COG'):
            input_count_table=join(get_picrust_project_dir(),'picrust','data','cog_precalculated.biom.gz')
    else:
        input_count_table=opts.input_count_table

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    
    ext=path.splitext(input_count_table)[1]
    
    if (ext == '.gz'):
        genome_table_str = gzip.open(input_count_table,'rb').read()
    else:
        genome_table_str = open(input_count_table,'U').read()
    
    #In the genome/trait table genomes are the samples and 
    #genes are the observations
    
    if not opts.suppress_subset_loading:
        #Now we want to use the OTU table information
        #to load only rows in the count table corresponding
        #to relevant OTUs
        ids_to_load = otu_table.ObservationIds

        if opts.verbose:
            print "Loading traits for %i organisms from the trait table" %len(ids_to_load)

        genome_table = load_subset_from_biom_str(genome_table_str,ids_to_load,axis='samples')
    else:
        if opts.verbose:
            print "Loading *full* trait table because --suppress_subset_loading was passed. This may result in high memory usage."
        genome_table = parse_biom_table(genome_table_str)
    
    if opts.verbose:
        print "Done loading trait table containing %i functions for %i organisms." %(len(genome_table.ObservationIds),len(genome_table.SampleIds))

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False)
        #print "Unweighted NSTI:", unweighted_nsti
        
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        #print "Samples:",samples
        #print "NSTIs:",nstis
        samples_and_nstis = zip(samples,nstis)
        #print "Samples and NSTIs:",samples_and_nstis
        lines = ["#Sample\tMetric\tValue\n"]
        #print weighted_nsti
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            lines.append(line)

        if opts.verbose:
            for l in sorted(lines):
                print l
        if opts.verbose:
            print "Writing accuracy information to file:", opts.accuracy_metrics
        open(opts.accuracy_metrics,'w').writelines(sorted(lines))

    if opts.verbose:
        print "Predicting the metagenome..."
        
    predicted_metagenomes = predict_metagenomes(otu_table,genome_table)

    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    if(opts.format_tab_delimited):
        open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf(header_key="KEGG Pathways",header_value="KEGG Pathways",metadata_formatter=lambda s: '|'.join(['; '.join(l) for l in s])))
    else:
        open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))