def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.verbose: print "Loading otu table: ",opts.input_otu_table otu_table = parse_biom_table(open(opts.input_otu_table,'U')) ext=path.splitext(opts.input_count_table)[1] if opts.verbose: print "Loading count table: ", opts.input_count_table if (ext == '.gz'): genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb')) else: genome_table = parse_biom_table(open(opts.input_count_table,'U')) make_output_dir_for_file(opts.output_metagenome_table) if opts.accuracy_metrics: # Calculate accuracy metrics #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False) #print "Unweighted NSTI:", unweighted_nsti weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True) samples= weighted_nsti[0] nstis = list(weighted_nsti[1]) #print "Samples:",samples #print "NSTIs:",nstis samples_and_nstis = zip(samples,nstis) #print "Samples and NSTIs:",samples_and_nstis lines = ["#Sample\tMetric\tValue\n"] #print weighted_nsti for sample,nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti)) lines.append(line) if opts.verbose: for l in sorted(lines): print l if opts.verbose: print "Writing accuracy information to file:", opts.accuracy_metrics open(opts.accuracy_metrics,'w').writelines(sorted(lines)) if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table,genome_table) if opts.verbose: print "Writing results to output file: ",opts.output_metagenome_table make_output_dir_for_file(opts.output_metagenome_table) if(opts.format_tab_delimited): open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf()) else: open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))
if var_id not in genome_table.ids(): print "Variance table SampleId %s not in genome_table SampleIds" % var_id raise AssertionError( "Variance table and genome table contain different OTU ids") #sort the ObservationIds and SampleIds to be in the same order variance_table = variance_table.sort_order( genome_table.ids(axis='observation'), axis='observation') variance_table = variance_table.sort_order(genome_table.ids(), axis='sample') make_output_dir_for_file(opts.output_metagenome_table) if opts.accuracy_metrics: # Calculate accuracy metrics weighted_nsti = calc_nsti(otu_table, genome_table, weighted=True) samples = weighted_nsti[0] nstis = list(weighted_nsti[1]) samples_and_nstis = zip(samples, nstis) if opts.verbose: print "Writing NSTI information to file:", opts.accuracy_metrics accuracy_output_fh = open(opts.accuracy_metrics, 'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample, nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" % (sample, str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process
assert set(variance_table.SampleIds) == set(genome_table.SampleIds) except AssertionError,e: for var_id in variance_table.SampleIds: if var_id not in genome_table.SampleIds: print "Variance table SampleId %s not in genome_table SampleIds" %var_id raise AssertionError("Variance table and genome table contain different OTU ids") #sort the ObservationIds and SampleIds to be in the same order variance_table=variance_table.sortObservationOrder(genome_table.ObservationIds) variance_table=variance_table.sortSampleOrder(genome_table.SampleIds) make_output_dir_for_file(opts.output_metagenome_table) if opts.accuracy_metrics: # Calculate accuracy metrics weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True) samples= weighted_nsti[0] nstis = list(weighted_nsti[1]) samples_and_nstis = zip(samples,nstis) if opts.verbose: print "Writing NSTI information to file:", opts.accuracy_metrics accuracy_output_fh = open(opts.accuracy_metrics,'w') accuracy_output_fh.write("#Sample\tMetric\tValue\n") for sample,nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti)) accuracy_output_fh.write(line) if opts.with_confidence: #If we are calculating variance, we get the prediction as part #of the process
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) if opts.verbose: print "Loading OTU table: ",opts.input_otu_table otu_table = parse_biom_table(open(opts.input_otu_table,'U')) if opts.verbose: print "Done loading OTU table containing %i samples and %i OTUs." %(len(otu_table.SampleIds),len(otu_table.ObservationIds)) if(opts.input_count_table is None): if(opts.type_of_prediction == 'KO'): input_count_table=join(get_picrust_project_dir(),'picrust','data','ko_precalculated.biom.gz') elif(opts.type_of_prediction == 'COG'): input_count_table=join(get_picrust_project_dir(),'picrust','data','cog_precalculated.biom.gz') else: input_count_table=opts.input_count_table if opts.verbose: print "Loading trait table: ", input_count_table ext=path.splitext(input_count_table)[1] if (ext == '.gz'): genome_table_str = gzip.open(input_count_table,'rb').read() else: genome_table_str = open(input_count_table,'U').read() #In the genome/trait table genomes are the samples and #genes are the observations if not opts.suppress_subset_loading: #Now we want to use the OTU table information #to load only rows in the count table corresponding #to relevant OTUs ids_to_load = otu_table.ObservationIds if opts.verbose: print "Loading traits for %i organisms from the trait table" %len(ids_to_load) genome_table = load_subset_from_biom_str(genome_table_str,ids_to_load,axis='samples') else: if opts.verbose: print "Loading *full* trait table because --suppress_subset_loading was passed. This may result in high memory usage." genome_table = parse_biom_table(genome_table_str) if opts.verbose: print "Done loading trait table containing %i functions for %i organisms." %(len(genome_table.ObservationIds),len(genome_table.SampleIds)) make_output_dir_for_file(opts.output_metagenome_table) if opts.accuracy_metrics: # Calculate accuracy metrics #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False) #print "Unweighted NSTI:", unweighted_nsti weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True) samples= weighted_nsti[0] nstis = list(weighted_nsti[1]) #print "Samples:",samples #print "NSTIs:",nstis samples_and_nstis = zip(samples,nstis) #print "Samples and NSTIs:",samples_and_nstis lines = ["#Sample\tMetric\tValue\n"] #print weighted_nsti for sample,nsti in samples_and_nstis: line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti)) lines.append(line) if opts.verbose: for l in sorted(lines): print l if opts.verbose: print "Writing accuracy information to file:", opts.accuracy_metrics open(opts.accuracy_metrics,'w').writelines(sorted(lines)) if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table,genome_table) if opts.verbose: print "Writing results to output file: ",opts.output_metagenome_table make_output_dir_for_file(opts.output_metagenome_table) if(opts.format_tab_delimited): open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf(header_key="KEGG Pathways",header_value="KEGG Pathways",metadata_formatter=lambda s: '|'.join(['; '.join(l) for l in s]))) else: open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))