def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Moves the blast affiliations of each observation from the BIOM to a TSV file, flattens the other list metadata and sets a 'taxonomy' metadata from 'blast_taxonomy'.
    @param in_biom: The input BIOM, as accepted by BiomIO.from_json (presumably a file path).
    @param out_biom: The output BIOM, as accepted by BiomIO.write (presumably a file path).
    @param out_metadata: [str] The path to the TSV file receiving one line per blast affiliation.
    @note: The number of "Unclassified" ranks is the rank count of the last observation that has a non-None 'blast_taxonomy'.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees the TSV is flushed and closed, even on error
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: 'blast_affiliations' is deleted inside the loop
            for metadata_key in list( metadata.keys() ):
                metadata_value = metadata[metadata_key]
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if metadata_value is not None:
                        for current_affi in metadata_value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif isinstance(metadata_value, (list, tuple)): # All list are transformed in string
                    metadata[metadata_key] = ";".join( map(str, metadata_value) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    # At this point a list taxonomy has already been joined with ';'
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks)
                    metadata["taxonomy"] = taxonomy_ranks

    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Loads a BIOM, drops the listed observations and saves the result.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the input BIOM.
    @param output_biom: [str] The path to the output BIOM.
    """
    filtered_biom = BiomIO.from_json( input_biom )
    # The removal is done in place on the loaded object
    filtered_biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, filtered_biom )
def process(in_biom, out_biom, out_metadata):
    """
    @summary: Moves the blast affiliations of each observation from the BIOM to a TSV file, flattens the other list metadata and sets a 'taxonomy' metadata from 'blast_taxonomy'.
    @param in_biom: The input BIOM, as accepted by BiomIO.from_json (presumably a file path).
    @param out_biom: The output BIOM, as accepted by BiomIO.write (presumably a file path).
    @param out_metadata: [str] The path to the TSV file receiving one line per blast affiliation.
    @note: The number of "Unclassified" ranks is the rank count of the last observation that has a non-None 'blast_taxonomy'.
    """
    ordered_blast_keys = [
        "taxonomy", "subject", "evalue", "perc_identity",
        "perc_query_coverage", "aln_length"
    ]  # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees the TSV is flushed and closed, even on error
    with open(out_metadata, "w") as FH_metadata:
        FH_metadata.write("#OTUID\t" + "\t".join(ordered_blast_keys) + "\n")
        biom = BiomIO.from_json(in_biom)
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: 'blast_affiliations' is deleted inside the loop
            for metadata_key in list(metadata.keys()):
                metadata_value = metadata[metadata_key]
                if metadata_key == "blast_affiliations":  # Extract blast_affiliations metadata in metadata_file
                    if metadata_value is not None:
                        for current_affi in metadata_value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join(
                                    current_affi["taxonomy"])
                            FH_metadata.write(observation["id"] + "\t" + "\t".join([
                                str(current_affi[item])
                                for item in ordered_blast_keys
                            ]) + "\n")
                    del metadata[metadata_key]
                elif isinstance(metadata_value, (list, tuple)):  # All list are transformed in string
                    metadata[metadata_key] = ";".join(map(str, metadata_value))
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append(observation["id"])
                    metadata["taxonomy"] = list()
                else:
                    # At this point a list taxonomy has already been joined with ';'
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks)
                    metadata["taxonomy"] = taxonomy_ranks

    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write(out_biom, biom)
def process( args ):
    """
    @summary: Runs the rarefaction and taxonomy tree steps on the input BIOM, then writes the summary.
    @param args: The parsed arguments. The attributes read here are output_file, input_biom, taxonomy_tag, multiple_tag, tax_consensus_tag, taxonomic_ranks, rarefaction_ranks, log_file and debug.
    @note: The temporary files are deleted at the end, even on error, unless args.debug is set.
    """
    workspace = TmpFiles( os.path.dirname(args.output_file) )

    try:
        # Default: work directly on the input BIOM with the provided taxonomy tag
        analysed_biom = args.input_biom
        used_taxonomy_tag = args.taxonomy_tag
        if args.multiple_tag is not None:
            used_taxonomy_tag = args.tax_consensus_tag
            if args.tax_consensus_tag is None:
                # Multiple affiliations without consensus: store the taxonomy of
                # the first affiliation under a temporary tag in a temporary BIOM
                used_taxonomy_tag = "Used_taxonomy_FROGS-affi"
                analysed_biom = workspace.add( "tax.biom" )
                loaded_biom = BiomIO.from_json( args.input_biom )
                for observation in loaded_biom.get_observations():
                    obs_metadata = observation["metadata"]
                    affiliations = obs_metadata[args.multiple_tag]
                    if len(affiliations) == 0:
                        continue
                    obs_metadata[used_taxonomy_tag] = affiliations[0][args.taxonomy_tag]
                BiomIO.write( analysed_biom, loaded_biom )
                del loaded_biom

        # Rarefaction
        rank_indexes = [args.taxonomic_ranks.index(rank_name) for rank_name in args.rarefaction_ranks]
        rarefaction_step = Rarefaction( analysed_biom, workspace, used_taxonomy_tag, rank_indexes )
        rarefaction_step.submit( args.log_file )
        rarefaction_outputs = rarefaction_step.output_files

        # Taxonomy tree
        counts_tree_path = workspace.add( "taxCount.enewick" )
        ids_tree_path = workspace.add( "taxCount_ids.tsv" )
        tree_step = TaxonomyTree( analysed_biom, used_taxonomy_tag, counts_tree_path, ids_tree_path )
        tree_step.submit( args.log_file )

        # Writes summary
        write_summary( args.output_file, args.input_biom, counts_tree_path, ids_tree_path, rarefaction_outputs, args )
    finally:
        if not args.debug:
            workspace.deleteAll()