Esempio n. 1
0
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Moves the "blast_affiliations" metadata of each observation from the BIOM to a TSV file, flattens the remaining list metadata into strings and writes the resulting BIOM.
    @param in_biom: [str] The path to the input BIOM.
    @param out_biom: [str] The path to the output BIOM.
    @param out_metadata: [str] The path to the output TSV receiving one line per blast affiliation.
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager closes (and flushes) the TSV even if an error occurs
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: "blast_affiliations" is deleted inside the loop
            for metadata_key in list(metadata.keys()):
                value = metadata[metadata_key]
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if value is not None:
                        for current_affi in value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif isinstance(value, (list, tuple)): # All list are transformed in string (None is left untouched)
                    metadata[metadata_key] = ";".join( map(str, value) )
            # At this point a list "blast_taxonomy" has already been joined into a string
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    taxonomy_depth = len(taxonomy_ranks) # The depth of the last classified observation is retained
                    metadata["taxonomy"] = taxonomy_ranks
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
Esempio n. 2
0
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Loads a BIOM, discards the listed observations and saves the result.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the input BIOM.
    @param output_biom: [str] The path to the output BIOM.
    """
    # Load the whole BIOM from the input file
    filtered_biom = BiomIO.from_json( input_biom )
    # Discard the unwanted observations on the loaded object
    filtered_biom.remove_observations( removed_observations )
    # Save the filtered BIOM
    BiomIO.write( output_biom, filtered_biom )
def process(in_biom, out_biom, out_metadata):
    """
    @summary: Moves the "blast_affiliations" metadata of each observation from the BIOM to a TSV file, flattens the remaining list metadata into strings and writes the resulting BIOM.
    @param in_biom: [str] The path to the input BIOM.
    @param out_biom: [str] The path to the output BIOM.
    @param out_metadata: [str] The path to the output TSV receiving one line per blast affiliation.
    """
    ordered_blast_keys = [
        "taxonomy", "subject", "evalue", "perc_identity",
        "perc_query_coverage", "aln_length"
    ]  # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager closes (and flushes) the TSV even if an error occurs
    with open(out_metadata, "w") as FH_metadata:
        FH_metadata.write("#OTUID\t" + "\t".join(ordered_blast_keys) + "\n")
        biom = BiomIO.from_json(in_biom)
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a snapshot of the keys: "blast_affiliations" is
            # deleted inside the loop
            for metadata_key in list(metadata.keys()):
                value = metadata[metadata_key]
                if metadata_key == "blast_affiliations":  # Extract blast_affiliations metadata in metadata_file
                    if value is not None:
                        for current_affi in value:
                            if isinstance(current_affi["taxonomy"],
                                          (list, tuple)):
                                current_affi["taxonomy"] = ";".join(
                                    current_affi["taxonomy"])
                            FH_metadata.write(
                                observation["id"] + "\t" + "\t".join([
                                    str(current_affi[item])
                                    for item in ordered_blast_keys
                                ]) + "\n")
                    del metadata[metadata_key]
                elif isinstance(value, (list, tuple)):  # All list are transformed in string (None is left untouched)
                    metadata[metadata_key] = ";".join(map(str, value))
            # At this point a list "blast_taxonomy" has already been joined
            # into a string
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append(observation["id"])
                    metadata["taxonomy"] = list()
                else:
                    taxonomy_ranks = metadata["blast_taxonomy"].split(";")
                    # The depth of the last classified observation is retained
                    taxonomy_depth = len(taxonomy_ranks)
                    metadata["taxonomy"] = taxonomy_ranks
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(
                observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"
                                                ] * taxonomy_depth
    BiomIO.write(out_biom, biom)
Esempio n. 4
0
def process( args ):
    """
    @summary: Runs the rarefaction and the taxonomy tree commands on the input BIOM, then writes the summary file.
    @param args: [Namespace] The script arguments. The attributes read here are: output_file, input_biom, taxonomy_tag, multiple_tag, tax_consensus_tag, taxonomic_ranks, rarefaction_ranks, log_file and debug.
    @note: The temporary files are created in the directory of args.output_file and are deleted at the end unless args.debug is set.
    """
    tmp_files = TmpFiles( os.path.split(args.output_file)[0] )

    try:
        # Add temp taxonomy if multiple and without consensus
        tmp_biom = args.input_biom
        used_taxonomy_tag = args.taxonomy_tag
        if args.multiple_tag is not None:
            used_taxonomy_tag = args.tax_consensus_tag
            if args.tax_consensus_tag is None:
                used_taxonomy_tag = "Used_taxonomy_FROGS-affi"
                tmp_biom = tmp_files.add( "tax.biom" )
                biom = BiomIO.from_json( args.input_biom )
                for observation in biom.get_observations():
                    metadata = observation["metadata"]
                    affiliations = metadata[args.multiple_tag]
                    # The truthiness test skips empty lists and also None (len(None) would raise a TypeError)
                    if affiliations:
                        # The taxonomy of the first affiliation is used for the observation
                        metadata[used_taxonomy_tag] = affiliations[0][args.taxonomy_tag]
                BiomIO.write( tmp_biom, biom )
                del biom

        # Rarefaction
        tax_depth = [args.taxonomic_ranks.index(rank) for rank in args.rarefaction_ranks]
        rarefaction_cmd = Rarefaction(tmp_biom, tmp_files, used_taxonomy_tag, tax_depth)
        rarefaction_cmd.submit( args.log_file )
        rarefaction_files = rarefaction_cmd.output_files

        # Taxonomy tree
        tree_count_file = tmp_files.add( "taxCount.enewick" )
        tree_ids_file = tmp_files.add( "taxCount_ids.tsv" )
        TaxonomyTree(tmp_biom, used_taxonomy_tag, tree_count_file, tree_ids_file).submit( args.log_file )

        # Writes summary (from the original input BIOM, not from the temporary one)
        write_summary( args.output_file, args.input_biom, tree_count_file, tree_ids_file, rarefaction_files, args )
    finally:
        # The temporary files are kept only in debug mode
        if not args.debug:
            tmp_files.deleteAll()