Ejemplo n.º 1
0
def excluded_obs_on_blastMetrics( input_biom, tag, cmp_operator, threshold, excluded_file ):
    """
    @summary: Writes the list of the observations with no affiliations with sufficient blast value.
    @param input_biom: [str] The path to the BIOM file to check.
    @param tag: [str] The metadata checked in each element of "blast_affiliations".
    @param cmp_operator: [str] The operator used in comparison (tag_value ">=" threshold or tag_value "<=" threshold).
    @param threshold: [float] The limit for the tag value.
    @param excluded_file: [str] The path to the output file (one observation ID by line).
    @raise KeyError: If cmp_operator is neither ">=" nor "<=".
    """
    valid_operators = {
        ">=": operator.ge,
        "<=": operator.le
    }
    # The operator is resolved before any file is opened: an invalid operator does not create an output file
    cmp_func = valid_operators[cmp_operator]
    biom = BiomIO.from_json( input_biom )
    # The context manager closes the output even if an observation raises
    with open( excluded_file, "w" ) as FH_excluded_file:
        for observation in biom.get_observations():
            alignments = observation["metadata"]["blast_affiliations"]
            # An observation is kept as soon as one alignment satisfies the comparison
            is_kept = any( cmp_func(float(current_alignment[tag]), threshold) for current_alignment in alignments )
            if not is_kept:
                FH_excluded_file.write( str(observation["id"]) + "\n" )
Ejemplo n.º 2
0
def process( in_biom, out_biom, out_metadata ):
    """
    @summary: Extracts the "blast_affiliations" metadata of each observation in a TSV file and writes a BIOM without this metadata, where the list metadata are joined in strings and where "taxonomy" is set from "blast_taxonomy".
    @param in_biom: [str] The path to the input BIOM file.
    @param out_biom: [str] The path to the output BIOM file.
    @param out_metadata: [str] The path to the output TSV file (one line by blast affiliation).
    """
    ordered_blast_keys = ["taxonomy", "subject", "evalue", "perc_identity", "perc_query_coverage", "aln_length"] # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees that the metadata file is flushed and closed
    with open( out_metadata, "w" ) as FH_metadata:
        FH_metadata.write( "#OTUID\t" + "\t".join(ordered_blast_keys) + "\n" )
        biom = BiomIO.from_json( in_biom )
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a copy of the keys: "blast_affiliations" is deleted during the loop
            for metadata_key in list(metadata.keys()):
                value = metadata[metadata_key]
                if metadata_key == "blast_affiliations": # Extract blast_affiliations metadata in metadata_file
                    if value is not None:
                        for current_affi in value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join( current_affi["taxonomy"] )
                            FH_metadata.write( observation["id"] + "\t" + "\t".join([str(current_affi[item]) for item in ordered_blast_keys]) + "\n" )
                    del metadata[metadata_key]
                elif isinstance(value, (list, tuple)): # All list are transformed in string
                    metadata[metadata_key] = ";".join( map(str, value) )
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append( observation["id"] )
                    metadata["taxonomy"] = list()
                else:
                    taxonomy = metadata["blast_taxonomy"].split(";")
                    # The depth of the last classified observation is used to fill the unclassified ones
                    taxonomy_depth = len(taxonomy)
                    metadata["taxonomy"] = taxonomy
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write( out_biom, biom )
Ejemplo n.º 3
0
def remove_observations( removed_observations, input_biom, output_biom ):
    """
    @summary: Loads a BIOM file, drops the specified observations and writes the result.
    @param removed_observations: [list] The names of the observations to remove.
    @param input_biom: [str] The path to the BIOM file to read.
    @param output_biom: [str] The path to the BIOM file to write.
    """
    filtered_biom = BiomIO.from_json( input_biom )
    # The removal is applied in place on the loaded object
    filtered_biom.remove_observations( removed_observations )
    BiomIO.write( output_biom, filtered_biom )
Ejemplo n.º 4
0
def process(in_biom, out_biom, out_metadata):
    """
    @summary: Extracts the "blast_affiliations" metadata of each observation in a TSV file and writes a BIOM without this metadata, where the list metadata are joined in strings and where "taxonomy" is set from "blast_taxonomy".
    @param in_biom: [str] The path to the input BIOM file.
    @param out_biom: [str] The path to the output BIOM file.
    @param out_metadata: [str] The path to the output TSV file (one line by blast affiliation).
    """
    ordered_blast_keys = [
        "taxonomy", "subject", "evalue", "perc_identity",
        "perc_query_coverage", "aln_length"
    ]  # Keys in blast_affiliations metadata
    taxonomy_depth = 0
    unclassified_observations = list()

    # The context manager guarantees that the metadata file is flushed and closed
    with open(out_metadata, "w") as FH_metadata:
        FH_metadata.write("#OTUID\t" + "\t".join(ordered_blast_keys) + "\n")
        biom = BiomIO.from_json(in_biom)
        for observation in biom.get_observations():
            metadata = observation["metadata"]
            # Iterate on a copy of the keys: "blast_affiliations" is deleted during the loop
            for metadata_key in list(metadata.keys()):
                value = metadata[metadata_key]
                if metadata_key == "blast_affiliations":  # Extract blast_affiliations metadata in metadata_file
                    if value is not None:
                        for current_affi in value:
                            if isinstance(current_affi["taxonomy"], (list, tuple)):
                                current_affi["taxonomy"] = ";".join(
                                    current_affi["taxonomy"])
                            affi_fields = [
                                str(current_affi[item])
                                for item in ordered_blast_keys
                            ]
                            FH_metadata.write(observation["id"] + "\t" +
                                              "\t".join(affi_fields) + "\n")
                    del metadata[metadata_key]
                elif isinstance(value, (list, tuple)):  # All list are transformed in string
                    metadata[metadata_key] = ";".join(map(str, value))
            if "blast_taxonomy" in metadata:
                if metadata["blast_taxonomy"] is None:
                    unclassified_observations.append(observation["id"])
                    metadata["taxonomy"] = list()
                else:
                    taxonomy = metadata["blast_taxonomy"].split(";")
                    # The depth of the last classified observation is used to fill the unclassified ones
                    taxonomy_depth = len(taxonomy)
                    metadata["taxonomy"] = taxonomy
    # Add "Unclassified" ranks in unclassified observations
    if taxonomy_depth > 0:
        for observation_id in unclassified_observations:
            observation_metadata = biom.get_observation_metadata(
                observation_id)
            observation_metadata["taxonomy"] = ["Unclassified"] * taxonomy_depth
    BiomIO.write(out_biom, biom)
Ejemplo n.º 5
0
def excluded_obs_on_nBiggest( input_biom, nb_selected, excluded_file ):
    """
    @summary: Writes the list of all the observations without the n most abundant.
    @param input_biom: [str] The path to the BIOM file.
    @param nb_selected: [int] The number of the most abundant observations that will not be written in the excluded list.
    @param excluded_file: [str] The path to the output file (one observation name by line).
    """
    biom = BiomIO.from_json( input_biom )
    # The context manager closes the output even if the sort or a write raises
    with open( excluded_file, "w" ) as FH_excluded_file:
        # Sort by decreasing count: the first nb_selected elements are the observations to keep
        sorted_obs_counts = sorted( biom.get_observations_counts(), key=lambda observation: observation[1], reverse=True )
        for observation_name, observation_count in sorted_obs_counts[nb_selected:]:
            FH_excluded_file.write( observation_name + "\n" )
Ejemplo n.º 6
0
def excluded_obs_on_samplePresence(input_biom, min_sample_presence, excluded_file):
    """
    @summary: Writes the list of the observations present in an insufficient number of samples.
    @param input_biom: [str] The path to the BIOM file to check.
    @param min_sample_presence: [int] The observations present in a number of samples inferior than this value are reported in the excluded file.
    @param excluded_file: [str] The path to the output file (one observation name by line).
    """
    biom = BiomIO.from_json( input_biom )
    # The context manager closes the output even if an observation raises
    with open( excluded_file, "w" ) as FH_excluded_file:
        for observation_name in biom.get_observations_names():
            # Count the elements without assuming that the returned iterable supports len()
            nb_samples = sum(1 for x in biom.get_samples_by_observation(observation_name))
            if nb_samples < min_sample_presence:
                FH_excluded_file.write( observation_name + "\n" )
Ejemplo n.º 7
0
def get_alignment_distrib( input_biom, identity_tag, coverage_tag, multiple_tag ):
    """
    @summary: Returns the count (seq and clstr) for each identity/coverage couple found in the observations.
    @param input_biom: The path to the processed BIOM.
    @param identity_tag: The metadata tag used in BIOM file to store the alignment identity.
    @param coverage_tag: The metadata tag used in BIOM file to store the alignment query coverage.
    @param multiple_tag: The metadata tag used in BIOM file to store the list of possible taxonomies. If it is None the identity and the coverage are read directly in the observation metadata, otherwise they are read in the first element of this list.
    @returns: [list] The count for the different identity/coverage.
              Example:
                [
                    [100, 100, { "clstr": 53, "seq": 20500 }],
                    [99, 100, { "clstr": 35, "seq": 18000 }],
                    [90, 95, { "clstr": 1, "seq": 10 }],
                ]
    """
    biom = BiomIO.from_json( input_biom )
    aln_results_hash = dict()
    for observation in biom.get_observations():
        observation_metadata = observation['metadata']
        identity = None
        coverage = None
        # The function parameter is used here, not the module-level script arguments
        if multiple_tag is not None:
            if multiple_tag in observation_metadata and len(observation_metadata[multiple_tag]) > 0:
                first_affiliation = observation_metadata[multiple_tag][0]
                identity = first_affiliation[identity_tag]
                coverage = first_affiliation[coverage_tag]
        elif identity_tag in observation_metadata and coverage_tag in observation_metadata:
            identity = observation_metadata[identity_tag]
            coverage = observation_metadata[coverage_tag]
        if identity is not None:
            by_coverage = aln_results_hash.setdefault( identity, dict() )
            counts = by_coverage.setdefault( coverage, {"clstr": 0, "seq": 0} )
            counts["clstr"] += 1
            counts["seq"] += biom.get_observation_count( observation['id'] )
    del biom
    # Flatten the two-level dictionary in a list of [identity, coverage, counts]
    aln_results = list()
    for ident, by_coverage in aln_results_hash.items():
        for cover, counts in by_coverage.items():
            aln_results.append( [ident, cover, counts] )
    return aln_results
Ejemplo n.º 8
0
def get_bootstrap_distrib( input_biom, bootstrap_tag, multiple_tag, taxonomic_ranks=None ):
    """
    @summary: Returns by taxonomic rank the count (seq and clstr) for the different bootstrap categories.
    @param input_biom: The path to the processed BIOM.
    @param bootstrap_tag: The metadata tag used in BIOM file to store the taxonomy bootstraps.
    @param multiple_tag: The metadata tag used in BIOM file to store the list of possible taxonomies. If it is None the bootstraps are read directly in the observation metadata, otherwise they are read in the first element of this list.
    @param taxonomic_ranks: [list] The name of the rank for each taxonomy depth. By default the module-level 'args.taxonomic_ranks' is used.
    @returns: [dict] By taxonomic rank the count for the different bootstrap categories. The category keys are the bootstrap values multiplied by 100.
              Example:
                {
                    "Phylum": {
                        "80": { "clstr": 1, "seq":100 },
                        "90": {    "clstr": 2,    "seq":400 },
                        "100": { "clstr": 50, "seq":20000 },
                    },
                    "Genus":{
                        "80":{ "clstr": 1, "seq":100 },
                        "90":{ "clstr": 2, "seq":400 },
                        "100":{ "clstr": 50, "seq":20000 },
                    }
                }
    """
    if taxonomic_ranks is None:
        # Historical behaviour: the ranks come from the script arguments stored at module level
        taxonomic_ranks = args.taxonomic_ranks
    bootstrap_results = dict()

    biom = BiomIO.from_json( input_biom )
    for observation in biom.get_observations():
        observation_metadata = observation['metadata']
        bootstrap = None
        if multiple_tag is not None:
            if multiple_tag in observation_metadata and len(observation_metadata[multiple_tag]) > 0:
                bootstrap = observation_metadata[multiple_tag][0][bootstrap_tag]
        elif bootstrap_tag in observation_metadata:
            bootstrap = observation_metadata[bootstrap_tag]
        if bootstrap is not None:
            # The count does not depend on the rank: it is retrieved once by observation
            observation_count = biom.get_observation_count( observation['id'] )
            for taxonomy_depth, rank_bootstrap in enumerate( bootstrap ):
                rank_bootstrap = rank_bootstrap * 100
                rank = taxonomic_ranks[taxonomy_depth]
                by_bootstrap = bootstrap_results.setdefault( rank, dict() )
                counts = by_bootstrap.setdefault( rank_bootstrap, {"clstr": 0, "seq": 0} )
                counts["clstr"] += 1
                counts["seq"] += observation_count
    del biom
    return bootstrap_results
Ejemplo n.º 9
0
def excluded_obs_on_rdpBootstrap(input_biom, taxonomic_depth, min_bootstrap, excluded_file):
    """
    @summary: Writes the list of the observations with an insufficient bootstrap on the specified taxonomic rank.
    @param input_biom: [str] The path to the BIOM file to check.
    @param taxonomic_depth: [int] The taxonomic rank depth to check (example: 6 for Species in system "Domain, Phylum, Class, Order, Family, Genus, Species").
    @param min_bootstrap: [float] The observations with a value inferior to this threshold at the specified taxonomic depth are reported in the excluded file.
    @param excluded_file: [str] The path to the output file (one observation ID by line).
    """
    biom = BiomIO.from_json( input_biom )
    # The context manager closes the output even if an observation raises
    with open( excluded_file, "w" ) as FH_excluded_file:
        for observation in biom.get_observations():
            bootstrap = observation["metadata"]["rdp_bootstrap"]
            # The bootstraps can be stored as a ";" separated text: any string type (str or unicode) is split
            if hasattr(bootstrap, "split"):
                bootstrap = bootstrap.split(";")
            # The values coming from a split are text: the comparison must be done on numbers
            if float(bootstrap[taxonomic_depth]) < min_bootstrap:
                FH_excluded_file.write( str(observation["id"]) + "\n" )
Ejemplo n.º 10
0
def excluded_obs_on_abundance(input_biom, min_abundance, excluded_file):
    """
    @summary: Writes the list of the observations with an insufficient abundance.
    @param input_biom: [str] The path to the BIOM file to check.
    @param min_abundance: [int/float] The observations with an abundance inferior than this value are reported in the excluded file. A float is interpreted as a proportion of the total count of the BIOM, any other type as a number of sequences.
    @param excluded_file: [str] The path to the output file (one observation ID by line).
    """
    biom = BiomIO.from_json( input_biom )
    min_nb_seq = min_abundance
    if isinstance(min_abundance, float):
        # Convert the proportion in a number of sequences
        min_nb_seq = biom.get_total_count() * min_abundance
    # The context manager closes the output even if an observation raises
    with open( excluded_file, "w" ) as FH_excluded_file:
        for idx, count_by_sample in enumerate(biom.to_count()):
            abundance = sum(count_by_sample)
            if abundance < min_nb_seq:
                # The rows of to_count() are indexed like biom.rows
                FH_excluded_file.write( str(biom.rows[idx]["id"]) + "\n" )
Ejemplo n.º 11
0
 def get_step_size(self, nb_step=35):
     """
     @summary: Returns the step size to obtain 'nb_step' steps or more in 3/4 of samples.
     @param nb_step: [int] The number of expected steps.
     @returns: [int] The step size.
     @raise IndexError: If the BIOM does not provide any sample.
     """
     # Get the number of sequences by sample
     biom = BiomIO.from_json( self.in_biom )
     counts = sorted( biom.get_sample_count(sample_name) for sample_name in biom.get_samples_names() )
     del biom
     nb_samples = len(counts)
     # Finds the lower quartile number of sequences (floor division: the value is used as an index)
     lower_quartile_idx = nb_samples // 4
     nb_seq = counts[lower_quartile_idx]
     # If lower quartile sample is empty, use the first non-empty sample after it
     if nb_seq == 0:
         for current_count in counts[lower_quartile_idx:]:
             if current_count != 0:
                 nb_seq = current_count
                 break
     return int(nb_seq // nb_step)
Ejemplo n.º 12
0
def process( args ):
    """
    @summary: Computes the rarefaction and the taxonomy tree of the input BIOM then writes the summary.
    @param args: The script arguments. The attributes read are: input_biom, output_file, log_file, taxonomy_tag, multiple_tag, tax_consensus_tag, taxonomic_ranks, rarefaction_ranks and debug.
    """
    output_dir = os.path.dirname( args.output_file )
    tmp_files = TmpFiles( output_dir )

    try:
        processed_biom = args.input_biom
        taxonomy_tag = args.taxonomy_tag
        without_consensus = args.multiple_tag is not None and args.tax_consensus_tag is None
        if without_consensus:
            # Multiple affiliations without consensus: a temporary BIOM stores the taxonomy of the first affiliation
            taxonomy_tag = "Used_taxonomy_FROGS-affi"
            processed_biom = tmp_files.add( "tax.biom" )
            tagged_biom = BiomIO.from_json( args.input_biom )
            for current_obs in tagged_biom.get_observations():
                obs_metadata = current_obs["metadata"]
                affiliations = obs_metadata[args.multiple_tag]
                if len(affiliations) > 0:
                    obs_metadata[taxonomy_tag] = affiliations[0][args.taxonomy_tag]
            BiomIO.write( processed_biom, tagged_biom )
            del tagged_biom
        elif args.multiple_tag is not None:
            taxonomy_tag = args.tax_consensus_tag

        # Rarefaction
        ranks_depths = list()
        for current_rank in args.rarefaction_ranks:
            ranks_depths.append( args.taxonomic_ranks.index(current_rank) )
        rarefaction_step = Rarefaction( processed_biom, tmp_files, taxonomy_tag, ranks_depths )
        rarefaction_step.submit( args.log_file )
        rarefaction_outputs = rarefaction_step.output_files

        # Taxonomy tree
        count_tree_path = tmp_files.add( "taxCount.enewick" )
        ids_tree_path = tmp_files.add( "taxCount_ids.tsv" )
        tree_step = TaxonomyTree( processed_biom, taxonomy_tag, count_tree_path, ids_tree_path )
        tree_step.submit( args.log_file )

        # Writes summary (from the original BIOM and not from the temporary one)
        write_summary( args.output_file, args.input_biom, count_tree_path, ids_tree_path, rarefaction_outputs, args )
    finally:
        # The temporary files are kept only in debug mode
        if not args.debug:
            tmp_files.deleteAll()
Ejemplo n.º 13
0
def write_summary( summary_file, input_biom, tree_count_file, tree_ids_file, rarefaction_files, args ):
    """
    @summary: Writes the summary of results.
    @param summary_file: [str] The output file.
    @param input_biom: [str] Path to the input BIOM.
    @param tree_count_file: [str] Path to biomTools treeCount output.
    @param tree_ids_file: [str] Path to biomTools treeCount optional output.
    @param rarefaction_files: [list] Paths to biomTools rarefaction outputs, in the same order as args.rarefaction_ranks.
    @param args: The script arguments.
    @raise ValueError: If a rarefaction file contains a data line before any header line.
    """
    # Get taxonomy distribution
    with open( tree_count_file ) as FH_tree_count:
        newick_tree = FH_tree_count.readline()
    ordered_samples_names = list()
    with open( tree_ids_file ) as FH_tree_ids:
        for line in FH_tree_ids:
            tree_id, sample_name = line.strip().split( "\t", 1 )
            ordered_samples_names.append( sample_name )

    # Get bootstrap metrics
    bootstrap_results = None
    if args.bootstrap_tag is not None:
        bootstrap_results = get_bootstrap_distrib( input_biom, args.bootstrap_tag, args.multiple_tag )

    # Get alignment metrics
    aln_results = None
    if args.identity_tag is not None and args.coverage_tag is not None:
        aln_results = get_alignment_distrib( input_biom, args.identity_tag, args.coverage_tag, args.multiple_tag )

    # Get rarefaction data
    rarefaction_step_size = None
    rarefaction = None
    samples = None
    biom = BiomIO.from_json( input_biom )
    for rank_idx, current_file in enumerate(rarefaction_files):
        rank = args.rarefaction_ranks[rank_idx]
        with open( current_file ) as FH_rarefaction:
            for line in FH_rarefaction:
                # A real list is needed (slicing and indexing below)
                fields = [field.strip() for field in line.split("\t")]
                if line.startswith('#'):
                    # Header line: the first column is skipped, the others are the samples names
                    samples = fields[1:]
                    if rarefaction is None:
                        rarefaction = dict()
                        for sample in samples:
                            rarefaction[sample] = {'nb_seq': biom.get_sample_count( sample )}
                    for sample in samples:
                        rarefaction[sample][rank] = list()
                else:
                    if samples is None:
                        raise ValueError( "The rarefaction file '" + current_file + "' contains data before the header line." )
                    if rarefaction_step_size is None:
                        # The first column of the first data line gives the step size
                        rarefaction_step_size = int(fields[0])
                    for idx, sample in enumerate(samples):
                        if fields[idx+1] != "":
                            rarefaction[sample].setdefault( rank, list() ).append( int(fields[idx+1]) )
    del biom

    # Write
    # The values are serialised only when their placeholder is found in the template
    replacements = [
        ("###TAXONOMIC_RANKS###", args.taxonomic_ranks),
        ("###SAMPLES_NAMES###", ordered_samples_names),
        ("###TREE_DISTRIBUTION###", newick_tree),
        ("###DATA_RAREFACTION###", rarefaction),
        ("###RAREFACTION_STEP_SIZE###", rarefaction_step_size),
        ("###RAREFACTION_RANKS###", args.rarefaction_ranks),
        ("###ALIGNMENT_SCORES###", aln_results),
        ("###BOOTSTRAP_SCORES###", bootstrap_results)
    ]
    with open( os.path.join(CURRENT_DIR, "affiliations_stat_tpl.html") ) as FH_summary_tpl:
        with open( summary_file, "w" ) as FH_summary_out:
            for line in FH_summary_tpl:
                for placeholder, value in replacements:
                    if placeholder in line:
                        line = line.replace( placeholder, json.dumps(value) )
                        break # Only the first matching placeholder is replaced in a line
                FH_summary_out.write( line )
Ejemplo n.º 14
0
    # Mandatory path to the BIOM file to analyse
    group_input.add_argument( '-i', '--input-biom', required=True, help="The input biom file." )
    #     Outputs
    group_output = parser.add_argument_group( 'Outputs' )
    group_output.add_argument( '-o', '--output-file', default="affiliations_metrics.html", help="The output report." )
    group_output.add_argument( '-l', '--log-file', default=sys.stdout, help='The list of commands executed.' )
    args = parser.parse_args()
    prevent_shell_injections(args)

    # Trace the software name, its version and the complete command line in the log
    Logger.static_write(args.log_file, "## Application\nSoftware: " + os.path.basename(sys.argv[0]) + " (version: " + str(__version__) + ")\nCommand: " + " ".join(sys.argv) + "\n\n")

    # Check parameters
    # The consensus tag is meaningful only when a multiple-affiliations tag is provided
    if args.multiple_tag is None and args.tax_consensus_tag is not None:
        raise Exception( "The parameter '--tax-consensus-tag' must be used only with the paameter '--multiple-tag'." )
    # At least one source of taxonomy is required
    if args.taxonomy_tag is None and args.tax_consensus_tag is None:
        raise Exception( "The parameter '--taxonomy-tag' or the parameter '--tax-consensus-tag' must be set." )
    # The identity and the coverage tags are either both set or both unset
    if (args.identity_tag is None and args.coverage_tag is not None) or (args.identity_tag is not None and args.coverage_tag is None):
        raise Exception( "The parameters '--identity-tag' and '--coverage-tag' must be setted together." )
    # Each rarefaction rank must be one of the declared taxonomic ranks
    for current_rank in args.rarefaction_ranks:
        if current_rank not in args.taxonomic_ranks: raise Exception( "'" + current_rank + "' is not in valid taxonomic ranks : " + ", ".join(args.taxonomic_ranks) )
    # The BIOM is loaded here only to check that the requested metadata exist
    biom = BiomIO.from_json( args.input_biom )
    if args.multiple_tag is None:
        # Without multiple tag, every provided tag must be an observation metadata
        for param in [args.taxonomy_tag, args.bootstrap_tag, args.identity_tag, args.coverage_tag]:
            if param is not None and not biom.has_observation_metadata( param ):
                raise Exception( "The metadata '" + param + "' does not exist in the BIOM file." )
    else:
        # With multiple tag, only the consensus tag is checked on the observations
        if args.tax_consensus_tag is not None and not biom.has_observation_metadata( args.tax_consensus_tag ):
            raise Exception( "The metadata '" + args.tax_consensus_tag + "' does not exist in the BIOM file." )
    del biom

    # Process
    process( args )
Ejemplo n.º 15
0
def write_summary( summary_file, input_biom, output_biom, discards ):
    """
    @summary: Writes the process summary.
    @param summary_file: [str] The path to the output file.
    @param input_biom: [str] The path to the BIOM before program execution.
    @param output_biom: [str] The path to the BIOM after program execution.
    @param discards: [dict] By filter the path of the file that contains the list of the removed observations (one observation name by line).
    """
    global_results = {
        'nb_clstr_kept': 0,
        'nb_clstr_ini': 0,
        'nb_seq_kept': 0,
        'nb_seq_ini': 0
    }
    samples_results = dict()
    filters_results = dict()

    # Global before filters
    in_biom = BiomIO.from_json( input_biom )
    for observation_name in in_biom.get_observations_names():
        global_results['nb_clstr_ini'] += 1
        global_results['nb_seq_ini'] += in_biom.get_observation_count( observation_name )
    for sample_name in in_biom.get_samples_names():
        samples_results[sample_name] = {
            'initial': sum( 1 for x in in_biom.get_observations_by_sample(sample_name) ),
            'filtered': dict(),
            'kept': 0
        }

    # By sample and by filters
    # For each removed observation: the set of filters that discard it
    filters_intersections = dict()
    for filter_name in discards:
        with open( discards[filter_name] ) as FH_filter:
            for line in FH_filter:
                observation_name = line.strip()
                filters_intersections.setdefault( observation_name, dict() )[filter_name] = 1
    for observation_name, observation_filters in filters_intersections.items():
        # Removed intersection
        intersections_key = "--@@--".join(sorted( observation_filters ))
        if intersections_key not in filters_results:
            filters_results[intersections_key] = {
                'filters': list(observation_filters), # A list is required for the JSON serialisation
                'count': 0
            }
        filters_results[intersections_key]['count'] += 1

        # Filters by samples
        for sample in in_biom.get_samples_by_observation(observation_name):
            filtered_by_filter = samples_results[sample['id']]['filtered']
            for filter_name in observation_filters:
                filtered_by_filter[filter_name] = filtered_by_filter.get( filter_name, 0 ) + 1
    del in_biom

    # Global after filters
    out_biom = BiomIO.from_json( output_biom )
    for observation_name in out_biom.get_observations_names():
        global_results['nb_clstr_kept'] += 1
        global_results['nb_seq_kept'] += out_biom.get_observation_count( observation_name )
    for sample_name in out_biom.get_samples_names():
        samples_results[sample_name]['kept'] = sum( 1 for x in out_biom.get_observations_by_sample(sample_name) )
    del out_biom

    # Write
    # NOTE(review): the spelling "PORCESSED" is kept as is, it presumably matches the placeholder in the template - confirm before renaming
    replacements = [
        ("###PORCESSED_FILTERS###", list(discards)),
        ("###GLOBAL_RESULTS###", global_results),
        ("###SAMPLES_RESULTS###", samples_results),
        ("###FILTERS_RESULTS###", list(filters_results.values()))
    ]
    with open( os.path.join(CURRENT_DIR, "filters_tpl.html") ) as FH_summary_tpl:
        with open( summary_file, "w" ) as FH_summary_out:
            for line in FH_summary_tpl:
                for placeholder, value in replacements:
                    if placeholder in line:
                        line = line.replace( placeholder, json.dumps(value) )
                        break # Only the first matching placeholder is replaced in a line
                FH_summary_out.write( line )