def predict_metagenome_variances(otu_table, genome_table, gene_variances,
                                 verbose=False):
    """Predict a metagenome together with its variance and confidence bounds

    otu_table -- BIOM Table object of OTUs (OTU ids are its ObservationIds)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids are its SampleIds, genes are its ObservationIds)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count, looked up by OTU id in the same way as genome_table
    verbose -- if True, print progress messages to stdout

    Returns a 4-tuple of tables built by table_from_template:
    (prediction, variance, lower CI bound, upper CI bound), where the two
    bounds are the values returned by calc_confidence_interval_95.

    Only OTUs found in the OTU table, the genome table AND the variance table
    contribute to the result.  Raises ValueError if there is no such OTU.

    Note that OTU counts are treated as constants (exactly known) rather than
    random variables for now.  If a good method for getting variance for OTU
    counts becomes available, this should be updated to treat them as random
    variables as well.
    """
    #Assume that OTUs are SampleIds in the genome table, but ObservationIds in the OTU table
    genome_table_otu_ids = "SampleIds"
    otu_table_otu_ids = "ObservationIds"

    #OTUs shared by the OTU table and the genome table
    genome_otus = get_overlapping_ids(
        otu_table, genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)
    #OTUs shared by the OTU table and the variance table
    variance_otus = set(get_overlapping_ids(
        otu_table, gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))

    #Keep only OTUs present in all three tables.  Previously the second
    #lookup simply replaced the first, so an OTU missing from the genome
    #table could still reach genome_table.sampleData() below.
    overlapping_otus = [otu_id for otu_id in genome_otus
                        if otu_id in variance_otus]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "variance table, so can't predict metagenome variances.")

    #Filter OTU and Genome Table to contain only overlapping IDs.
    #A frozenset makes each membership test O(1) instead of a list scan.
    #NOTE(review): relies on the BIOM 1.x filter methods returning a new
    #table rather than filtering in place, so the result must be bound.
    shared_otus = frozenset(overlapping_otus)
    keep_shared = lambda val, otu_id, metadata: otu_id in shared_otus
    otu_table = otu_table.filterObservations(keep_shared)
    genome_table = genome_table.filterSamples(keep_shared)

    metagenome_data = None
    metagenome_variance_data = None
    if verbose:
        print("Calculating the variance of the estimated metagenome for %i OTUs." % len(
            overlapping_otus))
    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.observationData(otu_id)
        otu_across_genes = genome_table.sampleData(otu_id)
        otu_variance_across_genes = gene_variances.sampleData(otu_id)

        #One row per entry of otu_across_samples, one column per gene
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            #The first OTU's freshly built arrays become the accumulators
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    #Transpose so that genes are rows and samples are columns
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if verbose:
        print("Calculating metagenomic confidene intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=True, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction,variance,upper confidence interval and lower confidence interval.")

    #Wrap results into BIOM Tables
    sample_ids = otu_table.SampleIds
    gene_ids = genome_table.ObservationIds

    result_data_table = table_from_template(
        data_result, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable)

    result_variance_table = table_from_template(
        variance_result, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    result_lower_CI_table = table_from_template(
        lower_95_CI, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    result_upper_CI_table = table_from_template(
        upper_95_CI, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
      result_upper_CI_table
def predict_metagenome_variances(otu_table, genome_table, gene_variances,
                                 verbose=False):
    """Predict variances (and confidence bounds) for metagenome predictions

    otu_table -- BIOM Table object of OTUs (OTU ids are its ObservationIds)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids are its SampleIds, genes are its ObservationIds)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count, looked up by OTU id in the same way as genome_table
    verbose -- if True, print progress messages to stdout

    Returns the tuple (prediction table, variance table, lower CI table,
    upper CI table); every table is built by table_from_template and the
    CI values come from calc_confidence_interval_95.

    An OTU is used only if it occurs in all three input tables.  Raises
    ValueError when no OTU satisfies that.

    Note that OTU counts are treated as constants (exactly known) rather than
    random variables for now.  If a good method for getting variance for OTU
    counts becomes available, this should be updated to treat them as random
    variables as well.
    """
    #Assume that OTUs are SampleIds in the genome table, but ObservationIds in the OTU table
    genome_table_otu_ids = "SampleIds"
    otu_table_otu_ids = "ObservationIds"
    id_kwargs = dict(genome_table_ids=genome_table_otu_ids,
                     otu_table_ids=otu_table_otu_ids)

    #Find overlapping otus separately for the genome and variance tables
    ids_with_genomes = get_overlapping_ids(otu_table, genome_table,
                                           **id_kwargs)
    ids_with_variances = set(
        get_overlapping_ids(otu_table, gene_variances, **id_kwargs))

    #Ensure the OTUs overlap fully with BOTH tables.  The second lookup used
    #to overwrite the first, which silently dropped the genome table check.
    overlapping_otus = [otu_id for otu_id in ids_with_genomes
                        if otu_id in ids_with_variances]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "variance table, so can't predict metagenome variances.")

    #Filter OTU and Genome Table to contain only overlapping IDs.
    #Set membership keeps the per-row filter test cheap.
    #NOTE(review): relies on the BIOM 1.x filter methods returning a new
    #table rather than filtering in place, so the result must be bound.
    wanted_otus = frozenset(overlapping_otus)
    otu_table = otu_table.filterObservations(
        lambda val, otu_id, metadata: otu_id in wanted_otus)
    genome_table = genome_table.filterSamples(
        lambda val, otu_id, metadata: otu_id in wanted_otus)

    metagenome_data = None
    metagenome_variance_data = None
    if verbose:
        print("Calculating the variance of the estimated metagenome for %i OTUs." % len(overlapping_otus))
    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.observationData(otu_id)
        otu_across_genes = genome_table.sampleData(otu_id)
        otu_variance_across_genes = gene_variances.sampleData(otu_id)

        #Rows follow otu_across_samples, columns follow the gene vectors
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            #Seed the running totals with the first OTU's contribution
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    #Flip to genes-by-samples before building the output tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if verbose:
        print("Calculating metagenomic confidene intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=True, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction,variance,upper confidence interval and lower confidence interval.")

    #Wrap results into BIOM Tables
    result_data_table = table_from_template(
        data_result, otu_table.SampleIds, genome_table.ObservationIds,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable)

    result_variance_table = table_from_template(
        variance_result, otu_table.SampleIds, genome_table.ObservationIds,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    result_lower_CI_table = table_from_template(
        lower_95_CI, otu_table.SampleIds, genome_table.ObservationIds,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    result_upper_CI_table = table_from_template(
        upper_95_CI, otu_table.SampleIds, genome_table.ObservationIds,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        constructor=SparseGeneTable, verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
      result_upper_CI_table
# Example #3
0
def predict_metagenome_variances(otu_table,
                                 genome_table,
                                 gene_variances,
                                 verbose=False,
                                 whole_round=True):
    """Predict a metagenome together with its variance and confidence bounds

    otu_table -- BIOM Table object of OTUs (OTU ids on the observation axis)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids on the sample axis, genes on the observation axis)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count, looked up by OTU id in the same way as genome_table
    verbose -- if True, print progress messages to stdout
    whole_round -- if True, round the predicted counts with around() and
      pass round_CI=True to calc_confidence_interval_95

    Returns a 4-tuple of tables built by table_from_template:
    (prediction, variance, lower CI bound, upper CI bound), where the two
    bounds are the values returned by calc_confidence_interval_95.

    Only OTUs found in the OTU table, the genome table AND the variance table
    contribute to the result.  Raises ValueError if there is no such OTU.

    Note that OTU counts are treated as constants (exactly known) rather than
    random variables for now. If a good method for getting variance for OTU
    counts becomes available, this should be updated to treat them as random
    variables as well.
    """
    #Assume that OTUs are samples in the genome table, but observations in the OTU table
    genome_table_otu_ids = "sample"
    otu_table_otu_ids = "observation"

    #OTUs shared by the OTU table and the genome table
    genome_otus = get_overlapping_ids(
        otu_table,
        genome_table,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids)
    #OTUs shared by the OTU table and the variance table
    variance_otus = set(get_overlapping_ids(
        otu_table,
        gene_variances,
        genome_table_ids=genome_table_otu_ids,
        otu_table_ids=otu_table_otu_ids))

    #Keep only OTUs present in all three tables.  Previously the second
    #lookup simply replaced the first, so an OTU missing from the genome
    #table could still reach genome_table.data() below.
    overlapping_otus = [otu_id for otu_id in genome_otus
                        if otu_id in variance_otus]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "variance table, so can't predict metagenome variances.")

    #Filter OTU and Genome Table to contain only overlapping IDs.
    #A frozenset makes each membership test O(1) instead of a list scan;
    #inplace=False leaves the caller's tables untouched.
    shared_otus = frozenset(overlapping_otus)
    filter_f = lambda v, id_, m: id_ in shared_otus
    otu_table = otu_table.filter(filter_f, axis='observation', inplace=False)
    genome_table = genome_table.filter(filter_f, inplace=False)

    metagenome_data = None
    metagenome_variance_data = None
    if verbose:
        print("Calculating the variance of the estimated metagenome for %i OTUs." % len(
            overlapping_otus))
    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.data(otu_id, axis='observation')
        otu_across_genes = genome_table.data(otu_id)
        otu_variance_across_genes = gene_variances.data(otu_id)

        #One row per entry of otu_across_samples, one column per gene
        otu_contrib_to_metagenome = array(
            [o * otu_across_genes for o in otu_across_samples])
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            #The first OTU's freshly built arrays become the accumulators
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    #Transpose so that genes are rows and samples are columns
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if whole_round:
        #Round counts to nearest whole numbers
        data_result = around(data_result)

    if verbose:
        print("Calculating metagenomic confidence intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=whole_round, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction, variance, upper confidence interval and lower confidence interval.")

    #Wrap results into BIOM Tables
    sample_ids = otu_table.ids()
    gene_ids = genome_table.ids(axis='observation')

    result_data_table = table_from_template(
        data_result, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table)

    result_variance_table = table_from_template(
        variance_result, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    result_lower_CI_table = table_from_template(
        lower_95_CI, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    result_upper_CI_table = table_from_template(
        upper_95_CI, sample_ids, gene_ids,
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
      result_upper_CI_table
# Example #4
0
def predict_metagenome_variances(otu_table, genome_table, gene_variances,
                                 verbose=False, whole_round=True):
    """Predict variances (and confidence bounds) for metagenome predictions

    otu_table -- BIOM Table object of OTUs (OTU ids on the observation axis)
    genome_table -- BIOM Table object of predicted gene counts per OTU
      (OTU ids on the sample axis, genes on the observation axis)
    gene_variances -- BIOM Table object of predicted variance in each gene
      count, looked up by OTU id in the same way as genome_table
    verbose -- if True, print progress messages to stdout
    whole_round -- if True, round the predicted counts with around() and
      pass round_CI=True to calc_confidence_interval_95

    Returns the tuple (prediction table, variance table, lower CI table,
    upper CI table); every table is built by table_from_template and the
    CI values come from calc_confidence_interval_95.

    An OTU is used only if it occurs in all three input tables.  Raises
    ValueError when no OTU satisfies that.

    Note that OTU counts are treated as constants (exactly known) rather than
    random variables for now. If a good method for getting variance for OTU
    counts becomes available, this should be updated to treat them as random
    variables as well.
    """
    #Assume that OTUs are samples in the genome table, but observations in the OTU table
    genome_table_otu_ids = "sample"
    otu_table_otu_ids = "observation"
    id_kwargs = dict(genome_table_ids=genome_table_otu_ids,
                     otu_table_ids=otu_table_otu_ids)

    #Find overlapping otus separately for the genome and variance tables
    ids_with_genomes = get_overlapping_ids(otu_table, genome_table,
                                           **id_kwargs)
    ids_with_variances = set(
        get_overlapping_ids(otu_table, gene_variances, **id_kwargs))

    #Ensure the OTUs overlap fully with BOTH tables.  The second lookup used
    #to overwrite the first, which silently dropped the genome table check.
    overlapping_otus = [otu_id for otu_id in ids_with_genomes
                        if otu_id in ids_with_variances]
    if not overlapping_otus:
        raise ValueError(
            "No OTUs are shared by the OTU table, the genome table and the "
            "variance table, so can't predict metagenome variances.")

    #Filter OTU and Genome Table to contain only overlapping IDs.
    #Set membership keeps the per-row filter test cheap; inplace=False
    #leaves the caller's tables untouched.
    wanted_otus = frozenset(overlapping_otus)
    filter_f = lambda v, id_, m: id_ in wanted_otus
    otu_table = otu_table.filter(filter_f, axis='observation', inplace=False)
    genome_table = genome_table.filter(filter_f, inplace=False)

    metagenome_data = None
    metagenome_variance_data = None
    if verbose:
        print("Calculating the variance of the estimated metagenome for %i OTUs." % len(overlapping_otus))
    for otu_id in overlapping_otus:
        otu_across_samples = otu_table.data(otu_id, axis='observation')
        otu_across_genes = genome_table.data(otu_id)
        otu_variance_across_genes = gene_variances.data(otu_id)

        #Rows follow otu_across_samples, columns follow the gene vectors
        otu_contrib_to_metagenome = array([o * otu_across_genes
                                           for o in otu_across_samples])
        var_otu_contrib_to_metagenome = array(
            [scaled_variance(otu_variance_across_genes, o)
             for o in otu_across_samples])

        if metagenome_data is None:
            #Seed the running totals with the first OTU's contribution
            metagenome_data = otu_contrib_to_metagenome
            metagenome_variance_data = var_otu_contrib_to_metagenome
        else:
            metagenome_data += otu_contrib_to_metagenome
            metagenome_variance_data = variance_of_sum(
                metagenome_variance_data, var_otu_contrib_to_metagenome)

    #Flip to genes-by-samples before building the output tables
    data_result = metagenome_data.T
    variance_result = metagenome_variance_data.T

    if whole_round:
        #Round counts to nearest whole numbers
        data_result = around(data_result)

    if verbose:
        print("Calculating metagenomic confidence intervals from variance.")

    lower_95_CI, upper_95_CI = calc_confidence_interval_95(
        data_result, variance_result,
        round_CI=whole_round, min_val=0.0, max_val=None)

    if verbose:
        print("Generating BIOM output tables for the prediction, variance, upper confidence interval and lower confidence interval.")

    #Wrap results into BIOM Tables
    result_data_table = table_from_template(
        data_result, otu_table.ids(), genome_table.ids(axis='observation'),
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table)

    result_variance_table = table_from_template(
        variance_result, otu_table.ids(),
        genome_table.ids(axis='observation'),
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    result_lower_CI_table = table_from_template(
        lower_95_CI, otu_table.ids(), genome_table.ids(axis='observation'),
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    result_upper_CI_table = table_from_template(
        upper_95_CI, otu_table.ids(), genome_table.ids(axis='observation'),
        sample_metadata_source=otu_table,
        observation_metadata_source=genome_table,
        verbose=verbose)

    return result_data_table, result_variance_table, result_lower_CI_table,\
      result_upper_CI_table