Example #1
0
    def test_transfer_observation_metadata_moves_observation_metadata_between_biom_tables(self):
        """transfer_observation_metadata moves observation metadata values between BIOM format tables"""
        exp = self.genome_table1_with_metadata
        # The table that already carries metadata is passed first, the bare
        # table second; the result is expected to match the first table.
        actual = transfer_observation_metadata(self.genome_table1_with_metadata,
                                               self.genome_table1,
                                               "observation",
                                               verbose=False)

        # Materialise as lists so the entries can be compared pairwise.
        actual_md = [dict(md) for md in sorted(actual.metadata(axis='observation'))]
        exp_md = [dict(md) for md in sorted(exp.metadata(axis='observation'))]

        # A length mismatch should be reported as a test failure rather than
        # surfacing as an IndexError while indexing the shorter list.
        self.assertEqual(len(actual_md), len(exp_md))
        for actual_entry, exp_entry in zip(actual_md, exp_md):
            # Checked in both argument orders, as the original test did.
            self.assertEqualItems(actual_entry, exp_entry)
            self.assertEqualItems(exp_entry, actual_entry)
    def test_transfer_observation_metadata_moves_observation_metadata_between_biom_tables(self):
        """transfer_observation_metadata moves observation metadata values between BIOM format tables"""
        exp = self.genome_table1_with_metadata
        # The table that already carries metadata is passed first, the bare
        # table second; the result is expected to match the first table.
        actual = transfer_observation_metadata(self.genome_table1_with_metadata,
                                               self.genome_table1,
                                               "observation",
                                               verbose=False)

        # Materialise as lists so the entries can be compared pairwise.
        actual_md = [dict(md) for md in sorted(actual.metadata(axis='observation'))]
        exp_md = [dict(md) for md in sorted(exp.metadata(axis='observation'))]

        # A length mismatch should be reported as a test failure rather than
        # surfacing as an IndexError while indexing the shorter list.
        self.assertEqual(len(actual_md), len(exp_md))
        for actual_entry, exp_entry in zip(actual_md, exp_md):
            # Checked in both argument orders, as the original test did.
            self.assertEqualItems(actual_entry, exp_entry)
            self.assertEqualItems(exp_entry, actual_entry)
def main():
    """Normalize an OTU table by per-OTU copy number and write it out.

    Steps performed:
      1. Load the OTU table from ``opts.input_otu_fp``.
      2. Load the count (trait) table, either from ``opts.input_count_fp`` or
         from the precalculated file named after ``opts.gg_version`` inside
         the PICRUSt project directory. Files ending in ``.gz`` are opened
         with gzip.
      3. Keep only the OTUs that exist as samples in the count table.
      4. Read each kept OTU's copy number from the first observation row of
         the count table, round it to an int and attach it as observation
         metadata under ``opts.metadata_identifer``.
      5. Divide every observation vector by its copy number.
      6. Transfer observation metadata from the original table and write the
         result to ``opts.output_otu_fp``.

    Raises:
        ValueError: if a copy number cannot be converted to a number, or if
            the rounded copy number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.input_otu_fp)
    ids_to_load = otu_table.ids(axis='observation')

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')

    # Close the handle as soon as the table has been built, even on error.
    try:
        if opts.load_precalc_file_in_biom:
            count_table = load_table(count_table_fh)
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # Need to only keep data relevant to our otu list
    otu_ids = [str(obs[1]) for obs in otu_table.iter(axis='observation')]

    # Copy numbers are read from the first observation row of the count table.
    ob_id = count_table.ids(axis='observation')[0]

    filtered_otus = []
    filtered_values = []
    for otu_id in otu_ids:
        # OTU ids are looked up on the count table's sample axis.
        if count_table.exists(otu_id, axis='sample'):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.data(otu_id, axis='observation'))

    filtered_otu_table = Table(filtered_values, filtered_otus, otu_table.ids())

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.get_value_by_ids(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            copy_number = int(round(float(raw_value)))
        except ValueError:
            # Report the OTU id, as the message text promises.
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able."
                % (otu_id,))
        if copy_number < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: copy_number}

    filtered_otu_table.add_metadata(copy_numbers_filtered, axis='observation')

    def metadata_norm(v, i, md):
        # Divide the observation's values by its stored copy number.
        return v / float(md[opts.metadata_identifer])

    normalized_table = filtered_otu_table.transform(metadata_norm, axis='observation')

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table,
                                                     'observation')

    make_output_dir_for_file(opts.output_otu_fp)
    write_biom_table(normalized_table, opts.output_otu_fp)
Example #4
0
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    Steps performed:
      1. Parse the OTU table from ``opts.input_otu_fp`` (classic format when
         ``opts.input_format_classic`` is set, BIOM otherwise).
      2. Load the count (trait) table, either from ``opts.input_count_fp`` or
         from the precalculated file named after ``opts.gg_version`` inside
         the PICRUSt project directory. Files ending in ``.gz`` are opened
         with gzip.
      3. Keep only the OTUs that exist as samples in the count table.
      4. Read each kept OTU's copy number from the first observation row of
         the count table, round it to an int and attach it as observation
         metadata under ``opts.metadata_identifer``.
      5. Normalize observations by that metadata, transfer observation and
         sample metadata from the original table, and write the result to
         ``opts.output_otu_fp``.

    Raises:
        ValueError: if the OTU table cannot be parsed as BIOM, if a copy
            number cannot be converted to a number, or if the rounded copy
            number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(
                otu_fh, None, None, None, DenseOTUTable)
    else:
        try:
            with open(opts.input_otu_fp, 'U') as otu_fh:
                otu_table = parse_biom_table(otu_fh)
        except ValueError:
            raise ValueError(
                "Error loading OTU table! If not in BIOM format use '-f' option.\n"
            )

    ids_to_load = otu_table.ObservationIds

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(
            ['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data',
                                 precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')

    # Close the handle as soon as the table has been built, even on error.
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # Need to only keep data relevant to our otu list
    otu_ids = [str(obs[1]) for obs in otu_table.iterObservations()]

    # Copy numbers are read from the first observation row of the count table.
    ob_id = count_table.ObservationIds[0]

    filtered_otus = []
    filtered_values = []
    for otu_id in otu_ids:
        # OTU ids are looked up among the count table's samples.
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values,
                                       otu_table.SampleIds,
                                       filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            copy_number = int(round(float(raw_value)))
        except ValueError:
            # Report the OTU id, as the message text promises.
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able."
                % (otu_id,))
        if copy_number < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: copy_number}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(
        opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table,
                                                     normalized_table,
                                                     'ObservationMetadata')
    # NOTE(review): the return value of this call was never used by the
    # original code; the table written below is `normalized_table`. This
    # presumably relies on the helper updating `normalized_table` in place --
    # TODO confirm against transfer_sample_metadata.
    transfer_sample_metadata(otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(format_biom_table(normalized_table))
def main():
    """Normalize an OTU table by per-OTU copy number and write it out.

    The OTU table is loaded from ``opts.input_otu_fp``. The count (trait)
    table comes from ``opts.input_count_fp`` or, when that is None, from the
    precalculated file for ``opts.gg_version`` in the PICRUSt project
    directory (gzip is used for ``.gz`` files). OTUs that are not samples of
    the count table are dropped. Each remaining OTU gets its copy number,
    taken from the first observation row of the count table and rounded to
    an int, stored as observation metadata under ``opts.metadata_identifer``,
    and its values are divided by that number. Observation metadata from the
    original table is transferred to the result, which is written to
    ``opts.output_otu_fp``.

    Raises:
        ValueError: if a copy number cannot be converted to a number, or if
            the rounded copy number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.input_otu_fp)
    ids_to_load = otu_table.ids(axis="observation")

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = "_".join(["16S", opts.gg_version, "precalculated.tab.gz"])
        input_count_table = join(get_picrust_project_dir(), "picrust", "data", precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == ".gz":
        count_table_fh = gzip.open(input_count_table, "rb")
    else:
        count_table_fh = open(input_count_table, "U")

    # Close the handle as soon as the table has been built, even on error.
    try:
        if opts.load_precalc_file_in_biom:
            count_table = load_table(count_table_fh)
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # Need to only keep data relevant to our otu list
    otu_ids = [str(obs[1]) for obs in otu_table.iter(axis="observation")]

    # Copy numbers are read from the first observation row of the count table.
    ob_id = count_table.ids(axis="observation")[0]

    filtered_otus = []
    filtered_values = []
    for otu_id in otu_ids:
        # OTU ids are looked up on the count table's sample axis.
        if count_table.exists(otu_id, axis="sample"):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.data(otu_id, axis="observation"))

    filtered_otu_table = Table(filtered_values, filtered_otus, otu_table.ids())

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.get_value_by_ids(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            copy_number = int(round(float(raw_value)))
        except ValueError:
            # Report the OTU id, as the message text promises.
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able."
                % (otu_id,)
            )
        if copy_number < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: copy_number}

    filtered_otu_table.add_metadata(copy_numbers_filtered, axis="observation")

    def metadata_norm(v, i, md):
        # Divide the observation's values by its stored copy number.
        return v / float(md[opts.metadata_identifer])

    normalized_table = filtered_otu_table.transform(metadata_norm, axis="observation")

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, "observation")

    make_output_dir_for_file(opts.output_otu_fp)
    write_biom_table(normalized_table, opts.output_otu_fp)
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    The OTU table is parsed from ``opts.input_otu_fp`` (classic format when
    ``opts.input_format_classic`` is set, BIOM otherwise). The count (trait)
    table comes from ``opts.input_count_fp`` or, when that is None, from the
    precalculated file for ``opts.gg_version`` in the PICRUSt project
    directory (gzip is used for ``.gz`` files). OTUs that are not samples of
    the count table are dropped. Each remaining OTU gets its copy number,
    taken from the first observation row of the count table and rounded to
    an int, stored as observation metadata under ``opts.metadata_identifer``.
    The table is then normalized by that metadata, observation and sample
    metadata are transferred from the original table, and the result is
    written to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the OTU table cannot be parsed as BIOM, if a copy
            number cannot be converted to a number, or if the rounded copy
            number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(otu_fh, None, None, None, DenseOTUTable)
    else:
        try:
            with open(opts.input_otu_fp, 'U') as otu_fh:
                otu_table = parse_biom_table(otu_fh)
        except ValueError:
            raise ValueError("Error loading OTU table! If not in BIOM format use '-f' option.\n")

    ids_to_load = otu_table.ObservationIds

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = '_'.join(['16S', opts.gg_version, 'precalculated.tab.gz'])
        input_count_table = join(get_picrust_project_dir(), 'picrust', 'data', precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        # Single pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the function.
        print("Loading trait table:  %s" % (input_count_table,))

    ext = path.splitext(input_count_table)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(input_count_table, 'rb')
    else:
        count_table_fh = open(input_count_table, 'U')

    # Close the handle as soon as the table has been built, even on error.
    try:
        if opts.load_precalc_file_in_biom:
            count_table = parse_biom_table(count_table_fh.read())
        else:
            count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)
    finally:
        count_table_fh.close()

    # Need to only keep data relevant to our otu list
    otu_ids = [str(obs[1]) for obs in otu_table.iterObservations()]

    # Copy numbers are read from the first observation row of the count table.
    ob_id = count_table.ObservationIds[0]

    filtered_otus = []
    filtered_values = []
    for otu_id in otu_ids:
        # OTU ids are looked up among the count table's samples.
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values, otu_table.SampleIds, filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            copy_number = int(round(float(raw_value)))
        except ValueError:
            # Report the OTU id, as the message text promises.
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able."
                % (otu_id,))
        if copy_number < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: copy_number}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, 'ObservationMetadata')
    # NOTE(review): the return value of this call was never used by the
    # original code; the table written below is `normalized_table`. This
    # presumably relies on the helper updating `normalized_table` in place --
    # TODO confirm against transfer_sample_metadata.
    transfer_sample_metadata(otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(format_biom_table(normalized_table))
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    The OTU table is parsed from ``opts.input_otu_fp`` (classic format when
    ``opts.input_format_classic`` is set, BIOM otherwise). The count table is
    parsed as BIOM from ``opts.input_count_fp``, using gzip when the path
    ends in ``.gz``. OTUs that are not samples of the count table are
    dropped. Each remaining OTU gets its copy number, taken from the first
    observation row of the count table and rounded to an int, stored as
    observation metadata under ``opts.metadata_identifer``. The table is then
    normalized by that metadata, observation and sample metadata are
    transferred from the original table, and the BIOM JSON string of the
    result is written to ``opts.output_otu_fp``.

    Raises:
        ValueError: if the OTU table cannot be parsed as BIOM, if a copy
            number cannot be converted to a number, or if the rounded copy
            number is smaller than 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.input_format_classic:
        with open(opts.input_otu_fp, 'U') as otu_fh:
            otu_table = parse_classic_table_to_rich_table(otu_fh, None, None, None, DenseOTUTable)
    else:
        try:
            with open(opts.input_otu_fp, 'U') as otu_fh:
                otu_table = parse_biom_table(otu_fh)
        except ValueError:
            raise ValueError("Error loading OTU table! If not in BIOM format use '-f' option.\n")

    ext = path.splitext(opts.input_count_fp)[1]
    if ext == '.gz':
        count_table_fh = gzip.open(opts.input_count_fp, 'rb')
    else:
        count_table_fh = open(opts.input_count_fp, 'U')

    # Close the handle as soon as the table has been parsed, even on error.
    try:
        count_table = parse_biom_table(count_table_fh)
    finally:
        count_table_fh.close()

    # Need to only keep data relevant to our otu list
    otu_ids = [str(obs[1]) for obs in otu_table.iterObservations()]

    # Copy numbers are read from the first observation row of the count table.
    ob_id = count_table.ObservationIds[0]

    filtered_otus = []
    filtered_values = []
    for otu_id in otu_ids:
        # OTU ids are looked up among the count table's samples.
        if count_table.sampleExists(otu_id):
            filtered_otus.append(otu_id)
            filtered_values.append(otu_table.observationData(otu_id))

    filtered_otu_table = table_factory(filtered_values, otu_table.SampleIds, filtered_otus,
                                       constructor=DenseOTUTable)

    copy_numbers_filtered = {}
    for otu_id in filtered_otus:
        raw_value = count_table.getValueByIds(ob_id, otu_id)
        try:
            # data can be floats so round them and make them integers
            copy_number = int(round(float(raw_value)))
        except ValueError:
            # Report the OTU id, as the message text promises.
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s. Must be int-able."
                % (otu_id,))
        if copy_number < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[otu_id] = {opts.metadata_identifer: copy_number}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, 'ObservationMetadata')
    # NOTE(review): the return value of this call was never used by the
    # original code; the table written below is `normalized_table`. This
    # presumably relies on the helper updating `normalized_table` in place --
    # TODO confirm against transfer_sample_metadata.
    transfer_sample_metadata(otu_table, normalized_table, 'SampleMetadata')

    make_output_dir_for_file(opts.output_otu_fp)
    with open(opts.output_otu_fp, 'w') as out_fh:
        out_fh.write(normalized_table.getBiomFormatJsonString('PICRUST'))