def test_make_sample_node_table(self):
    """Check the node-table lines built by make_sample_node_table.

    Two cases are covered: the BIOM sample ids equal the mapping-file
    sample ids, and the BIOM sample ids are a subset of them.
    """
    header = "#NodeID\tNodeType\tAbundance\tTimePt\tStudy\tTreatment\tDiet"
    sample_rows = [
        "s1\tsample\t148.0\t1\ta\tpre\thf",
        "s2\tsample\t156.0\t2\ta\tpre\tlf",
        "s3\tsample\t164.0\t3\ta\tpre\thf",
        "s4\tsample\t172.0\t4\ta\tpost\tlf",
        "s5\tsample\t180.0\t5\ta\tpost\tmf",
    ]

    # case 1: sample ids in the biom table == sample ids in the mapping file
    full_table = parse_biom_table(BIOM_STRING_1)
    mapping = parse_mapping_file_to_dict(MF_LINES.split("\n"))[0]
    observed = make_sample_node_table(full_table, mapping)
    self.assertEqual(observed, [header] + sample_rows)

    # case 2: sample ids in the biom table are a subset of the mapping file;
    # only the s3, s4 and s5 rows are expected
    subset_table = parse_biom_table(BIOM_STRING_2)
    observed = make_sample_node_table(subset_table, mapping)
    self.assertEqual(observed, [header] + sample_rows[2:])
예제 #2
0
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    if opts.verbose:
        print "Loading sequencing depth table: ",opts.input_seq_depth_file
    scaling_factors = {}
    for sample_id,depth in parse_seq_count_file(open(opts.input_seq_depth_file,'U')):
        scaling_factors[sample_id]=depth    
    
    ext=path.splitext(opts.input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))
    
    if opts.verbose:
        print "Scaling the metagenome..."
        
    scaled_metagenomes = scale_metagenomes(genome_table,scaling_factors)

    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    open(opts.output_metagenome_table,'w').write(format_biom_table(scaled_metagenomes))
예제 #3
0
    def test_sort_otu_table(self):
        """sort_otu_table with an explicit id order gives the age-sorted table"""
        id_order = ['NA', 'Key', 'Fing']
        unsorted_table = parse_biom_table(self.otu_table1)

        observed = sort_otu_table(unsorted_table, id_order)

        wanted = parse_biom_table(self.age_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #4
0
    def test_make_edge_table(self):
        '''Check the edge-table lines produced by make_edge_table.

        Lines are compared as sets, so their order is not checked.
        '''
        header = '#Sample\tOTU\tAbundance'

        table = parse_biom_table(BIOM_STRING_3)
        observed = make_edge_table(table)
        expected = [
            header,
            's3\to1\t3.0', 's3\to2\t8.0', 's3\to3\t13.0', 's3\to4\t18.0',
            's3\to5\t23.0', 's3\to6\t28.0', 's3\to7\t33.0', 's3\to8\t38.0',
            's4\to1\t4.0', 's4\to2\t9.0', 's4\to3\t14.0', 's4\to4\t19.0',
            's4\to5\t24.0', 's4\to6\t29.0', 's4\to7\t34.0', 's4\to8\t39.0',
            's5\to1\t5.0', 's5\to2\t10.0', 's5\to3\t15.0', 's5\to4\t20.0',
            's5\to5\t25.0', 's5\to6\t30.0', 's5\to7\t35.0', 's5\to8\t40.0',
        ]
        self.assertEqual(set(observed), set(expected))

        # test with a row and a col that are all zero
        table = parse_biom_table(BIOM_STRING_6)
        observed = make_edge_table(table)
        expected = [
            header,
            's2\to1\t2.0', 's2\to2\t7.0', 's2\to3\t12.0', 's2\to4\t17.0',
            's2\to6\t27.0', 's2\to7\t32.0', 's2\to8\t37.0',
            's3\to1\t3.0', 's3\to2\t8.0', 's3\to3\t13.0', 's3\to4\t18.0',
            's3\to6\t28.0', 's3\to7\t33.0', 's3\to8\t38.0',
            's4\to1\t4.0', 's4\to2\t9.0', 's4\to3\t14.0', 's4\to4\t19.0',
            's4\to6\t29.0', 's4\to7\t34.0', 's4\to8\t39.0',
            's5\to1\t5.0', 's5\to2\t10.0', 's5\to3\t15.0', 's5\to4\t20.0',
            's5\to6\t30.0', 's5\to7\t35.0', 's5\to8\t40.0',
        ]
        self.assertEqual(set(observed), set(expected))
def generate_full_otu_table(study, study_input_dir, zip_fname, files_to_remove,
                            biom_files,output_dir):
    """ Merge OTU tables and write the merged table to output_dir.

    study: identifier formatted into the output file name
        ('study_<study>_closed_reference_otu_table.biom').
    study_input_dir, zip_fname: only referenced by the disabled zip step
        below; unused otherwise.
    files_to_remove: list that receives the path of the written table; it is
        modified in place and also returned.
    biom_files: non-empty sequence of BIOM file paths; the first table is the
        starting point and every further table is merged into it.
    output_dir: directory the merged table is written to.

    Raises IndexError when biom_files is empty.
    """
    # Each input handle is closed as soon as its table has been parsed.
    with open(biom_files[0],'U') as biom_in:
        master = parse_biom_table(biom_in)
    # slicing yields nothing for a single file, so no length check is needed
    for input_fp in biom_files[1:]:
        with open(input_fp,'U') as biom_in:
            master = master.merge(parse_biom_table(biom_in))

    # write full biom-table
    full_biom_table_fname='study_%s_closed_reference_otu_table.biom' % \
                                                              (str(study))
    full_biom_table_fp=join(output_dir,full_biom_table_fname)
    # add to list of files to remove (registered before writing, so a
    # partially written file is still listed)
    files_to_remove.append(full_biom_table_fp)

    with open(full_biom_table_fp,'w') as biom_f:
        biom_f.write(format_biom_table(master))

    # zip the full biom-table file (currently disabled)
    #cmd_call='cd %s; tar rzvf %s %s' % (study_input_dir,zip_fname,
    #                               full_biom_table_fname)
    #system(cmd_call)

    return files_to_remove
예제 #6
0
    def test_sort_otu_table_error(self):
        """sort_otu_table raises for unusable id lists"""
        # an id list in which 'Key' appears twice -> ValueError
        repeated_ids = ['NA', 'Key', 'Fing', 'Key']
        self.assertRaises(ValueError, sort_otu_table,
                          parse_biom_table(self.otu_table1), repeated_ids)

        # an id list holding only 'NA' and 'Key' -> KeyError
        short_ids = ['NA', 'Key']
        self.assertRaises(KeyError, sort_otu_table,
                          parse_biom_table(self.otu_table1), short_ids)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
  
    if opts.limit_to_function:
        limit_to_functions = opts.limit_to_function.split(',')
        if opts.verbose:
            print "Limiting output to only functions:",limit_to_functions
    else:
        limit_to_functions = []

    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ext=path.splitext(opts.input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))
    if opts.verbose:
        print "Predicting the metagenome..."
    
    partitioned_metagenomes = partition_metagenome_contributions(otu_table,genome_table,limit_to_functions=limit_to_functions)
    output_text = "\n".join(["\t".join(map(str,i)) for i in partitioned_metagenomes])
    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    open(opts.output_metagenome_table,'w').write(output_text)
예제 #8
0
    def test_suppress_md5(self):
        """ TableSummarizer output matches the md5-suppressed summary

        The md5 can be suppressed with suppress_md5=True, by passing None as
        the second element of the table tuple, or by doing both.
        """
        summarizer = TableSummarizer()
        # (second element of the table tuple, suppress_md5 flag)
        cases = [
            (self.biom1_lines, True),   # flag only
            (None, False),              # None as the second value only
            (None, True),               # None and the flag together
        ]
        for second_value, suppress in cases:
            result = summarizer(
                table=(parse_biom_table(self.biom1_lines), second_value),
                qualitative=False,
                suppress_md5=suppress)
            self.assertEqual(result['biom_summary'],
                             self.summary_suppress_md5_lines)
예제 #9
0
 def test_make_sample_node_table(self):
     '''Check the node-table lines built by make_sample_node_table.

     Covers BIOM sample ids equal to the mapping-file sample ids and BIOM
     sample ids that are a subset of them.
     '''
     header = '#NodeID\tNodeType\tAbundance\tTimePt\tStudy\tTreatment\tDiet'
     sample_rows = [
         's1\tsample\t148.0\t1\ta\tpre\thf',
         's2\tsample\t156.0\t2\ta\tpre\tlf',
         's3\tsample\t164.0\t3\ta\tpre\thf',
         's4\tsample\t172.0\t4\ta\tpost\tlf',
         's5\tsample\t180.0\t5\ta\tpost\tmf',
     ]

     # case 1: sample ids in the biom table == sample ids in the mapping file
     full_table = parse_biom_table(BIOM_STRING_1)
     mapping = parse_mapping_file_to_dict(MF_LINES.split('\n'))[0]
     observed = make_sample_node_table(full_table, mapping)
     self.assertEqual(observed, [header] + sample_rows)

     # case 2: sample ids in the biom table are a subset of the mapping file;
     # only the s3, s4 and s5 rows are expected
     subset_table = parse_biom_table(BIOM_STRING_2)
     observed = make_sample_node_table(subset_table, mapping)
     self.assertEqual(observed, [header] + sample_rows[2:])
예제 #10
0
def main():
    """Normalize an OTU table by per-OTU copy number and write it as BIOM.

    Loads the OTU table (classic tab-delimited format when
    opts.input_format_classic is set, BIOM otherwise) and the copy-number
    count table (opts.input_count_fp, gzip-compressed when its extension is
    '.gz'). OTUs without an entry in the count table are dropped; the rest
    get their rounded copy number attached as observation metadata under
    opts.metadata_identifer, and the table is normalized by that metadata
    and written to opts.output_otu_fp.

    Raises ValueError when a copy number cannot be converted to a number or
    is smaller than 1.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    input_ext=path.splitext(opts.input_otu_fp)[1]
    # each input handle is closed as soon as its table has been parsed
    if opts.input_format_classic:
        with open(opts.input_otu_fp,'U') as otu_f:
            otu_table=parse_classic_table_to_rich_table(otu_f,None,None,None,DenseOTUTable)
    else:
        if input_ext != '.biom':
            sys.stderr.write("\nOTU table does not have '.biom' extension! If loading causes error consider using '-f' option to load tab-delimited OTU table!\n\n")
        with open(opts.input_otu_fp,'U') as otu_f:
            otu_table = parse_biom_table(otu_f)

    # A '.gz' extension selects gzip decompression.
    if path.splitext(opts.input_count_fp)[1] == '.gz':
        count_f = gzip.open(opts.input_count_fp,'rb')
    else:
        count_f = open(opts.input_count_fp,'U')
    try:
        count_table = parse_biom_table(count_f)
    finally:
        count_f.close()

    #Need to only keep data relevant to our otu list
    ids = [str(x[1]) for x in otu_table.iterObservations()]

    # copy numbers are read from the first observation of the count table
    ob_id=count_table.ObservationIds[0]

    filtered_otus=[]
    filtered_values=[]
    for x in ids:
        # the count table is looked up by sample id, using the OTU id as key
        if count_table.sampleExists(x):
            filtered_otus.append(x)
            filtered_values.append(otu_table.observationData(x))

    filtered_otu_table=table_factory(filtered_values,otu_table.SampleIds,filtered_otus, constructor=DenseOTUTable)

    copy_numbers_filtered={}
    for x in filtered_otus:
        value = count_table.getValueByIds(ob_id,x)
        try:
            #data can be floats so round them and make them integers
            value = int(round(float(value)))
        except (TypeError, ValueError):
            # name the OTU id (x) as well as the offending value; the message
            # previously printed the value where it claimed to print the id
            raise ValueError(
                "Invalid type passed as copy number for OTU ID %s (got %r). Must be int-able." % (x, value))
        if value < 1:
            raise ValueError("Copy numbers must be greater than or equal to 1.")

        copy_numbers_filtered[x]={opts.metadata_identifer:value}

    filtered_otu_table.addObservationMetadata(copy_numbers_filtered)

    normalized_table = filtered_otu_table.normObservationByMetadata(opts.metadata_identifer)

    make_output_dir_for_file(opts.output_otu_fp)
    # serialize first so a failure cannot leave a truncated output file
    output_text = normalized_table.getBiomFormatJsonString('PICRUST')
    with open(opts.output_otu_fp,'w') as out_f:
        out_f.write(output_text)
예제 #11
0
	def convertBiomFileToStampProfile(self, file_name, output_name, metadata_name):
		""" Convert a BIOM table into a tab-separated STAMP profile file.

		Function taken from PICRUSt by Morgan Langill.
		      https://github.com/mlangill/get_mgrast_data/blob/master/biom_to_stamp.py

		file_name: path of the BIOM table; read through gzip when the
		    extension is '.gz'.
		output_name: path of the profile file to write.
		metadata_name: observation metadata field supplying the hierarchy
		    columns. Everything from the first '(' onwards is dropped. None or
		    '<observation ids>' means no hierarchy columns are written.

		When the metadata field is not found in the table, an information
		dialog is shown and the method returns without writing a file.
		"""

		#allow file to be optionally gzipped (must use extension '.gz')
		ext=splitext(file_name)[1]
		if (ext == '.gz'):
			biom_in = gzip.open(file_name,'rb')
		else:
			biom_in = open(file_name,'U')
		# close the input for both plain and gzip files, even on a parse error
		try:
			table = parse_biom_table(biom_in)
		finally:
			biom_in.close()

		# None has to be tested before any string method is called on it
		if metadata_name is not None:
			metadata_name = metadata_name.split('(')[0].rstrip()

		if metadata_name is None or metadata_name == '<observation ids>':
			max_len_metadata = 0
		elif table.observation_metadata and metadata_name in table.observation_metadata[0]:
			#figure out the longest list within the given metadata
			max_len_metadata = max(len(p[metadata_name]) for p in table.observation_metadata)
		else:
			QtGui.QMessageBox.information(self, 'Unrecognized metadata file', "'" + metadata_name + "' was not found in the BIOM table.", QtGui.QMessageBox.Ok)
			return

		#make simple labels for each level in the metadata (e.g. 'Level_1', 'Level_2', etc.)
		header = ['Level_' + str(i+1) for i in range(max_len_metadata)]
		header.append('Observation Ids')

		#add the sample ids to the header line
		header.extend(table.sample_ids)

		with open(output_name, 'w') as fout:
			fout.write("\t".join(header) + '\n')

			#now process each observation (row in the table)
			for obs_vals, obs_id, obs_metadata in table.iter(axis='observation'):
				if max_len_metadata > 0:
					# work on a copy so the padding and appends below do not
					# modify the metadata list owned by the table
					row = list(obs_metadata[metadata_name])
				else:
					row = []

				# pad with 'unclassified' if the metadata doesn't fill each level
				row.extend(['unclassified'] * (max_len_metadata - len(row)))

				#Add the observation id as the last "Level"
				if isNumber(obs_id):
					row.append('ID' + obs_id)
				else:
					row.append(obs_id)

				#Add count data to the row
				row.extend(map(str,obs_vals))
				fout.write("\t".join(row) + '\n')
예제 #12
0
    def test_make_otu_table_no_taxonomy(self):
        """make_otu_table without taxonomy parses to the expected BIOM table"""
        # tab-separated OTU map: OTU id followed by its sequence ids
        otu_map_lines = [
            "0\tABC_0\tDEF_1",
            "1\tABC_1",
            "x\tGHI_2\tGHI_3\tGHI_77",
            "z\tDEF_3\tXYZ_1",
        ]
        observed_json = make_otu_table(otu_map_lines,constructor=DenseOTUTable)
        expected_json = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
        # compare parsed tables rather than the raw JSON strings
        observed_table = parse_biom_table(observed_json.split('\n'))
        expected_table = parse_biom_table(expected_json.split('\n'))
        self.assertEqual(observed_table, expected_table)
예제 #13
0
    def test_sort_otu_table_by_mapping_field_some_values_differ(self):
        """ sorting by the 'Nothing' field gives nothing_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Nothing")

        wanted = parse_biom_table(self.nothing_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #14
0
    def test_sort_otu_table_by_mapping_field_some_values_same(self):
        """ sorting by the 'Name' field gives name_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Name")

        wanted = parse_biom_table(self.name_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #15
0
    def test_sort_otu_table_by_mapping_field_all_values_differ(self):
        """ sorting by the 'Age' field gives age_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Age")

        wanted = parse_biom_table(self.age_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #16
0
def iter_prediction_expectation_pairs(obs_dir_fp,
                                      exp_dir_fp,
                                      file_name_field_order,
                                      file_name_delimiter,
                                      verbose=False):
    """Iterate pairs of observed, expected biom file names"""
    input_files = sorted(listdir(obs_dir_fp))
    for file_number, f in enumerate(input_files):
        if verbose:
            print "\nExamining file {0} of {1}: {2}".format(
                file_number + 1, len(input_files), f)
        if 'accuracy_metrics' in f:
            print "%s is an Accuracy file...skipping" % str(f)
            continue
        #filename_components_list = f.split(file_name_delimiter)
        #Get predicted traits
        filename_metadata = get_metadata_from_filename(f,file_name_field_order,\
          file_name_delimiter,verbose=verbose)

        if filename_metadata.get('file_type', None) == 'predict_traits':
            if verbose:
                #print "Found a prediction file"
                print "\tLoading .biom format observation table:", f

            try:
                obs_table =\
                  parse_biom_table(open(join(obs_dir_fp,f),'U'))
            except ValueError:
                print 'Failed, skipping...'
                continue
            #    raise RuntimeError(\
            #      "Could not parse predicted trait file: %s.   Is it a .biom formatted file?" %(f))
        else:
            continue

        # Get paired observation file
        exp_filename = file_name_delimiter.join([
            'exp_biom_traits', filename_metadata['holdout_method'],
            filename_metadata['distance'], filename_metadata['organism']
        ])
        exp_filepath = join(exp_dir_fp, exp_filename)
        if verbose:
            print "\tLooking for the expected trait file matching %s here: %s" % (
                f, exp_filepath)

        try:
            exp_table =\
              parse_biom_table(open(exp_filepath,"U"))
        except IOError, e:
            if strict:
                raise IOError(e)
            else:
                if verbose:
                    print "Missing expectation file....skipping!"
                continue
        yield obs_table, exp_table, f
예제 #17
0
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    if opts.verbose:
        print "Loading otu table: ",opts.input_otu_table

    otu_table = parse_biom_table(open(opts.input_otu_table,'U'))
    ext=path.splitext(opts.input_count_table)[1]

    if opts.verbose:
        print "Loading count table: ", opts.input_count_table
    if (ext == '.gz'):
        genome_table = parse_biom_table(gzip.open(opts.input_count_table,'rb'))
    else:
        genome_table = parse_biom_table(open(opts.input_count_table,'U'))

    make_output_dir_for_file(opts.output_metagenome_table)

    if opts.accuracy_metrics:
        # Calculate accuracy metrics
        #unweighted_nsti = calc_nsti(otu_table,genome_table,weighted=False)
        #print "Unweighted NSTI:", unweighted_nsti
        
        weighted_nsti = calc_nsti(otu_table,genome_table,weighted=True)
        samples= weighted_nsti[0]
        nstis = list(weighted_nsti[1])
        #print "Samples:",samples
        #print "NSTIs:",nstis
        samples_and_nstis = zip(samples,nstis)
        #print "Samples and NSTIs:",samples_and_nstis
        lines = ["#Sample\tMetric\tValue\n"]
        #print weighted_nsti
        for sample,nsti in samples_and_nstis:
            line = "%s\tWeighted NSTI\t%s\n" %(sample,str(nsti))
            lines.append(line)

        if opts.verbose:
            for l in sorted(lines):
                print l
        if opts.verbose:
            print "Writing accuracy information to file:", opts.accuracy_metrics
        open(opts.accuracy_metrics,'w').writelines(sorted(lines))

    if opts.verbose:
        print "Predicting the metagenome..."
        
    predicted_metagenomes = predict_metagenomes(otu_table,genome_table)

    if opts.verbose:
        print "Writing results to output file: ",opts.output_metagenome_table
        
    make_output_dir_for_file(opts.output_metagenome_table)
    if(opts.format_tab_delimited):
        open(opts.output_metagenome_table,'w').write(predicted_metagenomes.delimitedSelf())
    else:
        open(opts.output_metagenome_table,'w').write(format_biom_table(predicted_metagenomes))
예제 #18
0
    def test_make_otu_table_no_taxonomy(self):
        """make_otu_table without taxonomy parses to the expected BIOM table"""
        # tab-separated OTU map: OTU id followed by its sequence ids
        otu_map_lines = [
            "0\tABC_0\tDEF_1",
            "1\tABC_1",
            "x\tGHI_2\tGHI_3\tGHI_77",
            "z\tDEF_3\tXYZ_1",
        ]
        observed_json = make_otu_table(otu_map_lines, constructor=DenseOTUTable)
        expected_json = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "x", "metadata": null}, {"id": "z", "metadata": null}], "format": "Biological Observation Matrix 0.9dev", "data": [[1, 1, 0, 0], [1, 0, 0, 0], [0, 0, 3, 0], [0, 1, 0, 1]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:49:15.978315", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
        # compare parsed tables rather than the raw JSON strings
        observed_table = parse_biom_table(observed_json.split('\n'))
        expected_table = parse_biom_table(expected_json.split('\n'))
        self.assertEqual(observed_table, expected_table)
예제 #19
0
    def test_make_otu_table_taxonomy(self):
        """make_otu_table with taxonomy parses to the expected BIOM table"""
        # tab-separated OTU map: OTU id followed by its sequence ids
        otu_map_lines = [
            "0\tABC_0\tDEF_1",
            "1\tABC_1",
            "x\tGHI_2\tGHI_3\tGHI_77",
            "z\tDEF_3\tXYZ_1",
        ]
        # only OTUs '0' and 'x' have a taxonomy entry
        taxonomy = {'0': 'Bacteria;Firmicutes', 'x': 'Bacteria;Bacteroidetes'}
        observed_json = make_otu_table(otu_map_lines, taxonomy,
                                       constructor=DenseOTUTable)
        expected_json = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
        # compare parsed tables rather than the raw JSON strings
        observed_table = parse_biom_table(observed_json.split('\n'))
        expected_table = parse_biom_table(expected_json.split('\n'))
        self.assertEqual(observed_table, expected_table)
예제 #20
0
    def test_sort_otu_table_by_mapping_field_all_values_differ(self):
        """ sorting by the 'Age' field gives age_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Age")

        wanted = parse_biom_table(self.age_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #21
0
    def test_sort_otu_table_by_mapping_field_some_values_differ(self):
        """ sorting by the 'Nothing' field gives nothing_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Nothing")

        wanted = parse_biom_table(self.nothing_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #22
0
    def test_sort_otu_table_by_mapping_field_some_values_same(self):
        """ sorting by the 'Name' field gives name_sorted_otu_table1"""
        table = parse_biom_table(self.otu_table1)
        mapping = parse_mapping_file(self.mapping_f2)

        observed = sort_otu_table_by_mapping_field(
            table, mapping, sort_field="Name")

        wanted = parse_biom_table(self.name_sorted_otu_table1)
        self.assertEqual(observed, wanted)
예제 #23
0
def main():
    option_parser, opts, args =\
                   parse_command_line_parameters(**script_info)

    min_args = 1
    if len(args) < min_args:
       option_parser.error('A BIOM file must be provided.')

    file_name = args[0]

    #allow file to be optionally gzipped (must use extension '.gz')
    ext=splitext(file_name)[1]
    if (ext == '.gz'):
        table = parse_biom_table(gzip.open(file_name,'rb'))
    else:
        table = parse_biom_table(open(file_name,'U'))

    metadata_name=opts.metadata

    if metadata_name is None:
        max_len_metadata=0
    elif table.ObservationMetadata and metadata_name in table.ObservationMetadata[0]:
        #figure out the longest list within the given metadata
        max_len_metadata = max(len(p[metadata_name]) for p in table.ObservationMetadata)
    else:
        raise ValueError("'"+metadata_name+"' was not found in the BIOM table. Please try changing --metadata to a valid metadata field.")

    #make the header line
    header=[]
    #make simple labels for each level in the metadata (e.g. 'Level_1', 'Level_2', etc.) "+1" for the observation id as well.
    for i in range(max_len_metadata+1):
        header.append('Level_'+ str(i+1))
    
    #add the sample ids to the header line
    header.extend(table.SampleIds)
    
    print "\t".join(header)

    #now process each observation (row in the table)
    for obs_vals,obs_id,obs_metadata in table.iterObservations():
        row=[]
        if max_len_metadata >0:
            row=obs_metadata[metadata_name]
        
        #Add blanks if the metadata doesn't fill each level
        if len(row) < max_len_metadata:
            for i in range(max_len_metadata - len(row)):
                row.append('')

        #Add the observation id as the last "Level"
        row.append(obs_id)

        #Add count data to the row
        row.extend(map(str,obs_vals))
        print "\t".join(row)
예제 #24
0
def main():
    """Merge the BIOM tables in opts.input_fps and write them to opts.output_fp.

    The first table is the starting point; every further table is merged
    into it in the order given. Raises IndexError when opts.input_fps is
    empty.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_fps = opts.input_fps

    # each input handle is closed as soon as its table has been parsed
    with open(input_fps[0], 'U') as first_f:
        master = parse_biom_table(first_f)
    for input_fp in input_fps[1:]:
        with open(input_fp, 'U') as next_f:
            master = master.merge(parse_biom_table(next_f))

    # the with-block closes the output even if formatting or writing fails
    with open(opts.output_fp, 'w') as out_f:
        out_f.write(format_biom_table(master))
예제 #25
0
    def test_classic_to_biom(self):
        """Classic input run with to_json=True yields a parseable BIOM table."""
        classic_table = parse_biom_table(self.classic_lines1)
        result = self.cmd(table=classic_table, to_json=True,
                          table_type='OTU table')
        self.assertEqual(result.keys(), ['table'])

        # serialize the returned table and parse it again before inspecting it
        json_text = result['table'][0].to_json('testing')
        reparsed = parse_biom_table(json_text)
        self.assertEqual(type(reparsed), Table)
        self.assertEqual(len(reparsed.sample_ids), 9)
        self.assertEqual(len(reparsed.observation_ids), 14)
        self.assertEqual(reparsed.sample_metadata, None)
        self.assertNotEqual(reparsed.observation_metadata, None)
예제 #26
0
    def test_biom_to_classic(self):
        """BIOM input run with to_tsv=True yields the expected classic text."""
        # default metadata column name
        converted = self.cmd(table=parse_biom_table(self.biom_lines1),
                             to_tsv=True, header_key='taxonomy')
        self.assertEqual(converted.keys(), ['table'])
        self.assertEqual(converted['table'][0], classic1)

        # output_metadata_id names the last column of the second line
        renamed = self.cmd(table=parse_biom_table(self.biom_lines1),
                           to_tsv=True, header_key='taxonomy',
                           output_metadata_id='foo')
        self.assertEqual(renamed.keys(), ['table'])
        second_line = renamed['table'][0].split('\n')[1]
        last_column = second_line.split('\t')[-1]
        self.assertEqual(last_column, 'foo')
예제 #27
0
    def test_get_shared_otus(self):
        """get_shared_otus returns the stored result for three argument pairs."""
        # arguments (1, 1)
        table_1_1 = parse_biom_table(self.get_shared_otus_1_1_input)
        observed_1_1 = get_shared_otus([table_1_1], 1, 1)
        self.assertEqual(self.get_shared_otus_1_1_result, observed_1_1)

        # arguments (2, 0.6)
        table_2_06 = parse_biom_table(self.get_shared_otus_2_06_input)
        observed_2_06 = get_shared_otus([table_2_06], 2, 0.6)
        self.assertEqual(self.get_shared_otus_2_06_result, observed_2_06)

        # arguments (5, 0.9)
        table_5_09 = parse_biom_table(self.get_shared_otus_5_09_input)
        observed_5_09 = get_shared_otus([table_5_09], 5, 0.9)
        self.assertEqual(self.get_shared_otus_5_09_result, observed_5_09)
    def test_classic_to_biom(self):
        """Classic input run with to_json=True yields a parseable BIOM table."""
        classic_table = parse_biom_table(self.classic_lines1)
        result = self.cmd(table=classic_table, to_json=True)
        self.assertEqual(result.keys(), ['table'])

        # parse the returned table entry again before inspecting it
        reparsed = parse_biom_table(result['table'][0])
        self.assertEqual(type(reparsed), Table)
        self.assertEqual(len(reparsed.sample_ids), 9)
        self.assertEqual(len(reparsed.observation_ids), 14)
        self.assertEqual(reparsed.sample_metadata, None)
        self.assertNotEqual(reparsed.observation_metadata, None)
예제 #29
0
    def test_make_otu_table_taxonomy(self):
        """make_otu_table with taxonomy parses to the expected BIOM table"""
        # tab-separated OTU map: OTU id followed by its sequence ids
        otu_map_lines = [
            "0\tABC_0\tDEF_1",
            "1\tABC_1",
            "x\tGHI_2\tGHI_3\tGHI_77",
            "z\tDEF_3\tXYZ_1",
        ]
        # only OTUs '0' and 'x' have a taxonomy entry
        taxonomy = {'0': 'Bacteria;Firmicutes', 'x': 'Bacteria;Bacteroidetes'}
        observed_json = make_otu_table(otu_map_lines, taxonomy,
                                       constructor=DenseOTUTable)
        expected_json = """{"rows": [{"id": "0", "metadata": {"taxonomy": ["Bacteria", "Firmicutes"]}}, {"id": "1", "metadata": {"taxonomy": ["None"]}}, {"id": "x", "metadata": {"taxonomy": ["Bacteria", "Bacteroidetes"]}}, {"id": "z", "metadata": {"taxonomy": ["None"]}}], "format": "Biological Observation Matrix 0.9dev", "data": [[1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.0, 0.0], [0.0, 1.0, 0.0, 1.0]], "columns": [{"id": "ABC", "metadata": null}, {"id": "DEF", "metadata": null}, {"id": "GHI", "metadata": null}, {"id": "XYZ", "metadata": null}], "generated_by": "QIIME 1.4.0-dev, svn revision 2532", "matrix_type": "dense", "shape": [4, 4], "format_url": "http://biom-format.org", "date": "2011-12-21T00:19:30.961477", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
        # compare parsed tables rather than the raw JSON strings
        observed_table = parse_biom_table(observed_json.split('\n'))
        expected_table = parse_biom_table(expected_json.split('\n'))
        self.assertEqual(observed_table, expected_table)
    def test_biom_to_classic(self):
        """BIOM input run with to_tsv=True yields the expected classic text."""
        # default metadata column name
        converted = self.cmd(table=parse_biom_table(self.biom_lines1),
                             to_tsv=True, header_key='taxonomy')
        self.assertEqual(converted.keys(), ['table'])
        self.assertEqual(converted['table'][0], classic1)

        # output_metadata_id names the last column of the second line
        renamed = self.cmd(table=parse_biom_table(self.biom_lines1),
                           to_tsv=True, header_key='taxonomy',
                           output_metadata_id='foo')
        self.assertEqual(renamed.keys(), ['table'])
        second_line = renamed['table'][0].split('\n')[1]
        last_column = second_line.split('\t')[-1]
        self.assertEqual(last_column, 'foo')
    def setUp(self):
        """Parse the BIOM fixtures and store the remaining fixtures as-is."""
        # fixtures that are parsed into table objects, in this order
        parsed_fixtures = (
            ('otu_table1', otu_table1),
            ('otu_table_with_taxonomy', otu_table_with_taxonomy),
            ('genome_table1', genome_table1),
            ('genome_table2', genome_table2),
            ('predicted_metagenome_table1', predicted_metagenome_table1),
        )
        for attr_name, raw_table in parsed_fixtures:
            setattr(self, attr_name, parse_biom_table(raw_table))

        # expected partition tables are stored without parsing
        self.predicted_gene_partition_table = predicted_gene_partition_table
        self.predicted_gene_partition_table_with_taxonomy = (
            predicted_gene_partition_table_with_taxonomy)

        #Examples of BIOM format value,id,metadata tuples
        #as returned when iterating over a table
        #metadata are defined at the bottom of this file.
        self.metadata_example = [
            (700.0, "Gene1", example_metadata1),
            (250.0, "Gene2", example_metadata2),
            (0.0, "Gene3", example_metadata3),
        ]
예제 #32
0
 def test_split_otu_table_on_sample_metadata_extra_mapping_entries(self):
     """ splitting on 'Treatment' with mapping_f2 gives the tables in otu_table_exp1 """
     split_results = split_otu_table_on_sample_metadata(self.otu_table_f1,
                                                        self.mapping_f2,
                                                        "Treatment")
     raw_pairs = list(split_results)

     # parse both sides and sort them so the pairs line up
     observed = sorted([(id_, parse_biom_table(e)) for id_, e in raw_pairs])
     expected = sorted([(id_, parse_biom_table(e))
                        for id_, e in otu_table_exp1])

     for obs_pair, exp_pair in zip(observed, expected):
         failure_msg = "OTU tables are not equal:\n%s\n%s" % (
             format_biom_table(obs_pair[1]), format_biom_table(exp_pair[1]))
         self.assertEqual(obs_pair, exp_pair, failure_msg)
예제 #33
0
    def setUp(self):
        """Parse the BIOM fixtures and store the remaining fixtures as-is."""
        # fixtures that are parsed into table objects, in this order
        parsed_fixtures = (
            ('otu_table1', otu_table1),
            ('otu_table_with_taxonomy', otu_table_with_taxonomy),
            ('genome_table1', genome_table1),
            ('genome_table2', genome_table2),
            ('predicted_metagenome_table1', predicted_metagenome_table1),
        )
        for attr_name, raw_table in parsed_fixtures:
            setattr(self, attr_name, parse_biom_table(raw_table))

        # expected partition tables are stored without parsing
        self.predicted_gene_partition_table = predicted_gene_partition_table
        self.predicted_gene_partition_table_with_taxonomy = (
            predicted_gene_partition_table_with_taxonomy)

        #Examples of BIOM format value,id,metadata tuples
        #as returned when iterating over a table
        #metadata are defined at the bottom of this file.
        self.metadata_example = [
            (700.0, "Gene1", example_metadata1),
            (250.0, "Gene2", example_metadata2),
            (0.0, "Gene3", example_metadata3),
        ]
예제 #34
0
    def test_split_otu_table_on_sample_metadata_extra_mapping_entries(self):
        """ splitting on 'Treatment' with mapping_f2 gives the tables in otu_table_exp1 """
        split_results = split_otu_table_on_sample_metadata(self.otu_table_f1,
                                                           self.mapping_f2,
                                                           "Treatment")
        raw_pairs = list(split_results)

        # parse both sides and sort them so the pairs line up
        observed = sorted([(id_, parse_biom_table(e)) for id_, e in raw_pairs])
        expected = sorted([(id_, parse_biom_table(e))
                           for id_, e in otu_table_exp1])

        for obs_pair, exp_pair in zip(observed, expected):
            failure_msg = "OTU tables are not equal:\n%s\n%s" % (
                format_biom_table(obs_pair[1]), format_biom_table(exp_pair[1]))
            self.assertEqual(obs_pair, exp_pair, failure_msg)
예제 #35
0
def load_otus(conn, table, params):
    """Load the observations of a BIOM table into the database.

    The table is parsed with parse_biom_table and registered in
    ``observation_tables``. Its per-sample values are then written to
    ``<study_id>_observations.csv`` in the working directory, bulk-loaded
    into ``observations`` and committed, after which the CSV is removed.

    conn: database connection; must provide cursor() and commit()
    table: BIOM table input accepted by parse_biom_table
    params: dict with the keys 'table_id', 'study_id', 'ref', 'trim' and
        'similarity' ('similarity' is formatted with %f)
    """
    table = parse_biom_table(table)
    cur = conn.cursor()

    print("loading observations...")
    print("locking...")
    cur.execute('lock table observations')
    cur.execute('lock table observation_tables')

    # NOTE(review): this statement is built by string interpolation. If any
    # value in params can come from an untrusted source, switch to the
    # driver's parameter binding (paramstyle not visible from this block).
    cur.execute("insert into observation_tables values ('%s','%s','%s','%s',%f)" % (
        params['table_id'],
        params['study_id'],
        params['ref'],
        params['trim'],
        params['similarity']))

    tableid = params['table_id']
    study = params['study_id']
    print("writing observations...")
    obs_fname = '%s_observations.csv' % study
    # The context manager closes (and flushes) the CSV even if a write fails,
    # and guarantees it is complete on disk before the bulk load reads it.
    with open(obs_fname, 'w') as f:
        for values, sid, md in table.iterSamples(conv_to_np=False):
            study_sample = "%s::%s" % (study, sid)
            # Keys are (row, column) index pairs; the column index selects
            # the observation id the value belongs to.
            for (_row, c_idx), val in values.items():
                f.write("%s,%s,%s,%f\n" % (tableid, study_sample,
                                           table.ObservationIds[c_idx], val))

    bulk_load("observations", obs_fname, cur)

    print("committing...")
    conn.commit()

    os.remove(obs_fname)
예제 #36
0
    def getResult(self, data_path, tree_path=None):
        """Returns distance matrix from (incidence matrix and optionally tree).

        Parameters:

        data_path: path to the OTU table file; it is opened here and read
        with parse_biom_table.

        tree_path: path or object.
        if method is phylogenetic, must supply tree_path.
        if path, path to
        Newick-format tree file where taxon ids match taxon ids in the
        input data file.

        returns 2d dist matrix, list of sample names ordered as in dist mtx
        """
        # if it's a phylogenetic metric, read the tree
        if self.IsPhylogenetic:
            tree = self.getTree(tree_path)
        else:
            tree = None

        # Close the file as soon as the table has been parsed instead of
        # leaving the handle open until garbage collection.
        with open(data_path, 'U') as otu_table_f:
            otu_table = parse_biom_table(otu_table_f)

        # Build the matrix handed to the metric: dense tables expose it
        # directly (transposed), other tables are read sample by sample.
        if isinstance(otu_table, DenseTable):
            otumtx = otu_table._data.T
        else:
            otumtx = asarray(list(otu_table.iterSampleData()))

        sample_ids = list(otu_table.SampleIds)

        # get the 2d dist matrix from beta diversity analysis
        if self.IsPhylogenetic:
            return (self.Metric(otumtx, otu_table.ObservationIds, tree,
                                otu_table.SampleIds),
                    sample_ids)
        else:
            return self.Metric(otumtx), sample_ids
예제 #37
0
    def test_qualitative(self):
        """TableSummarizer functions as expected with qualitative=True."""
        summarizer = TableSummarizer()
        # The command receives the parsed table together with its raw lines.
        parsed_table = parse_biom_table(self.biom1_lines)
        raw_lines = self.biom1_lines.split("\n")
        result = summarizer(table=(parsed_table, raw_lines),
                            qualitative=True)
        self.assertEqual(result["biom_summary"],
                         self.summary_qualitative_lines)
예제 #38
0
def main():
    """Filter the observations of an OTU table by taxonomy and write it out.

    Reads the table at opts.input_otu_table_fp, keeps the observations
    selected by the function built from the comma-separated
    opts.positive_taxa / opts.negative_taxa lists and opts.metadata_field,
    and writes the result to opts.output_otu_table_fp.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    with open(opts.input_otu_table_fp, 'U') as input_table_f:
        input_table = parse_biom_table(input_table_f)
    metadata_field = opts.metadata_field

    # Options that were not supplied stay None; supplied ones are split into
    # lists of taxa.
    positive_taxa = opts.positive_taxa
    if positive_taxa is not None:
        positive_taxa = positive_taxa.split(',')

    negative_taxa = opts.negative_taxa
    if negative_taxa is not None:
        negative_taxa = negative_taxa.split(',')

    filter_fn = get_otu_ids_from_taxonomy_f(
        positive_taxa,
        negative_taxa,
        metadata_field)
    output_table = input_table.filterObservations(filter_fn)

    # The output file is only opened once there is something to write, so a
    # failure above no longer leaves a truncated file behind.
    with open(opts.output_otu_table_fp, 'w') as output_table_f:
        output_table_f.write(format_biom_table(output_table))
예제 #39
0
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp: path to the OTU table, read with parse_biom_table
    level: passed to md_processor as the level to split on
    output_dir: directory (created via create_dir) that receives one
        ``otu_table_<bin>.biom`` file per bin
    md_identifier: observation metadata key handed to md_processor
    md_processor: callable(obs_md, md_identifier, level) returning the bin
        for an observation

    Returns the list of file paths written.

    Raises KeyError, TypeError or AttributeError (with an explanatory
    message) when md_processor raises the corresponding exception.
    """
    with open(otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)
    create_dir(output_dir)

    def split_f(obs_md):
        # Re-raise the processor's failures as the same exception type but
        # with a message that tells the user which option to adjust.
        try:
            return md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError(
                "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier)
        except TypeError:
            raise TypeError(
                "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError(
                "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\".")

    results = []
    for bin_name, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin_name)
        # Each per-bin file is closed before moving on to the next bin.
        with open(output_fp, 'w') as output_f:
            output_f.write(format_biom_table(sub_otu_table))
        results.append(output_fp)
    return results
예제 #40
0
 def setUp(self):
     """Create the command under test and load the fixtures it needs."""
     # Command object exercised by the tests.
     self.cmd = MetadataAdder()

     # BIOM fixture: kept in raw form and as the table parsed from it.
     raw_biom = biom1
     self.biom_lines1 = raw_biom
     self.biom_table1 = parse_biom_table(raw_biom)

     # Metadata fixtures, split into one list entry per line.
     sample_md_lines = sample_md1.split('\n')
     obs_md_lines = obs_md1.split('\n')
     self.sample_md_lines1 = sample_md_lines
     self.obs_md_lines1 = obs_md_lines
예제 #41
0
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp: path to the OTU table, read with parse_biom_table
    level: passed to md_processor as the level to split on
    output_dir: directory (created via create_dir) that receives one
        ``otu_table_<bin>.biom`` file per bin
    md_identifier: observation metadata key handed to md_processor
    md_processor: callable(obs_md, md_identifier, level) returning the bin
        for an observation

    Returns the list of file paths written.

    Raises KeyError, TypeError or AttributeError (with an explanatory
    message) when md_processor raises the corresponding exception.
    """
    # Parse the table inside a context manager so the input handle is closed
    # right away instead of being left to the garbage collector.
    with open(otu_table_fp, 'U') as table_f:
        otu_table = parse_biom_table(table_f)
    create_dir(output_dir)

    def split_f(obs_md):
        # Same exception type as the processor raised, but with a message
        # pointing at the option the user most likely needs to change.
        try:
            return md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError(
                "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier)
        except TypeError:
            raise TypeError(
                "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError(
                "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\".")

    results = []
    for bin_name, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin_name)
        with open(output_fp, 'w') as output_f:
            output_f.write(format_biom_table(sub_otu_table))
        results.append(output_fp)
    return results
예제 #42
0
 def setUp(self):
     """Prepare the MetadataAdder command and the fixtures for the tests."""
     self.cmd = MetadataAdder()

     # Raw BIOM fixture and the table parsed from that same object.
     biom_source = biom1
     self.biom_lines1 = biom_source
     self.biom_table1 = parse_biom_table(biom_source)

     # Sample and observation metadata, one list entry per line.
     sample_lines = sample_md1.split('\n')
     observation_lines = obs_md1.split('\n')
     self.sample_md_lines1 = sample_lines
     self.obs_md_lines1 = observation_lines
예제 #43
0
def load_category_files(category_files):
    """Loads the category tables as biom files

    INPUTS:
        category_files -- a dictionary that associates the mapping category
                    (key) with the file path to the otu_table summarizing that

    OUTPUTS:
        category_tables -- a dictionary that associates the mapping category
                    with the summarized otu table for the category.

    Categories whose file does not exist are reported on stdout and left
    out of the result.

    RAISES:
        ValueError -- if no file could be loaded for any category (this
                    includes an empty category_files dictionary).
    """
    category_tables = {}
    # One message per category whose file is missing.
    watch_list = []

    for category, category_file in category_files.items():
        if isfile(category_file):
            # Close each table file as soon as it has been parsed.
            with open(category_file, 'U') as category_f:
                category_tables[category] = parse_biom_table(category_f)
        else:
            watch_list.append('The summarized OTU table file cannot be found '
                              'for %s. \n%s is not in the file path.' %
                              (category, category_file))

    if watch_list:
        print('The following category files could not be found: \n%s'
              % '\n'.join(watch_list))
    # Every category is missing (or none was supplied): nothing to return.
    if len(watch_list) == len(category_files):
        raise ValueError('No files could be found for any of the supplied '
                         'categories. \n%s' % '\n'.join(watch_list))

    return category_tables
예제 #44
0
def add_counts_to_mapping(biom_lines, mapping_lines, otu_counts, output_fp):
    """Insert a per-sample count column into a mapping file and write it out

    Inputs:
        biom_lines: BIOM table contents, handed to parse_biom_table
        mapping_lines: mapping file contents, handed to parse_mapping_file
        otu_counts: forwarded as binary_counts to
            compute_counts_per_sample_stats
        output_fp: destination handed to write_corrected_mapping
    """
    biom_table = parse_biom_table(biom_lines)
    map_data, headers, comments = parse_mapping_file(mapping_lines)

    # Of the five statistics returned, only the per-sample counts are used.
    _, _, _, _, counts_per_sample = compute_counts_per_sample_stats(
        biom_table, binary_counts=otu_counts)

    # The new column is placed immediately before the last existing column;
    # each row is looked up by its first field.
    new_column = len(headers) - 1
    headers.insert(new_column, "NumIndividuals")
    for sample_row in map_data:
        sample_id = sample_row[0]
        sample_row.insert(new_column, str(counts_per_sample[sample_id]))

    write_corrected_mapping(output_fp, headers, comments, map_data)
예제 #45
0
def main():
    """Collapse the observations of a BIOM table by a metadata category.

    Reads opts.input_fp, collapses its observations with the function built
    by make_collapse_f from opts.metadata_category, opts.level and
    opts.ignore, and writes the result to opts.output_fp, either
    tab-delimited (opts.format_tab_delimited) or as BIOM JSON.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    if opts.level <= 0:
        # Report through the parser returned above; the previous code called
        # `parser.error`, but no name `parser` is defined in this function.
        option_parser.error("level must be greater than zero!")

    collapse_f = make_collapse_f(opts.metadata_category, opts.level,
                                 opts.ignore)
    with open(opts.input_fp) as input_f:
        table = parse_biom_table(input_f)
    result = table.collapseObservationsByMetadata(
        collapse_f,
        one_to_many=True,
        norm=False,
        one_to_many_md_key=opts.metadata_category)

    if opts.format_tab_delimited:
        output = result.delimitedSelf(
            header_key=opts.metadata_category,
            header_value=opts.metadata_category,
            metadata_formatter=lambda s: '; '.join(s))
    else:
        output = result.getBiomFormatJsonString(
            'picrust %s - categorize_by_function' % __version__)

    # Format first, then open the output: the file is closed reliably and is
    # not created when formatting fails.
    with open(opts.output_fp, 'w') as f:
        f.write(output)
예제 #46
0
    def getResult(self, data_path, tree_path=None):
        """Returns distance matrix from (incidence matrix and optionally tree).

        Parameters:

        data_path: path to the OTU table file; it is opened here and read
        with parse_biom_table.

        tree_path: path or object.
        if method is phylogenetic, must supply tree_path.
        if path, path to
        Newick-format tree file where taxon ids match taxon ids in the
        input data file.

        returns 2d dist matrix, list of sample names ordered as in dist mtx
        """
        # if it's a phylogenetic metric, read the tree
        if self.IsPhylogenetic:
            tree = self.getTree(tree_path)
        else:
            tree = None

        # The context manager closes the table file once it is parsed; the
        # bare open() used before never closed it.
        with open(data_path, 'U') as data_f:
            otu_table = parse_biom_table(data_f)

        # Dense tables expose their matrix directly (transposed here); other
        # tables are read one sample at a time.
        if isinstance(otu_table, DenseTable):
            otumtx = otu_table._data.T
        else:
            otumtx = asarray(list(otu_table.iterSampleData()))

        sample_names = list(otu_table.SampleIds)

        # get the 2d dist matrix from beta diversity analysis
        if self.IsPhylogenetic:
            return (self.Metric(otumtx, otu_table.ObservationIds,
                                tree, otu_table.SampleIds),
                    sample_names)
        else:
            return self.Metric(otumtx), sample_names
예제 #47
0
def main():
    """Run simsam_range_to_files with the inputs named on the command line.

    Parses the OTU table (opts.otu_table) and tree (opts.tree_file), opens
    the optional mapping file (opts.mapping_fp), and calls
    simsam_range_to_files with the comma-separated opts.num and opts.dissim
    values, writing into opts.output_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    otu_table_fp = opts.otu_table
    with open(otu_table_fp, 'U') as otu_table_fh:
        otu_table = parse_biom_table(otu_table_fh)

    with open(opts.tree_file, 'U') as tree_fh:
        tree = DndParser(tree_fh)

    # The mapping file is optional; output names reuse the input basenames.
    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    try:
        simsam_range_to_files(
            otu_table,
            tree,
            simulated_sample_sizes=[int(n) for n in opts.num.split(',')],
            dissimilarities=[float(d) for d in opts.dissim.split(',')],
            output_dir=output_dir,
            mapping_f=mapping_f,
            output_table_basename=input_table_basename,
            output_map_basename=input_map_basename)
    finally:
        # The mapping handle was previously never closed.
        if mapping_f is not None:
            mapping_f.close()
예제 #48
0
def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, the shared OTUs are calculated between each
                      pair of samples and this sample as well. Useful,
                      e.g. when the reference sample is the Donor in a
                      transplant study

    Returns the formatted distance matrix followed by a newline.
    """
    otu_table = parse_biom_table(infile)
    sample_ids = otu_table.SampleIds
    n_samples = len(sample_ids)
    shared_counts = zeros((n_samples, n_samples), dtype=int)

    # Only the lower triangle (diagonal included) is computed; every value
    # is mirrored into the upper triangle.
    for row, row_id in enumerate(sample_ids):
        for col, col_id in enumerate(sample_ids[:row + 1]):
            if reference_sample:
                shared = _calc_shared_phylotypes_multiple(
                    otu_table, [row_id, col_id, reference_sample])
            else:
                shared = _calc_shared_phylotypes_pairwise(
                    otu_table, row_id, col_id)
            shared_counts[row, col] = shared
            shared_counts[col, row] = shared

    return format_distance_matrix(sample_ids, shared_counts) + "\n"
예제 #49
0
def create_fused_data_matrix(biom_fname, map_fname, foi_fname, is_fasta=False):
    """Fuse the counts of a BIOM file with environmental data.

    @biom_fname - path to the BIOM file containing the interesting counts
        (k-mers or OTUs)
    @map_fname - path to the mapping file with the environmental factors
    @foi_fname - path to the tab delimited text file containing two
        columns: the first one contains the keys to the important
        environmental factors, and the second column contains whether the
        variable is continuous (e.g. temperature) or discrete (male/female)
    @is_fasta - reading the counts from a fasta file is not implemented;
        passing True raises NotImplementedError

    Returns a tuple of:
        complete_matrix - matrix containing both OTU/k-mer data AND
            environmental data
        complete_sites - names of the samples kept by merge_matrices
        variable_names - names of the variables as returned by
            biom_table_to_array
        environmental_param - names of the environmental parameters (as
            used in the foi_fname)
        hashtable_env - correspondences between discrete variables and
            class affected (for instance male->1, female->0)
    """
    if is_fasta:
        # The fasta branch never produced data_matrix / site_names /
        # variable_names, so it always crashed with an UnboundLocalError at
        # the merge step below. Fail up front, before opening any file.
        raise NotImplementedError(
            "Reading k-mer counts from a fasta file is not implemented.")

    with open(biom_fname, 'U') as biom_f:
        biom_table = parse_biom_table(biom_f)
    # Convert the biom table to an array plus its row/column names
    data_matrix, site_names, variable_names = biom_table_to_array(
        biom_table)

    with open(map_fname, 'U') as map_f, open(foi_fname, 'U') as foi_f:
        (env_table, site_names_env, environmental_param, is_continuous,
         hashtable_env) = read_environment_table(map_f, foi_f)

    # Merge both data matrices making sure to keep only the sites that are
    # common to both mapping and biom files and making sure that each site
    # from one file is well aligned with the associated one from the other
    # file.
    # NOTE(review): complete_variables is computed but the function returns
    # variable_names instead - confirm which one callers expect.
    complete_matrix, complete_sites, complete_variables = merge_matrices(
        data_matrix, env_table, site_names, site_names_env, variable_names,
        environmental_param)

    return complete_matrix, complete_sites, variable_names, environmental_param, hashtable_env
예제 #50
0
 def test_format_tep_file_lines(self):
     """ format_tep_file_lines: this converts files into tep lines """
     
     # set variables
     # prefs_dict1 holds a single 'TEST1' sample coloring with a red and a
     # blue entry; test_biom2 is the parsed biom2 fixture.
     prefs_dict1 = {'sample_coloring': {'TEST1': {'column': 'TEST1', 
         'colors': (('red', (0, 100, 100)), ('blue', (240, 100, 100)))}}}
     test_biom2 = parse_biom_table(biom2)
     
     # test with prefs file
     # The expected lines end with a '>>pre' section containing the two color
     # triples from prefs_dict1 and a '>defaultTEST1:TEST1' line.
     exp1 = ['>>tre\n', "['(tax1:0.00000043418318065054,((tax2:0.01932550067944402081,tax3:0.08910446960529855298):0.00000043418318065054,tax4:0.17394765077611337722):0.00000043418318065054,tax5:0.00000043418318065054):0.0;']", '\n', '>>otm\n#OTU ID\tOTU Metadata\n', u'tax1\tk__Bacteria;p__Proteobacteria;', '\n', u'tax2\tk__Bacteria;p__Cyanobacteria;', '\n', '>>osm\n', '# Constructed from biom file\n#OTU ID\tsam1\tsam2\tConsensus Lineage\ntax1\t7.0\t4.0\tk__Bacteria;p__Proteobacteria\ntax2\t1.0\t2.0\tk__Bacteria;p__Cyanobacteria', '\n>>sam\n', "['#SampleID\\tcol1\\tcol0\\tDescription', 'sam1\\tv1_3\\tv0_3\\td1', 'sam2\\taval\\tanother\\td2']", '\n>>pre\n', '0,100,100,\n', '240,100,100,\n', '>defaultTEST1:TEST1\n']
     obs1 = format_tep_file_lines(test_biom2, 
                          StringIO(example_mapping_file2.split('\n')), 
                          StringIO(example_tree.split('\n')), 
                          prefs_dict1)
     
     self.assertEqual(obs1,exp1)
     
     # test without prefs file
     # With an empty prefs dict the expected lines stop after the '>>sam'
     # section: there is no '>>pre' section at all.
     exp2 = ['>>tre\n', "['(tax1:0.00000043418318065054,((tax2:0.01932550067944402081,tax3:0.08910446960529855298):0.00000043418318065054,tax4:0.17394765077611337722):0.00000043418318065054,tax5:0.00000043418318065054):0.0;']", '\n', '>>otm\n#OTU ID\tOTU Metadata\n', u'tax1\tk__Bacteria;p__Proteobacteria;', '\n', u'tax2\tk__Bacteria;p__Cyanobacteria;', '\n', '>>osm\n', '# Constructed from biom file\n#OTU ID\tsam1\tsam2\tConsensus Lineage\ntax1\t7.0\t4.0\tk__Bacteria;p__Proteobacteria\ntax2\t1.0\t2.0\tk__Bacteria;p__Cyanobacteria', '\n>>sam\n', "['#SampleID\\tcol1\\tcol0\\tDescription', 'sam1\\tv1_3\\tv0_3\\td1', 'sam2\\taval\\tanother\\td2']"]
     obs2 = format_tep_file_lines(test_biom2, 
                          StringIO(example_mapping_file2.split('\n')), 
                          StringIO(example_tree.split('\n')), 
                          {})
     
     self.assertEqual(obs2,exp2)
예제 #51
0
    def test_default(self):
        """TableSummarizer functions as expected with qualitative=False."""
        summarizer = TableSummarizer()
        # The command receives the parsed table together with its raw lines.
        parsed_table = parse_biom_table(self.biom1_lines)
        raw_lines = self.biom1_lines.split("\n")
        result = summarizer(table=(parsed_table, raw_lines),
                            qualitative=False)
        self.assertEqual(result["biom_summary"], self.summary_default_lines)
def readBIOM(fileName):
    """Build a taxonomy tree with per-sample counts from a BIOM file.

    fileName: path to the BIOM file, read with parse_biom_table.

    Returns the root TaxonNode. Its sampleIDs attribute holds the table's
    sample ids and its maxdepth the largest ``len(taxonomy) - 1`` seen. For
    every observation the nodes along its "taxonomy" metadata are created
    as needed, and the observation's counts are stored on (or added
    element-wise to) the node of the last taxonomy entry.
    """
    with open(fileName, "r") as f:
        table = parse_biom_table(f)

    root = TaxonNode()
    root.sampleIDs = list(table._sample_ids)

    for counts, _otu_name, metadata in table.iter(axis='observation'):
        taxonomy = metadata["taxonomy"]
        root.maxdepth = max(root.maxdepth, len(taxonomy) - 1)

        # Build Tree
        node = root
        leaf_level = len(taxonomy) - 1
        for level, taxon in enumerate(taxonomy):
            child = node.getNode(taxon)
            if child is None:
                child = node.addNode(taxon, None, None)
            node = child

            # Detect the leaf by position rather than by name: a name that
            # also occurs higher up in the lineage must not receive counts.
            if level == leaf_level:
                # `is not None` instead of `!= None`: counts may be an
                # array, for which `!=` compares element-wise.
                if node.counts is not None:
                    # A real list (not a lazy map object) so the sum can be
                    # iterated again for the next observation.
                    node.counts = [old + new for old, new
                                   in zip(node.counts, counts)]
                else:
                    node.counts = counts
    return root
예제 #53
0
def main():
    """Sort the samples of an OTU table and write the sorted table.

    The order comes from, in order of precedence: a mapping file field
    (opts.sort_field together with opts.mapping_fp), a file of sorted sample
    ids (opts.sorted_sample_ids_fp), or a case-insensitive natural sort of
    the table's own sample ids. The result is written to opts.output_fp.
    """
    option_parser, opts, args =\
      parse_command_line_parameters(**script_info)

    with open(opts.input_otu_table, 'U') as otu_table_f:
        otu_table_data = parse_biom_table(otu_table_f)
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    # Each helper file stays open until the sorted table has been built, in
    # case the parsed data is consumed lazily, and is closed right after.
    if sort_field and mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data = parse_mapping_file(mapping_f)
            result = sort_otu_table_by_mapping_field(otu_table_data,
                                                     mapping_data,
                                                     sort_field)
    elif sorted_sample_ids_fp:
        with open(sorted_sample_ids_fp, 'U') as sorted_sample_ids_f:
            sorted_sample_ids = sample_ids_from_f(sorted_sample_ids_f)
            result = sort_otu_table(otu_table_data, sorted_sample_ids)
    else:
        result = sort_otu_table(
            otu_table_data, natsort_case_insensitive(otu_table_data.SampleIds))

    # format and write the otu table
    result_str = format_biom_table(result)
    with open(opts.output_fp, 'w') as of:
        of.write(result_str)
예제 #54
0
def calc_shared_phylotypes(infile, reference_sample=None):
    """Calculates number of shared phylotypes for each pair of sample.

    infile: otu table filehandle

    reference_sample: if set, the shared OTUs are calculated between each
                      pair of samples and this sample as well. Useful,
                      e.g. when the reference sample is the Donor in a
                      transplant study

    Returns the formatted distance matrix followed by a newline.
    """
    otu_table = parse_biom_table(infile)
    all_ids = otu_table.SampleIds
    size = len(all_ids)
    matrix = zeros((size, size), dtype=int)

    # Walk the lower triangle (diagonal included) and mirror each value so
    # the resulting matrix is symmetric.
    for i, first_id in enumerate(all_ids):
        for j, second_id in enumerate(all_ids[:i + 1]):
            if reference_sample:
                n_shared = _calc_shared_phylotypes_multiple(
                    otu_table, [first_id, second_id, reference_sample])
            else:
                n_shared = _calc_shared_phylotypes_pairwise(
                    otu_table, first_id, second_id)
            matrix[i, j] = n_shared
            matrix[j, i] = n_shared

    return format_distance_matrix(all_ids, matrix) + "\n"
예제 #55
0
def main():
    """Filter a distance matrix down to a chosen set of samples.

    The samples to keep come from the first of these that is supplied: the
    sample ids of an OTU table (opts.otu_table_fp), a sample id file
    (opts.sample_id_fp), or a mapping file together with valid states
    (opts.mapping_fp and opts.valid_states). The filtered matrix read from
    opts.input_distance_matrix is written to opts.output_distance_matrix.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        with open(opts.otu_table_fp, 'U') as otu_table_f:
            otu_table = parse_biom_table(otu_table_f)
        samples_to_keep = otu_table.SampleIds
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.mapping_fp, 'U') as mapping_f:
            samples_to_keep = sample_ids_from_metadata_description(
                mapping_f, opts.valid_states)
    else:
        option_parser.error(
            'must pass either --sample_id_fp, -t, or -m and -s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as distance_matrix_f:
        d = filter_samples_from_distance_matrix(
            parse_distmat(distance_matrix_f),
            samples_to_keep,
            negate=not opts.negate)

    # The output file is opened only once the result exists, so a usage
    # error or a failure above no longer leaves an empty file behind.
    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)