def test_parallel_database_mapper_usearch(self):
    """ parallel_database_mapper_usearch functions as expected """
    params = {
        'refseqs_fp': self.refseqs1_fp,
        'min_percent_id': 0.97,
        'evalue': 1e-10,
        'max_accepts': 1,
        'max_rejects': 32,
        'queryalnfract': 0.35,
        'targetalnfract': 0.0,
        'observation_metadata_fp': None
    }

    app = ParallelDatabaseMapperUsearch()
    # Only the files written under self.test_out are inspected below, so
    # the return value of the call is intentionally not kept.
    app(self.inseqs1_fp,
        self.test_out,
        params,
        job_prefix='PTEST',
        poll_directly=True,
        suppress_submit_jobs=False)

    observation_map_fp = glob(
        join(self.test_out, 'observation_map.txt'))[0]

    # Use a context manager so the handle is closed even if parsing fails.
    with open(observation_map_fp, 'U') as observation_map_f:
        omap = parse_otu_map(observation_map_f)

    # omap is indexed positionally: three entries in the first element,
    # then the expected IDs in the second and third elements.
    self.assertEqual(len(omap[0]), 3)
    self.assertItemsEqual(
        omap[1],
        ['eco:b0015', 'eco:b0122', 'eco:b0015:duplicate'])
    self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   sample_metadata=None,
                   constructor=SparseOTUTable):
    """Build an OTU table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map, handed to ``parse_otu_map``
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, maps OTU IDs to
        semicolon-separated taxonomy strings; OTUs missing from the dict
        get the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". Forwarded to ``parse_otu_map``
    table_id : object, optional
        Defaults to ``None``. Forwarded to ``table_factory``
    sample_metadata : object, optional
        Must be ``None``; any other value is rejected
    constructor : class, optional
        Table class forwarded to ``table_factory``

    Returns
    -------
    The table object produced by ``table_factory``

    Raises
    ------
    NotImplementedError
        If ``sample_metadata`` is not ``None``
    ValueError
        If ``table_factory`` raises ``ValueError`` while building the table
    """
    # Pass delim by keyword so the call does not depend on the positional
    # order of parse_otu_map's optional parameters.
    data, sample_ids, otu_ids = parse_otu_map(otu_map_f, delim=delim)

    if otu_to_taxonomy is not None:
        otu_metadata = []
        for otu_id in otu_ids:
            # Only the lookup can raise KeyError; unknown OTUs fall back to
            # the placeholder taxonomy.
            try:
                taxonomy = otu_to_taxonomy[otu_id].split(';')
            except KeyError:
                taxonomy = ["None"]
            otu_metadata.append({'taxonomy': taxonomy})
    else:
        otu_metadata = None

    if sample_metadata is not None:
        raise NotImplementedError(
            "Passing of sample metadata to make_otu_table is not currently supported.")

    try:
        otu_table = table_factory(data,
                                  sample_ids,
                                  otu_ids,
                                  sample_metadata=sample_metadata,
                                  observation_metadata=otu_metadata,
                                  table_id=table_id,
                                  constructor=constructor,
                                  dtype=int)
    except ValueError as e:
        raise ValueError(
            "Couldn't create OTU table. Is your OTU map empty?"
            " Original error message: %s" % (str(e)))

    # Hand the constructed table back to the caller instead of discarding it.
    return otu_table
def test_parallel_database_mapper_usearch(self):
    """ parallel_database_mapper_usearch functions as expected """
    params = {'refseqs_fp': self.refseqs1_fp,
              'min_percent_id': 0.97,
              'evalue': 1e-10,
              'max_accepts': 1,
              'max_rejects': 32,
              'queryalnfract': 0.35,
              'targetalnfract': 0.0,
              'observation_metadata_fp': None
              }

    # The mapper's return value is not used; the assertions below read the
    # observation map it writes into self.test_out.
    mapper = ParallelDatabaseMapperUsearch()
    mapper(self.inseqs1_fp,
           self.test_out,
           params,
           job_prefix='PTEST',
           poll_directly=True,
           suppress_submit_jobs=False)

    observation_map_fp = glob(
        join(self.test_out, 'observation_map.txt'))[0]

    # Close the observation map deterministically rather than leaking the
    # open handle for the rest of the test run.
    with open(observation_map_fp, 'U') as observation_map_f:
        omap = parse_otu_map(observation_map_f)

    self.assertEqual(len(omap[0]), 3)
    self.assertItemsEqual(
        omap[1],
        ['eco:b0015', 'eco:b0122', 'eco:b0015:duplicate'])
    self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   otu_ids_to_exclude=None,
                   sample_metadata=None,
                   seq_counts=None):
    """Build a BIOM table from the contents of an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, the dict maps OTU IDs to
        taxonomies; OTUs absent from the dict get ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter that is used in the sequence IDs to
        join the sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier that will be given to the
        generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. If supplied, keys in the outer dict should be
        sample IDs, and keys in the inner dicts should be column names.
    seq_counts : dict, optional
        Defaults to ``None``. If supplied, the dict maps seq ids to seq
        counts

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    KeyError
        If a sample ID from the OTU map has no entry in ``sample_metadata``
    ValueError
        If constructing the table raises ``ValueError``
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude,
        seq_counts=seq_counts)

    otu_metadata = None
    if otu_to_taxonomy is not None:
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    if sample_metadata is not None:
        # Reorder the metadata to follow the OTU map's sample_ids; entries
        # for samples that are not in the OTU map are left out.
        ordered_metadata = []
        try:
            for sample_id in sample_ids:
                ordered_metadata.append(sample_metadata[sample_id])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = ordered_metadata

    try:
        table = Table(data,
                      otu_ids,
                      sample_ids,
                      observation_metadata=otu_metadata,
                      sample_metadata=sample_metadata,
                      table_id=table_id,
                      generated_by=get_generated_by_for_biom_tables(),
                      create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % str(err))
    return table
def test_find_otus(self):
    """ test_find_otus: checks OTU assignment of a set of sequences """
    # Register every file this test touches so it gets cleaned up.
    self.files_to_remove.append(self.output_otu_map)
    self.files_to_remove.append(self.leftovers)
    self.files_to_remove.append(self.input_fasta)

    # Run the function under test.
    find_otus(self.input_fasta, self.leftovers, self.output_otu_map)

    # Read both outputs through context managers so no handle is leaked.
    # Only the sample IDs are checked; the other parsed values are unused.
    with open(self.output_otu_map) as otu_map_f:
        _, sample_ids, _ = parse_otu_map(otu_map_f)
    with open(self.leftovers) as leftovers_f:
        observed_failures = leftovers_f.read()

    # Check the outputs are correct.
    self.assertEqual(sample_ids[0], 'HIT')
    self.assertEqual(observed_failures, exp_failures)
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   otu_ids_to_exclude=None,
                   sample_metadata=None):
    """Build a BIOM table from the contents of an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, the dict maps OTU IDs to
        taxonomies; OTUs absent from the dict get ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter that is used in the sequence IDs to
        join the sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier that will be given to the
        generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : iterable of dicts, optional
        Defaults to ``None``. Forwarded unchanged to ``Table``

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    ValueError
        If constructing the table raises ``ValueError``
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude)

    otu_metadata = None
    if otu_to_taxonomy is not None:
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    # NOTE: sample_metadata is not validated or reordered here; it is handed
    # to Table exactly as the caller supplied it.
    try:
        table = Table(data,
                      otu_ids,
                      sample_ids,
                      observation_metadata=otu_metadata,
                      sample_metadata=sample_metadata,
                      table_id=table_id,
                      generated_by=get_generated_by_for_biom_tables(),
                      create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % str(err))
    return table
def test_parallel_database_mapper_usearch(self):
    """ parallel_database_mapper_usearch functions as expected """
    params = {
        "refseqs_fp": self.refseqs1_fp,
        "min_percent_id": 0.97,
        "evalue": 1e-10,
        "max_accepts": 1,
        "max_rejects": 32,
        "queryalnfract": 0.35,
        "targetalnfract": 0.0,
        "observation_metadata_fp": None,
    }

    app = ParallelDatabaseMapperUsearch()
    # The call is made for the files it writes to self.test_out; its return
    # value is not needed by the assertions.
    app(
        self.inseqs1_fp,
        self.test_out,
        params,
        job_prefix="PTEST",
        poll_directly=True,
        suppress_submit_jobs=False
    )

    observation_map_fp = glob(join(self.test_out, "observation_map.txt"))[0]

    # Parse inside a with-block so the file is always closed.
    with open(observation_map_fp, "U") as observation_map_f:
        omap = parse_otu_map(observation_map_f)

    self.assertEqual(len(omap[0]), 3)
    self.assertItemsEqual(
        omap[1], ["eco:b0015", "eco:b0122", "eco:b0015:duplicate"])
    self.assertItemsEqual(omap[2], ["eco:b0015-pr", "eco:b0122-pr"])
def test_parallel_function_assigner_usearch(self):
    """ parallel_function_assigner_usearch functions as expected """
    params = {'refseqs_fp': self.refseqs1_fp,
              'min_percent_id': 0.97,
              'evalue': 1e-10,
              'max_accepts': 1,
              'max_rejects': 32,
              'queryalnfract': 0.35,
              'targetalnfract': 0.0
              }

    app = ParallelFunctionAssignerUsearch()
    # Only the function map written under self.test_out is checked, so the
    # return value of the call is not kept.
    app(self.inseqs1_fp,
        self.test_out,
        params,
        job_prefix='PTEST',
        poll_directly=True,
        suppress_submit_jobs=False)

    # The output name carries a prefix, hence the wildcard match.
    otu_map_fp = glob(join(self.test_out, '*fmap.txt'))[0]

    # Close the handle as soon as parsing is done instead of leaking it.
    with open(otu_map_fp, 'U') as otu_map_f:
        fmap = parse_otu_map(otu_map_f)

    self.assertEqual(len(fmap[0]), 2)
    self.assertEqualItems(fmap[1], ['eco:b0015', 'eco:b0122'])
    self.assertEqualItems(fmap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   otu_ids_to_exclude=None,
                   sample_metadata=None):
    """Create a BIOM table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, the dict maps OTU IDs to
        taxonomies; OTUs absent from the dict get ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter that is used in the sequence IDs to
        join the sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier that will be given to the
        generated BIOM table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. If supplied, keys in the outer dict should be
        sample IDs, and keys in the inner dicts should be column names.

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    KeyError
        If a sample ID from the OTU map has no entry in ``sample_metadata``
    ValueError
        If constructing the table raises ``ValueError``
    """
    parsed = parse_otu_map(otu_map_f,
                           delim=delim,
                           otu_ids_to_exclude=otu_ids_to_exclude)
    data, sample_ids, otu_ids = parsed

    if otu_to_taxonomy is None:
        otu_metadata = None
    else:
        otu_metadata = [{'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
                        for otu_id in otu_ids]

    # When sample metadata is given, line it up with the sample order of the
    # OTU map; samples that only appear in the metadata are dropped.
    if sample_metadata is not None:
        try:
            in_map_order = [sample_metadata[sid] for sid in sample_ids]
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = in_map_order

    try:
        biom_table = Table(data,
                           otu_ids,
                           sample_ids,
                           observation_metadata=otu_metadata,
                           sample_metadata=sample_metadata,
                           table_id=table_id,
                           generated_by=get_generated_by_for_biom_tables(),
                           create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % str(err))
    return biom_table