Beispiel #1
0
    def test_parallel_database_mapper_usearch(self):
        """ parallel_database_mapper_usearch functions as expected

        Runs ParallelDatabaseMapperUsearch on the test input sequences and
        checks the contents of the observation map written to the output
        directory.
        """

        params = {
            'refseqs_fp': self.refseqs1_fp,
            'min_percent_id': 0.97,
            'evalue': 1e-10,
            'max_accepts': 1,
            'max_rejects': 32,
            'queryalnfract': 0.35,
            'targetalnfract': 0.0,
            'observation_metadata_fp': None
        }

        app = ParallelDatabaseMapperUsearch()
        # The return value is not inspected; only the files written to
        # self.test_out are checked below.
        app(self.inseqs1_fp,
            self.test_out,
            params,
            job_prefix='PTEST',
            poll_directly=True,
            suppress_submit_jobs=False)
        # Indexing with [0] makes the test fail loudly (IndexError) when the
        # observation map was not produced.
        observation_map_fp = glob(join(self.test_out,
                                       'observation_map.txt'))[0]
        # Use a context manager so the file handle is closed even when
        # parsing raises, instead of leaking it for the rest of the test run.
        with open(observation_map_fp, 'U') as observation_map_f:
            omap = parse_otu_map(observation_map_f)
        self.assertEqual(len(omap[0]), 3)
        self.assertItemsEqual(
            omap[1], ['eco:b0015', 'eco:b0122', 'eco:b0015:duplicate'])
        self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
Beispiel #2
0
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   sample_metadata=None,
                   constructor=SparseOTUTable):
    """Build an OTU table from an OTU map.

    Parameters
    ----------
    otu_map_f
        The OTU map; handed to ``parse_otu_map`` together with ``delim``.
    otu_to_taxonomy : mapping, optional
        Maps OTU IDs to ``;``-separated taxonomy strings. OTUs missing from
        the mapping get the taxonomy ``["None"]``. When ``None`` (default) no
        observation metadata is attached.
    delim : str, optional
        Delimiter forwarded to ``parse_otu_map``.
    table_id : optional
        Forwarded to ``table_factory`` as ``table_id``.
    sample_metadata : optional
        Must be ``None``; sample metadata is not supported.
    constructor : optional
        Forwarded to ``table_factory`` as ``constructor``.

    Returns
    -------
    The table object created by ``table_factory``.

    Raises
    ------
    NotImplementedError
        If ``sample_metadata`` is supplied.
    ValueError
        If ``table_factory`` raises ``ValueError`` (for example, presumably,
        on an empty OTU map); the original message is appended.
    """
    data, sample_ids, otu_ids = parse_otu_map(otu_map_f, delim)

    if otu_to_taxonomy is not None:
        otu_metadata = []
        for otu_id in otu_ids:
            try:
                taxonomy = otu_to_taxonomy[otu_id].split(';')
            except KeyError:
                # OTU without a taxonomy assignment gets a placeholder.
                taxonomy = ["None"]
            otu_metadata.append({'taxonomy': taxonomy})
    else:
        otu_metadata = None

    if sample_metadata is not None:
        raise NotImplementedError(
            "Passing of sample metadata to make_otu_table is not currently supported.")
    try:
        otu_table = table_factory(data, sample_ids, otu_ids,
                                  sample_metadata=sample_metadata,
                                  observation_metadata=otu_metadata,
                                  table_id=table_id,
                                  constructor=constructor,
                                  dtype=int)
    except ValueError as e:
        raise ValueError(
            "Couldn't create OTU table. Is your OTU map empty?"
            " Original error message: %s" % (str(e)))
    # Hand the constructed table back to the caller; without this the
    # function always evaluated to None.
    return otu_table
    def test_parallel_database_mapper_usearch(self):
        """ parallel_database_mapper_usearch functions as expected

        Maps the test input sequences against the reference sequences and
        verifies the observation map found in the output directory.
        """

        params = {'refseqs_fp': self.refseqs1_fp,
                  'min_percent_id': 0.97,
                  'evalue': 1e-10,
                  'max_accepts': 1,
                  'max_rejects': 32,
                  'queryalnfract': 0.35,
                  'targetalnfract': 0.0,
                  'observation_metadata_fp': None
                  }

        app = ParallelDatabaseMapperUsearch()
        # Only the files written to self.test_out are checked, so the
        # call's return value is intentionally discarded.
        app(self.inseqs1_fp,
            self.test_out,
            params,
            job_prefix='PTEST',
            poll_directly=True,
            suppress_submit_jobs=False)
        # [0] raises IndexError if the expected output file is missing.
        observation_map_fp = glob(
            join(self.test_out, 'observation_map.txt'))[0]
        # Close the observation map deterministically rather than leaking
        # the handle until garbage collection.
        with open(observation_map_fp, 'U') as observation_map_f:
            omap = parse_otu_map(observation_map_f)
        self.assertEqual(len(omap[0]), 3)
        self.assertItemsEqual(
            omap[1],
            ['eco:b0015',
             'eco:b0122',
             'eco:b0015:duplicate'])
        self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
Beispiel #4
0
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None, seq_counts=None):
    """Build a BIOM ``Table`` from an OTU map.

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map: a jagged tab-separated file whose first column is the
        OTU ID and whose remaining columns are the IDs of the sequences
        assigned to that OTU.
    otu_to_taxonomy : dict, optional
        Maps OTU IDs to taxonomies. OTUs absent from the dict receive the
        taxonomy ``["None"]``. Defaults to ``None`` (no observation
        metadata).
    delim : str, optional
        Delimiter joining the sample ID to the sequence number inside each
        sequence ID. Defaults to "_".
    table_id : object, optional
        Identifier given to the generated BIOM table. Defaults to ``None``.
    otu_ids_to_exclude : iterable, optional
        OTUs that must not be added to the table. Defaults to ``None``.
    sample_metadata : dict of dicts, optional
        Outer keys are sample IDs, inner keys are column names. Defaults to
        ``None``.
    seq_counts : dict, optional
        Maps sequence IDs to sequence counts. Defaults to ``None``.

    Returns
    -------
    Table
        The table built from the parsed OTU map.

    Raises
    ------
    KeyError
        If a sample ID from the OTU map has no entry in ``sample_metadata``.
    ValueError
        If constructing the table raises ``ValueError``; the original message
        is appended.
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude,
        seq_counts=seq_counts)

    # One metadata dict per OTU, in the same order as otu_ids.
    otu_metadata = None
    if otu_to_taxonomy is not None:
        otu_metadata = [{'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
                        for otu_id in otu_ids]

    # Re-order the supplied sample metadata to follow the OTU map's
    # sample_ids; samples present only in the metadata are dropped.
    if sample_metadata is not None:
        ordered_metadata = []
        try:
            for sample_id in sample_ids:
                ordered_metadata.append(sample_metadata[sample_id])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = ordered_metadata

    try:
        biom_table = Table(
            data, otu_ids, sample_ids,
            observation_metadata=otu_metadata,
            sample_metadata=sample_metadata,
            table_id=table_id,
            generated_by=get_generated_by_for_biom_tables(),
            create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
    return biom_table
    def test_find_otus(self):
        """ test_find_otus: This fxn checks for otu assignment of a set of
            sequences
        """
        # make sure files get cleaned up
        self.files_to_remove.append(self.output_otu_map)
        self.files_to_remove.append(self.leftovers)
        self.files_to_remove.append(self.input_fasta)

        # run the fxn
        find_otus(self.input_fasta, self.leftovers, self.output_otu_map)

        # Read both outputs through context managers so the handles are
        # closed before the files are removed during cleanup.
        with open(self.output_otu_map) as otu_map_f:
            # Only the sample IDs are checked; the other two values are unused.
            _, sample_ids, _ = parse_otu_map(otu_map_f)
        with open(self.leftovers) as leftovers_f:
            obs_failures = leftovers_f.read()

        # check the outputs are correct
        self.assertEqual(sample_ids[0], 'HIT')
        self.assertEqual(obs_failures, exp_failures)
Beispiel #6
0
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None):
    """Build a BIOM ``Table`` from an OTU map.

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map: a jagged tab-separated file whose first column is the
        OTU ID and whose remaining columns are the IDs of the sequences
        assigned to that OTU.
    otu_to_taxonomy : dict, optional
        Maps OTU IDs to taxonomies. OTUs absent from the dict receive the
        taxonomy ``["None"]``. Defaults to ``None`` (no observation
        metadata).
    delim : str, optional
        Delimiter joining the sample ID to the sequence number inside each
        sequence ID. Defaults to "_".
    table_id : object, optional
        Identifier given to the generated BIOM table. Defaults to ``None``.
    otu_ids_to_exclude : iterable, optional
        OTUs that must not be added to the table. Defaults to ``None``.
    sample_metadata : iterable of dicts, optional
        Passed to ``Table`` unchanged. Defaults to ``None``.

    Returns
    -------
    Table
        The table built from the parsed OTU map.

    Raises
    ------
    ValueError
        If constructing the table raises ``ValueError``; the original message
        is appended.
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude)

    # One metadata dict per OTU (same order as otu_ids), or no observation
    # metadata at all when no taxonomy mapping was supplied.
    otu_metadata = (
        None if otu_to_taxonomy is None
        else [{'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
              for otu_id in otu_ids])

    try:
        biom_table = Table(
            data, otu_ids, sample_ids,
            observation_metadata=otu_metadata,
            sample_metadata=sample_metadata,
            table_id=table_id,
            generated_by=get_generated_by_for_biom_tables(),
            create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
    return biom_table
    def test_parallel_database_mapper_usearch(self):
        """ parallel_database_mapper_usearch functions as expected

        Executes ParallelDatabaseMapperUsearch and validates the observation
        map it leaves in the output directory.
        """

        params = {
            "refseqs_fp": self.refseqs1_fp,
            "min_percent_id": 0.97,
            "evalue": 1e-10,
            "max_accepts": 1,
            "max_rejects": 32,
            "queryalnfract": 0.35,
            "targetalnfract": 0.0,
            "observation_metadata_fp": None,
        }

        app = ParallelDatabaseMapperUsearch()
        # The result of the call is not used; the assertions below inspect
        # the output file instead.
        app(
            self.inseqs1_fp, self.test_out, params, job_prefix="PTEST", poll_directly=True, suppress_submit_jobs=False
        )
        # [0] raises IndexError if the observation map was not written.
        observation_map_fp = glob(join(self.test_out, "observation_map.txt"))[0]
        # Context manager guarantees the handle is closed after parsing.
        with open(observation_map_fp, "U") as observation_map_f:
            omap = parse_otu_map(observation_map_f)
        self.assertEqual(len(omap[0]), 3)
        self.assertItemsEqual(omap[1], ["eco:b0015", "eco:b0122", "eco:b0015:duplicate"])
        self.assertItemsEqual(omap[2], ["eco:b0015-pr", "eco:b0122-pr"])
 def test_parallel_function_assigner_usearch(self):
     """ parallel_function_assigner_usearch functions as expected

     Runs ParallelFunctionAssignerUsearch on the test input sequences and
     checks the function map (``*fmap.txt``) written to the output directory.
     """

     params = {'refseqs_fp': self.refseqs1_fp,
               'min_percent_id': 0.97,
               'evalue': 1e-10,
               'max_accepts': 1,
               'max_rejects': 32,
               'queryalnfract': 0.35,
               'targetalnfract': 0.0
               }

     app = ParallelFunctionAssignerUsearch()
     # Return value is not inspected; only the output file is checked.
     app(self.inseqs1_fp,
         self.test_out,
         params,
         job_prefix='PTEST',
         poll_directly=True,
         suppress_submit_jobs=False)
     # [0] raises IndexError if no function map was produced.
     otu_map_fp = glob(join(self.test_out, '*fmap.txt'))[0]
     # Close the handle deterministically instead of leaking it.
     with open(otu_map_fp, 'U') as otu_map_f:
         fmap = parse_otu_map(otu_map_f)
     self.assertEqual(len(fmap[0]), 2)
     # NOTE(review): assertEqualItems is not a standard unittest.TestCase
     # method; presumably it comes from the project's TestCase base class --
     # confirm before porting to plain unittest.
     self.assertEqualItems(fmap[1], ['eco:b0015', 'eco:b0122'])
     self.assertEqualItems(fmap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
Beispiel #9
0
def make_otu_table(otu_map_f,
                   otu_to_taxonomy=None,
                   delim='_',
                   table_id=None,
                   otu_ids_to_exclude=None,
                   sample_metadata=None):
    """Build a BIOM ``Table`` from an OTU map.

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map: a jagged tab-separated file whose first column is the
        OTU ID and whose remaining columns are the IDs of the sequences
        assigned to that OTU.
    otu_to_taxonomy : dict, optional
        Maps OTU IDs to taxonomies. OTUs absent from the dict receive the
        taxonomy ``["None"]``. Defaults to ``None`` (no observation
        metadata).
    delim : str, optional
        Delimiter joining the sample ID to the sequence number inside each
        sequence ID. Defaults to "_".
    table_id : object, optional
        Identifier given to the generated BIOM table. Defaults to ``None``.
    otu_ids_to_exclude : iterable, optional
        OTUs that must not be added to the table. Defaults to ``None``.
    sample_metadata : dict of dicts, optional
        Outer keys are sample IDs, inner keys are column names. Defaults to
        ``None``.

    Returns
    -------
    Table
        The table built from the parsed OTU map.

    Raises
    ------
    KeyError
        If a sample ID from the OTU map has no entry in ``sample_metadata``.
    ValueError
        If constructing the table raises ``ValueError``; the original message
        is appended.
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f, delim=delim, otu_ids_to_exclude=otu_ids_to_exclude)

    if otu_to_taxonomy is None:
        otu_metadata = None
    else:
        # One metadata dict per OTU, in the same order as otu_ids.
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    # Put the supplied sample metadata in index-order with the OTU map's
    # sample_ids; samples that appear only in the metadata are left out.
    if sample_metadata is not None:
        in_map_order = []
        try:
            for sample_id in sample_ids:
                in_map_order.append(sample_metadata[sample_id])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = in_map_order

    try:
        result = Table(data,
                       otu_ids,
                       sample_ids,
                       observation_metadata=otu_metadata,
                       sample_metadata=sample_metadata,
                       table_id=table_id,
                       generated_by=get_generated_by_for_biom_tables(),
                       create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
    return result