Exemple #1
0
    def test_deblur_keyerror(self):
        """Run deblur with an archived feature that has an empty placement.

        One extra sequence mapped to an empty string is added to
        ``self.features`` before the archive is pre-populated; the deblur
        call is then expected to return success with an empty message.
        """
        # stage a temporary copy of the demux support file
        handle, demux_fp = mkstemp(suffix='_seqs.demux')
        close(handle)
        self._clean_up_files.append(demux_fp)
        copyfile('support_files/filtered_5_seqs.demux', demux_fp)

        # create a new prep template; study 1 is the testing study
        sample_descriptions = (('SKB7.640196', 'SKB7'),
                               ('SKB8.640193', 'SKB8'))
        prep_info = {
            sample: {'description_prep': description, 'platform': 'Illumina'}
            for sample, description in sample_descriptions
        }
        prep_request = {'prep_info': dumps(prep_info),
                        'study': 1,
                        'data_type': '16S'}
        prep_id = self.qclient.post(
            '/apitest/prep_template/', data=prep_request)['prep']

        # attach the demux file as a new artifact of that prep
        artifact_request = {
            'filepaths': dumps([(demux_fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': prep_id}
        artifact_id = self.qclient.post(
            '/apitest/artifact/', data=artifact_request)['artifact']

        self.params['Demultiplexed sequences'] = artifact_id

        # register the processing job that deblur will run under
        job_request = {'user': '******',
                       'command': dumps(['deblur', '2021.09', 'Deblur']),
                       'status': 'running',
                       'parameters': dumps(self.params)}
        job_id = self.qclient.post(
            '/apitest/processing_job/', data=job_request)['job']

        work_dir = mkdtemp()
        self._clean_up_files.append(work_dir)

        # pre-populate the archive with fragment placements, making sure
        # that at least one sequence got no placement via SEPP
        unplaced_seq = ('TACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGT'
                        'GGGTTGGTAAGTCAGTGGTGAAATCTCCGGGCTTAACTCGGAAACTG')
        self.features[unplaced_seq] = ''
        self.qclient.patch(url="/qiita_db/archive/observations/",
                           op="add", path=job_id,
                           value=dumps(self.features))

        success, ainfo, msg = deblur(
            self.qclient, job_id, self.params, work_dir)

        self.assertEqual("", msg)
        self.assertTrue(success)
Exemple #2
0
    def test_deblur(self):
        """Run deblur on the 5-sequence demux file and check its artifacts.

        Expects a successful run with an empty message and two BIOM
        artifacts whose file lists point into ``<out_dir>/deblur_out``.
        """
        # stage a temporary copy of the demux support file
        handle, demux_fp = mkstemp(suffix='_seqs.demux')
        close(handle)
        self._clean_up_files.append(demux_fp)
        copyfile('support_files/filtered_5_seqs.demux', demux_fp)

        # create a new prep template; study 1 is the testing study
        prep_info = {
            sample: {'description_prep': description}
            for sample, description in (('SKB7.640196', 'SKB7'),
                                        ('SKB8.640193', 'SKB8'))
        }
        prep_request = {
            'prep_info': dumps(prep_info),
            'study': 1,
            'data_type': '16S',
        }
        prep_id = self.qclient.post(
            '/apitest/prep_template/', data=prep_request)['prep']

        # attach the demux file as a new artifact of that prep
        artifact_request = {
            'filepaths': dumps([(demux_fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': prep_id,
        }
        artifact_id = self.qclient.post(
            '/apitest/artifact/', data=artifact_request)['artifact']

        self.params['Demultiplexed sequences'] = artifact_id

        # register the processing job that deblur will run under
        job_request = {
            'user': '******',
            'command': dumps(['deblur', '1.0.4', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params),
        }
        job_id = self.qclient.post(
            '/apitest/processing_job/', data=job_request)['job']

        work_dir = mkdtemp()
        self._clean_up_files.append(work_dir)

        # pre-populate archive with fragment placements
        self.qclient.patch(url="/qiita_db/archive/observations/",
                           op="add",
                           path=job_id,
                           value=dumps(self.features))
        success, ainfo, msg = deblur(
            self.qclient, job_id, self.params, work_dir)

        self.assertEqual("", msg)
        self.assertTrue(success)

        all_artifact, ref_hit_artifact = ainfo[0], ainfo[1]
        self.assertEqual("BIOM", all_artifact.artifact_type)
        self.assertEqual("BIOM", ref_hit_artifact.artifact_type)

        deblur_dir = join(work_dir, 'deblur_out')
        exp_all_files = [
            (join(deblur_dir, 'all.biom'), 'biom'),
            (join(deblur_dir, 'all.seqs.fa'), 'preprocessed_fasta'),
        ]
        self.assertEqual(exp_all_files, all_artifact.files)

        exp_ref_hit_files = [
            (join(deblur_dir, 'reference-hit.biom'), 'biom'),
            (join(deblur_dir, 'reference-hit.seqs.fa'), 'preprocessed_fasta'),
            (None, 'plain_text'),
        ]
        self.assertEqual(exp_ref_hit_files, ref_hit_artifact.files)
Exemple #3
0
    def test_deblur_failing_sepp(self):
        """Check that deblur reports failure when ``run-sepp.sh`` fails.

        A stub ``run-sepp.sh`` that always exits with status 123 is written
        into the output directory, which is then placed first on ``PATH``.
        ``PATH`` is put back as soon as deblur returns (or raises) so the
        stub cannot leak into other tests.
        """
        # generating filepaths
        fd, fp = mkstemp(suffix='_seqs.demux')
        close(fd)
        self._clean_up_files.append(fp)
        copyfile('support_files/filtered_5_seqs.demux', fp)

        # inserting new prep template
        prep_info_dict = {
            'SKB7.640196': {
                'description_prep': 'SKB7',
                'platform': 'Illumina'
            },
            'SKB8.640193': {
                'description_prep': 'SKB8',
                'platform': 'Illumina'
            }
        }
        data = {
            'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'
        }
        pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

        # inserting artifacts
        data = {
            'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid
        }
        aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']

        self.params['Demultiplexed sequences'] = aid

        data = {
            'user': '******',
            'command': dumps(['deblur', '2021.09', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)
        }
        jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

        out_dir = mkdtemp()
        self._clean_up_files.append(out_dir)

        # create a fake sepp binary that will always fail
        fp_fake_sepp = join(out_dir, 'run-sepp.sh')
        with open(fp_fake_sepp, 'w') as f:
            f.write('#!/bin/bash\nexit 123\n')
        chmod(fp_fake_sepp, 0o775)

        # remember the current PATH so the fake binary is only visible for
        # the duration of the deblur call, even if that call raises
        saved_path = environ.get('PATH')
        environ['PATH'] = '%s:%s' % (out_dir, self.oldpath)
        try:
            success, ainfo, msg = deblur(
                self.qclient, jid, self.params, out_dir)
        finally:
            if saved_path is None:
                environ.pop('PATH', None)
            else:
                environ['PATH'] = saved_path

        self.assertFalse(success)
        self.assertIsNone(ainfo)
        self.assertIn('Error running run-sepp.sh', msg)
Exemple #4
0
    def test_deblur_no_target_gene(self):
        """Run deblur against a prep whose data type is ``Metagenomic``.

        The call is expected to fail with the message stating that deblur
        was developed only for amplicon sequencing data.
        """
        # stage a temporary copy of the demux support file
        handle, demux_fp = mkstemp(suffix='_seqs.demux')
        close(handle)
        self._clean_up_files.append(demux_fp)
        copyfile('support_files/no_sepp_seqs.demux', demux_fp)

        # create a new prep template; study 1 is the testing study
        prep_info = {
            sample: {'description_prep': description, 'platform': 'Illumina'}
            for sample, description in (('SKB7.640196', 'SKB7'),
                                        ('SKB8.640193', 'SKB8'))
        }
        prep_request = {'prep_info': dumps(prep_info),
                        'study': 1,
                        'data_type': 'Metagenomic'}
        prep_id = self.qclient.post(
            '/apitest/prep_template/', data=prep_request)['prep']

        # attach the demux file as a new artifact of that prep
        artifact_request = {
            'filepaths': dumps([(demux_fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': prep_id}
        artifact_id = self.qclient.post(
            '/apitest/artifact/', data=artifact_request)['artifact']

        self.params['Demultiplexed sequences'] = artifact_id

        # register the processing job that deblur will run under
        job_request = {'user': '******',
                       'command': dumps(['deblur', '2021.09', 'Deblur']),
                       'status': 'running',
                       'parameters': dumps(self.params)}
        job_id = self.qclient.post(
            '/apitest/processing_job/', data=job_request)['job']

        work_dir = mkdtemp()
        self._clean_up_files.append(work_dir)

        # pre-populate archive with fragment placements
        self.qclient.patch(url="/qiita_db/archive/observations/",
                           op="add", path=job_id,
                           value=dumps(self.features))
        success, ainfo, msg = deblur(
            self.qclient, job_id, self.params, work_dir)

        expected_msg = 'deblur was developed only for amplicon sequencing data'
        self.assertEqual(expected_msg, msg)
        self.assertFalse(success)
Exemple #5
0
    def test_no_valid_values_platform_error(self):
        """Run deblur against a prep with non-Illumina ``platform`` values.

        The prep info uses the platforms ``foo`` and ``bar``; the call is
        expected to fail with a message listing them as ``bar, foo``.
        """
        # stage a temporary copy of the demux support file
        handle, demux_fp = mkstemp(suffix='_seqs.demux')
        close(handle)
        self._clean_up_files.append(demux_fp)
        copyfile('support_files/filtered_5_seqs.demux', demux_fp)

        # create a new prep template; study 1 is the testing study
        sample_rows = (('SKB7.640196', 'SKB7', 'foo'),
                       ('SKB8.640193', 'SKB8', 'bar'))
        prep_info = {
            sample: {'description_prep': description, 'platform': platform}
            for sample, description, platform in sample_rows
        }
        prep_request = {'prep_info': dumps(prep_info),
                        'study': 1,
                        'data_type': '16S'}
        prep_id = self.qclient.post(
            '/apitest/prep_template/', data=prep_request)['prep']

        # attach the demux file as a new artifact of that prep
        artifact_request = {
            'filepaths': dumps([(demux_fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': prep_id}
        artifact_id = self.qclient.post(
            '/apitest/artifact/', data=artifact_request)['artifact']
        self.params['Demultiplexed sequences'] = artifact_id

        # register the processing job that deblur will run under
        job_request = {'user': '******',
                       'command': dumps(['deblur', '2021.09', 'Deblur']),
                       'status': 'running',
                       'parameters': dumps(self.params)}
        job_id = self.qclient.post(
            '/apitest/processing_job/', data=job_request)['job']

        work_dir = mkdtemp()
        self._clean_up_files.append(work_dir)
        success, ainfo, msg = deblur(
            self.qclient, job_id, self.params, work_dir)

        expected_msg = ('deblur is only valid for Illumina `platform`, '
                        'current values in the Preparation Information File: '
                        'bar, foo')
        self.assertEqual(expected_msg, msg)
        self.assertFalse(success)
Exemple #6
0
    def test_deblur(self):
        """Run the ``deblur-workflow`` command and check its artifacts.

        Expects a successful run with an empty message and two BIOM
        artifacts whose file lists point into ``<out_dir>/deblur_out``.
        """
        # stage a temporary copy of the demux support file
        handle, demux_fp = mkstemp(suffix='_seqs.demux')
        close(handle)
        self._clean_up_files.append(demux_fp)
        copyfile('support_files/filtered_5_seqs.demux', demux_fp)

        # create a new prep template; study 1 is the testing study
        prep_info = {
            sample: {'description_prep': description}
            for sample, description in (('SKB7.640196', 'SKB7'),
                                        ('SKB8.640193', 'SKB8'))
        }
        prep_request = {'prep_info': dumps(prep_info),
                        'study': 1,
                        'data_type': '16S'}
        prep_id = self.qclient.post(
            '/apitest/prep_template/', data=prep_request)['prep']

        # attach the demux file as a new artifact of that prep
        artifact_request = {
            'filepaths': dumps([(demux_fp, 'preprocessed_fastq')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': prep_id}
        artifact_id = self.qclient.post(
            '/apitest/artifact/', data=artifact_request)['artifact']

        self.params['seqs-fp'] = artifact_id

        # register the processing job that deblur will run under
        job_request = {
            'user': '******',
            'command': dumps(['deblur', '0.1.0', 'deblur-workflow']),
            'status': 'running',
            'parameters': dumps(self.params)}
        job_id = self.qclient.post(
            '/apitest/processing_job/', data=job_request)['job']

        work_dir = mkdtemp()
        self._clean_up_files.append(work_dir)

        success, ainfo, msg = deblur(
            self.qclient, job_id, self.params, work_dir)

        self.assertEqual("", msg)
        self.assertTrue(success)

        final_artifact, only_16s_artifact = ainfo[0], ainfo[1]
        self.assertEqual("BIOM", final_artifact.artifact_type)
        self.assertEqual("BIOM", only_16s_artifact.artifact_type)

        deblur_dir = join(work_dir, 'deblur_out')
        exp_final_files = [
            (join(deblur_dir, 'final.biom'), 'biom'),
            (join(deblur_dir, 'final.seqs.fa'), 'preprocessed_fasta'),
        ]
        self.assertEqual(exp_final_files, final_artifact.files)

        exp_only_16s_files = [
            (join(deblur_dir, 'final.only-16s.biom'), 'biom'),
            (join(deblur_dir, 'final.seqs.fa.no_artifacts'),
             'preprocessed_fasta'),
        ]
        self.assertEqual(exp_only_16s_files, only_16s_artifact.files)
Exemple #7
0
    def test_fragment_archiving(self):
        """Check that placements are read from and written to the archive.

        The archive is first verified to be empty for the job, then filled
        with precomputed placements (minus those in ``self.novel_seqs``),
        and finally deblur is run with the ``tiny`` SEPP reference. After
        the run, the number of archived placements must not have shrunk and
        two specific placement strings are compared verbatim.
        """
        # generating filepaths
        fd, fp = mkstemp(suffix='_seqs.demux')
        close(fd)
        self._clean_up_files.append(fp)
        copyfile('support_files/filtered_5_seqs.demux', fp)

        # inserting new prep template
        prep_info_dict = {
            'SKB7.640196': {
                'description_prep': 'SKB7'
            },
            'SKB8.640193': {
                'description_prep': 'SKB8'
            }
        }
        data = {
            'prep_info': dumps(prep_info_dict),
            # magic #1 = testing study
            'study': 1,
            'data_type': '16S'
        }
        pid = self.qclient.post('/apitest/prep_template/', data=data)['prep']

        # inserting artifacts
        data = {
            'filepaths': dumps([(fp, 'preprocessed_demux')]),
            'type': "Demultiplexed",
            'name': "New demultiplexed artifact",
            'prep': pid
        }
        aid = self.qclient.post('/apitest/artifact/', data=data)['artifact']
        self.params['Demultiplexed sequences'] = aid

        data = {
            'user': '******',
            'command': dumps(['deblur', '1.0.4', 'Deblur']),
            'status': 'running',
            'parameters': dumps(self.params)
        }
        jid = self.qclient.post('/apitest/processing_job/', data=data)['job']

        def fetch_observations(fragments):
            # ask the archive which of `fragments` it holds for this job
            return self.qclient.post("/qiita_db/archive/observations/",
                                     data={'job_id': jid,
                                           'features': fragments})

        # populate Qiita archive with some precomputed placements
        # placements.json is output from a SEPP run for the resulting Deblur
        # table, but with "tree" value removed for the sake of space
        features = dict()
        with open('support_files/sepp/placements.json', 'r') as f:
            for placement in json.load(f)['placements']:
                fragment = placement['nm'][0][0]
                # exclude 10 sequences to trigger SEPP computation later on
                if fragment not in self.novel_seqs:
                    features[fragment] = json.dumps(placement['p'])
        # add in a feature which should be rejected by SEPP
        features['A' * len(self.novel_seqs[0])] = ""

        # 1) check that archive is currently empty:
        observations = fetch_observations(list(features.keys()))
        self.assertEqual(0, len(observations))

        # 2) insert placements into archive ...
        self.qclient.patch(url="/qiita_db/archive/observations/",
                           op="add",
                           path=jid,
                           value=json.dumps(features))
        # ... and check that archive does hold those placements now:
        observations = fetch_observations(list(features.keys()))
        self.assertEqual(len(features), len(observations))

        # 3) execute deblur job with subsequent SEPP run and tiny reference
        out_dir = mkdtemp()
        self._clean_up_files.append(out_dir)
        self.params['Reference phylogeny for SEPP'] = 'tiny'
        success, ainfo, msg = deblur(self.qclient, jid, self.params, out_dir)
        self.assertEqual("", msg)
        self.assertTrue(success)

        # ensure number of stored placements did grow or at least did not
        # shrink
        observations_2 = fetch_observations(
            list(features.keys()) + [self.novel_seqs[0]])
        self.assertLessEqual(len(observations), len(observations_2))

        # show full diffs for the long placement strings compared below
        self.maxDiff = None
        # test specific placement values for one fragment that has
        # been pre-populated ...
        exp_placement = (
            '[[226990, -15902.052, 0.14311954, 9.856619e-06, 6.113515e-06], '
            '[226989, -15902.052, 0.14311936, 7.0000096e-06, 6.113515e-06], '
            '[226993, -15902.052, 0.14311917, 8.61664e-06, 6.113515e-06], '
            '[226991, -15902.052, 0.14311911, 6.3553584e-06, 6.113515e-06], '
            '[227443, -15902.052, 0.14311688, 6.7868327e-06, 6.113515e-06], '
            '[226994, -15902.052, 0.14311177, 5.000002e-07, 6.113515e-06], '
            '[227452, -15902.064, 0.14129417, 0.00160019, 6.113515e-06]]')
        self.assertEqual(
            observations_2[(
                'TACGTAGGGCGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCA'
                'CGTCGGATGTGAAAGCCCGGGGCTTAACCCCGGGTCTG')], exp_placement)

        # ... and one fragment that was recomputed via SEPP during this test
        exp_placement = (
            '[[78, -18489.055, 0.8486466, 0.015792055, 6.113515e-06], '
            '[74, -18491.146, 0.10484001, 0.017408343, 0.010122812], '
            '[77, -18491.959, 0.046513416, 0.015838308, 0.010945947]]')
        self.assertEqual(observations_2[self.novel_seqs[0]], exp_placement)