Esempio n. 1
0
    def test_add_samples(self):
        '''test add_samples'''
        def write_names(path, names):
            # One sample name per line, newline-terminated.
            with open(path, 'w') as handle:
                print(*names, sep='\n', file=handle)

        def unprocessed():
            # Fresh dict on every call so entries never share state.
            return {'reads': False, 'asm': False, 'annot': False, 'ignore': False}

        root = 'tmp.sample_dirs.add_samples'
        names_file = 'tmp.new_samples'
        utils.rmtree(root)
        sdirs = sample_dirs.SampleDirs(root)
        self.assertTrue(os.path.exists(sdirs.sample_data_json_file))

        # A file that lists the same name twice must make add_samples raise.
        write_names(names_file, ['ABC123', 'ABC123'])
        with self.assertRaises(Exception):
            sdirs.add_samples(names_file)

        expected = {'ABC123': unprocessed()}
        write_names(names_file, ['ABC123'])
        sdirs.add_samples(names_file)
        self.assertEqual(expected, sdirs.sample_data)

        # Submitting the very same file a second time must raise as well.
        with self.assertRaises(Exception):
            sdirs.add_samples(names_file)

        expected['SAMN42'] = unprocessed()
        write_names(names_file, ['SAMN42'])
        sdirs.add_samples(names_file)
        self.assertEqual(expected, sdirs.sample_data)

        os.unlink(names_file)

        # A new SampleDirs object on the same directory must report the same data.
        sdirs = sample_dirs.SampleDirs(root)
        self.assertEqual(expected, sdirs.sample_data)
        utils.rmtree(root)
    def test_init_and_write_metadata_json(self):
        '''test init and write_metadata_json

        Covers three things: constructing a SampleDir on a directory that does
        not exist without a sample id raises AssertionError; constructing it
        with a sample id creates the directory and a metadata.json matching
        the in-memory metadata; and write_metadata_json puts later changes to
        the metadata into that file.
        '''
        tmp_dir = 'tmp.sample_dir.init_and_write_metadata_json'
        metadata_json = os.path.join(tmp_dir, 'metadata.json')
        utils.rmtree(tmp_dir)
        self.assertFalse(os.path.exists(tmp_dir))

        # No sample id and no existing directory: the constructor must refuse.
        with self.assertRaises(AssertionError):
            sample_dir.SampleDir(tmp_dir)

        sd = sample_dir.SampleDir(tmp_dir, 'sample_id')
        self.assertTrue(os.path.exists(tmp_dir))
        with open(metadata_json) as f:
            expected_json = json.load(f)
        self.assertEqual(expected_json, sd.metadata)

        sd.metadata['foo'] = 'bar'
        sd.write_metadata_json()
        expected_json['foo'] = 'bar'
        with open(metadata_json) as f:
            updated_json = json.load(f)
        # The file must hold the initial content plus the new key (this
        # comparison was missing, leaving the expected_json update unused) ...
        self.assertEqual(expected_json, updated_json)
        # ... and must agree with the in-memory metadata.
        self.assertEqual(updated_json, sd.metadata)
        utils.rmtree(tmp_dir)
    def test_assemble(self):
        '''test assemble'''
        root = 'tmp.sample_dir.assemble'
        utils.rmtree(root)
        sdir = sample_dir.SampleDir(root, sample_id='ABC123')

        # Before reads are flagged as downloaded, assemble() must raise.
        with self.assertRaises(Exception):
            sdir.assemble()

        sdir.metadata['reads_downloaded'] = True
        sdir.assemble(testing=True)

        # Files that must be present in the assembly directory afterwards.
        for basename in (
            'ABC123.contigs.fa.gz',
            'contigs.gfa.gz',
            'shovill.corrections',
            'shovill.log',
            'spades.fasta.gz',
        ):
            produced = os.path.join(sdir.assembly_dir, basename)
            self.assertTrue(os.path.exists(produced))

        utils.rmtree(root)
    def test_get_ena_metadata_and_download_reads(self):
        '''test get_ena_metadata_and_download_reads'''
        root = 'tmp.sample_dir.get_ena_metadata_and_download_reads'
        utils.rmtree(root)
        self.assertFalse(os.path.exists(root))
        sd = sample_dir.SampleDir(root, 'sample_id')

        def fixture(tag):
            # Path of one of the fastq fixtures kept in data_dir.
            return os.path.join(data_dir, f'get_ena_metadata_and_download_reads.{tag}.fastq.gz')

        fq1_1, fq1_2 = fixture('1_1'), fixture('1_2')
        fq2_1, fq2_2 = fixture('2_1'), fixture('2_2')
        md51_1 = '5c9da3ff0f327bb4274ceafb99c3cb7e'
        md51_2 = 'c49ce7e3c516df226c0076ff700ee61e'
        md52_1 = 'cfcb910a051e95ab4df7d9bed952cd89'
        md52_2 = '2759494b541c7f26627ae21bff58c97b'
        # NOTE(review): md51 and md52 are not referenced anywhere in this test.
        md51 = 'd5724c937aacdf83119f16a2bd91b8a4'
        md52 = '8e3cec37b8f139744486e5653b2f906e'

        # Records handed back by the patched get_ena_metadata. Only the last
        # two mention 'sample_id' (as primary and as secondary accession) and
        # only those point at real fixture files.
        ena_metadata = [
            {
                'sample_accession': 's1',
                'secondary_sample_accession': 's2',
                'fastq_ftp': 'file1;file2',
            },
            {
                'sample_accession': '',
                'secondary_sample_accession': 's2',
                'fastq_ftp': 'file1;file2',
            },
            {
                'sample_accession': 'sample_id',
                'secondary_sample_accession': 's2',
                'fastq_ftp': f'{fq1_1};{fq1_2}',
                'fastq_md5': f'{md51_1};{md51_2}',
            },
            {
                'sample_accession': 's3',
                'secondary_sample_accession': 'sample_id',
                'fastq_ftp': f'{fq2_1};{fq2_2}',
                'fastq_md5': f'{md52_1};{md52_2}',
            },
        ]

        def mock_wget(infile, outfile, known_md5=None):
            # Stand-in for utils.wget: a local copy plus an md5 check.
            shutil.copyfile(infile, outfile)
            assert known_md5 == utils.md5(outfile)

        metadata_patch = mock.patch.object(sample_dir.SampleDir, 'get_ena_metadata', return_value=ena_metadata)
        wget_patch = mock.patch.object(utils, 'wget', side_effect=mock_wget)
        with metadata_patch:
            with wget_patch:
                sd.get_ena_metadata_and_download_reads()

        # Reload from disk so the checks run against what was persisted.
        sd = sample_dir.SampleDir(root)
        self.assertTrue(sd.metadata['reads_downloaded'])
        self.assertTrue(os.path.exists(sd.reads_fwd))
        self.assertTrue(os.path.exists(sd.reads_rev))
        self.assertTrue(os.path.exists(sd.reads_downloaded_done_file))
        utils.rmtree(root)
Esempio n. 5
0
    def assemble(self, force=False, testing=False, testing_fail=False, temp_dir=None, working_dir=None):
        '''Assemble this sample with shovill and record the outcome in the metadata.

        force: when the sample is already flagged as assembled, remove the
            existing assembly directory and assemble again instead of raising.
        testing: do not run shovill; copy the 'test_shovill_outdir' directory
            from the 'data' directory next to this module into the output
            location and treat the assembly as successful.
        testing_fail: do not run shovill and treat the assembly as failed
            (only consulted when testing is false).
        temp_dir: when not None/empty, passed to shovill as --tmpdir.
        working_dir: existing directory. When given, output is first written
            to '<working_dir>/Shovill.out', then rsynced into
            self.assembly_dir, and the working copy is removed.

        Raises Exception when the reads are not flagged as downloaded, or when
        the sample is already assembled and force is false. Raises
        subprocess.CalledProcessError when the rsync or rm step fails.

        On success metadata['assembled'] becomes True,
        metadata['reads_downloaded'] becomes False and the read files are
        removed. On failure metadata['ignore'] becomes True and
        metadata['assembly_notes'] is set to 'Shovill fail'. In both cases the
        metadata is written with write_metadata_json().
        '''
        if not self.metadata['reads_downloaded']:
            raise Exception(f'Cannot assemble {self.sample_id} because reads not downloaded')
        if self.metadata['assembled']:
            if not force:
                raise Exception(f'Sample {self.sample_id} already assembled')
            utils.rmtree(self.assembly_dir)

        assembly_ok = False
        if working_dir is None:
            shovill_outdir = self.assembly_dir
        else:
            assert os.path.exists(working_dir)
            print('Working_dir:', working_dir)
            shovill_outdir = os.path.join(os.path.abspath(working_dir), 'Shovill.out')

        if testing:
            modules_dir = os.path.dirname(os.path.realpath(__file__))
            data_dir = os.path.join(modules_dir, 'data')
            shutil.copytree(os.path.join(data_dir, 'test_shovill_outdir'), shovill_outdir)
            assembly_ok = True
        elif not testing_fail:
            command = f'shovill --outdir {shovill_outdir} --R1 {self.reads_fwd} --R2 {self.reads_rev} --cpus 1'
            if temp_dir:
                command += f' --tmpdir {temp_dir}'
            # No check=True here: a non-zero exit is recorded as a failed
            # assembly further down rather than raised.
            completed_process = subprocess.run(command, shell=True)
            assembly_ok = completed_process.returncode == 0

        if working_dir is not None:
            cmd = f'rsync -a {shovill_outdir}/ {self.assembly_dir}'
            print(cmd)
            # Run once; the original ran this identical command twice in a row.
            subprocess.run(cmd, shell=True, check=True)
            subprocess.run(f'rm -rf {shovill_outdir}', shell=True, check=True)

        if assembly_ok:
            sample_id = self.metadata['sample_id']
            contigs_fa = os.path.join(self.assembly_dir, 'contigs.fa')
            contigs_fa_gz = os.path.join(self.assembly_dir, f'{sample_id}.contigs.fa.gz')
            utils.add_prefix_to_seqs(contigs_fa, contigs_fa_gz, sample_id + '.')
            os.unlink(contigs_fa)

            for filename in ('contigs.gfa', 'spades.fasta'):
                utils.gzip(os.path.join(self.assembly_dir, filename))

            self.metadata['assembled'] = True
            self.metadata['reads_downloaded'] = False
            # Best-effort cleanup, one file at a time, so a missing file does
            # not stop the others from being removed. Only filesystem errors
            # are ignored.
            for leftover in (self.reads_fwd, self.reads_rev, self.reads_downloaded_done_file):
                try:
                    os.unlink(leftover)
                except OSError:
                    pass
        else:
            self.metadata['ignore'] = True
            self.metadata['assembly_notes'] = 'Shovill fail'

        self.write_metadata_json()
    def test_nextflow_assemble(self):
        '''test nextflow_assemble

        Registers three samples, runs the assemble.nf pipeline with --testing
        and checks that every sample ends up with 'asm' set to True in the
        sample data.
        '''
        nextflow_helper.write_config_file()
        input_dir = 'tmp.nextflow_assemble.dir'
        utils.rmtree(input_dir)
        samples = ['ERS1', 'ERS2', 'ERS3']
        samples_file = 'tmp.nextflow_assemble.samples'
        with open(samples_file, 'w') as f:
            print(*samples, sep='\n', file=f)

        sdirs = sample_dirs.SampleDirs(input_dir)
        sdirs.add_samples(samples_file)
        os.unlink(samples_file)

        nextflow_file = os.path.join(nextflow_helper.nextflow_dir, 'assemble.nf')
        work_dir = 'tmp.nextflow_assemble.work'

        command = ' '.join([
            'nextflow run',
            '--input_dir', input_dir,
            '--testing',
            '--shovill_tempdir /foo/bar',
            '-c', nextflow_helper.config_file,
            '-w ', work_dir,
            nextflow_file,
        ])

        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
        except subprocess.CalledProcessError as e:
            # Fail with the command and its captured output in the failure
            # message instead of printing them and asserting on a constant.
            output = e.stdout.decode(errors='replace')
            self.fail(f'Error running nextflow\nCommand: {command}\nOutput:\n{output}')

        # Every sample is expected to be flagged as assembled and nothing else.
        expected_json = {
            sample: {'reads': False, 'asm': True, 'annot': False, 'ignore': False}
            for sample in samples
        }

        self.maxDiff = None

        sdirs = sample_dirs.SampleDirs(input_dir)
        self.assertEqual(expected_json, sdirs.sample_data)
        utils.rmtree(input_dir)
        utils.rmtree(work_dir)
        nextflow_helper.clean_files()
Esempio n. 7
0
    def test_update_sample_data(self):
        '''test update_sample_data'''
        root = 'tmp.sample_dirs.update_sample_data'
        names_file = 'tmp.sample_dirs.update_sample_data.new_samples'

        def status(reads=False, asm=False):
            # Fresh per-sample status dict; 'annot' and 'ignore' stay False here.
            return {'reads': reads, 'asm': asm, 'annot': False, 'ignore': False}

        def write_names(names):
            # One sample name per line, newline-terminated.
            with open(names_file, 'w') as handle:
                print(*names, sep='\n', file=handle)

        utils.rmtree(root)
        sdirs = sample_dirs.SampleDirs(root)
        self.assertTrue(os.path.exists(sdirs.sample_data_json_file))

        expected = {'ABC123': status()}
        write_names(['ABC123'])
        sdirs.add_samples(names_file)
        self.assertEqual(expected, sdirs.sample_data)

        # Flip a flag in the sample's own metadata, then refresh the summary.
        abc = sdirs.make_sample_dir('ABC123')
        abc.metadata['reads_downloaded'] = True
        abc.write_metadata_json()
        sdirs.update_sample_data()
        expected['ABC123']['reads'] = True
        self.assertEqual(expected, sdirs.sample_data)

        write_names(['ERS42', 'ERS43'])
        sdirs.add_samples(names_file)
        sdirs.update_sample_data()
        for name in ('ERS42', 'ERS43'):
            expected[name] = status()
        self.assertEqual(expected, sdirs.sample_data)

        # Change both new samples on disk, but list only ERS42 in the file
        # given to update_sample_data: ERS43 is expected to stay as it was.
        changes = (
            ('ERS42', ('reads_downloaded', 'assembled')),
            ('ERS43', ('reads_downloaded',)),
        )
        for name, flags in changes:
            one_dir = sdirs.make_sample_dir(name)
            for flag in flags:
                one_dir.metadata[flag] = True
            one_dir.write_metadata_json()
        write_names(['ERS42'])
        sdirs.update_sample_data(file_of_sample_names=names_file)
        expected['ERS42'] = status(reads=True, asm=True)
        self.assertEqual(expected, sdirs.sample_data)

        # A new SampleDirs object on the same directory must report the same data.
        sdirs = sample_dirs.SampleDirs(root)
        self.assertEqual(expected, sdirs.sample_data)
        utils.rmtree(root)
        os.unlink(names_file)