def test_create_downloadjob_symlink_only(req, tmpdir):
    """Test create_downloadjob when the local files are already present.

    The helper is asked to pre-create the local files and to compute
    human-readable symlink paths. The jobs returned for the
    '_genomic.gbff.gz' file are then expected to carry neither a URL nor a
    checksum, only the local file and the symlink path.
    """
    entry, config, joblist = prepare_create_downloadjob(
        req, tmpdir, human_readable=True, create_local_file=True)

    jobs = core.create_downloadjob(entry, 'bacteria', config)

    expected = []
    for prepared in joblist:
        # Only the '_genomic.gbff.gz' file is expected to yield a job here.
        if not prepared.local_file.endswith('_genomic.gbff.gz'):
            continue
        expected.append(
            core.DownloadJob(None, prepared.local_file, None, prepared.symlink_path))

    assert jobs == expected
def test_download_entry_symlink_only(req, tmpdir):
    """Test download_entry when the local files are already present.

    Each expected result is a ``(DownloadJob, None)`` pair whose job has no
    URL and no checksum, only the local file and the symlink path, and only
    the '_genomic.gbff.gz' file is expected to produce one.
    """
    # NOTE(review): this relies on a prepare_download_entry that accepts
    # human_readable/create_local_file and returns tuples whose first item is
    # the job -- confirm the helper in scope matches that contract.
    entry, outdir, joblist = prepare_download_entry(
        req, tmpdir, human_readable=True, create_local_file=True)

    jobs = core.download_entry(
        entry, 'refseq', 'bacteria', str(outdir), 'genbank', True)

    expected = []
    for prepared in joblist:
        if not prepared[0].local_file.endswith('_genomic.gbff.gz'):
            continue
        symlink_job = core.DownloadJob(
            None, prepared[0].local_file, None, prepared[0].symlink_path)
        expected.append((symlink_job, None))

    assert jobs == expected
def prepare_download_entry(req, tmpdir, format_map=core.FORMAT_NAME_MAP):
    """Build a fake assembly entry plus the download jobs expected for it.

    For every ``(key, value)`` pair in *format_map* a file named
    ``fake<value>`` containing *key* is written to *tmpdir*, a GET response
    for its URL is registered on *req*, and a matching ``core.DownloadJob``
    is recorded. A ``md5checksums.txt`` response listing all checksums is
    registered last.

    Returns a tuple ``(entry, outdir, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1'
    }
    outdir = tmpdir.mkdir('output')

    download_jobs = []
    checksum_lines = []
    for content, suffix in format_map.items():
        seqfile = tmpdir.join('fake{}'.format(suffix))
        seqfile.write(content)

        checksum = core.md5sum(str(seqfile))
        filename = path.basename(str(seqfile))
        full_url = 'http://fake/genomes/FAKE0.1/{}'.format(filename)
        local_file = str(outdir.join('refseq', 'bacteria', 'FAKE0.1', filename))

        download_jobs.append(core.DownloadJob(full_url, local_file, checksum))
        checksum_lines.append('{}\t./{}\n'.format(checksum, filename))
        req.get(full_url, text=seqfile.read())

    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, outdir, download_jobs
def test_download_defaults(monkeypatch, mocker):
    """Test _download runs once per supported taxonomic group by default."""
    placeholder_job = core.DownloadJob(None, None, None, None)
    download_stub = mocker.MagicMock(return_value=[placeholder_job])
    worker_stub = mocker.MagicMock()

    # Replace both the job producer and the worker with mocks.
    monkeypatch.setattr(core, '_download', download_stub)
    monkeypatch.setattr(core, 'worker', worker_stub)

    exit_code = core.download()

    assert exit_code == 0
    assert download_stub.call_count == len(core.SUPPORTED_TAXONOMIC_GROUPS)
def test_download_metadata(monkeypatch, mocker, req, tmpdir):
    """Test creating the metadata file works.

    A canned assembly summary is served for the refseq/bacteria summary URL,
    ``create_downloadjob`` is patched to return a placeholder job, and the
    test then checks the call counts of the spied functions and that the
    metadata table file exists afterwards.
    """
    metadata_file = tmpdir.join('metadata.tsv')

    # Read the fixture inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(_get_file('partial_summary.txt'), 'r') as handle:
        summary_contents = handle.read()

    req.get('https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/assembly_summary.txt',
            text=summary_contents)

    mocker.spy(core, 'get_summary')
    mocker.spy(core, 'parse_summary')
    mocker.patch('ncbi_genome_download.core.create_downloadjob',
                 return_value=[core.DownloadJob(None, None, None, None)])

    core.download(group='bacteria', output='/tmp/fake',
                  metadata_table=str(metadata_file))

    assert core.get_summary.call_count == 1
    assert core.parse_summary.call_count == 1
    # Presumably one call per entry in partial_summary.txt -- verify against
    # the fixture if this count ever changes.
    assert core.create_downloadjob.call_count == 4
    assert metadata_file.check()
def prepare_create_downloadjob(req, tmpdir, format_map=NgdConfig._FORMATS,
                               human_readable=False, create_local_file=False):
    """Build a fake entry, an NgdConfig and the download jobs expected for them.

    For every ``(key, value)`` pair in *format_map* a file named
    ``fake<value>`` containing *key* is written to *tmpdir* and a GET
    response for its URL is registered on *req*. A ``md5checksums.txt``
    response listing all checksums is registered last.

    When *create_local_file* is true the same content is also written to the
    job's local file path (creating parent directories). When
    *human_readable* is true each job gets a symlink path below
    ``human_readable/``; otherwise the symlink path is ``None``.

    Returns a tuple ``(entry, config, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    config = NgdConfig()
    outdir = tmpdir.mkdir('output')
    config.output = str(outdir)
    config.human_readable = human_readable

    download_jobs = []
    checksum_lines = []
    for content, suffix in format_map.items():
        seqfile = tmpdir.join('fake{}'.format(suffix))
        seqfile.write(content)

        checksum = core.md5sum(str(seqfile))
        filename = path.basename(str(seqfile))
        full_url = 'https://fake/genomes/FAKE0.1/{}'.format(filename)
        local_file = outdir.join('refseq', 'bacteria', 'FAKE0.1', filename)

        # Read the fixture back once; it feeds both the optional local copy
        # and the mocked HTTP response.
        served_text = seqfile.read()
        if create_local_file:
            local_file.write(served_text, ensure=True)

        if human_readable:
            symlink_path = str(
                outdir.join('human_readable', 'refseq', 'bacteria', 'Example',
                            'species', 'ABC_1234', filename))
        else:
            symlink_path = None

        download_jobs.append(
            core.DownloadJob(full_url, str(local_file), checksum, symlink_path))
        checksum_lines.append('{}\t./{}\n'.format(checksum, filename))
        req.get(full_url, text=served_text)

    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, config, download_jobs
def test_args_download_defaults(monkeypatch, mocker):
    """Test args_download with an empty Namespace.

    Checks that the first positional argument given to select_candidates has
    ``group`` equal to SUPPORTED_TAXONOMIC_GROUPS and that the selected entry
    is handed on to create_downloadjob.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    placeholder_job = core.DownloadJob(None, None, None, None)
    worker_stub = mocker.MagicMock()
    select_candidates_stub = mocker.MagicMock(return_value=[(entry, 'bacteria')])
    create_downloadjob_stub = mocker.MagicMock(return_value=[placeholder_job])

    monkeypatch.setattr(core, 'select_candidates', select_candidates_stub)
    monkeypatch.setattr(core, 'create_downloadjob', create_downloadjob_stub)
    monkeypatch.setattr(core, 'worker', worker_stub)

    exit_code = core.args_download(Namespace())
    assert exit_code == 0

    # call_args_list[0][0] is the positional-argument tuple of the first call.
    select_positional = select_candidates_stub.call_args_list[0][0]
    assert select_positional[0].group == SUPPORTED_TAXONOMIC_GROUPS

    create_positional = create_downloadjob_stub.call_args_list[0][0]
    assert create_positional[0] == entry
def test_download_dry_run(monkeypatch, mocker):
    """Test a dry run selects candidates but creates no jobs and starts no worker."""
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    placeholder_job = core.DownloadJob(None, None, None, None)
    worker_stub = mocker.MagicMock()
    select_candidates_stub = mocker.MagicMock(return_value=[(entry, 'bacteria')])
    create_downloadjob_stub = mocker.MagicMock(return_value=[placeholder_job])

    monkeypatch.setattr(core, 'select_candidates', select_candidates_stub)
    monkeypatch.setattr(core, 'create_downloadjob', create_downloadjob_stub)
    monkeypatch.setattr(core, 'worker', worker_stub)

    exit_code = core.download(dry_run=True)

    assert exit_code == 0
    # Candidates are still looked up once, but nothing past that point runs.
    assert select_candidates_stub.call_count == 1
    assert create_downloadjob_stub.call_count == 0
    assert worker_stub.call_count == 0