def prepare_download_entry(req, tmpdir, format_map=core.format_name_map):
    """Build a fake assembly entry plus the download jobs expected for it.

    For every ``(key, val)`` pair in ``format_map`` a file named ``fake<val>``
    containing ``key`` is written into ``tmpdir``, its URL is handed to
    ``req.get`` together with the file content as ``text``, and a matching
    ``core.DownloadJob`` is recorded.  The collected checksum lines are then
    handed to ``req.get`` under the ``md5checksums.txt`` URL.

    NOTE(review): ``req`` looks like a requests-mock style fixture, so the
    ``req.get`` calls presumably register canned responses -- confirm.

    Returns the tuple ``(entry, outdir, download_jobs)``.
    """
    output_root = tmpdir.mkdir('output')
    jobs = []
    checksum_lines = []

    for format_name, suffix in format_map.items():
        # The fake sequence file simply contains the name of its format.
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(format_name)

        digest = core.md5sum(str(fake_seq))
        basename = path.basename(str(fake_seq))
        url = 'http://fake/genomes/FAKE0.1/{}'.format(basename)
        target = str(output_root.join('refseq', 'bacteria', 'FAKE0.1', basename))

        jobs.append(core.DownloadJob(url, target, digest))
        checksum_lines.append('{}\t./{}\n'.format(digest, basename))
        req.get(url, text=fake_seq.read())

    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt', text=''.join(checksum_lines))

    fake_entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1'
    }
    return fake_entry, output_root, jobs
예제 #2
0
def prepare_download_entry(req, tmpdir, format_map=core.FORMAT_NAME_MAP):
    """Create the fake entry, output directory and expected download jobs.

    Each ``(key, val)`` item of ``format_map`` yields one file ``fake<val>``
    in ``tmpdir`` whose content is ``key``.  The file's URL and content are
    passed to ``req.get``, and a ``core.DownloadJob`` pointing into the
    ``output`` directory is collected.  A checksum listing covering all files
    is passed to ``req.get`` for the ``md5checksums.txt`` URL.

    NOTE(review): ``req`` is presumably a requests-mock style fixture whose
    ``get`` registers a response -- confirm against the test setup.

    Returns ``(entry, outdir, download_jobs)``.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1'
    }
    out_root = tmpdir.mkdir('output')

    expected_jobs = []
    md5_lines = []
    for fmt, ending in format_map.items():
        source_file = tmpdir.join('fake{}'.format(ending))
        source_file.write(fmt)
        source_path = str(source_file)

        md5 = core.md5sum(source_path)
        name = path.basename(source_path)
        remote = 'http://fake/genomes/FAKE0.1/{}'.format(name)
        local = str(out_root.join('refseq', 'bacteria', 'FAKE0.1', name))

        expected_jobs.append(core.DownloadJob(remote, local, md5))
        md5_lines.append('{}\t./{}\n'.format(md5, name))
        req.get(remote, text=source_file.read())

    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(md5_lines))

    return entry, out_root, expected_jobs
예제 #3
0
def test_download_file_fasta(req, tmpdir):
    """A 'fasta' download job built from two checksum entries runs truthy."""
    # First checksum entry: a file whose own content says it is not wanted.
    unwanted = tmpdir.join('fake_cds_from_genomic.fna.gz')
    unwanted.write("we don't want this one")
    unwanted_md5 = core.md5sum(str(unwanted))

    # Second checksum entry: the file whose URL is handed to ``req.get``.
    wanted = tmpdir.join('fake_genomic.fna.gz')
    wanted.write('foo')
    assert wanted.check()
    wanted_md5 = core.md5sum(str(wanted))

    checksums = [
        {'checksum': md5, 'file': source.basename}
        for md5, source in ((unwanted_md5, unwanted), (wanted_md5, wanted))
    ]

    download_dir = tmpdir.mkdir('download')
    req.get('https://fake/path/fake_genomic.fna.gz', text=wanted.read())

    job = core.download_file_job(
        {'ftp_path': 'ftp://fake/path'}, str(download_dir), checksums, 'fasta')
    assert core.worker(job)
예제 #4
0
def test_download_file_fasta(req, tmpdir):
    """Run a 'fasta' download job and expect ``core.worker`` to be truthy.

    The checksum list holds two entries; only the ``fake_genomic.fna.gz`` URL
    is passed to ``req.get``.
    """
    download_dir = tmpdir.mkdir('download')

    decoy = tmpdir.join('fake_cds_from_genomic.fna.gz')
    decoy.write("we don't want this one")

    target = tmpdir.join('fake_genomic.fna.gz')
    target.write('foo')
    assert target.check()

    checksums = [
        {'checksum': core.md5sum(str(decoy)), 'file': decoy.basename},
        {'checksum': core.md5sum(str(target)), 'file': target.basename},
    ]
    req.get('https://fake/path/fake_genomic.fna.gz', text=target.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file_job(entry, str(download_dir), checksums, 'fasta')
    assert core.worker(job)
예제 #5
0
def test_download_file_genbank(req, tmpdir):
    """A download job created without an explicit format runs truthy."""
    gbff = tmpdir.join('fake_genomic.gbff.gz')
    gbff.write('foo')
    assert gbff.check()

    checksums = [{'checksum': core.md5sum(str(gbff)), 'file': gbff.basename}]
    download_dir = tmpdir.mkdir('download')
    req.get('https://fake/path/fake_genomic.gbff.gz', text=gbff.read())

    # No format argument is passed, so download_file_job uses its default.
    job = core.download_file_job(
        {'ftp_path': 'ftp://fake/path'}, str(download_dir), checksums)
    assert core.worker(job)
def test_download_file_fasta(req, tmpdir):
    """``core.download_file`` with format 'fasta' yields a truthy worker run."""
    fasta = tmpdir.join('fake_genomic.fna.gz')
    fasta.write('foo')
    assert fasta.check()

    digest = core.md5sum(str(fasta))
    download_dir = tmpdir.mkdir('download')
    req.get('http://fake/path/fake_genomic.fna.gz', text=fasta.read())

    job = core.download_file(
        {'ftp_path': 'ftp://fake/path'},
        str(download_dir),
        [{'checksum': digest, 'file': fasta.basename}],
        'fasta',
    )
    assert core.worker(job)
예제 #7
0
def test_has_file_changed_unchanged(tmpdir):
    """``has_file_changed`` is False when the listed checksum matches the file."""
    existing = tmpdir.join('fake_genomic.gbff.gz')
    existing.write('foo')
    assert existing.check()

    # The first entry names a file that is never created in tmpdir.
    checksums = [{'checksum': 'fake', 'file': 'skipped'}]
    checksums.append(
        {'checksum': core.md5sum(str(existing)), 'file': existing.basename})

    changed = core.has_file_changed(str(tmpdir), checksums)
    assert changed is False
def test_has_file_changed_unchanged(tmpdir):
    """An on-disk file whose md5 matches its checksum entry reports unchanged."""
    gbff = tmpdir.join('fake_genomic.gbff.gz')
    gbff.write('foo')
    assert gbff.check()
    digest = core.md5sum(str(gbff))

    placeholder = {'checksum': 'fake', 'file': 'skipped'}
    matching = {'checksum': digest, 'file': gbff.basename}

    result = core.has_file_changed(str(tmpdir), [placeholder, matching])
    assert result is False
예제 #9
0
def test_create_symlink_job(tmpdir):
    """Running a symlink job makes the file name appear in the symlink dir."""
    download_dir = tmpdir.mkdir('download')
    downloaded = download_dir.join('fake_genomic.gbff.gz')
    downloaded.write('foo')
    assert downloaded.check()

    checksums = [
        {'checksum': core.md5sum(str(downloaded)), 'file': downloaded.basename},
    ]
    link_dir = tmpdir.mkdir('symlink')

    job = core.create_symlink_job(
        str(download_dir), checksums, 'genbank', str(link_dir))
    assert core.worker(job)
    assert link_dir.join('fake_genomic.gbff.gz').check()
예제 #10
0
def test_need_to_create_symlink(tmpdir):
    """``need_to_create_symlink`` is truthy for an empty human-readable dir."""
    gbff = tmpdir.join('fake_genomic.gbff.gz')
    gbff.write('foo')
    assert gbff.check()
    digest = core.md5sum(str(gbff))

    # Freshly created, so it contains no link for the file yet.
    link_dir = tmpdir.mkdir('human_readable')

    checksums = [{'checksum': 'fake', 'file': 'skipped'}]
    checksums.append({'checksum': digest, 'file': gbff.basename})

    needed = core.need_to_create_symlink(
        str(tmpdir), checksums, 'genbank', str(link_dir))
    assert needed
예제 #11
0
def test_need_to_create_symlink(tmpdir):
    """With nothing in the target directory, a symlink is reported as needed."""
    source = tmpdir.join('fake_genomic.gbff.gz')
    source.write('foo')
    assert source.check()
    source_md5 = core.md5sum(str(source))
    readable_dir = tmpdir.mkdir('human_readable')

    skipped_entry = {'checksum': 'fake', 'file': 'skipped'}
    real_entry = {'checksum': source_md5, 'file': source.basename}

    assert core.need_to_create_symlink(
        str(tmpdir), [skipped_entry, real_entry], 'genbank', str(readable_dir))
예제 #12
0
def test_create_symlink_job(tmpdir):
    """A 'genbank' symlink job succeeds and its link path passes ``check()``."""
    file_name = 'fake_genomic.gbff.gz'

    src_dir = tmpdir.mkdir('download')
    src_file = src_dir.join(file_name)
    src_file.write('foo')
    assert src_file.check()
    src_md5 = core.md5sum(str(src_file))

    dst_dir = tmpdir.mkdir('symlink')
    entries = [{'checksum': src_md5, 'file': src_file.basename}]

    outcome = core.worker(
        core.create_symlink_job(str(src_dir), entries, 'genbank', str(dst_dir)))
    assert outcome

    created_link = dst_dir.join(file_name)
    assert created_link.check()
예제 #13
0
def test_download_file_symlink_path(req, tmpdir):
    """A download job given ``symlink_path`` leaves a link in that directory."""
    gbff = tmpdir.join('fake_genomic.gbff.gz')
    gbff.write('foo')
    assert gbff.check()
    digest = core.md5sum(str(gbff))

    download_dir = tmpdir.mkdir('download')
    link_dir = tmpdir.mkdir('symlink')
    req.get('https://fake/path/fake_genomic.gbff.gz', text=gbff.read())

    job = core.download_file_job(
        {'ftp_path': 'ftp://fake/path'},
        str(download_dir),
        [{'checksum': digest, 'file': gbff.basename}],
        symlink_path=str(link_dir),
    )
    assert core.worker(job)
    assert link_dir.join('fake_genomic.gbff.gz').check()
예제 #14
0
def prepare_create_downloadjob(req,
                               tmpdir,
                               format_map=NgdConfig._FORMATS,
                               human_readable=False,
                               create_local_file=False):
    """Build a fake entry, an ``NgdConfig`` and the expected download jobs.

    For each ``(key, val)`` item in ``format_map`` a file ``fake<val>``
    containing ``key`` is written to ``tmpdir``; its URL and content are
    passed to ``req.get`` and a ``core.DownloadJob`` is collected.  When
    ``create_local_file`` is true, the job's local target file is also
    written with the same content.  When ``human_readable`` is true, each job
    carries a symlink path below ``human_readable/``; otherwise it is ``None``.

    NOTE(review): ``req`` is presumably a requests-mock style fixture whose
    ``get`` registers a response -- confirm.

    Returns ``(entry, config, download_jobs)``.
    """
    config = NgdConfig()
    output_root = tmpdir.mkdir('output')
    config.output = str(output_root)
    config.human_readable = human_readable

    jobs = []
    checksum_lines = []
    for format_name, suffix in format_map.items():
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(format_name)

        digest = core.md5sum(str(fake_seq))
        basename = path.basename(str(fake_seq))
        url = 'https://fake/genomes/FAKE0.1/{}'.format(basename)

        target = output_root.join('refseq', 'bacteria', 'FAKE0.1', basename)
        if create_local_file:
            # ensure=True is passed so the write can create the target's
            # parent directories.
            target.write(fake_seq.read(), ensure=True)

        link_path = (
            str(output_root.join('human_readable', 'refseq', 'bacteria',
                                 'Example', 'species', 'ABC_1234', basename))
            if human_readable else None
        )

        jobs.append(core.DownloadJob(url, str(target), digest, link_path))
        checksum_lines.append('{}\t./{}\n'.format(digest, basename))
        req.get(url, text=fake_seq.read())

    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    fake_entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }
    return fake_entry, config, jobs
예제 #15
0
def test_download_file_symlink_path_existed(req, tmpdir):
    """A download job still succeeds when the link path is already a symlink.

    The link is pre-created pointing at ``/foo/bar``; after the job has run
    the same path must pass ``check()``.
    """
    gbff = tmpdir.join('fake_genomic.gbff.gz')
    gbff.write('foo')
    assert gbff.check()
    digest = core.md5sum(str(gbff))

    download_dir = tmpdir.mkdir('download')
    link_dir = tmpdir.mkdir('symlink')

    # Occupy the link location before the job runs.
    existing_link = link_dir.join('fake_genomic.gbff.gz')
    os.symlink("/foo/bar", str(existing_link))

    req.get('https://fake/path/fake_genomic.gbff.gz', text=gbff.read())

    job = core.download_file_job(
        {'ftp_path': 'ftp://fake/path'},
        str(download_dir),
        [{'checksum': digest, 'file': gbff.basename}],
        symlink_path=str(link_dir),
    )
    assert core.worker(job)
    assert existing_link.check()
예제 #16
0
def test_download_file_rna_fasta(req, tmpdir):
    """``core.download_file`` with format 'rna-fasta' runs to a truthy result."""
    rna = tmpdir.join('fake_rna_from_genomic.fna.gz')
    rna.write('foo')
    assert rna.check()

    rna_entry = {'checksum': core.md5sum(str(rna)), 'file': rna.basename}
    download_dir = tmpdir.mkdir('download')
    req.get('http://fake/path/fake_rna_from_genomic.fna.gz',
            text=rna.read())

    job = core.download_file(
        {'ftp_path': 'ftp://fake/path'}, str(download_dir), [rna_entry],
        'rna-fasta')
    assert core.worker(job)
예제 #17
0
def prepare_create_downloadjob(req, tmpdir, format_map=NgdConfig._FORMATS, human_readable=False,
                               create_local_file=False):
    """Set up fake remote files and return what a download run should use.

    One file ``fake<val>`` (content: ``key``) is written to ``tmpdir`` per
    ``format_map`` item.  Each file's URL and content, and finally the
    ``md5checksums.txt`` listing, are passed to ``req.get``.  A
    ``core.DownloadJob`` is collected per file; its symlink path is set only
    when ``human_readable`` is true, and its local target is pre-written only
    when ``create_local_file`` is true.

    NOTE(review): ``req.get`` presumably registers mocked responses
    (requests-mock style fixture) -- confirm.

    Returns ``(entry, config, download_jobs)``.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    cfg = NgdConfig()
    out_root = tmpdir.mkdir('output')
    cfg.output = str(out_root)
    cfg.human_readable = human_readable

    expected_jobs = []
    md5_lines = []
    for fmt, ending in format_map.items():
        source_file = tmpdir.join('fake{}'.format(ending))
        source_file.write(fmt)
        source_path = str(source_file)

        md5 = core.md5sum(source_path)
        name = path.basename(source_path)
        remote = 'https://fake/genomes/FAKE0.1/{}'.format(name)
        local = out_root.join('refseq', 'bacteria', 'FAKE0.1', name)

        if create_local_file:
            local.write(source_file.read(), ensure=True)

        if human_readable:
            link = str(out_root.join('human_readable', 'refseq', 'bacteria', 'Example',
                                     'species', 'ABC_1234', name))
        else:
            link = None

        expected_jobs.append(core.DownloadJob(remote, str(local), md5, link))
        md5_lines.append('{}\t./{}\n'.format(md5, name))
        req.get(remote, text=source_file.read())

    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt', text=''.join(md5_lines))

    return entry, cfg, expected_jobs
예제 #18
0
def test_md5sum():
    """``core.md5sum`` of the partial_summary.txt fixture equals the known digest."""
    fixture_path = _get_file('partial_summary.txt')
    assert core.md5sum(fixture_path) == '74d72df33d621f5eb6300dc9a2e06573'
예제 #19
0
def test_md5sum():
    """Compare the md5 of the 'partial_summary.txt' fixture to a fixed hex digest."""
    known_digest = '74d72df33d621f5eb6300dc9a2e06573'
    actual_digest = core.md5sum(_get_file('partial_summary.txt'))
    assert actual_digest == known_digest
예제 #20
0
 def create_checksum_line(filename):
     """Return a checksum-listing line: md5, a tab, './' + basename, newline."""
     digest = core.md5sum(filename)
     basename = path.basename(filename)
     return '{}\t./{}\n'.format(digest, basename)