コード例 #1
0
ファイル: rna_extract.py プロジェクト: pythseq/rna_extract
def main():
    """Print the tRNA and rRNA features of an NCBI GenBank record as FASTA.

    Parses the command line, makes sure ``<ncbi_id>.gbk`` is available, and
    prints a ``>type|locus_tag`` header followed by the extracted sequence
    for every tRNA/rRNA feature in the file.

    The GenBank file is downloaded on every run unless ``--reuse`` is given
    and the file already exists.

    Raises:
        KeyError: if a tRNA/rRNA feature has no ``locus_tag`` qualifier.
    """
    parser = argparse.ArgumentParser(description="Extract tRNA and rRNA features from a sequence specified by NCBI GenBank ID")
    parser.add_argument("ncbi_id", help="NCBI GenBank ID to extract RNA features from.")
    parser.add_argument("--reuse", action="store_true", default=False,
                        help="Reuse existing file if available")

    args = parser.parse_args()

    # Download file in GenBank format
    # Could download into a StringIO buffer by using the
    # ncbi_acc_download.download functions directly, but then we can't --reuse
    filename = "{}.gbk".format(args.ncbi_id)
    # Only skip the download when the user explicitly asked to reuse a file
    # that is actually there; otherwise fetch a fresh copy.
    if not (args.reuse and os.path.isfile(filename)):
        cfg = Config(recursive=True, format="genbank", out=filename)
        download_to_file(args.ncbi_id, cfg, filename)

    # Extract RNA features, streaming records instead of loading them all first
    for record in SeqIO.parse(filename, "genbank"):
        for feature in record.features:
            if feature.type not in ("tRNA", "rRNA"):
                continue
            # this crashes if there is no locus tag, but NCBI genomes should have these
            locus_tag = feature.qualifiers['locus_tag'][0]
            header = ">{type}|{locus_tag}".format(type=feature.type, locus_tag=locus_tag)
            seq = feature.extract(record.seq)
            print(header, seq, sep='\n')
コード例 #2
0
ファイル: test_core.py プロジェクト: zdk123/ncbi-acc-download
def test_download_to_file(req, tmpdir):
    """Check that a download via download_to_file produces a '.gbk' file."""
    # Register a canned response body for GET requests to ENTREZ_URL.
    req.get(ENTREZ_URL, text='This works.')

    target_dir = tmpdir.mkdir('outdir')
    cfg = core.Config(molecule='nucleotide', verbose=False)

    # The filename is passed without extension; the file checked below is
    # the same name with '.gbk' appended.
    core.download_to_file('FOO', cfg, filename=target_dir.join('foo'))

    assert target_dir.join('foo.gbk').check()
コード例 #3
0
ファイル: core.py プロジェクト: antismash/downloader
def download_job_files(config: Config, job: Job) -> None:
    """Fetch the NCBI record named by ``job.download`` and requeue the job.

    The job is committed twice: once after being marked as downloading, and
    once after the download call returns and the job has been reset to
    'queued' with its filename filled in.
    """
    accession = job.download

    # Phase 1: record that the download is in progress.
    job.state = 'downloading'
    job.status = "Downloading {} from NCBI".format(accession)
    job.trace.append(config.name)
    job.commit()

    # Phase 2: download in GenBank format under <workdir>/<job_id>/input/.
    target_prefix = os.path.join(config.workdir, job.job_id, 'input', accession)
    download_to_file(
        accession,
        NadConfig(format="genbank", recursive=True),
        target_prefix,
    )

    # Phase 3: hand the job back to the queue with its input filename set.
    job.state = 'queued'
    job.needs_download = False
    job.status = "pending"
    job.filename = '{}.gbk'.format(accession)
    job.commit()
コード例 #4
0
def test_download_to_file_retry(req, tmpdir):
    """Check that a download still produces its file after an initial 429."""
    # First reply: rate-limited, with a zero-second Retry-After header.
    throttled = {
        "text": u'Whoa, slow down',
        "status_code": 429,
        "headers": {"Retry-After": "0"},
    }
    # Second reply: a normal response body.
    succeeded = {"text": 'This works.'}
    req.get(ENTREZ_URL, response_list=[throttled, succeeded])

    target_dir = tmpdir.mkdir('outdir')
    cfg = core.Config(molecule='nucleotide', verbose=False)

    core.download_to_file('FOO', cfg, filename=target_dir.join('foo'))

    assert target_dir.join('foo.gbk').check()
コード例 #5
0
ファイル: test_core.py プロジェクト: zdk123/ncbi-acc-download
def test_download_to_file_append(req, tmpdir):
    """Check that three downloads into one file leave three lines in it."""
    # Every request gets the same single-line body.
    req.get(ENTREZ_URL, text='This works.\n')

    combined = tmpdir.mkdir('outdir').join('foo.txt')
    cfg = core.Config(molecule='nucleotide', verbose=False, out='foo.txt')

    # The first call writes without append; the following two append.
    calls = (('FOO', False), ('BAR', True), ('BAZ', True))
    for accession, append_flag in calls:
        core.download_to_file(accession, cfg, filename=str(combined),
                              append=append_flag)

    assert combined.check()
    assert len(combined.readlines()) == 3