Esempio n. 1
0
def main():
    """Print the tRNA and rRNA features of an NCBI GenBank record as FASTA.

    Command-line arguments:
        ncbi_id: NCBI GenBank ID to extract RNA features from.
        --reuse: reuse ``<ncbi_id>.gbk`` if it already exists instead of
            downloading it again.

    Every tRNA/rRNA feature is written to stdout as a two-line record:
    a ``>{type}|{locus_tag}`` header followed by the extracted sequence.
    """
    parser = argparse.ArgumentParser(description="Extract tRNA and rRNA features from a sequence specified by NCBI GenBank ID")
    parser.add_argument("ncbi_id", help="NCBI GenBank ID to extract RNA features from.")
    parser.add_argument("--reuse", action="store_true", default=False,
                        help="Reuse existing file if available")

    args = parser.parse_args()

    # Download file in GenBank format
    # Could download into a StringIO buffer by using the
    # ncbi_acc_download.download functions directly, but then we can't --reuse
    filename = "{}.gbk".format(args.ncbi_id)
    # Only skip the download when the user explicitly asked to reuse an
    # existing file; previously the flag was parsed but never consulted.
    if not (args.reuse and os.path.isfile(filename)):
        cfg = Config(recursive=True, format="genbank", out=filename)
        download_to_file(args.ncbi_id, cfg, filename)

    # Extract RNA features, streaming records instead of loading them all.
    for record in SeqIO.parse(filename, "genbank"):
        for feature in record.features:
            if feature.type not in ("tRNA", "rRNA"):
                continue
            # NCBI genomes should carry locus tags; fall back to a placeholder
            # rather than aborting with a KeyError after partial output.
            locus_tag = feature.qualifiers.get("locus_tag", ["unknown"])[0]
            header = ">{type}|{locus_tag}".format(type=feature.type, locus_tag=locus_tag)
            seq = feature.extract(record.seq)
            print(header, seq, sep='\n')
Esempio n. 2
0
def test_write_stream(mocker):
    """write_stream raises DownloadError when streaming the response fails."""
    config = Config()
    out_buffer = StringIO()

    # Fake response whose iter_content() raises ChunkedEncodingError on call.
    failing_iter_content = mocker.Mock(side_effect=requests.exceptions.ChunkedEncodingError)
    response = mocker.Mock()
    response.iter_content = failing_iter_content

    with pytest.raises(DownloadError):
        download.write_stream(response, out_buffer, "FAKE", config)
Esempio n. 3
0
def test_download_wgs_parts_wgs_scafld(req):
    """download_wgs_parts on the WGS scaffold fixture yields the full record.

    ``req`` is presumably a requests-mock fixture; it is set up to answer
    ENTREZ_URL with the contents of ``wgs_full.gbk``.
    """
    cfg = Config(format="genbank")
    with open(full_path('wgs_full.gbk'), 'rt') as handle:
        full_file = handle.read()
    req.get(ENTREZ_URL, text=full_file)

    # The context manager closes the input file even when the call or the
    # assertion fails, which the manual close() at the end did not.
    with open(full_path('wgs_scafld.gbk'), 'rt') as wgs_contig:
        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)
        assert outhandle.getvalue() == full_file
Esempio n. 4
0
def test_download_wgs_parts_wgs(req):
    """download_wgs_parts on the WGS fixture yields the full record.

    ``req`` is presumably a requests-mock fixture; it is set up to answer
    ENTREZ_URL with a file-like body backed by ``wgs_full.gbk``.
    """
    cfg = Config(format="genbank")
    with open(full_path('wgs_full.gbk'), 'rt') as handle:
        expected = handle.read()

    # Both handles must stay open while the download runs (the mocked body is
    # read during the request); the context manager closes them afterwards
    # even if the call or the assertion fails. The body handle was previously
    # never closed at all.
    with open(full_path('wgs.gbk'), 'rt') as wgs_contig, \
            open(full_path('wgs_full.gbk'), 'rt') as body:
        req.get(ENTREZ_URL, body=body)

        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)
        assert outhandle.getvalue() == expected
Esempio n. 5
0
def test_download_wgs_no_parts(req):
    """download_wgs_parts returns the input content for the supercontig fixture.

    ``req`` is presumably a requests-mock fixture; ENTREZ_URL is set up to
    answer with HTTP 404, and the output is expected to equal the input file.
    """
    cfg = Config(format="genbank")
    with open(full_path('supercontig_full.gbk'), 'rt') as handle:
        expected = handle.read()
    req.get(ENTREZ_URL, status_code=404)

    # The context manager closes the input file even when the call or the
    # assertion fails, which the manual close() calls at the end did not.
    with open(full_path('supercontig_full.gbk'), 'rt') as supercontig:
        outhandle = wgs.download_wgs_parts(supercontig, cfg)
        assert outhandle.getvalue() == expected
Esempio n. 6
0
def test_download_wgs_parts_no_biopython():
    """With HAVE_BIOPYTHON disabled, download_wgs_parts returns its input handle."""
    old_have_biopython = wgs.HAVE_BIOPYTHON
    wgs.HAVE_BIOPYTHON = False
    try:
        cfg = Config(format="genbank")

        handle = StringIO()

        new_handle = wgs.download_wgs_parts(handle, cfg)
    finally:
        # Always restore the module-level flag so a failure here cannot leak
        # the patched value into tests that run afterwards.
        wgs.HAVE_BIOPYTHON = old_have_biopython
    assert handle == new_handle
Esempio n. 7
0
def test_download_wgs_parts_wgs_retry(req):
    """download_wgs_parts yields the full record after an initial HTTP 429.

    ``req`` is presumably a requests-mock fixture; ENTREZ_URL is set up to
    answer first with status 429 and ``Retry-After: 0``, then with a
    file-like body backed by ``wgs_full.gbk``.
    """
    cfg = Config(format="genbank")
    with open(full_path('wgs_full.gbk'), 'rt') as handle:
        expected = handle.read()

    # Both handles must stay open while the download runs (the mocked body is
    # read during the request); the context manager closes them afterwards
    # even if the call or the assertion fails. The body handle was previously
    # never closed at all.
    with open(full_path('wgs.gbk'), 'rt') as wgs_contig, \
            open(full_path('wgs_full.gbk'), 'rt') as body:
        req.get(ENTREZ_URL, response_list=[
            {"text": u'Whoa, slow down', "status_code": 429, "headers": {"Retry-After": "0"}},
            {"body": body},
        ])

        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)
        assert outhandle.getvalue() == expected