def main():
    """Print the tRNA and rRNA features of an NCBI GenBank record.

    The record is downloaded in GenBank format to ``<ncbi_id>.gbk`` in the
    current directory. When ``--reuse`` is given and that file already
    exists, the download is skipped and the existing file is parsed instead.

    Each tRNA/rRNA feature is written to stdout as a two-line entry: a
    ``>type|locus_tag`` header followed by the extracted sequence.

    Raises:
        KeyError: if a tRNA/rRNA feature has no ``locus_tag`` qualifier.
    """
    parser = argparse.ArgumentParser(
        description="Extract tRNA and rRNA features from a sequence specified by NCBI GenBank ID")
    parser.add_argument("ncbi_id",
                        help="NCBI GenBank ID to extract RNA features from.")
    parser.add_argument("--reuse", action="store_true", default=False,
                        help="Reuse existing file if available")
    args = parser.parse_args()

    # Download file in GenBank format
    # Could download into a StringIO buffer by using the
    # ncbi_acc_download.download functions directly, but then we can't --reuse
    filename = "{}.gbk".format(args.ncbi_id)
    # Only skip the download when the user explicitly asked for --reuse AND
    # a previously downloaded file is present; otherwise fetch a fresh copy.
    # NOTE(review): assumes download_to_file replaces an existing file
    # rather than appending to it -- confirm against ncbi_acc_download.
    if not (args.reuse and os.path.isfile(filename)):
        cfg = Config(recursive=True, format="genbank", out=filename)
        download_to_file(args.ncbi_id, cfg, filename)

    # Extract RNA features, streaming records instead of loading them all
    for record in SeqIO.parse(filename, "genbank"):
        for feature in record.features:
            if feature.type not in ("tRNA", "rRNA"):
                continue
            # this crashes if there is no locus tag, but NCBI genomes
            # should have these
            locus_tag = feature.qualifiers['locus_tag'][0]
            header = ">{type}|{locus_tag}".format(type=feature.type,
                                                  locus_tag=locus_tag)
            seq = feature.extract(record.seq)
            print(header, seq, sep='\n')
def test_write_stream(mocker):
    """write_stream must raise DownloadError on a chunked-encoding failure.

    The mocked request's ``iter_content`` raises
    ``requests.exceptions.ChunkedEncodingError`` when called.
    """
    broken_iter = mocker.Mock(
        side_effect=requests.exceptions.ChunkedEncodingError)
    fake_request = mocker.Mock()
    fake_request.iter_content = broken_iter

    sink = StringIO()
    config = Config()

    with pytest.raises(DownloadError):
        download.write_stream(fake_request, sink, "FAKE", config)
def test_download_wgs_parts_wgs_scafld(req):
    """download_wgs_parts on 'wgs_scafld.gbk' must yield 'wgs_full.gbk'.

    The Entrez URL is registered on ``req`` to answer with the text of
    'wgs_full.gbk'; the returned handle's contents must equal that text.
    """
    cfg = Config(format="genbank")

    with open(full_path('wgs_full.gbk'), 'rt') as handle:
        full_file = handle.read()
    req.get(ENTREZ_URL, text=full_file)

    # Context manager guarantees the input file is closed even when the
    # download or the assertion fails.
    with open(full_path('wgs_scafld.gbk'), 'rt') as wgs_contig:
        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)
        assert outhandle.getvalue() == full_file
def test_download_wgs_parts_wgs(req):
    """download_wgs_parts on 'wgs.gbk' must yield 'wgs_full.gbk'.

    The Entrez URL is registered on ``req`` with an open handle on
    'wgs_full.gbk' as the response body; the returned handle's contents must
    equal that file's contents.
    """
    cfg = Config(format="genbank")

    # All file handles, including the one served as the mocked response
    # body, are closed on exit even if the download or assertion fails.
    with open(full_path('wgs.gbk'), 'rt') as wgs_contig, \
            open(full_path('wgs_full.gbk'), 'rt') as response_body:
        req.get(ENTREZ_URL, body=response_body)
        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)

    with open(full_path('wgs_full.gbk'), 'rt') as wgs_full:
        expected = wgs_full.read()

    assert outhandle.getvalue() == expected
def test_download_wgs_no_parts(req):
    """download_wgs_parts must return the input contents unchanged here.

    The Entrez URL is registered on ``req`` with status code 404, and the
    output is expected to equal the contents of 'supercontig_full.gbk',
    which is also the file passed in.
    """
    cfg = Config(format="genbank")
    req.get(ENTREZ_URL, status_code=404)

    # Context managers close the handles even when the call or the
    # assertion fails.
    with open(full_path('supercontig_full.gbk'), 'rt') as supercontig:
        outhandle = wgs.download_wgs_parts(supercontig, cfg)

    with open(full_path('supercontig_full.gbk'), 'rt') as supercontig_full:
        expected = supercontig_full.read()

    assert outhandle.getvalue() == expected
def test_download_wgs_parts_no_biopython():
    """With wgs.HAVE_BIOPYTHON set to False, the input handle comes back.

    The module-level flag is overridden for the duration of the call and
    then restored; the handle returned by download_wgs_parts must compare
    equal to the handle passed in.
    """
    old_have_biopython = wgs.HAVE_BIOPYTHON
    wgs.HAVE_BIOPYTHON = False
    try:
        cfg = Config(format="genbank")
        handle = StringIO()
        new_handle = wgs.download_wgs_parts(handle, cfg)
    finally:
        # Always restore the module-level flag so a failure here cannot
        # leak the override into other tests.
        wgs.HAVE_BIOPYTHON = old_have_biopython

    assert handle == new_handle
def test_download_wgs_parts_wgs_retry(req):
    """download_wgs_parts must still yield 'wgs_full.gbk' after a 429.

    The Entrez URL is registered on ``req`` with two responses in order: a
    429 with a ``Retry-After: 0`` header, then a response whose body is an
    open handle on 'wgs_full.gbk'. The returned handle's contents must equal
    that file's contents.
    """
    cfg = Config(format="genbank")

    # All file handles, including the one served as the mocked response
    # body, are closed on exit even if the download or assertion fails.
    with open(full_path('wgs.gbk'), 'rt') as wgs_contig, \
            open(full_path('wgs_full.gbk'), 'rt') as response_body:
        req.get(ENTREZ_URL, response_list=[
            {"text": u'Whoa, slow down', "status_code": 429,
             "headers": {"Retry-After": "0"}},
            {"body": response_body},
        ])
        outhandle = wgs.download_wgs_parts(wgs_contig, cfg)

    with open(full_path('wgs_full.gbk'), 'rt') as wgs_full:
        expected = wgs_full.read()

    assert outhandle.getvalue() == expected