def main(args):
    """Index the VCF at ``args.vcf`` into the SQLite database ``args.dbfile``.

    The VCF is opened for reading with ``rsidx.open`` and handed to ``index``
    together with a connection to the database file.

    Args:
        args: Object providing ``vcf``, ``dbfile``, ``cache_size`` and
            ``mmap_size`` attributes.
    """
    with rsidx.open(args.vcf, 'r') as vcffh:
        dbconn = sqlite3.connect(args.dbfile)
        try:
            # A sqlite3 connection used as a context manager only commits or
            # rolls back the pending transaction; it does not close the
            # connection, hence the explicit close() below.
            with dbconn:
                index(dbconn, vcffh, cache_size=args.cache_size,
                      mmap_size=args.mmap_size)
        finally:
            dbconn.close()
def test_index_multi(capsys):
    """Index ``chr9-multi.vcf.gz`` and search the fresh index for one rsID.

    The test passes when the captured stdout of the search contains the
    tab-delimited ID ``rs60995877`` exactly seven times.
    """
    vcf_path = data_file('chr9-multi.vcf.gz')
    with TempFileName(suffix='.rsidx') as index_path:
        with rsidx.open(vcf_path, 'r') as vcf_handle:
            # Build the index first; leaving the connection's context commits
            # the transaction before the search reads the file.
            with sqlite3.connect(index_path) as connection:
                rsidx.index.index(connection, vcf_handle)
            parser = rsidx.cli.get_parser()
            parsed = parser.parse_args(
                ['search', vcf_path, index_path, 'rs60995877']
            )
            rsidx.search.main(parsed)
            captured = capsys.readouterr()
            hits = captured.out.count('\trs60995877\t')
            assert hits == 7
def test_index_bogus_rsids():
    """Index ``chr4-sample-corrupted-ids.vcf.gz`` and query four rsIDs.

    Only one of the four queried IDs is expected to be present in the
    ``rsid_to_coord`` table after indexing.
    """
    expected = [(1234497371, '4', 218446)]
    with NamedTemporaryFile(suffix='.sqlite3') as db, \
            sqlite3.connect(db.name) as dbconn:
        vcf_path = data_file('chr4-sample-corrupted-ids.vcf.gz')
        with rsidx.open(vcf_path, 'r') as vcf_handle:
            rsidx.index.index(dbconn, vcf_handle)
        cursor = dbconn.cursor()
        sql = (
            'SELECT * FROM rsid_to_coord WHERE rsid IN '
            '(538736078, 547329663, 1440788236, 1234497371)'
        )
        rows = [row for row in cursor.execute(sql)]
        assert rows == expected
def test_index_multi_rsids():
    """Index ``multiple_id.vcf.gz`` and check two rsIDs at one position.

    Both queried IDs are expected to map to chromosome ``'1'``, position
    1900106, in the ``rsid_to_coord`` table.
    """
    expected = [
        (72634902, '1', 1900106),
        (145742571, '1', 1900106),
    ]
    with NamedTemporaryFile(suffix='.sqlite3') as db, \
            sqlite3.connect(db.name) as dbconn:
        vcf_path = data_file('multiple_id.vcf.gz')
        with rsidx.open(vcf_path, 'r') as vcf_handle:
            rsidx.index.index(dbconn, vcf_handle)
        cursor = dbconn.cursor()
        sql = (
            'SELECT * FROM rsid_to_coord WHERE rsid IN '
            '(72634902, 145742571)'
        )
        rows = [row for row in cursor.execute(sql)]
        # Row order is not relied upon, so compare sorted copies.
        assert sorted(rows) == sorted(expected)
def test_index(cachesize, mmapsize):
    """Index ``chr17-sample.vcf.gz`` with the given cache and mmap settings.

    After indexing, two rsIDs are looked up in ``rsid_to_coord`` and compared
    with their expected chromosome ``'17'`` coordinates.
    """
    expected = [
        (1238461543, '17', 624973),
        (1472751972, '17', 132359),
    ]
    with NamedTemporaryFile(suffix='.sqlite3') as db, \
            sqlite3.connect(db.name) as dbconn:
        with rsidx.open(data_file('chr17-sample.vcf.gz'), 'r') as vcf_handle:
            rsidx.index.index(
                dbconn,
                vcf_handle,
                cache_size=cachesize,
                mmap_size=mmapsize,
                logint=10,
            )
        cursor = dbconn.cursor()
        sql = (
            'SELECT * FROM rsid_to_coord WHERE rsid IN '
            '(1238461543, 1472751972)'
        )
        rows = [row for row in cursor.execute(sql)]
        # Row order is not relied upon, so compare sorted copies.
        assert sorted(rows) == sorted(expected)
def test_search_cli(doheader, numlines, suffix):
    """Run the ``search`` entry point with ``--out`` and count output lines.

    Five rsIDs are searched in ``chr17-sample.vcf.gz`` using the
    ``chr17-sample.rsidx`` index; the output file, read back through
    ``rsidx.open``, must contain ``numlines`` lines.
    """
    with NamedTemporaryFile(suffix=suffix) as outfile:
        parser = rsidx.cli.get_parser()
        args = parser.parse_args([
            'search',
            data_file('chr17-sample.vcf.gz'),
            data_file('chr17-sample.rsidx'),
            '--out', outfile.name,
            'rs1472751972',
            'rs1287502205',
            'rs897983471',
            'rs1172219431',
            'rs189123651',
        ])
        # The header flag is set on the namespace directly rather than
        # through a command-line option.
        args.header = doheader
        rsidx.search.main(args)
        with rsidx.open(outfile.name, 'r') as fh:
            content = fh.read()
        outlines = content.strip().split('\n')
        assert len(outlines) == numlines
def main(args):
    """Index ``args.vcf`` into ``args.idx``, guarding an existing index file.

    If ``args.idx`` already exists, a warning is printed to stderr. With
    ``args.force`` set, the existing file is removed and indexing proceeds;
    otherwise ``SystemExit`` is raised before the index is touched.

    Args:
        args: Object providing ``vcf``, ``idx``, ``force``, ``cache_size`` and
            ``mmap_size`` attributes.

    Raises:
        SystemExit: If the index file exists and ``args.force`` is false.
    """
    if os.path.exists(args.idx):
        message = 'WARNING: index file "{:s}" exists'.format(args.idx)
        if args.force:
            message += ', overwriting'
            try:
                os.unlink(args.idx)
            except FileNotFoundError:  # prevent exploits  # pragma: no cover
                # The file may disappear between the exists() check and the
                # unlink(); that outcome is fine, so ignore it.
                pass
        else:
            message += ', stubbornly refusing to proceed'
        print('[rsidx]', message, file=sys.stderr)
        if not args.force:
            raise SystemExit
    with rsidx.open(args.vcf, 'r') as vcffh:
        dbconn = sqlite3.connect(args.idx)
        try:
            # A sqlite3 connection used as a context manager only commits or
            # rolls back the pending transaction; it does not close the
            # connection, hence the explicit close() below.
            with dbconn:
                index(dbconn, vcffh, cache_size=args.cache_size,
                      mmap_size=args.mmap_size)
        finally:
            dbconn.close()
def main(args):
    """Search the index for ``args.rsid`` and write the results to ``args.out``.

    Connects to the SQLite database at ``args.dbfile``, opens ``args.out`` for
    writing with ``rsidx.open``, and prints every line yielded by ``search``
    to it without adding a trailing newline.

    Args:
        args: Object providing ``dbfile``, ``out``, ``rsid``, ``vcf`` and
            ``header`` attributes.
    """
    conn = sqlite3.connect(args.dbfile)
    try:
        with rsidx.open(args.out, 'w') as out:
            for line in search(args.rsid, conn, args.vcf, header=args.header):
                print(line, end='', file=out)
    finally:
        # Close the connection even when opening the output or the search
        # itself raises; previously it was only closed on the success path.
        conn.close()
def test_open():
    """Check that ``rsidx.open`` rejects the mode string ``'rwx'``.

    A ``ValueError`` whose message matches ``invalid mode "rwx"`` is expected.
    """
    with NamedTemporaryFile() as tf, \
            pytest.raises(ValueError, match=r'invalid mode "rwx"'):
        with rsidx.open(tf.name, 'rwx'):
            pass