def load_dbsnp():
    """Dump the dbSNP VCF into a tab-separated intermediate file.

    Reads ``settings.DBSNP_VCF_FILE`` through ``vcf_stuff.iterate_vcf`` and
    writes one line per variant to
    ``settings.INTERMEDIATE_FILE_DIR + 'dbsnp.tsv'`` with the columns
    ``xpos``, ``ref``, ``alt`` and ``vcf_id`` (in that order).

    The running variant index is printed every 100000 variants (starting
    with index 0) as a progress indicator.

    Both file handles are managed by ``with`` so they are closed even when
    parsing or writing raises part-way through.
    """
    # Plain string concatenation, exactly as before: presumably
    # INTERMEDIATE_FILE_DIR is expected to end with a path separator.
    out_path = settings.INTERMEDIATE_FILE_DIR + 'dbsnp.tsv'

    # The output file is opened first, then the input VCF, matching the
    # original order of side effects.
    with open(out_path, 'w') as dbsnp_file:
        with open(settings.DBSNP_VCF_FILE) as vcf_file:
            for i, variant in enumerate(vcf_stuff.iterate_vcf(vcf_file)):
                if i % 100000 == 0:
                    # Single-argument parenthesised print behaves the same
                    # under the Python 2 statement and the Python 3 function.
                    print(i)
                fields = [
                    str(variant.xpos),
                    variant.ref,
                    variant.alt,
                    variant.vcf_id,
                ]
                dbsnp_file.write('\t'.join(fields) + '\n')
import sys

from xbrowse.parsers.vcf_stuff import iterate_vcf
from xbrowse.utils import get_aaf, compressed_file


if __name__ == '__main__':
    # Usage: <script> <vcf path>
    # Prints one tab-separated line per variant to stdout with the columns
    # xpos, ref, alt and the value returned by get_aaf() for that variant.
    vcf_handle = compressed_file(sys.argv[1])

    # genotypes=True is passed through to iterate_vcf unchanged.
    for record in iterate_vcf(vcf_handle, genotypes=True):
        columns = (
            str(record.xpos),
            record.ref,
            record.alt,
            str(get_aaf(record)),
        )
        # Single-argument parenthesised print gives the same output as the
        # Python 2 print statement.
        print('\t'.join(columns))
import gzip
import argparse

from xbrowse.parsers import vcf_stuff


if __name__ == '__main__':
    # Command-line entry point: takes the path to a gzip-compressed VCF and
    # prints one line per variant to stdout with xpos, ref and alt.
    # Note that the fields are joined with tabs, even though the
    # description below says CSV.
    parser = argparse.ArgumentParser(description='Create a CSV from the ClinVar VCF file that can go into a pandas dataframe')
    parser.add_argument('vcf')
    args = parser.parse_args()

    # Use a context manager so the gzip handle is closed even if parsing
    # fails part-way through (previously it was never closed explicitly).
    with gzip.open(args.vcf) as vcf_handle:
        for variant in vcf_stuff.iterate_vcf(vcf_handle):
            fields = [
                str(variant.xpos),
                variant.ref,
                variant.alt,
            ]
            # Single-argument parenthesised print gives the same output as
            # the Python 2 print statement.
            print('\t'.join(fields))