def load(parser, args):
    """Validate the ``load`` arguments and dispatch to a loading backend.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores``. ``skip_cadd`` and
            ``skip_gerp_bp`` are set to True in place when the matching
            annotation file is not available.

    Raises:
        SystemExit: If ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    annos = annotations.get_anno_files(args)

    # Force skipping CADD and GERP if the data files have not been installed.
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write("\nCADD scores are not being loaded because the"
                             " annotation file could not be found.\n"
                             "`Run gemini update --dataonly --extra cadd_score`"
                             " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write("CADD scores are being loaded (to skip use:--skip-cadd).\n")

    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write("\nGERP per bp is not being loaded because the annotation file"
                             " could not be found.\n Run `gemini update --dataonly --extra gerp_bp`"
                             " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write("GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Pick the backend: scheduler first, then multi-core, else single-core.
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)
def load(parser, args):
    """Validate the ``load`` arguments and populate the database from a VCF.

    The load is attempted at most twice. The first attempt uses ``args`` as
    given; the retry sets ``args.tmp_db`` to a uniquely named file under
    ``args.tempdir`` and, on success, moves that file to ``args.db``.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``db``, ``vcf``, ``anno_type``,
            ``tempdir``, ``no_genotypes`` and ``no_load_genotypes``; may set
            ``tmp_db``.

    Raises:
        SystemExit: If ``db``/``vcf`` is missing or ``anno_type`` is not one
            of ``'snpEff'``, ``'VEP'`` or None.
        sqlite3.OperationalError: If both load attempts fail.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    if args.anno_type not in ['snpEff', 'VEP', None]:
        parser.print_help()
        sys.exit("\nERROR: Unsupported selection for -t\n")

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Create a new gemini loader and populate the gemini db and files from
    # the VCF.
    last_error = None
    for try_count in range(2):
        try:
            if try_count > 0:
                # NOTE(review): presumably GeminiLoader writes to args.tmp_db
                # when it is set -- confirm against GeminiLoader.
                args.tmp_db = os.path.join(args.tempdir, "%s.db" % uuid.uuid4())

            gemini_loader = GeminiLoader(args)
            gemini_loader.store_resources()
            gemini_loader.store_version()
            gemini_loader.store_vcf_header()
            gemini_loader.populate_from_vcf()
            gemini_loader.update_gene_table()
            # gemini_loader.build_indices_and_disconnect()

            if not args.no_genotypes and not args.no_load_genotypes:
                gemini_loader.store_sample_gt_counts()

            if try_count > 0:
                shutil.move(args.tmp_db, args.db)
            break
        except sqlite3.OperationalError as e:
            sys.stderr.write("sqlite3.OperationalError: %s\n" % e)
            last_error = e
    else:
        # Both attempts failed: surface the error instead of returning as if
        # the load had succeeded.
        raise last_error
def load(parser, args):
    """Validate the ``load`` arguments and populate the database from a VCF.

    The load is attempted at most twice. The first attempt uses ``args`` as
    given; the retry sets ``args.tmp_db`` to a uniquely named file under
    ``args.tempdir`` and, on success, moves that file to ``args.db``.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``db``, ``vcf``, ``anno_type``,
            ``tempdir``, ``no_genotypes`` and ``no_load_genotypes``; may set
            ``tmp_db``.

    Raises:
        SystemExit: If ``db``/``vcf`` is missing or ``anno_type`` is not one
            of ``'snpEff'``, ``'VEP'``, ``"all"`` or None.
        sql.exc.OperationalError: If both load attempts fail.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    if args.anno_type not in ['snpEff', 'VEP', None, "all"]:
        parser.print_help()
        sys.exit("\nERROR: Unsupported selection for -t\n")

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Create a new gemini loader and populate the gemini db and files from
    # the VCF.
    last_error = None
    for try_count in range(2):
        try:
            if try_count > 0:
                # NOTE(review): presumably GeminiLoader writes to args.tmp_db
                # when it is set -- confirm against GeminiLoader.
                args.tmp_db = os.path.join(args.tempdir, "%s.db" % uuid.uuid4())

            gemini_loader = GeminiLoader(args)
            gemini_loader.store_resources()
            gemini_loader.store_version()
            gemini_loader.store_vcf_header()
            gemini_loader.populate_from_vcf()
            gemini_loader.update_gene_table()
            gemini_loader.build_indices_and_disconnect()

            if not args.no_genotypes and not args.no_load_genotypes:
                gemini_loader.store_sample_gt_counts()

            if try_count > 0:
                shutil.move(args.tmp_db, args.db)
            break
        except sql.exc.OperationalError as e:
            sys.stderr.write("sqlalchemy.OperationalError: %s\n" % e)
            last_error = e
    else:
        # Both attempts failed: surface the error instead of returning as if
        # the load had succeeded.
        raise last_error
def load(parser, args):
    """Validate the ``load`` arguments and dispatch to a loading backend.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores``. A skip flag that
            is False is flipped to True in place when its annotation file is
            not among those reported by ``annotations.get_anno_files``.

    Raises:
        SystemExit: If ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    annos = annotations.get_anno_files(args)

    # (skip flag on args, annotation key, message if missing, message if found)
    optional_annotations = (
        ("skip_cadd", 'cadd_score',
         "\nCADD scores are not being loaded because the"
         " annotation file could not be found.\n"
         "`Run gemini update --dataonly --extra cadd_score`"
         " to install the annotation file.\n\n",
         "CADD scores are being loaded (to skip use:--skip-cadd).\n"),
        ("skip_gerp_bp", 'gerp_bp',
         "\nGERP per bp is not being loaded because the annotation file"
         " could not be found.\n Run `gemini update --dataonly --extra gerp_bp`"
         " to install the annotation file.\n\n",
         "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n"),
    )

    # Force skipping CADD and GERP if the data files have not been installed.
    for skip_flag, anno_key, missing_msg, loading_msg in optional_annotations:
        if getattr(args, skip_flag) is not False:
            continue
        if anno_key in annos:
            sys.stderr.write(loading_msg)
        else:
            sys.stderr.write(missing_msg)
            setattr(args, skip_flag, True)

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Pick the backend: scheduler first, then multi-core, else single-core.
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)
def load(parser, args):
    """Load a VCF, record the variant count and optionally log timings.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``vcf``, ``skip_cadd``, ``skip_gerp_bp``,
            ``node_num``, ``cores``, ``contact_points``, ``keyspace``,
            ``timing_log`` and ``exp_id``. ``skip_cadd`` and ``skip_gerp_bp``
            are set to True in place when the matching annotation file is
            not available.

    Raises:
        SystemExit: If ``args.vcf`` is None.
    """
    # Only the VCF is required here; there is no database-file check.
    if args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)

    # Force skipping CADD and GERP if the data files have not been installed.
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write("\nCADD scores are not being loaded because the"
                             " annotation file could not be found.\n"
                             "`Run geminicassandra update --dataonly --extra cadd_score`"
                             " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write("CADD scores are being loaded (to skip use:--skip-cadd).\n")

    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write("\nGERP per bp is not being loaded because the annotation file"
                             " could not be found.\n Run `geminicassandra update --dataonly --extra gerp_bp`"
                             " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write("GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Both checkpoints default to start_time so the setup phases are
    # reported as 0 when this is not node 1.
    time_2 = start_time
    time_3 = start_time
    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        time_2 = time.time()
        gemini_loader.single_core_stuff()
        time_3 = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    insert_n_variants(map(strip, args.contact_points.split(',')),
                      args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(time_2 - start_time)
    single_core_time = str(time_3 - time_2)
    parallel_time = str(end_time - time_3)

    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Finished loading in %s s" % total_time)

    if args.timing_log is not None:
        # Append one CSV row per run.
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join([args.exp_id, total_time, db_creation_time,
                                   single_core_time, parallel_time]) + "\n")
def load(parser, args):
    """Check the required ``load`` inputs, then run the selected backend.

    Exits with an error (after printing the parser help) when either
    ``args.db`` or ``args.vcf`` is None. Otherwise loads the annotations and
    calls ``load_ipython`` if ``args.scheduler`` is truthy, ``load_multicore``
    if ``args.cores > 1``, and ``load_singlecore`` in every other case.
    """
    missing_input = args.db is None or args.vcf is None
    if missing_input:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    # Gather all of the additional annotation files.
    annotations.load_annos()

    # Choose the backend first, then invoke it once.
    if args.scheduler:
        backend = load_ipython
    elif args.cores > 1:
        backend = load_multicore
    else:
        backend = load_singlecore
    backend(args)
def load(parser, args):
    """Validate the ``load`` arguments and build the database from the VCF.

    Exits with an error (after printing the parser help) when ``args.db`` or
    ``args.vcf`` is None, or when ``args.anno_type`` is not ``"snpEff"``,
    ``"VEP"`` or None. Otherwise loads the annotations and drives a
    ``GeminiLoader`` through populate, index-and-disconnect and sample
    genotype counting, in that order.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    supported_anno_types = ("snpEff", "VEP", None)
    if not (args.anno_type in supported_anno_types):
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # Gather all of the additional annotation files.
    annotations.load_annos()

    # Build a fresh loader and fill the gemini db and files from the VCF.
    loader = GeminiLoader(args)
    loader.populate_from_vcf()
    loader.build_indices_and_disconnect()
    loader.store_sample_gt_counts()
def load(parser, args):
    """Validate the ``load`` arguments, then run the selected backend.

    Exits with an error (after printing the parser help) when ``args.db`` or
    ``args.vcf`` is None, or when ``args.anno_type`` is not ``'snpEff'``,
    ``'VEP'`` or None. Otherwise loads the annotations and calls
    ``load_ipython`` if ``use_scheduler(args)`` is truthy, ``load_multicore``
    if ``args.cores > 1``, and ``load_singlecore`` in every other case.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    allowed_anno_types = ('snpEff', 'VEP', None)
    if not (args.anno_type in allowed_anno_types):
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # Gather all of the additional annotation files.
    annotations.load_annos()

    # Choose the backend first, then invoke it once.
    if use_scheduler(args):
        backend = load_ipython
    elif args.cores > 1:
        backend = load_multicore
    else:
        backend = load_singlecore
    backend(args)
def load(parser, args):
    """Validate the ``load`` arguments and build the database from the VCF.

    Exits with an error (after printing the parser help) when ``args.db`` or
    ``args.vcf`` is None, or when ``args.anno_type`` is not ``'snpEff'``,
    ``'VEP'`` or None. Otherwise loads the annotations, then populates,
    indexes and counts sample genotypes through a ``GeminiLoader``.
    """
    have_inputs = args.db is not None and args.vcf is not None
    if not have_inputs:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    anno_type_ok = args.anno_type in ('snpEff', 'VEP', None)
    if not anno_type_ok:
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # Gather all of the additional annotation files.
    annotations.load_annos()

    # Build a fresh loader and fill the gemini db and files from the VCF.
    vcf_loader = GeminiLoader(args)
    vcf_loader.populate_from_vcf()
    vcf_loader.build_indices_and_disconnect()
    vcf_loader.store_sample_gt_counts()
def load(parser, args):
    """Validate the ``load`` arguments and dispatch to a loading backend.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores``.

    Raises:
        SystemExit: If ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    # Tell the user which optional annotations will be loaded.
    if args.skip_cadd is False:
        sys.stdout.write("CADD is being loaded (to skip use:--skip-cadd).\n")

    if args.skip_gerp_bp is False:
        sys.stdout.write("GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")

    # Collect all of the additional annotation files.
    annotations.load_annos()

    # Pick the backend: scheduler first, then multi-core, else single-core.
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)
def load(parser, args):
    """Validate the ``load`` arguments and build the database from the VCF.

    Exits with an error (after printing the parser help) when ``args.db`` or
    ``args.vcf`` is None, or when ``args.anno_type`` is not ``'snpEff'``,
    ``'VEP'`` or None. Otherwise loads the annotations and drives a
    ``GeminiLoader`` through its store/populate steps. Sample genotype counts
    are stored only when neither ``args.no_genotypes`` nor
    ``args.no_load_genotypes`` is truthy.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    permitted_anno_types = ('snpEff', 'VEP', None)
    if not (args.anno_type in permitted_anno_types):
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # Gather all of the additional annotation files.
    annotations.load_annos(args)

    # Build a fresh loader and fill the gemini db and files from the VCF.
    loader = GeminiLoader(args)
    loader.store_resources()
    loader.store_version()
    loader.store_vcf_header()
    loader.populate_from_vcf()
    loader.update_gene_table()
    # loader.build_indices_and_disconnect()

    skip_gt_counts = args.no_genotypes or args.no_load_genotypes
    if not skip_gt_counts:
        loader.store_sample_gt_counts()
def load(parser, args):
    """Load a VCF, record the variant count and optionally log timings.

    Args:
        parser: Argument parser; only ``print_help()`` is called on it.
        args: Parsed options. Reads ``vcf``, ``skip_cadd``, ``skip_gerp_bp``,
            ``node_num``, ``cores``, ``contact_points``, ``keyspace``,
            ``timing_log`` and ``exp_id``. A skip flag that is False is
            flipped to True in place when its annotation file is not among
            those reported by ``annotations.get_anno_files``.

    Raises:
        SystemExit: If ``args.vcf`` is None.
    """
    # Only the VCF is required here; there is no database-file check.
    if args.vcf is None:
        parser.print_help()
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)

    # (skip flag on args, annotation key, message if missing, message if found)
    optional_annotations = (
        ("skip_cadd", 'cadd_score',
         "\nCADD scores are not being loaded because the"
         " annotation file could not be found.\n"
         "`Run geminicassandra update --dataonly --extra cadd_score`"
         " to install the annotation file.\n\n",
         "CADD scores are being loaded (to skip use:--skip-cadd).\n"),
        ("skip_gerp_bp", 'gerp_bp',
         "\nGERP per bp is not being loaded because the annotation file"
         " could not be found.\n Run `geminicassandra update --dataonly --extra gerp_bp`"
         " to install the annotation file.\n\n",
         "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n"),
    )

    # Force skipping CADD and GERP if the data files have not been installed.
    for skip_flag, anno_key, missing_msg, loading_msg in optional_annotations:
        if getattr(args, skip_flag) is not False:
            continue
        if anno_key in annos:
            sys.stderr.write(loading_msg)
        else:
            sys.stderr.write(missing_msg)
            setattr(args, skip_flag, True)

    # Collect all of the additional annotation files.
    annotations.load_annos(args)

    # Both checkpoints default to start_time so the setup phases are
    # reported as 0 when this is not node 1.
    db_ready_time = start_time
    single_core_done_time = start_time
    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        db_ready_time = time.time()
        gemini_loader.single_core_stuff()
        single_core_done_time = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    insert_n_variants(map(strip, args.contact_points.split(',')),
                      args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(db_ready_time - start_time)
    single_core_time = str(single_core_done_time - db_ready_time)
    parallel_time = str(end_time - single_core_done_time)

    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Finished loading in %s s" % total_time)

    if args.timing_log is not None:
        # Append one CSV row per run.
        timing_row = [
            args.exp_id, total_time, db_creation_time, single_core_time,
            parallel_time
        ]
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join(timing_row) + "\n")