def merge_chunks_multicore(chunks, db): if len(chunks) <= 1: ts = time.time() st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') print st, "indexing final database." shutil.move(chunks[0], db) main_conn = sqlite3.connect(db) main_conn.isolation_level = None main_curr = main_conn.cursor() main_curr.execute('PRAGMA synchronous = OFF') main_curr.execute('PRAGMA journal_mode=MEMORY') gemini_db.create_indices(main_curr) main_conn.commit() main_curr.close() return db else: ts = time.time() st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') print st, "merging", len(chunks), "chunks." procs = [] sub_merges = get_chunks_to_merge(chunks) tmp_dbs = get_temp_dbs(len(sub_merges), os.path.dirname(sub_merges[0][0])) for sub_merge, tmp_db in zip(sub_merges, tmp_dbs): cmd = get_merge_chunks_cmd(sub_merge, tmp_db) procs.append(subprocess.Popen(cmd, shell=True)) wait_until_finished(procs) cleanup_temp_db_files(chunks) merge_chunks_multicore(tmp_dbs, db)
def build_indices_and_disconnect(self):
    """Index the database tables, then commit and close the connection."""
    # Build the indices first so the final commit below persists them.
    database.create_indices(self.c)
    # Flush everything to disk and tear down the cursor/connection pair.
    database.close_and_commit(self.c, self.conn)
def finalize_merged_db(tmp_db, db): ts = time.time() st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') print st, "indexing final database." main_conn = sqlite3.connect(tmp_db) main_conn.isolation_level = None main_curr = main_conn.cursor() main_curr.execute('PRAGMA synchronous = OFF') main_curr.execute('PRAGMA journal_mode=MEMORY') gemini_db.create_indices(main_curr) main_conn.commit() main_curr.close() shutil.move(tmp_db, db)
def finalize_merged_db(tmp_db, db): ts = time.time() st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') print st, "indexing final database." main_conn = sqlite3.connect(tmp_db) main_conn.isolation_level = None main_curr = main_conn.cursor() main_curr.execute('PRAGMA synchronous = OFF') main_curr.execute('PRAGMA journal_mode=MEMORY') gemini_db.add_max_aaf(main_curr) gemini_db.create_indices(main_curr) main_conn.commit() main_curr.close() shutil.move(tmp_db, db)
def merge_db_chunks(args):
    """Merge a set of chunk databases into a single new database.

    Creates a fresh database at ``args.db`` (removing any pre-existing file),
    appends the variant rows from every chunk in ``args.chunkdbs``, copies the
    singleton tables (samples, resources, version, vcf header, gene tables)
    from the first chunk only, accumulates genotype counts from the rest, and
    optionally builds indices when ``args.index`` is set.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)
    gemini_db.create_tables(
        args.db,
        gemini_load_chunk.get_extra_effects_fields(args) if args.vcf else [])
    main_conn = sqlite3.connect(args.db)
    main_conn.isolation_level = None
    main_curr = main_conn.cursor()
    # Speed up the bulk inserts; durability is not needed since the merge can
    # be re-run from the chunk databases.
    main_curr.execute('PRAGMA synchronous = OFF')
    main_curr.execute('PRAGMA journal_mode=MEMORY')
    # Idiom fix: build the list directly instead of an append loop.
    databases = list(args.chunkdbs)
    for idx, database in enumerate(databases):
        db = database[0]
        append_variant_info(main_curr, db)
        # we only need to add these tables from one of the chunks.
        if idx == 0:
            append_sample_genotype_counts(main_curr, db)
            append_sample_info(main_curr, db)
            append_resource_info(main_curr, db)
            append_version_info(main_curr, db)
            append_vcf_header(main_curr, db)
            append_gene_summary(main_curr, db)
            append_gene_detailed(main_curr, db)
        else:
            update_sample_genotype_counts(main_curr, db)

    if args.index:
        gemini_db.create_indices(main_curr)

    main_conn.commit()
    main_curr.close()
    # BUG FIX: close the connection so the new database's file handle is
    # released (previously only the cursor was closed).
    main_conn.close()
def merge_db_chunks(args):
    """Merge a set of chunk databases into a single new database.

    Creates a fresh database at ``args.db`` (removing any pre-existing file),
    creates the gemini tables on it, appends the variant rows from every chunk
    in ``args.chunkdbs``, copies the singleton tables (samples, resources,
    version, vcf header, gene tables) from the first chunk only, accumulates
    genotype counts from the rest, and optionally builds indices when
    ``args.index`` is set.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)
    main_conn = sqlite3.connect(args.db)
    main_conn.isolation_level = None
    main_curr = main_conn.cursor()
    # Speed up the bulk inserts; durability is not needed since the merge can
    # be re-run from the chunk databases.
    main_curr.execute('PRAGMA synchronous = OFF')
    main_curr.execute('PRAGMA journal_mode=MEMORY')

    # create the gemini database tables for the new DB
    gemini_db.create_tables(
        main_curr,
        gemini_load_chunk.get_extra_effects_fields(args) if args.vcf else [])

    # Idiom fix: build the list directly instead of an append loop.
    databases = list(args.chunkdbs)
    for idx, database in enumerate(databases):
        db = database[0]
        append_variant_info(main_curr, db)
        # we only need to add these tables from one of the chunks.
        if idx == 0:
            append_sample_genotype_counts(main_curr, db)
            append_sample_info(main_curr, db)
            append_resource_info(main_curr, db)
            append_version_info(main_curr, db)
            append_vcf_header(main_curr, db)
            append_gene_summary(main_curr, db)
            append_gene_detailed(main_curr, db)
        else:
            update_sample_genotype_counts(main_curr, db)

    if args.index:
        gemini_db.create_indices(main_curr)

    main_conn.commit()
    main_curr.close()
    # BUG FIX: close the connection so the new database's file handle is
    # released (previously only the cursor was closed).
    main_conn.close()
def merge_db_chunks(args):
    """Merge a set of chunk databases into a single new database.

    Creates a fresh database at ``args.db`` (removing any pre-existing file),
    creates the gemini tables on it, appends the variant rows from every chunk
    in ``args.chunkdbs``, copies the singleton tables (samples, resources,
    version) from the first chunk only, accumulates genotype counts from the
    rest, and always builds the indices at the end.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)
    main_conn = sqlite3.connect(args.db)
    main_conn.isolation_level = None
    # Return rows as sqlite3.Row so columns are addressable by name.
    main_conn.row_factory = sqlite3.Row
    main_curr = main_conn.cursor()
    # Speed up the bulk inserts; durability is not needed since the merge can
    # be re-run from the chunk databases.
    main_curr.execute('PRAGMA synchronous = OFF')
    main_curr.execute('PRAGMA journal_mode=MEMORY')

    # create the gemini database tables for the new DB
    gemini_db.create_tables(main_curr)

    # Idiom fix: build the list directly instead of an append loop.
    databases = list(args.chunkdbs)
    for idx, database in enumerate(databases):
        db = database[0]
        append_variant_info(main_curr, db)
        # we only need to add these tables from one of the chunks.
        if idx == 0:
            append_sample_genotype_counts(main_curr, db)
            append_sample_info(main_curr, db)
            append_resource_info(main_curr, db)
            append_version_info(main_curr, db)
        else:
            update_sample_genotype_counts(main_curr, db)

    gemini_db.create_indices(main_curr)

    main_conn.commit()
    main_curr.close()
    # BUG FIX: close the connection so the new database's file handle is
    # released (previously only the cursor was closed).
    main_conn.close()