Esempio n. 1
0
def merge_chunks_multicore(chunks, db):
    if len(chunks) <= 1:
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print st, "indexing final database."

        shutil.move(chunks[0], db)
        main_conn = sqlite3.connect(db)
        main_conn.isolation_level = None
        main_curr = main_conn.cursor()
        main_curr.execute('PRAGMA synchronous = OFF')
        main_curr.execute('PRAGMA journal_mode=MEMORY')

        gemini_db.create_indices(main_curr)

        main_conn.commit()
        main_curr.close()
        return db
    else:
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print st, "merging", len(chunks), "chunks."
        procs = []
        sub_merges = get_chunks_to_merge(chunks)
        tmp_dbs = get_temp_dbs(len(sub_merges),
                               os.path.dirname(sub_merges[0][0]))
        for sub_merge, tmp_db in zip(sub_merges, tmp_dbs):
            cmd = get_merge_chunks_cmd(sub_merge, tmp_db)
            procs.append(subprocess.Popen(cmd, shell=True))
        wait_until_finished(procs)
        cleanup_temp_db_files(chunks)
        merge_chunks_multicore(tmp_dbs, db)
Esempio n. 2
0
def merge_chunks_multicore(chunks, db):
    if len(chunks) <= 1:
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print st, "indexing final database."

        shutil.move(chunks[0], db)
        main_conn = sqlite3.connect(db)
        main_conn.isolation_level = None
        main_curr = main_conn.cursor()
        main_curr.execute('PRAGMA synchronous = OFF')
        main_curr.execute('PRAGMA journal_mode=MEMORY')
        
        gemini_db.create_indices(main_curr)
        
        main_conn.commit()
        main_curr.close()
        return db
    else:
        ts = time.time()
        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
        print st, "merging", len(chunks), "chunks."
        procs = []
        sub_merges = get_chunks_to_merge(chunks)
        tmp_dbs = get_temp_dbs(len(sub_merges), os.path.dirname(sub_merges[0][0]))
        for sub_merge, tmp_db in zip(sub_merges, tmp_dbs):
            cmd = get_merge_chunks_cmd(sub_merge, tmp_db)
            procs.append(subprocess.Popen(cmd, shell=True))
        wait_until_finished(procs)
        cleanup_temp_db_files(chunks)
        merge_chunks_multicore(tmp_dbs, db)
Esempio n. 3
0
 def build_indices_and_disconnect(self):
     """
     Index the database tables, then commit and close
     the db connection.

     Both steps are delegated to the ``database`` module.
     """
     cursor = self.c
     # indices first, so lookups on the finished tables are fast
     database.create_indices(cursor)
     # then hand cursor and connection over for the commit and close
     database.close_and_commit(cursor, self.conn)
Esempio n. 4
0
 def build_indices_and_disconnect(self):
     """
     Finish the load: create the table indices, then
     commit and close up the db connection.
     """
     cur = self.c
     # step 1: index the tables for query speed
     database.create_indices(cur)
     # step 2: commit the data and close up
     database.close_and_commit(cur, self.conn)
Esempio n. 5
0
def finalize_merged_db(tmp_db, db):
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    print st, "indexing final database."

    main_conn = sqlite3.connect(tmp_db)
    main_conn.isolation_level = None
    main_curr = main_conn.cursor()
    main_curr.execute('PRAGMA synchronous = OFF')
    main_curr.execute('PRAGMA journal_mode=MEMORY')

    gemini_db.create_indices(main_curr)

    main_conn.commit()
    main_curr.close()

    shutil.move(tmp_db, db)
Esempio n. 6
0
def finalize_merged_db(tmp_db, db):
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    print st, "indexing final database."

    main_conn = sqlite3.connect(tmp_db)
    main_conn.isolation_level = None
    main_curr = main_conn.cursor()
    main_curr.execute('PRAGMA synchronous = OFF')
    main_curr.execute('PRAGMA journal_mode=MEMORY')

    gemini_db.add_max_aaf(main_curr)
    gemini_db.create_indices(main_curr)

    main_conn.commit()
    main_curr.close()

    shutil.move(tmp_db, db)
Esempio n. 7
0
def merge_db_chunks(args):
    """
    Merge the chunk databases listed in ``args.chunkdbs`` into ``args.db``.

    Attributes read from ``args``:
      db       -- path of the database to create; an existing file at this
                  path is removed first.
      vcf      -- when truthy, the extra effects fields reported by
                  ``gemini_load_chunk.get_extra_effects_fields`` are passed
                  to ``gemini_db.create_tables``; otherwise an empty list.
      chunkdbs -- iterable of entries; the first element of each entry is
                  used as the chunk database.
      index    -- when truthy, indices are created after the merge.

    The sqlite connection is closed on exit, also when a step raises.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)

    gemini_db.create_tables(
        args.db,
        gemini_load_chunk.get_extra_effects_fields(args) if args.vcf else [])

    main_conn = sqlite3.connect(args.db)
    try:
        main_conn.isolation_level = None
        main_curr = main_conn.cursor()
        main_curr.execute('PRAGMA synchronous = OFF')
        main_curr.execute('PRAGMA journal_mode=MEMORY')

        for idx, chunk in enumerate(args.chunkdbs):
            db = chunk[0]

            append_variant_info(main_curr, db)

            # we only need to add these tables from one of the chunks.
            if idx == 0:
                append_sample_genotype_counts(main_curr, db)
                append_sample_info(main_curr, db)
                append_resource_info(main_curr, db)
                append_version_info(main_curr, db)
                append_vcf_header(main_curr, db)
                append_gene_summary(main_curr, db)
                append_gene_detailed(main_curr, db)
            else:
                update_sample_genotype_counts(main_curr, db)

        if args.index:
            gemini_db.create_indices(main_curr)

        main_conn.commit()
        main_curr.close()
    finally:
        # closing the cursor alone leaves the connection open
        main_conn.close()
Esempio n. 8
0
def merge_db_chunks(args):
    """
    Merge the chunk databases listed in ``args.chunkdbs`` into ``args.db``.

    Attributes read from ``args``:
      db       -- path of the database to create; an existing file at this
                  path is removed first.
      vcf      -- when truthy, the extra effects fields reported by
                  ``gemini_load_chunk.get_extra_effects_fields`` are passed
                  to ``gemini_db.create_tables``; otherwise an empty list.
      chunkdbs -- iterable of entries; the first element of each entry is
                  used as the chunk database.
      index    -- when truthy, indices are created after the merge.

    The sqlite connection is closed on exit, also when a step raises.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)

    main_conn = sqlite3.connect(args.db)
    try:
        main_conn.isolation_level = None
        main_curr = main_conn.cursor()
        main_curr.execute('PRAGMA synchronous = OFF')
        main_curr.execute('PRAGMA journal_mode=MEMORY')
        # create the gemini database tables for the new DB
        extra_fields = (gemini_load_chunk.get_extra_effects_fields(args)
                        if args.vcf else [])
        gemini_db.create_tables(main_curr, extra_fields)

        for idx, chunk in enumerate(args.chunkdbs):
            db = chunk[0]

            append_variant_info(main_curr, db)

            # we only need to add these tables from one of the chunks.
            if idx == 0:
                append_sample_genotype_counts(main_curr, db)
                append_sample_info(main_curr, db)
                append_resource_info(main_curr, db)
                append_version_info(main_curr, db)
                append_vcf_header(main_curr, db)
                append_gene_summary(main_curr, db)
                append_gene_detailed(main_curr, db)
            else:
                update_sample_genotype_counts(main_curr, db)

        if args.index:
            gemini_db.create_indices(main_curr)

        main_conn.commit()
        main_curr.close()
    finally:
        # closing the cursor alone leaves the connection open
        main_conn.close()
Esempio n. 9
0
def merge_db_chunks(args):
    """
    Merge the chunk databases listed in ``args.chunkdbs`` into ``args.db``
    and index the result.

    Attributes read from ``args``:
      db       -- path of the database to create; an existing file at this
                  path is removed first.
      chunkdbs -- iterable of entries; the first element of each entry is
                  used as the chunk database.

    The sqlite connection is closed on exit, also when a step raises.
    """
    # open up a new database
    if os.path.exists(args.db):
        os.remove(args.db)

    main_conn = sqlite3.connect(args.db)
    try:
        main_conn.isolation_level = None
        main_conn.row_factory = sqlite3.Row
        main_curr = main_conn.cursor()
        main_curr.execute('PRAGMA synchronous = OFF')
        main_curr.execute('PRAGMA journal_mode=MEMORY')
        # create the gemini database tables for the new DB
        gemini_db.create_tables(main_curr)

        for idx, chunk in enumerate(args.chunkdbs):
            db = chunk[0]

            append_variant_info(main_curr, db)

            # we only need to add these tables from one of the chunks.
            if idx == 0:
                append_sample_genotype_counts(main_curr, db)
                append_sample_info(main_curr, db)
                append_resource_info(main_curr, db)
                append_version_info(main_curr, db)
            else:
                update_sample_genotype_counts(main_curr, db)

        gemini_db.create_indices(main_curr)
        main_conn.commit()
        main_curr.close()
    finally:
        # closing the cursor alone leaves the connection open
        main_conn.close()