Esempio n. 1
0
def load_singlecore(args):
    """Populate the gemini db from the VCF in the current process.

    Builds a ``GeminiLoader`` from ``args``, runs ``populate_from_vcf()``
    and, unless ``args.no_genotypes`` or ``args.no_load_genotypes`` is set,
    stores the per-sample genotype counts.

    Returns:
        The ``GeminiLoader`` instance that performed the load.
    """
    loader = GeminiLoader(args)
    loader.populate_from_vcf()

    # Genotype counts are only stored when genotypes are actually loaded.
    if not (args.no_genotypes or args.no_load_genotypes):
        loader.store_sample_gt_counts()

    return loader
Esempio n. 2
0
def load(parser, args):
    """Load a VCF file and report how long the load took.

    Steps, in order:
      1. Print the parser help and exit when ``args.vcf`` is missing.
      2. Turn on ``args.skip_cadd`` / ``args.skip_gerp_bp`` when the matching
         annotation file is not among the files returned by
         ``annotations.get_anno_files``.
      3. Load the annotations.
      4. When ``args.node_num == 1``, call ``setup_db()`` and
         ``single_core_stuff()`` on a fresh ``GeminiLoader``.
      5. Run the multi-core loader when ``args.cores > 1``, otherwise the
         single-core loader, and pass its result to ``insert_n_variants``.
      6. Print the total time and, when ``args.timing_log`` is set, append a
         comma-separated timing line to that file.

    Args:
        parser: object exposing ``print_help()``; used for the usage text.
        args: parsed options. Attributes read here: ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``node_num``, ``cores``, ``contact_points``,
            ``keyspace``, ``timing_log`` and ``exp_id``.

    Raises:
        SystemExit: when ``args.vcf`` is ``None``.
    """
    if args.vcf is None:
        parser.print_help()
        # sys.exit is always available, unlike the site-provided exit().
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)

    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write("\nCADD scores are not being loaded because the"
            " annotation file could not be found.\n"
            "`Run geminicassandra update --dataonly --extra cadd_score`"
            " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write("CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write("\nGERP per bp is not being loaded because the annotation file"
                        " could not be found.\n    Run `geminicassandra update --dataonly --extra gerp_bp`"
                        " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write("GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")

    # collect all of the additional annotation files
    annotations.load_annos(args)

    # Both checkpoints default to start_time so that the db-creation and
    # single-core durations come out as zero when the node-1 step is skipped.
    time_2 = start_time
    time_3 = start_time

    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        time_2 = time.time()
        gemini_loader.single_core_stuff()
        time_3 = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    insert_n_variants(map(strip, args.contact_points.split(',')), args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(time_2 - start_time)
    single_core_time = str(time_3 - time_2)
    parallel_time = str(end_time - time_3)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Finished loading in %s s" % total_time)
    if args.timing_log is not None:
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join([args.exp_id, total_time, db_creation_time, single_core_time, parallel_time]) + "\n")
Esempio n. 3
0
def load_singlecore(args):
    """Load the VCF into the gemini db using a single loader.

    A ``GeminiLoader`` is created from ``args`` and asked to populate the
    database from the VCF. The sample genotype counts are stored afterwards
    unless either ``args.no_genotypes`` or ``args.no_load_genotypes`` is set.

    Returns:
        The ``GeminiLoader`` that was used.
    """
    vcf_loader = GeminiLoader(args)
    vcf_loader.populate_from_vcf()

    skip_gt_counts = args.no_genotypes or args.no_load_genotypes
    if not skip_gt_counts:
        vcf_loader.store_sample_gt_counts()

    return vcf_loader
Esempio n. 4
0
def load_singlecore(args):
    """Run the complete gemini load in a single process.

    Stores the resources, version and VCF header, populates the database
    from the VCF, then (depending on ``args``) updates the gene table,
    builds the indices, stores the sample genotype counts and finally adds
    the extras through ``gemini_annotate.add_extras``.
    """
    loader = GeminiLoader(args)

    # Metadata first, then the variants themselves.
    loader.store_resources()
    loader.store_version()
    loader.store_vcf_header()
    loader.populate_from_vcf()

    # Gene tables and indices are both skipped in test mode.
    if not (args.skip_gene_tables or args.test_mode):
        loader.update_gene_table()
    if not args.test_mode:
        loader.build_indices_and_disconnect()

    if not (args.no_genotypes or args.no_load_genotypes):
        loader.store_sample_gt_counts()

    gemini_annotate.add_extras(args.db, [args.db])
Esempio n. 5
0
def load_singlecore(args):
    """Populate the gemini db from the VCF with a single loader.

    Stores the resources and version, populates the database from the VCF
    and builds the indices (which also disconnects). Sample genotype counts
    are stored last, unless ``args.no_genotypes`` or
    ``args.no_load_genotypes`` is set.
    """
    loader = GeminiLoader(args)
    loader.store_resources()
    loader.store_version()
    loader.populate_from_vcf()
    loader.build_indices_and_disconnect()

    if not (args.no_genotypes or args.no_load_genotypes):
        loader.store_sample_gt_counts()
Esempio n. 6
0
def load_singlecore(args):
    """Single-process load of the VCF into the gemini db.

    Calls, in order, ``store_resources``, ``store_version``,
    ``populate_from_vcf`` and ``build_indices_and_disconnect`` on a new
    ``GeminiLoader``; ``store_sample_gt_counts`` follows when genotypes are
    being loaded.
    """
    db_loader = GeminiLoader(args)

    db_loader.store_resources()
    db_loader.store_version()
    db_loader.populate_from_vcf()
    db_loader.build_indices_and_disconnect()

    genotypes_skipped = args.no_genotypes or args.no_load_genotypes
    if not genotypes_skipped:
        db_loader.store_sample_gt_counts()
Esempio n. 7
0
def load_singlecore(args):
    """Load the VCF into the gemini db without any parallelism.

    Order of operations:
      * store resources, version and VCF header;
      * populate the database from the VCF;
      * update the gene table unless ``args.skip_gene_tables`` or
        ``args.test_mode`` is set;
      * build indices and disconnect unless ``args.test_mode`` is set;
      * store sample genotype counts unless ``args.no_genotypes`` or
        ``args.no_load_genotypes`` is set;
      * add the extras via ``gemini_annotate.add_extras``.
    """
    vcf_loader = GeminiLoader(args)
    vcf_loader.store_resources()
    vcf_loader.store_version()
    vcf_loader.store_vcf_header()
    vcf_loader.populate_from_vcf()

    if not (args.skip_gene_tables or args.test_mode):
        vcf_loader.update_gene_table()

    if not args.test_mode:
        vcf_loader.build_indices_and_disconnect()

    if not (args.no_genotypes or args.no_load_genotypes):
        vcf_loader.store_sample_gt_counts()

    gemini_annotate.add_extras(args.db, [args.db])
Esempio n. 8
0
def finish(args, loader=None):
    """
    all things that are performed by single core, multi, ipython
    should be done here

    Stores the resources, version and VCF header, then the gene tables
    (unless ``args.skip_gene_tables`` is set). Outside test mode the gene
    table is updated and the indices are built; in test mode the database
    is only closed and committed.

    Args:
        args: parsed options; ``skip_gene_tables`` and ``test_mode`` are
            read here.
        loader: loader to finish with. When ``None``, a new
            ``GeminiLoader(args, prepare_db=False)`` is created.
    """
    if loader is None:
        loader = GeminiLoader(args, prepare_db=False)

    # Single-argument parenthesised print works on both Python 2 and 3.
    print("storing version, header, etc.")
    loader.store_resources()
    loader.store_version()
    loader.store_vcf_header()

    if not args.skip_gene_tables:
        print("storing gene-detailed")
        loader._get_gene_detailed()
        print("storing gene-summary")
        loader._get_gene_summary()
        if not args.test_mode:
            print("updating gene-table")
            loader.update_gene_table()
    if not args.test_mode:
        print("building indices")
        loader.build_indices_and_disconnect()
    else:
        # No indices in test mode; just close and commit via loader.c.
        import database
        database.close_and_commit(loader.c)
Esempio n. 9
0
def finish(args, loader=None):
    """
    all things that are performed by single core, multi, ipython
    should be done here

    The steps are: store resources, version and VCF header; store the
    gene-detailed and gene-summary tables unless ``args.skip_gene_tables``
    is set (plus the gene-table update outside test mode); then either
    build the indices and disconnect, or, in test mode, close and commit
    the database.

    Args:
        args: parsed options; reads ``skip_gene_tables`` and ``test_mode``.
        loader: optional loader. A ``GeminiLoader(args, prepare_db=False)``
            is created when it is ``None``.
    """
    if loader is None:
        loader = GeminiLoader(args, prepare_db=False)

    # print(...) with one argument emits the same text on Python 2 and 3.
    print("storing version, header, etc.")
    loader.store_resources()
    loader.store_version()
    loader.store_vcf_header()

    if not args.skip_gene_tables:
        print("storing gene-detailed")
        loader._get_gene_detailed()
        print("storing gene-summary")
        loader._get_gene_summary()
        if not args.test_mode:
            print("updating gene-table")
            loader.update_gene_table()

    if not args.test_mode:
        print("building indices")
        loader.build_indices_and_disconnect()
    else:
        # Test mode skips index building and only closes/commits loader.c.
        import database
        database.close_and_commit(loader.c)
Esempio n. 10
0
def load_singlecore(args):
    """Populate the geminicassandra db from the VCF with one loader.

    Connects to the database, initialises the sample genotype counts when
    genotypes are being loaded, populates the database from the VCF, stores
    the genotype counts and, outside test mode, disconnects.
    """
    gemini_loader = GeminiLoader(args)
    gemini_loader.connect_to_db()

    if not (args.no_genotypes or args.no_load_genotypes):
        gemini_loader._init_sample_gt_counts()

    gemini_loader.populate_from_vcf()

    # The gene-table update below is disabled: it lives in a bare string
    # literal, which is a no-op expression statement.
    '''if not args.skip_gene_tables and not args.test_mode:
        gemini_loader.update_gene_table()'''

    if not (args.no_genotypes or args.no_load_genotypes):
        gemini_loader.store_sample_gt_counts()

    # The connection is left open in test mode.
    if args.test_mode:
        return
    gemini_loader.disconnect()
Esempio n. 11
0
def load_singlecore(args):
    """Single-process load of the VCF into the geminicassandra db.

    Sequence: connect, optionally initialise the sample genotype counts,
    populate from the VCF, optionally store the genotype counts, then
    disconnect unless ``args.test_mode`` is set. The genotype-count steps
    run only when neither ``args.no_genotypes`` nor
    ``args.no_load_genotypes`` is set.
    """
    gemini_loader = GeminiLoader(args)
    gemini_loader.connect_to_db()

    if not (args.no_genotypes or args.no_load_genotypes):
        gemini_loader._init_sample_gt_counts()

    gemini_loader.populate_from_vcf()
    # Disabled gene-table update, kept as a no-op string expression.
    '''if not args.skip_gene_tables and not args.test_mode:
        gemini_loader.update_gene_table()'''

    if not (args.no_genotypes or args.no_load_genotypes):
        gemini_loader.store_sample_gt_counts()

    if args.test_mode:
        # Test mode keeps the connection open.
        return
    gemini_loader.disconnect()
Esempio n. 12
0
def load(parser, args):
    """Load a VCF file and optionally log the timings.

    Exits with the parser help when ``args.vcf`` is missing. Otherwise it
    forces ``args.skip_cadd`` / ``args.skip_gerp_bp`` on when the matching
    annotation file is not reported by ``annotations.get_anno_files``,
    loads the annotations, runs ``setup_db()`` and ``single_core_stuff()``
    when ``args.node_num == 1``, runs the multi-core loader
    (``args.cores > 1``) or the single-core loader, and hands the result to
    ``insert_n_variants``. The total time is printed; when
    ``args.timing_log`` is set a comma-separated line of
    ``exp_id, total, db creation, single core, parallel`` is appended to it.

    Args:
        parser: object exposing ``print_help()``.
        args: parsed options. Attributes read here: ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``node_num``, ``cores``, ``contact_points``,
            ``keyspace``, ``timing_log`` and ``exp_id``.

    Raises:
        SystemExit: when ``args.vcf`` is ``None``.
    """
    if args.vcf is None:
        parser.print_help()
        # Use sys.exit rather than the interactive helper exit() from site.
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)
    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write(
                "\nCADD scores are not being loaded because the"
                " annotation file could not be found.\n"
                "`Run geminicassandra update --dataonly --extra cadd_score`"
                " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write(
                "CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write(
                "\nGERP per bp is not being loaded because the annotation file"
                " could not be found.\n    Run `geminicassandra update --dataonly --extra gerp_bp`"
                " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write(
                "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos(args)

    # Checkpoints default to start_time, so the db-creation and single-core
    # durations are reported as zero when the node-1 step does not run.
    time_2 = start_time
    time_3 = start_time

    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        time_2 = time.time()
        gemini_loader.single_core_stuff()
        time_3 = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    insert_n_variants(map(strip, args.contact_points.split(',')),
                      args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(time_2 - start_time)
    single_core_time = str(time_3 - time_2)
    parallel_time = str(end_time - time_3)
    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    print("Finished loading in %s s" % total_time)
    if args.timing_log is not None:
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join([
                args.exp_id, total_time, db_creation_time, single_core_time,
                parallel_time
            ]) + "\n")