Python load_annos Exemples, annotations.load_annos Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : gemini_load.py Projet : IMPIMBA/gemini

def load(parser, args):
    """Validate the arguments, settle CADD/GERP loading and start the load.

    Exits with an error message when the database or the VCF file is missing.
    When CADD or GERP-per-bp loading was not explicitly skipped but the
    matching key is absent from ``annotations.get_anno_files(args)``, the
    corresponding ``args.skip_*`` flag is set to True and a hint is written
    to stderr.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores`` are read here.

    Raises:
        SystemExit: if ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    annos = annotations.get_anno_files(args)
    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write(
                "\nCADD scores are not being loaded because the"
                " annotation file could not be found.\n"
                "Run `gemini update --dataonly --extra cadd_score`"
                " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write(
                "CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write(
                "\nGERP per bp is not being loaded because the annotation file"
                " could not be found.\n    Run `gemini update --dataonly --extra gerp_bp`"
                " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write(
                "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos(args)

    # a scheduler takes precedence; otherwise pick by the requested core count
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)

Exemple #2

0

Afficher le fichier

Fichier : gemini_load_chunk.py Projet : MinocheAE/gemini

def load(parser, args):
    """Validate the arguments and populate a gemini database from a VCF.

    The population is attempted at most twice. If the first attempt raises
    ``sqlite3.OperationalError``, the second attempt points ``args.tmp_db``
    at a fresh, uniquely named file under ``args.tempdir`` and, on success,
    moves that file to ``args.db``.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``db``, ``vcf``, ``anno_type``, ``tempdir``,
            ``no_genotypes`` and ``no_load_genotypes`` are read here and
            ``tmp_db`` may be overwritten.

    Raises:
        SystemExit: if the db/VCF is missing or ``anno_type`` is unsupported.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")
    if args.anno_type not in ['snpEff', 'VEP', None]:
        parser.print_help()
        sys.exit("\nERROR: Unsupported selection for -t\n")

    # collect all of the additional annotation files
    annotations.load_annos(args)

    # create a new gemini loader and populate
    # the gemini db and files from the VCF
    for try_count in range(2):
        try:
            if try_count > 0:
                # retry against a uniquely named temporary database
                args.tmp_db = os.path.join(args.tempdir, "%s.db" % uuid.uuid4())

            gemini_loader = GeminiLoader(args)
            gemini_loader.store_resources()
            gemini_loader.store_version()
            gemini_loader.store_vcf_header()
            gemini_loader.populate_from_vcf()
            gemini_loader.update_gene_table()
            # gemini_loader.build_indices_and_disconnect()

            if not args.no_genotypes and not args.no_load_genotypes:
                gemini_loader.store_sample_gt_counts()

            if try_count > 0:
                shutil.move(args.tmp_db, args.db)
            break
        except sqlite3.OperationalError as e:
            # NOTE(review): a failure on the second attempt is only logged and
            # the function still returns normally -- confirm this is intended.
            sys.stderr.write("sqlite3.OperationalError: %s\n" % e)

Exemple #3

0

Afficher le fichier

def load(parser, args):
    """Validate the arguments and populate a gemini database from a VCF.

    The population is attempted at most twice. If the first attempt raises
    ``sql.exc.OperationalError``, the second attempt points ``args.tmp_db``
    at a fresh, uniquely named file under ``args.tempdir`` and, on success,
    moves that file to ``args.db``.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``db``, ``vcf``, ``anno_type``, ``tempdir``,
            ``no_genotypes`` and ``no_load_genotypes`` are read here and
            ``tmp_db`` may be overwritten.

    Raises:
        SystemExit: if the db/VCF is missing or ``anno_type`` is unsupported.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")
    if args.anno_type not in ['snpEff', 'VEP', None, "all"]:
        parser.print_help()
        sys.exit("\nERROR: Unsupported selection for -t\n")

    # collect all of the additional annotation files
    annotations.load_annos(args)

    # create a new gemini loader and populate
    # the gemini db and files from the VCF
    for try_count in range(2):
        try:
            if try_count > 0:
                # retry against a uniquely named temporary database
                args.tmp_db = os.path.join(args.tempdir, "%s.db" % uuid.uuid4())

            gemini_loader = GeminiLoader(args)
            gemini_loader.store_resources()
            gemini_loader.store_version()
            gemini_loader.store_vcf_header()
            gemini_loader.populate_from_vcf()
            gemini_loader.update_gene_table()
            gemini_loader.build_indices_and_disconnect()

            if not args.no_genotypes and not args.no_load_genotypes:
                gemini_loader.store_sample_gt_counts()

            if try_count > 0:
                shutil.move(args.tmp_db, args.db)
            break
        except sql.exc.OperationalError as e:
            # NOTE(review): a failure on the second attempt is only logged and
            # the function still returns normally -- confirm this is intended.
            sys.stderr.write("sqlalchemy.OperationalError: %s\n" % e)

Exemple #4

0

Afficher le fichier

Fichier : gemini_load.py Projet : shameer/gemini

def load(parser, args):
    """Check the inputs, decide on CADD/GERP loading and dispatch the load.

    Exits with an error message when the database or the VCF file is missing.
    If CADD or GERP-per-bp loading was not explicitly skipped but its key is
    not in the mapping returned by ``annotations.get_anno_files(args)``, the
    matching ``args.skip_*`` flag is forced to True and an installation hint
    is written to stderr.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores`` are read here.

    Raises:
        SystemExit: if ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")

    annos = annotations.get_anno_files(args)
    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write(
                "\nCADD scores are not being loaded because the"
                " annotation file could not be found.\n"
                "Run `gemini update --dataonly --extra cadd_score`"
                " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write(
                "CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write(
                "\nGERP per bp is not being loaded because the annotation file"
                " could not be found.\n    Run `gemini update --dataonly --extra gerp_bp`"
                " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write(
                "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos(args)

    # a scheduler takes precedence; otherwise pick by the requested core count
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)

Exemple #5

0

Afficher le fichier

Fichier : gemini_load.py Projet : bgossele/gemini

def load(parser, args):
    """Load a VCF, record the variant count and optionally log timings.

    Exits with an error message when no VCF file was given. If CADD or
    GERP-per-bp loading was not explicitly skipped but its key is not in the
    mapping returned by ``annotations.get_anno_files(args)``, the matching
    ``args.skip_*`` flag is forced to True. When ``args.node_num == 1`` a
    ``GeminiLoader`` runs ``setup_db()`` and ``single_core_stuff()`` before
    the single- or multi-core load. The resulting variant count is passed to
    ``insert_n_variants`` and, if ``args.timing_log`` is set, one CSV line of
    phase durations is appended to that file.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``vcf``, ``skip_cadd``, ``skip_gerp_bp``,
            ``node_num``, ``cores``, ``contact_points``, ``keyspace``,
            ``timing_log`` and ``exp_id`` are read here.

    Raises:
        SystemExit: if ``args.vcf`` is None.
    """
    # Only the VCF is required here; the former args.db check is disabled.
    if args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)
    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write(
                "\nCADD scores are not being loaded because the"
                " annotation file could not be found.\n"
                "Run `geminicassandra update --dataonly --extra cadd_score`"
                " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write(
                "CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write(
                "\nGERP per bp is not being loaded because the annotation file"
                " could not be found.\n    Run `geminicassandra update --dataonly --extra gerp_bp`"
                " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write(
                "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos(args)

    # Both marks default to start_time, so the setup phases are reported as
    # zero seconds when the node_num branch below is not taken.
    time_2 = start_time
    time_3 = start_time

    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        time_2 = time.time()
        gemini_loader.single_core_stuff()
        time_3 = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    # str.strip per element: no reliance on a free-standing strip() helper
    contact_points = [point.strip()
                      for point in args.contact_points.split(',')]
    insert_n_variants(contact_points, args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(time_2 - start_time)
    single_core_time = str(time_3 - time_2)
    parallel_time = str(end_time - time_3)
    # single-argument call form prints the same text on Python 2 and 3
    print("Finished loading in %s s" % total_time)
    if args.timing_log is not None:
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join([args.exp_id, total_time, db_creation_time,
                                   single_core_time, parallel_time]) + "\n")

Exemple #6

0

Afficher le fichier

def load(parser, args):
    """Check the required inputs, gather annotations and start the load.

    Prints usage and exits when the database or the VCF file is missing.
    Otherwise the load runs through the scheduler-based loader when
    ``args.scheduler`` is truthy, the multi-core loader when
    ``args.cores > 1``, and the single-core loader in every other case.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # pick the back end first, then run it
    if args.scheduler:
        run_load = load_ipython
    elif args.cores > 1:
        run_load = load_multicore
    else:
        run_load = load_singlecore
    run_load(args)

Exemple #7

0

Afficher le fichier

Fichier : gemini_load.py Projet : jeffhsu3/gemini

def load(parser, args):
    """Entry point of the load command.

    Requires both ``args.db`` and ``args.vcf``; otherwise usage is printed
    and the process exits. After the annotation files are collected, exactly
    one loader is invoked: scheduler-based, multi-core or single-core.
    """
    have_db = args.db is not None
    if not have_db or args.vcf is None:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # a scheduler wins over any core count
    if args.scheduler:
        load_ipython(args)
        return
    if args.cores > 1:
        load_multicore(args)
        return
    load_singlecore(args)

Exemple #8

0

Afficher le fichier

Fichier : gemini_load.py Projet : angelinasusan/gemini

def load(parser, args):
    """Validate the arguments and fill a gemini database from the VCF.

    Prints usage and exits when the database or VCF is missing, or when
    ``args.anno_type`` is not one of "snpEff", "VEP" or None. Otherwise a
    ``GeminiLoader`` is built from ``args`` and asked, in order, to populate
    from the VCF, build indices and disconnect, and store sample genotype
    counts.
    """

    def abort(message):
        # show usage first, then terminate with the given message
        parser.print_help()
        exit(message)

    if args.db is None or args.vcf is None:
        abort("ERROR: load needs both a VCF file and a database file\n")

    supported_anno_types = ["snpEff", "VEP", None]
    if args.anno_type not in supported_anno_types:
        abort("\nERROR: Unsupported selection for -t\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # build the loader and run the population steps in their fixed order
    loader = GeminiLoader(args)
    loader.populate_from_vcf()
    loader.build_indices_and_disconnect()
    loader.store_sample_gt_counts()

Exemple #9

0

Afficher le fichier

Fichier : gemini_load.py Projet : jdiez/gemini

def load(parser, args):
    """Validate the arguments, gather annotations and dispatch the load.

    Prints usage and exits when the database or VCF is missing, or when
    ``args.anno_type`` is not one of 'snpEff', 'VEP' or None. The loader is
    then chosen: scheduler-based if ``use_scheduler(args)`` is truthy,
    multi-core if ``args.cores > 1``, single-core otherwise.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    known_anno_types = ['snpEff', 'VEP', None]
    if args.anno_type not in known_anno_types:
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # pick the back end first, then run it
    if use_scheduler(args):
        run_load = load_ipython
    elif args.cores > 1:
        run_load = load_multicore
    else:
        run_load = load_singlecore
    run_load(args)

Exemple #10

0

Afficher le fichier

Fichier : gemini_load.py Projet : hjanime/gemini

def load(parser, args):
    """Run a single-process load of the VCF into the gemini database.

    Usage is printed and the process exits if the database or VCF is not
    given, or if ``args.anno_type`` is anything other than 'snpEff', 'VEP'
    or None. After collecting the annotation files, a ``GeminiLoader``
    populates from the VCF, builds indices and disconnects, then stores the
    sample genotype counts.
    """
    db_and_vcf_given = args.db is not None and args.vcf is not None
    if not db_and_vcf_given:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    allowed_anno_types = ['snpEff', 'VEP', None]
    if args.anno_type not in allowed_anno_types:
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # one loader instance drives every population step, in this order
    vcf_loader = GeminiLoader(args)
    vcf_loader.populate_from_vcf()
    vcf_loader.build_indices_and_disconnect()
    vcf_loader.store_sample_gt_counts()

Exemple #11

0

Afficher le fichier

Fichier : gemini_load.py Projet : geniusphil/gemini

def load(parser, args):
    """Validate the inputs, announce CADD/GERP loading and start the load.

    Exits with an error message when the database or the VCF file is missing.
    A status line is written to stdout for each of CADD and GERP-per-bp whose
    ``args.skip_*`` flag is False. The load then goes through the
    scheduler-based loader when ``args.scheduler`` is truthy, the multi-core
    loader when ``args.cores > 1``, and the single-core loader otherwise.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``db``, ``vcf``, ``skip_cadd``,
            ``skip_gerp_bp``, ``scheduler`` and ``cores`` are read here.

    Raises:
        SystemExit: if ``args.db`` or ``args.vcf`` is None.
    """
    if args.db is None or args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs both a VCF file and a database file\n")
    if args.skip_cadd is False:
        sys.stdout.write("CADD is being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        sys.stdout.write("GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos()

    # a scheduler takes precedence; otherwise pick by the requested core count
    if args.scheduler:
        load_ipython(args)
    elif args.cores > 1:
        load_multicore(args)
    else:
        load_singlecore(args)

Exemple #12

0

Afficher le fichier

Fichier : gemini_load.py Projet : chapmanb/gemini

def load(parser, args):
    """Entry point of the load command.

    Both ``args.db`` and ``args.vcf`` must be set and ``args.anno_type``
    must be 'snpEff', 'VEP' or None; otherwise usage is printed and the
    process exits. Once the annotation files are collected, exactly one
    loader runs: scheduler-based when ``use_scheduler(args)`` is truthy,
    multi-core when ``args.cores > 1``, single-core otherwise.
    """

    def abort(message):
        # show usage first, then terminate with the given message
        parser.print_help()
        exit(message)

    if args.db is None or args.vcf is None:
        abort("ERROR: load needs both a VCF file and a database file\n")
    if args.anno_type not in ['snpEff', 'VEP', None]:
        abort("\nERROR: Unsupported selection for -t\n")

    # gather all of the additional annotation files
    annotations.load_annos()

    # a scheduler wins over any core count
    if use_scheduler(args):
        load_ipython(args)
        return
    if args.cores > 1:
        load_multicore(args)
        return
    load_singlecore(args)

Exemple #13

0

Afficher le fichier

Fichier : gemini_load_chunk.py Projet : IMPIMBA/gemini

def load(parser, args):
    """Validate the arguments and populate the gemini db from the VCF.

    Prints usage and exits when the database or VCF is missing, or when
    ``args.anno_type`` is not one of 'snpEff', 'VEP' or None. Otherwise a
    ``GeminiLoader`` stores resources, version and VCF header, populates
    from the VCF and updates the gene table. Sample genotype counts are
    stored unless ``args.no_genotypes`` or ``args.no_load_genotypes`` is
    truthy.
    """
    inputs_missing = args.db is None or args.vcf is None
    if inputs_missing:
        parser.print_help()
        exit("ERROR: load needs both a VCF file and a database file\n")

    accepted_anno_types = ['snpEff', 'VEP', None]
    if args.anno_type not in accepted_anno_types:
        parser.print_help()
        exit("\nERROR: Unsupported selection for -t\n")

    # gather all of the additional annotation files
    annotations.load_annos(args)

    # one loader instance drives every population step, in this order
    loader = GeminiLoader(args)
    loader.store_resources()
    loader.store_version()
    loader.store_vcf_header()
    loader.populate_from_vcf()
    loader.update_gene_table()
    # loader.build_indices_and_disconnect()

    genotypes_wanted = not (args.no_genotypes or args.no_load_genotypes)
    if genotypes_wanted:
        loader.store_sample_gt_counts()

Exemple #14

0

Afficher le fichier

Fichier : gemini_load.py Projet : bgossele/gemini

def load(parser, args):
    """Load a VCF, record the variant count and optionally log timings.

    Exits with an error message when no VCF file was given. If CADD or
    GERP-per-bp loading was not explicitly skipped but its key is not in the
    mapping returned by ``annotations.get_anno_files(args)``, the matching
    ``args.skip_*`` flag is forced to True. When ``args.node_num == 1`` a
    ``GeminiLoader`` runs ``setup_db()`` and ``single_core_stuff()`` before
    the single- or multi-core load. The resulting variant count is passed to
    ``insert_n_variants`` and, if ``args.timing_log`` is set, one CSV line of
    phase durations is appended to that file.

    Args:
        parser: object exposing ``print_help()``; used to show usage on error.
        args: parsed arguments; ``vcf``, ``skip_cadd``, ``skip_gerp_bp``,
            ``node_num``, ``cores``, ``contact_points``, ``keyspace``,
            ``timing_log`` and ``exp_id`` are read here.

    Raises:
        SystemExit: if ``args.vcf`` is None.
    """
    # Only the VCF is required here; the former args.db check is disabled.
    if args.vcf is None:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive sessions and is missing when site is not loaded.
        sys.exit("ERROR: load needs a VCF file\n")

    start_time = time.time()
    annos = annotations.get_anno_files(args)
    # force skipping CADD and GERP if the data files have not been installed
    if args.skip_cadd is False:
        if 'cadd_score' not in annos:
            sys.stderr.write(
                "\nCADD scores are not being loaded because the"
                " annotation file could not be found.\n"
                "Run `geminicassandra update --dataonly --extra cadd_score`"
                " to install the annotation file.\n\n")
            args.skip_cadd = True
        else:
            sys.stderr.write(
                "CADD scores are being loaded (to skip use:--skip-cadd).\n")
    if args.skip_gerp_bp is False:
        if 'gerp_bp' not in annos:
            sys.stderr.write(
                "\nGERP per bp is not being loaded because the annotation file"
                " could not be found.\n    Run `geminicassandra update --dataonly --extra gerp_bp`"
                " to install the annotation file.\n\n")
            args.skip_gerp_bp = True
        else:
            sys.stderr.write(
                "GERP per bp is being loaded (to skip use:--skip-gerp-bp).\n")
    # collect all of the additional annotation files
    annotations.load_annos(args)

    # Both marks default to start_time, so the setup phases are reported as
    # zero seconds when the node_num branch below is not taken.
    time_2 = start_time
    time_3 = start_time

    if args.node_num == 1:
        gemini_loader = GeminiLoader(args)
        gemini_loader.setup_db()
        time_2 = time.time()
        gemini_loader.single_core_stuff()
        time_3 = time.time()

    if args.cores > 1:
        n_variants = load_multicore(args)
    else:
        n_variants = load_singlecore(args)

    # str.strip per element: no reliance on a free-standing strip() helper
    contact_points = [point.strip()
                      for point in args.contact_points.split(',')]
    insert_n_variants(contact_points, args.keyspace, n_variants)

    end_time = time.time()
    total_time = str(end_time - start_time)
    db_creation_time = str(time_2 - start_time)
    single_core_time = str(time_3 - time_2)
    parallel_time = str(end_time - time_3)
    # single-argument call form prints the same text on Python 2 and 3
    print("Finished loading in %s s" % total_time)
    if args.timing_log is not None:
        with open(args.timing_log, "a") as myfile:
            myfile.write(",".join([
                args.exp_id, total_time, db_creation_time, single_core_time,
                parallel_time
            ]) + "\n")