Example #1
0
def prep_gemini_db(fnames, call_info, samples, extras):
    """Build a gemini database for one (name, caller) pair from snpEff VCFs.

    The first entry of ``samples`` is the data dict handed to the helpers
    and supplies the work directory. The database path is
    ``<work>/gemini/<name>-<caller>.db``; it is only built when that file
    is not already present, ``do_db_build(samples)`` is truthy and at
    least one of ``fnames`` has variants.

    Returns a one-item list pairing ``(name, caller)`` with a dict whose
    ``db`` entry is the database path (None when no such file exists) and
    whose ``vcf`` entry is the multisample VCF for batches, otherwise None.
    """
    data = samples[0]
    build_requested = do_db_build(samples) and any(
        vcfutils.vcf_has_variants(vcf) for vcf in fnames)
    name, caller, is_batch = call_info
    work_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "gemini"))
    combined_vcf = get_multisample_vcf(fnames, name, caller, data)
    db_path = os.path.join(work_dir, "%s-%s.db" % (name, caller))
    db_missing = not utils.file_exists(db_path)
    if db_missing and build_requested:
        # passonly is switched off as soon as any sample turns on the
        # gemini_allvariants tool.
        wants_all = any("gemini_allvariants" in dd.get_tools_on(sample)
                        for sample in samples)
        single_vcf = multiallelic.to_single(combined_vcf, data,
                                            passonly=not wants_all)
        pedigree = create_ped_file(samples + extras, single_vcf)
        # Use original approach for hg19/GRCh37 pending additional testing
        legacy_ok = support_gemini_orig(data) and not any(
            dd.get_vcfanno(sample) for sample in samples)
        build_db = create_gemini_db_orig if legacy_ok else create_gemini_db
        db_path = build_db(single_vcf, data, db_path, pedigree)
    result = {
        "db": db_path if utils.file_exists(db_path) else None,
        "vcf": combined_vcf if is_batch else None,
    }
    return [[(name, caller), result]]
Example #2
0
def prep_gemini_db(fnames, call_info, samples, extras):
    """Prepare a gemini database from VCF inputs prepared with snpEff.

    The database lives at ``<work>/gemini/<name>-<caller>.db``, with the
    work directory read from the first sample. It is built only when that
    file is missing, ``do_db_build(samples)`` is truthy and at least one
    input VCF has variants.

    Args:
        fnames: input VCFs, combined into the multisample VCF.
        call_info: ``(name, caller, is_batch)`` tuple.
        samples: sample dicts; the first one is passed to the helpers.
        extras: additional samples, used only when writing the PED file.

    Returns:
        ``[[(name, caller), {"db": ..., "vcf": ...}]]`` where ``db`` is the
        database path or None if no such file exists, and ``vcf`` is the
        multisample VCF when ``is_batch`` is truthy, otherwise None.
    """
    data = samples[0]
    out_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "gemini"))
    name, caller, is_batch = call_info
    gemini_db = os.path.join(out_dir, "%s-%s.db" % (name, caller))
    multisample_vcf = get_multisample_vcf(fnames, name, caller, data)
    # Runs on every call, including calls where no database gets built.
    gemini_vcf = multiallelic.to_single(multisample_vcf, data)
    # Decide once whether to build: a second, identical evaluation would
    # only re-scan every input VCF for the same answer.
    use_gemini = (do_db_build(samples) and
                  any(vcfutils.vcf_has_variants(f) for f in fnames))
    if not utils.file_exists(gemini_db) and use_gemini:
        ped_file = create_ped_file(samples + extras, gemini_vcf)
        gemini_db = create_gemini_db(gemini_vcf, data, gemini_db, ped_file)
    return [[(name, caller), {"db": gemini_db if utils.file_exists(gemini_db) else None,
                              "vcf": multisample_vcf if is_batch else None}]]
Example #3
0
def prep_gemini_db(fnames, call_info, samples, extras):
    """Prepare a gemini database from VCF inputs prepared with snpEff.

    The database lives at ``<work>/gemini/<name>-<caller>.db``, with the
    work directory read from the first sample. It is built only when that
    file is missing, ``do_db_build(samples)`` is truthy and at least one
    input VCF has variants.

    Args:
        fnames: input VCFs, combined into the multisample VCF.
        call_info: ``(name, caller, is_batch)`` tuple.
        samples: sample dicts; the first one is passed to the helpers.
        extras: additional samples, used only when writing the PED file.

    Returns:
        ``[[(name, caller), {"db": ..., "vcf": ...}]]`` where ``db`` is the
        database path or None if no such file exists, and ``vcf`` is the
        multisample VCF when ``is_batch`` is truthy, otherwise None.
    """
    data = samples[0]
    out_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "gemini"))
    name, caller, is_batch = call_info
    gemini_db = os.path.join(out_dir, "%s-%s.db" % (name, caller))
    multisample_vcf = get_multisample_vcf(fnames, name, caller, data)
    # Runs on every call, including calls where no database gets built.
    gemini_vcf = multiallelic.to_single(multisample_vcf, data)
    # Decide once whether to build: a second, identical evaluation would
    # only re-scan every input VCF for the same answer.
    use_gemini = (do_db_build(samples) and
                  any(vcfutils.vcf_has_variants(f) for f in fnames))
    if not utils.file_exists(gemini_db) and use_gemini:
        ped_file = create_ped_file(samples + extras, gemini_vcf)
        gemini_db = create_gemini_db(gemini_vcf, data, gemini_db, ped_file)
    return [[(name, caller), {"db": gemini_db if utils.file_exists(gemini_db) else None,
                              "vcf": multisample_vcf if is_batch else None}]]
Example #4
0
def prep_gemini_db(fnames, call_info, samples, extras):
    """Produce the gemini-ready VCF and, when possible, its database.

    The multisample VCF is split to single alleles when
    ``do_db_build(samples)`` is truthy and at least one of ``fnames`` has
    variants, and is then always passed through ``_run_vcfanno``. The
    database ``<work>/gemini/<name>-<caller>.db`` is built only when the
    resulting VCF has variants, the file is not already present and the
    split was requested.

    Returns a one-item list pairing ``(name, caller)`` with a dict holding
    ``db`` (database path, or None when no such file exists), ``vcf`` (the
    final VCF) and ``decomposed`` (the split/build flag).
    """
    data = samples[0]
    decompose = do_db_build(samples) and any(
        vcfutils.vcf_has_variants(vcf) for vcf in fnames)
    # The batch flag is unpacked but not used in this function.
    name, caller, _is_batch = call_info
    work_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "gemini"))
    out_vcf = get_multisample_vcf(fnames, name, caller, data)
    if decompose:
        # passonly is switched off as soon as any sample turns on the
        # gemini_allvariants tool.
        wants_all = any("gemini_allvariants" in dd.get_tools_on(sample)
                        for sample in samples)
        out_vcf = multiallelic.to_single(out_vcf, data,
                                         passonly=not wants_all)
    out_vcf = _run_vcfanno(out_vcf, data, decompose)
    db_path = os.path.join(work_dir, "%s-%s.db" % (name, caller))
    if (vcfutils.vcf_has_variants(out_vcf)
            and not utils.file_exists(db_path)
            and decompose):
        pedigree = create_ped_file(samples + extras, out_vcf)
        # Use original approach for hg19/GRCh37 pending additional testing
        legacy_ok = support_gemini_orig(data) and not any(
            dd.get_vcfanno(sample) for sample in samples)
        build_db = create_gemini_db_orig if legacy_ok else create_gemini_db
        db_path = build_db(out_vcf, data, db_path, pedigree)
    result = {
        "db": db_path if utils.file_exists(db_path) else None,
        "vcf": out_vcf,
        "decomposed": decompose,
    }
    return [[(name, caller), result]]