Ejemplo n.º 1
0
def get_eigen(partitions: int = None, namenode: str = ""):
    """Import the EIGEN coding/non-coding GRCh37 VCF stored on HDFS.

    Args:
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.
        namenode: Inserted directly after ``hdfs://`` when building the
            file URL (presumably the HDFS namenode host -- confirm with
            callers).

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    eigen_url = ("hdfs://" + namenode +
                 "/user/hdfs/data/EIGEN_coding_noncoding.grch37.vcf.gz")
    # "37" and "eigen" are passed positionally; they look like the genome
    # version and a dataset label -- confirm against import_vcf's signature.
    return import_vcf(eigen_url,
                      "37",
                      "eigen",
                      min_partitions=partitions,
                      force_bgz=True)
Ejemplo n.º 2
0
def get_exac(partitions: int = None, namenode: str = ""):
    """Import the ExAC r1 sites (VEP) VCF stored on HDFS.

    Args:
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.
        namenode: Inserted directly after ``hdfs://`` when building the
            file URL (presumably the HDFS namenode host -- confirm with
            callers).

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    exac_url = "hdfs://" + namenode + "/user/hdfs/data/ExAC.r1.sites.vep.vcf.gz"
    # "37" and "exac" are passed positionally; they look like the genome
    # version and a dataset label -- confirm against import_vcf's signature.
    return import_vcf(
        exac_url,
        "37",
        "exac",
        min_partitions=partitions,
        force_bgz=True,
    )
Ejemplo n.º 3
0
def get_mpc():
    """Import the MPC constraint-values VCF from the seqr-resources S3 bucket.

    Unlike the HDFS-backed loaders, this call uses a fixed
    ``min_partitions=30`` and does not pass ``force_bgz``.

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    mpc_url = 's3://seqr-resources/mpc/fordist_constraint_official_mpc_values.vcf.gz'
    mpc = import_vcf(mpc_url, "37", "mpc", min_partitions=30)
    # NOTE: an alternative import of the TOPMed VCF was left disabled here:
    #   import_vcf("s3://seqr-resources/topmed/bravo-dbsnp-all.removed_chr_prefix.liftunder_GRCh37.vcf.gz", "37", "topmed")
    return mpc
def load_hgmd_vcf(partitions: int = None, namenode: str = ""):
    """Import the HGMD Pro 2018.4 (hg19) VCF stored on HDFS.

    Args:
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.
        namenode: Inserted directly after ``hdfs://`` when building the
            file URL (presumably the HDFS namenode host -- confirm with
            callers).

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    # This file lives under /user/hadoop, not /user/hdfs.
    hgmd_url = 'hdfs://' + namenode + '/user/hadoop/data/hgmd_pro_2018.4_hg19.vcf.gz'
    return import_vcf(
        hgmd_url,
        "37",
        "hgmd_grch37",
        min_partitions=partitions,
        force_bgz=True,
    )
Ejemplo n.º 5
0
def get_topmed(partitions: int = None, namenode: str = ""):
    """Import the TOPMed (BRAVO dbSNP, GRCh37 liftunder) VCF stored on HDFS.

    Args:
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.
        namenode: Inserted directly after ``hdfs://`` when building the
            file URL (presumably the HDFS namenode host -- confirm with
            callers).

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    topmed_path = (
        "/user/hdfs/data/"
        "bravo-dbsnp-all.removed_chr_prefix.liftunder_GRCh37.vcf.gz"
    )
    topmed_url = "hdfs://" + namenode + topmed_path
    return import_vcf(topmed_url,
                      "37",
                      "topmed",
                      min_partitions=partitions,
                      force_bgz=True)
def import_primate(partitions: int = None, namenode: str = ""):
    """Import the PrimateAI scores v0.2 VCF stored on HDFS.

    Args:
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.
        namenode: Inserted directly after ``hdfs://`` when building the
            file URL (presumably the HDFS namenode host -- confirm with
            callers).

    Returns:
        Whatever ``import_vcf`` returns for this file.
    """
    primate_url = ("hdfs://" + namenode +
                   "/user/hdfs/data/PrimateAI_scores_v0.2.vcf.gz")
    return import_vcf(primate_url,
                      "37",
                      "primate_ai",
                      min_partitions=partitions,
                      force_bgz=True)
def add_vcf_to_hail(sample: SeqrSample,
                    filename,
                    local=False,
                    genome_version="37",
                    partitions: int = None):
    """Import a sample's VCF and attach the sample's global metadata.

    Args:
        sample: Supplies ``individual_id``, ``path_to_vcf`` and
            ``family_id``, which are read here.
        filename: Location of the VCF, passed as the first argument to
            ``import_vcf``.
        local: Not used by this function; kept so existing callers that
            pass it keep working.
        genome_version: Passed as the second argument to ``import_vcf``.
        partitions: Forwarded to ``import_vcf`` as ``min_partitions``.

    Returns:
        The result of ``add_global_metadata`` applied to the imported data.
    """
    imported = import_vcf(
        filename,
        genome_version,
        sample.individual_id,
        force_bgz=True,
        min_partitions=partitions,
    )
    # Note that the metadata path comes from the sample record
    # (sample.path_to_vcf), not from the `filename` that was imported.
    return add_global_metadata(
        imported,
        sample.path_to_vcf,
        sample.family_id,
        sample.individual_id,
    )