예제 #1
0
def get_info(split: bool = True) -> VersionedTableResource:
    """
    Gets the gnomAD v3 info VersionedTableResource

    One TableResource is registered for every entry of ``RELEASES``;
    ``CURRENT_RELEASE`` is passed as the first argument of the
    VersionedTableResource (presumably the default version -- confirm against
    VersionedTableResource).

    :param split: Whether to return the split or multi-allelic version of the resource
    :return: gnomAD v3 info VersionedTableResource
    """
    # ".split" is inserted right before the ".ht" extension for the split version.
    suffix = ".split" if split else ""

    return VersionedTableResource(
        CURRENT_RELEASE,
        {
            release: TableResource(
                path=f"{_annotations_root(release)}/gnomad_genomes_v{release}_info{suffix}.ht"
            )
            for release in RELEASES
        },
    )
예제 #2
0
def get_rf(
    data: str = "rf_result",
    run_hash: Optional[str] = None,
) -> Union[str, TableResource]:
    """
    Gets the location of the requested RF data for a given run.

    ``data`` is used verbatim as the file stem and is expected to be one of:

        - 'training': training data for a given run
        - 'model': pyspark pipeline RF model
        - 'rf_result' (default): HT containing result of RF filtering

    No validation is performed: any value other than 'model' yields a
    TableResource for ``{data}.ht``.

    :param str data: One of 'training', 'model' or 'rf_result' (default)
    :param str run_hash: Hash of RF run to load; interpolated as-is into the path
    :return: A string path when ``data`` is 'model', otherwise a TableResource wrapping the ``.ht`` path
    """
    # Everything except the model itself is stored as a Hail Table.
    if data != "model":
        return TableResource(f"{tmp_dir}/models/{run_hash}/{data}.ht")

    return f"{tmp_dir}/models/{run_hash}/{data}.model"
예제 #3
0
def coverage(data_type: str) -> VersionedTableResource:
    """
    Retrieves gnomAD's coverage table by data_type

    :param data_type: One of "exomes" or "genomes"
    :return: VersionedTableResource holding one coverage TableResource per release of the data type
    :raises DataException: If ``data_type`` is not in ``DATA_TYPES``
    """
    if data_type not in DATA_TYPES:
        raise DataException(f'{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}')

    # Any valid data type other than "exomes" falls back to the genome releases.
    if data_type == "exomes":
        default_release, available_releases = CURRENT_EXOME_RELEASE, EXOME_RELEASES
    else:
        default_release, available_releases = CURRENT_GENOME_RELEASE, GENOME_RELEASES

    tables_by_release = {
        release: TableResource(path=_public_coverage_ht_path(data_type, release))
        for release in available_releases
    }
    return VersionedTableResource(default_release, tables_by_release)
예제 #4
0
def _import_clinvar(**kwargs) -> hl.Table:
    """
    Import the ClinVar sites VCF and run it through ``vep_or_lookup_vep``.

    :param kwargs: Keyword arguments forwarded unchanged to ``import_sites_vcf``
    :return: Table restricted to rows with more than one allele, as returned by ``vep_or_lookup_vep`` with reference "GRCh38"
    """
    ht = import_sites_vcf(**kwargs)

    # Get around a problematic single entry in the alleles array in the clinvar vcf
    has_multiple_alleles = hl.len(ht.alleles) > 1
    ht = ht.filter(has_multiple_alleles)

    return vep_or_lookup_vep(ht, reference="GRCh38")


# Resources with no versioning needed
# Purcell 5k intervals table (GRCh38). `import_func` / `import_args` name the
# importer and the source interval list (presumably used to build the .ht from
# the raw file -- confirm against TableResource).
purcell_5k_intervals = TableResource(
    path=
    "gs://gnomad-public/resources/grch38/purcell_5k_intervals/purcell5k.ht",
    import_func=_import_purcell_5k,
    import_args={
        "path":
        "gs://gnomad-public/resources/grch38/purcell_5k_intervals/purcell5k.interval_list",
    },
)

na12878_giab = MatrixTableResource(
    path=
    "gs://gnomad-public/resources/grch38/na12878/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.mt",
    import_func=hl.import_vcf,
    import_args={
        "path":
        "gs://gnomad-public/resources/grch38/na12878/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_PGandRTGphasetransfer.vcf.gz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh38",
예제 #5
0
def ld_scores(pop: str) -> TableResource:
    """
    Get resource for the LD scores for the given population.

    :param pop: Population, passed to ``_ld_scores_path`` together with "genomes"
    :return: TableResource at the path returned by ``_ld_scores_path``
    """
    scores_path = _ld_scores_path("genomes", pop)
    return TableResource(path=scores_path)
예제 #6
0
파일: meta.py 프로젝트: edenkal13/gnomad_qc
    """
    Gets the path to the finalized sample metadata information after sample QC

    :param version: gnomAD release version
    :param meta_version: metadata version to return
    :return: String path to the finalized metadata
    """
    return (
        f"{_meta_root_path(version)}/gnomad_v{version}_metadata_v{meta_version}.tsv.gz"
    )


# Sample metadata Hail Tables keyed by version string ("3.1", "3").
# Note the two entries use different file naming: the 3.1 table is the
# "sample_qc_metadata" HT, the 3 table is a dated "metadata" HT.
_meta_versions = {
    "3.1":
    TableResource(
        path=
        "gs://gnomad/metadata/genomes_v3.1/gnomad_v3.1_sample_qc_metadata.ht"),
    "3":
    TableResource(
        path="gs://gnomad/metadata/genomes_v3/gnomad_v3_metadata_2019-09-27.ht"
    ),
}

_project_meta_versions = {
    "3.1":
    TableResource(
        path="gs://gnomad/metadata/genomes_v3.1/v3.1_project_meta.ht"),
    "3":
    TableResource(
        path="gs://gnomad/metadata/genomes_v3/09-09-2019_v3_project_meta.ht",
        import_func=hl.import_table,
예제 #7
0
# NA12878 GIAB high-confidence calls (GRCh37) as a MatrixTable.
# `import_args` are the keyword arguments for `hl.import_vcf` on the source
# VCF (presumably used to create the .mt -- confirm against MatrixTableResource).
na12878_giab = MatrixTableResource(
    path="gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-Solid-10X_CHROM1-X_v3.3_highconf.mt",
    import_func=hl.import_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-Solid-10X_CHROM1-X_v3.3_highconf.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)

# HapMap 3.3 sites (b37 / GRCh37) as a Table.
# `import_args` are the keyword arguments for `import_sites_vcf` on the source
# VCF (presumably used to create the .ht -- confirm against TableResource).
hapmap = TableResource(
    path="gs://gnomad-public/resources/grch37/hapmap/hapmap_3.3.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/hapmap/hapmap_3.3.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)

# 1000 Genomes Omni 2.5 sites (b37 / GRCh37) as a Table.
# `import_args` are the keyword arguments for `import_sites_vcf` on the source
# VCF (presumably used to create the .ht -- confirm against TableResource).
kgp_omni = TableResource(
    path="gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)
예제 #8
0
def ld_index(pop: str) -> TableResource:
    """
    Get resource for the LD indices for the given population.

    :param pop: Population, passed to ``_ld_index_path`` together with "genomes"
    :return: TableResource at the path returned by ``_ld_index_path``
    """
    index_path = _ld_index_path("genomes", pop)
    return TableResource(path=index_path)
예제 #9
0
                                       version: str = CURRENT_RELEASE) -> str:
    """
    Provides the path to the transmitted singleton VCF used as input to VQSR

    :param bool adj: Whether to use adj genotypes
    :param version: Version of transmitted singleton VCF path to return
    :return:
    """
    return f'{_annotations_root(version)}/transmitted_singletons_{"adj" if adj else "raw"}.vcf.bgz'


# Last END positions table, one TableResource per entry in RELEASES, each
# located under that release's annotations root. CURRENT_RELEASE is passed as
# the first argument (presumably the default version -- confirm).
last_END_position = VersionedTableResource(
    CURRENT_RELEASE,
    {
        release: TableResource(
            f"{_annotations_root(release)}/gnomad_genomes_v{release}_last_END_positions.ht"
        )
        for release in RELEASES
    },
)

# Frequencies table, one TableResource per entry in RELEASES, each located
# under that release's annotations root. CURRENT_RELEASE is passed as the
# first argument (presumably the default version -- confirm).
freq = VersionedTableResource(
    CURRENT_RELEASE,
    {
        release: TableResource(
            f"{_annotations_root(release)}/gnomad_genomes_v{release}.frequencies.ht"
        )
        for release in RELEASES
    },
)
예제 #10
0
    :param split: Whether to return the split or multi-allelic version of the resource
    :return: gnomAD v3 info TableResource
    """
    path = '{}/gnomad_genomes_v3_info{}.ht'.format(ANNOTATIONS_ROOT,
                                                   '.split' if split else '')
    return TableResource(path)


def get_filters(model_id: str, split: bool = True) -> TableResource:
    """
    Gets the filtering annotation resource of a given model.

    The resource lives under ``ANNOTATIONS_ROOT`` and is named
    ``{model_id}_filtering.ht``, or ``{model_id}_filtering.split.ht`` when
    ``split`` is true.

    :param model_id: Filtering model id
    :param split: Split or multi-allelic version of the filtering file
    :return: Filtering annotation TableResource
    """
    split_suffix = '.split' if split else ''
    filters_path = '{}/{}_filtering{}.ht'.format(ANNOTATIONS_ROOT, model_id, split_suffix)
    return TableResource(filters_path)


# Unversioned gnomAD v3 genome annotation resources; every path is rooted at
# ANNOTATIONS_ROOT.
last_END_position = TableResource(
    f'{ANNOTATIONS_ROOT}/gnomad_genomes_v3_last_END_positions.ht')
freq = TableResource(f'{ANNOTATIONS_ROOT}/gnomad_genomes_v3.frequencies.ht')
qual_hist = TableResource(
    f'{ANNOTATIONS_ROOT}/gnomad_genomes_v3.qual_hists.ht')
vep = TableResource(f'{ANNOTATIONS_ROOT}/gnomad_genomes_v3_vep.ht')
# Plain string path (not a TableResource): this one is a bgzipped VCF.
info_vcf_path = f'{ANNOTATIONS_ROOT}/gnomad_genomes_v3_info.vcf.bgz'
qc_ac = TableResource(f'{ANNOTATIONS_ROOT}/gnomad_genomes_qc_ac.ht')
fam_stats = TableResource(f'{ANNOTATIONS_ROOT}/gnomad_genomes_qc_fam_stats.ht')
예제 #11
0
def ld_index(pop: str) -> TableResource:
    """
    Get resource for the LD indices for the given population.

    :param pop: Population, passed to ``_ld_index_path`` together with 'genomes'
    :return: TableResource at the path returned by ``_ld_index_path``
    """
    index_path = _ld_index_path('genomes', pop)
    return TableResource(path=index_path)
예제 #12
0
def vep(data_type) -> TableResource:
    """
    Get the 'vep' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'vep' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'vep')
    return TableResource(path=ht_path)
예제 #13
0
def omes_by_platform_concordance(data_type) -> TableResource:
    """
    Get the 'omes_by_platform_concordance' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'omes_by_platform_concordance' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'omes_by_platform_concordance')
    return TableResource(path=ht_path)
예제 #14
0
        },
    )


def get_rf_result(model_id: Optional[str] = None) -> VersionedTableResource:
    """
    Get the results of RF filtering for a given run

    One TableResource is registered for every entry of ``RELEASES``, each
    located under that release's variant QC root.

    :param model_id: RF run to load; interpolated as-is into each path
    :return: VersionedTableResource for RF filtered data
    """
    results_by_release = {
        release: TableResource(
            f"{get_variant_qc_root(release)}/rf/models/{model_id}/rf_result.ht"
        )
        for release in RELEASES
    }
    return VersionedTableResource(CURRENT_RELEASE, results_by_release)


# Final filter table, one TableResource per entry in RELEASES, each located
# directly under that release's variant QC root. CURRENT_RELEASE is passed as
# the first argument (presumably the default version -- confirm).
final_filter = VersionedTableResource(
    CURRENT_RELEASE,
    {
        release:
        TableResource(f"{get_variant_qc_root(release)}/final_filter.ht")
        for release in RELEASES
    },
)
예제 #15
0
def syndip_concordance(data_type) -> TableResource:
    """
    Get the 'syndip_concordance' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'syndip_concordance' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'syndip_concordance')
    return TableResource(path=ht_path)
예제 #16
0
def NA12878_concordance(data_type) -> TableResource:
    """
    Get the 'NA12878_concordance' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'NA12878_concordance' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'NA12878_concordance')
    return TableResource(path=ht_path)
예제 #17
0
def rf(data_type) -> TableResource:
    """
    Get the 'rf' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'rf' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'rf')
    return TableResource(path=ht_path)
예제 #18
0
def frequencies(data_type) -> TableResource:
    """
    Get the 'frequencies' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'frequencies' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'frequencies')
    return TableResource(path=ht_path)
예제 #19
0
def family_stats(data_type) -> TableResource:
    """
    Get the 'family_stats' annotation resource for the given data type.

    :param data_type: Data type forwarded to ``_annotations_ht_path``
    :return: TableResource at the 'family_stats' annotations path
    """
    ht_path = _annotations_ht_path(data_type, 'family_stats')
    return TableResource(path=ht_path)
예제 #20
0
def get_score_quantile_bins(model_id: str, aggregated: bool) -> TableResource:
    """
    Gets the score quantile bin resource of a given model.

    :param model_id: Model id, used as the file stem under ``tmp_dir``
    :param aggregated: Whether to return the 'binned' table (True) or the 'rank' table (False)
    :return: TableResource at ``{tmp_dir}/{model_id}.binned.ht`` or ``{tmp_dir}/{model_id}.rank.ht``
    """
    kind = 'binned' if aggregated else 'rank'
    # A single f-string replaces the former f-string nested inside str.format;
    # the resulting path is identical.
    return TableResource(f"{tmp_dir}/{model_id}.{kind}.ht")
예제 #21
0
    """
    relatedness table annotated with get_relationship_expr.

    :return: Annotated relatedness table
    """
    relatedness_ht = relatedness.ht()
    return relatedness_ht.annotate(relationship=get_relationship_expr(
        kin_expr=relatedness_ht.kin,
        ibd0_expr=relatedness_ht.ibd0,
        ibd1_expr=relatedness_ht.ibd1,
        ibd2_expr=relatedness_ht.ibd2,
    ))


# QC sites: the gnomAD v2 QC sites lifted over to GRCh38 (b38), stored as a
# Hail Table. Not versioned.
gnomad_v2_qc_sites = TableResource(
    "gs://gnomad-public/resources/grch38/gnomad_v2_qc_sites_b38.ht")

# Dense MT of samples at the v2 QC sites, one MatrixTableResource per entry in
# RELEASES. CURRENT_RELEASE is passed as the first argument (presumably the
# default version -- confirm).
qc = VersionedMatrixTableResource(
    CURRENT_RELEASE, {
        release: MatrixTableResource(
            f"gs://gnomad/sample_qc/mt/genomes_v{release}/gnomad_v{release}_qc_mt_v2_sites_dense.mt"
        )
        for release in RELEASES
    })

# PC relate PCA scores
pc_relate_pca_scores = VersionedTableResource(
    CURRENT_RELEASE, {
        release: TableResource(
            f"{get_sample_qc_root(release)}/gnomad_v{release}_qc_mt_v2_sites_pc_scores.ht"
예제 #22
0
def ld_scores(pop: str) -> TableResource:
    """
    Get resource for the LD scores for the given population.

    :param pop: Population, passed to ``_ld_scores_path`` together with 'genomes'
    :return: TableResource at the path returned by ``_ld_scores_path``
    """
    scores_path = _ld_scores_path('genomes', pop)
    return TableResource(path=scores_path)
예제 #23
0
파일: meta.py 프로젝트: tpoterba/gnomad_qc
import hail as hl
from gnomad.resources.resource_utils import (TableResource, PedigreeResource,
                                             VersionedPedigreeResource)

# Samples metadata
# Every resource below is rooted at META_ROOT (gnomAD v3 genomes metadata).
META_ROOT = "gs://gnomad/metadata/genomes_v3"
# Sample metadata as a Hail Table, plus the path of the matching gzipped TSV.
meta = TableResource(f'{META_ROOT}/gnomad_v3_metadata_2019-09-27.ht')
meta_tsv_path = f'{META_ROOT}/gnomad_v3_metadata_2019-09-27.tsv.gz'
# Project metadata: no `path` is given, only the `hl.import_table` call
# arguments for the source text file (keyed by 's', types imputed).
# NOTE(review): presumably this resource is always imported rather than read
# from a written .ht -- confirm against TableResource.
project_meta = TableResource(import_func=hl.import_table,
                             import_args={
                                 'path':
                                 f'{META_ROOT}/09-09-2019_v3_project_meta.txt',
                                 'impute': True,
                                 'key': 's',
                                 'min_partitions': 100
                             })
# Pedigree .fam files in two versions ('raw' and 'final'), tab-delimited;
# 'final' is passed as the first argument (presumably the default version).
pedigree = VersionedPedigreeResource(
    'final',  # TODO: Make sure "final" is the best label once the family scripts are in
    {
        'raw': PedigreeResource(f'{META_ROOT}/gnomad_v3_raw.fam',
                                delimiter="\t"),
        'final': PedigreeResource(f'{META_ROOT}/gnomad_v3.fam', delimiter="\t")
    })

# Trio .fam files in the same two versions.
# NOTE(review): unlike `pedigree`, no `delimiter` is passed here -- confirm
# this is intentional.
trios = VersionedPedigreeResource(  # TODO: Should this be merged with Pedigree into a single resource?
    'final',  # TODO: Make sure "final" is the best label once the family scripts are in
    {
        'raw': PedigreeResource(f'{META_ROOT}/gnomad_v3_trios_raw.fam'),
        'final': PedigreeResource(f'{META_ROOT}/gnomad_v3_trios.fam')
    }
)