Beispiel #1
0
    "annotate_gnomad_v2_variants_with_mnvs",
    annotate_variants_with_mnvs,
    "/gnomad_v2/gnomad_v2_variants_annotated_1.ht",
    {
        "variants_path": pipeline.get_task("prepare_gnomad_v2_variants"),
        "mnvs_path": pipeline.get_task("prepare_gnomad_v2_mnvs"),
    },
)

# Register the "annotate_gnomad_v2_transcript_consequences" task, which runs
# `annotate_transcript_consequences` and is associated with the
# "/gnomad_v2/gnomad_v2_variants_annotated_2.ht" path.
# NOTE(review): presumably the path is where the task writes its output and
# the dict maps the function's keyword arguments to upstream tasks -- confirm
# against the `add_task` signature.
pipeline.add_task(
    "annotate_gnomad_v2_transcript_consequences",
    annotate_transcript_consequences,
    "/gnomad_v2/gnomad_v2_variants_annotated_2.ht",
    {
        # Variants come from the MNV-annotation task registered on this pipeline.
        "variants_path": pipeline.get_task("annotate_gnomad_v2_variants_with_mnvs"),
        # Transcripts come from a task owned by the separate genes pipeline.
        "transcripts_path": genes_pipeline.get_task("extract_grch37_transcripts"),
    },
)

###############################################
# LoF curation
###############################################

pipeline.add_task(
    "prepare_gnomad_v2_lof_curation_results",
    import_gnomad_v2_lof_curation_results,
    "/gnomad_v2/gnomad_v2_lof_curation_results.ht",
    {"genes_path": genes_pipeline.get_task("prepare_grch37_genes")},
    {
        "curation_result_paths": [
            "gs://gnomad-public/truth-sets/source/lof-curation/AP4_curation_results.csv",
Beispiel #2
0
    import_exac_vcf,
    "/exac/exac_variants.ht",
    {
        "path":
        "gs://gnomad-public/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz"
    },
)

# Register the "annotate_exac_transcript_consequences" task, which runs
# `annotate_transcript_consequences` and is associated with the
# "/exac/exac_variants_annotated_1.ht" path.
# NOTE(review): presumably the path is the task's output location and the dict
# wires upstream task outputs into the function's arguments -- confirm against
# the `add_task` signature.
pipeline.add_task(
    "annotate_exac_transcript_consequences",
    annotate_transcript_consequences,
    "/exac/exac_variants_annotated_1.ht",
    {
        # Variants come from this pipeline's "import_exac_vcf" task.
        "variants_path": pipeline.get_task("import_exac_vcf"),
        # Transcripts come from a task owned by the separate genes pipeline.
        "transcripts_path":
        genes_pipeline.get_task("extract_grch37_transcripts"),
    },
)

###############################################
# Coverage
###############################################

# Register the "import_exac_coverage" task, which runs `import_exac_coverage`
# and is associated with the "/exac/exac_coverage.ht" path. No upstream tasks
# or extra parameters are passed for this task.
pipeline.add_task(
    "import_exac_coverage",
    import_exac_coverage,
    "/exac/exac_coverage.ht",
)

###############################################
# Run
Beispiel #3
0

def truncate_clinvar_variant_ids(ds):
    """Return ``ds`` with over-long ``variant_id`` values shortened.

    Any ``variant_id`` whose length is at least 32,766 is replaced by its
    first 32,632 characters followed by ``"..."``; shorter IDs are kept
    unchanged.

    NOTE(review): the 32,766 threshold presumably mirrors a term-size limit
    of the downstream search index, and the 32,632 cut-off is noticeably
    smaller than that threshold -- confirm both values are intentional.
    """
    variant_id = ds.variant_id
    is_too_long = hl.len(variant_id) >= 32_766
    shortened = variant_id[:32_632] + "..."
    return ds.annotate(
        variant_id=hl.if_else(is_too_long, shortened, variant_id),
    )


DATASETS_CONFIG = {
    ##############################################################################################################
    # Genes
    ##############################################################################################################
    "genes_grch37": {
        "get_table":
        lambda: hl.read_table(
            genes_pipeline.get_task("annotate_grch37_genes_step_3").
            get_output_path()),
        "args": {
            "index":
            "genes_grch37",
            "index_fields": [
                "gene_id", "symbol_upper_case", "search_terms", "xstart",
                "xstop"
            ],
            "id_field":
            "gene_id",
            "block_size":
            200,
        },
    },
    "genes_grch38": {
        "get_table":
Beispiel #4
0
    "download_clinvar_grch38_vcf",
    "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz",
    "/external_sources/clinvar_grch38.vcf.gz",
)

# Register the "prepare_clinvar_grch38_variants" task, which runs
# `prepare_clinvar_variants` and is associated with the
# "/clinvar/clinvar_grch38_base.ht" path.
# NOTE(review): presumably the first dict wires upstream task outputs into the
# function and the second dict supplies literal keyword arguments -- confirm
# against the `add_task` signature.
pipeline.add_task(
    "prepare_clinvar_grch38_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch38_base.ht",
    # The VCF is provided by the "download_clinvar_grch38_vcf" task.
    {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")},
    {"reference_genome": "GRCh38"},
)

# Register the "annotate_clinvar_grch38_transcript_consequences" task, which
# runs `annotate_transcript_consequences` and is associated with the
# "/clinvar/clinvar_grch38_annotated.ht" path.
# NOTE(review): presumably the path is the task's output location and the dict
# wires upstream task outputs into the function's arguments -- confirm against
# the `add_task` signature.
pipeline.add_task(
    "annotate_clinvar_grch38_transcript_consequences",
    annotate_transcript_consequences,
    "/clinvar/clinvar_grch38_annotated.ht",
    {
        # Variants come from the ClinVar preparation task registered on this pipeline.
        "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"),
        # Transcript tables come from tasks owned by the separate genes pipeline.
        "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"),
        "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"),
    },
)

###############################################
# Run
###############################################

# Only run the pipeline when this file is executed as a script; importing the
# module just defines `pipeline` without running it.
if __name__ == "__main__":
    run_pipeline(pipeline)