"annotate_gnomad_v2_variants_with_mnvs", annotate_variants_with_mnvs, "/gnomad_v2/gnomad_v2_variants_annotated_1.ht", { "variants_path": pipeline.get_task("prepare_gnomad_v2_variants"), "mnvs_path": pipeline.get_task("prepare_gnomad_v2_mnvs"), }, ) pipeline.add_task( "annotate_gnomad_v2_transcript_consequences", annotate_transcript_consequences, "/gnomad_v2/gnomad_v2_variants_annotated_2.ht", { "variants_path": pipeline.get_task("annotate_gnomad_v2_variants_with_mnvs"), "transcripts_path": genes_pipeline.get_task("extract_grch37_transcripts"), }, ) ############################################### # LoF curation ############################################### pipeline.add_task( "prepare_gnomad_v2_lof_curation_results", import_gnomad_v2_lof_curation_results, "/gnomad_v2/gnomad_v2_lof_curation_results.ht", {"genes_path": genes_pipeline.get_task("prepare_grch37_genes")}, { "curation_result_paths": [ "gs://gnomad-public/truth-sets/source/lof-curation/AP4_curation_results.csv",
# Remaining arguments of a call that is opened above this view (the call and
# its first argument are not visible here). The final dict supplies the
# location of the ExAC sites VCF.
import_exac_vcf,
"/exac/exac_variants.ht",
{
    "path": "gs://gnomad-public/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz"
},
)

# Register the transcript-consequence annotation step for ExAC. It references
# the "import_exac_vcf" task for variants and the GRCh37 transcripts task of
# the separate genes pipeline.
pipeline.add_task(
    "annotate_exac_transcript_consequences",
    annotate_transcript_consequences,
    "/exac/exac_variants_annotated_1.ht",
    {
        "variants_path": pipeline.get_task("import_exac_vcf"),
        "transcripts_path": genes_pipeline.get_task("extract_grch37_transcripts"),
    },
)

###############################################
# Coverage
###############################################

# Register the ExAC coverage import. No input-task or extra-argument dicts
# are passed for this task.
pipeline.add_task(
    "import_exac_coverage",
    import_exac_coverage,
    "/exac/exac_coverage.ht",
)

###############################################
# Run
def truncate_clinvar_variant_ids(ds): return ds.annotate(variant_id=hl.if_else( hl.len(ds.variant_id) >= 32_766, ds.variant_id[:32_632] + "...", ds.variant_id)) DATASETS_CONFIG = { ############################################################################################################## # Genes ############################################################################################################## "genes_grch37": { "get_table": lambda: hl.read_table( genes_pipeline.get_task("annotate_grch37_genes_step_3"). get_output_path()), "args": { "index": "genes_grch37", "index_fields": [ "gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop" ], "id_field": "gene_id", "block_size": 200, }, }, "genes_grch38": { "get_table":
"download_clinvar_grch38_vcf", "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz", "/external_sources/clinvar_grch38.vcf.gz", ) pipeline.add_task( "prepare_clinvar_grch38_variants", prepare_clinvar_variants, "/clinvar/clinvar_grch38_base.ht", {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")}, {"reference_genome": "GRCh38"}, ) pipeline.add_task( "annotate_clinvar_grch38_transcript_consequences", annotate_transcript_consequences, "/clinvar/clinvar_grch38_annotated.ht", { "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"), "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"), "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"), }, ) ############################################### # Run ############################################### if __name__ == "__main__": run_pipeline(pipeline)