from data_pipeline.data_types.coverage import prepare_coverage from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.gnomad_v2.gnomad_v2_lof_curation import import_gnomad_v2_lof_curation_results from data_pipeline.datasets.gnomad_v2.gnomad_v2_mnvs import ( prepare_gnomad_v2_mnvs, annotate_variants_with_mnvs, replace_quote_char, ) from data_pipeline.datasets.gnomad_v2.gnomad_v2_variants import prepare_gnomad_v2_variants from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() ############################################### # MNVs ############################################### pipeline.add_download_task( "download_mnvs", "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_v0.tsv", "/gnomad_v2/gnomad_mnv_coding_v0.tsv", ) pipeline.add_download_task( "download_3bp_mnvs", "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_3bp_fullannotation.tsv", "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation.tsv",
from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.exac.exac_coverage import import_exac_coverage from data_pipeline.datasets.exac.exac_variants import import_exac_vcf from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() ############################################### # Variants ############################################### pipeline.add_task( "import_exac_vcf", import_exac_vcf, "/exac/exac_variants.ht", { "path": "gs://gnomad-public/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz" }, ) pipeline.add_task( "annotate_exac_transcript_consequences", annotate_transcript_consequences, "/exac/exac_variants_annotated_1.ht", { "variants_path": pipeline.get_task("import_exac_vcf"),
from data_pipeline.data_types.gene import prepare_genes from data_pipeline.data_types.canonical_transcript import get_canonical_transcripts from data_pipeline.data_types.mane_select_transcript import import_mane_select_transcripts from data_pipeline.data_types.transcript import ( annotate_gene_transcripts_with_tissue_expression, annotate_gene_transcripts_with_refseq_id, extract_transcripts, ) from data_pipeline.data_types.gtex_tissue_expression import prepare_gtex_expression_data from data_pipeline.data_types.pext import prepare_pext_data from data_pipeline.datasets.exac.exac_constraint import prepare_exac_constraint from data_pipeline.datasets.exac.exac_regional_missense_constraint import prepare_exac_regional_missense_constraint from data_pipeline.datasets.gnomad_v2.gnomad_v2_constraint import prepare_gnomad_v2_constraint pipeline = Pipeline() ############################################### # Import GENCODE and HGNC files ############################################### pipeline.add_download_task( "download_gencode_v19_gtf", "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz", "/external_sources/gencode.v19.gtf.gz", ) pipeline.add_download_task( "download_gencode_v35_gtf", "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/gencode.v35.annotation.gtf.gz", "/external_sources/gencode.v35.gtf.gz",
from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.clinvar import prepare_clinvar_variants from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() pipeline.add_download_task( "download_clinvar_grch38_vcf", "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz", "/external_sources/clinvar_grch38.vcf.gz", ) pipeline.add_task( "prepare_clinvar_grch38_variants", prepare_clinvar_variants, "/clinvar/clinvar_grch38_base.ht", {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")}, {"reference_genome": "GRCh38"}, ) pipeline.add_task( "annotate_clinvar_grch38_transcript_consequences", annotate_transcript_consequences, "/clinvar/clinvar_grch38_annotated.ht", { "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"),
from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.coverage import prepare_coverage from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.gnomad_v3.gnomad_v3_variants import prepare_gnomad_v3_variants from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() ############################################### # Variants ############################################### pipeline.add_task( "prepare_gnomad_v3_variants", prepare_gnomad_v3_variants, "/gnomad_v3/gnomad_v3_variants_base.ht", {"path": "gs://gnomad/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.ht"}, ) pipeline.add_task( "annotate_gnomad_v3_transcript_consequences", annotate_transcript_consequences, "/gnomad_v3/gnomad_v3_variants_annotated_1.ht", { "variants_path": pipeline.get_task("prepare_gnomad_v3_variants"), "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"), "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"),
import hail as hl from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.clinvar import ( import_clinvar_xml, prepare_clinvar_variants, annotate_clinvar_variants_in_gnomad, ) from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() pipeline.add_download_task( "download_clinvar_xml", "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz", "/external_sources/clinvar.xml.gz", ) pipeline.add_task( "import_clinvar_xml", import_clinvar_xml, "/clinvar/clinvar.ht", {"clinvar_xml_path": pipeline.get_task("download_clinvar_xml")}, ) pipeline.add_task(
from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.variant import annotate_transcript_consequences from data_pipeline.datasets.mitochondria import prepare_mitochondrial_coverage, prepare_mitochondrial_variants from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() ############################################### # Variants ############################################### pipeline.add_task( "prepare_mitochondrial_variants", prepare_mitochondrial_variants, "/mitochondria/mitochondrial_variants_base.ht", { "path": "gs://gnomad-public-requester-pays/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.chrM.ht", "mnvs_path": "gs://gnomad-browser/mt_mnvs.tsv", }, ) pipeline.add_task( "annotate_mitochondrial_variant_transcript_consequences", annotate_transcript_consequences, "/mitochondria/mitochondrial_variants_annotated_1.ht", { "variants_path":
from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.datasets.gnomad_sv_v2 import prepare_gnomad_structural_variants pipeline = Pipeline() ############################################### # Variants ############################################### pipeline.add_task( "prepare_structural_variants", prepare_gnomad_structural_variants, "/gnomad_sv_v2/structural_variants.ht", { "vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.sites.vcf.gz", "controls_vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.controls_only.sites.vcf.gz", "non_neuro_vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.nonneuro.sites.vcf.gz", "histograms_path": "gs://gnomad-public/papers/2019-sv/gnomad_sv_hists.ht", }, ) ############################################### # Run ############################################### if __name__ == "__main__":