"gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chr5.cov.liftover.GRCh38.txt.gz",
            "gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chr6.cov.liftover.GRCh38.txt.gz",
            "gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chr7.cov.liftover.GRCh38.txt.gz",
            "gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chr8.cov.liftover.GRCh38.txt.gz",
            "gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chr9.cov.liftover.GRCh38.txt.gz",
            "gs://seqr-reference-data/GRCh38/gnomad/coverage/gnomad.chrX.cov.liftover.GRCh38.txt.gz",
        ],
        "output_path": "gs://%(output_bucket)s/GRCh38/gnomad/genomes.coverage.vds" % args.__dict__,
    },
}


# Maps each column name to the type object used for that column.
# '#chrom' is a string and 'pos' an int; every remaining column, including
# the numerically named ones, is a double.
# NOTE(review): presumably handed to the table import as its `types`
# argument further down the script -- confirm against the loop below.
field_types = {'#chrom': TString(), 'pos': TInt()}
field_types.update({
    column_name: TDouble()
    for column_name in (
        'mean', 'median',
        '1', '5', '10', '15', '20', '25', '30', '50', '100',
    )
})


for label, data_paths in COVERAGE_TSV_PATHS.items():
コード例 #2
0
# Hail context for this script; its log file is /tmp/hail.log.
hc = hail.HailContext(log="/tmp/hail.log")

# Location of the comma-separated gene results table that gets imported.
gene_results_url = "gs://epi-browser/2018-11-07_epi25-exome-browser-gene-results-table-reduced.csv"

# Explicit type for every column listed here, passed to import_table below.
# NOTE(review): kept after the HailContext creation, matching the original
# evaluation order -- Hail 0.1 type constructors appear to need a live
# context; confirm before moving this above `hc`.
gene_results_column_types = {
    'gene_name': TString(),
    'description': TString(),
    'gene_id': TString(),
    'xcase_lof': TInt(),
    'xctrl_lof': TInt(),
    'pval_lof': TDouble(),
    'xcase_mpc': TInt(),
    'xctrl_mpc': TInt(),
    'pval_mpc': TDouble(),
    'xcase_infrIndel': TInt(),
    'xctrl_infrIndel': TInt(),
    'pval_infrIndel': TDouble(),
    'pval_meta': TDouble(),
    'analysis_group': TString(),
}

# Import the table: comma-delimited, double-quote quoted, "NA" read as missing.
kt = hc.import_table(
    gene_results_url,
    types=gene_results_column_types,
    quote='"',
    missing="NA",
    delimiter=",",
)

# Client built from the host/port command-line arguments; used for the export
# call that follows.
es = ElasticsearchClient(args.host, args.port)

es.export_kt_to_elasticsearch(
    kt,
    index_name="epi_exome_gene_results_181107",
コード例 #3
0
               type=int)
# -s / --num-shards: integer option, defaults to 1.
# NOTE(review): `p` is presumably an argparse.ArgumentParser created above
# this excerpt -- confirm.
p.add_argument(
    "-s", "--num-shards",
    type=int,
    default=1,
    help="Number of shards",
)

# Parse the command line into `args`.
args = p.parse_args()

# Hail context logging to /hail.log (an extra branching_factor=1 argument is
# left commented out on this line).
hc = hail.HailContext(log="/hail.log")  # , branching_factor=1)

# Input CSV for this script.
FILE_PATH = 'gs://gnomad-browser/datasets/gtex_tissues_by_transcript_all_171012.csv'

types = {
    'adiposeSubcutaneous': TDouble(),
    'adiposeVisceralOmentum': TDouble(),
    'adrenalGland': TDouble(),
    'arteryAorta': TDouble(),
    'arteryCoronary': TDouble(),
    'arteryTibial': TDouble(),
    'bladder': TDouble(),
    'brainAmygdala': TDouble(),
    'brainAnteriorcingulatecortexBa24': TDouble(),
    'brainCaudateBasalganglia': TDouble(),
    'brainCerebellarhemisphere': TDouble(),
    'brainCerebellum': TDouble(),
    'brainCortex': TDouble(),
    'brainFrontalcortexBa9': TDouble(),
    'brainHippocampus': TDouble(),
    'brainHypothalamus': TDouble(),