missing="NA",
                     quote='"',
                     types={
                         'gene_name': TString(),
                         'description': TString(),
                         'gene_id': TString(),
                         'xcase_lof': TInt(),
                         'xctrl_lof': TInt(),
                         'pval_lof': TDouble(),
                         'xcase_mpc': TInt(),
                         'xctrl_mpc': TInt(),
                         'pval_mpc': TDouble(),
                         'xcase_infrIndel': TInt(),
                         'xctrl_infrIndel': TInt(),
                         'pval_infrIndel': TDouble(),
                         'pval_meta': TDouble(),
                         'analysis_group': TString(),
                     })

# Client for the Elasticsearch instance at args.host / args.port.
es = ElasticsearchClient(args.host, args.port)

# Export settings for the gene-results key table. The index name is fixed and
# date-stamped rather than taken from the command line.
# delete_index_before_exporting=True: presumably drops any existing index of
# the same name before writing -- confirm against ElasticsearchClient.
gene_results_export_options = {
    "index_name": "epi_exome_gene_results_181107",
    "index_type_name": "result",
    "block_size": args.block_size,
    "num_shards": args.num_shards,
    "delete_index_before_exporting": True,
    "verbose": True,
}
es.export_kt_to_elasticsearch(kt, **gene_results_export_options)
# Example no. 2
# 0
# Attach each analysis group's result columns to the variants table as a
# struct field named after the group.
for group in analysis_groups:
    # Keep only this group's rows; the group label column is then redundant.
    group_results = variant_results.filter('analysis_group == "%s"' % group)
    group_results = group_results.drop("analysis_group")

    # Pack every result column into one struct field called <group> and keep
    # just the variant column plus that struct.
    struct_fields = ", ".join("%s: %s" % (col, col) for col in result_columns)
    group_results = group_results.annotate("%s = { %s }" % (group, struct_fields))
    group_results = group_results.select(["v", group])

    # NOTE(review): join() is called without a `how` argument, so the join
    # type is whatever the table API defaults to -- confirm that variants
    # missing from a group are meant to be handled that way.
    variants = variants.join(group_results.key_by("v"))

# Collect the per-group structs under a single `groups` field, then remove
# the now-duplicated top-level per-group fields.
groups_struct = ", ".join("%s:%s" % (name, name) for name in analysis_groups)
variants = variants.annotate("groups = { %s }" % groups_struct)
variants = variants.drop(list(analysis_groups))

# Convert `v` with the expression language's Variant(...) so the helper
# expressions below can be evaluated against it.
variants = variants.annotate("v = Variant(v)")

# Each derived field is annotated in turn from the expression string returned
# by its helper; the helper is called just before its annotate step.
for derived_field, build_expr in (
    ("variant_id", get_expr_for_variant_id),
    ("chrom", get_expr_for_contig),
    ("pos", get_expr_for_start_pos),
    ("xpos", get_expr_for_xpos),
):
    variants = variants.annotate("%s = %s" % (derived_field, build_expr()))

# `v` is no longer needed once the derived fields exist.
variants = variants.drop(["v"])

# Show the final schema before anything is written.
pprint.pprint(variants.schema)

# Client for the Elasticsearch instance at args.host / args.port.
es = ElasticsearchClient(args.host, args.port)

# Export the variants table to the index named on the command line.
# delete_index_before_exporting=True: presumably drops any existing index of
# the same name before writing -- confirm against ElasticsearchClient.
variant_export_options = {
    "index_name": args.index,
    "index_type_name": "variant",
    "block_size": args.block_size,
    "num_shards": args.num_shards,
    "delete_index_before_exporting": True,
    "verbose": True,
}
es.export_kt_to_elasticsearch(variants, **variant_export_options)
# Example no. 3
# 0
# Copy the selected mainTranscript sub-fields to top-level fields whose names
# are the camelCase form of the snake_case originals.
for field_name in transcript_annotations_to_keep:
    # First word is kept as-is; each later word is passed through
    # str.capitalize() and appended (e.g. "a_bc_de" -> "aBcDe").
    words = field_name.split("_")
    new_field_name = words[0] + "".join(word.capitalize() for word in words[1:])
    combined_kt = combined_kt.annotate(
        "%s = mainTranscript.%s" % (new_field_name, field_name))

# The struct itself is dropped once the wanted sub-fields have been copied out.
combined_kt = combined_kt.drop(["mainTranscript"])

# Show the final schema before anything is written.
pprint(combined_kt.schema)

# Fields passed to both disable_doc_values_for_fields and
# disable_index_for_fields in the export below.
DISABLE_INDEX_AND_DOC_VALUES_FOR_FIELDS = ("sortedTranscriptConsequences", )

print("======== Export to elasticsearch ======")
es = ElasticsearchClient(host=args.host, port=args.port)

# Export the combined table to the index/type named on the command line.
# delete_index_before_exporting=True: presumably drops any existing index of
# the same name before writing -- confirm against ElasticsearchClient.
combined_export_options = {
    "index_name": args.index,
    "index_type_name": args.index_type,
    "block_size": args.block_size,
    "num_shards": args.num_shards,
    "delete_index_before_exporting": True,
    "disable_doc_values_for_fields": DISABLE_INDEX_AND_DOC_VALUES_FOR_FIELDS,
    "disable_index_for_fields": DISABLE_INDEX_AND_DOC_VALUES_FOR_FIELDS,
    "verbose": True,
}
es.export_kt_to_elasticsearch(combined_kt, **combined_export_options)
    COVERAGE_PATHS = EXOME_COVERAGE_CSV_PATHS[-1]

# Load the coverage table(s) from COVERAGE_PATHS using the column types
# declared in `types`.
kt_coverage = hc.import_table(COVERAGE_PATHS, types=types)

# Give the columns plain names: '#chrom' loses its leading '#', and each
# bare-number column N becomes 'overN'.
# NOTE(review): the numeric columns look like coverage-depth thresholds --
# confirm against the input file format.
coverage_column_renames = {'#chrom': 'chrom'}
for threshold in (1, 5, 10, 15, 20, 25, 30, 50, 100):
    coverage_column_renames[str(threshold)] = 'over%d' % threshold
kt_coverage = kt_coverage.rename(coverage_column_renames)

# Show the resulting schema.
print(kt_coverage.schema)
print("======== Export exome coverage to elasticsearch ======")

# Client for the Elasticsearch instance at args.host / args.port.
es = ElasticsearchClient(host=args.host, port=args.port)

# Export the coverage table to the index/type named on the command line.
# delete_index_before_exporting=True: presumably drops any existing index of
# the same name before writing -- confirm against ElasticsearchClient.
coverage_export_options = {
    "index_name": args.index,
    "index_type_name": args.index_type,
    "num_shards": args.num_shards,
    "block_size": args.block_size,
    "delete_index_before_exporting": True,
    "verbose": True,
}
es.export_kt_to_elasticsearch(kt_coverage, **coverage_export_options)
# Example no. 5
# 0
# Load the gene-level results table from gene_results_url.
kt = hc.read_table(gene_results_url)

# Normalise column names: the Ensembl id column becomes 'gene_id', and the
# capitalised count / p-value columns are renamed to their lower-case form.
gene_column_renames = {'ensembl_gene_id': 'gene_id'}
for capitalised in (
    'Xcase_lof', 'Xctrl_lof', 'Pval_lof',
    'Xcase_mpc', 'Xctrl_mpc', 'Pval_mpc',
    'Pval_meta',
):
    gene_column_renames[capitalised] = capitalised.lower()
kt = kt.rename(gene_column_renames)

# Every row is labelled with the single analysis group "all".
kt = kt.annotate('analysis_group = "all"')

# Client for the Elasticsearch instance at args.host / args.port.
es = ElasticsearchClient(host=args.host, port=args.port)

# Export the gene-results table to a fixed, date-stamped index.
# delete_index_before_exporting=True: presumably drops any existing index of
# the same name before writing -- confirm against ElasticsearchClient.
gene_results_export_options = {
    "index_name": "schizophrenia_gene_results_171213",
    "index_type_name": "result",
    "block_size": args.block_size,
    "num_shards": args.num_shards,
    "delete_index_before_exporting": True,
    "verbose": True,
}
es.export_kt_to_elasticsearch(kt, **gene_results_export_options)