def vep_protein_domain_filter_expr(
    d: hl.expr.DictExpression,
) -> hl.expr.BooleanExpression:
    """
    Check whether any key of `d` is one of the pre-defined protein domain sources.

    The input is expected to be a dict<k,v> whose keys (k) are the
    source/database names and whose values (v) are the annotated domain ids.
    Only the keys are inspected; they are compared against the module-level
    `PROTEIN_DOMAIN_DB` collection.

    :param d: Dictionary expression keyed by protein domain source/database.
    :return: Boolean expression that is True when at least one key of `d`
        is present in `PROTEIN_DOMAIN_DB`.
    """
    known_sources = hl.set(PROTEIN_DOMAIN_DB)
    # Sources that are both annotated in `d` and listed as known databases.
    shared_sources = d.key_set().intersection(known_sources)
    return shared_sources.length() >= 1
def get_group_to_counts_expr(
    k: hl.expr.StructExpression, counts: hl.expr.DictExpression
) -> hl.expr.ArrayExpression:
    """
    Collect the entries of `counts` for every candidate key derived from `k`.

    Candidate keys are structs with fields `snv`, `all` and `csq`, enumerated
    from three nested ranges:

    - `snv` iterates over `hl.range(1, k.snv - 1, step=-1)` and is converted
      to a boolean,
    - `all` iterates over `hl.range(0, k.all + 1)` and is converted to a
      boolean,
    - `csq` iterates over `hl.range(0, k.csq + 1)` and is kept as is.

    Candidate keys that are not present in `counts` are dropped.

    :param k: Struct expression with `snv`, `all` and `csq` fields that bound
        the enumerated ranges.
    :param counts: Dictionary expression keyed by structs with `snv`, `all`
        and `csq` fields.
    :return: Array expression of the values of `counts` for the candidate
        keys it contains, in enumeration order.
    """

    def _keys_for_snv_and_all(snv, af):
        # Innermost level: one struct per csq value for a fixed (snv, all).
        return hl.range(0, k.csq + 1).map(
            lambda csq: hl.struct(snv=hl.bool(snv), all=hl.bool(af), csq=csq)
        )

    def _keys_for_snv(snv):
        # Middle level: flatten the csq enumeration over every `all` value.
        return hl.range(0, k.all + 1).flatmap(
            lambda af: _keys_for_snv_and_all(snv, af)
        )

    # Outer level counts downwards from 1 (step of -1).
    candidate_keys = hl.range(1, k.snv - 1, step=-1).flatmap(_keys_for_snv)
    present_keys = candidate_keys.filter(lambda key: counts.contains(key))
    return present_keys.map(lambda key: counts[key])
def inbreeding_coeff(
    gt_counts: hl.expr.DictExpression,
) -> hl.expr.Float32Expression:
    """
    Compute ``1 - count[1] / (2 * p * q * n)`` from a dictionary of counts.

    With ``count[i] = gt_counts.get(i, 0)``:

    - ``n = count[0] + count[1] + count[2]``
    - ``p = (2 * count[0] + count[1]) / (2 * n)``
    - ``q = (2 * count[2] + count[1]) / (2 * n)``

    Keys missing from `gt_counts` are treated as a count of 0.

    NOTE(review): presumably the keys 0, 1 and 2 are the number of
    non-reference alleles in a genotype -- confirm against the caller.

    :param gt_counts: Dictionary expression mapping the keys 0, 1 and 2 to
        counts.
    :return: Expression holding ``1 - count[1] / (2 * p * q * n)``.
    """
    count_0 = gt_counts.get(0, 0)
    count_1 = gt_counts.get(1, 0)
    count_2 = gt_counts.get(2, 0)

    n = count_0 + count_1 + count_2
    p = (2 * count_0 + count_1) / (2 * n)
    q = (2 * count_2 + count_1) / (2 * n)

    expected = 2 * p * q * n
    return 1 - (count_1 / expected)