def sor_from_sb(
    sb: Union[hl.expr.ArrayNumericExpression, hl.expr.ArrayExpression]
) -> hl.expr.Float64Expression:
    """
    Compute the `SOR` (Symmetric Odds Ratio test) annotation from an `SB` (strand balance table) field.

    .. note::

        `sb` may be given in either of two layouts:

        - a flat array of four counts: [ref fwd, ref rev, alt fwd, alt rev]
        - a nested array of two length-two arrays: [[ref fwd, ref rev], [alt fwd, alt rev]]

        Anything that is not an `ArrayNumericExpression` is flattened before use.

    GATK code here: https://github.com/broadinstitute/gatk/blob/master/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/StrandOddsRatio.java

    :param sb: Count of ref/alt reads on each strand
    :return: SOR value
    """
    already_flat = isinstance(sb, hl.expr.ArrayNumericExpression)
    counts = sb if already_flat else hl.bind(lambda nested: hl.flatten(nested), sb)

    # Add a pseudocount of 1 to every cell so none of the ratios below divides by zero.
    counts = counts.map(lambda count: hl.float64(count) + 1)
    ref_fwd, ref_rev, alt_fwd, alt_rev = (counts[idx] for idx in range(4))

    # Odds ratio plus its reciprocal, so the statistic is symmetric in the two strands.
    ref_fwd_alt_rev = ref_fwd * alt_rev
    alt_fwd_ref_rev = alt_fwd * ref_rev
    symmetric_ratio = (ref_fwd_alt_rev / alt_fwd_ref_rev) + (
        alt_fwd_ref_rev / ref_fwd_alt_rev
    )

    # Smaller-over-larger strand count for each allele.
    ref_strand_ratio = hl.min(ref_rev, ref_fwd) / hl.max(ref_rev, ref_fwd)
    alt_strand_ratio = hl.min(alt_fwd, alt_rev) / hl.max(alt_fwd, alt_rev)

    return hl.log(symmetric_ratio) + hl.log(ref_strand_ratio) - hl.log(alt_strand_ratio)
def compute_same_hap_log_like(n, p, q, x):
    """
    Build the Hail expression for the count-weighted sum of log10 terms used as the
    same-haplotype log-likelihood.

    Reads `gt_counts`, `e` and `distance` from the enclosing scope. The nine log10
    terms are paired positionally with the entries of `gt_counts` and summed as
    ``count * term``.

    :param n: Not used by this function.
    :param p: Value used in the log10 terms (presumably a haplotype frequency; confirm against the caller).
    :param q: Value used in the log10 terms; the weighted sum is only used when ``q > 0``.
    :param x: Value used in the log10 terms (presumably a haplotype frequency; confirm against the caller).
    :return: Expression holding the weighted sum (or -1e31 when ``q <= 0``), plus the
        distance term when `distance` is not None.
    """
    q_present = q > 0

    # One log10 term per entry of `gt_counts`, in matching order.
    log_terms = [
        hl.log10(x) * 2,
        hl.log10(2 * x * e),
        hl.log10(e) * 2,
        hl.log10(2 * x * p),
        hl.log10(2 * (p * e + x * q)),
        hl.log10(2 * q * e),
        hl.log10(p) * 2,
        hl.log10(2 * p * q),
        hl.log10(q) * 2,
    ]
    weighted_sum = hl.fold(
        lambda total, pair: total + pair[0] * pair[1],
        0.0,
        hl.zip(gt_counts, log_terms),
    )

    # Very large negative value if no q is present
    log_like = hl.cond(q_present, weighted_sum, -1e31)

    if distance is None:
        return log_like

    # If desired, add distance posterior based on value derived from regression
    # (floored at -6).
    distance_term = hl.max(-6, hl.log10(0.97 - 0.03 * hl.log(distance + 1)))
    return log_like + distance_term
def test(self):
    """
    Smoke-test a broad set of Hail expression functions on a one-row table.

    Builds a single-row table, annotates it with one field per function under test,
    takes the first row and passes it through `convert_struct_to_dict`. Nothing is
    asserted about the values: the test only checks that building and evaluating
    the expressions does not raise.
    """
    field_types = {
        'a': hl.tint32,
        'b': hl.tint32,
        'c': hl.tint32,
        'd': hl.tint32,
        'e': hl.tstr,
        'f': hl.tarray(hl.tint32),
        'g': hl.tarray(hl.tstruct(x=hl.tint32, y=hl.tint32, z=hl.tstr)),
        'h': hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tstr),
        'i': hl.tbool,
        'j': hl.tstruct(x=hl.tint32, y=hl.tint32, z=hl.tstr),
    }
    schema = hl.tstruct(**field_types)

    only_row = {
        'a': 4,
        'b': 1,
        'c': 3,
        'd': 5,
        'e': "hello",
        'f': [1, 2, 3],
        'g': [hl.Struct(x=1, y=5, z='banana')],
        'h': hl.Struct(a=5, b=3, c='winter'),
        'i': True,
        'j': hl.Struct(x=3, y=2, z='summer'),
    }
    kt = hl.Table.parallelize([only_row], schema)

    # One annotation per expression function; keys become the new field names.
    annotations = {
        'chisq': hl.chisq(kt.a, kt.b, kt.c, kt.d),
        'ctt': hl.ctt(kt.a, kt.b, kt.c, kt.d, 5),
        'dict': hl.dict(hl.zip([kt.a, kt.b], [kt.c, kt.d])),
        'dpois': hl.dpois(4, kt.a),
        'drop': kt.h.drop('b', 'c'),
        'exp': hl.exp(kt.c),
        'fet': hl.fisher_exact_test(kt.a, kt.b, kt.c, kt.d),
        'hwe': hl.hardy_weinberg_p(1, 2, 1),
        'index': hl.index(kt.g, 'z'),
        'is_defined': hl.is_defined(kt.i),
        'is_missing': hl.is_missing(kt.i),
        'is_nan': hl.is_nan(hl.float64(kt.a)),
        'json': hl.json(kt.g),
        'log': hl.log(kt.a, kt.b),
        'log10': hl.log10(kt.c),
        'or_else': hl.or_else(kt.a, 5),
        'or_missing': hl.or_missing(kt.i, kt.j),
        'pchisqtail': hl.pchisqtail(kt.a, kt.b),
        'pcoin': hl.rand_bool(0.5),
        'pnorm': hl.pnorm(0.2),
        'pow': 2.0 ** kt.b,
        'ppois': hl.ppois(kt.a, kt.b),
        'qchisqtail': hl.qchisqtail(kt.a, kt.b),
        'range': hl.range(0, 5, kt.b),
        'rnorm': hl.rand_norm(0.0, kt.b),
        'rpois': hl.rand_pois(kt.a),
        'runif': hl.rand_unif(kt.b, kt.a),
        'select': kt.h.select('c', 'b'),
        'sqrt': hl.sqrt(kt.a),
        'to_str': [hl.str(5), hl.str(kt.a), hl.str(kt.g)],
        'where': hl.cond(kt.i, 5, 10),
    }

    first_row = kt.annotate(**annotations).take(1)[0]
    result = convert_struct_to_dict(first_row)
def compute_chet_log_like(n, p, q, x):
    """
    Build the Hail expression for the count-weighted sum of log10 terms used as the
    compound-heterozygous log-likelihood.

    Reads `gt_counts`, `e` and `distance` from the enclosing scope. The nine log10
    terms are paired positionally with the entries of `gt_counts` and summed as
    ``count * term``.

    :param n: Not used by this function.
    :param p: Value used in the log10 terms; must be > 0 for the weighted sum to be used.
    :param q: Value used in the log10 terms; must be > 0 for the weighted sum to be used.
    :param x: Value used in the log10 terms (presumably a haplotype frequency; confirm against the caller).
    :return: Expression holding the weighted sum (or -1e31 when `p` or `q` is not
        positive), plus the distance term when `distance` is not None.
    """
    # One log10 term per entry of `gt_counts`, in matching order.
    log_terms = [
        hl.log10(x) * 2,
        hl.log10(2 * x * q),
        hl.log10(q) * 2,
        hl.log10(2 * x * p),
        hl.log10(2 * (p * q + x * e)),
        hl.log10(2 * q * e),
        hl.log10(p) * 2,
        hl.log10(2 * p * e),
        hl.log10(e) * 2,
    ]
    res = hl.cond(
        (p > 0) & (q > 0),
        # Float seed so the accumulator has the same type as the float-valued sum.
        hl.fold(lambda i, j: i + j[0] * j[1], 0.0, hl.zip(gt_counts, log_terms)),
        # Very large negative value if p or q is not present. This was -1e-31,
        # which is effectively 0 and would mean a likelihood of about 1.
        -1e31,
    )

    # If desired, add distance posterior based on value derived from regression
    # (floored at -6).
    # NOTE(review): the same-haplotype counterpart uses hl.log(distance + 1);
    # hl.log(distance - 1) is undefined for distance <= 1. Confirm the intended form.
    if distance is not None:
        res = res + hl.max(-6, hl.log10(0.03 + 0.03 * hl.log(distance - 1)))
    return res