def test_agg_cse(self): x = ir.GetField(ir.Ref('row'), 'idx') inner_sum = ir.ApplyBinaryPrimOp('+', x, x) agg = ir.ApplyAggOp('AggOp', [], [inner_sum]) outer_sum = ir.ApplyBinaryPrimOp('+', agg, agg) filter = ir.AggFilter(ir.TrueIR(), outer_sum, False) table_agg = ir.TableAggregate(ir.TableRange(5, 1), ir.MakeTuple([outer_sum, filter])) expected = ( '(TableAggregate (TableRange 5 1)' ' (AggLet __cse_1 False (GetField idx (Ref row))' ' (AggLet __cse_3 False (ApplyBinaryPrimOp `+` (Ref __cse_1) (Ref __cse_1))' ' (Let __cse_2 (ApplyAggOp AggOp () ((Ref __cse_3)))' ' (MakeTuple (0 1)' ' (ApplyBinaryPrimOp `+` (Ref __cse_2) (Ref __cse_2))' ' (AggFilter False (True)' ' (Let __cse_4 (ApplyAggOp AggOp () ((Ref __cse_3)))' ' (ApplyBinaryPrimOp `+` (Ref __cse_4) (Ref __cse_4)))))))))') assert expected == CSERenderer()(table_agg)
def value_irs(self): b = ir.TrueIR() c = ir.Ref('c') i = ir.I32(5) j = ir.I32(7) st = ir.Str('Hail') a = ir.Ref('a') aa = ir.Ref('aa') da = ir.Ref('da') nd = ir.Ref('nd') v = ir.Ref('v') s = ir.Ref('s') t = ir.Ref('t') call = ir.Ref('call') table = ir.TableRange(5, 3) matrix_read = ir.MatrixRead( ir.MatrixNativeReader( resource('backward_compatability/1.0.0/matrix_table/0.hmt'), None, False), False, False) block_matrix_read = ir.BlockMatrixRead( ir.BlockMatrixNativeReader('fake_file_path')) value_irs = [ i, ir.I64(5), ir.F32(3.14), ir.F64(3.14), s, ir.TrueIR(), ir.FalseIR(), ir.Void(), ir.Cast(i, hl.tfloat64), ir.NA(hl.tint32), ir.IsNA(i), ir.If(b, i, j), ir.Coalesce(i, j), ir.Let('v', i, v), ir.Ref('x'), ir.ApplyBinaryPrimOp('+', i, j), ir.ApplyUnaryPrimOp('-', i), ir.ApplyComparisonOp('EQ', i, j), ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)), ir.ArrayRef(a, i, ir.Str('foo')), ir.ArrayLen(a), ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)), ir.ArraySort(a, 'l', 'r', ir.ApplyComparisonOp("LT", ir.Ref('l'), ir.Ref('r'))), ir.ToSet(a), ir.ToDict(da), ir.ToArray(a), ir.MakeNDArray( ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)], hl.tarray(hl.tfloat64)), ir.MakeTuple([ir.I64(1), ir.I64(2)]), ir.TrueIR()), ir.NDArrayShape(nd), ir.NDArrayReshape(nd, ir.MakeTuple([ir.I64(5)])), ir.NDArrayRef(nd, [ir.I64(1), ir.I64(2)]), ir.NDArrayMap(nd, 'v', v), ir.NDArrayMatMul(nd, nd), ir.LowerBoundOnOrderedCollection(a, i, True), ir.GroupByKey(da), ir.ArrayMap(a, 'v', v), ir.ArrayZip([a, a], ['a', 'b'], ir.TrueIR(), 'ExtendNA'), ir.ArrayFilter(a, 'v', v), ir.ArrayFlatMap(aa, 'v', v), ir.ArrayFold(a, ir.I32(0), 'x', 'v', v), ir.ArrayScan(a, ir.I32(0), 'x', 'v', v), ir.ArrayLeftJoinDistinct(a, a, 'l', 'r', ir.I32(0), ir.I32(1)), ir.ArrayFor(a, 'v', ir.Void()), ir.AggFilter(ir.TrueIR(), ir.I32(0), False), ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0), False), ir.AggGroupBy(ir.TrueIR(), ir.I32(0), False), ir.AggArrayPerElement( ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', 'y', ir.I32(0), False), ir.ApplyAggOp('Collect', [], [ir.I32(0)]), ir.ApplyScanOp('Collect', [], [ir.I32(0)]), ir.ApplyAggOp('CallStats', [ir.I32(2)], [call]), ir.ApplyAggOp('TakeBy', [ir.I32(10)], [ir.F64(-2.11), ir.F64(-2.11)]), ir.Begin([ir.Void()]), ir.MakeStruct([('x', i)]), ir.SelectFields(s, ['x', 'z']), ir.InsertFields(s, [('x', i)], None), ir.GetField(s, 'x'), ir.MakeTuple([i, b]), ir.GetTupleElement(t, 1), ir.Die(ir.Str('mumblefoo'), hl.tfloat64), ir.Apply('&&', hl.tbool, b, c), ir.Apply('toFloat64', hl.tfloat64, i), ir.Literal(hl.tarray(hl.tint32), [1, 2, None]), ir.TableCount(table), ir.TableGetGlobals(table), ir.TableCollect(ir.TableKeyBy(table, [], False)), ir.TableToValueApply(table, {'name': 'ForceCountTable'}), ir.MatrixToValueApply(matrix_read, {'name': 'ForceCountMatrixTable'}), ir.TableAggregate( table, ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [], [ir.I32(0)]))])), ir.TableWrite( table, ir.TableNativeWriter(new_temp_file(), False, True, "fake_codec_spec$$")), ir.TableWrite( table, ir.TableTextWriter(new_temp_file(), None, True, 0, ",")), ir.MatrixAggregate( matrix_read, ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [], [ir.I32(0)]))])), ir.MatrixWrite( matrix_read, ir.MatrixNativeWriter(new_temp_file(), False, False, "", None, None)), ir.MatrixWrite( matrix_read, ir.MatrixNativeWriter( new_temp_file(), False, False, "", '[{"start":{"row_idx":0},"end":{"row_idx": 10},"includeStart":true,"includeEnd":false}]', hl.dtype('array<interval<struct{row_idx:int32}>>'))), ir.MatrixWrite( matrix_read, ir.MatrixVCFWriter(new_temp_file(), None, False, None)), ir.MatrixWrite(matrix_read, ir.MatrixGENWriter(new_temp_file(), 4)), ir.MatrixWrite(matrix_read, ir.MatrixPLINKWriter(new_temp_file())), ir.MatrixMultiWrite([matrix_read, matrix_read], ir.MatrixNativeMultiWriter( new_temp_file(), False, False)), ir.BlockMatrixWrite( block_matrix_read, ir.BlockMatrixNativeWriter('fake_file_path', False, False, False)), ir.LiftMeOut(ir.I32(1)) ] return value_irs
def _to_expr(e, dtype): if e is None: return None elif isinstance(e, Expression): if e.dtype != dtype: assert is_numeric(dtype), 'expected {}, got {}'.format( dtype, e.dtype) if dtype == tfloat64: return hl.float64(e) elif dtype == tfloat32: return hl.float32(e) elif dtype == tint64: return hl.int64(e) else: assert dtype == tint32 return hl.int32(e) return e elif not is_compound(dtype): # these are not container types and cannot contain expressions if we got here return e elif isinstance(dtype, tstruct): new_fields = [] found_expr = False for f, t in dtype.items(): value = _to_expr(e[f], t) found_expr = found_expr or isinstance(value, Expression) new_fields.append(value) if not found_expr: return e else: exprs = [ new_fields[i] if isinstance(new_fields[i], Expression) else hl.literal(new_fields[i], dtype[i]) for i in range(len(new_fields)) ] fields = {name: expr for name, expr in zip(dtype.keys(), exprs)} from .typed_expressions import StructExpression return StructExpression._from_fields(fields) elif isinstance(dtype, tarray): elements = [] found_expr = False for element in e: value = _to_expr(element, dtype.element_type) found_expr = found_expr or isinstance(value, Expression) elements.append(value) if not found_expr: return e else: assert len(elements) > 0 exprs = [ element if isinstance(element, Expression) else hl.literal( element, dtype.element_type) for element in elements ] indices, aggregations = unify_all(*exprs) x = ir.MakeArray([e._ir for e in exprs], None) return expressions.construct_expr(x, dtype, indices, aggregations) elif isinstance(dtype, tset): elements = [] found_expr = False for element in e: value = _to_expr(element, dtype.element_type) found_expr = found_expr or isinstance(value, Expression) elements.append(value) if not found_expr: return e else: assert len(elements) > 0 exprs = [ element if isinstance(element, Expression) else hl.literal( element, dtype.element_type) for element in elements ] indices, aggregations = unify_all(*exprs) x = ir.ToSet( ir.ToStream(ir.MakeArray([e._ir for e in exprs], None))) return expressions.construct_expr(x, dtype, indices, aggregations) elif isinstance(dtype, ttuple): elements = [] found_expr = False assert len(e) == len(dtype.types) for i in range(len(e)): value = _to_expr(e[i], dtype.types[i]) found_expr = found_expr or isinstance(value, Expression) elements.append(value) if not found_expr: return e else: exprs = [ elements[i] if isinstance(elements[i], Expression) else hl.literal(elements[i], dtype.types[i]) for i in range(len(elements)) ] indices, aggregations = unify_all(*exprs) x = ir.MakeTuple([expr._ir for expr in exprs]) return expressions.construct_expr(x, dtype, indices, aggregations) elif isinstance(dtype, tdict): keys = [] values = [] found_expr = False for k, v in e.items(): k_ = _to_expr(k, dtype.key_type) v_ = _to_expr(v, dtype.value_type) found_expr = found_expr or isinstance(k_, Expression) found_expr = found_expr or isinstance(v_, Expression) keys.append(k_) values.append(v_) if not found_expr: return e else: assert len(keys) > 0 # Here I use `to_expr` to call `lit` the keys and values separately. # I anticipate a common mode is statically-known keys and Expression # values. key_array = to_expr(keys, tarray(dtype.key_type)) value_array = to_expr(values, tarray(dtype.value_type)) return hl.dict(hl.zip(key_array, value_array)) elif isinstance(dtype, hl.tndarray): return hl.nd.array(e) else: raise NotImplementedError(dtype)
def value_irs(self): b = ir.TrueIR() c = ir.Ref('c') i = ir.I32(5) j = ir.I32(7) st = ir.Str('Hail') a = ir.Ref('a') aa = ir.Ref('aa') da = ir.Ref('da') v = ir.Ref('v') s = ir.Ref('s') t = ir.Ref('t') call = ir.Ref('call') table = ir.TableRange(5, 3) matrix_read = ir.MatrixRead(ir.MatrixNativeReader( resource('backward_compatability/1.0.0/matrix_table/0.hmt')), False, False) value_irs = [ i, ir.I64(5), ir.F32(3.14), ir.F64(3.14), s, ir.TrueIR(), ir.FalseIR(), ir.Void(), ir.Cast(i, hl.tfloat64), ir.NA(hl.tint32), ir.IsNA(i), ir.If(b, i, j), ir.Let('v', i, v), ir.Ref('x'), ir.ApplyBinaryOp('+', i, j), ir.ApplyUnaryOp('-', i), ir.ApplyComparisonOp('EQ', i, j), ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)), ir.ArrayRef(a, i), ir.ArrayLen(a), ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)), ir.ArraySort(a, b, False), ir.ToSet(a), ir.ToDict(da), ir.ToArray(a), ir.LowerBoundOnOrderedCollection(a, i, True), ir.GroupByKey(da), ir.ArrayMap(a, 'v', v), ir.ArrayFilter(a, 'v', v), ir.ArrayFlatMap(aa, 'v', v), ir.ArrayFold(a, ir.I32(0), 'x', 'v', v), ir.ArrayScan(a, ir.I32(0), 'x', 'v', v), ir.ArrayFor(a, 'v', ir.Void()), ir.AggFilter(ir.TrueIR(), ir.I32(0)), ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0)), ir.AggGroupBy(ir.TrueIR(), ir.I32(0)), ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]), ir.ApplyScanOp('Collect', [], None, [ir.I32(0)]), ir.ApplyAggOp('Histogram', [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)], None, [ir.F64(-2.11)]), ir.ApplyAggOp('CallStats', [], [ir.I32(2)], [call]), ir.ApplyAggOp('TakeBy', [ir.I32(10)], None, [ir.F64(-2.11), ir.F64(-2.11)]), ir.Begin([ir.Void()]), ir.MakeStruct([('x', i)]), ir.SelectFields(s, ['x', 'z']), ir.InsertFields(s, [('x', i)]), ir.GetField(s, 'x'), ir.MakeTuple([i, b]), ir.GetTupleElement(t, 1), ir.StringSlice(st, ir.I32(1), ir.I32(2)), ir.StringLength(st), ir.In(2, hl.tfloat64), ir.Die(ir.Str('mumblefoo'), hl.tfloat64), ir.Apply('&&', b, c), ir.Apply('toFloat64', i), ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)), ir.Literal(hl.tarray(hl.tint32), [1, 2, None]), ir.TableCount(table), ir.TableGetGlobals(table), ir.TableCollect(table), ir.TableAggregate(table, ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))])), ir.TableWrite(table, new_temp_file(), False, True, "fake_codec_spec$$"), ir.MatrixAggregate(matrix_read, ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))])), ir.MatrixWrite(matrix_read, ir.MatrixNativeWriter(new_temp_file(), False, False, "")), ir.MatrixWrite(matrix_read, ir.MatrixVCFWriter(new_temp_file(), None, False, None)), ir.MatrixWrite(matrix_read, ir.MatrixGENWriter(new_temp_file(), 4)), ir.MatrixWrite(matrix_read, ir.MatrixPLINKWriter(new_temp_file())), ] return value_irs
def value_irs(self): b = ir.TrueIR() c = ir.Ref('c') i = ir.I32(5) j = ir.I32(7) st = ir.Str('Hail') a = ir.Ref('a') aa = ir.Ref('aa') da = ir.Ref('da') v = ir.Ref('v') s = ir.Ref('s') t = ir.Ref('t') call = ir.Ref('call') collect_sig = ir.AggSignature('Collect', [], None, [hl.tint32]) call_stats_sig = ir.AggSignature('CallStats', [], [hl.tint32], [hl.tcall]) call_stats_type = hl.tstruct(AC=hl.tarray(hl.tint32), AF=hl.tarray(hl.tfloat64), AN=hl.tint32, homozygote_count=hl.tarray(hl.tint32)) hist_sig = ir.AggSignature( 'Histogram', [hl.tfloat64, hl.tfloat64, hl.tint32], None, [hl.tfloat64]) hist_type = hl.tstruct(bin_edges=hl.tarray(hl.tfloat64), bin_freq=hl.tarray(hl.tint64), n_smaller=hl.tint64, n_larger=hl.tint64) take_by_sig = ir.AggSignature('TakeBy', [hl.tint32], None, [hl.tfloat64, hl.tfloat64]) take_by_type = hl.tarray(hl.tfloat64) value_irs = [ i, ir.I64(5), ir.F32(3.14), ir.F64(3.14), s, ir.TrueIR(), ir.FalseIR(), ir.Void(), ir.Cast(i, hl.tfloat64), ir.NA(hl.tint32), ir.IsNA(i), ir.If(b, i, j), ir.Let('v', i, v), ir.Ref('x'), ir.ApplyBinaryOp('+', i, j), ir.ApplyUnaryOp('-', i), ir.ApplyComparisonOp('EQ', i, j), ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)), ir.ArrayRef(a, i), ir.ArrayLen(a), ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)), ir.ArraySort(a, b, False), ir.ToSet(a), ir.ToDict(da), ir.ToArray(a), ir.LowerBoundOnOrderedCollection(a, i, True), ir.GroupByKey(da), ir.ArrayMap(a, 'v', v), ir.ArrayFilter(a, 'v', v), ir.ArrayFlatMap(aa, 'v', v), ir.ArrayFold(a, ir.I32(0), 'x', 'v', v), ir.ArrayScan(a, ir.I32(0), 'x', 'v', v), ir.ArrayFor(a, 'v', ir.Void()), ir.AggFilter(ir.TrueIR(), ir.I32(0)), ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0)), ir.AggGroupBy(ir.TrueIR(), ir.I32(0)), ir.ApplyAggOp([], None, [ir.I32(0)], collect_sig), ir.ApplyScanOp([], None, [ir.I32(0)], collect_sig), ir.ApplyAggOp([ir.F64(-5.0), ir.F64(5.0), ir.I32(100)], None, [ir.F64(-2.11)], hist_sig), ir.ApplyAggOp([], [ir.I32(2)], [call], call_stats_sig), ir.ApplyAggOp([ir.I32(10)], None, [ir.F64(-2.11), ir.F64(-2.11)], take_by_sig), ir.InitOp(ir.I32(0), [ir.I32(2)], call_stats_sig), ir.SeqOp(ir.I32(0), [i], collect_sig), ir.SeqOp(ir.I32(0), [ir.F64(-2.11), ir.I32(17)], take_by_sig), ir.Begin([ir.Void()]), ir.MakeStruct([('x', i)]), ir.SelectFields(s, ['x', 'z']), ir.InsertFields(s, [('x', i)]), ir.GetField(s, 'x'), ir.MakeTuple([i, b]), ir.GetTupleElement(t, 1), ir.StringSlice(st, ir.I32(1), ir.I32(2)), ir.StringLength(st), ir.In(2, hl.tfloat64), ir.Die('mumblefoo', hl.tfloat64), ir.Apply('&&', b, c), ir.Apply('toFloat64', i), ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)), ir.Literal(hl.tarray(hl.tint32), [1, 2, None]), ] return value_irs
def value_irs(self): b = ir.TrueIR() c = ir.Ref('c', hl.tbool) i = ir.I32(5) j = ir.I32(7) st = ir.Str('Hail') a = ir.Ref('a', hl.tarray(hl.tint32)) aa = ir.Ref('aa', hl.tarray(hl.tarray(hl.tint32))) da = ir.Ref('da', hl.tarray(hl.ttuple(hl.tint32, hl.tstr))) v = ir.Ref('v', hl.tint32) s = ir.Ref('s', hl.tstruct(x=hl.tint32, y=hl.tint64, z=hl.tfloat64)) t = ir.Ref('t', hl.ttuple(hl.tint32, hl.tint64, hl.tfloat64)) call = ir.Ref('call', hl.tcall) collect_sig = ir.AggSignature('Collect', [], None, [hl.tint32]) call_stats_sig = ir.AggSignature('CallStats', [], [hl.tint32], [hl.tcall]) hist_sig = ir.AggSignature('Histogram', [hl.tfloat64, hl.tfloat64, hl.tint32], None, [hl.tfloat64]) take_by_sig = ir.AggSignature('TakeBy', [hl.tint32], None, [hl.tfloat64, hl.tfloat64]) value_irs = [ i, ir.I64(5), ir.F32(3.14), ir.F64(3.14), s, ir.TrueIR(), ir.FalseIR(), ir.Void(), ir.Cast(i, hl.tfloat64), ir.NA(hl.tint32), ir.IsNA(i), ir.If(b, i, j), ir.Let('v', i, v), ir.Ref('x', hl.tint32), ir.ApplyBinaryOp('+', i, j), ir.ApplyUnaryOp('-', i), ir.ApplyComparisonOp('EQ', i, j), ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)), ir.ArrayRef(a, i), ir.ArrayLen(a), ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)), ir.ArraySort(a, b, False), ir.ToSet(a), ir.ToDict(da), ir.ToArray(a), ir.LowerBoundOnOrderedCollection(a, i, True), ir.GroupByKey(da), ir.ArrayMap(a, 'v', v), ir.ArrayFilter(a, 'v', v), ir.ArrayFlatMap(aa, 'v', v), ir.ArrayFold(a, ir.I32(0), 'x', 'v', v), ir.ArrayFor(a, 'v', ir.Void()), ir.ApplyAggOp(ir.I32(0), [], None, collect_sig), ir.ApplyScanOp(ir.I32(0), [], None, collect_sig), ir.ApplyAggOp(ir.F64(-2.11), [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)], None, hist_sig), ir.ApplyAggOp(call, [], [ir.I32(2)], call_stats_sig), ir.ApplyAggOp(ir.F64(-2.11), [ir.I32(10)], None, take_by_sig), ir.InitOp(ir.I32(0), [ir.I32(2)], call_stats_sig), ir.SeqOp(ir.I32(0), [i], collect_sig), ir.SeqOp(ir.I32(0), [ir.F64(-2.11), ir.I32(17)], take_by_sig), ir.Begin([ir.Void()]), ir.MakeStruct([('x', i)]), ir.SelectFields(s, ['x', 'z']), ir.InsertFields(s, [('x', i)]), ir.GetField(s, 'x'), ir.MakeTuple([i, b]), ir.GetTupleElement(t, 1), ir.StringSlice(st, ir.I32(1), ir.I32(2)), ir.StringLength(st), ir.In(2, hl.tfloat64), ir.Die('mumblefoo', hl.tfloat64), ir.Apply('&&', b, c), ir.Apply('toFloat64', i), ir.Apply('isDefined', s), ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)) ] return value_irs