def blockmatrix_irs(self):
    """Assemble the sample BlockMatrix IR nodes used by this test class.

    Returns:
        A list of ``ir`` BlockMatrix nodes covering native and persisted
        reads, element-wise maps, value-to-matrix conversions, broadcasts,
        a dot product, sparsify/densify nodes and a slice.
    """
    two = ir.F64(2)
    three_two = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

    # Base read that most of the other nodes wrap.
    example = ir.BlockMatrixRead(
        ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))

    # Element-wise maps over the base read.
    left_plus_right = ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r'))
    summed = ir.BlockMatrixMap2(
        example, example, 'l', 'r', left_plus_right, "Union")
    negated = ir.BlockMatrixMap(
        example, 'element', ir.ApplyUnaryPrimOp('-', ir.Ref('element')), False)
    element_expr = construct_expr(ir.Ref('element'), hl.tfloat64)
    rooted = ir.BlockMatrixMap(
        example, 'element', hl.sqrt(element_expr)._ir, False)

    from_persist = ir.BlockMatrixRead(ir.BlockMatrixPersistReader('x', example))

    # Values lifted into block matrices, then broadcast to 2x2.
    scalar_bm = ir.ValueToBlockMatrix(two, [1, 1], 1)
    col_bm = ir.ValueToBlockMatrix(three_two, [2, 1], 1)
    row_bm = ir.ValueToBlockMatrix(three_two, [1, 2], 1)
    scalar_bcast = ir.BlockMatrixBroadcast(scalar_bm, [], [2, 2], 256)
    col_bcast = ir.BlockMatrixBroadcast(col_bm, [0], [2, 2], 256)
    row_bcast = ir.BlockMatrixBroadcast(row_bm, [1], [2, 2], 256)
    transposed = ir.BlockMatrixBroadcast(scalar_bcast, [1, 0], [2, 2], 256)
    product = ir.BlockMatrixDot(scalar_bcast, transposed)

    # Literal arguments for the three sparsifier kinds.
    rect_lit = ir.Literal(hl.tarray(hl.tint64), [0, 1, 5, 6])
    band_lit = ir.Literal(hl.ttuple(hl.tint64, hl.tint64), (-1, 1))
    intervals_lit = ir.Literal(
        hl.ttuple(hl.tarray(hl.tint64), hl.tarray(hl.tint64)),
        ([0, 1, 5, 6], [5, 6, 8, 9]))
    by_rectangle = ir.BlockMatrixSparsify(
        example, rect_lit, ir.RectangleSparsifier)
    by_band = ir.BlockMatrixSparsify(
        example, band_lit, ir.BandSparsifier(True))
    by_row_intervals = ir.BlockMatrixSparsify(
        example, intervals_lit, ir.RowIntervalSparsifier(True))
    densified = ir.BlockMatrixDensify(example)

    # l ** r built through the expression layer, then unwrapped to raw IR.
    base = construct_expr(ir.Ref('l'), hl.tfloat64)
    exponent = construct_expr(ir.Ref('r'), hl.tfloat64)
    power = (base ** exponent)._ir
    squared = ir.BlockMatrixMap2(
        scalar_bm, scalar_bm, 'l', 'r', power, "NeedsDense")

    sliced = ir.BlockMatrixSlice(product, [slice(0, 2, 1), slice(0, 1, 1)])

    return [
        example,
        from_persist,
        summed,
        negated,
        rooted,
        scalar_bm,
        col_bm,
        row_bm,
        scalar_bcast,
        col_bcast,
        row_bcast,
        squared,
        transposed,
        by_rectangle,
        by_band,
        by_row_intervals,
        densified,
        product,
        sliced,
    ]
def test_parses(self):
    """Check that the backend can parse the string form of each BlockMatrix IR.

    A 1x1 block matrix is persisted under the id ``'x'`` first, so that the
    persist-reader nodes (the extra one built here and any returned by
    ``self.blockmatrix_irs()`` that refer to ``'x'``) have something to
    resolve against. The persisted matrix is released in a ``finally``
    clause so a parse failure does not leave it behind.
    """
    backend = Env.spark_backend('BlockMatrixIRTests.test_parses')

    bmir = hl.linalg.BlockMatrix.fill(1, 1, 0.0)._bmir
    backend.execute(
        ir.BlockMatrixWrite(bmir, ir.BlockMatrixPersistWriter('x', 'MEMORY_ONLY')))

    # Everything after the persist succeeded is guarded, so 'x' is always
    # unpersisted even when one of the IRs fails to parse.
    try:
        persist = ir.BlockMatrixRead(ir.BlockMatrixPersistReader('x', bmir))
        for x in self.blockmatrix_irs() + [persist]:
            backend._parse_blockmatrix_ir(str(x))
    finally:
        backend.unpersist_block_matrix('x')
def blockmatrix_irs(self):
    """Assemble the sample BlockMatrix IR nodes used by this test class.

    Returns:
        A list of ``ir`` BlockMatrix nodes: a native read, element-wise
        maps, value-to-matrix conversions, broadcasts, a dot product and a
        slice.
    """
    two = ir.F64(2)
    three_two = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

    # Base read that the element-wise maps wrap.
    example = ir.BlockMatrixRead(
        ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))

    left_plus_right = ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r'))
    summed = ir.BlockMatrixMap2(example, example, 'l', 'r', left_plus_right)
    negated = ir.BlockMatrixMap(
        example, 'element', ir.ApplyUnaryPrimOp('-', ir.Ref('element')))
    element_expr = construct_expr(ir.Ref('element'), hl.tfloat64)
    rooted = ir.BlockMatrixMap(example, 'element', hl.sqrt(element_expr)._ir)

    # Values lifted into block matrices, then broadcast to 2x2.
    scalar_bm = ir.ValueToBlockMatrix(two, [1, 1], 1)
    col_bm = ir.ValueToBlockMatrix(three_two, [2, 1], 1)
    row_bm = ir.ValueToBlockMatrix(three_two, [1, 2], 1)
    scalar_bcast = ir.BlockMatrixBroadcast(scalar_bm, [], [2, 2], 256)
    col_bcast = ir.BlockMatrixBroadcast(col_bm, [0], [2, 2], 256)
    row_bcast = ir.BlockMatrixBroadcast(row_bm, [1], [2, 2], 256)
    transposed = ir.BlockMatrixBroadcast(scalar_bcast, [1, 0], [2, 2], 256)
    product = ir.BlockMatrixDot(scalar_bcast, transposed)

    # l ** r built through the expression layer, then unwrapped to raw IR.
    base = construct_expr(ir.Ref('l'), hl.tfloat64)
    exponent = construct_expr(ir.Ref('r'), hl.tfloat64)
    power = (base ** exponent)._ir
    squared = ir.BlockMatrixMap2(scalar_bm, scalar_bm, 'l', 'r', power)

    sliced = ir.BlockMatrixSlice(product, [slice(0, 2, 1), slice(0, 1, 1)])

    return [
        example,
        summed,
        negated,
        rooted,
        scalar_bm,
        col_bm,
        row_bm,
        scalar_bcast,
        col_bcast,
        row_bcast,
        squared,
        transposed,
        product,
        sliced,
    ]
def block_matrix_irs(self):
    """Assemble a small set of BlockMatrix IR nodes for the tests.

    Returns:
        A list holding a read, an element-wise addition of that read with
        itself, scalar/vector value-to-matrix nodes, and scalar, column and
        row broadcasts.
    """
    two = ir.F64(2)
    three_two = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

    example = ir.BlockMatrixRead(resource('blockmatrix_example/0'))
    summed = BlockMatrixIRTests._make_element_wise_op_ir(example, example, '+')

    scalar_bm = ir.ValueToBlockMatrix(two, [], 1, [])
    vector_bm = ir.ValueToBlockMatrix(three_two, [2], 1, [False])

    # The same vector node feeds both the column and the row broadcast.
    scalar_bcast = ir.BlockMatrixBroadcast(
        scalar_bm, "scalar", [2, 2], 256, [False, False])
    col_bcast = ir.BlockMatrixBroadcast(
        vector_bm, "col", [2, 2], 256, [False, False])
    row_bcast = ir.BlockMatrixBroadcast(
        vector_bm, "row", [2, 2], 256, [False, False])

    return [
        example,
        summed,
        scalar_bm,
        vector_bm,
        scalar_bcast,
        col_bcast,
        row_bcast,
    ]
def table_irs(self):
    """Assemble the sample Table IR nodes used by this test class.

    Returns:
        A list of ``ir`` table nodes built on a native table read, a native
        matrix-table read and a block-matrix read of a placeholder path.
    """
    b = ir.TrueIR()
    table_read = ir.TableRead(
        ir.TableNativeReader(
            resource('backward_compatability/1.0.0/table/0.ht'), None, False),
        False)
    # NOTE(review): this parsed row type is not referenced below. It is kept
    # because the parse itself may have effects that cannot be ruled out from
    # this block alone - confirm before deleting.
    table_read_row_type = hl.dtype(
        'struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}'
    )
    matrix_read = ir.MatrixRead(
        ir.MatrixNativeReader(
            resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
            None, False),
        False, False)
    block_matrix_read = ir.BlockMatrixRead(
        ir.BlockMatrixNativeReader('fake_file_path'))
    aa = hl.literal([[0.00], [0.01], [0.02]])._ir

    table_irs = [
        ir.TableKeyBy(table_read, ['m', 'd'], False),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableKeyByAndAggregate(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))]),
            ir.MakeStruct([('b', ir.I32(5))]),
            1, 2),
        ir.TableJoin(table_read, ir.TableRange(100, 10), 'inner', 1),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(
            ir.MakeStruct([
                ('rows', ir.Literal(
                    hl.tarray(hl.tstruct(a=hl.tint32)),
                    [{'a': None}, {'a': 5}, {'a': -3}])),
                ('global', ir.MakeStruct([])),
            ]),
            None),
        ir.TableMapRows(
            ir.TableKeyBy(table_read, []),
            ir.MakeStruct([
                ('a', ir.GetField(ir.Ref('row'), 'f32')),
                ('b', ir.F64(-2.11)),
            ])),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([('foo', ir.NA(hl.tarray(hl.tint32)))])),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, ir.RepartitionStrategy.COALESCE),
        ir.TableUnion([ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, ['mset']),
        ir.TableHead(table_read, 10),
        ir.TableOrderBy(
            ir.TableKeyBy(table_read, []),
            [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
        ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
        ir.TableRename(
            table_read, {'idx': 'idx_foo'}, {'global_f32': 'global_foo'}),
        ir.TableMultiWayZipJoin(
            [table_read, table_read], '__data', '__globals'),
        ir.MatrixToTableApply(
            matrix_read,
            {
                'name': 'LinearRegressionRowsSingle',
                'yFields': ['col_m'],
                'xField': 'entry_m',
                'covFields': [],
                'rowBlockSize': 10,
                'passThrough': []
            }),
        ir.TableToTableApply(
            table_read,
            {
                'name': 'TableFilterPartitions',
                'parts': [0],
                'keep': True
            }),
        ir.BlockMatrixToTableApply(
            block_matrix_read, aa,
            {
                'name': 'PCRelate',
                'maf': 0.01,
                'blockSize': 4096
            }),
        ir.TableFilterIntervals(
            table_read,
            [
                hl.utils.Interval(
                    hl.utils.Struct(row_idx=0),
                    hl.utils.Struct(row_idx=10))
            ],
            hl.tstruct(row_idx=hl.tint32),
            keep=False),
    ]

    return table_irs
def value_irs(self):
    """Assemble the sample value IR nodes used by this test class.

    Returns:
        A list of ``ir`` value nodes: constants and references, array and
        ndarray nodes, aggregations, struct/tuple nodes, function
        applications, and table / matrix-table / block-matrix actions
        (counts, collects, aggregates and writes to fresh temp files).
    """
    # Shared leaves reused by many of the nodes below.
    b = ir.TrueIR()
    c = ir.Ref('c')
    i = ir.I32(5)
    j = ir.I32(7)
    a = ir.Ref('a')
    aa = ir.Ref('aa')
    da = ir.Ref('da')
    nd = ir.Ref('nd')
    v = ir.Ref('v')
    s = ir.Ref('s')
    t = ir.Ref('t')
    call = ir.Ref('call')

    table = ir.TableRange(5, 3)
    matrix_read = ir.MatrixRead(
        ir.MatrixNativeReader(
            resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
            None, False),
        False, False)
    block_matrix_read = ir.BlockMatrixRead(
        ir.BlockMatrixNativeReader('fake_file_path'))

    value_irs = [
        i,
        ir.I64(5),
        ir.F32(3.14),
        ir.F64(3.14),
        s,
        ir.TrueIR(),
        ir.FalseIR(),
        ir.Void(),
        ir.Cast(i, hl.tfloat64),
        ir.NA(hl.tint32),
        ir.IsNA(i),
        ir.If(b, i, j),
        ir.Coalesce(i, j),
        ir.Let('v', i, v),
        ir.Ref('x'),
        ir.ApplyBinaryPrimOp('+', i, j),
        ir.ApplyUnaryPrimOp('-', i),
        ir.ApplyComparisonOp('EQ', i, j),
        ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)),
        ir.ArrayRef(a, i, ir.Str('foo')),
        ir.ArrayLen(a),
        ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
        ir.ArraySort(
            a, 'l', 'r',
            ir.ApplyComparisonOp("LT", ir.Ref('l'), ir.Ref('r'))),
        ir.ToSet(a),
        ir.ToDict(da),
        ir.ToArray(a),
        ir.MakeNDArray(
            ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)], hl.tarray(hl.tfloat64)),
            ir.MakeTuple([ir.I64(1), ir.I64(2)]),
            ir.TrueIR()),
        ir.NDArrayShape(nd),
        ir.NDArrayReshape(nd, ir.MakeTuple([ir.I64(5)])),
        ir.NDArrayRef(nd, [ir.I64(1), ir.I64(2)]),
        ir.NDArrayMap(nd, 'v', v),
        ir.NDArrayMatMul(nd, nd),
        ir.LowerBoundOnOrderedCollection(a, i, True),
        ir.GroupByKey(da),
        ir.ArrayMap(a, 'v', v),
        ir.ArrayZip([a, a], ['a', 'b'], ir.TrueIR(), 'ExtendNA'),
        ir.ArrayFilter(a, 'v', v),
        ir.ArrayFlatMap(aa, 'v', v),
        ir.ArrayFold(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayScan(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayLeftJoinDistinct(a, a, 'l', 'r', ir.I32(0), ir.I32(1)),
        ir.ArrayFor(a, 'v', ir.Void()),
        ir.AggFilter(ir.TrueIR(), ir.I32(0), False),
        ir.AggExplode(
            ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)),
            'x', ir.I32(0), False),
        ir.AggGroupBy(ir.TrueIR(), ir.I32(0), False),
        ir.AggArrayPerElement(
            ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)),
            'x', 'y', ir.I32(0), False),
        ir.ApplyAggOp('Collect', [], [ir.I32(0)]),
        ir.ApplyScanOp('Collect', [], [ir.I32(0)]),
        ir.ApplyAggOp('CallStats', [ir.I32(2)], [call]),
        ir.ApplyAggOp('TakeBy', [ir.I32(10)], [ir.F64(-2.11), ir.F64(-2.11)]),
        ir.Begin([ir.Void()]),
        ir.MakeStruct([('x', i)]),
        ir.SelectFields(s, ['x', 'z']),
        ir.InsertFields(s, [('x', i)], None),
        ir.GetField(s, 'x'),
        ir.MakeTuple([i, b]),
        ir.GetTupleElement(t, 1),
        ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
        ir.Apply('&&', hl.tbool, b, c),
        ir.Apply('toFloat64', hl.tfloat64, i),
        ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
        ir.TableCount(table),
        ir.TableGetGlobals(table),
        ir.TableCollect(ir.TableKeyBy(table, [], False)),
        ir.TableToValueApply(table, {'name': 'ForceCountTable'}),
        ir.MatrixToValueApply(matrix_read, {'name': 'ForceCountMatrixTable'}),
        ir.TableAggregate(
            table,
            ir.MakeStruct([
                ('foo', ir.ApplyAggOp('Collect', [], [ir.I32(0)]))
            ])),
        ir.TableWrite(
            table,
            ir.TableNativeWriter(
                new_temp_file(), False, True, "fake_codec_spec$$")),
        ir.TableWrite(
            table,
            ir.TableTextWriter(new_temp_file(), None, True, 0, ",")),
        ir.MatrixAggregate(
            matrix_read,
            ir.MakeStruct([
                ('foo', ir.ApplyAggOp('Collect', [], [ir.I32(0)]))
            ])),
        ir.MatrixWrite(
            matrix_read,
            ir.MatrixNativeWriter(
                new_temp_file(), False, False, "", None, None)),
        # Same writer, this time with a partition-interval JSON string and
        # the matching interval array type.
        ir.MatrixWrite(
            matrix_read,
            ir.MatrixNativeWriter(
                new_temp_file(), False, False, "",
                '[{"start":{"row_idx":0},"end":{"row_idx": 10},"includeStart":true,"includeEnd":false}]',
                hl.dtype('array<interval<struct{row_idx:int32}>>'))),
        ir.MatrixWrite(
            matrix_read,
            ir.MatrixVCFWriter(new_temp_file(), None, False, None)),
        ir.MatrixWrite(matrix_read, ir.MatrixGENWriter(new_temp_file(), 4)),
        ir.MatrixWrite(matrix_read, ir.MatrixPLINKWriter(new_temp_file())),
        ir.MatrixMultiWrite(
            [matrix_read, matrix_read],
            ir.MatrixNativeMultiWriter(new_temp_file(), False, False)),
        ir.BlockMatrixWrite(
            block_matrix_read,
            ir.BlockMatrixNativeWriter('fake_file_path', False, False, False)),
        ir.LiftMeOut(ir.I32(1)),
    ]

    return value_irs
def value_irs(self):
    """Assemble the sample value IR nodes used by this test class.

    The nodes are built in thematic groups and concatenated in order, so
    the returned list has the same sequence as a single flat literal.

    Returns:
        A list of ``ir`` value nodes.
    """
    # Shared leaves reused across the groups below.
    b = ir.TrueIR()
    c = ir.Ref('c')
    i = ir.I32(5)
    j = ir.I32(7)
    st = ir.Str('Hail')
    a = ir.Ref('a')
    aa = ir.Ref('aa')
    da = ir.Ref('da')
    nd = ir.Ref('nd')
    v = ir.Ref('v')
    s = ir.Ref('s')
    t = ir.Ref('t')
    call = ir.Ref('call')

    small_table = ir.TableRange(5, 3)
    mt_reader = ir.MatrixNativeReader(
        resource('backward_compatability/1.0.0/matrix_table/0.hmt'))
    mt_read = ir.MatrixRead(mt_reader, False, False)
    bm_read = ir.BlockMatrixRead('fake_file_path')

    # Constants, references, control flow and primitive operators.
    basics = [
        i,
        ir.I64(5),
        ir.F32(3.14),
        ir.F64(3.14),
        s,
        ir.TrueIR(),
        ir.FalseIR(),
        ir.Void(),
        ir.Cast(i, hl.tfloat64),
        ir.NA(hl.tint32),
        ir.IsNA(i),
        ir.If(b, i, j),
        ir.Let('v', i, v),
        ir.Ref('x'),
        ir.ApplyBinaryOp('+', i, j),
        ir.ApplyUnaryOp('-', i),
        ir.ApplyComparisonOp('EQ', i, j),
    ]

    # Array, ndarray and collection nodes.
    collections = [
        ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)),
        ir.ArrayRef(a, i),
        ir.ArrayLen(a),
        ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
        ir.ArraySort(a, b, False),
        ir.ToSet(a),
        ir.ToDict(da),
        ir.ToArray(a),
        ir.MakeNDArray(
            ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)], hl.tarray(hl.tfloat64)),
            ir.MakeArray([ir.I64(1), ir.I64(2)], hl.tarray(hl.tint64)),
            ir.TrueIR()),
        ir.NDArrayRef(
            nd,
            ir.MakeArray([ir.I64(1), ir.I64(2)], hl.tarray(hl.tint64))),
        ir.LowerBoundOnOrderedCollection(a, i, True),
        ir.GroupByKey(da),
        ir.ArrayMap(a, 'v', v),
        ir.ArrayFilter(a, 'v', v),
        ir.ArrayFlatMap(aa, 'v', v),
        ir.ArrayFold(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayScan(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayLeftJoinDistinct(a, a, 'l', 'r', ir.I32(0), ir.I32(1)),
        ir.ArrayFor(a, 'v', ir.Void()),
    ]

    # Aggregation and scan nodes.
    aggregations = [
        ir.AggFilter(ir.TrueIR(), ir.I32(0)),
        ir.AggExplode(
            ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0)),
        ir.AggGroupBy(ir.TrueIR(), ir.I32(0)),
        ir.AggArrayPerElement(
            ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0)),
        ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]),
        ir.ApplyScanOp('Collect', [], None, [ir.I32(0)]),
        ir.ApplyAggOp(
            'Histogram',
            [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)],
            None,
            [ir.F64(-2.11)]),
        ir.ApplyAggOp('CallStats', [], [ir.I32(2)], [call]),
        ir.ApplyAggOp(
            'TakeBy', [ir.I32(10)], None, [ir.F64(-2.11), ir.F64(-2.11)]),
    ]

    # Struct/tuple nodes, string nodes, function applications and literals.
    structs_and_calls = [
        ir.Begin([ir.Void()]),
        ir.MakeStruct([('x', i)]),
        ir.SelectFields(s, ['x', 'z']),
        ir.InsertFields(s, [('x', i)], None),
        ir.GetField(s, 'x'),
        ir.MakeTuple([i, b]),
        ir.GetTupleElement(t, 1),
        ir.StringSlice(st, ir.I32(1), ir.I32(2)),
        ir.StringLength(st),
        ir.In(2, hl.tfloat64),
        ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
        ir.Apply('&&', b, c),
        ir.Apply('toFloat64', i),
        ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
        ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
    ]

    # Actions over tables, matrix tables and block matrices.
    relational_actions = [
        ir.TableCount(small_table),
        ir.TableGetGlobals(small_table),
        ir.TableCollect(small_table),
        ir.TableToValueApply(small_table, {'name': 'ForceCountTable'}),
        ir.MatrixToValueApply(mt_read, {'name': 'ForceCountMatrixTable'}),
        ir.TableAggregate(
            small_table,
            ir.MakeStruct([
                ('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))
            ])),
        ir.TableWrite(
            small_table, new_temp_file(), False, True, "fake_codec_spec$$"),
        ir.TableExport(small_table, new_temp_file(), None, True, 0, ","),
        ir.MatrixAggregate(
            mt_read,
            ir.MakeStruct([
                ('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))
            ])),
        ir.MatrixWrite(
            mt_read,
            ir.MatrixNativeWriter(new_temp_file(), False, False, "")),
        ir.MatrixWrite(
            mt_read,
            ir.MatrixVCFWriter(new_temp_file(), None, False, None)),
        ir.MatrixWrite(mt_read, ir.MatrixGENWriter(new_temp_file(), 4)),
        ir.MatrixWrite(mt_read, ir.MatrixPLINKWriter(new_temp_file())),
        ir.MatrixMultiWrite(
            [mt_read, mt_read],
            ir.MatrixNativeMultiWriter(new_temp_file(), False, False)),
        ir.BlockMatrixWrite(bm_read, 'fake_file_path', False, False, False),
    ]

    return (basics
            + collections
            + aggregations
            + structs_and_calls
            + relational_actions)
def block_matrix_irs(self):
    """Return a two-node BlockMatrix IR sample: a read and its self-sum.

    Returns:
        ``[read, add]`` where ``read`` is a ``BlockMatrixRead`` of
        ``'fake_file_path'`` and ``add`` is a ``BlockMatrixAdd`` whose two
        operands are both that read.
    """
    source = ir.BlockMatrixRead('fake_file_path')
    doubled = ir.BlockMatrixAdd(source, source)
    return [source, doubled]