Esempio n. 1
0
    def blockmatrix_irs(self):
        """Assemble the block-matrix IR nodes used as fixtures.

        Most nodes are layered on a single ``BlockMatrixRead`` of the
        ``blockmatrix_example/0`` resource; the rest are built from small
        literal values.

        Returns:
            list: the constructed IR nodes, in a fixed order.
        """
        two = ir.F64(2)
        pair = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

        # One read node, shared by every node derived from it below.
        reader = ir.BlockMatrixNativeReader(resource('blockmatrix_example/0'))
        source = ir.BlockMatrixRead(reader)

        # Element-wise maps over the read.
        sum_body = ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r'))
        summed = ir.BlockMatrixMap2(source, source, 'l', 'r', sum_body,
                                    "Union")
        negation_body = ir.ApplyUnaryPrimOp('-', ir.Ref('element'))
        negated = ir.BlockMatrixMap(source, 'element', negation_body, False)
        sqrt_body = hl.sqrt(construct_expr(ir.Ref('element'),
                                           hl.tfloat64))._ir
        rooted = ir.BlockMatrixMap(source, 'element', sqrt_body, False)
        cached = ir.BlockMatrixRead(ir.BlockMatrixPersistReader('x', source))

        # Plain values lifted into block matrices, then broadcast.
        from_scalar = ir.ValueToBlockMatrix(two, [1, 1], 1)
        from_column = ir.ValueToBlockMatrix(pair, [2, 1], 1)
        from_row = ir.ValueToBlockMatrix(pair, [1, 2], 1)
        scalar_bcast = ir.BlockMatrixBroadcast(from_scalar, [], [2, 2], 256)
        column_bcast = ir.BlockMatrixBroadcast(from_column, [0], [2, 2], 256)
        row_bcast = ir.BlockMatrixBroadcast(from_row, [1], [2, 2], 256)
        transposed = ir.BlockMatrixBroadcast(scalar_bcast, [1, 0], [2, 2],
                                             256)
        product = ir.BlockMatrixDot(scalar_bcast, transposed)

        # Literal arguments for the three sparsifier flavours.
        rectangle_spec = ir.Literal(hl.tarray(hl.tint64), [0, 1, 5, 6])
        band_spec = ir.Literal(hl.ttuple(hl.tint64, hl.tint64), (-1, 1))
        interval_spec = ir.Literal(
            hl.ttuple(hl.tarray(hl.tint64), hl.tarray(hl.tint64)),
            ([0, 1, 5, 6], [5, 6, 8, 9]))

        by_rectangle = ir.BlockMatrixSparsify(source, rectangle_spec,
                                              ir.RectangleSparsifier)
        by_band = ir.BlockMatrixSparsify(source, band_spec,
                                         ir.BandSparsifier(True))
        by_intervals = ir.BlockMatrixSparsify(source, interval_spec,
                                              ir.RowIntervalSparsifier(True))

        dense = ir.BlockMatrixDensify(source)

        # l ** r, applied to the scalar matrix paired with itself.
        base = construct_expr(ir.Ref('l'), hl.tfloat64)
        exponent = construct_expr(ir.Ref('r'), hl.tfloat64)
        power_body = (base**exponent)._ir
        squared = ir.BlockMatrixMap2(from_scalar, from_scalar, 'l', 'r',
                                     power_body, "NeedsDense")
        sliced = ir.BlockMatrixSlice(product,
                                     [slice(0, 2, 1), slice(0, 1, 1)])

        return [
            source,
            cached,
            summed,
            negated,
            rooted,
            from_scalar,
            from_column,
            from_row,
            scalar_bcast,
            column_bcast,
            row_bcast,
            squared,
            transposed,
            by_rectangle,
            by_band,
            by_intervals,
            dense,
            product,
            sliced,
        ]
Esempio n. 2
0
    def test_parses(self):
        """Check that the backend parses the string form of every fixture.

        A block matrix built by ``BlockMatrix.fill(1, 1, 0.0)`` is first
        written with a persist writer under the name ``'x'``. The string
        form of each IR from ``blockmatrix_irs()``, plus a read of that
        persisted matrix, is then handed to the backend's block-matrix
        parser.

        The persisted entry is released in a ``finally`` clause so that a
        parse failure does not leave ``'x'`` persisted.
        """
        backend = Env.spark_backend('BlockMatrixIRTests.test_parses')

        bmir = hl.linalg.BlockMatrix.fill(1, 1, 0.0)._bmir
        backend.execute(ir.BlockMatrixWrite(bmir, ir.BlockMatrixPersistWriter('x', 'MEMORY_ONLY')))

        # Cleanup is only needed once the write above has succeeded, so the
        # try block starts here.
        try:
            persist = ir.BlockMatrixRead(ir.BlockMatrixPersistReader('x', bmir))

            for x in (self.blockmatrix_irs() + [persist]):
                backend._parse_blockmatrix_ir(str(x))
        finally:
            backend.unpersist_block_matrix('x')
Esempio n. 3
0
    def blockmatrix_irs(self):
        """Assemble the block-matrix IR nodes used as fixtures.

        Nodes are built either on one shared ``BlockMatrixRead`` of the
        ``blockmatrix_example/0`` resource or on small literal values.

        Returns:
            list: the constructed IR nodes, in a fixed order.
        """
        two = ir.F64(2)
        pair = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

        # One read node, shared by the element-wise maps below.
        reader = ir.BlockMatrixNativeReader(resource('blockmatrix_example/0'))
        source = ir.BlockMatrixRead(reader)

        sum_body = ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r'))
        summed = ir.BlockMatrixMap2(source, source, 'l', 'r', sum_body)
        negation_body = ir.ApplyUnaryPrimOp('-', ir.Ref('element'))
        negated = ir.BlockMatrixMap(source, 'element', negation_body)
        sqrt_body = hl.sqrt(construct_expr(ir.Ref('element'),
                                           hl.tfloat64))._ir
        rooted = ir.BlockMatrixMap(source, 'element', sqrt_body)

        # Plain values lifted into block matrices, then broadcast.
        from_scalar = ir.ValueToBlockMatrix(two, [1, 1], 1)
        from_column = ir.ValueToBlockMatrix(pair, [2, 1], 1)
        from_row = ir.ValueToBlockMatrix(pair, [1, 2], 1)
        scalar_bcast = ir.BlockMatrixBroadcast(from_scalar, [], [2, 2], 256)
        column_bcast = ir.BlockMatrixBroadcast(from_column, [0], [2, 2], 256)
        row_bcast = ir.BlockMatrixBroadcast(from_row, [1], [2, 2], 256)
        transposed = ir.BlockMatrixBroadcast(scalar_bcast, [1, 0], [2, 2],
                                             256)
        product = ir.BlockMatrixDot(scalar_bcast, transposed)

        # l ** r, applied to the scalar matrix paired with itself.
        base = construct_expr(ir.Ref('l'), hl.tfloat64)
        exponent = construct_expr(ir.Ref('r'), hl.tfloat64)
        power_body = (base**exponent)._ir
        squared = ir.BlockMatrixMap2(from_scalar, from_scalar, 'l', 'r',
                                     power_body)
        sliced = ir.BlockMatrixSlice(product,
                                     [slice(0, 2, 1), slice(0, 1, 1)])

        return [
            source,
            summed,
            negated,
            rooted,
            from_scalar,
            from_column,
            from_row,
            scalar_bcast,
            column_bcast,
            row_bcast,
            squared,
            transposed,
            product,
            sliced,
        ]
Esempio n. 4
0
    def block_matrix_irs(self):
        """Assemble a short list of block-matrix IR nodes used as fixtures.

        Returns:
            list: a read of ``blockmatrix_example/0``, the element-wise
            ``'+'`` of that read with itself, two ``ValueToBlockMatrix``
            nodes, and three ``BlockMatrixBroadcast`` nodes tagged
            ``"scalar"``, ``"col"`` and ``"row"``, in that order.
        """
        two = ir.F64(2)
        pair = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

        source = ir.BlockMatrixRead(resource('blockmatrix_example/0'))
        summed = BlockMatrixIRTests._make_element_wise_op_ir(source, source, '+')

        from_scalar = ir.ValueToBlockMatrix(two, [], 1, [])
        from_vector = ir.ValueToBlockMatrix(pair, [2], 1, [False])

        nodes = [source, summed, from_scalar, from_vector]

        # The three broadcasts differ only in their child and kind string.
        broadcast_inputs = (
            (from_scalar, "scalar"),
            (from_vector, "col"),
            (from_vector, "row"),
        )
        for child, kind in broadcast_inputs:
            nodes.append(
                ir.BlockMatrixBroadcast(child, kind, [2, 2], 256, [False, False]))

        return nodes
Esempio n. 5
0
    def table_irs(self):
        """Build the table IR nodes used as fixtures.

        The nodes are derived from a native table read and a native
        matrix-table read (both from ``backward_compatability/1.0.0``
        resources), a block-matrix read of the literal path
        ``'fake_file_path'``, and a few ``TableRange`` nodes.

        Returns:
            list: the constructed table IR nodes, in a fixed order.
        """
        b = ir.TrueIR()
        table_read = ir.TableRead(
            ir.TableNativeReader(
                resource('backward_compatability/1.0.0/table/0.ht'), None,
                False), False)

        matrix_read = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
                None, False), False, False)

        block_matrix_read = ir.BlockMatrixRead(
            ir.BlockMatrixNativeReader('fake_file_path'))

        # Value IR passed as the extra argument of BlockMatrixToTableApply.
        aa = hl.literal([[0.00], [0.01], [0.02]])._ir

        table_irs = [
            ir.TableKeyBy(table_read, ['m', 'd'], False),
            ir.TableFilter(table_read, b),
            table_read,
            ir.MatrixColsTable(matrix_read),
            ir.TableAggregateByKey(table_read,
                                   ir.MakeStruct([('a', ir.I32(5))])),
            ir.TableKeyByAndAggregate(table_read,
                                      ir.MakeStruct([('a', ir.I32(5))]),
                                      ir.MakeStruct([('b', ir.I32(5))]), 1, 2),
            ir.TableJoin(table_read, ir.TableRange(100, 10), 'inner', 1),
            ir.MatrixEntriesTable(matrix_read),
            ir.MatrixRowsTable(matrix_read),
            ir.TableParallelize(
                ir.MakeStruct([('rows',
                                ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)),
                                           [{
                                               'a': None
                                           }, {
                                               'a': 5
                                           }, {
                                               'a': -3
                                           }])),
                               ('global', ir.MakeStruct([]))]), None),
            ir.TableMapRows(
                ir.TableKeyBy(table_read, []),
                ir.MakeStruct([('a', ir.GetField(ir.Ref('row'), 'f32')),
                               ('b', ir.F64(-2.11))])),
            ir.TableMapGlobals(
                table_read,
                ir.MakeStruct([('foo', ir.NA(hl.tarray(hl.tint32)))])),
            ir.TableRange(100, 10),
            ir.TableRepartition(table_read, 10,
                                ir.RepartitionStrategy.COALESCE),
            ir.TableUnion([ir.TableRange(100, 10),
                           ir.TableRange(50, 10)]),
            ir.TableExplode(table_read, ['mset']),
            ir.TableHead(table_read, 10),
            ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'),
                                                            ('m', 'D')]),
            ir.TableDistinct(table_read),
            ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
            ir.TableRename(table_read, {'idx': 'idx_foo'},
                           {'global_f32': 'global_foo'}),
            ir.TableMultiWayZipJoin([table_read, table_read], '__data',
                                    '__globals'),
            ir.MatrixToTableApply(
                matrix_read, {
                    'name': 'LinearRegressionRowsSingle',
                    'yFields': ['col_m'],
                    'xField': 'entry_m',
                    'covFields': [],
                    'rowBlockSize': 10,
                    'passThrough': []
                }),
            ir.TableToTableApply(table_read, {
                'name': 'TableFilterPartitions',
                'parts': [0],
                'keep': True
            }),
            ir.BlockMatrixToTableApply(block_matrix_read, aa, {
                'name': 'PCRelate',
                'maf': 0.01,
                'blockSize': 4096
            }),
            ir.TableFilterIntervals(table_read, [
                hl.utils.Interval(hl.utils.Struct(row_idx=0),
                                  hl.utils.Struct(row_idx=10))
            ],
                                    hl.tstruct(row_idx=hl.tint32),
                                    keep=False),
        ]

        return table_irs
Esempio n. 6
0
    def value_irs(self):
        """Build the value IR nodes used as fixtures.

        The nodes are assembled from a handful of shared constants and
        ``Ref`` nodes, a ``TableRange`` table, a native matrix-table read
        from the ``backward_compatability/1.0.0`` resources, and a
        block-matrix read of the literal path ``'fake_file_path'``.
        Writer nodes take their output paths from ``new_temp_file()``.

        Returns:
            list: the constructed value IR nodes, in a fixed order.
        """
        # Constants and references reused across the list below.
        b = ir.TrueIR()
        c = ir.Ref('c')
        i = ir.I32(5)
        j = ir.I32(7)
        a = ir.Ref('a')
        aa = ir.Ref('aa')
        da = ir.Ref('da')
        nd = ir.Ref('nd')
        v = ir.Ref('v')
        s = ir.Ref('s')
        t = ir.Ref('t')
        call = ir.Ref('call')

        table = ir.TableRange(5, 3)

        matrix_read = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
                None, False), False, False)

        block_matrix_read = ir.BlockMatrixRead(
            ir.BlockMatrixNativeReader('fake_file_path'))

        value_irs = [
            i,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14), s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(i, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(i),
            ir.If(b, i, j),
            ir.Coalesce(i, j),
            ir.Let('v', i, v),
            ir.Ref('x'),
            ir.ApplyBinaryPrimOp('+', i, j),
            ir.ApplyUnaryPrimOp('-', i),
            ir.ApplyComparisonOp('EQ', i, j),
            ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)],
                         hl.tarray(hl.tint32)),
            ir.ArrayRef(a, i, ir.Str('foo')),
            ir.ArrayLen(a),
            ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
            ir.ArraySort(a, 'l', 'r',
                         ir.ApplyComparisonOp("LT", ir.Ref('l'), ir.Ref('r'))),
            ir.ToSet(a),
            ir.ToDict(da),
            ir.ToArray(a),
            ir.MakeNDArray(
                ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)],
                             hl.tarray(hl.tfloat64)),
                ir.MakeTuple([ir.I64(1), ir.I64(2)]), ir.TrueIR()),
            ir.NDArrayShape(nd),
            ir.NDArrayReshape(nd, ir.MakeTuple([ir.I64(5)])),
            ir.NDArrayRef(nd, [ir.I64(1), ir.I64(2)]),
            ir.NDArrayMap(nd, 'v', v),
            ir.NDArrayMatMul(nd, nd),
            ir.LowerBoundOnOrderedCollection(a, i, True),
            ir.GroupByKey(da),
            ir.ArrayMap(a, 'v', v),
            ir.ArrayZip([a, a], ['a', 'b'], ir.TrueIR(), 'ExtendNA'),
            ir.ArrayFilter(a, 'v', v),
            ir.ArrayFlatMap(aa, 'v', v),
            ir.ArrayFold(a, ir.I32(0), 'x', 'v', v),
            ir.ArrayScan(a, ir.I32(0), 'x', 'v', v),
            ir.ArrayLeftJoinDistinct(a, a, 'l', 'r', ir.I32(0), ir.I32(1)),
            ir.ArrayFor(a, 'v', ir.Void()),
            ir.AggFilter(ir.TrueIR(), ir.I32(0), False),
            ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x',
                          ir.I32(0), False),
            ir.AggGroupBy(ir.TrueIR(), ir.I32(0), False),
            ir.AggArrayPerElement(
                ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', 'y',
                ir.I32(0), False),
            ir.ApplyAggOp('Collect', [], [ir.I32(0)]),
            ir.ApplyScanOp('Collect', [], [ir.I32(0)]),
            ir.ApplyAggOp('CallStats', [ir.I32(2)], [call]),
            ir.ApplyAggOp('TakeBy', [ir.I32(10)],
                          [ir.F64(-2.11), ir.F64(-2.11)]),
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', i)]),
            ir.SelectFields(s, ['x', 'z']),
            ir.InsertFields(s, [('x', i)], None),
            ir.GetField(s, 'x'),
            ir.MakeTuple([i, b]),
            ir.GetTupleElement(t, 1),
            ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
            ir.Apply('&&', hl.tbool, b, c),
            ir.Apply('toFloat64', hl.tfloat64, i),
            ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
            ir.TableCount(table),
            ir.TableGetGlobals(table),
            ir.TableCollect(ir.TableKeyBy(table, [], False)),
            ir.TableToValueApply(table, {'name': 'ForceCountTable'}),
            ir.MatrixToValueApply(matrix_read,
                                  {'name': 'ForceCountMatrixTable'}),
            ir.TableAggregate(
                table,
                ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [],
                                                     [ir.I32(0)]))])),
            ir.TableWrite(
                table,
                ir.TableNativeWriter(new_temp_file(), False, True,
                                     "fake_codec_spec$$")),
            ir.TableWrite(
                table, ir.TableTextWriter(new_temp_file(), None, True, 0,
                                          ",")),
            ir.MatrixAggregate(
                matrix_read,
                ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [],
                                                     [ir.I32(0)]))])),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixNativeWriter(new_temp_file(), False, False, "", None,
                                      None)),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixNativeWriter(
                    new_temp_file(), False, False, "",
                    '[{"start":{"row_idx":0},"end":{"row_idx": 10},"includeStart":true,"includeEnd":false}]',
                    hl.dtype('array<interval<struct{row_idx:int32}>>'))),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixVCFWriter(new_temp_file(), None, False, None)),
            ir.MatrixWrite(matrix_read, ir.MatrixGENWriter(new_temp_file(),
                                                           4)),
            ir.MatrixWrite(matrix_read, ir.MatrixPLINKWriter(new_temp_file())),
            ir.MatrixMultiWrite([matrix_read, matrix_read],
                                ir.MatrixNativeMultiWriter(
                                    new_temp_file(), False, False)),
            ir.BlockMatrixWrite(
                block_matrix_read,
                ir.BlockMatrixNativeWriter('fake_file_path', False, False,
                                           False)),
            ir.LiftMeOut(ir.I32(1))
        ]

        return value_irs
Esempio n. 7
0
    def value_irs(self):
        """Build the value IR nodes used as fixtures.

        The list is assembled in consecutive sections from shared constants
        and ``Ref`` nodes, a ``TableRange`` table, a native matrix-table read
        from the ``backward_compatability/1.0.0`` resources, and a
        block-matrix read of the literal path ``'fake_file_path'``.
        Write and export nodes take their output paths from
        ``new_temp_file()``.

        Returns:
            list: the constructed value IR nodes, in a fixed order.
        """
        # Constants and references reused across the sections below.
        true_ir = ir.TrueIR()
        ref_c = ir.Ref('c')
        five = ir.I32(5)
        seven = ir.I32(7)
        hail_str = ir.Str('Hail')
        ref_a = ir.Ref('a')
        ref_aa = ir.Ref('aa')
        ref_da = ir.Ref('da')
        ref_nd = ir.Ref('nd')
        ref_v = ir.Ref('v')
        ref_s = ir.Ref('s')
        ref_t = ir.Ref('t')
        ref_call = ir.Ref('call')

        small_table = ir.TableRange(5, 3)

        mt_reader = ir.MatrixNativeReader(
            resource('backward_compatability/1.0.0/matrix_table/0.hmt'))
        mt_read = ir.MatrixRead(mt_reader, False, False)

        bm_read = ir.BlockMatrixRead('fake_file_path')

        # Constants, references and primitive operations.
        value_irs = [
            five,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14),
            ref_s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(five, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(five),
            ir.If(true_ir, five, seven),
            ir.Let('v', five, ref_v),
            ir.Ref('x'),
            ir.ApplyBinaryOp('+', five, seven),
            ir.ApplyUnaryOp('-', five),
            ir.ApplyComparisonOp('EQ', five, seven),
        ]

        # Array, collection and ndarray nodes.
        value_irs.extend([
            ir.MakeArray([five, ir.NA(hl.tint32), ir.I32(-3)],
                         hl.tarray(hl.tint32)),
            ir.ArrayRef(ref_a, five),
            ir.ArrayLen(ref_a),
            ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
            ir.ArraySort(ref_a, true_ir, False),
            ir.ToSet(ref_a),
            ir.ToDict(ref_da),
            ir.ToArray(ref_a),
            ir.MakeNDArray(
                ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)],
                             hl.tarray(hl.tfloat64)),
                ir.MakeArray([ir.I64(1), ir.I64(2)], hl.tarray(hl.tint64)),
                ir.TrueIR()),
            ir.NDArrayRef(
                ref_nd,
                ir.MakeArray([ir.I64(1), ir.I64(2)], hl.tarray(hl.tint64))),
            ir.LowerBoundOnOrderedCollection(ref_a, five, True),
            ir.GroupByKey(ref_da),
            ir.ArrayMap(ref_a, 'v', ref_v),
            ir.ArrayFilter(ref_a, 'v', ref_v),
            ir.ArrayFlatMap(ref_aa, 'v', ref_v),
            ir.ArrayFold(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayScan(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayLeftJoinDistinct(ref_a, ref_a, 'l', 'r', ir.I32(0),
                                     ir.I32(1)),
            ir.ArrayFor(ref_a, 'v', ir.Void()),
        ])

        # Aggregation and scan nodes.
        value_irs.extend([
            ir.AggFilter(ir.TrueIR(), ir.I32(0)),
            ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)),
                          'x', ir.I32(0)),
            ir.AggGroupBy(ir.TrueIR(), ir.I32(0)),
            ir.AggArrayPerElement(
                ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x',
                ir.I32(0)),
            ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]),
            ir.ApplyScanOp('Collect', [], None, [ir.I32(0)]),
            ir.ApplyAggOp('Histogram',
                          [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)], None,
                          [ir.F64(-2.11)]),
            ir.ApplyAggOp('CallStats', [], [ir.I32(2)], [ref_call]),
            ir.ApplyAggOp('TakeBy', [ir.I32(10)], None,
                          [ir.F64(-2.11), ir.F64(-2.11)]),
        ])

        # Struct, tuple, string and function-application nodes.
        value_irs.extend([
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', five)]),
            ir.SelectFields(ref_s, ['x', 'z']),
            ir.InsertFields(ref_s, [('x', five)], None),
            ir.GetField(ref_s, 'x'),
            ir.MakeTuple([five, true_ir]),
            ir.GetTupleElement(ref_t, 1),
            ir.StringSlice(hail_str, ir.I32(1), ir.I32(2)),
            ir.StringLength(hail_str),
            ir.In(2, hl.tfloat64),
            ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
            ir.Apply('&&', true_ir, ref_c),
            ir.Apply('toFloat64', five),
            ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
            ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
        ])

        # Nodes that consume a table, matrix table or block matrix.
        value_irs.extend([
            ir.TableCount(small_table),
            ir.TableGetGlobals(small_table),
            ir.TableCollect(small_table),
            ir.TableToValueApply(small_table, {'name': 'ForceCountTable'}),
            ir.MatrixToValueApply(mt_read,
                                  {'name': 'ForceCountMatrixTable'}),
            ir.TableAggregate(
                small_table,
                ir.MakeStruct([
                    ('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))
                ])),
            ir.TableWrite(small_table, new_temp_file(), False, True,
                          "fake_codec_spec$$"),
            ir.TableExport(small_table, new_temp_file(), None, True, 0, ","),
            ir.MatrixAggregate(
                mt_read,
                ir.MakeStruct([
                    ('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))
                ])),
            ir.MatrixWrite(
                mt_read,
                ir.MatrixNativeWriter(new_temp_file(), False, False, "")),
            ir.MatrixWrite(
                mt_read,
                ir.MatrixVCFWriter(new_temp_file(), None, False, None)),
            ir.MatrixWrite(mt_read, ir.MatrixGENWriter(new_temp_file(), 4)),
            ir.MatrixWrite(mt_read, ir.MatrixPLINKWriter(new_temp_file())),
            ir.MatrixMultiWrite(
                [mt_read, mt_read],
                ir.MatrixNativeMultiWriter(new_temp_file(), False, False)),
            ir.BlockMatrixWrite(bm_read, 'fake_file_path', False, False,
                                False),
        ])

        return value_irs
Esempio n. 8
0
    def block_matrix_irs(self):
        """Return two block-matrix IR fixtures.

        Returns:
            list: a ``BlockMatrixRead`` of the literal path
            ``'fake_file_path'``, followed by a ``BlockMatrixAdd`` of that
            read with itself.
        """
        source = ir.BlockMatrixRead('fake_file_path')
        return [source, ir.BlockMatrixAdd(source, source)]