Esempio n. 1
0
    def value_irs(self):
        """Build a list of sample value IR nodes.

        The list holds at least one instance of every value-level IR
        constructor used below: constants, control flow, array / stream /
        ndarray operations, aggregations, struct and tuple access, function
        application, and table / matrix / block-matrix actions.

        Returns:
            list: the constructed IR nodes, in the order they are written.
        """
        # Small operands reused throughout the list.
        b = ir.TrueIR()
        c = ir.Ref('c')
        i = ir.I32(5)
        j = ir.I32(7)
        a = ir.Ref('a')
        st = ir.Ref('st')
        sta = ir.Ref('sta')
        da = ir.Ref('da')
        nd = ir.Ref('nd')
        v = ir.Ref('v')
        s = ir.Ref('s')
        t = ir.Ref('t')
        call = ir.Ref('call')

        # Relational inputs for the table / matrix / block-matrix actions
        # near the end of the list.
        table = ir.TableRange(5, 3)

        matrix_read = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
                None, False), False, False)

        block_matrix_read = ir.BlockMatrixRead(
            ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))

        value_irs = [
            # Constants, casts, missingness and control flow.
            i,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14),
            s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(i, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(i),
            ir.If(b, i, j),
            ir.Coalesce(i, j),
            ir.Let('v', i, v),
            ir.Ref('x'),
            ir.ApplyBinaryPrimOp('+', i, j),
            ir.ApplyUnaryPrimOp('-', i),
            ir.ApplyComparisonOp('EQ', i, j),
            # Arrays and other containers.
            ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)],
                         hl.tarray(hl.tint32)),
            ir.ArrayRef(a, i),
            ir.ArrayLen(a),
            ir.ArraySort(ir.ToStream(a), 'l', 'r',
                         ir.ApplyComparisonOp("LT", ir.Ref('l'), ir.Ref('r'))),
            ir.ToSet(a),
            ir.ToDict(da),
            ir.ToArray(a),
            ir.CastToArray(ir.NA(hl.tset(hl.tint32))),
            # NDArrays.
            ir.MakeNDArray(
                ir.MakeArray([ir.F64(-1.0), ir.F64(1.0)],
                             hl.tarray(hl.tfloat64)),
                ir.MakeTuple([ir.I64(1), ir.I64(2)]), ir.TrueIR()),
            ir.NDArrayShape(nd),
            ir.NDArrayReshape(nd, ir.MakeTuple([ir.I64(5)])),
            ir.NDArrayRef(nd, [ir.I64(1), ir.I64(2)]),
            ir.NDArrayMap(nd, 'v', v),
            ir.NDArrayMatMul(nd, nd),
            ir.LowerBoundOnOrderedCollection(a, i, True),
            ir.GroupByKey(da),
            # Streams.
            ir.StreamMap(st, 'v', v),
            ir.StreamZip([st, st], ['a', 'b'], ir.TrueIR(), 'ExtendNA'),
            ir.StreamFilter(st, 'v', v),
            ir.StreamFlatMap(sta, 'v', ir.ToStream(v)),
            ir.StreamFold(st, ir.I32(0), 'x', 'v', v),
            ir.StreamScan(st, ir.I32(0), 'x', 'v', v),
            ir.StreamJoinRightDistinct(st, st, ['k'], ['k'], 'l', 'r',
                                       ir.I32(1), "left"),
            ir.StreamFor(st, 'v', ir.Void()),
            # Aggregations and scans.
            ir.AggFilter(ir.TrueIR(), ir.I32(0), False),
            ir.AggExplode(ir.StreamRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x',
                          ir.I32(0), False),
            ir.AggGroupBy(ir.TrueIR(), ir.I32(0), False),
            ir.AggArrayPerElement(
                ir.ToArray(ir.StreamRange(ir.I32(0), ir.I32(2), ir.I32(1))),
                'x', 'y', ir.I32(0), False),
            ir.ApplyAggOp('Collect', [], [ir.I32(0)]),
            ir.ApplyScanOp('Collect', [], [ir.I32(0)]),
            ir.ApplyAggOp('CallStats', [ir.I32(2)], [call]),
            ir.ApplyAggOp('TakeBy', [ir.I32(10)],
                          [ir.F64(-2.11), ir.F64(-2.11)]),
            # Structs, tuples, errors, function application and literals.
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', i)]),
            ir.SelectFields(s, ['x', 'z']),
            ir.InsertFields(s, [('x', i)], None),
            ir.GetField(s, 'x'),
            ir.MakeTuple([i, b]),
            ir.GetTupleElement(t, 1),
            ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
            ir.Apply('land', hl.tbool, b, c),
            ir.Apply('toFloat64', hl.tfloat64, i),
            ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
            # Value-producing actions on tables, matrix tables and block
            # matrices.
            ir.TableCount(table),
            ir.TableGetGlobals(table),
            ir.TableCollect(ir.TableKeyBy(table, [], False)),
            ir.TableToValueApply(table, {'name': 'ForceCountTable'}),
            ir.MatrixToValueApply(matrix_read,
                                  {'name': 'ForceCountMatrixTable'}),
            ir.TableAggregate(
                table,
                ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [],
                                                     [ir.I32(0)]))])),
            ir.TableWrite(
                table,
                ir.TableNativeWriter(new_temp_file(), False, True,
                                     "fake_codec_spec$$")),
            ir.TableWrite(
                table,
                ir.TableTextWriter(new_temp_file(), None, True, "concatenated",
                                   ",")),
            ir.MatrixAggregate(
                matrix_read,
                ir.MakeStruct([('foo', ir.ApplyAggOp('Collect', [],
                                                     [ir.I32(0)]))])),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixNativeWriter(new_temp_file(), False, False, "", None,
                                      None, None)),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixNativeWriter(
                    new_temp_file(), False, False, "",
                    '[{"start":{"row_idx":0},"end":{"row_idx": 10},"includeStart":true,"includeEnd":false}]',
                    hl.dtype('array<interval<struct{row_idx:int32}>>'),
                    'some_file')),
            ir.MatrixWrite(
                matrix_read,
                ir.MatrixVCFWriter(new_temp_file(), None,
                                   ir.ExportType.CONCATENATED, None, False)),
            ir.MatrixWrite(matrix_read, ir.MatrixGENWriter(new_temp_file(),
                                                           4)),
            ir.MatrixWrite(matrix_read, ir.MatrixPLINKWriter(new_temp_file())),
            ir.MatrixMultiWrite(
                [matrix_read, matrix_read],
                ir.MatrixNativeMultiWriter(
                    [new_temp_file(), new_temp_file()], False, False, None)),
            ir.BlockMatrixWrite(
                block_matrix_read,
                ir.BlockMatrixNativeWriter('fake.bm', False, False, False)),
            ir.LiftMeOut(ir.I32(1)),
            ir.BlockMatrixWrite(
                block_matrix_read,
                ir.BlockMatrixPersistWriter('x', 'MEMORY_ONLY')),
        ]

        return value_irs
Esempio n. 2
0
    def table_irs(self):
        """Build a list of sample table IR nodes.

        The nodes are built on top of a native table read, a native matrix
        table read and a native block matrix read of bundled test resources.

        Returns:
            list: the constructed table IR nodes, in the order they are
            written.
        """
        b = ir.TrueIR()
        table_read = ir.TableRead(
            ir.TableNativeReader(
                resource('backward_compatability/1.1.0/table/0.ht'), None,
                False), False)
        # NOTE(review): this value is not referenced anywhere below. It is
        # kept because parsing the type string may have effects that are not
        # visible from here -- confirm before removing.
        table_read_row_type = hl.dtype(
            'struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}'
        )

        matrix_read = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
                None, False), False, False)

        block_matrix_read = ir.BlockMatrixRead(
            ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))

        # Literal array-of-arrays IR passed to BlockMatrixToTableApply below.
        aa = hl.literal([[0.00], [0.01], [0.02]])._ir

        table_irs = [
            ir.TableKeyBy(table_read, ['m', 'd'], False),
            ir.TableFilter(table_read, b), table_read,
            ir.MatrixColsTable(matrix_read),
            ir.TableAggregateByKey(table_read,
                                   ir.MakeStruct([('a', ir.I32(5))])),
            ir.TableKeyByAndAggregate(table_read,
                                      ir.MakeStruct([('a', ir.I32(5))]),
                                      ir.MakeStruct([('b', ir.I32(5))]), 1, 2),
            ir.TableJoin(table_read, ir.TableRange(100, 10), 'inner', 1),
            ir.MatrixEntriesTable(matrix_read),
            ir.MatrixRowsTable(matrix_read),
            ir.TableParallelize(
                ir.MakeStruct([('rows',
                                ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)),
                                           [{
                                               'a': None
                                           }, {
                                               'a': 5
                                           }, {
                                               'a': -3
                                           }])),
                               ('global', ir.MakeStruct([]))]), None),
            ir.TableMapRows(
                ir.TableKeyBy(table_read, []),
                ir.MakeStruct([('a', ir.GetField(ir.Ref('row'), 'f32')),
                               ('b', ir.F64(-2.11))])),
            ir.TableMapGlobals(
                table_read,
                ir.MakeStruct([('foo', ir.NA(hl.tarray(hl.tint32)))])),
            ir.TableRange(100, 10),
            ir.TableRepartition(table_read, 10,
                                ir.RepartitionStrategy.COALESCE),
            ir.TableUnion([ir.TableRange(100, 10),
                           ir.TableRange(50, 10)]),
            ir.TableExplode(table_read, ['mset']),
            ir.TableHead(table_read, 10),
            ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'),
                                                            ('m', 'D')]),
            ir.TableDistinct(table_read),
            ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
            ir.TableRename(table_read, {'idx': 'idx_foo'},
                           {'global_f32': 'global_foo'}),
            ir.TableMultiWayZipJoin([table_read, table_read], '__data',
                                    '__globals'),
            ir.MatrixToTableApply(
                matrix_read, {
                    'name': 'LinearRegressionRowsSingle',
                    'yFields': ['col_m'],
                    'xField': 'entry_m',
                    'covFields': [],
                    'rowBlockSize': 10,
                    'passThrough': []
                }),
            ir.TableToTableApply(table_read, {
                'name': 'TableFilterPartitions',
                'parts': [0],
                'keep': True
            }),
            ir.BlockMatrixToTableApply(block_matrix_read, aa, {
                'name': 'PCRelate',
                'maf': 0.01,
                'blockSize': 4096
            }),
            ir.TableFilterIntervals(table_read, [
                hl.utils.Interval(hl.utils.Struct(row_idx=0),
                                  hl.utils.Struct(row_idx=10))
            ],
                                    hl.tstruct(row_idx=hl.tint32),
                                    keep=False),
            ir.TableMapPartitions(table_read, 'glob', 'rows', ir.Ref('rows'))
        ]

        return table_irs
Esempio n. 3
0
    def matrix_irs(self):
        """Return a list of sample matrix IR nodes.

        ``hl.index_bgen`` is called on the example BGEN resource first; the
        nodes are then built on top of a native matrix table read, a native
        table read and a ``MatrixRangeReader``-backed read.
        """
        hl.index_bgen(
            resource('example.8bits.bgen'),
            reference_genome=hl.get_reference('GRCh37'),
            contig_recoding={'01': '1'},
        )

        # Struct wrapping one 'Collect' aggregation; passed to both
        # aggregate-by-key nodes below.
        collect_struct = ir.MakeStruct(
            [('x', ir.ApplyAggOp('Collect', [], [ir.I32(0)]))]
        )

        native_mt = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt'),
                None,
                False,
            ),
            False,
            False,
        )
        native_ht = ir.TableRead(
            ir.TableNativeReader(
                resource('backward_compatability/1.0.0/table/0.ht'),
                None,
                False,
            ),
            False,
        )
        range_mt = ir.MatrixRead(ir.MatrixRangeReader(1, 1, 10))

        return [
            ir.MatrixRepartition(
                range_mt, 100, ir.RepartitionStrategy.SHUFFLE
            ),
            ir.MatrixUnionRows(range_mt, range_mt),
            ir.MatrixDistinctByRow(range_mt),
            ir.MatrixRowsHead(native_mt, 5),
            ir.MatrixColsHead(native_mt, 5),
            ir.CastTableToMatrix(
                ir.CastMatrixToTable(native_mt, '__entries', '__cols'),
                '__entries',
                '__cols',
                [],
            ),
            ir.MatrixAggregateRowsByKey(
                native_mt, collect_struct, collect_struct
            ),
            ir.MatrixAggregateColsByKey(
                native_mt, collect_struct, collect_struct
            ),
            native_mt,
            range_mt,
            ir.MatrixRead(
                ir.MatrixVCFReader(
                    resource('sample.vcf'),
                    ['GT'],
                    hl.tfloat64,
                    None,
                    None,
                    None,
                    None,
                    None,
                    None,
                    False,
                    True,
                    False,
                    True,
                    None,
                    None,
                    None,
                )
            ),
            ir.MatrixRead(
                ir.MatrixBGENReader(
                    resource('example.8bits.bgen'), None, {}, 10, 1, None
                )
            ),
            ir.MatrixFilterRows(native_mt, ir.FalseIR()),
            ir.MatrixFilterCols(native_mt, ir.FalseIR()),
            ir.MatrixFilterEntries(native_mt, ir.FalseIR()),
            ir.MatrixChooseCols(native_mt, [1, 0]),
            ir.MatrixMapCols(
                native_mt, ir.MakeStruct([('x', ir.I64(20))]), ['x']
            ),
            ir.MatrixKeyRowsBy(native_mt, ['row_i64'], False),
            ir.MatrixMapRows(
                ir.MatrixKeyRowsBy(native_mt, []),
                ir.MakeStruct([('x', ir.I64(20))]),
            ),
            ir.MatrixMapEntries(
                native_mt, ir.MakeStruct([('x', ir.I64(20))])
            ),
            ir.MatrixMapGlobals(
                native_mt, ir.MakeStruct([('x', ir.I64(20))])
            ),
            ir.MatrixCollectColsByKey(native_mt),
            ir.MatrixExplodeRows(native_mt, ['row_aset']),
            ir.MatrixExplodeCols(native_mt, ['col_aset']),
            ir.MatrixAnnotateRowsTable(native_mt, native_ht, '__foo'),
            ir.MatrixAnnotateColsTable(native_mt, native_ht, '__foo'),
            ir.MatrixToMatrixApply(
                native_mt,
                {'name': 'MatrixFilterPartitions', 'parts': [0], 'keep': True},
            ),
            ir.MatrixRename(
                native_mt,
                {'global_f32': 'global_foo'},
                {'col_f32': 'col_foo'},
                {'row_aset': 'row_aset2'},
                {'entry_f32': 'entry_foo'},
            ),
            ir.MatrixFilterIntervals(
                native_mt,
                [
                    hl.utils.Interval(
                        hl.utils.Struct(row_idx=0),
                        hl.utils.Struct(row_idx=10),
                    )
                ],
                hl.tstruct(row_idx=hl.tint32),
                keep=False,
            ),
        ]
Esempio n. 4
0
    def value_irs(self):
        """Return a list of sample value IR nodes.

        Covers constants, control flow, array operations, aggregations,
        struct / tuple / string operations, function application, and
        value-producing table and matrix table actions.
        """
        # Operands shared by many of the nodes below.
        always_true = ir.TrueIR()
        ref_c = ir.Ref('c')
        int_five = ir.I32(5)
        int_seven = ir.I32(7)
        hail_str = ir.Str('Hail')
        ref_a = ir.Ref('a')
        ref_aa = ir.Ref('aa')
        ref_da = ir.Ref('da')
        ref_v = ir.Ref('v')
        ref_s = ir.Ref('s')
        ref_t = ir.Ref('t')
        ref_call = ir.Ref('call')

        range_table = ir.TableRange(5, 3)

        native_mt = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt')
            ),
            False,
            False,
        )

        return [
            # Constants, casts, missingness and control flow.
            int_five,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14),
            ref_s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(int_five, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(int_five),
            ir.If(always_true, int_five, int_seven),
            ir.Let('v', int_five, ref_v),
            ir.Ref('x'),
            ir.ApplyBinaryOp('+', int_five, int_seven),
            ir.ApplyUnaryOp('-', int_five),
            ir.ApplyComparisonOp('EQ', int_five, int_seven),
            # Arrays and other containers.
            ir.MakeArray(
                [int_five, ir.NA(hl.tint32), ir.I32(-3)],
                hl.tarray(hl.tint32),
            ),
            ir.ArrayRef(ref_a, int_five),
            ir.ArrayLen(ref_a),
            ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
            ir.ArraySort(ref_a, always_true, False),
            ir.ToSet(ref_a),
            ir.ToDict(ref_da),
            ir.ToArray(ref_a),
            ir.LowerBoundOnOrderedCollection(ref_a, int_five, True),
            ir.GroupByKey(ref_da),
            ir.ArrayMap(ref_a, 'v', ref_v),
            ir.ArrayFilter(ref_a, 'v', ref_v),
            ir.ArrayFlatMap(ref_aa, 'v', ref_v),
            ir.ArrayFold(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayScan(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayLeftJoinDistinct(
                ref_a, ref_a, 'l', 'r', ir.I32(0), ir.I32(1)
            ),
            ir.ArrayFor(ref_a, 'v', ir.Void()),
            # Aggregations and scans.
            ir.AggFilter(ir.TrueIR(), ir.I32(0)),
            ir.AggExplode(
                ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)),
                'x',
                ir.I32(0),
            ),
            ir.AggGroupBy(ir.TrueIR(), ir.I32(0)),
            ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]),
            ir.ApplyScanOp('Collect', [], None, [ir.I32(0)]),
            ir.ApplyAggOp(
                'Histogram',
                [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)],
                None,
                [ir.F64(-2.11)],
            ),
            ir.ApplyAggOp('CallStats', [], [ir.I32(2)], [ref_call]),
            ir.ApplyAggOp(
                'TakeBy',
                [ir.I32(10)],
                None,
                [ir.F64(-2.11), ir.F64(-2.11)],
            ),
            # Structs, tuples, strings, function application and literals.
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', int_five)]),
            ir.SelectFields(ref_s, ['x', 'z']),
            ir.InsertFields(ref_s, [('x', int_five)], None),
            ir.GetField(ref_s, 'x'),
            ir.MakeTuple([int_five, always_true]),
            ir.GetTupleElement(ref_t, 1),
            ir.StringSlice(hail_str, ir.I32(1), ir.I32(2)),
            ir.StringLength(hail_str),
            ir.In(2, hl.tfloat64),
            ir.Die(ir.Str('mumblefoo'), hl.tfloat64),
            ir.Apply('&&', always_true, ref_c),
            ir.Apply('toFloat64', int_five),
            ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
            ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
            # Value-producing table and matrix table actions.
            ir.TableCount(range_table),
            ir.TableGetGlobals(range_table),
            ir.TableCollect(range_table),
            ir.TableToValueApply(range_table, {'name': 'ForceCountTable'}),
            ir.MatrixToValueApply(
                native_mt, {'name': 'ForceCountMatrixTable'}
            ),
            ir.TableAggregate(
                range_table,
                ir.MakeStruct(
                    [('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))]
                ),
            ),
            ir.TableWrite(
                range_table, new_temp_file(), False, True, "fake_codec_spec$$"
            ),
            ir.MatrixAggregate(
                native_mt,
                ir.MakeStruct(
                    [('foo', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))]
                ),
            ),
            ir.MatrixWrite(
                native_mt,
                ir.MatrixNativeWriter(new_temp_file(), False, False, ""),
            ),
            ir.MatrixWrite(
                native_mt,
                ir.MatrixVCFWriter(new_temp_file(), None, False, None),
            ),
            ir.MatrixWrite(native_mt, ir.MatrixGENWriter(new_temp_file(), 4)),
            ir.MatrixWrite(native_mt, ir.MatrixPLINKWriter(new_temp_file())),
            ir.MatrixMultiWrite(
                [native_mt, native_mt],
                ir.MatrixNativeMultiWriter(new_temp_file(), False, False),
            ),
        ]
Esempio n. 5
0
    def value_irs(self):
        """Return a list of sample value IR nodes built from typed references.

        Every ``ir.Ref`` here is constructed with an explicit type, and the
        aggregator nodes take an ``ir.AggSignature`` describing the
        aggregator instead of an operator name.
        """
        # Operands shared by many of the nodes below.
        always_true = ir.TrueIR()
        ref_c = ir.Ref('c', hl.tbool)
        int_five = ir.I32(5)
        int_seven = ir.I32(7)
        hail_str = ir.Str('Hail')
        ref_a = ir.Ref('a', hl.tarray(hl.tint32))
        ref_aa = ir.Ref('aa', hl.tarray(hl.tarray(hl.tint32)))
        ref_da = ir.Ref('da', hl.tarray(hl.ttuple(hl.tint32, hl.tstr)))
        ref_v = ir.Ref('v', hl.tint32)
        ref_s = ir.Ref(
            's', hl.tstruct(x=hl.tint32, y=hl.tint64, z=hl.tfloat64)
        )
        ref_t = ir.Ref('t', hl.ttuple(hl.tint32, hl.tint64, hl.tfloat64))
        ref_call = ir.Ref('call', hl.tcall)

        # Aggregator signatures used by the agg / scan / init / seq ops.
        sig_collect = ir.AggSignature('Collect', [], None, [hl.tint32])
        sig_call_stats = ir.AggSignature(
            'CallStats', [], [hl.tint32], [hl.tcall]
        )
        sig_hist = ir.AggSignature(
            'Histogram',
            [hl.tfloat64, hl.tfloat64, hl.tint32],
            None,
            [hl.tfloat64],
        )
        sig_take_by = ir.AggSignature(
            'TakeBy', [hl.tint32], None, [hl.tfloat64, hl.tfloat64]
        )

        return [
            # Constants, casts, missingness and control flow.
            int_five,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14),
            ref_s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(int_five, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(int_five),
            ir.If(always_true, int_five, int_seven),
            ir.Let('v', int_five, ref_v),
            ir.Ref('x', hl.tint32),
            ir.ApplyBinaryOp('+', int_five, int_seven),
            ir.ApplyUnaryOp('-', int_five),
            ir.ApplyComparisonOp('EQ', int_five, int_seven),
            # Arrays and other containers.
            ir.MakeArray(
                [int_five, ir.NA(hl.tint32), ir.I32(-3)],
                hl.tarray(hl.tint32),
            ),
            ir.ArrayRef(ref_a, int_five),
            ir.ArrayLen(ref_a),
            ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
            ir.ArraySort(ref_a, always_true, False),
            ir.ToSet(ref_a),
            ir.ToDict(ref_da),
            ir.ToArray(ref_a),
            ir.LowerBoundOnOrderedCollection(ref_a, int_five, True),
            ir.GroupByKey(ref_da),
            ir.ArrayMap(ref_a, 'v', ref_v),
            ir.ArrayFilter(ref_a, 'v', ref_v),
            ir.ArrayFlatMap(ref_aa, 'v', ref_v),
            ir.ArrayFold(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayFor(ref_a, 'v', ir.Void()),
            # Aggregations, scans and their init / seq ops.
            ir.ApplyAggOp(ir.I32(0), [], None, sig_collect),
            ir.ApplyScanOp(ir.I32(0), [], None, sig_collect),
            ir.ApplyAggOp(
                ir.F64(-2.11),
                [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)],
                None,
                sig_hist,
            ),
            ir.ApplyAggOp(ref_call, [], [ir.I32(2)], sig_call_stats),
            ir.ApplyAggOp(ir.F64(-2.11), [ir.I32(10)], None, sig_take_by),
            ir.InitOp(ir.I32(0), [ir.I32(2)], sig_call_stats),
            ir.SeqOp(ir.I32(0), [int_five], sig_collect),
            ir.SeqOp(
                ir.I32(0), [ir.F64(-2.11), ir.I32(17)], sig_take_by
            ),
            # Structs, tuples, strings and function application.
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', int_five)]),
            ir.SelectFields(ref_s, ['x', 'z']),
            ir.InsertFields(ref_s, [('x', int_five)]),
            ir.GetField(ref_s, 'x'),
            ir.MakeTuple([int_five, always_true]),
            ir.GetTupleElement(ref_t, 1),
            ir.StringSlice(hail_str, ir.I32(1), ir.I32(2)),
            ir.StringLength(hail_str),
            ir.In(2, hl.tfloat64),
            ir.Die('mumblefoo', hl.tfloat64),
            ir.Apply('&&', always_true, ref_c),
            ir.Apply('toFloat64', int_five),
            ir.Apply('isDefined', ref_s),
            ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
        ]
Esempio n. 6
0
    def value_irs(self):
        """Return a list of sample value IR nodes.

        The aggregator nodes here are built from an ``ir.AggSignature``; the
        ``ApplyAggOp`` / ``ApplyScanOp`` nodes are additionally given an
        explicit Hail type as their last argument.
        """
        # Operands shared by many of the nodes below.
        always_true = ir.TrueIR()
        ref_c = ir.Ref('c')
        int_five = ir.I32(5)
        int_seven = ir.I32(7)
        hail_str = ir.Str('Hail')
        ref_a = ir.Ref('a')
        ref_aa = ir.Ref('aa')
        ref_da = ir.Ref('da')
        ref_v = ir.Ref('v')
        ref_s = ir.Ref('s')
        ref_t = ir.Ref('t')
        ref_call = ir.Ref('call')

        # Aggregator signatures, each followed by the type passed alongside
        # it to ApplyAggOp below.
        sig_collect = ir.AggSignature('Collect', [], None, [hl.tint32])

        sig_call_stats = ir.AggSignature(
            'CallStats', [], [hl.tint32], [hl.tcall]
        )
        type_call_stats = hl.tstruct(
            AC=hl.tarray(hl.tint32),
            AF=hl.tarray(hl.tfloat64),
            AN=hl.tint32,
            homozygote_count=hl.tarray(hl.tint32),
        )

        sig_hist = ir.AggSignature(
            'Histogram',
            [hl.tfloat64, hl.tfloat64, hl.tint32],
            None,
            [hl.tfloat64],
        )
        type_hist = hl.tstruct(
            bin_edges=hl.tarray(hl.tfloat64),
            bin_freq=hl.tarray(hl.tint64),
            n_smaller=hl.tint64,
            n_larger=hl.tint64,
        )

        sig_take_by = ir.AggSignature(
            'TakeBy', [hl.tint32], None, [hl.tfloat64, hl.tfloat64]
        )
        type_take_by = hl.tarray(hl.tfloat64)

        return [
            # Constants, casts, missingness and control flow.
            int_five,
            ir.I64(5),
            ir.F32(3.14),
            ir.F64(3.14),
            ref_s,
            ir.TrueIR(),
            ir.FalseIR(),
            ir.Void(),
            ir.Cast(int_five, hl.tfloat64),
            ir.NA(hl.tint32),
            ir.IsNA(int_five),
            ir.If(always_true, int_five, int_seven),
            ir.Let('v', int_five, ref_v),
            ir.Ref('x'),
            ir.ApplyBinaryOp('+', int_five, int_seven),
            ir.ApplyUnaryOp('-', int_five),
            ir.ApplyComparisonOp('EQ', int_five, int_seven),
            # Arrays and other containers.
            ir.MakeArray(
                [int_five, ir.NA(hl.tint32), ir.I32(-3)],
                hl.tarray(hl.tint32),
            ),
            ir.ArrayRef(ref_a, int_five),
            ir.ArrayLen(ref_a),
            ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
            ir.ArraySort(ref_a, always_true, False),
            ir.ToSet(ref_a),
            ir.ToDict(ref_da),
            ir.ToArray(ref_a),
            ir.LowerBoundOnOrderedCollection(ref_a, int_five, True),
            ir.GroupByKey(ref_da),
            ir.ArrayMap(ref_a, 'v', ref_v),
            ir.ArrayFilter(ref_a, 'v', ref_v),
            ir.ArrayFlatMap(ref_aa, 'v', ref_v),
            ir.ArrayFold(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayScan(ref_a, ir.I32(0), 'x', 'v', ref_v),
            ir.ArrayFor(ref_a, 'v', ir.Void()),
            # Aggregations, scans and their init / seq ops.
            ir.AggFilter(ir.TrueIR(), ir.I32(0)),
            ir.AggExplode(
                ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)),
                'x',
                ir.I32(0),
            ),
            ir.AggGroupBy(ir.TrueIR(), ir.I32(0)),
            ir.ApplyAggOp(
                [ir.I32(0)], [], None, sig_collect, hl.tarray(hl.tint32)
            ),
            ir.ApplyScanOp(
                [ir.I32(0)], [], None, sig_collect, hl.tarray(hl.tint32)
            ),
            ir.ApplyAggOp(
                [ir.F64(-2.11)],
                [ir.F64(-5.0), ir.F64(5.0), ir.I32(100)],
                None,
                sig_hist,
                type_hist,
            ),
            ir.ApplyAggOp(
                [ref_call], [], [ir.I32(2)], sig_call_stats, type_call_stats
            ),
            ir.ApplyAggOp(
                [ir.F64(-2.11), ir.F64(-2.11)],
                [ir.I32(10)],
                None,
                sig_take_by,
                type_take_by,
            ),
            ir.InitOp(ir.I32(0), [ir.I32(2)], sig_call_stats),
            ir.SeqOp(ir.I32(0), [int_five], sig_collect),
            ir.SeqOp(
                ir.I32(0), [ir.F64(-2.11), ir.I32(17)], sig_take_by
            ),
            # Structs, tuples, strings, function application and literals.
            ir.Begin([ir.Void()]),
            ir.MakeStruct([('x', int_five)]),
            ir.SelectFields(ref_s, ['x', 'z']),
            ir.InsertFields(ref_s, [('x', int_five)]),
            ir.GetField(ref_s, 'x'),
            ir.MakeTuple([int_five, always_true]),
            ir.GetTupleElement(ref_t, 1),
            ir.StringSlice(hail_str, ir.I32(1), ir.I32(2)),
            ir.StringLength(hail_str),
            ir.In(2, hl.tfloat64),
            ir.Die('mumblefoo', hl.tfloat64),
            ir.Apply('&&', always_true, ref_c),
            ir.Apply('toFloat64', int_five),
            ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
            ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
        ]
Esempio n. 7
0
    def test_matrix_ir_parses(self):
        """Check that the string form of each sample matrix IR parses.

        Each IR is rendered with ``str`` and handed to the JVM-side
        ``IRParser.parse_matrix_ir``. Any failure is re-raised as a
        ``ValueError`` whose message is the IR string, chained to the
        original exception.
        """
        hl.index_bgen(
            resource('example.8bits.bgen'),
            reference_genome=hl.get_reference('GRCh37'),
            contig_recoding={'01': '1'},
        )

        # Struct wrapping one 'Collect' aggregation; passed to both
        # aggregate-by-key nodes below.
        collect_struct = ir.MakeStruct(
            [('x', ir.ApplyAggOp('Collect', [], None, [ir.I32(0)]))]
        )

        native_mt = ir.MatrixRead(
            ir.MatrixNativeReader(
                resource('backward_compatability/1.0.0/matrix_table/0.hmt')
            ),
            False,
            False,
        )
        native_ht = ir.TableRead(
            ir.TableNativeReader(
                resource('backward_compatability/1.0.0/table/0.ht')
            ),
            False,
        )
        range_mt = ir.MatrixRead(ir.MatrixRangeReader(1, 1, 10))

        candidates = [
            ir.MatrixRepartition(
                range_mt, 100, ir.RepartitionStrategy.SHUFFLE
            ),
            ir.MatrixUnionRows(range_mt, range_mt),
            ir.MatrixDistinctByRow(range_mt),
            ir.CastTableToMatrix(
                ir.CastMatrixToTable(native_mt, '__entries', '__cols'),
                '__entries',
                '__cols',
                [],
            ),
            ir.MatrixAggregateRowsByKey(
                native_mt, collect_struct, collect_struct
            ),
            ir.MatrixAggregateColsByKey(
                native_mt, collect_struct, collect_struct
            ),
            native_mt,
            range_mt,
            ir.MatrixRead(
                ir.MatrixVCFReader(
                    resource('sample.vcf'),
                    ['GT'],
                    hl.tfloat64,
                    None,
                    None,
                    None,
                    None,
                    False,
                    True,
                    False,
                    True,
                    None,
                )
            ),
            ir.MatrixRead(
                ir.MatrixBGENReader(
                    resource('example.8bits.bgen'), None, {}, 10, 1, None
                )
            ),
            ir.MatrixFilterRows(native_mt, ir.FalseIR()),
            ir.MatrixFilterCols(native_mt, ir.FalseIR()),
            ir.MatrixFilterEntries(native_mt, ir.FalseIR()),
            ir.MatrixChooseCols(native_mt, [1, 0]),
            ir.MatrixMapCols(
                native_mt, ir.MakeStruct([('x', ir.I64(20))]), ['x']
            ),
            ir.MatrixKeyRowsBy(native_mt, ['row_i64'], False),
            ir.MatrixMapRows(
                ir.MatrixKeyRowsBy(native_mt, []),
                ir.MakeStruct([('x', ir.I64(20))]),
            ),
            ir.MatrixMapEntries(
                native_mt, ir.MakeStruct([('x', ir.I64(20))])
            ),
            ir.MatrixMapGlobals(
                native_mt, ir.MakeStruct([('x', ir.I64(20))])
            ),
            ir.TableToMatrixTable(
                native_ht,
                ['f32', 'i64'],
                ['m', 'astruct'],
                ['aset'],
                ['mset'],
                100,
            ),
            ir.MatrixCollectColsByKey(native_mt),
            ir.MatrixExplodeRows(native_mt, ['row_aset']),
            ir.MatrixExplodeCols(native_mt, ['col_aset']),
            ir.MatrixAnnotateRowsTable(native_mt, native_ht, '__foo'),
            ir.MatrixAnnotateColsTable(native_mt, native_ht, '__foo'),
            ir.MatrixToMatrixApply(
                native_mt,
                {'name': 'MatrixFilterPartitions', 'parts': [0], 'keep': True},
            ),
        ]

        for matrix_ir in candidates:
            try:
                Env.hail().expr.ir.IRParser.parse_matrix_ir(str(matrix_ir))
            except Exception as err:
                # Put the offending IR text in the message so the failing
                # node can be identified.
                raise ValueError(str(matrix_ir)) from err