def table_irs(self): b = ir.TrueIR() table_read = ir.TableRead( 'src/test/resources/backward_compatability/1.0.0/table/0.ht', False, None) table_read_row_type = hl.dtype( 'struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}' ) matrix_read = ir.MatrixRead( 'src/test/resources/backward_compatability/1.0.0/matrix_table/0.hmt', False, False) range = ir.TableRange(10, 4) table_irs = [ ir.TableUnkey(table_read), ir.TableKeyBy(table_read, ['m', 'd'], 1, True), ir.TableFilter(table_read, b), table_read, ir.MatrixColsTable(matrix_read), ir.TableAggregateByKey(table_read, ir.MakeStruct([('a', ir.I32(5))])), ir.TableKeyByAndAggregate(table_read, ir.MakeStruct([('a', ir.I32(5))]), ir.MakeStruct([('b', ir.I32(5))]), 1, 2), ir.TableJoin(table_read, ir.TableRange(100, 10), 'inner'), ir.MatrixEntriesTable(matrix_read), ir.MatrixRowsTable(matrix_read), ir.TableParallelize( 'Table{global:Struct{},key:None,row:Struct{a:Int32}}', ir.Value(hl.tarray(hl.tstruct(a=hl.tint32)), [{ 'a': None }, { 'a': 5 }, { 'a': -3 }]), None), ir.TableMapRows( table_read, ir.MakeStruct([('a', ir.GetField(ir.Ref('row', table_read_row_type), 'f32')), ('b', ir.F64(-2.11))]), None, None), ir.TableMapGlobals( table_read, ir.MakeStruct([('foo', ir.NA(hl.tarray(hl.tint32)))]), ir.Value(hl.tstruct(), {})), ir.TableRange(100, 10), ir.TableRepartition(table_read, 10, False), ir.TableUnion([ir.TableRange(100, 10), ir.TableRange(50, 10)]), ir.TableExplode(table_read, 'mset'), ir.TableHead(table_read, 10), ir.TableOrderBy(ir.TableUnkey(table_read), [('m', 'A'), ('m', 'D')]), ir.TableDistinct(table_read), ir.LocalizeEntries(matrix_read, '__entries') ] return table_irs
def test_matrix_ir_parses(self): hl.index_bgen(resource('example.8bits.bgen'), reference_genome=hail.get_reference('GRCh37'), contig_recoding={'01': '1'}) collect_sig = ir.AggSignature('Collect', [], None, [hl.tint32]) collect = ir.MakeStruct([('x', ir.ApplyAggOp([ir.I32(0)], [], None, collect_sig, hl.tarray(hl.tint32)))]) matrix_read = ir.MatrixRead( resource('backward_compatability/1.0.0/matrix_table/0.hmt'), False, False) table_read = ir.TableRead( resource('backward_compatability/1.0.0/table/0.ht'), False, None) matrix_irs = [ ir.MatrixUnionRows(ir.MatrixRange(5, 5, 1), ir.MatrixRange(5, 5, 1)), ir.UnlocalizeEntries(ir.LocalizeEntries(matrix_read, '__entries'), ir.MatrixColsTable(matrix_read), '__entries'), ir.MatrixAggregateRowsByKey(matrix_read, collect, collect), ir.MatrixAggregateColsByKey(matrix_read, collect, collect), ir.MatrixRange(1, 1, 10), ir.MatrixImportVCF([resource('sample.vcf')], False, False, None, None, False, ['GT'], hail.get_reference('GRCh37'), {}, True, False), ir.MatrixImportBGEN([resource('example.8bits.bgen')], ['GP'], resource('example.sample'), {}, 10, 1, ['varid'], None), ir.MatrixFilterRows(matrix_read, ir.FalseIR()), ir.MatrixFilterCols(matrix_read, ir.FalseIR()), ir.MatrixFilterEntries(matrix_read, ir.FalseIR()), ir.MatrixChooseCols(matrix_read, [1, 0]), ir.MatrixMapCols(matrix_read, ir.MakeStruct([('x', ir.I64(20))]), ['x']), ir.MatrixKeyRowsBy(matrix_read, ['row_i64'], False), ir.MatrixMapRows(ir.MatrixKeyRowsBy(matrix_read, []), ir.MakeStruct([('x', ir.I64(20))])), ir.MatrixMapEntries(matrix_read, ir.MakeStruct([('x', ir.I64(20))])), ir.MatrixMapGlobals(matrix_read, ir.MakeStruct([('x', ir.I64(20))])), ir.TableToMatrixTable(table_read, ['f32', 'i64'], ['m', 'astruct'], ['aset'], ['mset'], 100), ir.MatrixCollectColsByKey(matrix_read), ir.MatrixExplodeRows(matrix_read, ['row_aset']), ir.MatrixExplodeCols(matrix_read, ['col_aset']), ir.MatrixAnnotateRowsTable(matrix_read, table_read, '__foo', None), ir.MatrixAnnotateColsTable(matrix_read, table_read, '__foo'), ] for x in matrix_irs: try: Env.hail().expr.Parser.parse_matrix_ir(str(x)) except Exception as e: raise ValueError(str(x)) from e
def table_irs(self): b = ir.TrueIR() table_read = ir.TableRead( ir.TableNativeReader(resource('backward_compatability/1.0.0/table/0.ht'), None, False), False) table_read_row_type = hl.dtype('struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}') matrix_read = ir.MatrixRead( ir.MatrixNativeReader(resource('backward_compatability/1.0.0/matrix_table/0.hmt'), None, False), False, False) range = ir.TableRange(10, 4) table_irs = [ ir.TableKeyBy(table_read, ['m', 'd'], False), ir.TableFilter(table_read, b), table_read, ir.MatrixColsTable(matrix_read), ir.TableAggregateByKey( table_read, ir.MakeStruct([('a', ir.I32(5))])), ir.TableKeyByAndAggregate( table_read, ir.MakeStruct([('a', ir.I32(5))]), ir.MakeStruct([('b', ir.I32(5))]), 1, 2), ir.TableJoin( table_read, ir.TableRange(100, 10), 'inner', 1), ir.MatrixEntriesTable(matrix_read), ir.MatrixRowsTable(matrix_read), ir.TableParallelize(ir.MakeStruct([ ('rows', ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)), [{'a':None}, {'a':5}, {'a':-3}])), ('global', ir.MakeStruct([]))]), None), ir.TableMapRows( ir.TableKeyBy(table_read, []), ir.MakeStruct([ ('a', ir.GetField(ir.Ref('row'), 'f32')), ('b', ir.F64(-2.11))])), ir.TableMapGlobals( table_read, ir.MakeStruct([ ('foo', ir.NA(hl.tarray(hl.tint32)))])), ir.TableRange(100, 10), ir.TableRepartition(table_read, 10, ir.RepartitionStrategy.COALESCE), ir.TableUnion( [ir.TableRange(100, 10), ir.TableRange(50, 10)]), ir.TableExplode(table_read, ['mset']), ir.TableHead(table_read, 10), ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'), ('m', 'D')]), ir.TableDistinct(table_read), ir.CastMatrixToTable(matrix_read, '__entries', '__cols'), ir.TableRename(table_read, {'idx': 'idx_foo'}, {'global_f32': 'global_foo'}), ir.TableMultiWayZipJoin([table_read, table_read], '__data', '__globals'), ir.MatrixToTableApply(matrix_read, {'name': 'LinearRegressionRowsSingle', 'yFields': ['col_m'], 'xField': 'entry_m', 'covFields': [], 'rowBlockSize': 10, 'passThrough': []}), ir.TableToTableApply(table_read, {'name': 'TableFilterPartitions', 'parts': [0], 'keep': True}), ir.TableFilterIntervals(table_read, [hl.utils.Interval(hl.utils.Struct(row_idx=0), hl.utils.Struct(row_idx=10))], hl.tstruct(row_idx=hl.tint32), keep=False), ] return table_irs