def __matmul__(self, right: 'DNDArray') -> 'DNDArray':
    """Return the blocked matrix product ``self @ right``.

    One table row is created per block multiplication, keyed by
    ``(r, c, k)``: ``r`` indexes a block row of ``self``, ``c`` a block
    column of ``right`` and ``k`` the shared inner block dimension.  The
    ``left[r, k] @ right[k, c]`` products are then summed over ``k`` for
    every ``(r, c)`` key.

    Both operands must have the same ``block_size`` and compatible inner
    dimensions (``self.n_cols == right.n_rows`` and
    ``self.n_block_cols == right.n_block_rows``); this is enforced with
    assertions.
    """
    left = self
    assert left.block_size == right.block_size
    assert left.n_cols == right.n_rows
    assert left.n_block_cols == right.n_block_rows

    n_rows = left.n_rows
    n_cols = right.n_cols
    block_size = left.block_size

    n_block_rows = left.n_block_rows
    n_block_inner = left.n_block_cols
    n_block_cols = right.n_block_cols
    n_multiplies = n_block_rows * n_block_cols * n_block_inner

    # One row (and one partition) per block multiplication; idx is decoded
    # into (r, c, k) with k varying fastest.
    o = hl.utils.range_table(n_multiplies, n_partitions=n_multiplies)
    o = o.key_by(
        r=o.idx // (n_block_cols * n_block_inner),
        c=(o.idx % (n_block_cols * n_block_inner)) // n_block_inner,
        k=o.idx % n_block_inner
    ).select()
    # NOTE(review): _key_by_assert_sorted presumably re-keys without a
    # shuffle, trusting the stated order -- confirm against Table.
    o = o._key_by_assert_sorted('r', 'c', 'k')
    o = o._key_by_assert_sorted('r', 'k', 'c')
    o = o.annotate(left=left.m[o.r, o.k].block)
    o = o._key_by_assert_sorted('k', 'c', 'r')
    o = o.annotate(right=right.m[o.k, o.c].block)

    o = o.annotate(product=o.left @ o.right)

    # FIXME: use ndarray sum / fma
    def ndarray_to_array(ndarray):
        # Flatten a 2-d ndarray into a column-major array.  The element at
        # flat position ``absolute`` is ndarray[absolute % rows,
        # absolute // rows].  The helper indexes its own argument rather
        # than a captured table field, so it is correct for any ndarray.
        return hl.rbind(
            ndarray.shape[0],
            ndarray.shape[1],
            lambda nd_rows, nd_cols: hl.range(hl.int(nd_rows * nd_cols)).map(
                lambda absolute: ndarray[absolute % nd_rows, absolute // nd_rows]))

    o = o.annotate(shape=o.product.shape,
                   product=ndarray_to_array(o.product))
    o = o._key_by_assert_sorted('r', 'c', 'k')
    o = o._key_by_assert_sorted('r', 'c')

    # Local imports, kept inside the function as in the original.
    import hail.methods.misc as misc
    misc.require_key(o, 'collect_by_key')
    import hail.ir as ir

    # Sum the flattened products element-wise per (r, c) key.  The shape is
    # taken from a single row of the group -- assumes every product in a
    # group has the same shape (TODO confirm).
    o = Table(ir.TableAggregateByKey(
        o._tir,
        hl.struct(
            shape=hl.agg.take(o.shape, 1)[0],
            block=hl.agg.array_sum(o.product))._ir))
    # Rebuild an ndarray block from the summed column-major data.
    o = o.annotate(block=hl.nd.from_column_major(o.block, o.shape))
    o = o.select('block')
    o = o.select_globals(
        r_field='r',
        c_field='c',
        n_rows=n_rows,
        n_cols=n_cols,
        n_block_rows=n_block_rows,
        n_block_cols=n_block_cols,
        block_size=block_size)
    return DNDArray(o)
def table_irs(self):
    """Return a list of table IR nodes built from fixed sample inputs.

    The nodes are constructed over a table read and a matrix-table read of
    the 1.0.0 backward-compatibility resources, plus a few ``TableRange``
    sources.  Presumably consumed by IR round-trip/parsing tests -- confirm
    against callers.
    """
    b = ir.TrueIR()
    table_read = ir.TableRead(
        'src/test/resources/backward_compatability/1.0.0/table/0.ht', False, None)
    # Row type of the table above; needed to type the 'row' reference used
    # in the TableMapRows node.
    table_read_row_type = hl.dtype(
        'struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}'
    )

    matrix_read = ir.MatrixRead(
        'src/test/resources/backward_compatability/1.0.0/matrix_table/0.hmt', False, False)

    return [
        ir.TableUnkey(table_read),
        ir.TableKeyBy(table_read, ['m', 'd'], 1, True),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableKeyByAndAggregate(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))]),
            ir.MakeStruct([('b', ir.I32(5))]),
            1, 2),
        ir.TableJoin(
            table_read,
            ir.TableRange(100, 10),
            'inner'),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(
            'Table{global:Struct{},key:None,row:Struct{a:Int32}}',
            ir.Value(hl.tarray(hl.tstruct(a=hl.tint32)),
                     [{'a': None}, {'a': 5}, {'a': -3}]),
            None),
        ir.TableMapRows(
            table_read,
            ir.MakeStruct([
                ('a', ir.GetField(ir.Ref('row', table_read_row_type), 'f32')),
                ('b', ir.F64(-2.11))]),
            None, None),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([
                ('foo', ir.NA(hl.tarray(hl.tint32)))]),
            ir.Value(hl.tstruct(), {})),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, False),
        ir.TableUnion(
            [ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, 'mset'),
        ir.TableHead(table_read, 10),
        ir.TableOrderBy(ir.TableUnkey(table_read), [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
        ir.LocalizeEntries(matrix_read, '__entries'),
    ]
def table_irs(self):
    """Return a list of table IR nodes built from fixed sample inputs.

    The nodes are constructed over native-reader reads of the 1.0.0
    backward-compatibility table and matrix-table resources, plus a few
    ``TableRange`` sources.  Presumably consumed by IR round-trip/parsing
    tests -- confirm against callers.
    """
    b = ir.TrueIR()
    table_read = ir.TableRead(
        ir.TableNativeReader(resource('backward_compatability/1.0.0/table/0.ht'), None, False), False)

    matrix_read = ir.MatrixRead(
        ir.MatrixNativeReader(resource('backward_compatability/1.0.0/matrix_table/0.hmt'), None, False),
        False, False)

    return [
        ir.TableKeyBy(table_read, ['m', 'd'], False),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableKeyByAndAggregate(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))]),
            ir.MakeStruct([('b', ir.I32(5))]),
            1, 2),
        ir.TableJoin(
            table_read,
            ir.TableRange(100, 10),
            'inner',
            1),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(ir.MakeStruct([
            ('rows', ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)),
                                [{'a': None}, {'a': 5}, {'a': -3}])),
            ('global', ir.MakeStruct([]))]), None),
        ir.TableMapRows(
            ir.TableKeyBy(table_read, []),
            ir.MakeStruct([
                ('a', ir.GetField(ir.Ref('row'), 'f32')),
                ('b', ir.F64(-2.11))])),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([
                ('foo', ir.NA(hl.tarray(hl.tint32)))])),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, ir.RepartitionStrategy.COALESCE),
        ir.TableUnion(
            [ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, ['mset']),
        ir.TableHead(table_read, 10),
        ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
        ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
        ir.TableRename(table_read, {'idx': 'idx_foo'}, {'global_f32': 'global_foo'}),
        ir.TableMultiWayZipJoin([table_read, table_read], '__data', '__globals'),
        ir.MatrixToTableApply(
            matrix_read,
            {'name': 'LinearRegressionRowsSingle',
             'yFields': ['col_m'],
             'xField': 'entry_m',
             'covFields': [],
             'rowBlockSize': 10,
             'passThrough': []}),
        ir.TableToTableApply(
            table_read,
            {'name': 'TableFilterPartitions', 'parts': [0], 'keep': True}),
        ir.TableFilterIntervals(
            table_read,
            [hl.utils.Interval(hl.utils.Struct(row_idx=0), hl.utils.Struct(row_idx=10))],
            hl.tstruct(row_idx=hl.tint32),
            keep=False),
    ]
def _block_inner_product(self,
                         right: 'DNDArray',
                         block_product: Callable[[Expression, Expression], Expression],
                         block_aggregate: Callable[[Expression], Expression]
                         ) -> 'DNDArray':
    """Combine ``self`` and ``right`` block-wise with a generalized inner product.

    A table with one row per ``(r, c, k)`` triple is built, where ``r``
    indexes a block row of ``self``, ``c`` a block column of ``right`` and
    ``k`` the shared inner block dimension.  ``block_product`` is applied to
    the pair ``(self[r, k], right[k, c])`` and ``block_aggregate`` reduces
    the resulting products over ``k`` for each ``(r, c)`` key.

    :param right: right-hand operand; must have the same ``block_size`` and
        inner dimensions compatible with ``self`` (checked with assertions).
    :param block_product: maps a (left block, right block) pair of
        expressions to a per-triple product expression.
    :param block_aggregate: maps the product expression to an aggregation
        producing the output block for a key.
    :return: a ``DNDArray`` wrapping the aggregated block table.
    """
    lhs = self
    assert lhs.block_size == right.block_size
    assert lhs.n_cols == right.n_rows
    assert lhs.n_block_cols == right.n_block_rows

    out_block_rows = lhs.n_block_rows
    inner_blocks = lhs.n_block_cols
    out_block_cols = right.n_block_cols
    # Number of (c, k) combinations sharing a single output block row.
    triples_per_row = out_block_cols * inner_blocks
    total_triples = out_block_rows * triples_per_row

    # One row and one partition per triple; idx decodes to (r, c, k) with k
    # varying fastest.
    tbl = hl.utils.range_table(total_triples, n_partitions=total_triples)
    tbl = tbl.key_by(
        r=tbl.idx // triples_per_row,
        c=(tbl.idx % triples_per_row) // inner_blocks,
        k=tbl.idx % inner_blocks,
    ).select()

    # NOTE(review): _key_by_assert_sorted presumably re-keys without a
    # shuffle, trusting the stated order -- confirm against Table.
    tbl = tbl._key_by_assert_sorted('r', 'c', 'k')
    tbl = tbl._key_by_assert_sorted('r', 'k', 'c')
    left_block = lhs.m[tbl.r, tbl.k].block
    tbl = tbl.annotate(left=left_block)

    tbl = tbl._key_by_assert_sorted('k', 'c', 'r')
    right_block = right.m[tbl.k, tbl.c].block
    tbl = tbl.annotate(right=right_block)

    tbl = tbl.annotate(product=block_product(tbl.left, tbl.right))

    # Return to the (r, c, k) ordering, then drop k so the aggregation
    # groups by output block.
    for key_fields in (('r', 'c', 'k'), ('r', 'c')):
        tbl = tbl._key_by_assert_sorted(*key_fields)

    import hail.methods.misc as misc
    misc.require_key(tbl, 'collect_by_key')
    import hail.ir as ir

    aggregate_ir = ir.TableAggregateByKey(
        tbl._tir,
        hl.struct(block=block_aggregate(tbl.product))._ir)
    result = Table(aggregate_ir).select('block').select_globals(
        n_rows=lhs.n_rows,
        n_cols=right.n_cols,
        n_block_rows=out_block_rows,
        n_block_cols=out_block_cols,
        block_size=lhs.block_size)
    return DNDArray(result)