def _compute_type(self):
    """Derive the table type for the multi-way zip join from its children."""
    # Touch each child's typ so every child computes its own type first.
    for child in self.children:
        child.typ
    first = self.children[0].typ
    # Globals: a single array-of-globals field; rows: the shared key plus an
    # array of each child's row value under `data_name`.
    zipped_globals = hl.tstruct(**{self.global_name: hl.tarray(first.global_type)})
    zipped_row = first.key_type._insert_field(self.data_name, hl.tarray(first.value_type))
    self._type = hl.ttable(zipped_globals, zipped_row, first.row_key)
def table_irs(self):
    """Return a broad sample of TableIR nodes used to exercise parser round-trips."""
    b = ir.TrueIR()
    table_read = ir.TableRead(
        ir.TableNativeReader(resource('backward_compatability/1.0.0/table/0.ht')), False)
    # Row schema of the backward-compatibility table (kept for reference; the
    # Ref('row') below resolves against it on the Scala side).
    table_read_row_type = hl.dtype('struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}')
    matrix_read = ir.MatrixRead(
        ir.MatrixNativeReader(resource('backward_compatability/1.0.0/matrix_table/0.hmt')), False, False)
    # NOTE(review): shadows the builtin `range` and appears unused here.
    range = ir.TableRange(10, 4)
    table_irs = [
        ir.TableKeyBy(table_read, ['m', 'd'], False),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableKeyByAndAggregate(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))]),
            ir.MakeStruct([('b', ir.I32(5))]),
            1, 2),
        ir.TableJoin(
            table_read,
            ir.TableRange(100, 10), 'inner', 1),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(ir.MakeStruct([
            ('rows', ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)),
                                [{'a': None}, {'a': 5}, {'a': -3}])),
            ('global', ir.MakeStruct([]))]), None),
        ir.TableMapRows(
            ir.TableKeyBy(table_read, []),
            ir.MakeStruct([
                ('a', ir.GetField(ir.Ref('row'), 'f32')),
                ('b', ir.F64(-2.11))])),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([
                ('foo', ir.NA(hl.tarray(hl.tint32)))])),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, ir.RepartitionStrategy.COALESCE),
        ir.TableUnion(
            [ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, ['mset']),
        ir.TableHead(table_read, 10),
        ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
        ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
        ir.TableRename(table_read, {'idx': 'idx_foo'}, {'global_f32': 'global_foo'}),
        ir.TableMultiWayZipJoin([table_read, table_read], '__data', '__globals'),
        ir.MatrixToTableApply(matrix_read, {'name': 'LinearRegressionRowsSingle', 'yFields': ['col_m'], 'xField': 'entry_m', 'covFields': [], 'rowBlockSize': 10, 'passThrough': []}),
        ir.TableToTableApply(table_read, {'name': 'TableFilterPartitions', 'parts': [0], 'keep': True})
    ]
    return table_irs
def _compute_type(self):
    """Result type after grouping columns by key: col values and entries become arrays."""
    ct = self.child.typ

    def arrayify(struct_type):
        # Wrap every field of a struct type in an array of that field's type.
        return hl.tstruct(**{name: hl.tarray(typ) for name, typ in struct_type.items()})

    self._type = hl.tmatrix(
        ct.global_type,
        ct.col_key_type._concat(arrayify(ct.col_value_type)),
        ct.col_key,
        ct.row_type,
        ct.row_key,
        arrayify(ct.entry_type))
def table_irs(self):
    """Return TableIR nodes (older string-reader API) for parser round-trip tests."""
    b = ir.TrueIR()
    table_read = ir.TableRead(
        'src/test/resources/backward_compatability/1.0.0/table/0.ht', False, None)
    # Row schema of the compatibility table; used below to type Ref('row').
    table_read_row_type = hl.dtype(
        'struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}'
    )
    matrix_read = ir.MatrixRead(
        'src/test/resources/backward_compatability/1.0.0/matrix_table/0.hmt', False, False)
    # NOTE(review): shadows the builtin `range` and appears unused here.
    range = ir.TableRange(10, 4)
    table_irs = [
        ir.TableUnkey(table_read),
        ir.TableKeyBy(table_read, ['m', 'd'], 1, True),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(table_read, ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableJoin(table_read, ir.TableRange(100, 10), 'inner'),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(
            'Table{global:Struct{},key:None,row:Struct{a:Int32}}',
            ir.Value(hl.tarray(hl.tstruct(a=hl.tint32)), [{
                'a': None
            }, {
                'a': 5
            }, {
                'a': -3
            }]), None),
        ir.TableMapRows(
            table_read,
            ir.MakeStruct([('a', ir.GetField(ir.Ref('row', table_read_row_type), 'f32')),
                           ('b', ir.F64(-2.11))]), None, None),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([('foo', ir.NA(hl.tarray(hl.tint32)))]),
            ir.Value(hl.tstruct(), {})),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, False),
        ir.TableUnion([ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, 'mset'),
        ir.TableOrderBy(ir.TableUnkey(table_read), [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
    ]
    return table_irs
def table_irs(self):
    """Return a sample of TableIR nodes used to exercise parser round-trips."""
    b = ir.TrueIR()
    table_read = ir.TableRead(
        resource('backward_compatability/1.0.0/table/0.ht'), False, None)
    # Row schema of the backward-compatibility table (kept for reference).
    table_read_row_type = hl.dtype('struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set<str>, mset: set<float64>, d: dict<array<str>, float64>, md: dict<int32, str>, h38: locus<GRCh38>, ml: locus<GRCh37>, i: interval<locus<GRCh37>>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus<GRCh37>, bool)}')
    matrix_read = ir.MatrixRead(
        ir.MatrixNativeReader(resource('backward_compatability/1.0.0/matrix_table/0.hmt')), False, False)
    # NOTE(review): shadows the builtin `range` and appears unused here.
    range = ir.TableRange(10, 4)
    table_irs = [
        ir.TableKeyBy(table_read, ['m', 'd'], False),
        ir.TableFilter(table_read, b),
        table_read,
        ir.MatrixColsTable(matrix_read),
        ir.TableAggregateByKey(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))])),
        ir.TableKeyByAndAggregate(
            table_read,
            ir.MakeStruct([('a', ir.I32(5))]),
            ir.MakeStruct([('b', ir.I32(5))]),
            1, 2),
        ir.TableJoin(
            table_read,
            ir.TableRange(100, 10), 'inner', 1),
        ir.MatrixEntriesTable(matrix_read),
        ir.MatrixRowsTable(matrix_read),
        ir.TableParallelize(ir.MakeStruct([
            ('rows', ir.Literal(hl.tarray(hl.tstruct(a=hl.tint32)),
                                [{'a': None}, {'a': 5}, {'a': -3}])),
            ('global', ir.MakeStruct([]))]), None),
        ir.TableMapRows(
            ir.TableKeyBy(table_read, []),
            ir.MakeStruct([
                ('a', ir.GetField(ir.Ref('row'), 'f32')),
                ('b', ir.F64(-2.11))])),
        ir.TableMapGlobals(
            table_read,
            ir.MakeStruct([
                ('foo', ir.NA(hl.tarray(hl.tint32)))])),
        ir.TableRange(100, 10),
        ir.TableRepartition(table_read, 10, False),
        ir.TableUnion(
            [ir.TableRange(100, 10), ir.TableRange(50, 10)]),
        ir.TableExplode(table_read, 'mset'),
        ir.TableHead(table_read, 10),
        ir.TableOrderBy(ir.TableKeyBy(table_read, []), [('m', 'A'), ('m', 'D')]),
        ir.TableDistinct(table_read),
        ir.CastMatrixToTable(matrix_read, '__entries', '__cols'),
        ir.TableRename(table_read, {'idx': 'idx_foo'}, {'global_f32': 'global_foo'}),
        ir.TableMultiWayZipJoin([table_read, table_read], '__data', '__globals'),
    ]
    return table_irs
def test_import_vcf_missing_info_field_elements(self):
    # With array_elements_required=False, '.' elements inside INFO arrays
    # should import as missing values rather than raising.
    mt = hl.import_vcf(resource('missingInfoArray.vcf'), reference_genome='GRCh37',
                       array_elements_required=False)
    mt = mt.select_rows(FOO=mt.info.FOO, BAR=mt.info.BAR)
    expected = hl.Table.parallelize(
        [{'locus': hl.Locus('X', 16050036), 'alleles': ['A', 'C'],
          'FOO': [1, None], 'BAR': [2, None, None]},
         {'locus': hl.Locus('X', 16061250), 'alleles': ['T', 'A', 'C'],
          'FOO': [None, 2, None], 'BAR': [None, 1.0, None]}],
        hl.tstruct(locus=hl.tlocus('GRCh37'), alleles=hl.tarray(hl.tstr),
                   FOO=hl.tarray(hl.tint), BAR=hl.tarray(hl.tfloat64)),
        key=['locus', 'alleles'])
    self.assertTrue(mt.rows()._same(expected))
def blockmatrix_irs(self):
    """Return a collection of BlockMatrixIR nodes for parser round-trip tests."""
    scalar_ir = ir.F64(2)
    vector_ir = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

    read = ir.BlockMatrixRead(ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))
    add_two_bms = ir.BlockMatrixMap2(
        read, read, 'l', 'r', ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r')), "Union")
    negate_bm = ir.BlockMatrixMap(
        read, 'element', ir.ApplyUnaryPrimOp('-', ir.Ref('element')), False)
    sqrt_bm = ir.BlockMatrixMap(
        read, 'element',
        hl.sqrt(construct_expr(ir.Ref('element'), hl.tfloat64))._ir, False)
    persisted = ir.BlockMatrixRead(ir.BlockMatrixPersistReader('x', read))

    scalar_to_bm = ir.ValueToBlockMatrix(scalar_ir, [1, 1], 1)
    col_vector_to_bm = ir.ValueToBlockMatrix(vector_ir, [2, 1], 1)
    row_vector_to_bm = ir.ValueToBlockMatrix(vector_ir, [1, 2], 1)
    broadcast_scalar = ir.BlockMatrixBroadcast(scalar_to_bm, [], [2, 2], 256)
    broadcast_col = ir.BlockMatrixBroadcast(col_vector_to_bm, [0], [2, 2], 256)
    broadcast_row = ir.BlockMatrixBroadcast(row_vector_to_bm, [1], [2, 2], 256)
    # Broadcast with in-index order [1, 0] expresses a transpose.
    transpose = ir.BlockMatrixBroadcast(broadcast_scalar, [1, 0], [2, 2], 256)
    matmul = ir.BlockMatrixDot(broadcast_scalar, transpose)

    # Sparsifier shape arguments: rectangle bounds, band offsets, row intervals.
    rectangle = ir.Literal(hl.tarray(hl.tint64), [0, 1, 5, 6])
    band = ir.Literal(hl.ttuple(hl.tint64, hl.tint64), (-1, 1))
    intervals = ir.Literal(
        hl.ttuple(hl.tarray(hl.tint64), hl.tarray(hl.tint64)),
        ([0, 1, 5, 6], [5, 6, 8, 9]))
    sparsify1 = ir.BlockMatrixSparsify(read, rectangle, ir.RectangleSparsifier)
    sparsify2 = ir.BlockMatrixSparsify(read, band, ir.BandSparsifier(True))
    sparsify3 = ir.BlockMatrixSparsify(read, intervals, ir.RowIntervalSparsifier(True))
    densify = ir.BlockMatrixDensify(read)

    pow_ir = (construct_expr(ir.Ref('l'), hl.tfloat64)
              ** construct_expr(ir.Ref('r'), hl.tfloat64))._ir
    squared_bm = ir.BlockMatrixMap2(scalar_to_bm, scalar_to_bm, 'l', 'r', pow_ir, "NeedsDense")
    slice_bm = ir.BlockMatrixSlice(matmul, [slice(0, 2, 1), slice(0, 1, 1)])

    return [
        read,
        persisted,
        add_two_bms,
        negate_bm,
        sqrt_bm,
        scalar_to_bm,
        col_vector_to_bm,
        row_vector_to_bm,
        broadcast_scalar,
        broadcast_col,
        broadcast_row,
        squared_bm,
        transpose,
        sparsify1,
        sparsify2,
        sparsify3,
        densify,
        matmul,
        slice_bm
    ]
def test_parses(self):
    """Every value IR round-trips through the JVM IRParser without error."""
    type_env = {
        'c': hl.tbool,
        'a': hl.tarray(hl.tint32),
        'aa': hl.tarray(hl.tarray(hl.tint32)),
        'da': hl.tarray(hl.ttuple(hl.tint32, hl.tstr)),
        'v': hl.tint32,
        's': hl.tstruct(x=hl.tint32, y=hl.tint64, z=hl.tfloat64),
        't': hl.ttuple(hl.tint32, hl.tint64, hl.tfloat64),
        'call': hl.tcall,
        'x': hl.tint32,
    }
    # The parser wants JVM-side type objects, not Python types.
    jtype_env = {name: typ._jtype for name, typ in type_env.items()}
    for value_ir in self.value_irs():
        Env.hail().expr.ir.IRParser.parse_value_ir(str(value_ir), jtype_env, {})
def test_parses(self):
    """Every value IR round-trips through the JVM IRParser without error."""
    type_env = {
        'c': hl.tbool,
        'a': hl.tarray(hl.tint32),
        'aa': hl.tarray(hl.tarray(hl.tint32)),
        'da': hl.tarray(hl.ttuple(hl.tint32, hl.tstr)),
        'v': hl.tint32,
        's': hl.tstruct(x=hl.tint32, y=hl.tint64, z=hl.tfloat64),
        't': hl.ttuple(hl.tint32, hl.tint64, hl.tfloat64),
        'call': hl.tcall,
        'x': hl.tint32,
    }
    # The parser consumes types serialized as parsable strings.
    parsable_env = {name: typ._parsable_string() for name, typ in type_env.items()}
    for value_ir in self.value_irs():
        Env.hail().expr.ir.IRParser.parse_value_ir(str(value_ir), parsable_env, {})
def test(self):
    # Build a one-row table covering many field types, apply a broad sample
    # of expression-library functions, and materialize the single row.
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32, e=hl.tstr,
                        f=hl.tarray(hl.tint32),
                        g=hl.tarray(
                            hl.tstruct(x=hl.tint32, y=hl.tint32, z=hl.tstr)),
                        h=hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tstr),
                        i=hl.tbool,
                        j=hl.tstruct(x=hl.tint32, y=hl.tint32, z=hl.tstr))
    rows = [{'a': 4, 'b': 1, 'c': 3, 'd': 5,
             'e': "hello", 'f': [1, 2, 3],
             'g': [hl.Struct(x=1, y=5, z='banana')],
             'h': hl.Struct(a=5, b=3, c='winter'),
             'i': True,
             'j': hl.Struct(x=3, y=2, z='summer')}]
    kt = hl.Table.parallelize(rows, schema)
    # One annotate call per function under test; convert_struct_to_dict
    # flattens the resulting row for comparison.
    result = convert_struct_to_dict(kt.annotate(
        chisq=hl.chisq(kt.a, kt.b, kt.c, kt.d),
        ctt=hl.ctt(kt.a, kt.b, kt.c, kt.d, 5),
        dict=hl.dict(hl.zip([kt.a, kt.b], [kt.c, kt.d])),
        dpois=hl.dpois(4, kt.a),
        drop=kt.h.drop('b', 'c'),
        exp=hl.exp(kt.c),
        fet=hl.fisher_exact_test(kt.a, kt.b, kt.c, kt.d),
        hwe=hl.hardy_weinberg_p(1, 2, 1),
        index=hl.index(kt.g, 'z'),
        is_defined=hl.is_defined(kt.i),
        is_missing=hl.is_missing(kt.i),
        is_nan=hl.is_nan(hl.float64(kt.a)),
        json=hl.json(kt.g),
        log=hl.log(kt.a, kt.b),
        log10=hl.log10(kt.c),
        or_else=hl.or_else(kt.a, 5),
        or_missing=hl.or_missing(kt.i, kt.j),
        pchisqtail=hl.pchisqtail(kt.a, kt.b),
        pcoin=hl.rand_bool(0.5),
        pnorm=hl.pnorm(0.2),
        pow=2.0 ** kt.b,
        ppois=hl.ppois(kt.a, kt.b),
        qchisqtail=hl.qchisqtail(kt.a, kt.b),
        range=hl.range(0, 5, kt.b),
        rnorm=hl.rand_norm(0.0, kt.b),
        rpois=hl.rand_pois(kt.a),
        runif=hl.rand_unif(kt.b, kt.a),
        select=kt.h.select('c', 'b'),
        sqrt=hl.sqrt(kt.a),
        to_str=[hl.str(5), hl.str(kt.a), hl.str(kt.g)],
        where=hl.cond(kt.i, 5, 10)
    ).take(1)[0])
def _compute_type(self):
    """Compute the result matrix type for this matrix-to-matrix function."""
    op = self.config['name']
    ct = self.child.typ
    if op == 'MatrixFilterPartitions':
        # Filtering partitions never changes the schema.
        self._type = ct
        return
    assert op == 'WindowByLocus', op
    # WindowByLocus adds array-valued 'prev_rows'/'prev_entries' fields.
    windowed_row = ct.row_type._insert_field('prev_rows', hl.tarray(ct.row_type))
    windowed_entry = ct.entry_type._insert_field('prev_entries', hl.tarray(ct.entry_type))
    self._type = hl.tmatrix(ct.global_type, ct.col_type, ct.col_key,
                            windowed_row, ct.row_key, windowed_entry)
def read_variants_ht(path: str) -> hl.Table:
    """Read a variants Table and re-key it with sorted variant expressions.

    :param path: Path of the Hail Table to read.
    :return: The Table keyed by the sorted (locus, alleles, locus, alleles)
        expressions, persisted.
    """
    variants_ht = hl.read_table(path)
    # Make sure that types match. The original chained the checks with
    # bitwise `&`, which does not short-circuit: a wrong key arity or a
    # non-expression key field would raise instead of failing the assert.
    # `and` short-circuits and is the idiomatic boolean operator here.
    assert (isinstance(variants_ht.key[0], hl.expr.LocusExpression)
            and variants_ht.key[1].dtype == hl.tarray(hl.tstr)
            and isinstance(variants_ht.key[2], hl.expr.LocusExpression)
            and variants_ht.key[3].dtype == hl.tarray(hl.tstr)), \
        "variants table must be keyed by (locus, alleles, locus, alleles)"
    variants_ht = variants_ht.key_by(**get_sorted_variants_expr(
        variants_ht.key[0],
        variants_ht.key[1],
        variants_ht.key[2],
        variants_ht.key[3])).persist()
    return variants_ht
def test_parses(self):
    """Every value IR parses through the Spark backend's IR parser."""
    type_env = {
        'c': hl.tbool,
        'a': hl.tarray(hl.tint32),
        'st': hl.tstream(hl.tint32),
        'aa': hl.tarray(hl.tarray(hl.tint32)),
        'sta': hl.tstream(hl.tarray(hl.tint32)),
        'da': hl.tarray(hl.ttuple(hl.tint32, hl.tstr)),
        'nd': hl.tndarray(hl.tfloat64, 1),
        'v': hl.tint32,
        's': hl.tstruct(x=hl.tint32, y=hl.tint64, z=hl.tfloat64),
        't': hl.ttuple(hl.tint32, hl.tint64, hl.tfloat64),
        'call': hl.tcall,
        'x': hl.tint32,
    }
    backend = Env.spark_backend('ValueIRTests.test_parses')
    for value_ir in self.value_irs():
        backend._parse_value_ir(str(value_ir), type_env)
def _compute_type(self):
    """Compute the result matrix type for this matrix-to-matrix function."""
    op = self.config['name']
    ct = self.child.typ
    if op in ('MatrixFilterPartitions', 'MatrixFilterIntervals'):
        # Filters drop rows/partitions but leave the schema untouched.
        self._type = ct
        return
    assert op == 'WindowByLocus', op
    # WindowByLocus adds array-valued 'prev_rows'/'prev_entries' fields.
    windowed_row = ct.row_type._insert_field('prev_rows', hl.tarray(ct.row_type))
    windowed_entry = ct.entry_type._insert_field('prev_entries', hl.tarray(ct.entry_type))
    self._type = hl.tmatrix(ct.global_type, ct.col_type, ct.col_key,
                            windowed_row, ct.row_key, windowed_entry)
def __init__(self, schema, paths, key, intervals):
    """Reader configuration for typed partitioned data.

    :param schema: Row schema of the data at `paths`.
    :param paths: Files to read.
    :param key: Key fields, or None; must be None iff `intervals` is None.
    :param intervals: Optional list of intervals restricting the read; each
        interval's points are wrapped in a one-field struct when they are not
        already structs.
    """
    assert (key is None) == (intervals is None)
    self.schema = schema
    self.paths = paths
    self.key = key
    if intervals is not None:
        t = hl.expr.impute_type(intervals)
        # BUG FIX: the original combined the checks with `and`, which only
        # raised when BOTH failed — a non-array value slipped through (and
        # evaluating t.element_type on a non-array could itself raise an
        # AttributeError). Reject unless t is an array of intervals.
        if not isinstance(t, hl.tarray) or not isinstance(t.element_type, hl.tinterval):
            raise TypeError("'intervals' must be an array of tintervals")
        pt = t.element_type.point_type
        if isinstance(pt, hl.tstruct):
            # Struct points are already in the representation the reader wants.
            self._interval_type = t
        else:
            # Scalar points get wrapped in a one-field struct type.
            self._interval_type = hl.tarray(
                hl.tinterval(hl.tstruct(__point=pt)))
    if intervals is not None and t != self._interval_type:
        # Wrap each interval's endpoints to match the struct point type.
        self.intervals = [
            hl.Interval(hl.Struct(__point=i.start),
                        hl.Struct(__point=i.end),
                        i.includes_start,
                        i.includes_end)
            for i in intervals
        ]
    else:
        self.intervals = intervals
def downsample(x, y, label=None, n_divisions=500) -> ArrayExpression:
    """Downsample (x, y) coordinate datapoints.

    Parameters
    --------
    x : :class:`.NumericExpression`
        X-values to be downsampled.
    y : :class:`.NumericExpression`
        Y-values to be downsampled.
    label : :class:`.StringExpression` or :class:`.ArrayExpression`
        Additional data for each (x, y) coordinate. Multiple fields may be
        passed as an :class:`.ArrayExpression`.
    n_divisions : :obj:`int`
        Factor by which to downsample (default value = 500). A lower input
        results in fewer output datapoints.

    Returns
    -------
    :class:`.ArrayExpression`
        Expression for downsampled coordinate points (x, y). The element type
        of the array is :py:data:`.ttuple` of :py:data:`.tfloat64`,
        :py:data:`.tfloat64`, and :py:data:`.tarray` of :py:data:`.tstring`
    """
    # Normalize `label` to an array-of-strings expression (missing if absent).
    if isinstance(label, StringExpression):
        label = hl.array([label])
    elif label is None:
        label = hl.null(hl.tarray(hl.tstr))
    result_type = tarray(ttuple(tfloat64, tfloat64, tarray(tstr)))
    return _agg_func('downsample', [x, y, label], result_type,
                     constructor_args=[n_divisions])
def downsample(x, y, label=None, n_divisions=500) -> ArrayExpression:
    """Downsample (x, y) coordinate datapoints.

    Parameters
    --------
    x : :class:`.NumericExpression`
        X-values to be downsampled.
    y : :class:`.NumericExpression`
        Y-values to be downsampled.
    label : :class:`.StringExpression` or :class:`.ArrayExpression`
        Additional data for each (x, y) coordinate. Multiple fields may be
        passed as an :class:`.ArrayExpression`.
    n_divisions : :obj:`int`
        Factor by which to downsample (default value = 500). A lower input
        results in fewer output datapoints.

    Returns
    -------
    :class:`.ArrayExpression`
        Expression for downsampled coordinate points (x, y). The element type
        of the array is :py:data:`.ttuple` of :py:data:`.tfloat64`,
        :py:data:`.tfloat64`, and :py:data:`.tarray` of :py:data:`.tstring`
    """
    # Normalize `label` to an array-of-strings expression (missing if absent).
    if isinstance(label, StringExpression):
        label = hl.array([label])
    elif label is None:
        label = hl.null(hl.tarray(hl.tstr))
    result_type = tarray(ttuple(tfloat64, tfloat64, tarray(tstr)))
    return _agg_func('downsample', _to_agg(x), result_type,
                     constructor_args=[n_divisions],
                     seq_op_args=[lambda x: x, y, label])
def _get_train_counts(ht: hl.Table) -> Tuple[int, int]:
    """
    Determine the number of TP and FP variants in the input Table and report some stats on Ti, Tv, indels.

    :param ht: Input Table
    :return: Counts of TP and FP variants in the table
    """
    train_stats = hl.struct(n=hl.agg.count())
    # Only compute Ti/Tv/indel stats when the table has an 'alleles' field of
    # type array<str>; the first two alleles are treated as ref/alt.
    if "alleles" in ht.row and ht.row.alleles.dtype == hl.tarray(hl.tstr):
        train_stats = train_stats.annotate(
            ti=hl.agg.count_where(
                hl.expr.is_transition(ht.alleles[0], ht.alleles[1])),
            tv=hl.agg.count_where(
                hl.expr.is_transversion(ht.alleles[0], ht.alleles[1])),
            indel=hl.agg.count_where(
                hl.expr.is_indel(ht.alleles[0], ht.alleles[1])),
        )
    # Sample training examples
    # Group by contig and the TP/FP labels, then pull the counts locally.
    pd_stats = (ht.group_by(**{
        "contig": ht.locus.contig,
        "tp": ht._tp,
        "fp": ht._fp
    }).aggregate(**train_stats).to_pandas())
    logger.info(pformat(pd_stats))
    # Missing labels count as False for the TP/FP sums below.
    pd_stats = pd_stats.fillna(False)
    # Number of true positive and false positive variants to be sampled for the training set
    n_tp = pd_stats[pd_stats["tp"] & ~pd_stats["fp"]]["n"].sum()
    n_fp = pd_stats[~pd_stats["tp"] & pd_stats["fp"]]["n"].sum()
    return n_tp, n_fp
def test_group_cols_by_aggregate(self):
    # Group columns by a boolean key, aggregate both cols and entries, and
    # compare against a hand-built expected table.
    mt, mt2 = self.get_groupable_matrix2()
    col_result = (mt.group_cols_by(group=mt2.cols()[mt.col_idx].col_idx2 < 2)
                  .aggregate_cols(collect=hl.agg.collect(mt.col_idx))
                  .aggregate_cols(count=hl.agg.count())
                  .aggregate_entries(sum=hl.agg.sum(mt2[mt.row_idx, mt.col_idx].x + mt.glob) + mt.glob - 15 - mt.row_idx)  # tests fixed indices
                  .aggregate_entries(x=5)
                  .result())
    col_expected = (
        hl.Table.parallelize(
            [{'group': True, 'row_idx': 0, 'sum': 1, 'collect': [0, 1], 'count': 2, 'r1': 3, 'x': 5},
             {'group': True, 'row_idx': 1, 'sum': 2, 'collect': [0, 1], 'count': 2, 'r1': 3, 'x': 5},
             {'group': True, 'row_idx': 2, 'sum': 3, 'collect': [0, 1], 'count': 2, 'r1': 3, 'x': 5},
             {'group': True, 'row_idx': 3, 'sum': 4, 'collect': [0, 1], 'count': 2, 'r1': 3, 'x': 5},
             {'group': False, 'row_idx': 0, 'sum': 5, 'collect': [2, 3], 'count': 2, 'r1': 3, 'x': 5},
             {'group': False, 'row_idx': 1, 'sum': 6, 'collect': [2, 3], 'count': 2, 'r1': 3, 'x': 5},
             {'group': False, 'row_idx': 2, 'sum': 7, 'collect': [2, 3], 'count': 2, 'r1': 3, 'x': 5},
             {'group': False, 'row_idx': 3, 'sum': 8, 'collect': [2, 3], 'count': 2, 'r1': 3, 'x': 5}],
            hl.tstruct(row_idx=hl.tint32, r1=hl.tint32, group=hl.tbool,
                       collect=hl.tarray(hl.tint32), count=hl.tint64,
                       sum=hl.tint64, x=hl.tint32)
        ).annotate_globals(glob=5).key_by('row_idx', 'group')
    )
    self.assertTrue(col_result.entries()._same(col_expected))
def test_uniqueness(self):
    """Unique datasets annotate as scalar structs; non-unique as arrays."""
    db = hl.experimental.DB(config=AnnotationDBTests.db_json)
    t = hl.utils.range_table(10)
    t = t.annotate(locus=hl.locus('1', t.idx + 1))
    t = db.annotate_rows_db(t, 'unique_dataset', 'nonunique_dataset')
    # BUG FIX: these comparisons were bare expression statements, so the
    # test asserted nothing — a dtype mismatch would silently pass. Wrap
    # them in asserts so a mismatch actually fails the test.
    assert t.unique_dataset.dtype == hl.tstruct(annotation=hl.tstr)
    assert t.nonunique_dataset.dtype == hl.tstruct(annotation=hl.tarray(hl.tstr))
def validate_variant_results_table(ds):
    # Validate the schema of a variant-results Table; raises AssertionError
    # with a descriptive message on the first violation.
    assert ds.key.dtype.fields == ("locus", "alleles"), "Table must be keyed by locus and alleles"
    assert ds.locus.dtype in (hl.tlocus("GRCh37"), hl.tlocus("GRCh38")), "'locus' must be a locus type"
    assert ds.alleles.dtype == hl.tarray(hl.tstr), "'alleles' must be an array of strings"
    # Top-level row fields every results table must carry, with their types.
    required_fields = {
        "gene_id": hl.tstr,
        "consequence": hl.tstr,
        "hgvsc": hl.tstr,
        "hgvsp": hl.tstr,
    }
    for field, typ in required_fields.items():
        assert field in ds.row_value.dtype.fields, f"Missing required field '{field}'"
        assert ds[field].dtype == typ, f"{field} should be type {typ}"
    # 'group_results' maps a group name to a struct of per-group statistics.
    assert "group_results" in ds.row_value.dtype.fields, "Table must have a 'group_results' field"
    assert isinstance(ds.group_results.dtype, hl.tdict), "'group_results' must be a dict"
    assert ds.group_results.dtype.key_type == hl.tstr, "'group_results' keys must be strings"
    assert isinstance(ds.group_results.dtype.value_type, hl.tstruct), "'group_results' value must be a struct"
    for typ in ds.group_results.dtype.value_type.types:
        assert (
            typ in ALLOWED_RESULT_TYPES
        ), f"'group_results' fields may only be one of {', '.join(map(str, ALLOWED_RESULT_TYPES))}"
    # 'info' is a struct of additional per-variant values, same type whitelist.
    assert isinstance(ds.info.dtype, hl.tstruct), "'info' must be a struct"
    for typ in ds.info.dtype.types:
        assert (
            typ in ALLOWED_RESULT_TYPES
        ), f"'info' fields may only be one of {', '.join(map(str, ALLOWED_RESULT_TYPES))}"
def impute_sex_aggregator(call,
                          aaf,
                          aaf_threshold=0.0,
                          include_par=False,
                          female_threshold=0.4,
                          male_threshold=0.8) -> hl.Table:
    """:func:`.impute_sex` as an aggregator.

    NOTE(review): despite the annotation, this returns an aggregation
    expression (a struct of `is_female` plus the inbreeding fields), not a
    Table — confirm before changing the annotation.
    """
    mt = call._indices.source
    rg = mt.locus.dtype.reference_genome
    x_contigs = hl.literal(
        hl.eval(
            hl.map(lambda x_contig: hl.parse_locus_interval(x_contig, rg),
                   rg.x_contigs)))
    inbreeding = hl.agg.inbreeding(call, aaf)
    # BUG FIX: the original used hl.is_missing('tbool'), which evaluates
    # whether the *string literal* 'tbool' is missing — always False — so
    # samples with an ambiguous f_stat were reported as males instead of
    # missing. Use a missing boolean of type tbool instead.
    is_female = hl.if_else(
        inbreeding.f_stat < female_threshold,
        True,
        hl.if_else(inbreeding.f_stat > male_threshold,
                   False,
                   hl.null(hl.tbool)))
    expression = hl.struct(is_female=is_female, **inbreeding)
    if not include_par:
        # Exclude loci inside pseudo-autosomal regions.
        interval_type = hl.tarray(hl.tinterval(hl.tlocus(rg)))
        par_intervals = hl.literal(rg.par, interval_type)
        expression = hl.agg.filter(
            ~par_intervals.any(
                lambda par_interval: par_interval.contains(mt.locus)),
            expression)
    # Keep only variants within the alternate-allele-frequency window...
    expression = hl.agg.filter(
        (aaf > aaf_threshold) & (aaf < (1 - aaf_threshold)), expression)
    # ...and only variants on the X contigs.
    expression = hl.agg.filter(
        x_contigs.any(lambda contig: contig.contains(mt.locus)), expression)

    return expression
def test_annotate_globals(self):
    """Typed global annotations round-trip through .value on both table kinds."""
    mt = hl.utils.range_matrix_table(1, 1)
    ht = hl.utils.range_table(1, 1)

    def str_eq(a, b):
        # NaN/inf don't compare equal numerically; compare their reprs.
        return str(a) == str(b)

    cases = [
        (5, hl.tint, operator.eq),
        (float('nan'), hl.tfloat32, str_eq),
        (float('inf'), hl.tfloat64, str_eq),
        (float('-inf'), hl.tfloat64, str_eq),
        (1.111, hl.tfloat64, operator.eq),
        ([hl.Struct(a=None, b=5), hl.Struct(a='hello', b=10)],
         hl.tarray(hl.tstruct(a=hl.tstr, b=hl.tint)), operator.eq),
    ]
    for x, t, f in cases:
        for ds in (mt, ht):
            annotated = ds.annotate_globals(foo=hl.literal(x, t))
            self.assertTrue(f(annotated.foo.value, x), f"{x}, {t}")
def _compute_type(self):
    # Compute the result table type for a matrix-to-table function based on
    # the function name in the config.
    name = self.config['name']
    child_typ = self.child.typ
    pass_through = self.config['passThrough']
    if name == 'LinearRegressionRowsChained':
        # Chained variant: per-row arrays-of-arrays of regression statistics.
        chained_schema = hl.dtype(
            'struct{n:array<int32>,sum_x:array<float64>,y_transpose_x:array<array<float64>>,beta:array<array<float64>>,standard_error:array<array<float64>>,t_stat:array<array<float64>>,p_value:array<array<float64>>}'
        )
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type._insert_fields(
                **{f: child_typ.row_type[f]
                   for f in pass_through})._concat(chained_schema)),
            child_typ.row_key)
    elif name == 'LinearRegressionRowsSingle':
        # Single variant: flat per-row regression statistics.
        chained_schema = hl.dtype(
            'struct{n:int32,sum_x:float64,y_transpose_x:array<float64>,beta:array<float64>,standard_error:array<float64>,t_stat:array<float64>,p_value:array<float64>}'
        )
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type._insert_fields(
                **{f: child_typ.row_type[f]
                   for f in pass_through})._concat(chained_schema)),
            child_typ.row_key)
    else:
        assert name == 'LogisticRegression', name
        pass_through = self.config['passThrough']
        # Result schema depends on which statistical test was configured.
        logreg_type = hl.tstruct(logistic_regression=hl.tarray(
            regression_test_type(self.config['test'])))
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type._insert_fields(
                **{f: child_typ.row_type[f]
                   for f in pass_through})._concat(logreg_type)),
            child_typ.row_key)
def test_ndarray_shape():
    """hl._ndarray shapes agree with their numpy equivalents (incl. broadcasting)."""
    np_scalar = np.array(3)
    np_row = np.array([1, 2, 3])
    np_col = np.array([[1], [2], [3]])
    np_mat = np.array([[1, 2], [3, 4]])
    np_cube = np.arange(30).reshape((2, 5, 3))

    scalar = hl._ndarray(np_scalar)
    row = hl._ndarray(np_row)
    col = hl._ndarray(np_col)
    mat = hl._ndarray(np_mat)
    cube = hl._ndarray(np_cube)
    missing = hl._ndarray(hl.null(hl.tarray(hl.tint32)))

    assert_all_eval_to(
        (scalar.shape, np_scalar.shape),
        (row.shape, np_row.shape),
        (col.shape, np_col.shape),
        (mat.shape, np_mat.shape),
        (cube.shape, np_cube.shape),
        # Broadcasting follows numpy's shape rules.
        ((row + cube).shape, (np_row + np_cube).shape),
        ((row + col).shape, (np_row + np_col).shape),
        (mat.transpose().shape, np_mat.transpose().shape),
        # A missing ndarray has a missing shape.
        (missing.shape, None)
    )
def test_ndarray_ref():
    """Indexing ndarrays: 0-d scalars, 3-d cubes, and missing values/indices."""
    value = 5.0
    from_float = hl.nd.array(value)
    from_np = hl.nd.array(np.array(value))
    # A 0-d ndarray is indexed with the empty tuple.
    assert_evals_to(from_float[()], 5.0)
    assert_evals_to(from_np[()], 5.0)

    cube_data = [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
    cube = hl.nd.array(cube_data)
    np_cube = hl.nd.array(np.array(cube_data))
    missing_nd = hl.nd.array(hl.null(hl.tarray(hl.tint32)))

    assert_all_eval_to(
        (cube[0, 0, 1], 1),
        (cube[1, 1, 0], 6),
        (np_cube[0, 0, 1], 1),
        (np_cube[1, 1, 0], 6),
        (hl.nd.array([[[[1]]]])[0, 0, 0, 0], 1),
        (hl.nd.array([[[1, 2]], [[3, 4]]])[1, 0, 0], 3),
        # Missing ndarray or missing index yields a missing result.
        (missing_nd[1], None),
        (hl.nd.array([1, 2, 3])[hl.null(hl.tint32)], None),
        (cube[0, 0, hl.null(hl.tint32)], None))

    # Out-of-bounds indexing fails at eval time.
    with pytest.raises(FatalError) as exc:
        hl.eval(hl.nd.array([1, 2, 3])[4])
    assert "Index out of bounds" in str(exc)
def test_ndarray_eval():
    """hl._ndarray evaluates to numpy arrays with matching data and strides."""
    grid = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    evaluated = hl.eval(hl._ndarray(grid))
    expected = np.array(grid, dtype=np.int32)
    assert (np.array_equal(evaluated, expected))
    assert (evaluated.strides == expected.strides)

    # Degenerate shapes still evaluate with sensible strides.
    assert hl.eval(hl._ndarray([[], []])).strides == (8, 8)
    assert np.array_equal(hl.eval(hl._ndarray([])), np.array([]))

    # Literal numpy arrays round-trip with dtype preserved.
    zeros = np.zeros((10, 10), dtype=np.int64)
    evaluated_zeros = hl.eval(hl.literal(zeros))
    assert np.array_equal(evaluated_zeros, zeros)
    assert zeros.dtype == evaluated_zeros.dtype

    # Testing from hail arrays
    assert np.array_equal(hl.eval(hl._ndarray(hl.range(6))), np.arange(6))
    assert np.array_equal(hl.eval(hl._ndarray(hl.int64(4))), np.array(4))

    # Testing missing data
    assert hl.eval(hl._ndarray(hl.null(hl.tarray(hl.tint32)))) is None

    # Ragged input is rejected client-side with a clear error.
    with pytest.raises(ValueError) as exc:
        hl._ndarray([[4], [1, 2, 3], 5])
    assert "inner dimensions do not match" in str(exc.value)
def test_import_vcf_missing_format_field_elements(self):
    # With array_elements_required=False, '.' elements inside FORMAT arrays
    # (AD, PL) should import as missing values rather than raising.
    mt = hl.import_vcf(resource('missingFormatArray.vcf'), reference_genome='GRCh37',
                       array_elements_required=False)
    mt = mt.select_rows().select_entries('AD', 'PL')
    expected = hl.Table.parallelize(
        [{'locus': hl.Locus('X', 16050036), 'alleles': ['A', 'C'], 's': 'C1046::HG02024',
          'AD': [None, None], 'PL': [0, None, 180]},
         {'locus': hl.Locus('X', 16050036), 'alleles': ['A', 'C'], 's': 'C1046::HG02025',
          'AD': [None, 6], 'PL': [70, None]},
         {'locus': hl.Locus('X', 16061250), 'alleles': ['T', 'A', 'C'], 's': 'C1046::HG02024',
          'AD': [0, 0, None], 'PL': [396, None, None, 33, None, 0]},
         {'locus': hl.Locus('X', 16061250), 'alleles': ['T', 'A', 'C'], 's': 'C1046::HG02025',
          'AD': [0, 0, 9], 'PL': [None, None, None]}],
        hl.tstruct(locus=hl.tlocus('GRCh37'), alleles=hl.tarray(hl.tstr), s=hl.tstr,
                   AD=hl.tarray(hl.tint), PL=hl.tarray(hl.tint)),
        key=['locus', 'alleles', 's'])
    self.assertTrue(mt.entries()._same(expected))
def test_matrix_ir_parses(self):
    # Index the BGEN so MatrixImportBGEN below can read it.
    hl.index_bgen(resource('example.8bits.bgen'),
                  reference_genome=hail.get_reference('GRCh37'),
                  contig_recoding={'01': '1'})

    # A simple Collect aggregation reused for the aggregate-by-key nodes.
    collect_sig = ir.AggSignature('Collect', [], None, [hl.tint32])
    collect = ir.MakeStruct([('x', ir.ApplyAggOp([ir.I32(0)], [], None, collect_sig, hl.tarray(hl.tint32)))])

    matrix_read = ir.MatrixRead(
        resource('backward_compatability/1.0.0/matrix_table/0.hmt'), False, False)
    table_read = ir.TableRead(
        resource('backward_compatability/1.0.0/table/0.ht'), False, None)

    # One node per MatrixIR constructor under test.
    matrix_irs = [
        ir.MatrixUnionRows(ir.MatrixRange(5, 5, 1),
                           ir.MatrixRange(5, 5, 1)),
        ir.UnlocalizeEntries(
            ir.LocalizeEntries(matrix_read, '__entries'),
            ir.MatrixColsTable(matrix_read),
            '__entries'),
        ir.MatrixAggregateRowsByKey(matrix_read, collect, collect),
        ir.MatrixAggregateColsByKey(matrix_read, collect, collect),
        ir.MatrixRange(1, 1, 10),
        ir.MatrixImportVCF([resource('sample.vcf')],
                           False, False, None, None, False,
                           ['GT'],
                           hail.get_reference('GRCh37'),
                           {}, True, False),
        ir.MatrixImportBGEN([resource('example.8bits.bgen')],
                            ['GP'],
                            resource('example.sample'),
                            {}, 10, 1,
                            ['varid'],
                            None),
        ir.MatrixFilterRows(matrix_read, ir.FalseIR()),
        ir.MatrixFilterCols(matrix_read, ir.FalseIR()),
        ir.MatrixFilterEntries(matrix_read, ir.FalseIR()),
        ir.MatrixChooseCols(matrix_read, [1, 0]),
        ir.MatrixMapCols(matrix_read, ir.MakeStruct([('x', ir.I64(20))]), ['x']),
        ir.MatrixKeyRowsBy(matrix_read, ['row_i64'], False),
        ir.MatrixMapRows(ir.MatrixKeyRowsBy(matrix_read, []),
                         ir.MakeStruct([('x', ir.I64(20))])),
        ir.MatrixMapEntries(matrix_read, ir.MakeStruct([('x', ir.I64(20))])),
        ir.MatrixMapGlobals(matrix_read, ir.MakeStruct([('x', ir.I64(20))])),
        ir.TableToMatrixTable(table_read,
                              ['f32', 'i64'],
                              ['m', 'astruct'],
                              ['aset'],
                              ['mset'],
                              100),
        ir.MatrixCollectColsByKey(matrix_read),
        ir.MatrixExplodeRows(matrix_read, ['row_aset']),
        ir.MatrixExplodeCols(matrix_read, ['col_aset']),
        ir.MatrixAnnotateRowsTable(matrix_read, table_read, '__foo', None),
        ir.MatrixAnnotateColsTable(matrix_read, table_read, '__foo'),
    ]

    # Each node must survive a Python-str -> Scala parse round trip; wrap
    # failures with the offending IR's string form for easier debugging.
    for x in matrix_irs:
        try:
            Env.hail().expr.Parser.parse_matrix_ir(str(x))
        except Exception as e:
            raise ValueError(str(x)) from e
def test_import_bgen_GT_GP_entries(self):
    """Importing GT and GP yields a (tcall, tarray<tfloat64>) entry schema."""
    hl.index_bgen(resource('example.8bits.bgen'),
                  contig_recoding={'01': '1'},
                  reference_genome='GRCh37')
    imported = hl.import_bgen(resource('example.8bits.bgen'),
                              entry_fields=['GT', 'GP'],
                              sample_file=resource('example.sample'))
    expected_entry_type = hl.tstruct(GT=hl.tcall, GP=hl.tarray(hl.tfloat64))
    self.assertEqual(imported.entry.dtype, expected_entry_type)
def test_import_bgen_row_fields(self):
    # Default import carries rsid and varid row fields...
    default_row_fields = hl.import_bgen(resource('example.8bits.bgen'),
                                        entry_fields=['dosage'],
                                        contig_recoding={'01': '1'},
                                        reference_genome='GRCh37')
    self.assertEqual(
        default_row_fields.row.dtype,
        hl.tstruct(locus=hl.tlocus('GRCh37'),
                   alleles=hl.tarray(hl.tstr),
                   rsid=hl.tstr,
                   varid=hl.tstr))
    # ...while _row_fields=[] keeps only the key fields...
    no_row_fields = hl.import_bgen(resource('example.8bits.bgen'),
                                   entry_fields=['dosage'],
                                   contig_recoding={'01': '1'},
                                   reference_genome='GRCh37',
                                   _row_fields=[])
    self.assertEqual(
        no_row_fields.row.dtype,
        hl.tstruct(locus=hl.tlocus('GRCh37'),
                   alleles=hl.tarray(hl.tstr)))
    # ...and each optional field can be requested individually.
    varid_only = hl.import_bgen(resource('example.8bits.bgen'),
                                entry_fields=['dosage'],
                                contig_recoding={'01': '1'},
                                reference_genome='GRCh37',
                                _row_fields=['varid'])
    self.assertEqual(
        varid_only.row.dtype,
        hl.tstruct(locus=hl.tlocus('GRCh37'),
                   alleles=hl.tarray(hl.tstr),
                   varid=hl.tstr))
    rsid_only = hl.import_bgen(resource('example.8bits.bgen'),
                               entry_fields=['dosage'],
                               contig_recoding={'01': '1'},
                               reference_genome='GRCh37',
                               _row_fields=['rsid'])
    self.assertEqual(
        rsid_only.row.dtype,
        hl.tstruct(locus=hl.tlocus('GRCh37'),
                   alleles=hl.tarray(hl.tstr),
                   rsid=hl.tstr))
    # Dropping a field from the default import matches importing without it.
    self.assertTrue(default_row_fields.drop('varid')._same(rsid_only))
    self.assertTrue(default_row_fields.drop('rsid')._same(varid_only))
    self.assertTrue(
        default_row_fields.drop('varid', 'rsid')._same(no_row_fields))
def test_ndarray_eval():
    """Exercise hl.nd.array construction/evaluation: numpy round-trips
    (C-order, Fortran-order, reshaped), construction from Hail arrays,
    missing input, and rejection of ragged ('mishapen') nested lists."""
    data_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    # Ragged inputs that must be rejected at different nesting depths.
    mishapen_data_list1 = [[4], [1, 2, 3]]
    mishapen_data_list2 = [[[1], [2, 3]]]
    mishapen_data_list3 = [[4], [1, 2, 3], 5]
    nd_expr = hl.nd.array(data_list)
    evaled = hl.eval(nd_expr)
    np_equiv = np.array(data_list, dtype=np.int32)
    np_equiv_fortran_style = np.asfortranarray(np_equiv)
    np_equiv_extra_dimension = np_equiv.reshape((3, 1, 3))
    assert (np.array_equal(evaled, np_equiv))
    # Strides must match numpy's, including for empty arrays.
    assert (evaled.strides == np_equiv.strides)
    assert hl.eval(hl.nd.array([[], []])).strides == (8, 8)
    assert np.array_equal(hl.eval(hl.nd.array([])), np.array([]))
    # Round-trip an int64 numpy literal, preserving dtype.
    zero_array = np.zeros((10, 10), dtype=np.int64)
    evaled_zero_array = hl.eval(hl.literal(zero_array))
    assert np.array_equal(evaled_zero_array, zero_array)
    assert zero_array.dtype == evaled_zero_array.dtype
    # Testing correct interpretation of numpy strides
    assert np.array_equal(hl.eval(hl.literal(np_equiv_fortran_style)), np_equiv_fortran_style)
    assert np.array_equal(hl.eval(hl.literal(np_equiv_extra_dimension)), np_equiv_extra_dimension)
    # Testing from hail arrays
    assert np.array_equal(hl.eval(hl.nd.array(hl.range(6))), np.arange(6))
    assert np.array_equal(hl.eval(hl.nd.array(hl.int64(4))), np.array(4))
    # Testing from nested hail arrays
    assert np.array_equal(
        hl.eval(hl.nd.array(hl.array([hl.array(x) for x in data_list]))),
        np.arange(9).reshape((3, 3)) + 1)
    # Testing missing data
    assert hl.eval(hl.nd.array(hl.null(hl.tarray(hl.tint32)))) is None
    # Ragged python lists fail eagerly with ValueError; ragged Hail arrays
    # fail at evaluation time with FatalError.
    with pytest.raises(ValueError) as exc:
        hl.nd.array(mishapen_data_list1)
    assert "inner dimensions do not match" in str(exc.value)
    with pytest.raises(FatalError) as exc:
        hl.eval(hl.nd.array(hl.array(mishapen_data_list1)))
    assert "inner dimensions do not match" in str(exc.value)
    with pytest.raises(FatalError) as exc:
        hl.eval(hl.nd.array(hl.array(mishapen_data_list2)))
    assert "inner dimensions do not match" in str(exc.value)
    with pytest.raises(ValueError) as exc:
        hl.nd.array(mishapen_data_list3)
    assert "inner dimensions do not match" in str(exc.value)
def test_agg_explode_collect(self):
    """agg.collect over an agg.explode flattens array elements across rows,
    skipping missing arrays but keeping missing elements.

    NOTE: originally named ``test_agg_explode``, identical to the name of
    the lambda-style variant defined immediately after it; if both live in
    the same class the later ``def`` shadows this one, so it would never be
    collected or run. Renamed so both tests are discovered.
    """
    t = hl.Table.parallelize([
        hl.struct(a=[1, 2]),
        hl.struct(a=hl.empty_array(hl.tint32)),   # empty array contributes nothing
        hl.struct(a=hl.null(hl.tarray(hl.tint32))),  # missing array is skipped
        hl.struct(a=[3]),
        hl.struct(a=[hl.null(hl.tint32)])         # missing *element* is kept
    ])
    self.assertCountEqual(t.aggregate(hl.agg.collect(hl.agg.explode(t.a))),
                          [1, 2, None, 3])
def test_agg_explode(self):
    """Lambda form of agg.explode: collect every exploded element,
    dropping missing arrays but preserving missing elements."""
    rows = [
        hl.struct(a=[1, 2]),
        hl.struct(a=hl.empty_array(hl.tint32)),
        hl.struct(a=hl.null(hl.tarray(hl.tint32))),
        hl.struct(a=[3]),
        hl.struct(a=[hl.null(hl.tint32)]),
    ]
    table = hl.Table.parallelize(rows)
    collected = table.aggregate(
        hl.agg.explode(lambda elt: hl.agg.collect(elt), table.a))
    self.assertCountEqual(collected, [1, 2, None, 3])
def _compute_type(self):
    """Derive the matrix type: the child's type with `self.root` inserted
    into the row struct, holding the joined table's value (wrapped in an
    array when `product` is set)."""
    child_typ = self.child.typ
    table_value = self.table.typ.value_type
    annotation_type = hl.tarray(table_value) if self.product else table_value
    self._type = hl.tmatrix(
        child_typ.global_type,
        child_typ.col_type,
        child_typ.col_key,
        child_typ.row_type._insert_field(self.root, annotation_type),
        child_typ.row_key,
        child_typ.entry_type)
def array_floating_point_divide(arg_type, ret_type):
    """Register "/" for scalar/array, array/scalar and array/array operand
    shapes, all returning an array of `ret_type`."""
    arg_array = hl.tarray(arg_type)
    ret_array = hl.tarray(ret_type)
    # Registration order matches the original: scalar/array first.
    for operands in ((arg_type, arg_array),
                     (arg_array, arg_type),
                     (arg_array, arg_array)):
        register_function("/", operands, ret_array)
def _compute_type(self):
    """Derive the joined table type: the left table's rows gain a field
    `self.root` holding the right table's value (an array of values when
    `product` is set); globals and key come from the left."""
    left_typ = self.left.typ
    right_typ = self.right.typ
    joined_value = right_typ.value_type
    if self.product:
        joined_value = hl.tarray(joined_value)
    row_type = left_typ.row_type._insert_field(self.root, joined_value)
    self._type = hl.ttable(left_typ.global_type, row_type, left_typ.row_key)
def test_loop_memory(self):
    """hl.experimental.loop appends "1".."10" to the seed array ['foo']."""
    def grow(recur, acc, idx):
        # Stop once idx passes 10; otherwise append str(idx) and recur.
        return hl.if_else(idx > 10, acc, recur(acc.append(hl.str(idx)), idx + 1))

    result = hl.eval(
        hl.experimental.loop(grow, hl.tarray(hl.tstr), hl.literal(['foo']), 1))
    expected = ['foo', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
    assert result == expected
def test_localize_entries(self):
    """_localize_entries on a range matrix table produces a table whose
    '__entries' row field and '__cols' global match a hand-built reference."""
    expected_schema = hl.tstruct(row_idx=hl.tint32,
                                 __entries=hl.tarray(hl.tstruct(v=hl.tint32)))
    expected_rows = [{'row_idx': r, '__entries': [{'v': r + c} for c in range(6)]}
                     for r in range(8)]
    expected = hl.Table.parallelize(expected_rows, expected_schema).key_by('row_idx')
    expected = expected.select_globals(
        __cols=[hl.struct(col_idx=c) for c in range(6)])

    mt = hl.utils.range_matrix_table(8, 6)
    mt = mt.annotate_entries(v=mt.row_idx + mt.col_idx)
    localized = mt._localize_entries('__entries', '__cols')
    self.assertTrue(localized._same(expected))
def test_localize_self_join(self):
    """An outer self-join of a localized matrix table matches the outer
    self-join of the equivalent hand-built reference table."""
    expected_schema = hl.tstruct(row_idx=hl.tint32,
                                 __entries=hl.tarray(hl.tstruct(v=hl.tint32)))
    expected_rows = [{'row_idx': r, '__entries': [{'v': r + c} for c in range(6)]}
                     for r in range(8)]
    expected = hl.Table.parallelize(expected_rows, expected_schema).key_by('row_idx')
    expected = expected.join(expected, how='outer')

    mt = hl.utils.range_matrix_table(8, 6)
    mt = mt.annotate_entries(v=mt.row_idx + mt.col_idx)
    localized = mt._localize_entries('__entries', '__cols').drop('__cols')
    localized = localized.join(localized, how='outer')
    self.assertTrue(localized._same(expected))
def _linreg(y, x, nested_dim):
    """Build the 'LinearRegression' aggregator for response `y`, covariates
    `x`, and `nested_dim` covariates in the nested (null) model.

    Raises ValueError when nested_dim is outside [0, len(x)].
    """
    k = len(x)
    k0 = nested_dim
    if not (0 <= k0 <= k):
        raise ValueError("linreg: `nested_dim` must be between 0 and the number "
                         f"of covariates ({k}), inclusive")

    # Result schema: per-covariate statistics plus whole-model summaries.
    farray = hl.tarray(hl.tfloat64)
    result_type = hl.tstruct(beta=farray,
                             standard_error=farray,
                             t_stat=farray,
                             p_value=farray,
                             multiple_standard_error=hl.tfloat64,
                             multiple_r_squared=hl.tfloat64,
                             adjusted_r_squared=hl.tfloat64,
                             f_stat=hl.tfloat64,
                             multiple_p_value=hl.tfloat64,
                             n=hl.tint64)
    covariates = hl.array(x)
    return _agg_func('LinearRegression',
                     [y, covariates],
                     result_type,
                     [hl.int32(k), hl.int32(k0)])
def test_import_bgen_row_fields(self):
    """Check `_row_fields` handling of import_bgen after an explicit
    index_bgen call (so the per-call contig_recoding/reference_genome
    arguments are not needed).

    NOTE(review): a method with this same name also appears earlier in this
    file; if both live in the same class, this later definition shadows the
    earlier one — confirm they belong to different test classes.
    """
    hl.index_bgen(resource('example.8bits.bgen'),
                  contig_recoding={'01': '1'},
                  reference_genome='GRCh37')
    # Default: rsid and varid included alongside the key fields.
    default_row_fields = hl.import_bgen(resource('example.8bits.bgen'),
                                        entry_fields=['dosage'])
    self.assertEqual(default_row_fields.row.dtype,
                     hl.tstruct(locus=hl.tlocus('GRCh37'),
                                alleles=hl.tarray(hl.tstr),
                                rsid=hl.tstr,
                                varid=hl.tstr))
    no_row_fields = hl.import_bgen(resource('example.8bits.bgen'),
                                   entry_fields=['dosage'],
                                   _row_fields=[])
    self.assertEqual(no_row_fields.row.dtype,
                     hl.tstruct(locus=hl.tlocus('GRCh37'),
                                alleles=hl.tarray(hl.tstr)))
    varid_only = hl.import_bgen(resource('example.8bits.bgen'),
                                entry_fields=['dosage'],
                                _row_fields=['varid'])
    self.assertEqual(varid_only.row.dtype,
                     hl.tstruct(locus=hl.tlocus('GRCh37'),
                                alleles=hl.tarray(hl.tstr),
                                varid=hl.tstr))
    rsid_only = hl.import_bgen(resource('example.8bits.bgen'),
                               entry_fields=['dosage'],
                               _row_fields=['rsid'])
    self.assertEqual(rsid_only.row.dtype,
                     hl.tstruct(locus=hl.tlocus('GRCh37'),
                                alleles=hl.tarray(hl.tstr),
                                rsid=hl.tstr))
    # Restricted imports must equal the default import minus dropped fields.
    self.assertTrue(default_row_fields.drop('varid')._same(rsid_only))
    self.assertTrue(default_row_fields.drop('rsid')._same(varid_only))
    self.assertTrue(
        default_row_fields.drop('varid', 'rsid')._same(no_row_fields))
def test_annotate_globals(self):
    """annotate_globals round-trips literals of various types on both a
    MatrixTable and a Table; NaN/inf are compared via their string form."""
    mt = hl.utils.range_matrix_table(1, 1)
    ht = hl.utils.range_table(1, 1)
    str_eq = lambda x, y: str(x) == str(y)  # NaN != NaN, so compare reprs
    cases = [
        (5, hl.tint, operator.eq),
        (float('nan'), hl.tfloat32, str_eq),
        (float('inf'), hl.tfloat64, str_eq),
        (float('-inf'), hl.tfloat64, str_eq),
        (1.111, hl.tfloat64, operator.eq),
        ([hl.Struct(**{'a': None, 'b': 5}),
          hl.Struct(**{'a': 'hello', 'b': 10})],
         hl.tarray(hl.tstruct(a=hl.tstr, b=hl.tint)),
         operator.eq),
    ]
    for value, typ, same in cases:
        for ds in (mt, ht):
            annotated = ds.annotate_globals(foo=hl.literal(value, typ)).foo
            self.assertTrue(same(hl.eval(annotated), value), f"{value}, {typ}")
def test_aggregate2(self):
    """Group a two-row table by `status` and run a broad sweep of
    aggregators (collect/explode/min/max/sum/product/count/fraction/stats/
    HWE/inbreeding/call_stats/take), comparing the single result group
    against a literal expected dict."""
    schema = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32)
    rows = [{'status': 0, 'GT': hl.Call([0, 0]), 'qPheno': 3},
            {'status': 0, 'GT': hl.Call([0, 1]), 'qPheno': 13}]
    kt = hl.Table.parallelize(rows, schema)

    result = convert_struct_to_dict(
        kt.group_by(status=kt.status)
        .aggregate(
            x1=agg.collect(kt.qPheno * 2),
            x2=agg.explode(lambda elt: agg.collect(elt), [kt.qPheno, kt.qPheno + 1]),
            x3=agg.min(kt.qPheno),
            x4=agg.max(kt.qPheno),
            x5=agg.sum(kt.qPheno),
            x6=agg.product(hl.int64(kt.qPheno)),
            x7=agg.count(),
            x8=agg.count_where(kt.qPheno == 3),
            x9=agg.fraction(kt.qPheno == 1),
            x10=agg.stats(hl.float64(kt.qPheno)),
            x11=agg.hardy_weinberg_test(kt.GT),
            x13=agg.inbreeding(kt.GT, 0.1),
            x14=agg.call_stats(kt.GT, ["A", "T"]),
            x15=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')))[0],
            x16=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')).c.banana)[0],
            # Exploding a missing array/set yields an empty collection.
            x17=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tarray(hl.tint32))),
            x18=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tset(hl.tint32))),
            x19=agg.take(kt.GT, 1, ordering=-kt.qPheno)
        ).take(1)[0])

    # Expected values for the single status==0 group.
    expected = {u'status': 0,
                u'x13': {u'n_called': 2, u'expected_homs': 1.64, u'f_stat': -1.777777777777777,
                         u'observed_homs': 1},
                u'x14': {u'AC': [3, 1], u'AF': [0.75, 0.25], u'AN': 4, u'homozygote_count': [1, 0]},
                u'x15': {u'a': 5, u'c': {u'banana': u'apple'}, u'b': u'foo'},
                u'x10': {u'min': 3.0, u'max': 13.0, u'sum': 16.0, u'stdev': 5.0, u'n': 2, u'mean': 8.0},
                u'x8': 1, u'x9': 0.0, u'x16': u'apple',
                u'x11': {u'het_freq_hwe': 0.5, u'p_value': 0.5},
                u'x2': [3, 4, 13, 14], u'x3': 3, u'x1': [6, 26], u'x6': 39, u'x7': 2, u'x4': 13,
                u'x5': 16, u'x17': [], u'x18': [],
                u'x19': [hl.Call([0, 1])]}

    self.maxDiff = None  # show full diff on mismatch
    self.assertDictEqual(result, expected)
def values(self):
    """Return (hail type, example value) pairs covering the primitive and
    container types used by the surrounding tests."""
    return [
        (hl.tbool, True),
        (hl.tint32, 0),
        (hl.tint64, 0),
        (hl.tfloat32, 0.5),
        (hl.tfloat64, 0.5),
        (hl.tstr, "foo"),
        (hl.tstruct(x=hl.tint32), hl.Struct(x=0)),
        (hl.tarray(hl.tint32), [0, 1, 4]),
        (hl.tset(hl.tint32), {0, 1, 4}),
        (hl.tdict(hl.tstr, hl.tint32), {"a": 0, "b": 1, "c": 4}),
        (hl.tinterval(hl.tint32), hl.Interval(0, 1, True, False)),
        (hl.tlocus(hl.default_reference()), hl.Locus("1", 1)),
        (hl.tcall, hl.Call([0, 1])),
    ]
def test_multi_way_zip_join(self):
    """_multi_way_zip_join of three keyed tables: each output row carries an
    array with one slot per input table (None where that table lacks the
    key); also checks a same-name data field and dropping everything."""
    d1 = [{"id": 0, "name": "a", "data": 0.0},
          {"id": 1, "name": "b", "data": 3.14},
          {"id": 2, "name": "c", "data": 2.78}]
    d2 = [{"id": 0, "name": "d", "data": 1.1},
          {"id": 0, "name": "x", "data": 2.2},
          {"id": 2, "name": "v", "data": 7.89}]
    d3 = [{"id": 1, "name": "f", "data": 9.99},
          {"id": 2, "name": "g", "data": -1.0},
          {"id": 3, "name": "z", "data": 0.01}]
    s = hl.tstruct(id=hl.tint32, name=hl.tstr, data=hl.tfloat64)
    ts = [hl.Table.parallelize(r, schema=s, key='id') for r in [d1, d2, d3]]
    joined = hl.Table._multi_way_zip_join(ts, '__data', '__globals').drop('__globals')
    # Duplicate key 0 in d2 produces a second row with only d2's slot filled.
    dexpected = [{"id": 0, "__data": [{"name": "a", "data": 0.0},
                                      {"name": "d", "data": 1.1},
                                      None]},
                 {"id": 0, "__data": [None,
                                      {"name": "x", "data": 2.2},
                                      None]},
                 {"id": 1, "__data": [{"name": "b", "data": 3.14},
                                      None,
                                      {"name": "f", "data": 9.99}]},
                 {"id": 2, "__data": [{"name": "c", "data": 2.78},
                                      {"name": "v", "data": 7.89},
                                      {"name": "g", "data": -1.0}]},
                 {"id": 3, "__data": [None,
                                      None,
                                      {"name": "z", "data": 0.01}]}]
    expected = hl.Table.parallelize(
        dexpected,
        schema=hl.tstruct(id=hl.tint32, __data=hl.tarray(hl.tstruct(name=hl.tstr, data=hl.tfloat64))),
        key='id')
    self.assertTrue(expected._same(joined))

    # The zipped field may reuse a name ('data') present in the inputs.
    expected2 = expected.transmute(data=expected['__data'])
    joined_same_name = hl.Table._multi_way_zip_join(ts, 'data', 'globals').drop('globals')
    self.assertTrue(expected2._same(joined_same_name))

    # Dropping all fields still preserves the row count.
    joined_nothing = hl.Table._multi_way_zip_join(ts, 'data', 'globals').drop('data', 'globals')
    self.assertEqual(joined_nothing._force_count(), 5)
def blockmatrix_irs(self):
    """Build one representative instance of each BlockMatrix IR node for
    the parser round-trip tests and return them as a list."""
    scalar_ir = ir.F64(2)
    vector_ir = ir.MakeArray([ir.F64(3), ir.F64(2)], hl.tarray(hl.tfloat64))

    read = ir.BlockMatrixRead(ir.BlockMatrixNativeReader(resource('blockmatrix_example/0')))
    # Element-wise binary / unary maps over the read block matrix.
    add_two_bms = ir.BlockMatrixMap2(read, read,
                                     ir.ApplyBinaryPrimOp('+', ir.Ref('l'), ir.Ref('r')))
    negate_bm = ir.BlockMatrixMap(read, ir.ApplyUnaryPrimOp('-', ir.Ref('element')))
    sqrt_bm = ir.BlockMatrixMap(read, hl.sqrt(construct_expr(ir.Ref('element'), hl.tfloat64))._ir)

    # Lift scalar / vector values into 1x1, 2x1 and 1x2 block matrices.
    scalar_to_bm = ir.ValueToBlockMatrix(scalar_ir, [1, 1], 1)
    col_vector_to_bm = ir.ValueToBlockMatrix(vector_ir, [2, 1], 1)
    row_vector_to_bm = ir.ValueToBlockMatrix(vector_ir, [1, 2], 1)
    # Broadcasts into 2x2; the in-index lists select which axes broadcast.
    broadcast_scalar = ir.BlockMatrixBroadcast(scalar_to_bm, [], [2, 2], 256)
    broadcast_col = ir.BlockMatrixBroadcast(col_vector_to_bm, [0], [2, 2], 256)
    broadcast_row = ir.BlockMatrixBroadcast(row_vector_to_bm, [1], [2, 2], 256)
    # [1, 0] index order expresses a transpose via broadcast.
    transpose = ir.BlockMatrixBroadcast(broadcast_scalar, [1, 0], [2, 2], 256)
    matmul = ir.BlockMatrixDot(broadcast_scalar, transpose)

    pow_ir = (construct_expr(ir.Ref('l'), hl.tfloat64) ** construct_expr(ir.Ref('r'),
                                                                         hl.tfloat64))._ir
    squared_bm = ir.BlockMatrixMap2(scalar_to_bm, scalar_to_bm, pow_ir)

    return [
        read,
        add_two_bms,
        negate_bm,
        sqrt_bm,
        scalar_to_bm,
        col_vector_to_bm,
        row_vector_to_bm,
        broadcast_scalar,
        broadcast_col,
        broadcast_row,
        squared_bm,
        transpose,
        matmul
    ]
def test_str_annotation_regression(self):
    """Regression check: annotating with an element of a string array must
    not fail during execution (_force_count realizes the table)."""
    table = hl.Table.parallelize(
        [{'alleles': ['A', 'T']}],
        hl.tstruct(alleles=hl.tarray(hl.tstr)))
    table = table.annotate(ref=table.alleles[0])
    table._force_count()
def test_annotate(self):
    """Exercise Table.annotate: no-op annotate, simple field additions,
    array/collection expression methods, and the full set of arithmetic,
    comparison and boolean operators (both operand orders)."""
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32, e=hl.tstr,
                        f=hl.tarray(hl.tint32))

    rows = [{'a': 4, 'b': 1, 'c': 3, 'd': 5, 'e': "hello", 'f': [1, 2, 3]},
            {'a': 0, 'b': 5, 'c': 13, 'd': -1, 'e': "cat", 'f': []},
            {'a': 4, 'b': 2, 'c': 20, 'd': 3, 'e': "dog", 'f': [5, 6, 7]}]

    kt = hl.Table.parallelize(rows, schema)

    # annotate() with no arguments is the identity.
    self.assertTrue(kt.annotate()._same(kt))

    result1 = convert_struct_to_dict(kt.annotate(foo=kt.a + 1,
                                                 foo2=kt.a).take(1)[0])

    self.assertDictEqual(result1, {'a': 4,
                                   'b': 1,
                                   'c': 3,
                                   'd': 5,
                                   'e': "hello",
                                   'f': [1, 2, 3],
                                   'foo': 5,
                                   'foo2': 4})

    # Array expression methods: map/flatmap/filter/slicing/aggregates.
    result3 = convert_struct_to_dict(kt.annotate(
        x1=kt.f.map(lambda x: x * 2),
        x2=kt.f.map(lambda x: [x, x + 1]).flatmap(lambda x: x),
        x3=hl.min(kt.f),
        x4=hl.max(kt.f),
        x5=hl.sum(kt.f),
        x6=hl.product(kt.f),
        x7=kt.f.length(),
        x8=kt.f.filter(lambda x: x == 3),
        x9=kt.f[1:],
        x10=kt.f[:],
        x11=kt.f[1:2],
        x12=kt.f.map(lambda x: [x, x + 1]),
        x13=kt.f.map(lambda x: [[x, x + 1], [x + 2]]).flatmap(lambda x: x),
        x14=hl.cond(kt.a < kt.b, kt.c, hl.null(hl.tint32)),
        x15={1, 2, 3}
    ).take(1)[0])

    self.assertDictEqual(result3, {'a': 4,
                                   'b': 1,
                                   'c': 3,
                                   'd': 5,
                                   'e': "hello",
                                   'f': [1, 2, 3],
                                   'x1': [2, 4, 6], 'x2': [1, 2, 2, 3, 3, 4], 'x3': 1, 'x4': 3, 'x5': 6,
                                   'x6': 6, 'x7': 3, 'x8': [3], 'x9': [2, 3], 'x10': [1, 2, 3],
                                   'x11': [2], 'x12': [[1, 2], [2, 3], [3, 4]],
                                   'x13': [[1, 2], [3], [2, 3], [4], [3, 4], [5]],
                                   'x14': None, 'x15': set([1, 2, 3])})

    # Operator smoke test: just build the expressions (no value assertions);
    # covers both scalar-table and table-scalar operand orders.
    kt.annotate(
        x1=kt.a + 5,
        x2=5 + kt.a,
        x3=kt.a + kt.b,
        x4=kt.a - 5,
        x5=5 - kt.a,
        x6=kt.a - kt.b,
        x7=kt.a * 5,
        x8=5 * kt.a,
        x9=kt.a * kt.b,
        x10=kt.a / 5,
        x11=5 / kt.a,
        x12=kt.a / kt.b,
        x13=-kt.a,
        x14=+kt.a,
        x15=kt.a == kt.b,
        x16=kt.a == 5,
        x17=5 == kt.a,
        x18=kt.a != kt.b,
        x19=kt.a != 5,
        x20=5 != kt.a,
        x21=kt.a > kt.b,
        x22=kt.a > 5,
        x23=5 > kt.a,
        x24=kt.a >= kt.b,
        x25=kt.a >= 5,
        x26=5 >= kt.a,
        x27=kt.a < kt.b,
        x28=kt.a < 5,
        x29=5 < kt.a,
        x30=kt.a <= kt.b,
        x31=kt.a <= 5,
        x32=5 <= kt.a,
        x33=(kt.a == 0) & (kt.b == 5),
        x34=(kt.a == 0) | (kt.b == 5),
        x35=False,
        x36=True
    )
def test_filter(self):
    """Table.filter with equality, boolean combinations, and a constant
    True predicate yields the expected row counts."""
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32,
                        e=hl.tstr, f=hl.tarray(hl.tint32))
    rows = [
        {'a': 4, 'b': 1, 'c': 3, 'd': 5, 'e': "hello", 'f': [1, 2, 3]},
        {'a': 0, 'b': 5, 'c': 13, 'd': -1, 'e': "cat", 'f': []},
        {'a': 4, 'b': 2, 'c': 20, 'd': 3, 'e': "dog", 'f': [5, 6, 7]},
    ]
    table = hl.Table.parallelize(rows, schema)

    cases = [
        (table.a == 4, 2),
        ((table.d == -1) | (table.c == 20) | (table.e == "hello"), 3),
        ((table.c != 20) & (table.a == 4), 1),
        (True, 3),  # constant predicate keeps everything
    ]
    for predicate, expected_count in cases:
        self.assertEqual(table.filter(predicate).count(), expected_count)
def test_transmute(self):
    """transmute replaces the referenced fields (a, b, c, g) with the new
    field h, leaving the untouched fields d, e, f in place; a missing
    struct propagates missingness into h."""
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32,
                        e=hl.tstr, f=hl.tarray(hl.tint32),
                        g=hl.tstruct(x=hl.tbool, y=hl.tint32))
    rows = [
        {'a': 4, 'b': 1, 'c': 3, 'd': 5, 'e': "hello", 'f': [1, 2, 3], 'g': {'x': True, 'y': 2}},
        {'a': 0, 'b': 5, 'c': 13, 'd': -1, 'e': "cat", 'f': [], 'g': {'x': True, 'y': 2}},
        {'a': 4, 'b': 2, 'c': 20, 'd': 3, 'e': "dog", 'f': [5, 6, 7], 'g': None},
    ]
    table = hl.Table.parallelize(rows, schema)
    table = table.transmute(h=table.a + table.b + table.c + table.g.y)
    collected = table.select('h').collect()
    self.assertEqual(list(table.row), ['d', 'e', 'f', 'h'])
    self.assertEqual(collected, [hl.Struct(h=value) for value in [10, 20, None]])
def test_select(self):
    """Table.select controls the row fields and their order: key fields are
    kept first, new computed fields and struct-splatting work, and an empty
    select leaves only the (empty) key."""
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32,
                        e=hl.tstr, f=hl.tarray(hl.tint32),
                        g=hl.tstruct(x=hl.tbool, y=hl.tint32))
    rows = [
        {'a': 4, 'b': 1, 'c': 3, 'd': 5, 'e': "hello", 'f': [1, 2, 3], 'g': {'x': True, 'y': 2}},
        {'a': 0, 'b': 5, 'c': 13, 'd': -1, 'e': "cat", 'f': [], 'g': {'x': True, 'y': 2}},
        {'a': 4, 'b': 2, 'c': 20, 'd': 3, 'e': "dog", 'f': [5, 6, 7], 'g': None},
    ]
    kt = hl.Table.parallelize(rows, schema)

    # Unkeyed select keeps exactly the requested fields, in order.
    selected = kt.select(kt.a, kt.e)
    self.assertEqual(list(selected.row), ['a', 'e'])
    self.assertEqual(list(selected.key), [])

    # With a key, the key field leads the row even when not selected first.
    keyed = kt.key_by('e')
    keyed = keyed.select(keyed.a)
    self.assertEqual(list(keyed.row), ['e', 'a'])
    self.assertEqual(list(keyed.key), ['e'])

    # Computed fields and **struct splatting add fields after the selected ones.
    self.assertEqual(list(kt.select(kt.a, foo=kt.a + kt.b - kt.c - kt.d).row),
                     ['a', 'foo'])
    self.assertEqual(list(kt.select(kt.a, foo=kt.a + kt.b - kt.c - kt.d, **kt.g).row),
                     ['a', 'foo', 'x', 'y'])

    # select no fields
    empty = kt.select()
    self.assertEqual(list(empty.row), [])
    self.assertEqual(list(empty.key), [])
def floating_point_divide(arg_type, ret_type):
    """Register "/" for scalar/array, array/scalar and array/array operand
    shapes, all returning an array of `ret_type`."""
    arg_array = hl.tarray(arg_type)
    ret_array = hl.tarray(ret_type)
    register_function("/", (arg_type, arg_array), ret_array)
    register_function("/", (arg_array, arg_type), ret_array)
    register_function("/", (arg_array, arg_array), ret_array)
def value_irs(self):
    """Build one representative instance of each value-level IR node for
    the parser round-trip tests and return them as a list."""
    # Free references reused as operands throughout the list.
    b = ir.TrueIR()
    c = ir.Ref('c')
    i = ir.I32(5)
    j = ir.I32(7)
    st = ir.Str('Hail')
    a = ir.Ref('a')
    aa = ir.Ref('aa')
    da = ir.Ref('da')
    v = ir.Ref('v')
    s = ir.Ref('s')
    t = ir.Ref('t')
    call = ir.Ref('call')
    table = ir.TableRange(5, 3)

    # Aggregator signatures exercised by the Apply*Op / InitOp / SeqOp nodes.
    collect_sig = ir.AggSignature('Collect', [], None, [hl.tint32])
    call_stats_sig = ir.AggSignature('CallStats', [], [hl.tint32], [hl.tcall])
    hist_sig = ir.AggSignature(
        'Histogram', [hl.tfloat64, hl.tfloat64, hl.tint32], None, [hl.tfloat64])
    take_by_sig = ir.AggSignature('TakeBy', [hl.tint32], None, [hl.tfloat64, hl.tfloat64])

    # NOTE(review): rebinding `table` makes the TableRange(5, 3) above dead —
    # presumably a leftover; confirm before removing either binding.
    table = ir.TableRange(10, 4)

    value_irs = [
        i, ir.I64(5), ir.F32(3.14), ir.F64(3.14), s, ir.TrueIR(), ir.FalseIR(), ir.Void(),
        ir.Cast(i, hl.tfloat64),
        ir.NA(hl.tint32),
        ir.IsNA(i),
        ir.If(b, i, j),
        ir.Let('v', i, v),
        ir.Ref('x'),
        ir.ApplyBinaryOp('+', i, j),
        ir.ApplyUnaryOp('-', i),
        ir.ApplyComparisonOp('EQ', i, j),
        ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)),
        ir.ArrayRef(a, i),
        ir.ArrayLen(a),
        ir.ArrayRange(ir.I32(0), ir.I32(5), ir.I32(1)),
        ir.ArraySort(a, b, False),
        ir.ToSet(a),
        ir.ToDict(da),
        ir.ToArray(a),
        ir.LowerBoundOnOrderedCollection(a, i, True),
        ir.GroupByKey(da),
        ir.ArrayMap(a, 'v', v),
        ir.ArrayFilter(a, 'v', v),
        ir.ArrayFlatMap(aa, 'v', v),
        ir.ArrayFold(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayScan(a, ir.I32(0), 'x', 'v', v),
        ir.ArrayFor(a, 'v', ir.Void()),
        ir.AggFilter(ir.TrueIR(), ir.I32(0)),
        ir.AggExplode(ir.ArrayRange(ir.I32(0), ir.I32(2), ir.I32(1)), 'x', ir.I32(0)),
        ir.AggGroupBy(ir.TrueIR(), ir.I32(0)),
        ir.ApplyAggOp([], None, [ir.I32(0)], collect_sig),
        ir.ApplyScanOp([], None, [ir.I32(0)], collect_sig),
        ir.ApplyAggOp([ir.F64(-5.0), ir.F64(5.0), ir.I32(100)], None, [ir.F64(-2.11)], hist_sig),
        ir.ApplyAggOp([], [ir.I32(2)], [call], call_stats_sig),
        ir.ApplyAggOp([ir.I32(10)], None, [ir.F64(-2.11), ir.F64(-2.11)], take_by_sig),
        ir.InitOp(ir.I32(0), [ir.I32(2)], call_stats_sig),
        ir.SeqOp(ir.I32(0), [i], collect_sig),
        ir.SeqOp(ir.I32(0), [ir.F64(-2.11), ir.I32(17)], take_by_sig),
        ir.Begin([ir.Void()]),
        ir.MakeStruct([('x', i)]),
        ir.SelectFields(s, ['x', 'z']),
        ir.InsertFields(s, [('x', i)]),
        ir.GetField(s, 'x'),
        ir.MakeTuple([i, b]),
        ir.GetTupleElement(t, 1),
        ir.StringSlice(st, ir.I32(1), ir.I32(2)),
        ir.StringLength(st),
        ir.In(2, hl.tfloat64),
        ir.Die('mumblefoo', hl.tfloat64),
        ir.Apply('&&', b, c),
        ir.Apply('toFloat64', i),
        ir.Uniroot('x', ir.F64(3.14), ir.F64(-5.0), ir.F64(5.0)),
        ir.Literal(hl.tarray(hl.tint32), [1, 2, None]),
        ir.TableCount(table),
        ir.TableAggregate(table, ir.MakeStruct([('foo', ir.ApplyAggOp([], None, [ir.I32(0)], collect_sig))])),
        ir.TableWrite(table, new_temp_file(), False, True, "fake_codec_spec$$"),
    ]
    return value_irs
def test_mendel_errors(self):
    """hl.mendel_errors on the mendel.vcf/mendel.fam fixtures: verify the
    schemas, counts, and exact per-family / per-individual / per-variant
    error totals, plus robustness to missing sex in the pedigree."""
    mt = hl.import_vcf(resource('mendel.vcf'))
    ped = hl.Pedigree.read(resource('mendel.fam'))
    # Four result tables: per-error, per-family, per-individual, per-variant.
    men, fam, ind, var = hl.mendel_errors(mt['GT'], ped)

    self.assertEqual(men.key.dtype, hl.tstruct(locus=mt.locus.dtype,
                                               alleles=hl.tarray(hl.tstr),
                                               s=hl.tstr))
    self.assertEqual(men.row.dtype, hl.tstruct(locus=mt.locus.dtype,
                                               alleles=hl.tarray(hl.tstr),
                                               s=hl.tstr,
                                               fam_id=hl.tstr,
                                               mendel_code=hl.tint))
    self.assertEqual(fam.key.dtype, hl.tstruct(pat_id=hl.tstr,
                                               mat_id=hl.tstr))
    self.assertEqual(fam.row.dtype, hl.tstruct(pat_id=hl.tstr,
                                               mat_id=hl.tstr,
                                               fam_id=hl.tstr,
                                               children=hl.tint,
                                               errors=hl.tint64,
                                               snp_errors=hl.tint64))
    self.assertEqual(ind.key.dtype, hl.tstruct(s=hl.tstr))
    self.assertEqual(ind.row.dtype, hl.tstruct(s=hl.tstr,
                                               fam_id=hl.tstr,
                                               errors=hl.tint64,
                                               snp_errors=hl.tint64))
    self.assertEqual(var.key.dtype, hl.tstruct(locus=mt.locus.dtype,
                                               alleles=hl.tarray(hl.tstr)))
    self.assertEqual(var.row.dtype, hl.tstruct(locus=mt.locus.dtype,
                                               alleles=hl.tarray(hl.tstr),
                                               errors=hl.tint64))

    # Known totals for the fixture data.
    self.assertEqual(men.count(), 41)
    self.assertEqual(fam.count(), 2)
    self.assertEqual(ind.count(), 7)
    self.assertEqual(var.count(), mt.count_rows())

    self.assertEqual(set(fam.select('children', 'errors', 'snp_errors').collect()),
                     {
                         hl.utils.Struct(pat_id='Dad1', mat_id='Mom1', children=2,
                                         errors=41, snp_errors=39),
                         hl.utils.Struct(pat_id='Dad2', mat_id='Mom2', children=1,
                                         errors=0, snp_errors=0)
                     })

    self.assertEqual(set(ind.select('errors', 'snp_errors').collect()),
                     {
                         hl.utils.Struct(s='Son1', errors=23, snp_errors=22),
                         hl.utils.Struct(s='Dtr1', errors=18, snp_errors=17),
                         hl.utils.Struct(s='Dad1', errors=19, snp_errors=18),
                         hl.utils.Struct(s='Mom1', errors=22, snp_errors=21),
                         hl.utils.Struct(s='Dad2', errors=0, snp_errors=0),
                         hl.utils.Struct(s='Mom2', errors=0, snp_errors=0),
                         hl.utils.Struct(s='Son2', errors=0, snp_errors=0)
                     })

    # Spot-check per-variant error counts at selected loci (incl. X and Y).
    to_keep = hl.set([
        (hl.Locus("1", 1), ['C', 'CT']),
        (hl.Locus("1", 2), ['C', 'T']),
        (hl.Locus("X", 1), ['C', 'T']),
        (hl.Locus("X", 3), ['C', 'T']),
        (hl.Locus("Y", 1), ['C', 'T']),
        (hl.Locus("Y", 3), ['C', 'T'])
    ])
    self.assertEqual(var.filter(to_keep.contains((var.locus, var.alleles)))
                     .order_by('locus')
                     .select('locus', 'alleles', 'errors').collect(),
                     [
                         hl.utils.Struct(locus=hl.Locus("1", 1), alleles=['C', 'CT'], errors=2),
                         hl.utils.Struct(locus=hl.Locus("1", 2), alleles=['C', 'T'], errors=1),
                         hl.utils.Struct(locus=hl.Locus("X", 1), alleles=['C', 'T'], errors=2),
                         hl.utils.Struct(locus=hl.Locus("X", 3), alleles=['C', 'T'], errors=1),
                         hl.utils.Struct(locus=hl.Locus("Y", 1), alleles=['C', 'T'], errors=1),
                         hl.utils.Struct(locus=hl.Locus("Y", 3), alleles=['C', 'T'], errors=1),
                     ])

    # Missing sex in the pedigree must not change results for individuals
    # whose sex is known.
    ped2 = hl.Pedigree.read(resource('mendelWithMissingSex.fam'))
    men2, _, _, _ = hl.mendel_errors(mt['GT'], ped2)

    self.assertTrue(men2.filter(men2.s == 'Dtr1')._same(men.filter(men.s == 'Dtr1')))
def _compute_type(self):
    """Derive the localized table type: globals gain an array of the
    child's column structs, and rows gain an array of its entry structs;
    the row key is unchanged."""
    child_typ = self.child.typ
    new_globals = child_typ.global_type._insert_field(
        self.cols_field_name, hl.tarray(child_typ.col_type))
    new_rows = child_typ.row_type._insert_field(
        self.entries_field_name, hl.tarray(child_typ.entry_type))
    self._type = hl.ttable(new_globals, new_rows, child_typ.row_key)
def _compute_type(self):
    """Derive the result table type of a matrix-to-table apply node,
    dispatching on the configured function name.

    Each regression-style branch keeps the child's globals and row key and
    concatenates a method-specific result schema onto the row-key fields
    (plus any requested pass-through row fields); Skat, PCA and
    LocalLDPrune define their own key/global shapes.
    """
    name = self.config['name']
    child_typ = self.child.typ
    if name == 'LinearRegressionRowsChained':
        pass_through = self.config['passThrough']
        # Per-row arrays-of-arrays: one inner array per chained group.
        chained_schema = hl.dtype(
            'struct{n:array<int32>,sum_x:array<float64>,y_transpose_x:array<array<float64>>,beta:array<array<float64>>,standard_error:array<array<float64>>,t_stat:array<array<float64>>,p_value:array<array<float64>>}')
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type
             ._insert_fields(**{f: child_typ.row_type[f] for f in pass_through})
             ._concat(chained_schema)),
            child_typ.row_key)
    elif name == 'LinearRegressionRowsSingle':
        pass_through = self.config['passThrough']
        chained_schema = hl.dtype(
            'struct{n:int32,sum_x:float64,y_transpose_x:array<float64>,beta:array<float64>,standard_error:array<float64>,t_stat:array<float64>,p_value:array<float64>}')
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type
             ._insert_fields(**{f: child_typ.row_type[f] for f in pass_through})
             ._concat(chained_schema)),
            child_typ.row_key)
    elif name == 'LogisticRegression':
        pass_through = self.config['passThrough']
        # One test result per phenotype, hence the array wrapper.
        logreg_type = hl.tstruct(logistic_regression=hl.tarray(regression_test_type(self.config['test'])))
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type
             ._insert_fields(**{f: child_typ.row_type[f] for f in pass_through})
             ._concat(logreg_type)),
            child_typ.row_key)
    elif name == 'PoissonRegression':
        pass_through = self.config['passThrough']
        poisreg_type = regression_test_type(self.config['test'])
        self._type = hl.ttable(
            child_typ.global_type,
            (child_typ.row_key_type
             ._insert_fields(**{f: child_typ.row_type[f] for f in pass_through})
             ._concat(poisreg_type)),
            child_typ.row_key)
    elif name == 'Skat':
        # Result is keyed by the Skat grouping key, with empty globals.
        key_field = self.config['keyField']
        key_type = child_typ.row_type[key_field]
        skat_type = hl.dtype(f'struct{{id:{key_type},size:int32,q_stat:float64,p_value:float64,fault:int32}}')
        self._type = hl.ttable(
            hl.tstruct(),
            skat_type,
            ['id'])
    elif name == 'PCA':
        # FIX: was a bare `dtype('array<float64>')`; every other branch uses
        # hl.dtype, and no module-level `dtype` is in evidence here.
        self._type = hl.ttable(
            hl.tstruct(eigenvalues=hl.tarray(hl.tfloat64),
                       scores=hl.tarray(child_typ.col_key_type._insert_field('scores', hl.tarray(hl.tfloat64)))),
            child_typ.row_key_type._insert_field('loadings', hl.dtype('array<float64>')),
            child_typ.row_key)
    else:
        assert name == 'LocalLDPrune', name
        self._type = hl.ttable(
            hl.tstruct(),
            child_typ.row_key_type._insert_fields(mean=hl.tfloat64,
                                                  centered_length_rec=hl.tfloat64),
            list(child_typ.row_key))