Exemple #1
0
    def test_agg_cols_explode(self):
        t = hl.utils.range_matrix_table(1, 10)

        tests = [(agg.explode(
            lambda elt: agg.collect(elt + 1).append(0),
            hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1],
                    hl.empty_array(hl.tint32))), [9, 10, 10, 11, 0]),
                 (agg.explode(
                     lambda elt: agg.explode(
                         lambda elt2: agg.collect(elt2 + 1).append(0),
                         [elt, elt + 1]),
                     hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1],
                             hl.empty_array(hl.tint32))),
                  [9, 10, 10, 11, 10, 11, 11, 12, 0]),
                 (agg.explode(
                     lambda elt: agg.filter(elt > 8,
                                            agg.collect(elt + 1).append(0)),
                     hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1],
                             hl.empty_array(hl.tint32))), [10, 10, 11, 0]),
                 (agg.explode(
                     lambda elt: agg.group_by(elt % 3,
                                              agg.collect(elt + 1).append(0)),
                     hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1],
                             hl.empty_array(hl.tint32))), {
                                 0: [10, 10, 0],
                                 1: [11, 0],
                                 2: [9, 0]
                             })]
        for aggregation, expected in tests:
            self.assertEqual(
                t.select_rows(result=aggregation).result.collect()[0],
                expected)
Exemple #2
0
 def test_agg_cols_group_by(self):
     t = hl.utils.range_matrix_table(1, 10)
     tests = [
         (agg.group_by(
             t.col_idx % 2,
             hl.array(agg.collect_as_set(t.col_idx + 1)).append(0)), {
                 0: [1, 3, 5, 7, 9, 0],
                 1: [2, 4, 6, 8, 10, 0]
             }),
         (agg.group_by(
             t.col_idx % 3,
             agg.filter(
                 t.col_idx > 7,
                 hl.array(agg.collect_as_set(t.col_idx + 1)).append(0))), {
                     0: [10, 0],
                     1: [0],
                     2: [9, 0]
                 }),
         (agg.group_by(
             t.col_idx % 3,
             agg.explode(
                 lambda elt: agg.collect(elt + 1).append(0),
                 hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1],
                         hl.empty_array(hl.tint32)))), {
                             0: [10, 11, 0],
                             1: [0],
                             2: [9, 10, 0]
                         }),
     ]
     for aggregation, expected in tests:
         self.assertEqual(
             t.select_rows(result=aggregation).result.collect()[0],
             expected)
Exemple #3
0
    def test_aggregate2(self):
        schema = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32)

        rows = [{'status': 0, 'GT': hl.Call([0, 0]), 'qPheno': 3},
                {'status': 0, 'GT': hl.Call([0, 1]), 'qPheno': 13}]

        kt = hl.Table.parallelize(rows, schema)

        result = convert_struct_to_dict(
            kt.group_by(status=kt.status)
                .aggregate(
                x1=agg.collect(kt.qPheno * 2),
                x2=agg.explode(lambda elt: agg.collect(elt), [kt.qPheno, kt.qPheno + 1]),
                x3=agg.min(kt.qPheno),
                x4=agg.max(kt.qPheno),
                x5=agg.sum(kt.qPheno),
                x6=agg.product(hl.int64(kt.qPheno)),
                x7=agg.count(),
                x8=agg.count_where(kt.qPheno == 3),
                x9=agg.fraction(kt.qPheno == 1),
                x10=agg.stats(hl.float64(kt.qPheno)),
                x11=agg.hardy_weinberg_test(kt.GT),
                x13=agg.inbreeding(kt.GT, 0.1),
                x14=agg.call_stats(kt.GT, ["A", "T"]),
                x15=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')))[0],
                x16=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')).c.banana)[0],
                x17=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tarray(hl.tint32))),
                x18=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tset(hl.tint32))),
                x19=agg.take(kt.GT, 1, ordering=-kt.qPheno)
            ).take(1)[0])

        expected = {u'status': 0,
                    u'x13': {u'n_called': 2, u'expected_homs': 1.64, u'f_stat': -1.777777777777777,
                             u'observed_homs': 1},
                    u'x14': {u'AC': [3, 1], u'AF': [0.75, 0.25], u'AN': 4, u'homozygote_count': [1, 0]},
                    u'x15': {u'a': 5, u'c': {u'banana': u'apple'}, u'b': u'foo'},
                    u'x10': {u'min': 3.0, u'max': 13.0, u'sum': 16.0, u'stdev': 5.0, u'n': 2, u'mean': 8.0},
                    u'x8': 1, u'x9': 0.0, u'x16': u'apple',
                    u'x11': {u'het_freq_hwe': 0.5, u'p_value': 0.5},
                    u'x2': [3, 4, 13, 14], u'x3': 3, u'x1': [6, 26], u'x6': 39, u'x7': 2, u'x4': 13, u'x5': 16,
                    u'x17': [],
                    u'x18': [],
                    u'x19': [hl.Call([0, 1])]}

        self.maxDiff = None

        self.assertDictEqual(result, expected)
Exemple #4
0
    def test_aggregate2(self):
        schema = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32)

        rows = [{'status': 0, 'GT': hl.Call([0, 0]), 'qPheno': 3},
                {'status': 0, 'GT': hl.Call([0, 1]), 'qPheno': 13}]

        kt = hl.Table.parallelize(rows, schema)

        result = convert_struct_to_dict(
            kt.group_by(status=kt.status)
                .aggregate(
                x1=agg.collect(kt.qPheno * 2),
                x2=agg.explode(lambda elt: agg.collect(elt), [kt.qPheno, kt.qPheno + 1]),
                x3=agg.min(kt.qPheno),
                x4=agg.max(kt.qPheno),
                x5=agg.sum(kt.qPheno),
                x6=agg.product(hl.int64(kt.qPheno)),
                x7=agg.count(),
                x8=agg.count_where(kt.qPheno == 3),
                x9=agg.fraction(kt.qPheno == 1),
                x10=agg.stats(hl.float64(kt.qPheno)),
                x11=agg.hardy_weinberg_test(kt.GT),
                x13=agg.inbreeding(kt.GT, 0.1),
                x14=agg.call_stats(kt.GT, ["A", "T"]),
                x15=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')))[0],
                x16=agg.collect(hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')).c.banana)[0],
                x17=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tarray(hl.tint32))),
                x18=agg.explode(lambda elt: agg.collect(elt), hl.null(hl.tset(hl.tint32))),
                x19=agg.take(kt.GT, 1, ordering=-kt.qPheno)
            ).take(1)[0])

        expected = {u'status': 0,
                    u'x13': {u'n_called': 2, u'expected_homs': 1.64, u'f_stat': -1.777777777777777,
                             u'observed_homs': 1},
                    u'x14': {u'AC': [3, 1], u'AF': [0.75, 0.25], u'AN': 4, u'homozygote_count': [1, 0]},
                    u'x15': {u'a': 5, u'c': {u'banana': u'apple'}, u'b': u'foo'},
                    u'x10': {u'min': 3.0, u'max': 13.0, u'sum': 16.0, u'stdev': 5.0, u'n': 2, u'mean': 8.0},
                    u'x8': 1, u'x9': 0.0, u'x16': u'apple',
                    u'x11': {u'het_freq_hwe': 0.5, u'p_value': 0.5},
                    u'x2': [3, 4, 13, 14], u'x3': 3, u'x1': [6, 26], u'x6': 39, u'x7': 2, u'x4': 13, u'x5': 16,
                    u'x17': [],
                    u'x18': [],
                    u'x19': [hl.Call([0, 1])]}

        self.maxDiff = None

        self.assertDictEqual(result, expected)
Exemple #5
0
    def test_agg_cols_explode(self):
        t = hl.utils.range_matrix_table(1, 10)

        tests = [(agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                              hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1], hl.empty_array(hl.tint32))),
                  [9, 10, 10, 11, 0]),
                 (agg.explode(lambda elt: agg.explode(lambda elt2: agg.collect(elt2 + 1).append(0),
                                                      [elt, elt + 1]),
                              hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1], hl.empty_array(hl.tint32))),
                  [9, 10, 10, 11, 10, 11, 11, 12, 0]),
                 (agg.explode(lambda elt: agg.filter(elt > 8,
                                                     agg.collect(elt + 1).append(0)),
                              hl.cond(t.col_idx > 7, [t.col_idx, t.col_idx + 1], hl.empty_array(hl.tint32))),
                  [10, 10, 11, 0]),
                 (agg.explode(lambda elt: agg.group_by(elt % 3,
                                                       agg.collect(elt + 1).append(0)),
                                           hl.cond(t.col_idx > 7,
                                                   [t.col_idx, t.col_idx + 1],
                                                   hl.empty_array(hl.tint32))),
                  {0: [10, 10, 0], 1: [11, 0], 2:[9, 0]})
                 ]
        for aggregation, expected in tests:
            self.assertEqual(t.select_rows(result = aggregation).result.collect()[0], expected)
Exemple #6
0
 def test_agg_cols_filter(self):
     t = hl.utils.range_matrix_table(1, 10)
     tests = [(agg.filter(t.col_idx > 7,
                          agg.collect(t.col_idx + 1).append(0)),
               [9, 10, 0]),
              (agg.filter(t.col_idx > 7,
                          agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                                      [t.col_idx, t.col_idx + 1])),
               [9, 10, 10, 11, 0]),
              (agg.filter(t.col_idx > 7,
                          agg.group_by(t.col_idx % 3,
                                       hl.array(agg.collect_as_set(t.col_idx + 1)).append(0))),
               {0: [10, 0], 2: [9, 0]})
              ]
     for aggregation, expected in tests:
         self.assertEqual(t.select_rows(result = aggregation).result.collect()[0], expected)
Exemple #7
0
 def test_agg_cols_filter(self):
     t = hl.utils.range_matrix_table(1, 10)
     tests = [(agg.filter(t.col_idx > 7,
                          agg.collect(t.col_idx + 1).append(0)),
               [9, 10, 0]),
              (agg.filter(t.col_idx > 7,
                          agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                                      [t.col_idx, t.col_idx + 1])),
               [9, 10, 10, 11, 0]),
              (agg.filter(t.col_idx > 7,
                          agg.group_by(t.col_idx % 3,
                                       hl.array(agg.collect_as_set(t.col_idx + 1)).append(0))),
               {0: [10, 0], 2: [9, 0]})
              ]
     for aggregation, expected in tests:
         self.assertEqual(t.select_rows(result = aggregation).result.collect()[0], expected)
Exemple #8
0
    def test_aggregate1(self):
        schema = hl.tstruct(a=hl.tint32,
                            b=hl.tint32,
                            c=hl.tint32,
                            d=hl.tint32,
                            e=hl.tstr,
                            f=hl.tarray(hl.tint32))

        rows = [{
            'a': 4,
            'b': 1,
            'c': 3,
            'd': 5,
            'e': "hello",
            'f': [1, 2, 3]
        }, {
            'a': 0,
            'b': 5,
            'c': 13,
            'd': -1,
            'e': "cat",
            'f': []
        }, {
            'a': 4,
            'b': 2,
            'c': 20,
            'd': 3,
            'e': "dog",
            'f': [5, 6, 7]
        }]

        kt = hl.Table.parallelize(rows, schema)
        results = kt.aggregate(
            hl.Struct(q1=agg.sum(kt.b),
                      q2=agg.count(),
                      q3=agg.collect(kt.e),
                      q4=agg.filter((kt.d >= 5) | (kt.a == 0),
                                    agg.collect(kt.e)),
                      q5=agg.explode(lambda elt: agg.mean(elt), kt.f)))

        self.assertEqual(results.q1, 8)
        self.assertEqual(results.q2, 3)
        self.assertEqual(set(results.q3), {"hello", "cat", "dog"})
        self.assertEqual(set(results.q4), {"hello", "cat"})
        self.assertAlmostEqual(results.q5, 4)
Exemple #9
0
 def test_agg_cols_group_by(self):
     t = hl.utils.range_matrix_table(1, 10)
     tests = [(agg.group_by(t.col_idx % 2,
                            hl.array(agg.collect_as_set(t.col_idx + 1)).append(0)),
               {0: [1, 3, 5, 7, 9, 0], 1: [2, 4, 6, 8, 10, 0]}),
              (agg.group_by(t.col_idx % 3,
                            agg.filter(t.col_idx > 7,
                                       hl.array(agg.collect_as_set(t.col_idx + 1)).append(0))),
               {0: [10, 0], 1: [0], 2: [9, 0]}),
              (agg.group_by(t.col_idx % 3,
                            agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                                        hl.cond(t.col_idx > 7,
                                                [t.col_idx, t.col_idx + 1],
                                                hl.empty_array(hl.tint32)))),
               {0: [10, 11, 0], 1: [0], 2:[9, 10, 0]}),
              ]
     for aggregation, expected in tests:
         self.assertEqual(t.select_rows(result = aggregation).result.collect()[0], expected)
Exemple #10
0
    def test_aggregate1(self):
        schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32, e=hl.tstr, f=hl.tarray(hl.tint32))

        rows = [{'a': 4, 'b': 1, 'c': 3, 'd': 5, 'e': "hello", 'f': [1, 2, 3]},
                {'a': 0, 'b': 5, 'c': 13, 'd': -1, 'e': "cat", 'f': []},
                {'a': 4, 'b': 2, 'c': 20, 'd': 3, 'e': "dog", 'f': [5, 6, 7]}]

        kt = hl.Table.parallelize(rows, schema)
        results = kt.aggregate(hl.Struct(q1=agg.sum(kt.b),
                                         q2=agg.count(),
                                         q3=agg.collect(kt.e),
                                         q4=agg.filter((kt.d >= 5) | (kt.a == 0), agg.collect(kt.e)),
                                         q5=agg.explode(lambda elt: agg.mean(elt), kt.f)))

        self.assertEqual(results.q1, 8)
        self.assertEqual(results.q2, 3)
        self.assertEqual(set(results.q3), {"hello", "cat", "dog"})
        self.assertEqual(set(results.q4), {"hello", "cat"})
        self.assertAlmostEqual(results.q5, 4)