Esempio n. 1
0
 def test_intersection(self) -> None:
     """Check Intersection against a direct evaluation of both bounds.

     Two Bisect modules select ``_1 > 0.3`` and ``_1 < 0.8`` on the same
     HistogramIndex.  The index of the Intersection table must equal the
     index returned by evaluating ``(_1>0.3)&(_1<0.8)`` on the data that
     feeds the histogram index.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     lower_table = Table(name=None,
                         dshape="{_1: float64}",
                         data={"_1": [0.3]})
     lower_bound = Constant(table=lower_table, scheduler=sched)
     upper_table = Table(name=None,
                         dshape="{_1: float64}",
                         data={"_1": [0.8]})
     upper_bound = Constant(table=upper_table, scheduler=sched)
     index_mod = HistogramIndex(column="_1", scheduler=sched)
     index_mod.create_dependent_modules(source, "result")
     above = Bisect(
         column="_1", op=">", hist_index=index_mod, scheduler=sched
     )
     above.input[0] = index_mod.output.result
     above.input.limit = lower_bound.output.result
     below = Bisect(
         column="_1", op="<", hist_index=index_mod, scheduler=sched
     )
     below.input[0] = index_mod.output.result
     below.input.limit = upper_bound.output.result
     both = Intersection(scheduler=sched)
     # NOTE(review): the two producers are both assigned to input[0];
     # presumably this slot accepts several connections -- confirm.
     both.input[0] = above.output.result
     both.input[0] = below.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = both.output.result
     aio.run(sched.start())
     indexed_input = index_mod.input_module
     assert indexed_input is not None
     indexed_data = indexed_input.output["result"].data()
     expected = indexed_data.eval("(_1>0.3)&(_1<0.8)",
                                  result_object="index")
     self.assertEqual(both.table.index, bitmap(expected))
Esempio n. 2
0
 def test_paste(self) -> None:
     """Paste the minima of columns ``_1`` and ``_2`` into one table.

     Each Min output goes through a Dict2Table before reaching Paste.  The
     last row of the pasted table must match, for both columns, the
     minimum reported by the random table itself.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     suffix = str(hash(source))
     first_min = Min(name="min_1" + suffix, scheduler=sched, columns=["_1"])
     first_min.input[0] = source.output.result
     first_as_table = Dict2Table(scheduler=sched)
     first_as_table.input.dict_ = first_min.output.result
     second_min = Min(name="min_2" + suffix, scheduler=sched, columns=["_2"])
     second_min.input[0] = source.output.result
     second_as_table = Dict2Table(scheduler=sched)
     second_as_table.input.dict_ = second_min.output.result
     paste = Paste(scheduler=sched)
     paste.input.first = first_as_table.output.result
     paste.input.second = second_as_table.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = paste.output.result
     aio.run(sched.start())
     expected = source.table.min()
     actual = notNone(paste.table.last()).to_dict()
     for column in ("_1", "_2"):
         self.assertAlmostEqual(expected[column], actual[column])
Esempio n. 3
0
 def test_ldexp(self) -> None:
     """Compare ColsLdexp with ``np.ldexp`` on integer random columns.

     Columns ``_3``, ``_5``, ``_7`` are the first operands and ``_4``,
     ``_6``, ``_8`` the second ones; the output columns are named ``x``,
     ``y`` and ``z``.
     """
     name_prefix = "cols_ldexp_"
     print("Testing", name_prefix)
     sched = self.scheduler()
     n_cols = 10

     def draw_ints(x):
         # Integers drawn by np.random.randint(10, size=x).
         return np.random.randint(10, size=x)

     source = RandomTable(
         n_cols,
         rows=10_000,
         scheduler=sched,
         random=draw_ints,  # type: ignore
         dtype="int64",
     )
     module = ColsLdexp(
         first=["_3", "_5", "_7"],
         second=["_4", "_6", "_8"],
         cols_out=["x", "y", "z"],
         scheduler=sched,
     )
     module.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     self.assertListEqual(module.table.columns, ["x", "y", "z"])
     data = source.table.to_array()
     # Positions 2/4/6 and 3/5/7 are the array columns of _3/_5/_7 and
     # _4/_6/_8 respectively.
     expected = np.ldexp(data[:, [2, 4, 6]], data[:, [3, 5, 7]])
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith(name_prefix))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
Esempio n. 4
0
 def test_bisect2(self) -> None:
     """Check Bisect with ``op=">"`` downstream of a row-deleting Stirrer.

     The index of the Bisect table must equal the index obtained by
     evaluating ``_1>0.5`` on the Stirrer table after the run.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100_000, scheduler=sched)
     stirrer = Stirrer(update_column="_1", delete_rows=100, scheduler=sched)
     stirrer.input[0] = source.output.result
     # NOTE(review): the limit table is declared with a string dshape but
     # holds the float 0.5 -- confirm this is intended.
     limit_table = Table(
         name=None, dshape="{value: string}", data={"value": [0.5]}
     )
     limit = Constant(table=limit_table, scheduler=sched)
     index_mod = HistogramIndex(column="_1", scheduler=sched)
     index_mod.create_dependent_modules(stirrer, "result")
     bisect_mod = Bisect(
         column="_1", op=">", hist_index=index_mod, scheduler=sched
     )
     bisect_mod.input[0] = index_mod.output.result
     bisect_mod.input.limit = limit.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = bisect_mod.output.result
     aio.run(sched.start())
     expected = stirrer.table.eval("_1>0.5", result_object="index")
     self.assertEqual(bisect_mod.table.index, bitmap(expected))
Esempio n. 5
0
    def test_mb_k_means(self) -> None:
        """Run MBKMeans on the ``cluster:s3`` dataset and count the labels.

        The test returns early, without asserting anything, when fetching
        the dataset raises ``TimeoutError``.  Otherwise it expects as many
        labels as there are rows in the CSV loader table.
        """
        sched = self.scheduler()
        cluster_count = 3
        try:
            sources = (get_dataset("cluster:s3"), )
        except TimeoutError:
            print("Cannot download cluster:s3")
            return

        with sched:
            loader = CSVLoader(sources,
                               sep=" ",
                               skipinitialspace=True,
                               header=None,
                               index_col=False,
                               scheduler=sched)
            kmeans = MBKMeans(n_clusters=cluster_count,
                              random_state=42,
                              is_input=False,
                              is_greedy=False,
                              scheduler=sched)
            kmeans.create_dependent_modules(loader)
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = kmeans.output.result
            every = Every(proc=self.terse, scheduler=sched)
            every.input[0] = kmeans.output.labels
        aio.run(sched.start())
        found_labels = kmeans.labels()
        assert found_labels is not None
        self.assertEqual(len(loader.table), len(found_labels))
Esempio n. 6
0
 def test_merge_simple(self) -> None:
     """Merge two one-row constant tables on their index.

     The last row of the merged table must carry ``xmin``, ``xmax``,
     ``ymin`` and ``ymax`` with the values supplied by the two constants.
     """
     sched = self.scheduler()
     x_frame = pd.DataFrame({"xmin": [1], "xmax": [2]})
     x_const = Constant(Table(name=None, data=x_frame), scheduler=sched)
     y_frame = pd.DataFrame({"ymin": [3], "ymax": [4]})
     y_const = Constant(Table(name=None, data=y_frame), scheduler=sched)
     merger = Merge(left_index=True, right_index=True, scheduler=sched)
     merger.input[0] = x_const.output.result
     merger.input[0] = y_const.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = merger.output.result
     aio.run(sched.start())
     # The statistics are requested but not inspected.
     merger.trace_stats(max_runs=1)
     merged = merger.table
     last_row = merged.loc[merged.index[-1]]
     assert last_row is not None
     expected = {"xmin": 1, "xmax": 2, "ymin": 3, "ymax": 4}
     self.assertTrue(
         all(last_row[key] == value for key, value in expected.items())
     )
Esempio n. 7
0
 def test_join(self) -> None:
     """Join two Stats outputs of the ``bigfile`` dataset via Reduce.expand.

     No assertion is made: the test only prints the trace statistics of
     the module returned by ``Reduce.expand`` once the run is over.
     """
     sched = self.scheduler()
     loader = CSVLoader(get_dataset("bigfile"),
                        index_col=False,
                        header=None,
                        scheduler=sched)
     first_stats = Stats(1, reset_index=True, scheduler=sched)
     first_stats.input[0] = loader.output.result
     second_stats = Stats(2, reset_index=True, scheduler=sched)
     second_stats.input[0] = loader.output.result
     stats_outputs = [first_stats.output.stats, second_stats.output.stats]
     joined = Reduce.expand(
         BinJoin, "first", "second", "table", stats_outputs, scheduler=sched
     )
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = joined.output.result
     length_printer = Every(
         proc=self.terse, constant_time=True, scheduler=sched
     )
     length_printer.input[0] = loader.output.result
     aio.run(sched.start())
     stats = joined.trace_stats(max_runs=1)
     print(stats)
Esempio n. 8
0
    def t_num_expr_impl(self, cls: Type[NumExprABC]) -> Tuple[Any, ...]:
        """Run a NumExprABC subclass on two random tables and check it.

        The first two output columns are compared with the numexpr
        expressions ``first_2+2*second_3`` and ``first_3-5*second_2``.

        Args:
            cls: the module class to instantiate.

        Returns:
            The tuple ``(first_2, first_3, second_2, second_3)`` of column
            slices used as operands.
        """
        sched = self.scheduler()
        left_source = RandomTable(10, rows=100000, scheduler=sched)
        right_source = RandomTable(10, rows=100000, scheduler=sched)
        operand_columns = {
            "first": ["_1", "_2", "_3"],
            "second": ["_1", "_2", "_3"],
        }
        module = cls(columns=operand_columns, scheduler=sched)

        module.input.first = left_source.output.result
        module.input.second = right_source.output.result
        printer = Print(proc=self.terse, scheduler=sched)
        printer.input[0] = module.output.result
        aio.run(sched.start())
        left = left_source.table.to_array()
        # The four names below appear verbatim in the numexpr expressions,
        # so they must stay local variables of this method.
        first_2 = left[:, 1]
        first_3 = left[:, 2]
        right = right_source.table.to_array()
        second_2 = right[:, 1]
        second_3 = right[:, 2]
        expected_0 = ne.evaluate("first_2+2*second_3")
        expected_1 = ne.evaluate("first_3-5*second_2")
        actual = module.table.to_array()
        self.assertTrue(np.allclose(actual[:, 0], expected_0, equal_nan=True))
        self.assertTrue(np.allclose(actual[:, 1], expected_1, equal_nan=True))
        return first_2, first_3, second_2, second_3
Esempio n. 9
0
    def t_mix_ufunc_impl(
        self,
        cls: Type[MixUfuncABC],
        ufunc1: np.ufunc = np.log,
        ufunc2: np.ufunc = np.add,
    ) -> None:
        """Run a MixUfuncABC subclass on two random tables and check it.

        Output column 0 is compared with ``ufunc2(first_2, second_3)`` and
        output column 1 with ``ufunc1(second_3)``, both cast to float64.

        Args:
            cls: the module class to instantiate.
            ufunc1: unary ufunc applied to the third column of the second
                table (defaults to ``np.log``).
            ufunc2: binary ufunc applied to the second column of the first
                table and the third column of the second table (defaults
                to ``np.add``).
        """
        s = self.scheduler()
        random1 = RandomTable(10, rows=100000, scheduler=s)
        random2 = RandomTable(10, rows=100000, scheduler=s)
        module = cls(
            columns={
                "first": ["_1", "_2", "_3"],
                "second": ["_1", "_2", "_3"]
            },
            scheduler=s,
        )

        module.input.first = random1.output.result
        module.input.second = random2.output.result
        pr = Print(proc=self.terse, scheduler=s)
        pr.input[0] = module.output.result
        aio.run(s.start())
        # Only these two operand columns take part in the expected values;
        # the unused slices of the original were dropped.
        first_2 = random1.table.to_array()[:, 1]
        second_3 = random2.table.to_array()[:, 2]
        ne_1 = ufunc2(first_2, second_3).astype("float64")
        ne_2 = ufunc1(second_3).astype("float64")
        res = module.table.to_array()
        self.assertTrue(np.allclose(res[:, 0], ne_1, equal_nan=True))
        self.assertTrue(np.allclose(res[:, 1], ne_2, equal_nan=True))
Esempio n. 10
0
 def test_hist_index_min_max(self):
     """Test min_out and max_out on HistogramIndex.

     A Min module is fed by ``min_out`` and a Max module by ``max_out`` of
     the histogram index created by a RangeQuery; their last values for
     ``_1`` must match the min and max of the random table.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     low_table = Table(name=None,
                       dshape='{_1: float64}',
                       data={'_1': [0.3]})
     low_value = Constant(table=low_table, scheduler=sched)
     high_table = Table(name=None,
                        dshape='{_1: float64}',
                        data={'_1': [0.8]})
     high_value = Constant(table=high_table, scheduler=sched)
     query = RangeQuery(column='_1', scheduler=sched)
     query.create_dependent_modules(
         source, 'table', min_value=low_value, max_value=high_value
     )
     query_printer = Print(proc=self.terse, scheduler=sched)
     query_printer.input.df = query.output.table
     index_mod = query.hist_index
     name_suffix = str(hash(index_mod))
     lowest = Min(name='min_' + name_suffix, scheduler=sched)
     lowest.input.table = index_mod.output.min_out
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input.df = lowest.output.table
     highest = Max(name='max_' + name_suffix, scheduler=sched)
     highest.input.table = index_mod.output.max_out
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input.df = highest.output.table
     sched.start()
     sched.join()
     expected_min = source.table().min()['_1']
     actual_min = lowest.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_min, actual_min)
     expected_max = source.table().max()['_1']
     actual_max = highest.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_max, actual_max)
Esempio n. 11
0
    def t_mix_ufunc_table_dict_impl(self, cls: Type[MixUfuncABC]) -> None:
        """Run a MixUfuncABC subclass on a RandomDict and a RandomTable.

        Output column 0 is compared with ``np.add`` of the second value of
        the dict and the third column of the table; output column 1 with
        ``np.log`` of that third column.

        Args:
            cls: the module class to instantiate.
        """
        s = self.scheduler()
        random1 = RandomDict(10, scheduler=s)
        random2 = RandomTable(10, rows=100000, scheduler=s)
        module = cls(
            columns={
                "first": ["_1", "_2", "_3"],
                "second": ["_1", "_2", "_3"]
            },
            scheduler=s,
        )

        module.input.first = random1.output.result
        module.input.second = random2.output.result
        pr = Print(proc=self.terse, scheduler=s)
        pr.input[0] = module.output.result
        aio.run(s.start())
        # Only these two operands take part in the expected values; the
        # unused bindings of the original were dropped.
        first_2 = list(random1.psdict.values())[1]
        second_3 = random2.table.to_array()[:, 2]
        ne_1 = np.add(first_2, second_3)
        ne_2 = np.log(second_3)
        res = module.table.to_array()
        self.assertTrue(np.allclose(res[:, 0], ne_1, equal_nan=True))
        self.assertTrue(np.allclose(res[:, 1], ne_2, equal_nan=True))
Esempio n. 12
0
 def test_binary3(self) -> None:
     """Check ``Binary(np.add)`` with a table and a dict as operands.

     Columns ``_3``, ``_5``, ``_7`` of the random table are added to the
     dict values at positions 3, 5 and 7; the module name must start with
     ``binary_``.
     """
     sched = self.scheduler()
     n_cols = 10
     table_source = RandomTable(n_cols, rows=100_000, scheduler=sched)
     dict_source = RandomDict(n_cols, scheduler=sched)
     operand_columns = {
         "first": ["_3", "_5", "_7"],
         "second": ["_4", "_6", "_8"],
     }
     module = Binary(np.add, columns=operand_columns, scheduler=sched)
     module.input.first = table_source.output.result
     module.input.second = dict_source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     table_part = table_source.table.to_array()[:, [2, 4, 6]]
     dict_values = np.array(list(dict_source.psdict.values()))
     expected = np.add(table_part, dict_values[[3, 5, 7]])
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith("binary_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
Esempio n. 13
0
 def test_hub_if_else(self):
     """Route a stirred table through a Switch whose condition is False.

     Max reads the ``result`` output of the Switch and Min its
     ``result_else`` output; both feed a Hub.  The Hub result is compared
     with the minimum of the Stirrer result.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(update_column="_1",
                       delete_rows=5,
                       update_rows=5,
                       fixed_step_size=100,
                       scheduler=sched)
     stirrer.input[0] = source.output.result
     switch = Switch(condition=lambda x: False, scheduler=sched)
     switch.input[0] = stirrer.output.result
     name_suffix = str(hash(source))
     highest = Max(name="max_" + name_suffix, scheduler=sched)
     highest.input[0] = switch.output.result
     lowest = Min(name="min_" + name_suffix, scheduler=sched)
     lowest.input[0] = switch.output.result_else
     hub = Hub(scheduler=sched)
     hub.input.table = lowest.output.result
     hub.input.table = highest.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = hub.output.result
     aio.run(sched.start())
     expected = stirrer.result.min()
     actual = hub.result
     self.compare(expected, actual)
Esempio n. 14
0
 def _t_impl(self, cls: Type[TableModule], ufunc: np.ufunc,
             mod_name: str) -> None:
     """Compare a binary table module with the matching NumPy ufunc.

     Args:
         cls: module class, instantiated with only a scheduler.
         ufunc: ufunc applied to the two random tables to get the
             expected array.
         mod_name: prefix the module name is expected to start with.
     """
     print("Testing", mod_name)
     sched = self.scheduler()

     def draw_ints(x):
         # Integers drawn by np.random.randint(10, size=x).
         return np.random.randint(10, size=x)

     left_source = RandomTable(
         3,
         rows=100_000,
         scheduler=sched,
         random=draw_ints,  # type: ignore
         dtype="int64",
     )
     right_source = RandomTable(
         3,
         rows=100_000,
         scheduler=sched,
         random=draw_ints,  # type: ignore
         dtype="int64",
     )
     module = cls(scheduler=sched)
     module.input.first = left_source.output.result
     module.input.second = right_source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     left = left_source.table.to_array()
     right = right_source.table.to_array()
     expected = ufunc(left, right)
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith(mod_name))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
Esempio n. 15
0
 def test_join_simple(self) -> None:
     """Join three one-row constant tables through ``Reduce.expand``.

     The last row of the joined table must carry the six columns with the
     values supplied by the three constants.
     """
     sched = self.scheduler()
     specs = [
         ("test_join_simple_cst1", {"xmin": [1], "xmax": [2]}),
         ("test_join_simple_cst2", {"ymin": [3], "ymax": [4]}),
         ("test_join_simple_cst3", {"zmin": [5], "zmax": [6]}),
     ]
     constants = [
         Constant(
             Table(name=table_name, data=pd.DataFrame(columns), create=True),
             scheduler=sched,
         )
         for table_name, columns in specs
     ]
     joined = Reduce.expand(
         BinJoin,
         "first",
         "second",
         "result",
         [constant.output.result for constant in constants],
         scheduler=sched,
     )
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = joined.output.result
     aio.run(sched.start())
     stats = joined.trace_stats(max_runs=1)
     print(stats)
     table = joined.table
     last_row = table.loc[table.index[-1]]
     assert last_row is not None
     expected = {
         "xmin": 1,
         "xmax": 2,
         "ymin": 3,
         "ymax": 4,
         "zmin": 5,
         "zmax": 6,
     }
     self.assertTrue(
         all(last_row[key] == value for key, value in expected.items())
     )
 def test_datashape(self):
     """Run a DataShape module on a random table and print the modules.

     No assertion is made; the NumPy random seed is fixed to 42 first.
     """
     np.random.seed(42)
     sched = self.scheduler()
     source = RandomTable(3, rows=10_000, scheduler=sched)
     shape_mod = DataShape(scheduler=sched)
     shape_mod.input.table = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = shape_mod.output.result
     aio.run(sched.start())
     print(sched.modules())
Esempio n. 17
0
 def test_hist_index_min_max(self) -> None:
     """Test min_out and max_out on HistogramIndex.

     A Min module is fed by ``min_out`` and a Max module by ``max_out`` of
     the histogram index created by a RangeQuery; their ``_1`` entries
     must match the min and max of the random table.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=100000, scheduler=sched)
         low_value = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
         high_value = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
         query = RangeQuery(column="_1", scheduler=sched)
         query.create_dependent_modules(source,
                                        "result",
                                        min_value=low_value,
                                        max_value=high_value)
         query_printer = Print(proc=self.terse, scheduler=sched)
         query_printer.input[0] = query.output.result
         index_mod = query.hist_index
         assert index_mod is not None
         name_suffix = str(hash(index_mod))
         lowest = Min(name="min_" + name_suffix, scheduler=sched)
         lowest.input[0] = index_mod.output.min_out
         min_printer = Print(proc=self.terse, scheduler=sched)
         min_printer.input[0] = lowest.output.result
         highest = Max(name="max_" + name_suffix, scheduler=sched)
         highest.input[0] = index_mod.output.max_out
         max_printer = Print(proc=self.terse, scheduler=sched)
         max_printer.input[0] = highest.output.result
     aio.run(sched.start())
     expected_min = cast(float, source.table.min()["_1"])
     actual_min = cast(float, lowest.psdict["_1"])
     self.assertAlmostEqual(expected_min, actual_min)
     expected_max = cast(float, source.table.max()["_1"])
     actual_max = cast(float, highest.psdict["_1"])
     self.assertAlmostEqual(expected_max, actual_max)
Esempio n. 18
0
 def test_online_cov(self):
     """Compare ``Corr(mode="CovarianceOnly")`` with ``np.cov``.

     The expected matrix is ``np.cov`` of the transposed random table; the
     actual one is the module result rendered as a DataFrame for columns
     ``_1`` and ``_2``.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100_000, scheduler=sched)
     cov_mod = Corr(mode="CovarianceOnly", scheduler=sched)
     cov_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = cov_mod.output.result
     aio.run(sched.start())
     samples = source.result.to_array()
     expected = np.cov(samples.T)
     actual = cov_mod.result_as_df(["_1", "_2"]).values
     self.assertTrue(np.allclose(expected, actual))
Esempio n. 19
0
 def test_max(self) -> None:
     """Compare Max, read through its ``cxx_module``, with the table max.

     The actual value is the last row of the table returned by
     ``cxx_module.get_output_table()``, converted to an ordered dict.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10_000, scheduler=sched)
     max_mod = Max(name="max_" + str(hash(source)), scheduler=sched)
     max_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = source.table.max()
     output_table = max_mod.cxx_module.get_output_table()
     actual = output_table.last().to_dict(ordered=True)
     self.compare(expected, actual)
Esempio n. 20
0
 def test_filter(self) -> None:
     """Check FilterMod with the expression ``_1 > 0.5``.

     The index of the filtered table must equal the index obtained by
     evaluating ``_1>0.5`` on the data of the module's ``table`` input
     slot.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     filter_mod = FilterMod(expr="_1 > 0.5", scheduler=sched)
     filter_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = filter_mod.output.result
     aio.run(sched.start())
     input_data = filter_mod.get_input_slot("table").data()
     expected = input_data.eval("_1>0.5", result_object="index")
     self.assertEqual(filter_mod.table.index, bitmap(expected))
Esempio n. 21
0
    def test_dataflow_6_dynamic(self) -> None:
        """Delete modules step by step from a committed dataflow.

        A random table feeds a sink and two printers.  Three chained
        ``on_loop`` callbacks check ``collateral_damage`` and then delete
        ``prt2``, ``prt`` and finally ``sink`` together with ``table``.
        A fourth callback, ``stop_error``, is registered with ``on_loop``
        and the value 100; it fails the test if it is ever invoked.
        """
        s = self.scheduler()
        table = RandomTable(name="table",
                            columns=["a"],
                            throttle=1000,
                            scheduler=s)
        sink = Sink(name="sink", scheduler=s)
        sink.input.inp = table.output.result
        prt = Print(name="prt", proc=self.terse, scheduler=s)
        prt.input.df = table.output.result
        prt2 = Print(name="prt2", proc=self.terse, scheduler=s)
        prt2.input.df = table.output.result
        s.commit()

        async def modify_1(scheduler: Scheduler, run_number: int) -> None:
            # First step: either printer can be removed on its own.
            with s as dataflow:
                print("Checking module deletion")
                self.assertIsInstance(dataflow, Dataflow)
                deps = dataflow.collateral_damage("prt2")
                self.assertEqual(deps, {"prt2"})
                deps = dataflow.collateral_damage("prt")
                self.assertEqual(deps, {"prt"})
                deps = dataflow.collateral_damage("prt", "prt2")
                self.assertEqual(deps, {"prt", "prt2"})
                dataflow.delete_modules("prt2")
            s.on_loop(modify_2, 5)

        async def modify_2(scheduler: Scheduler, run_number: int) -> None:
            # Second step: "prt2" is gone; removing "sink" too would also
            # take "table" with it.
            self.assertNotIn("prt2", scheduler)
            with s as dataflow:
                print("Checking more module deletion")
                deps = dataflow.collateral_damage("prt")
                self.assertEqual(deps, {"prt"})
                deps = dataflow.collateral_damage("prt", "sink")
                self.assertEqual(deps, {"prt", "sink", "table"})
                dataflow.delete_modules("prt")
            s.on_loop(modify_3, 5)

        async def modify_3(scheduler: Scheduler, run_number: int) -> None:
            # Last step: delete the remaining two modules.
            self.assertNotIn("prt", scheduler)
            with s as dataflow:
                print("Checking even more module deletion")
                deps = dataflow.collateral_damage("sink")
                self.assertEqual(deps, {"sink", "table"})
                dataflow.delete_modules("sink", "table")

        async def stop_error(scheduler: Scheduler, run_number: int) -> None:
            # Being called at all is the failure.  The original asserted
            # first, which made the stop() call unreachable; the finally
            # clause stops the scheduler while the failure still propagates.
            try:
                self.fail("Scheduler should have stopped")
            finally:
                await scheduler.stop()

        s.on_loop(modify_1, 5)
        s.on_loop(stop_error, 100)
        aio.run(s.start())
Esempio n. 22
0
 def test_unary2(self) -> None:
     """Check ``Unary(np.log)`` restricted to columns ``_3``, ``_5``, ``_7``.

     The module output is compared with ``np.log`` of array columns 2, 4
     and 6 of the random table; the module name must start with
     ``unary_``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=100_000, scheduler=sched)
     module = Unary(np.log, columns=["_3", "_5", "_7"], scheduler=sched)
     module.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     selected = source.table.to_array()[:, [2, 4, 6]]
     expected = np.log(selected)
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith("unary_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
Esempio n. 23
0
 def test_max(self) -> None:
     """Compare the ``psdict`` of a Max module with the table max."""
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     max_mod = Max(name="max_" + str(hash(source)), scheduler=sched)
     max_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = source.table.max()
     actual = max_mod.psdict
     self.compare(expected, actual)
Esempio n. 24
0
 def test_reduce2(self) -> None:
     """Check ``Reduce(np.add)`` restricted to columns ``_3``, ``_5``, ``_7``.

     The values of the module ``psdict`` are compared with
     ``np.add.reduce`` over array columns 2, 4 and 6 of the random table;
     the module name must start with ``reduce_``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=100_000, scheduler=sched)
     module = Reduce(np.add, columns=["_3", "_5", "_7"], scheduler=sched)
     module.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     selected = source.table.to_array()[:, [2, 4, 6]]
     expected = np.add.reduce(selected)
     actual = np.array(list(module.psdict.values()))
     self.assertTrue(module.name.startswith("reduce_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
    def _impl_stirred_tst_percentiles_rq(self, accuracy: float,
                                         **kw: Any) -> None:
        """Compare Percentiles on a stirred range query with NumPy.

        The 25th, 50th and 75th percentiles reported by the Percentiles
        module must be within 0.01 of ``np.percentile`` computed on column
        ``_1`` of the RangeQuery table.

        Args:
            accuracy: forwarded to the Percentiles module.
            **kw: extra keyword arguments forwarded to the Stirrer module.
        """
        sched = self.scheduler()
        with sched:
            source = RandomTable(2, rows=10000, scheduler=sched)
            stirrer = Stirrer(
                update_column="_2",
                fixed_step_size=1000,
                scheduler=sched,
                **kw,
            )
            stirrer.input[0] = source.output.result
            low_value = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
            high_value = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
            query = RangeQuery(column="_1", scheduler=sched)
            query.create_dependent_modules(
                stirrer, "result", min_value=low_value, max_value=high_value
            )

            index_mod = query.hist_index
            assert index_mod
            wanted = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
            wanted_const = Constant(table=wanted, scheduler=sched)
            pct_mod = Percentiles(accuracy=accuracy, scheduler=sched)
            pct_mod.input[0] = query.output.result
            pct_mod.input.percentiles = wanted_const.output.result
            pct_mod.input.hist = index_mod.output.result
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = pct_mod.output.result
        aio.run(sched.start())
        reported = notNone(pct_mod.table.last()).to_dict()
        values = query.table["_1"].values
        p25 = np.percentile(values, 25.0)  # type: ignore
        p50 = np.percentile(values, 50.0)  # type: ignore
        p75 = np.percentile(values, 75.0)  # type: ignore
        print("TSV=> accuracy: ", accuracy,
              " 25:", p25, reported["_25"],
              " 50:", p50, reported["_50"],
              " 75:", p75, reported["_75"])
        checks = ((p25, "_25"), (p50, "_50"), (p75, "_75"))
        for expected, key in checks:
            self.assertAlmostEqual(expected, reported[key], delta=0.01)
Esempio n. 26
0
 def test_distinct_float(self):
     """Run Distinct on the ``bigfile`` dataset.

     Every value of the Distinct result is expected to be ``None``, so the
     list of non-None values must be empty.
     """
     sched = self.scheduler()
     loader = SimpleCSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     distinct = Distinct(scheduler=sched)
     distinct.input[0] = loader.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = distinct.output.result
     aio.run(loader.scheduler().start())
     kept = [value for value in distinct.result.values() if value is not None]
     # too many values detected in all cols
     self.assertEqual(kept, [])
Esempio n. 27
0
 def test_sample(self) -> None:
     """Sample 10 rows from the ``bigfile`` dataset and check the count."""
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     sampler = Sample(samples=10, scheduler=sched)
     sampler.input[0] = loader.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = sampler.output.result
     aio.run(loader.scheduler().start())
     sampled_rows = len(sampler.table)
     self.assertEqual(sampled_rows, 10)
 def test_hadamard(self) -> None:
     """Compare the Hadamard module with ``np.multiply`` on two tables."""
     sched = self.scheduler()
     left_source = RandomTable(3, rows=100000, scheduler=sched)
     right_source = RandomTable(3, rows=100000, scheduler=sched)
     module = Hadamard(scheduler=sched)
     module.input.x1 = left_source.output.result
     module.input.x2 = right_source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     left = left_source.table.to_array()
     right = right_source.table.to_array()
     expected = np.multiply(left, right)
     actual = module.table.to_array()
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
Esempio n. 29
0
 def test_var(self) -> None:
     """Compare the Var module with ``table.var(ddof=1)``.

     Both sides are converted to float arrays and printed before being
     compared with ``np.allclose``.
     """
     sched = self.scheduler()
     source = RandomTable(1, rows=1000, scheduler=sched)
     var_mod = Var(scheduler=sched)
     var_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = var_mod.output.result
     aio.run(sched.start())
     table_variances = source.table.var(ddof=1).values()
     expected = np.array([float(item) for item in table_variances])
     module_variances = var_mod.psdict.values()
     actual = np.array([float(item) for item in module_variances])
     print("res1:", expected)
     print("res2:", actual)
     self.assertTrue(np.allclose(expected, actual))
Esempio n. 30
0
 def test_max(self):
     """Compare Max with the max of a SimpleCSVLoader result.

     The loader reads the ``bigfile_multiscale`` dataset with
     ``nrows=10_000``.
     """
     sched = self.scheduler()
     loader = SimpleCSVLoader(get_dataset("bigfile_multiscale"),
                              nrows=10_000,
                              scheduler=sched)
     max_mod = Max(name="max_" + str(hash(loader)), scheduler=sched)
     max_mod.input[0] = loader.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = loader.result.max()
     actual = max_mod.result
     self.compare(expected, actual)