Example #1
0
 def build_heatmap(self, inp: Slot, domain: Any,
                   plan: int) -> Optional[JSon]:
     """Build the JSON payload for the histogram held in the last row of ``inp``.

     Returns ``None`` when the slot has no data, when its table is empty,
     or when any of the ``xmin``/``xmax``/``ymin``/``ymax`` values of the
     last row is NaN.

     When ``self._ipydata`` is truthy the row's array is stored in
     ``self.hist_tensor[:, :, plan]`` and ``"binnedPixels"`` holds ``plan``;
     otherwise ``"binnedPixels"`` holds a copy of the array.
     """
     table = inp.data()
     if table is None:
         return None
     assert isinstance(table, BaseTable)
     if len(table) == 0:
         return None
     last_row = notNone(table.last()).to_dict()
     if (np.isnan(last_row["xmin"]) or np.isnan(last_row["xmax"])
             or np.isnan(last_row["ymin"]) or np.isnan(last_row["ymax"])):
         # Bounds are not usable yet: nothing to report.
         return None
     payload: JSon = {
         "bounds": (last_row["xmin"], last_row["ymin"],
                    last_row["xmax"], last_row["ymax"]),
     }
     if self._ipydata:
         assert isinstance(plan, int)
         pixels = last_row["array"]
         payload["binnedPixels"] = plan
         self.hist_tensor[:, :, plan] = pixels  # type: ignore
     else:
         pixels = np.copy(last_row["array"])  # type: ignore
         payload["binnedPixels"] = pixels
     payload["range"] = [np.min(pixels), np.max(pixels)]  # type: ignore
     payload["count"] = np.sum(pixels)
     payload["value"] = domain
     return payload
    def t_histogram1d_impl(self, **kw: Any) -> None:
        """Compare ``Histogram1D`` on column ``_2`` with ``np.histogram``.

        The "bigfile" dataset is loaded and passed through a ``Stirrer``
        (which receives ``**kw``); the last histogram row is then checked
        against a NumPy histogram computed on the stirrer's output table.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
        )
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = loader.output.result
        lowest = Min(scheduler=sched)
        lowest.input[0] = stirrer.output.result
        highest = Max(scheduler=sched)
        highest.input[0] = stirrer.output.result
        histogram1d = Histogram1D("_2", scheduler=sched)
        histogram1d.input[0] = stirrer.output.result
        histogram1d.input.min = lowest.output.result
        histogram1d.input.max = highest.output.result

        printer = Every(proc=self.terse, scheduler=sched)
        printer.input[0] = histogram1d.output.result
        aio.run(sched.start())
        histogram1d.trace_stats()
        last_row = notNone(histogram1d.table.last()).to_dict()
        computed = last_row["array"]
        column = stirrer.table.loc[:, ["_2"]]
        assert column is not None
        values = column.to_array().reshape(-1)
        expected, _ = np.histogram(  # type: ignore
            values,
            bins=histogram1d.params.bins,
            density=False,
            range=(last_row["min"], last_row["max"]),
        )
        self.assertEqual(np.sum(computed), np.sum(expected))
        self.assertListEqual(computed.tolist(), expected.tolist())
 def test_histogram1d1(self) -> None:
     """Check ``Histogram1D`` on column ``_2`` of "bigfile" against NumPy.

     The reference histogram is computed with ``np.histogram`` on column 2
     of the same file read through pandas.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lowest = Min(scheduler=sched)
     lowest.input[0] = loader.output.result
     highest = Max(scheduler=sched)
     highest.input[0] = loader.output.result
     histogram1d = Histogram1D("_2", scheduler=sched)
     histogram1d.input[0] = loader.output.result
     histogram1d.input.min = lowest.output.result
     histogram1d.input.max = highest.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = histogram1d.output.result
     aio.run(sched.start())
     histogram1d.trace_stats()
     last_row = notNone(histogram1d.table.last()).to_dict()
     computed = last_row["array"]
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
     )
     values = frame.to_numpy().reshape(-1)
     expected, _ = np.histogram(  # type: ignore
         values,
         bins=histogram1d.params.bins,
         density=False,
         range=(last_row["min"], last_row["max"]),
     )
     self.assertListEqual(computed.tolist(), expected.tolist())
Example #4
0
 def test_idxmax2(self) -> None:
     """Check that ``IdxMax`` and ``Max`` agree on a stirred random table."""
     sched = self.scheduler()
     random = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1", delete_rows=5, fixed_step_size=100, scheduler=sched
     )
     stirrer.input[0] = random.output.result
     idxmax = IdxMax(scheduler=sched)
     idxmax.input[0] = stirrer.output.result
     max_ = Max(scheduler=sched)
     max_.input[0] = stirrer.output.result
     idxmax_printer = Print(proc=self.terse, scheduler=sched)
     idxmax_printer.input[0] = idxmax.output.result
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_.output.result
     aio.run(sched.start())
     from_max = max_.psdict
     max_table = idxmax.max()
     assert max_table is not None
     from_idxmax = notNone(max_table.last()).to_dict()
     self.compare(from_max, from_idxmax)
Example #5
0
 def test_paste(self) -> None:
     """Check ``Paste`` by joining the minima of ``_1`` and ``_2``.

     Each ``Min`` result goes through a ``Dict2Table`` before being pasted;
     the last pasted row is compared with ``random.table.min()``.
     """
     sched = self.scheduler()
     random = RandomTable(10, rows=10000, scheduler=sched)
     min_1 = Min(
         name=f"min_1{hash(random)}", scheduler=sched, columns=["_1"]
     )
     min_1.input[0] = random.output.result
     d2t_1 = Dict2Table(scheduler=sched)
     d2t_1.input.dict_ = min_1.output.result
     min_2 = Min(
         name=f"min_2{hash(random)}", scheduler=sched, columns=["_2"]
     )
     min_2.input[0] = random.output.result
     d2t_2 = Dict2Table(scheduler=sched)
     d2t_2.input.dict_ = min_2.output.result
     paste = Paste(scheduler=sched)
     paste.input.first = d2t_1.output.result
     paste.input.second = d2t_2.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = paste.output.result
     aio.run(sched.start())
     expected = random.table.min()
     pasted = notNone(paste.table.last()).to_dict()
     for column in ("_1", "_2"):
         self.assertAlmostEqual(expected[column], pasted[column])
 def t_histogram2d_impl(self, **kw: Any) -> None:
     """Compare ``Histogram2D`` on a stirred random table with ``fh.histogram2d``.

     Extra keyword arguments are forwarded to the ``Stirrer`` module.  The
     reference histogram is flipped along axis 0 before the comparison.
     """
     sched = self.scheduler()
     random = RandomTable(3, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = random.output.result
     lowest = Min(scheduler=sched)
     lowest.input[0] = stirrer.output.result
     highest = Max(scheduler=sched)
     highest.input[0] = stirrer.output.result
     histogram2d = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
     histogram2d.input[0] = stirrer.output.result
     histogram2d.input.min = lowest.output.result
     histogram2d.input.max = highest.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histogram2d.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = heatmap.output.result
     aio.run(sched.start())
     last_row = notNone(histogram2d.table.last()).to_dict()
     computed = last_row["array"]
     y_range = [last_row["ymin"], last_row["ymax"]]
     x_range = [last_row["xmin"], last_row["xmax"]]
     selection = stirrer.table.loc[:, ["_1", "_2"]]
     assert selection is not None
     values = selection.to_array()
     expected = fh.histogram2d(
         values[:, 1],
         values[:, 0],
         bins=[histogram2d.params.ybins, histogram2d.params.xbins],
         range=[y_range, x_range],
     )
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertEqual(np.sum(computed), np.sum(expected))
     self.assertListEqual(
         computed.reshape(-1).tolist(), expected.reshape(-1).tolist()
     )
 def test_histogram2d1(self) -> None:
     """Check ``Histogram2D`` on "bigfile" against ``fh.histogram2d``.

     The reference histogram is computed on columns 1 and 2 of the same
     file read through pandas, then flipped along axis 0.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lowest = Min(scheduler=sched)
     lowest.input[0] = loader.output.result
     highest = Max(scheduler=sched)
     highest.input[0] = loader.output.result
     histogram2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     histogram2d.input[0] = loader.output.result
     histogram2d.input.min = lowest.output.result
     histogram2d.input.max = highest.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histogram2d.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = heatmap.output.result
     # The scheduler is fetched back from the loader module before starting.
     aio.run(loader.scheduler().start())
     last_row = notNone(histogram2d.table.last()).to_dict()
     computed = last_row["array"]
     y_range = [last_row["ymin"], last_row["ymax"]]
     x_range = [last_row["xmin"], last_row["xmax"]]
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
     )
     values = frame.to_numpy()
     expected = fh.histogram2d(
         values[:, 1],
         values[:, 0],
         bins=[histogram2d.params.ybins, histogram2d.params.xbins],
         range=[y_range, x_range],
     )
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertTrue(np.allclose(computed, expected))
    def _impl_stirred_tst_percentiles_rq(self, accuracy: float,
                                         **kw: Any) -> None:
        """Check ``Percentiles`` fed by a ``RangeQuery`` over a stirred table.

        ``accuracy`` is passed to ``Percentiles`` and ``**kw`` to ``Stirrer``.
        The 25th, 50th and 75th percentiles reported by the module are
        compared (``delta=0.01``) with ``np.percentile`` computed on column
        ``_1`` of the range query's table.
        """
        sched = self.scheduler()
        with sched:
            random = RandomTable(2, rows=10000, scheduler=sched)
            stirrer = Stirrer(
                update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
            )
            stirrer.input[0] = random.output.result
            lower_bound = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
            upper_bound = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
            range_qry = RangeQuery(column="_1", scheduler=sched)
            range_qry.create_dependent_modules(
                stirrer, "result", min_value=lower_bound, max_value=upper_bound
            )

            hist_index = range_qry.hist_index
            assert hist_index
            which_percentiles = Constant(
                table=PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0}),
                scheduler=sched,
            )
            percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
            percentiles.input[0] = range_qry.output.result
            percentiles.input.percentiles = which_percentiles.output.result
            percentiles.input.hist = hist_index.output.result
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = percentiles.output.result
        aio.run(sched.start())
        pdict = notNone(percentiles.table.last()).to_dict()
        values = range_qry.table["_1"].values
        p25, p50, p75 = (
            np.percentile(values, q)  # type: ignore
            for q in (25.0, 50.0, 75.0)
        )
        print(
            "TSV=> accuracy: ", accuracy,
            " 25:", p25, pdict["_25"],
            " 50:", p50, pdict["_50"],
            " 75:", p75, pdict["_75"],
        )
        for expected, key in ((p25, "_25"), (p50, "_50"), (p75, "_75")):
            self.assertAlmostEqual(expected, pdict[key], delta=0.01)
Example #9
0
 def test_last(self) -> None:
     """Check ``Table.last`` with no argument, one column name, and a list."""
     table = Table("table_last", dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     table["a"] = np.random.randint(100, size=10)
     table["b"] = np.random.rand(10)
     whole_row = list(notNone(table.last()).values())
     expected = [table._column(index)[-1] for index in (0, 1)]
     self.assertEqual(whole_row, expected)
     only_a = table.last("a")
     self.assertEqual(only_a, table._column(0)[-1])
     a_and_b = table.last(["a", "b"])
     self.assertEqual(list(a_and_b), whole_row)
Example #10
0
 def test_last_row(self) -> None:
     """Check that ``LastRow`` exposes the last ``_1`` value of the CSV table."""
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
     )
     last_row = LastRow(scheduler=sched)
     last_row.input[0] = loader.output.result
     printer = Every(proc=self.terse, constant_time=True, scheduler=sched)
     printer.input[0] = last_row.output.result
     aio.run(sched.start())
     source = loader.table
     result = last_row.table
     assert result is not None
     expected = notNone(source.last())["_1"]
     self.assertEqual(result.at[0, "_1"], expected)
 def _impl_stirred_tst_percentiles(self, accuracy: float,
                                   **kw: Any) -> None:
     """Check ``Percentiles`` over a stirred random table.

     ``accuracy`` is passed to ``Percentiles`` and ``**kw`` to ``Stirrer``.
     The 25th, 50th and 75th percentiles reported by the module are
     compared (``delta=0.01``) with ``np.percentile`` computed on column
     ``_1`` of the stirrer's table.
     """
     sched = self.scheduler()
     with sched:
         random = RandomTable(2, rows=10000, scheduler=sched)
         stirrer = Stirrer(
             update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
         )
         stirrer.input[0] = random.output.result
         hist_index = HistogramIndex(column="_1", scheduler=sched)
         hist_index.input[0] = stirrer.output.result
         which_percentiles = Constant(
             table=PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0}),
             scheduler=sched,
         )
         percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
         percentiles.input[0] = stirrer.output.result
         percentiles.input.percentiles = which_percentiles.output.result
         percentiles.input.hist = hist_index.output.result
         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = percentiles.output.result
     aio.run(sched.start())
     pdict = notNone(percentiles.table.last()).to_dict()
     values = stirrer.table.to_array(columns=["_1"]).reshape(-1)
     p25, p50, p75 = (
         np.percentile(values, q)  # type: ignore
         for q in (25.0, 50.0, 75.0)
     )
     print(
         "Table=> accuracy: ", accuracy,
         " 25:", p25, pdict["_25"],
         " 50:", p50, pdict["_50"],
         " 75:", p75, pdict["_75"],
     )
     for expected, key in ((p25, "_25"), (p50, "_50"), (p75, "_75")):
         self.assertAlmostEqual(expected, pdict[key], delta=0.01)
Example #12
0
 def test_var_h(self) -> None:
     """Check ``VarH`` against ``table.var(ddof=1)`` on a random table."""
     sched = self.scheduler()
     random = RandomTable(1, rows=1000, scheduler=sched)
     var = VarH(scheduler=sched)
     var.input[0] = random.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = var.output.result
     aio.run(sched.start())
     table = random.table
     assert table is not None
     expected_values = table.var(ddof=1).values()
     res1 = np.array([float(item) for item in expected_values])
     module_values = notNone(var.table.last()).to_dict(ordered=True).values()
     res2 = np.array([float(item) for item in module_values])
     print("res1:", res1)
     print("res2:", res2)
     self.assertTrue(np.allclose(res1, res2))
Example #13
0
 def get_image(self, run_number: Optional[int] = None) -> Optional[str]:
     """Return the image filename recorded for ``run_number``.

     Args:
         run_number: Value looked up in the table's ``"time"`` column.
             ``None`` selects the last row.

     Returns:
         ``None`` when ``self.table`` is ``None`` or empty.  Otherwise the
         ``"filename"`` of the last row when ``run_number`` is ``None`` or
         not smaller than the last row's ``"time"``; else the ``"filename"``
         of the first row whose ``"time"`` equals ``run_number``, falling
         back to the last row's ``"filename"`` when no row matches.
     """
     table = self.table
     if table is None or len(table) == 0:
         return None
     # The table is non-empty here, so a last row is expected to exist.
     last = notNone(table.last())
     if run_number is None or run_number >= last["time"]:
         return last["filename"]
     matches = np.where(table["time"] == run_number)[0]
     if len(matches) == 0:
         # No row recorded for that run: fall back to the latest image.
         return last["filename"]
     return table["filename"][matches[0]]
Example #14
0
 def test_idxmax(self) -> None:
     """Check that ``IdxMax`` and ``Max`` agree on a random table."""
     sched = self.scheduler()
     random = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idxmax = IdxMax(scheduler=sched)
     idxmax.input[0] = random.output.result
     max_ = Max(scheduler=sched)
     max_.input[0] = random.output.result
     idxmax_printer = Print(proc=self.terse, scheduler=sched)
     idxmax_printer.input[0] = idxmax.output.result
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_.output.result
     aio.run(sched.start())
     from_max = max_.psdict
     max_table = idxmax.max()
     assert max_table is not None
     from_idxmax = notNone(max_table.last()).to_dict()
     self.compare(from_max, from_idxmax)
Example #15
0
 def test_idxmin(self) -> None:
     """Check that ``IdxMin`` and ``Min`` agree on a random table."""
     sched = self.scheduler()
     random = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idxmin = IdxMin(scheduler=sched)
     idxmin.input[0] = random.output.result
     min_ = Min(scheduler=sched)
     min_.input[0] = random.output.result
     idxmin_printer = Print(proc=self.terse, scheduler=sched)
     idxmin_printer.input[0] = idxmin.output.result
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input[0] = min_.output.result
     aio.run(sched.start())
     from_min = min_.psdict
     min_table = idxmin.min()
     assert min_table is not None
     from_idxmin = notNone(min_table.last()).to_dict()
     self.compare(from_min, from_idxmin)
Example #16
0
 def heatmap_to_json(self, json: JSon, short: bool) -> JSon:
     """Fill ``json`` with the heatmap description and return it.

     Sets ``"columns"`` from the ``Histogram2D`` module connected to the
     ``"array"`` input slot, ``"bounds"`` when the last histogram row has
     no NaN bound, and ``"image"`` (the last recorded filename) when
     ``self.table`` is set and ``self._last_update`` is non-zero.
     ``short`` is not used by this method.
     """
     slot = self.get_input_slot("array")
     assert isinstance(slot.output_module, Histogram2D)
     histo: Histogram2D = slot.output_module
     json["columns"] = [histo.x_column, histo.y_column]
     histo_table = slot.data()
     if histo_table is not None and len(histo_table) != 0:
         row = histo_table.last()
         has_nan = any(
             np.isnan(row[key]) for key in ("xmin", "xmax", "ymin", "ymax")
         )
         if not has_nan:
             json["bounds"] = {
                 key: row[key] for key in ("xmin", "ymin", "xmax", "ymax")
             }
     images = self.table
     if images is not None and self._last_update != 0:
         json["image"] = notNone(images.last())["filename"]
     return json
Example #17
0
 def test_stats(self) -> None:
     """Check the min/max reported by ``Stats`` for column ``_1``."""
     sched = self.scheduler()
     csv_module = CSVLoader(
         get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
     )
     stats = Stats("_1", name="test_stats", scheduler=sched)
     wait = Wait(name="wait", delay=3, scheduler=sched)
     wait.input.inp = csv_module.output.result
     stats.input._params = wait.output.out
     stats.input[0] = csv_module.output.result
     printer = Print(proc=self.terse, name="print", scheduler=sched)
     printer.input[0] = stats.output.result
     aio.run(sched.start())
     source = csv_module.table
     last_stats = notNone(stats.table.last())
     expected_min = source["_1"].min()
     self.assertTrue(np.isclose(expected_min, last_stats["__1_min"]))
     expected_max = source["_1"].max()
     self.assertTrue(np.isclose(expected_max, last_stats["__1_max"]))
 def test_combine_first_nan(self) -> None:
     """Check ``CombineFirst`` when the second input only holds NaN values.

     The expected last row takes ``xmin``/``xmax`` from the first input
     and ``ymin``/``ymax`` from the third one.
     """
     sched = self.scheduler(True)
     x_table = Table(
         name="tcf_xmin_xmax_nan",
         data=pd.DataFrame({"xmin": [1], "xmax": [2]}),
         create=True,
     )
     cst1 = Constant(x_table, scheduler=sched)
     nan_table = Table(
         name="tcf_ymin_ymax_nan",
         data=pd.DataFrame({"ymin": [np.nan], "ymax": [np.nan]}),
         create=True,
     )
     cst2 = Constant(nan_table, scheduler=sched)
     y_table = Table(
         name="tcf_ymin_ymax2_nan",
         data=pd.DataFrame({"ymin": [3], "ymax": [4]}),
         create=True,
     )
     cst3 = Constant(y_table, scheduler=sched)
     combine = CombineFirst(scheduler=sched)
     # The three constants are connected in this order.
     combine.input[0] = cst1.output.result
     combine.input[0] = cst2.output.result
     combine.input[0] = cst3.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = combine.output.result
     aio.run(sched.start())
     last = notNone(combine.table.last()).to_dict()
     as_expected = (
         last["xmin"] == 1
         and last["xmax"] == 2
         and last["ymin"] == 3
         and last["ymax"] == 4
     )
     self.assertTrue(as_expected)
Example #19
0
 def test_last_row_simple(self) -> None:
     """Check that ``Join`` merges two one-row constant tables."""
     sched = self.scheduler()
     x_table = Table(
         name=get_random_name("cst1"), data={"xmin": [1], "xmax": [2]}
     )
     y_table = Table(
         name=get_random_name("cst2"), data={"ymin": [3], "ymax": [4]}
     )
     cst1 = Constant(x_table, scheduler=sched)
     cst2 = Constant(y_table, scheduler=sched)
     join = Join(scheduler=sched)
     # Both constants are connected in this order.
     join.input[0] = cst1.output.result
     join.input[0] = cst2.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = join.output.result
     aio.run(sched.start())
     last = notNone(join.table.last())
     as_expected = (
         last["xmin"] == 1
         and last["xmax"] == 2
         and last["ymin"] == 3
         and last["ymax"] == 4
     )
     self.assertTrue(as_expected)