def test_del3(self) -> None:
     """Delete random rows, then append rows, mirroring each step in pandas.

     A 20-row table and a DataFrame built from it receive the same deletions
     and the same appended data. After the deletion the two indices must be
     equal as lists; after the append they must be equal as sets.
     """
     t = Table("table_filtering",
               dshape="{a: int, b: float32}",
               create=True)
     sz = 20
     sz_del = 10
     sz_add = 15
     t.resize(sz)
     # Fixed seed so the deleted ids and generated values are reproducible.
     np.random.seed(42)
     ivalues = np.random.randint(100, size=sz)
     t["a"] = ivalues
     fvalues: np.ndarray[Any, Any] = np.random.rand(sz) * 100
     t["b"] = fvalues
     df = pd.DataFrame(t.to_dict())
     to_del = np.random.randint(len(t) - 1, size=sz_del)
     del t.loc[to_del]
     df = df.drop(to_del)
     self.assertListEqual(list(t.index), list(df.index))
     ivalues2: np.ndarray[Any, Any] = np.random.randint(100, size=sz_add)
     fvalues2: np.ndarray[Any, Any] = np.random.rand(sz_add) * 100
     dict_add = {"a": ivalues2, "b": fvalues2}
     # New rows are labelled right after the last surviving DataFrame label.
     ix = range(df.index[-1] + 1, df.index[-1] + 1 + sz_add)
     # DataFrame.append() was removed in pandas 2.0; pd.concat() with its
     # default arguments produces the same row-wise concatenation.
     df = pd.concat([df, pd.DataFrame(dict_add, index=ix)])
     t.append(data=dict_add)
     self.assertSetEqual(set(t.index), set(df.index))
Exemple #2
0
    def append_dataframe(self) -> None:
        """Build a table from a DataFrame, then append the frame and the table.

        After each step the table length and the per-column contents are
        compared with what the source DataFrame implies.
        """
        # pylint: disable=protected-access
        frame = pd.DataFrame({
            "a": [1, 2, 3],
            "b": [0.1, 0.2, 0.3],
            "c": ["a", "b", "cd"]
        })
        table = Table("table_2", data=frame)

        # Step 1: the freshly built table holds exactly the frame's content.
        self.assertEqual(len(table), len(frame))
        for name in frame:
            from_frame = frame[name]
            from_table = table[name]
            self.assertEqual(len(from_frame), len(from_table))
            self.assertTrue(np.all(from_frame.values == from_table.values))

        # Step 2: appending the frame doubles the table; the second half
        # must equal the frame.
        table.append(frame)
        self.assertEqual(len(table), 2 * len(frame))
        for name in frame:
            from_frame = frame[name]
            from_table = table[name]
            self.assertEqual(2 * len(from_frame), len(from_table))
            tail = from_table[len(frame):len(table)]
            self.assertTrue(np.all(from_frame == tail))

        # Step 3: appending the table to itself doubles it again; both
        # halves must be equal.
        table.append(table)  # fun test
        self.assertEqual(len(table), 4 * len(frame))
        for name in frame:
            from_frame = frame[name]
            from_table = table[name]
            self.assertEqual(4 * len(from_frame), len(from_table))
            first_half = from_table[0:2 * len(frame)]
            second_half = from_table[2 * len(frame):len(table)]
            self.assertTrue(np.all(first_half == second_half))
Exemple #3
0
    def append_direct(self) -> None:
        """Build a table from an ordered dict, then append the dict and the table.

        A DataFrame built from the same dict serves as the reference for
        lengths and column contents.
        """
        # pylint: disable=protected-access
        raw = OrderedDict([("a", [1, 2, 3]), ("b", [0.1, 0.2, 0.3]),
                           ("c", ["a", "b", "cd"])])
        reference = pd.DataFrame(raw)
        table = Table("table_3", data=raw)

        # Step 1: table built directly from the dict matches the reference.
        self.assertEqual(len(table), len(reference))
        for name in reference:
            ref_col = reference[name]
            tab_col = table[name]
            self.assertEqual(len(ref_col), len(tab_col))
            self.assertTrue(np.all(ref_col == tab_col.values))

        # Step 2: appending the dict doubles the table; the new half must
        # equal the reference.
        table.append(raw)
        self.assertEqual(len(table), 2 * len(reference))
        for name in reference:
            ref_col = reference[name]
            tab_col = table[name]
            self.assertEqual(2 * len(ref_col), len(tab_col))
            appended = tab_col[len(reference):len(table)]
            self.assertTrue(np.all(ref_col == appended))

        # Step 3: appending the table to itself; both halves must be equal.
        table.append(table)  # fun test
        self.assertEqual(len(table), 4 * len(reference))
        for name in reference:
            ref_col = reference[name]
            tab_col = table[name]
            self.assertEqual(4 * len(ref_col), len(tab_col))
            lower = tab_col[0:2 * len(reference)]
            upper = tab_col[2 * len(reference):len(table)]
            self.assertTrue(np.all(lower == upper))
Exemple #4
0
class Var(TableModule):
    """
    Compute the variance of the columns of an input dataframe.

    One ``OnlineVariance`` accumulator is kept per column and fed with every
    newly created chunk of the 'table' input slot. Each step appends one row
    of current variances to the output table; the ``history`` parameter
    (declared with default 3) bounds how many rows are retained.
    """
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required 'table' input slot and initialise state.

        ``columns`` is stored as ``self._columns``; remaining keyword
        arguments are forwarded to ``TableModule``.
        """
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Var, self).__init__(dataframe_slot='table', **kwds)
        self._columns = columns
        # Maps column name -> OnlineVariance accumulator (filled lazily in op()).
        self._data = {}
        self.default_step_size = 1000

    def is_ready(self):
        """Return True when the input slot has created rows pending."""
        if self.get_input_slot('table').created.any():
            return True
        return super(Var, self).is_ready()

    def op(self, chunk):
        """Add ``chunk`` to the per-column accumulators.

        Returns a dict mapping each column of ``chunk`` to the accumulator's
        current ``variance``.
        """
        cols = chunk.columns
        ret = {}
        for c in cols:
            data = self._data.get(c)
            if data is None:
                data = OnlineVariance()
                self._data[c] = data
            data.add(chunk[c])
            ret[c] = data.variance
        return ret

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` created rows and append their variances.

        When the input slot reports updated or deleted rows, the slot is
        reset and the output table is dropped so it gets rebuilt. If no
        created rows are available the module returns in the blocked state.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            self._table = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.op(self.filter_columns(input_df, fix_loc(indices)))
        if self._table is None:
            self._table = Table(self.generate_table_name('var'),
                                dshape=input_df.dshape,
                                create=True)
        # The new row is stored under the current run number as its index.
        self._table.append(op, indices=[run_number])

        # Keep only the last `history` rows once the table grows beyond that.
        if len(self._table) > self.params.history:
            self._table = self._table.loc[self._table.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Exemple #5
0
class Max(TableModule):
    """Module appending, at each step, the maximum seen so far per column.

    The maximum of each newly created chunk of the 'table' input slot is
    combined with the last row of the output table before being appended.
    """
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required 'table' input slot and store ``columns``."""
        input_slots = [SlotDescriptor('table', type=Table, required=True)]
        self._add_slots(kwds, 'input_descriptors', input_slots)
        super(Max, self).__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000

    def is_ready(self):
        """Return True when the input slot has created rows pending."""
        slot = self.get_input_slot('table')
        if slot.created.any():
            return True
        return super(Max, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` created rows and append the new maximum.

        Updated or deleted input rows trigger a reset: the slot is reset and
        the output table, if any, is emptied.
        """
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            if self._table is not None:
                self._table.resize(0)
            slot.update(run_number)
        created = slot.created.next(step_size)  # returns a slice
        nsteps = indices_len(created)
        if nsteps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()
        chunk_max = self.filter_columns(source,
                                        fix_loc(created)).max(keepdims=True)
        if self._table is None:
            # First result: the output table is created from it directly.
            self._table = Table(self.generate_table_name('max'),
                                data=chunk_max,
                                create=True)
            return self._return_run_step(self.next_state(slot),
                                         steps_run=nsteps)
        if len(self._table) != 0:
            # Merge with the previous maximum; an empty table means a reset
            # happened and the chunk maximum is appended as is.
            previous = self._table.last()
            for colname in previous:
                col_max = chunk_max[colname]
                col_max[0] = np.maximum(col_max, previous[colname])
        self._table.append(chunk_max)

        # TODO manage the history in a more efficient way
        return self._return_run_step(self.next_state(slot), steps_run=nsteps)
 def test_mmap3(self):
     """Create a table from a DataFrame, verify its columns, then append.

     ``self._rmtree()`` is called before and after the scenario.
     """
     #pylint: disable=protected-access
     self._rmtree()
     content = {'a': [1, 2, 3], 'b': [0.1, 0.2, 0.3], 'c': ['a', 'b', 'cd']}
     frame = pd.DataFrame(content)
     table = Table('table_2', data=frame)
     self.assertEqual(len(table), len(frame))
     for name in frame:
         from_frame, from_table = frame[name], table[name]
         self.assertEqual(len(from_frame), len(from_table))
         self.assertTrue(np.all(from_frame.values == from_table.values))
     # Appending the same frame must double the row count.
     table.append(frame)
     self.assertEqual(len(table), 2 * len(frame))
     self._rmtree()
Exemple #7
0
 def test_mmap3(self) -> None:
     """Create a table from a DataFrame, verify its columns, then append.

     ``self._rmtree()`` is called before and after the scenario.
     """
     # pylint: disable=protected-access
     self._rmtree()
     content = {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": ["a", "b", "cd"]}
     frame = pd.DataFrame(content)
     table = Table("table_2", data=frame)
     self.assertEqual(len(table), len(frame))
     for name in frame:
         from_frame, from_table = frame[name], table[name]
         self.assertEqual(len(from_frame), len(from_table))
         self.assertTrue(np.all(from_frame.values == from_table.values))
     # Appending the same frame must double the row count.
     table.append(frame)
     self.assertEqual(len(table), 2 * len(frame))
     self._rmtree()
Exemple #8
0
class RandomTable(TableModule):
    """Random table generator module.

    Each step appends ``step_size`` rows of values produced by ``random`` to
    a table whose columns are all declared as float64.
    """

    def __init__(self, columns, rows=-1, random=RAND, throttle=False, **kwds):
        """Create the output table.

        ``columns`` is either an integer (columns are then named ``_1`` ..
        ``_n``) or a list/array of column names; anything else raises
        ``ProgressiveError``. ``rows`` caps the total number of generated
        rows when non-negative. ``random`` is called with the number of
        values to produce. ``throttle``, when a truthy int or float, caps
        the rows generated per step; any other value disables throttling.
        """
        super(RandomTable, self).__init__(**kwds)
        self.default_step_size = 1000
        if isinstance(columns, integer_types):
            self.columns = ["_%d" % i for i in range(1, columns + 1)]
        elif isinstance(columns, (list, np.ndarray)):
            self.columns = columns
        else:
            raise ProgressiveError('Invalid type for columns')
        self.rows = rows
        self.random = random
        if throttle and isinstance(throttle, integer_types + (float, )):
            self.throttle = throttle
        else:
            self.throttle = False
        dshape = "{" + (", ".join(
            ["%s: float64" % col for col in self.columns])) + "}"
        self._table = Table(self.generate_table_name('table'),
                            dshape=dshape,
                            create=True)
        # From here on, use the column names as reported by the table.
        self.columns = self._table.columns

    def run_step(self, run_number, step_size, howlong):
        """Append up to ``step_size`` random rows to the table.

        Raises ``StopIteration`` once the ``rows`` limit has been reached.
        Returns in the blocked state when throttled, ready otherwise.
        """
        if step_size == 0:  # bug
            logger.error('Received a step_size of 0')
            return self._return_run_step(self.state_ready,
                                         steps_run=0,
                                         creates=0)
        logger.info('generating %d lines', step_size)
        if self.throttle:
            # A float throttle is accepted by __init__; taking the minimum
            # with it would turn step_size into a float, which is then used
            # as a row count. Coerce back to a plain int.
            step_size = int(min(self.throttle, step_size))
        if self.rows >= 0 and (len(self._table) + step_size) > self.rows:
            step_size = self.rows - len(self._table)
            if step_size <= 0:
                raise StopIteration
            logger.info('truncating to %d lines', step_size)

        values = OrderedDict()
        for column in self.columns:
            s = self.random(step_size)
            values[column] = s
        with self.lock:
            self._table.append(values)
        next_state = self.state_blocked if self.throttle else self.state_ready
        return self._return_run_step(next_state, steps_run=step_size)
Exemple #9
0
class Variable(Constant):
    """Input module whose table rows are supplied through ``from_input``.

    The table is either given at construction or created on the first
    ``run_step`` from the table connected to the optional 'like' slot.
    """

    def __init__(self, table=None, **kwds):
        """Declare the optional 'like' input slot and forward ``table``."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('like', type=Table, required=False)])
        super(Variable, self).__init__(table, **kwds)

    def is_input(self):
        """Always True."""
        return True

    def from_input(self, input_):
        """Add a row made of the last row overridden by ``input_``.

        ``input_`` must be a dict, otherwise ``ProgressiveError`` is raised.
        Keys that are not columns of the table are ignored and reported in
        the returned error string, which is empty when nothing went wrong.
        When the table does not exist yet, an error string is returned and
        nothing is added.
        """
        if not isinstance(input_, dict):
            raise ProgressiveError('Expecting a dictionary')
        if self._table is None:
            if self.get_input_slot('like') is None:
                error = 'Variable %s with no initial value and no input slot' % self.name
            else:
                # A 'like' slot is connected but run_step() has not built the
                # table yet; report it instead of dereferencing None below.
                error = 'Variable %s not yet initialized from its input slot' % self.name
            logger.error(error)
            return error
        last = self._table.last()
        if last is None:
            # Empty table: start from a row with every column unset.
            last = {v: None for v in self._table.columns}
        else:
            last = last.to_json()
        error = ''
        for (k, v) in six.iteritems(input_):
            if k in last:
                last[k] = v
            else:
                error += 'Invalid key %s ignored. ' % k
        _ = self.scheduler().for_input(self)
        #last['_update'] = run_number
        self._table.add(last)
        return error

    def run_step(self, run_number, step_size, howlong):
        """Create the table from the 'like' slot if needed, then block.

        The new table takes the dshape of the 'like' table and is seeded
        with that table's last row at index 0.
        """
        if self._table is None:
            slot = self.get_input_slot('like')
            if slot is not None:
                like = slot.data()
                if like is not None:
                    with slot.lock:
                        self._table = Table(self.generate_table_name('like'),
                                            dshape=like.dshape,
                                            create=True)
                        self._table.append(like.last().to_dict(ordered=True),
                                           indices=[0])
        return self._return_run_step(self.state_blocked, steps_run=1)
 def test_loc_table_selection(self):
     """Check a TableSelectedView restricted to the ids 5, 6 and 7.

     The table is filled in two halves (assignment, then append) from 20
     random values per column.
     """
     table = Table('table_for_sel', dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     ints = np.random.randint(100, size=20)
     table['a'] = ints[:10]
     floats = np.array(np.random.rand(20), np.float32)
     table['b'] = floats[:10]
     # The second half of the data arrives through append().
     table.append({'a': ints[10:], 'b': floats[10:]})
     selection = bitmap(range(5, 8))
     view = TableSelectedView(table, selection, None)
     self.assertEqual(type(view), TableSelectedView)
     first_column = view[0].value
     self.assertTrue(np.array_equal(first_column, ints[5:8]))
     for row_id, colname, source in ((6, 'a', ints), (7, 'b', floats)):
         self.assertEqual(view.at[row_id, colname], source[row_id])
     # Ids just outside the selection are expected to raise KeyError.
     for row_id in (4, 8):
         with self.assertRaises(KeyError):
             self.assertEqual(view.at[row_id, 'a'], ints[row_id])
Exemple #11
0
 def test_loc_table_selection(self) -> None:
     """Check the view returned by ``loc`` for the ids 5, 6 and 7.

     The table is filled in two halves (assignment, then append) from 20
     random values per column.
     """
     table = Table("table_for_sel", dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     ints = np.random.randint(100, size=20)
     table["a"] = ints[:10]
     floats = np.array(np.random.rand(20), np.float32)
     table["b"] = floats[:10]
     # The second half of the data arrives through append().
     table.append({"a": ints[10:], "b": floats[10:]})
     selection = bitmap(range(5, 8))
     view = table.loc[selection, :]
     assert view is not None
     self.assertEqual(type(view), BaseTable)
     first_column = view[0].value
     self.assertTrue(np.array_equal(first_column, ints[5:8]))
     for row_id, colname, source in ((6, "a", ints), (7, "b", floats)):
         self.assertEqual(view.at[row_id, colname], source[row_id])
     # Ids just outside the selection are expected to raise KeyError.
     for row_id in (4, 8):
         with self.assertRaises(KeyError):
             self.assertEqual(view.at[row_id, "a"], ints[row_id])
 def test_loc_table_computed(self) -> None:
     """Exercise ``loc`` selections on a table carrying a computed column.

     A computed column ``arcsin_b`` (``np.arcsin`` applied to ``b``) is added
     to a 20-row table; shapes, types and values of the resulting selections
     are then checked.
     """
     table = Table(
         "table_for_test_computed_columns",
         dshape="{a: int, b: float32}",
         create=True,
     )
     table.resize(10)
     ints = np.random.randint(100, size=20)
     table["a"] = ints[:10]
     floats = np.array(np.random.rand(20), np.float32)
     table["b"] = floats[:10]
     self.assertEqual(table.shape, (10, 2))
     table.append({"a": ints[10:], "b": floats[10:]})
     self.assertEqual(table.shape, (20, 2))
     table.add_computed("arcsin_b", "b", np.arcsin)
     # The table's own shape is expected to stay (20, 2) after add_computed.
     self.assertEqual(table.shape, (20, 2))

     # Single-column selections, stored and computed.
     only_b = table.loc[:, "b"]
     assert only_b
     self.assertEqual(only_b.shape, (20, 1))
     only_arcsin = table.loc[:, "arcsin_b"]
     assert only_arcsin
     self.assertEqual(only_arcsin.shape, (20, 1))

     # Row selection, then a column selection on top of it.
     selection = bitmap(range(5, 8))
     view = table.loc[selection, :]
     assert view is not None
     self.assertEqual(view.shape, (3, 3))
     narrowed = view.loc[selection, ["b", "arcsin_b"]]
     assert narrowed is not None
     self.assertEqual(narrowed.shape, (3, 2))
     self.assertTrue(
         np.allclose(np.arcsin(only_b.to_array()), only_arcsin.to_array()))
     for selected in (view, narrowed):
         self.assertEqual(type(selected), BaseTable)

     expected_columns = (ints[5:8], floats[5:8], np.arcsin(floats[5:8]))
     for position, expected in enumerate(expected_columns):
         self.assertTrue(np.array_equal(view[position].value, expected))
     self.assertEqual(view.at[6, "a"], ints[6])
     self.assertEqual(view.at[7, "b"], floats[7])
     self.assertEqual(view.at[7, "arcsin_b"], np.arcsin(floats[7]))
     # Ids just outside the selection are expected to raise KeyError.
     for row_id in (4, 8):
         with self.assertRaises(KeyError):
             self.assertEqual(view.at[row_id, "a"], ints[row_id])
Exemple #13
0
 def test_loc_tableview(self):
     """Check slice and list selections made on top of a sliced view.

     ``t.loc[2:11]`` is expected to give a TableSlicedView; selecting a list
     of positions or ids from it is expected to give a TableSelectedView.
     """
     table = Table('table_loc', dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     ints = np.random.randint(100, size=20)
     table['a'] = ints[:10]
     floats = np.random.rand(20)
     table['b'] = floats[:10]
     table.append({'a': ints[10:], 'b': floats[10:]})

     view = table.loc[2:11]
     self.assertEqual(type(view), TableSlicedView)
     self.assertTrue(np.array_equal(view._column(0)[:], ints[2:12]))

     # Slice by position, then by id, inside the view.
     by_position = view.iloc[3:7]
     actual = by_position._column(0)[:]
     expected = view._column(0)[3:7]
     self.assertTrue(np.array_equal(actual, expected))
     by_id = view.loc[3:6]
     actual = by_id._column(0)[:]
     expected = view._column(0)[view.id_to_index(slice(3, 6))]
     self.assertTrue(np.array_equal(actual, expected))

     # List selection by position, then by id.
     picked = view.iloc[[3, 4, 6, 9]]
     self.assertEqual(type(picked), TableSelectedView)
     actual = picked._column(0).values
     expected = view._column(0)[[3, 4, 6, 9]]
     self.assertTrue(np.array_equal(actual, expected))
     picked = view.loc[[3, 4, 6, 9]]
     self.assertEqual(type(picked), TableSelectedView)
     actual = picked._column(0).values
     expected = view._column(0)[view.id_to_index([3, 4, 6, 9])]
     self.assertTrue(np.array_equal(actual, expected))
Exemple #14
0
    def test_row(self):
        """Check reading and writing through a Row bound to a table.

        The assertions expect the row to expose the table's last line, both
        before and after more lines are appended.
        """
        initial = {
            'a': [1, 2, 3],
            'b': [10.1, 0.2, 0.3]
        }
        table = Table('table', data=initial, create=True)
        row = Row(table)

        self.assertEqual(len(row), 2)  # one value per column
        for colname, expected in (('a', 3), ('b', 0.3)):
            self.assertEqual(row[colname], expected)

        # Writing through the row must be visible in the table's last line.
        row['a'] = 4
        self.assertEqual(row['a'], 4)
        last_id = len(table) - 1
        self.assertEqual(table.at[last_id, 'a'], 4)

        table.append({'a': [4, 5], 'b': [0.4, 0.5]})
        self.assertEqual(len(row), 2)  # one value per column
        for colname, expected in (('a', 5), ('b', 0.5)):
            self.assertEqual(row[colname], expected)
Exemple #15
0
    def test_row(self) -> None:
        """Check reading and writing through a Row bound to a table.

        The assertions expect the row to expose the table's last line, both
        before and after more lines are appended.
        """
        initial = {
            "a": [1, 2, 3],
            "b": [10.1, 0.2, 0.3]
        }
        table = Table("table", data=initial, create=True)
        row = Row(table)

        self.assertEqual(len(row), 2)  # one value per column
        for colname, expected in (("a", 3), ("b", 0.3)):
            self.assertEqual(row[colname], expected)

        # Writing through the row must be visible in the table's last line.
        row["a"] = 4
        self.assertEqual(row["a"], 4)
        last_id = len(table) - 1
        self.assertEqual(table.at[last_id, "a"], 4)

        table.append({"a": [4, 5], "b": [0.4, 0.5]})
        self.assertEqual(len(row), 2)  # one value per column
        for colname, expected in (("a", 5), ("b", 0.5)):
            self.assertEqual(row[colname], expected)
Exemple #16
0
 def test_loc_tableview(self) -> None:
     """Check slice and list ``loc`` selections made on top of a ``loc`` view.

     Every selection is expected to be a ``BaseTable``; column contents are
     compared against the parent view.
     """
     table = Table("table_loc", dshape="{a: int, b: float32}", create=True)
     table.resize(10)
     ints = np.random.randint(100, size=20)
     table["a"] = ints[:10]
     floats = np.random.rand(20)
     table["b"] = floats[:10]
     table.append({"a": ints[10:], "b": floats[10:]})

     view = table.loc[2:11]
     assert view is not None
     self.assertEqual(type(view), BaseTable)
     self.assertTrue(np.array_equal(view._column(0)[:], ints[2:12]))

     # Two slice selections inside the view.
     sub_view = view.loc[3:7]
     assert sub_view is not None
     actual = sub_view._column(0)[:]
     expected = view._column(0)[3:7]
     self.assertTrue(np.array_equal(actual, expected))
     sub_view = view.loc[3:6]
     assert sub_view is not None
     actual = sub_view._column(0)[:]
     expected = view._column(0)[view.id_to_index(slice(3, 6))]
     self.assertTrue(np.array_equal(actual, expected))

     # The same list selection, checked against direct indexing and then
     # against id_to_index().
     picked = view.loc[[3, 4, 6, 9]]
     assert picked is not None
     self.assertEqual(type(picked), BaseTable)
     actual = picked._column(0).values
     expected = view._column(0)[[3, 4, 6, 9]]
     self.assertTrue(np.array_equal(actual, expected))
     picked = view.loc[[3, 4, 6, 9]]
     assert picked is not None
     self.assertEqual(type(picked), BaseTable)
     actual = picked._column(0).values
     expected = view._column(0)[view.id_to_index([3, 4, 6, 9])]
     self.assertTrue(np.array_equal(actual, expected))
Exemple #17
0
class Counter(TableModule):
    """Module counting the rows created on its 'table' input slot.

    The output table has a single ``counter`` column whose row 0 accumulates
    the number of created rows consumed so far.
    """

    def __init__(self, **kwds):
        """Declare the required 'table' input slot."""
        input_slots = [SlotDescriptor('table', type=Table, required=True)]
        self._add_slots(kwds, 'input_descriptors', input_slots)
        super(Counter, self).__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self):
        """Return True when the input slot has created rows pending."""
        slot = self.get_input_slot('table')
        if slot.created.any():
            return True
        return super(Counter, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` created rows and add them to the count.

        Updated or deleted input rows trigger a reset: the slot is reset and
        the output table, if any, is emptied.
        """
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            if self._table is not None:
                self._table.resize(0)
            slot.update(run_number)
        created = slot.created.next(step_size)  # returns a slice
        nsteps = indices_len(created)
        if nsteps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # The slot data is fetched as before although its value is not used.
        slot.data()
        increment = pd.DataFrame({'counter': nsteps}, index=[0])
        if self._table is None:
            self._table = Table(self.generate_table_name('counter'),
                                data=increment,
                                create=True)
        elif len(self._table) == 0:  # has been reset
            self._table.append(increment)
        else:
            self._table['counter'].loc[0] += nsteps
        return self._return_run_step(self.next_state(slot), steps_run=nsteps)
    def test_tablechangemanager(self) -> None:
        """Main test: follow created/updated/deleted buffers of change managers.

        Three TableChangeManager instances are built on the same fake slot.
        Two of them (``changemanager`` under id ``m1`` and ``cm2`` under id
        ``m2``) are updated at different run numbers while the table is
        appended to, modified and has rows deleted; after each update the
        content of their ``created``, ``updated`` and ``deleted`` buffers is
        checked. The third one (``cm3``) is only checked in its initial state.
        """
        # pylint: disable=protected-access,too-many-locals,too-many-statements
        table = Table("test_changemanager_table",
                      data={
                          "a": [1, 2, 3],
                          "b": [10.1, 0.2, 0.3]
                      })
        col_a = table["a"]
        col_b = table["b"]
        s = self.scheduler()
        table.changes = TableChanges()
        # The scheduler's run number is driven by hand throughout the test.
        s._run_number = 1
        last = s._run_number
        slot = FakeSlot(table)

        # A fresh manager starts at update 0 with empty buffers.
        mid1 = "m1"
        changemanager = TableChangeManager(slot,
                                           buffer_updated=True,
                                           buffer_deleted=True)
        self.assertEqual(changemanager.last_update(), 0)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        mid2 = "m2"
        cm2 = TableChangeManager(slot,
                                 buffer_updated=True,
                                 buffer_deleted=True)
        self.assertEqual(cm2.last_update(), 0)
        self.assertEqual(cm2.created.length(), 0)
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        cm3 = TableChangeManager(slot,
                                 buffer_updated=True,
                                 buffer_deleted=True)
        self.assertEqual(cm3.last_update(), 0)
        self.assertEqual(cm3.created.length(), 0)
        self.assertEqual(cm3.updated.length(), 0)
        self.assertEqual(cm3.deleted.length(), 0)

        # First update of m1: the three initial rows are reported as created.
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(0, 3))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        # One appended row per run: only that row is reported as created.
        s._run_number += 1
        last = s._run_number
        table.append({"a": [4], "b": [0.5]})
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(3, 4))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        s._run_number += 1
        last = s._run_number
        table.append({"a": [5], "b": [0.5]})
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(4, 5))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        # In-place writes to rows 3 and 4 are reported as updated.
        s._run_number += 1
        col_a[3] = 42
        col_b[3] = 0.42
        col_b[4] = 0.52
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.next(), slice(3, 5))
        self.assertEqual(changemanager.deleted.length(), 0)

        # A run without any table change leaves all buffers empty.
        s._run_number += 1
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        s._run_number += 1
        last2 = 0
        col_a[2] = 22
        col_b[2] = 0.22
        col_b[1] = 0.12

        # First update of m2: all five rows are expected as created and none
        # as updated, despite the writes made just above.
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(0, 5))
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        # Changes made during a run in which no manager is updated.
        s._run_number += 1
        col_a[0] = 11
        col_b[0] = 0.11
        col_b[2] = 0.32
        table.append({"a": [6], "b": [0.6]})

        # m1 is expected to see the writes of both previous runs (rows 0-2)
        # plus the appended row.
        s._run_number += 1
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(5, 6))
        self.assertEqual(changemanager.updated.next(), slice(0, 3))
        self.assertEqual(changemanager.deleted.length(), 0)
        # m2 is expected to see only the writes made since its own last
        # update (rows 0 and 2) plus the appended row.
        s._run_number += 1
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(5, 6))
        self.assertEqual(list(cm2.updated.next(as_slice=False)), [0, 2])
        self.assertEqual(cm2.deleted.length(), 0)

        s._run_number += 1
        col_a[0] = 1
        col_b[0] = 0.11
        col_b[2] = 0.22

        # test deletes
        s._run_number += 1
        del table.loc[2]
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        # new behaviour prev. 0
        self.assertEqual(changemanager.updated.length(), 1)
        self.assertEqual(changemanager.deleted.next(), slice(2, 3))
        # The deleted id must no longer be reachable through loc.
        with self.assertRaises(KeyError):
            table.loc[2]
        # Not sure we want to specify what happens inside a deleted slot?
        # self.assertTrue(np.all(a[:]==np.array([1,2,a.fillvalue,42,5,6])))
        # self.assertTrue(np.all(b[:]==np.array([0.11,0.12,a.fillvalue,0.42,.52,0.6])))

        # m2 was not updated since before the first delete: it is expected
        # to report both deleted ids, the two appended rows and the updates.
        s._run_number += 1
        del table.loc[4]
        table.append({"a": [7, 8], "b": [0.7, 0.8]})
        col_a[5] = 0.55
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(6, 8))
        # new behaviour, prev. slice(5, 6)
        self.assertEqual(cm2.updated.next(), bitmap([0, 5]))
        self.assertEqual(list(cm2.deleted.next(as_slice=False)), [2, 4])

        # TODO test reset
        changemanager.reset(mid1)
        self.assertEqual(changemanager.last_update(), 0)
class Histogram1D(TableModule):
    """
    Module maintaining a running 1D histogram of one column of its input.

    Input slots (all required): ``table`` provides the rows to bin, ``min``
    and ``max`` provide the current bounds of the column.  Each productive
    step appends one row ``{array, min, max, time}`` to the module's table.

    Parameters:
        bins: number of bins used when no edges have been computed yet.
        delta: margin added on both sides of the bounds.  A negative value
            is a percentage of the extent (``-5`` means 5%); a non-negative
            value is used as an absolute margin.
    """
    parameters = [('bins', np.dtype(int), 128),
                  ('delta', np.dtype(float), -5)]  # 5%

    schema = "{ array: var * int32, min: float64, max: float64, time: int64 }"

    def __init__(self, column, **kwds):
        """Create the module for ``column`` and its empty output table.

        ``column`` is the name looked up in the ``table``, ``min`` and
        ``max`` inputs; remaining keywords are forwarded to ``TableModule``.
        """
        self._add_slots(kwds, 'input_descriptors', [
            SlotDescriptor('table', type=Table, required=True),
            SlotDescriptor('min', type=Table, required=True),
            SlotDescriptor('max', type=Table, required=True)
        ])
        super(Histogram1D, self).__init__(dataframe_slot='table', **kwds)
        self.column = column
        self.total_read = 0
        self.default_step_size = 1000
        self._histo = None   # accumulated bin counts, None until first chunk
        self._edges = None   # bin edges returned by np.histogram
        self._bounds = None  # (min, max) range currently used for binning
        self._table = Table(self.generate_table_name('Histogram1D'),
                            dshape=Histogram1D.schema,
                            chunks={'array': (16384, 128)},
                            create=True)

    def is_ready(self):
        """Return True when bounds are known and new rows are pending."""
        if self._bounds and self.get_input_slot('table').created.any():
            return True
        return super(Histogram1D, self).is_ready()

    def run_step(self, run_number, step_size, howlong):
        """Bin up to ``step_size`` newly created rows into the histogram.

        The histogram is restarted (input slot reset, counts and edges
        dropped) when rows were updated or deleted, or when the bounds moved
        outside the tolerance computed by ``get_delta``.  The step returns a
        blocked state without consuming rows when no input is pending, when
        bounds are not available yet, or when the bounds are degenerate.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        min_slot = self.get_input_slot('min')
        min_slot.update(run_number)
        max_slot = self.get_input_slot('max')
        max_slot.update(run_number)

        if dfslot.updated.any() or dfslot.deleted.any():
            # Counts cannot be patched incrementally: start over.
            logger.debug('reseting histogram')
            dfslot.reset()
            self._histo = None
            self._edges = None
            dfslot.update(run_number)

        if not (dfslot.created.any() or min_slot.created.any()
                or max_slot.created.any()):
            logger.info('Input buffers empty')
            return self._return_run_step(self.state_blocked, steps_run=0)

        bounds = self.get_bounds(min_slot, max_slot)
        if bounds is None:
            logger.debug('No bounds yet at run %d', run_number)
            return self._return_run_step(self.state_blocked, steps_run=0)

        bound_min, bound_max = bounds
        if self._bounds is None:
            delta = self.get_delta(*bounds)
            self._bounds = (bound_min - delta, bound_max + delta)
            logger.info("New bounds at run %d: %s", run_number, self._bounds)
        else:
            (old_min, old_max) = self._bounds
            delta = self.get_delta(*bounds)

            # Rebuild when data escapes the current range, or when the range
            # has become wider than the data by more than ``delta``.
            if(bound_min < old_min or bound_max > old_max) \
              or bound_min > (old_min + delta) or bound_max < (old_max - delta):
                self._bounds = (bound_min - delta, bound_max + delta)
                logger.info('Updated bounds at run %d: %s', run_number,
                            self._bounds)
                dfslot.reset()
                dfslot.update(run_number)
                self._histo = None
                self._edges = None

        (curr_min, curr_max) = self._bounds
        if curr_min >= curr_max:
            logger.error('Invalid bounds: %s', self._bounds)
            return self._return_run_step(self.state_blocked, steps_run=0)

        input_df = dfslot.data()
        indices = dfslot.created.next(step_size)  # returns a slice or ... ?
        steps = indices_len(indices)
        logger.info('Read %d rows', steps)
        self.total_read += steps
        column = input_df[self.column]
        column = column.loc[fix_loc(indices)]
        # Reuse the existing edges so successive chunks share the same bins.
        bins = self._edges if self._edges is not None else self.params.bins
        histo = None
        if len(column) > 0:
            # ``normed`` is not passed: the keyword was removed from NumPy
            # and ``density=False`` already requests raw counts.
            histo, self._edges = np.histogram(column,
                                              bins=bins,
                                              range=[curr_min, curr_max],
                                              density=False)
        if self._histo is None:
            self._histo = histo
        elif histo is not None:
            self._histo += histo
        values = {
            'array': [self._histo],
            'min': [curr_min],
            'max': [curr_max],
            'time': [run_number]
        }
        with self.lock:
            self._table['array'].set_shape((self.params.bins, ))
            self._table.append(values)
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)

    def get_bounds(self, min_slot, max_slot):
        """Return ``(min, max)`` for ``self.column`` from the bound slots.

        Returns None when either bound table is still empty and no bounds
        have been established yet.  Pending ``created`` changes of both
        slots are consumed.
        """
        min_slot.created.next()
        with min_slot.lock:
            min_df = min_slot.data()
            if len(min_df) == 0 and self._bounds is None:
                return None
            min_ = min_df.last(self.column)

        max_slot.created.next()
        with max_slot.lock:
            max_df = max_slot.data()
            if len(max_df) == 0 and self._bounds is None:
                return None
            max_ = max_df.last(self.column)

        return (min_, max_)

    def get_delta(self, min_, max_):
        """Return the margin to apply around the ``[min_, max_]`` range.

        A negative ``delta`` parameter is read as a percentage of the
        extent; a non-negative one is returned unchanged as an absolute
        margin (previously this case fell through and returned None).
        """
        delta = self.params['delta']
        extent = max_ - min_
        if delta < 0:
            return extent * delta / -100.0
        return delta

    def get_histogram(self):
        """Return the histogram as a dict of plain Python values.

        Keys are ``edges`` and ``values`` (lists, empty when nothing has
        been computed) and ``min`` / ``max`` (None when bounds are unknown).
        """
        min_ = self._bounds[0] if self._bounds else None
        max_ = self._bounds[1] if self._bounds else None
        edges = self._edges
        if edges is None:
            edges = []
        elif isinstance(edges, integer_types):
            edges = [edges]
        else:
            edges = edges.tolist()
        return {
            "edges": edges,
            "values": self._histo.tolist() if self._histo is not None else [],
            "min": min_,
            "max": max_
        }

    def is_visualization(self):
        """Report that this module is a visualization."""
        return True

    def get_visualization(self):
        """Return the visualization identifier ``"histogram1d"``."""
        return "histogram1d"

    def to_json(self, short=False):
        """Return the module's JSON; the histogram is added unless ``short``."""
        json = super(Histogram1D, self).to_json(short)
        if short:
            return json
        return self._hist_to_json(json)

    def _hist_to_json(self, json):
        """Store ``get_histogram()`` under ``json['histogram']`` and return it."""
        json['histogram'] = self.get_histogram()
        return json
    def test_tablechangemanager(self):
        """Exercise TableChangeManager on a table that is appended to,
        updated and deleted from across successive run numbers.

        Three managers share the same slot and are updated under distinct
        ``mid`` values (1, 2, 3) at different run numbers, so each one is
        expected to report only the changes made since its own last update.
        """
        # pylint: disable=protected-access,too-many-locals,too-many-statements
        table = Table('test_changemanager_table',
                      data={
                          'a': [1, 2, 3],
                          'b': [10.1, 0.2, 0.3]
                      })
        col_a = table['a']
        col_b = table['b']
        s = self.scheduler
        table.changes = TableChanges()
        s._run_number = 1
        last = s._run_number
        slot = FakeSlot(table)

        # Freshly built managers report no update and no pending changes.
        mid1 = 1
        changemanager = TableChangeManager(slot,
                                           buffer_updated=True,
                                           buffer_deleted=True)
        self.assertEqual(changemanager.last_update(), 0)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        mid2 = 2
        cm2 = TableChangeManager(slot,
                                 buffer_updated=True,
                                 buffer_deleted=True)
        self.assertEqual(cm2.last_update(), 0)
        self.assertEqual(cm2.created.length(), 0)
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        mid3 = 3
        cm3 = TableChangeManager(slot,
                                 buffer_updated=True,
                                 buffer_deleted=True)
        self.assertEqual(cm3.last_update(), 0)
        self.assertEqual(cm3.created.length(), 0)
        self.assertEqual(cm3.updated.length(), 0)
        self.assertEqual(cm3.deleted.length(), 0)

        # First update of the first manager: the three initial rows show up
        # as created.
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(0, 3))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        # One appended row per run is reported as a one-row created slice.
        s._run_number += 1
        last = s._run_number
        table.append({'a': [4], 'b': [0.5]})
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(3, 4))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        s._run_number += 1
        last = s._run_number
        table.append({'a': [5], 'b': [0.5]})
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(4, 5))
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        # Writes to rows 3 and 4 are reported as a single updated slice.
        s._run_number += 1
        col_a[3] = 42
        col_b[3] = 0.42
        col_b[4] = 0.52
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.next(), slice(3, 5))
        self.assertEqual(changemanager.deleted.length(), 0)

        # Updating again without touching the table reports nothing.
        s._run_number += 1
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.length(), 0)

        s._run_number += 1
        # NOTE(review): this assignment is overwritten below before any use.
        last2 = 0
        col_a[2] = 22
        col_b[2] = 0.22
        col_b[1] = 0.12

        # First update of the second manager: all five rows are reported as
        # created and the earlier writes are not reported as updates.
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(0, 5))
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        s._run_number += 1
        col_a[0] = 11
        col_b[0] = 0.11
        col_b[2] = 0.32
        table.append({'a': [6], 'b': [0.6]})

        # Third manager is fed a view restricted to ids 1..2.
        tableview = table.loc[1:2]
        last3 = s._run_number
        cm3.update(last3, tableview, mid=mid3)
        self.assertEqual(cm3.created.next(), slice(1, 3))  # ids, not indices
        # NOTE(review): the next two assertions check cm2 right after a cm3
        # update -- possibly meant to check cm3; confirm intent.
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        # The first manager catches up: the row appended since its last
        # update is created, rows 0..2 are updated.
        s._run_number += 1
        last = s._run_number
        # with self.assertRaises(ValueError):
        #     changemanager.update(last+1, table, mid=mid1)
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.next(), slice(5, 6))
        self.assertEqual(changemanager.updated.next(), slice(0, 3))
        self.assertEqual(changemanager.deleted.length(), 0)

        # The second manager catches up: only rows 0 and 2 were written
        # since its last update.
        s._run_number += 1
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(5, 6))
        self.assertEqual(list(cm2.updated.next()), [0, 2])
        self.assertEqual(cm2.deleted.length(), 0)

        # Rows 0 and 2 are written; the view-based manager is expected to
        # report only id 2.
        s._run_number += 1
        col_a[0] = 1
        col_b[0] = 0.11
        col_b[2] = 0.22
        last3 = s._run_number
        cm3.update(last3, tableview, mid=mid3)
        self.assertEqual(cm3.last_update(), last3)
        self.assertEqual(cm3.created.length(), 0)
        self.assertEqual(cm3.updated.next(), slice(2, 3))
        self.assertEqual(cm3.deleted.length(), 0)

        # test deletes
        s._run_number += 1
        del table.loc[2]
        last = s._run_number
        changemanager.update(last, table, mid=mid1)
        self.assertEqual(changemanager.last_update(), last)
        self.assertEqual(changemanager.created.length(), 0)
        self.assertEqual(changemanager.updated.length(), 0)
        self.assertEqual(changemanager.deleted.next(), slice(2, 3))
        # A deleted id can no longer be looked up.
        with self.assertRaises(KeyError):
            table.loc[2]
        # Not sure we want to specify what happens inside a deleted slot?
        # self.assertTrue(np.all(a[:]==np.array([1,2,a.fillvalue,42,5,6])))
        # self.assertTrue(np.all(b[:]==np.array([0.11,0.12,a.fillvalue,0.42,.52,0.6])))

        # Delete, append and write within one run; the second manager is
        # expected to report all three kinds of change, including the
        # deletion of id 2 made in the previous run.
        s._run_number += 1
        del table.loc[4]
        table.append({'a': [7, 8], 'b': [0.7, 0.8]})
        # NOTE(review): a float is assigned to column 'a', which was built
        # from integers -- presumably only the "row 5 changed" effect matters.
        col_a[5] = 0.55
        last2 = s._run_number
        cm2.update(last2, table, mid=mid2)
        self.assertEqual(cm2.last_update(), last2)
        self.assertEqual(cm2.created.next(), slice(6, 8))
        self.assertEqual(cm2.updated.next(), slice(5, 6))
        self.assertEqual(list(cm2.deleted.next()), [2, 4])

        #TODO test reset
        # After a reset the manager reports 0 as its last update.
        changemanager.reset()
        self.assertEqual(changemanager.last_update(), 0)
Exemple #21
0
    def test_tablechangemanager(self) -> None:
        """Exercise TableSelectedChangeManager on a TableSelectedView.

        The view starts with the selection {1, 2} over a three-row table;
        the test then appends rows, deletes rows and changes the selection,
        checking the created/updated/deleted buffers after each update.
        """
        # pylint: disable=protected-access
        table = Table(
            "test_changemanager_table_selected",
            data={"a": [1, 2, 3], "b": [10.1, 0.2, 0.3]},
        )
        selection = bitmap([1, 2])
        table_selected: TableSelectedView = TableSelectedView(table, selection)

        s = self.s
        s._run_number = 1
        last = s._run_number
        slot = FakeSlot(table_selected)

        # Freshly built managers report no update and no pending changes.
        mid1 = "m1"
        cm = TableSelectedChangeManager(
            slot,
            buffer_exposed=True,
            buffer_updated=True,
            buffer_deleted=True,
            buffer_masked=True,
        )
        self.assertEqual(cm.last_update(), 0)
        self.assertEqual(cm.created.length(), 0)
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        # cm2 and cm3 are only checked in their initial state; the scenario
        # that used them is commented out further down.
        # mid2 = 2
        cm2 = TableSelectedChangeManager(
            slot,
            buffer_exposed=True,
            buffer_updated=True,
            buffer_deleted=True,
            buffer_masked=True,
        )
        self.assertEqual(cm2.last_update(), 0)
        self.assertEqual(cm2.created.length(), 0)
        self.assertEqual(cm2.updated.length(), 0)
        self.assertEqual(cm2.deleted.length(), 0)

        # mid3 = 3
        cm3 = TableSelectedChangeManager(
            slot,
            buffer_exposed=True,
            buffer_updated=True,
            buffer_deleted=True,
            buffer_masked=True,
        )
        self.assertEqual(cm3.last_update(), 0)
        self.assertEqual(cm3.created.length(), 0)
        self.assertEqual(cm3.updated.length(), 0)
        self.assertEqual(cm3.deleted.length(), 0)
        # First update: only the selected ids 1 and 2 are reported as created.
        cm.update(last, table_selected, mid=mid1)
        self.assertEqual(cm.last_update(), last)
        self.assertEqual(cm.created.next(), slice(1, 3))  # without the mask
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        s._run_number += 1
        last = s._run_number
        table.append({"a": [4], "b": [0.5]})  # invisible since id=3
        cm.update(last, table_selected, mid=mid1)
        self.assertEqual(cm.last_update(), last)
        self.assertEqual(cm.created.length(), 0)
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        # Append four rows and widen the selection to ids 1..7: ids 3..7
        # are expected to be reported as created.
        s._run_number += 1
        last = s._run_number
        table.append({"a": [5, 6, 7, 8], "b": [0.5, 0.6, 0.7, 0.8]})
        table_selected.selection = bitmap(range(1, 8))
        cm.update(last, table_selected, mid=mid1)
        self.assertEqual(cm.last_update(), last)
        self.assertEqual(cm.created.next(), slice(3, 8))
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        # Delete rows from the base table and shrink the selection in the
        # same run; base and selection deletions are checked separately.
        s._run_number += 1
        last = s._run_number
        del table.loc[[1, 2, 3]]
        table_selected.selection = bitmap(
            [3, 4]
        )  # i.e 1,2,5,6,7 were deleted in selection
        cm.update(last, table_selected, mid=mid1)
        self.assertEqual(cm.last_update(), last)
        self.assertEqual(cm.created.length(), 0)
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.base.deleted.length(), 3)  # 1, 2, 3
        self.assertEqual(
            cm.selection.deleted.length(), 6
        )  # 1, 2, 5, 6, 7[+3 removed because it was perm.deleted]
        self.assertEqual(cm.deleted.length(), 6)  # 1, 2, 3, 5, 6, 7
        # Consume both deletion buffers before the next scenario.
        cm.base.deleted.next()
        cm.selection.deleted.next()
        s._run_number += 1
        last = s._run_number
        table.append({"a": [15, 16, 17, 18], "b": [0.51, 0.61, 0.71, 0.81]})
        # The selection is replaced by an open-ended slice through the
        # private attribute rather than the ``selection`` setter used above.
        table_selected._selection = slice(5, None)
        cm.update(last, table_selected, mid=mid1)
        self.assertEqual(cm.last_update(), last)
        self.assertEqual(cm.base.created.changes, bitmap([8, 9, 10, 11]))
        self.assertEqual(cm.selection.created.changes, bitmap([5, 6, 7, 8, 9, 10, 11]))
        self.assertEqual(cm.selection.deleted.changes, bitmap([4]))
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.base.deleted.length(), 0)
        self.assertEqual(cm.deleted.length(), 1)
        # Consuming the combined buffers is expected to empty the
        # underlying base and selection buffers as well.
        cm.deleted.next()
        self.assertEqual(cm.deleted.length(), 0)
        cm.created.next()
        self.assertEqual(cm.base.created.length(), 0)
        self.assertEqual(cm.selection.created.length(), 0)
        # Disabled scenario below: it still refers to names (a, b, tv) that
        # this test does not define.
        # s._run_number += 1
        # a[3] = 42
        # b[3] = 0.42
        # b[4] = 0.52
        # last = s._run_number
        # cm.update(last, table, mid=mid1)
        # self.assertEqual(cm.last_update(), last)
        # self.assertEqual(cm.created.length(), 0)
        # self.assertEqual(cm.updated.next(), slice(3,5))
        # self.assertEqual(cm.deleted.length(), 0)

        # s._run_number += 1
        # last = s._run_number
        # cm.update(last, table, mid=mid1)
        # self.assertEqual(cm.last_update(), last)
        # self.assertEqual(cm.created.length(), 0)
        # self.assertEqual(cm.updated.length(), 0)
        # self.assertEqual(cm.deleted.length(), 0)

        # s._run_number += 1
        # last2 = 0
        # a[2] = 22
        # b[2] = 0.22
        # b[1] = 0.12

        # last2 = s._run_number
        # cm2.update(last2, table, mid=mid2)
        # self.assertEqual(cm2.last_update(), last2)
        # self.assertEqual(cm2.created.next(), slice(0, 5))
        # self.assertEqual(cm2.updated.length(), 0)
        # self.assertEqual(cm2.deleted.length(), 0)

        # s._run_number += 1
        # a[0] = 11
        # b[0] = 0.11
        # b[2] = 0.32
        # table.append({'a': [ 6], 'b': [0.6] })

        # tv = table.loc[1:2]
        # last3 = s._run_number
        # cm3.update(last3, tv, mid=mid3)
        # self.assertEqual(cm3.created.next(), slice(1, 3)) # test ids, not indices
        # self.assertEqual(cm2.updated.length(), 0)
        # self.assertEqual(cm2.deleted.length(), 0)

        # s._run_number += 1
        # last = s._run_number
        # # with self.assertRaises(ValueError):
        # #     cm.update(last+1, table, mid=mid1)
        # cm.update(last, table, mid=mid1)
        # self.assertEqual(cm.last_update(), last)
        # self.assertEqual(cm.created.next(), slice(5,6))
        # self.assertEqual(cm.updated.next(), slice(0,3))
        # self.assertEqual(cm.deleted.length(), 0)

        # s._run_number += 1
        # last2 = s._run_number
        # cm2.update(last2, table, mid=mid2)
        # self.assertEqual(cm2.last_update(), last2)
        # self.assertEqual(cm2.created.next(), slice(5,6))
        # self.assertEqual(list(cm2.updated.next()), [0,2])
        # self.assertEqual(cm2.deleted.length(), 0)

        # s._run_number += 1
        # a[0] = 1
        # b[0] = 0.11
        # b[2] = 0.22
        # last3 = s._run_number
        # cm3.update(last3, tv, mid=mid3)
        # self.assertEqual(cm3.last_update(), last3)
        # self.assertEqual(cm3.created.length(), 0)
        # self.assertEqual(cm3.updated.next(), slice(2,3))
        # self.assertEqual(cm3.deleted.length(), 0)

        # # test deletes
        # s._run_number += 1
        # del table.loc[2]
        # last = s._run_number
        # cm.update(last, table, mid=mid1)
        # self.assertEqual(cm.last_update(), last)
        # self.assertEqual(cm.created.length(), 0)
        # self.assertEqual(cm.updated.length(), 0)
        # self.assertEqual(cm.deleted.next(), slice(2,3))
        # self.assertTrue(np.all(a[:]==np.array([1,2,a.fillvalue,42,5,6])))
        # self.assertTrue(np.all(b[:]==np.array([0.11,0.12,a.fillvalue,0.42,.52,0.6])))

        # s._run_number += 1
        # del table.loc[4]
        # table.append({'a': [ 7,8], 'b': [0.7,0.8] })
        # a[5] = 0.55
        # last2 = s._run_number
        # cm2.update(last2, table, mid=mid2)
        # self.assertEqual(cm2.last_update(), last2)
        # self.assertEqual(cm2.created.next(), slice(6,8))
        # self.assertEqual(cm2.updated.next(), slice(5,6))
        # self.assertEqual(list(cm2.deleted.next()), [2,4])

        # TODO test reset
        # After a reset the manager reports 0 as its last update.
        cm.reset(mid=mid1)
        self.assertEqual(cm.last_update(), 0)
Exemple #22
0
class IdxMin(TableModule):
    """
    Module tracking, per column, the index of the minimum of its input table.

    Two tables are maintained, one row per productive run: ``self._table``
    holds the row index of the running minimum of each column, and
    ``self._min`` (exposed on the optional ``min`` output slot) holds the
    minimum values themselves.  Both are trimmed to the last ``history``
    rows.
    """
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, **kwds):
        """Declare the ``table`` input and the optional ``min`` output."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        self._add_slots(kwds, 'output_descriptors',
                        [SlotDescriptor('min', type=Table, required=False)])
        super(IdxMin, self).__init__(**kwds)
        self._min = None  # table of minimum values, created on first chunk
        self.default_step_size = 10000

    def min(self):
        """Return the table of minimum values (None before the first chunk)."""
        return self._min

    def get_data(self, name):
        """Return the data for output slot ``name``; ``'min'`` is served here."""
        if name == 'min':
            return self.min()
        return super(IdxMin, self).get_data(name)

    def is_ready(self):
        """Return True as soon as the input slot has newly created rows."""
        if self.get_input_slot('table').created.any():
            return True
        return super(IdxMin, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Fold up to ``step_size`` newly created rows into the running min.

        When input rows were updated or deleted the computation restarts
        from scratch.  Returns a blocked state when no row is available.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            # Drop BOTH tables: leaving ``_min`` set would send the next
            # step into the incremental branch with ``_table`` being None.
            self._table = None
            self._min = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_table = dfslot.data()
        # Per-column index of the minimum within the current chunk.
        op = self.filter_columns(input_table, fix_loc(indices)).idxmin()
        #if not op.index.equals(self._columns):
        #    # some columns are not numerical
        #    self._columns = op.index

        if self._min is None:
            min_ = OrderedDict(zip(op.keys(), [np.nan] * len(op.keys())))
            for col, ix in op.items():
                min_[col] = input_table.at[
                    ix, col]  # lookup value, is there a better way?
            self._min = Table(self.generate_table_name('_min'),
                              dshape=input_table.dshape,
                              create=True)
            self._min.append(min_, indices=[run_number])
            self._table = Table(self.generate_table_name('_table'),
                                dshape=input_table.dshape,
                                create=True)
            self._table.append(op, indices=[run_number])
        else:
            prev_min = self._min.last()
            prev_idx = self._table.last()
            min_ = OrderedDict(prev_min.items())
            for col, ix in op.items():
                val = input_table.at[ix, col]
                if not np.isnan(val) and (np.isnan(min_[col])
                                          or val < min_[col]):
                    # New minimum found in this chunk: ``op[col]`` already
                    # holds its index, only the value needs recording.
                    min_[col] = val
                else:
                    # The chunk did not improve on the running minimum, so
                    # the previously recorded index stays valid.
                    op[col] = prev_idx[col]
            with self.lock:
                self._table.append(op, indices=[run_number])
                self._min.append(min_, indices=[run_number])
                if len(self._table) > self.params.history:
                    # Keep only the most recent ``history`` rows by
                    # rebuilding both tables from their tails.
                    data = self._table.loc[
                        self._table.index[-self.params.history:]]
                    self._table = Table(self.generate_table_name('_table'),
                                        data=data,
                                        create=True)
                    data = self._min.loc[
                        self._min.index[-self.params.history:]]
                    self._min = Table(self.generate_table_name('_min'),
                                      data=data,
                                      create=True)

        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Exemple #23
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Read up to ``step_size`` lines from the parser into the result table.

     The frames returned by the parser are optionally filtered, then either
     used to create the result table (first call) or appended to it.  When
     the parser is flushed and saving is requested, a snapshot of the
     parser state is written to the recovery table.

     Returns the ready state with the number of rows added as
     ``steps_run``; the blocked state is returned when ``validate_parser``
     reports it.

     Raises:
         ProgressiveStopIteration: when ``validate_parser`` reports the
             terminated state or an unexpected state, or when the parser
             returns frames holding no rows at all.
     """
     if step_size == 0:  # bug
         logger.error("Received a step_size of 0")
         return self._return_run_step(self.state_ready, steps_run=0)
     status = self.validate_parser(run_number)
     if status == self.state_terminated:
         raise ProgressiveStopIteration("no more filenames")
     elif status == self.state_blocked:
         return self._return_run_step(status, steps_run=0)
     elif status != self.state_ready:
         logger.error("Invalid state returned by validate_parser: %d",
                      status)
         self.close()
         raise ProgressiveStopIteration("Unexpected situation")
     logger.info("loading %d lines", step_size)
     needs_save = self._needs_save()
     assert self.parser
     df_list: List[pd.DataFrame]
     try:
         df_list = self.parser.read(
             step_size, flush=needs_save)  # raises StopIteration at EOF
         if not df_list:
             raise ProgressiveStopIteration
     except ProgressiveStopIteration:
         # End of the current input: drop the parser and stay ready so a
         # later step can set up a new one.
         self.close()
         if self.has_input_slot("filenames"):
             fn_slot = self.get_input_slot("filenames")
             assert fn_slot.output_module is not None
         self.parser = None
         return self._return_run_step(self.state_ready, 0)
     creates = sum(len(df) for df in df_list)
     if creates == 0:  # should not happen
         logger.error("Received 0 elements")
         raise ProgressiveStopIteration
     if self._filter is not None:
         df_list = [self._filter(df) for df in df_list]
     # Recount: the filter may have removed rows.
     creates = sum(len(df) for df in df_list)
     if creates == 0:
         logger.info("frame has been filtered out")
     else:
         self._rows_read += creates
         logger.info("Loaded %d lines", self._rows_read)
         if self.force_valid_ids:
             for df in df_list:
                 force_valid_id_columns(df)
         if self.result is None:
             table = self.table
             data, dshape = self._data_as_array(pd.concat(df_list))
             if not self._recovery:
                 self._table_params["name"] = self.generate_table_name(
                     "table")
                 self._table_params["data"] = data
                 self._table_params["dshape"] = dshape
                 self._table_params["create"] = True
                 self.result = Table(**self._table_params)
             else:
                 self._table_params["name"] = self._recovered_csv_table_name
                 # self._table_params['dshape'] = dshape
                 self._table_params["create"] = False
                 table = Table(**self._table_params)
                 self.result = table
                 # Append the converted data only: ``_data_as_array``
                 # returns a ``(data, dshape)`` pair, which was previously
                 # passed to ``append`` as a whole.
                 table.append(data)
         else:
             table = self.table
             for df in df_list:
                 data, _ = self._data_as_array(df)
                 table.append(data)
         if (self.parser.is_flushed() and needs_save
                 and self._recovery_table is None and self._save_context):
             # First snapshot: create the recovery table and the table
             # recording which result table belongs to which input.
             table = self.table
             snapshot = self.parser.get_snapshot(
                 run_number=run_number,
                 table_name=table.name,
                 last_id=table.last_id,
             )
             self._recovery_table = Table(
                 name=self._recovery_table_name,
                 data=pd.DataFrame(snapshot, index=[0]),
                 create=True,
             )
             self._recovery_table_inv = Table(
                 name=self._recovery_table_inv_name,
                 data=pd.DataFrame(
                     dict(
                         table_name=table.name,
                         csv_input=self.filepath_or_buffer,
                     ),
                     index=[0],
                 ),
                 create=True,
             )
             self._last_saved_id = table.last_id
         elif self.parser.is_flushed(
         ) and needs_save and self._save_context:
             snapshot = self.parser.get_snapshot(
                 run_number=run_number,
                 last_id=table.last_id,
                 table_name=table.name,
             )
             assert self._recovery_table
             self._recovery_table.add(snapshot)
             # Bound the recovery table by dropping the snapshot with the
             # smallest offset once the size limit is exceeded.
             if len(self._recovery_table) > self._recovery_table_size:
                 oldest = self._recovery_table.argmin()["offset"]
                 self._recovery_table.drop(oldest)
             self._last_saved_id = table.last_id
     return self._return_run_step(self.state_ready, steps_run=creates)