def test_del3(self) -> None:
    """Delete random rows, append new ones, and compare indices with pandas.

    A Table and a DataFrame built from it receive the same deletions and the
    same appended rows; their indices must agree after each step.
    """
    t = Table("table_filtering", dshape="{a: int, b: float32}", create=True)
    sz = 20
    sz_del = 10
    sz_add = 15
    t.resize(sz)
    np.random.seed(42)
    ivalues = np.random.randint(100, size=sz)
    t["a"] = ivalues
    fvalues: np.ndarray[Any, Any] = np.random.rand(sz) * 100
    t["b"] = fvalues
    df = pd.DataFrame(t.to_dict())
    # Ids are drawn from [0, len(t) - 1), so the last row is never deleted.
    to_del = np.random.randint(len(t) - 1, size=sz_del)
    del t.loc[to_del]
    df = df.drop(to_del)
    self.assertListEqual(list(t.index), list(df.index))
    ivalues2: np.ndarray[Any, Any] = np.random.randint(100, size=sz_add)
    fvalues2: np.ndarray[Any, Any] = np.random.rand(sz_add) * 100
    dict_add = {"a": ivalues2, "b": fvalues2}
    # New rows are labelled right after the last surviving label of the frame.
    ix = range(df.index[-1] + 1, df.index[-1] + 1 + sz_add)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    df = pd.concat([df, pd.DataFrame(dict_add, index=ix)])
    t.append(data=dict_add)
    self.assertSetEqual(set(t.index), set(df.index))
def append_dataframe(self) -> None:
    """Create a Table from a DataFrame, append the frame, then append the table to itself.

    After every step the table length and the column contents are checked
    against the source frame (or against the first half of the table).
    """
    # pylint: disable=protected-access
    frame = pd.DataFrame(
        {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": ["a", "b", "cd"]}
    )
    n_rows = len(frame)
    table = Table("table_2", data=frame)

    # Step 1: the freshly built table mirrors the frame.
    self.assertEqual(len(table), n_rows)
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(np.all(expected.values == actual.values))

    # Step 2: appending the frame doubles the table; the tail equals the frame.
    table.append(frame)
    self.assertEqual(len(table), 2 * n_rows)
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(2 * len(expected), len(actual))
        self.assertTrue(np.all(expected == actual[n_rows:len(table)]))

    # Step 3: appending the table to itself doubles it again.
    table.append(table)  # fun test
    self.assertEqual(len(table), 4 * n_rows)
    half = 2 * n_rows
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(4 * len(expected), len(actual))
        self.assertTrue(np.all(actual[0:half] == actual[half:len(table)]))
def append_direct(self) -> None:
    """Create a Table from an ordered dict, append the dict, then append the table to itself.

    A DataFrame built from the same dict serves as the reference for lengths
    and column contents.
    """
    # pylint: disable=protected-access
    raw = OrderedDict(
        [("a", [1, 2, 3]), ("b", [0.1, 0.2, 0.3]), ("c", ["a", "b", "cd"])]
    )
    frame = pd.DataFrame(raw)
    n_rows = len(frame)
    table = Table("table_3", data=raw)

    # Step 1: the freshly built table mirrors the reference frame.
    self.assertEqual(len(table), n_rows)
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(np.all(expected == actual.values))

    # Step 2: appending the dict doubles the table; the tail equals the frame.
    table.append(raw)
    self.assertEqual(len(table), 2 * n_rows)
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(2 * len(expected), len(actual))
        self.assertTrue(np.all(expected == actual[n_rows:len(table)]))

    # Step 3: appending the table to itself doubles it again.
    table.append(table)  # fun test
    self.assertEqual(len(table), 4 * n_rows)
    half = 2 * n_rows
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(4 * len(expected), len(actual))
        self.assertTrue(np.all(actual[0:half] == actual[half:len(table)]))
class Var(TableModule):
    """
    Compute the variance of the columns of an input dataframe.

    One ``OnlineVariance`` accumulator is kept per column; each step feeds the
    newly created rows to the accumulators and appends the current variances
    as a new row of the output table, indexed by the run number.
    """
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required ``table`` input slot and set up the accumulators."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Var, self).__init__(dataframe_slot='table', **kwds)
        self._columns = columns
        self._data = {}  # column name -> OnlineVariance accumulator
        self.default_step_size = 1000

    def is_ready(self):
        """Return True when the input slot has created rows pending, else defer to the base class."""
        if self.get_input_slot('table').created.any():
            return True
        return super(Var, self).is_ready()

    def op(self, chunk):
        """Feed ``chunk`` to the per-column accumulators and return their variances.

        Returns a dict mapping each column name of ``chunk`` to the current
        ``variance`` of its accumulator.
        """
        cols = chunk.columns
        ret = {}
        for c in cols:
            data = self._data.get(c)
            if data is None:
                # First time this column is seen: create its accumulator.
                data = OnlineVariance()
                self._data[c] = data
            data.add(chunk[c])
            ret[c] = data.variance
        return ret

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` created rows and append the variances to the output table."""
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            # Updates/deletions are not handled incrementally: start over.
            # NOTE(review): self._data is not cleared here, so the accumulators
            # keep what they already saw -- confirm this is intended.
            dfslot.reset()
            self._table = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.op(self.filter_columns(input_df, fix_loc(indices)))
        if self._table is None:
            self._table = Table(self.generate_table_name('var'),
                                dshape=input_df.dshape,
                                # scheduler=self.scheduler(),
                                create=True)
        self._table.append(op, indices=[run_number])
        # A leftover debug print of the whole table was removed from here.
        if len(self._table) > self.params.history:
            # Keep only the last `history` rows.
            self._table = self._table.loc[self._table.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
class Max(TableModule):
    """Maintain the running per-column maximum of the rows created on the ``table`` slot."""
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        """Declare the required ``table`` input slot and store the column filter."""
        table_slot = SlotDescriptor('table', type=Table, required=True)
        self._add_slots(kwds, 'input_descriptors', [table_slot])
        super(Max, self).__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000

    def is_ready(self):
        """Ready as soon as created rows are pending; otherwise ask the base class."""
        pending = self.get_input_slot('table').created.any()
        return True if pending else super(Max, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Process up to ``step_size`` created rows and append the updated maxima."""
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            # Updates or deletions invalidate the result: restart from scratch.
            slot.reset()
            if self._table is not None:
                self._table.resize(0)
            slot.update(run_number)
        created = slot.created.next(step_size)  # returns a slice
        n_steps = indices_len(created)
        if n_steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()
        maxima = self.filter_columns(source, fix_loc(created)).max(keepdims=True)
        if self._table is None:
            self._table = Table(
                self.generate_table_name('max'),
                data=maxima,
                # scheduler=self.scheduler(),
                create=True)
        else:
            if len(self._table) != 0:
                # Merge with the previous result (skipped right after a reset).
                previous = self._table.last()
                for name in previous:
                    col_max = maxima[name]
                    col_max[0] = np.maximum(col_max, previous[name])
            self._table.append(maxima)
        # TODO manage the history in a more efficient way
        return self._return_run_step(self.next_state(slot), steps_run=n_steps)
def test_mmap3(self):
    """Build a Table from a DataFrame, compare the columns, then append the frame once."""
    # pylint: disable=protected-access
    self._rmtree()
    frame = pd.DataFrame({'a': [1, 2, 3],
                          'b': [0.1, 0.2, 0.3],
                          'c': ['a', 'b', 'cd']})
    table = Table('table_2', data=frame)
    self.assertEqual(len(table), len(frame))
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(np.all(expected.values == actual.values))
    # Appending the same frame must double the number of rows.
    table.append(frame)
    self.assertEqual(len(table), 2 * len(frame))
    self._rmtree()
def test_mmap3(self) -> None:
    """Build a Table from a DataFrame, compare the columns, then append the frame once."""
    # pylint: disable=protected-access
    self._rmtree()
    frame = pd.DataFrame(
        {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": ["a", "b", "cd"]}
    )
    table = Table("table_2", data=frame)
    self.assertEqual(len(table), len(frame))
    for name in frame:
        expected, actual = frame[name], table[name]
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(np.all(expected.values == actual.values))
    # Appending the same frame must double the number of rows.
    table.append(frame)
    self.assertEqual(len(table), 2 * len(frame))
    self._rmtree()
class RandomTable(TableModule):
    "Random table generator module"

    def __init__(self, columns, rows=-1, random=RAND, throttle=False, **kwds):
        """Create the output table with one float64 column per requested column.

        ``columns`` is either a count (columns are then named ``_1`` ... ``_n``)
        or a list/array of names; anything else raises ``ProgressiveError``.
        """
        super(RandomTable, self).__init__(**kwds)
        self.default_step_size = 1000
        if isinstance(columns, integer_types):
            self.columns = ["_%d" % i for i in range(1, columns + 1)]
        elif isinstance(columns, (list, np.ndarray)):
            self.columns = columns
        else:
            raise ProgressiveError('Invalid type for columns')
        self.rows = rows
        self.random = random
        # Only a truthy int/float is kept as throttle; everything else disables it.
        numeric = integer_types + (float, )
        self.throttle = (throttle
                         if (throttle and isinstance(throttle, numeric))
                         else False)
        fields = ", ".join("%s: float64" % col for col in self.columns)
        dshape = "{" + fields + "}"
        self._table = Table(self.generate_table_name('table'),
                            dshape=dshape,
                            create=True)
        self.columns = self._table.columns

    def run_step(self, run_number, step_size, howlong):
        """Append ``step_size`` random rows, capped by ``throttle`` and ``rows``.

        Raises ``StopIteration`` once the table already holds ``rows`` rows.
        """
        if step_size == 0:  # bug
            logger.error('Received a step_size of 0')
            return self._return_run_step(self.state_ready,
                                         steps_run=0,
                                         creates=0)
        logger.info('generating %d lines', step_size)
        if self.throttle:
            step_size = np.min([self.throttle, step_size])
        if self.rows >= 0:
            remaining = self.rows - len(self._table)
            if step_size > remaining:
                step_size = remaining
                if step_size <= 0:
                    raise StopIteration
                logger.info('truncating to %d lines', step_size)

        # One call to the generator per column, in column order.
        values = OrderedDict(
            (column, self.random(step_size)) for column in self.columns)
        with self.lock:
            self._table.append(values)
        if self.throttle:
            next_state = self.state_blocked
        else:
            next_state = self.state_ready
        return self._return_run_step(next_state, steps_run=step_size)
class Variable(Constant):
    """Constant-like module whose table can be updated from external input.

    The table is either given at construction or created lazily in
    ``run_step`` with the dshape and last row of the optional ``like`` slot.
    """

    def __init__(self, table=None, **kwds):
        """Declare the optional ``like`` input slot and forward ``table`` to the base class."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('like', type=Table, required=False)])
        super(Variable, self).__init__(table, **kwds)

    def is_input(self):
        """This module accepts external input."""
        return True

    def from_input(self, input_):
        """Add a new row made of the last row overridden by the values in ``input_``.

        Keys of ``input_`` that are not columns of the table are ignored and
        reported in the returned error string (empty when nothing went wrong).
        Raises ``ProgressiveError`` when ``input_`` is not a dict.
        """
        if not isinstance(input_, dict):
            raise ProgressiveError('Expecting a dictionary')
        if self._table is None:
            # Without a table there is nothing to update. Previously, a
            # connected but not-yet-run 'like' slot fell through and crashed
            # on self._table.last().
            if self.get_input_slot('like') is None:
                error = 'Variable %s with no initial value and no input slot' % self.name
            else:
                error = 'Variable %s not initialized yet from its input slot' % self.name
            logger.error(error)
            return error
        last = self._table.last()
        if last is None:
            # Empty table: start from a row of None values.
            last = {v: None for v in self._table.columns}
        else:
            last = last.to_json()
        error = ''
        for (k, v) in six.iteritems(input_):
            if k in last:
                last[k] = v
            else:
                error += 'Invalid key %s ignored. ' % k
        # Result unused; the call is kept as in the original code.
        _ = self.scheduler().for_input(self)
        #last['_update'] = run_number
        self._table.add(last)
        return error

    def run_step(self, run_number, step_size, howlong):
        """Create the table from the ``like`` slot if it does not exist yet, then block."""
        if self._table is None:
            slot = self.get_input_slot('like')
            if slot is not None:
                like = slot.data()
                if like is not None:
                    with slot.lock:
                        self._table = Table(self.generate_table_name('like'),
                                            dshape=like.dshape,
                                            create=True)
                        self._table.append(like.last().to_dict(ordered=True),
                                           indices=[0])
        return self._return_run_step(self.state_blocked, steps_run=1)
def test_loc_table_selection(self):
    """A TableSelectedView over ids 5..7 exposes only those rows."""
    table = Table('table_for_sel', dshape="{a: int, b: float32}", create=True)
    table.resize(10)
    # Random draws keep the original order: integers first, then floats.
    ivalues = np.random.randint(100, size=20)
    table['a'] = ivalues[:10]
    fvalues = np.array(np.random.rand(20), np.float32)
    table['b'] = fvalues[:10]
    # The second half of the data comes in through append.
    table.append({'a': ivalues[10:], 'b': fvalues[10:]})
    selection = bitmap(range(5, 8))
    view = TableSelectedView(table, selection, None)
    self.assertEqual(type(view), TableSelectedView)
    self.assertTrue(np.array_equal(view[0].value, ivalues[5:8]))
    self.assertEqual(view.at[6, 'a'], ivalues[6])
    self.assertEqual(view.at[7, 'b'], fvalues[7])
    # Ids just outside the selection must not be reachable through the view.
    for outside in (4, 8):
        with self.assertRaises(KeyError):
            self.assertEqual(view.at[outside, 'a'], ivalues[outside])
def test_loc_table_selection(self) -> None:
    """Selecting ids 5..7 with ``loc`` yields a BaseTable exposing only those rows."""
    table = Table("table_for_sel", dshape="{a: int, b: float32}", create=True)
    table.resize(10)
    # Random draws keep the original order: integers first, then floats.
    ivalues = np.random.randint(100, size=20)
    table["a"] = ivalues[:10]
    fvalues = np.array(np.random.rand(20), np.float32)
    table["b"] = fvalues[:10]
    # The second half of the data comes in through append.
    table.append({"a": ivalues[10:], "b": fvalues[10:]})
    selection = bitmap(range(5, 8))
    view = table.loc[selection, :]
    assert view is not None
    self.assertEqual(type(view), BaseTable)
    self.assertTrue(np.array_equal(view[0].value, ivalues[5:8]))
    self.assertEqual(view.at[6, "a"], ivalues[6])
    self.assertEqual(view.at[7, "b"], fvalues[7])
    # Ids just outside the selection must not be reachable through the view.
    for outside in (4, 8):
        with self.assertRaises(KeyError):
            self.assertEqual(view.at[outside, "a"], ivalues[outside])
def test_loc_table_computed(self) -> None:
    """Check ``loc`` selections on a table that carries a computed column."""
    table = Table(
        "table_for_test_computed_columns",
        dshape="{a: int, b: float32}",
        create=True,
    )
    table.resize(10)
    ivalues = np.random.randint(100, size=20)
    table["a"] = ivalues[:10]
    fvalues = np.array(np.random.rand(20), np.float32)
    table["b"] = fvalues[:10]
    self.assertEqual(table.shape, (10, 2))
    table.append({"a": ivalues[10:], "b": fvalues[10:]})
    self.assertEqual(table.shape, (20, 2))

    # The shape asserted on the table itself is still (20, 2) after add_computed.
    table.add_computed("arcsin_b", "b", np.arcsin)
    self.assertEqual(table.shape, (20, 2))

    # Single-column selections, stored and computed.
    col_b = table.loc[:, "b"]
    assert col_b
    self.assertEqual(col_b.shape, (20, 1))
    col_arcsin = table.loc[:, "arcsin_b"]
    assert col_arcsin
    self.assertEqual(col_arcsin.shape, (20, 1))

    # Row selection, then a row + column selection on top of it.
    selection = bitmap(range(5, 8))
    view = table.loc[selection, :]
    assert view is not None
    self.assertEqual(view.shape, (3, 3))
    view2 = view.loc[selection, ["b", "arcsin_b"]]
    assert view2 is not None
    self.assertEqual(view2.shape, (3, 2))

    self.assertTrue(
        np.allclose(np.arcsin(col_b.to_array()), col_arcsin.to_array()))
    self.assertEqual(type(view), BaseTable)
    self.assertEqual(type(view2), BaseTable)

    expected_columns = (
        ivalues[5:8],
        fvalues[5:8],
        np.arcsin(fvalues[5:8]),
    )
    for position, expected in enumerate(expected_columns):
        self.assertTrue(np.array_equal(view[position].value, expected))

    self.assertEqual(view.at[6, "a"], ivalues[6])
    self.assertEqual(view.at[7, "b"], fvalues[7])
    self.assertEqual(view.at[7, "arcsin_b"], np.arcsin(fvalues[7]))
    # Ids just outside the selection must not be reachable through the view.
    for outside in (4, 8):
        with self.assertRaises(KeyError):
            self.assertEqual(view.at[outside, "a"], ivalues[outside])
def test_loc_tableview(self):
    """Check ``loc``/``iloc`` applied to a sliced view of a table."""
    table = Table('table_loc', dshape="{a: int, b: float32}", create=True)
    table.resize(10)
    ivalues = np.random.randint(100, size=20)
    table['a'] = ivalues[:10]
    fvalues = np.random.rand(20)
    table['b'] = fvalues[:10]
    table.append({'a': ivalues[10:], 'b': fvalues[10:]})

    # The test expects loc[2:11] to cover ids 2 through 11 inclusive.
    view = table.loc[2:11]
    self.assertEqual(type(view), TableSlicedView)
    self.assertTrue(np.array_equal(view._column(0)[:], ivalues[2:12]))

    # Slices of the view, by position and then by id.
    by_position = view.iloc[3:7]
    self.assertTrue(
        np.array_equal(by_position._column(0)[:], view._column(0)[3:7]))
    by_id = view.loc[3:6]
    self.assertTrue(
        np.array_equal(by_id._column(0)[:],
                       view._column(0)[view.id_to_index(slice(3, 6))]))

    # List selections of the view, by position and then by id.
    picked = (3, 4, 6, 9)
    by_position = view.iloc[list(picked)]
    self.assertEqual(type(by_position), TableSelectedView)
    self.assertTrue(
        np.array_equal(by_position._column(0).values,
                       view._column(0)[list(picked)]))
    by_id = view.loc[list(picked)]
    self.assertEqual(type(by_id), TableSelectedView)
    self.assertTrue(
        np.array_equal(by_id._column(0).values,
                       view._column(0)[view.id_to_index(list(picked))]))
def test_row(self):
    """The test expects a Row to follow the last row of its table, before and after an append."""
    source = {'a': [1, 2, 3], 'b': [10.1, 0.2, 0.3]}
    table = Table('table', data=source, create=True)
    row = Row(table)

    # Initially the row shows the third (last) row.
    self.assertEqual(len(row), 2)  # 2 values
    self.assertEqual(row['a'], 3)
    self.assertEqual(row['b'], 0.3)

    # Writing through the row is visible in the table.
    row['a'] = 4
    self.assertEqual(row['a'], 4)
    last_id = len(table) - 1
    self.assertEqual(table.at[last_id, 'a'], 4)

    # After an append the row shows the new last row.
    table.append({'a': [4, 5], 'b': [0.4, 0.5]})
    self.assertEqual(len(row), 2)  # 2 values
    self.assertEqual(row['a'], 5)
    self.assertEqual(row['b'], 0.5)
def test_row(self) -> None:
    """The test expects a Row to follow the last row of its table, before and after an append."""
    source = {"a": [1, 2, 3], "b": [10.1, 0.2, 0.3]}
    table = Table("table", data=source, create=True)
    row = Row(table)

    # Initially the row shows the third (last) row.
    self.assertEqual(len(row), 2)  # 2 values
    self.assertEqual(row["a"], 3)
    self.assertEqual(row["b"], 0.3)

    # Writing through the row is visible in the table.
    row["a"] = 4
    self.assertEqual(row["a"], 4)
    last_id = len(table) - 1
    self.assertEqual(table.at[last_id, "a"], 4)

    # After an append the row shows the new last row.
    table.append({"a": [4, 5], "b": [0.4, 0.5]})
    self.assertEqual(len(row), 2)  # 2 values
    self.assertEqual(row["a"], 5)
    self.assertEqual(row["b"], 0.5)
def test_loc_tableview(self) -> None:
    """Check ``loc`` applied to a view obtained with ``loc`` on a table."""
    table = Table("table_loc", dshape="{a: int, b: float32}", create=True)
    table.resize(10)
    ivalues = np.random.randint(100, size=20)
    table["a"] = ivalues[:10]
    fvalues = np.random.rand(20)
    table["b"] = fvalues[:10]
    table.append({"a": ivalues[10:], "b": fvalues[10:]})

    # The test expects loc[2:11] to cover ids 2 through 11 inclusive.
    view = table.loc[2:11]
    assert view is not None
    self.assertEqual(type(view), BaseTable)
    self.assertTrue(np.array_equal(view._column(0)[:], ivalues[2:12]))

    # Slice selections of the view.
    sliced = view.loc[3:7]
    assert sliced is not None
    self.assertTrue(
        np.array_equal(sliced._column(0)[:], view._column(0)[3:7]))
    sliced = view.loc[3:6]
    assert sliced is not None
    self.assertTrue(
        np.array_equal(
            sliced._column(0)[:],
            view._column(0)[view.id_to_index(slice(3, 6))]))

    # List selections of the view (the same selection is checked two ways).
    picked = (3, 4, 6, 9)
    chosen = view.loc[list(picked)]
    assert chosen is not None
    self.assertEqual(type(chosen), BaseTable)
    self.assertTrue(
        np.array_equal(
            chosen._column(0).values, view._column(0)[list(picked)]))
    chosen = view.loc[list(picked)]
    assert chosen is not None
    self.assertEqual(type(chosen), BaseTable)
    self.assertTrue(
        np.array_equal(
            chosen._column(0).values,
            view._column(0)[view.id_to_index(list(picked))],
        ))
class Counter(TableModule):
    """Count the rows created on the ``table`` slot in a one-row ``counter`` table."""

    def __init__(self, **kwds):
        """Declare the required ``table`` input slot."""
        table_slot = SlotDescriptor('table', type=Table, required=True)
        self._add_slots(kwds, 'input_descriptors', [table_slot])
        super(Counter, self).__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self):
        """Ready as soon as created rows are pending; otherwise ask the base class."""
        pending = self.get_input_slot('table').created.any()
        return True if pending else super(Counter, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Consume up to ``step_size`` created rows and add their number to the counter."""
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            # Updates or deletions invalidate the count: restart from scratch.
            slot.reset()
            if self._table is not None:
                self._table.resize(0)
            slot.update(run_number)
        created = slot.created.next(step_size)  # returns a slice
        n_steps = indices_len(created)
        if n_steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()  # fetched as in the original code; not used below
        increment = pd.DataFrame({'counter': n_steps}, index=[0])
        if self._table is None:
            self._table = Table(
                self.generate_table_name('counter'),
                data=increment,
                # scheduler=self.scheduler(),
                create=True)
        elif len(self._table) == 0:  # has been resetted
            self._table.append(increment)
        else:
            self._table['counter'].loc[0] += n_steps
        return self._return_run_step(self.next_state(slot), steps_run=n_steps)
def test_tablechangemanager(self) -> None:
    """Main test: drive several TableChangeManager instances over one table.

    The scheduler run number is bumped by hand between steps. Rows are
    appended, updated and deleted, and after each ``update`` call the
    created/updated/deleted buffers of the managers are checked.
    """
    # pylint: disable=protected-access,too-many-locals,too-many-statements
    table = Table("test_changemanager_table",
                  data={
                      "a": [1, 2, 3],
                      "b": [10.1, 0.2, 0.3]
                  })
    col_a = table["a"]
    col_b = table["b"]
    s = self.scheduler()
    table.changes = TableChanges()
    s._run_number = 1
    last = s._run_number
    slot = FakeSlot(table)

    # Three managers on the same slot; all start empty with last_update == 0.
    mid1 = "m1"
    changemanager = TableChangeManager(slot,
                                       buffer_updated=True,
                                       buffer_deleted=True)
    self.assertEqual(changemanager.last_update(), 0)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    mid2 = "m2"
    cm2 = TableChangeManager(slot,
                             buffer_updated=True,
                             buffer_deleted=True)
    self.assertEqual(cm2.last_update(), 0)
    self.assertEqual(cm2.created.length(), 0)
    self.assertEqual(cm2.updated.length(), 0)
    self.assertEqual(cm2.deleted.length(), 0)

    # cm3 is only checked in its initial state; it is never updated below.
    cm3 = TableChangeManager(slot,
                             buffer_updated=True,
                             buffer_deleted=True)
    self.assertEqual(cm3.last_update(), 0)
    self.assertEqual(cm3.created.length(), 0)
    self.assertEqual(cm3.updated.length(), 0)
    self.assertEqual(cm3.deleted.length(), 0)

    # First update of the first manager: the three initial rows are created.
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(0, 3))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Append one row: only that row is reported as created.
    s._run_number += 1
    last = s._run_number
    table.append({"a": [4], "b": [0.5]})
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(3, 4))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Append another row.
    s._run_number += 1
    last = s._run_number
    table.append({"a": [5], "b": [0.5]})
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(4, 5))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Modify rows 3 and 4: they are reported as updated.
    s._run_number += 1
    col_a[3] = 42
    col_b[3] = 0.42
    col_b[4] = 0.52
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.next(), slice(3, 5))
    self.assertEqual(changemanager.deleted.length(), 0)

    # No change between two updates: every buffer stays empty.
    s._run_number += 1
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # First update of the second manager: all five rows are created and
    # the modifications made so far are not reported as updates.
    s._run_number += 1
    last2 = 0
    col_a[2] = 22
    col_b[2] = 0.22
    col_b[1] = 0.12
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(0, 5))
    self.assertEqual(cm2.updated.length(), 0)
    self.assertEqual(cm2.deleted.length(), 0)

    # Modify rows 0 and 2 and append a row.
    s._run_number += 1
    col_a[0] = 11
    col_b[0] = 0.11
    col_b[2] = 0.32
    table.append({"a": [6], "b": [0.6]})

    # The first manager reports rows 0..2 as updated: it also catches up
    # on the writes to rows 1 and 2 made before cm2's first update.
    s._run_number += 1
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(5, 6))
    self.assertEqual(changemanager.updated.next(), slice(0, 3))
    self.assertEqual(changemanager.deleted.length(), 0)

    # The second manager only reports rows 0 and 2 as updated.
    s._run_number += 1
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(5, 6))
    self.assertEqual(list(cm2.updated.next(as_slice=False)), [0, 2])
    self.assertEqual(cm2.deleted.length(), 0)

    s._run_number += 1
    col_a[0] = 1
    col_b[0] = 0.11
    col_b[2] = 0.22

    # test deletes
    s._run_number += 1
    del table.loc[2]
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    # new behaviour prev. 0
    self.assertEqual(changemanager.updated.length(), 1)
    self.assertEqual(changemanager.deleted.next(), slice(2, 3))
    # A deleted id can no longer be looked up.
    with self.assertRaises(KeyError):
        table.loc[2]
    # Not sure we want to specify what happens inside a deleted slot?
    # self.assertTrue(np.all(a[:]==np.array([1,2,a.fillvalue,42,5,6])))
    # self.assertTrue(np.all(b[:]==np.array([0.11,0.12,a.fillvalue,0.42,.52,0.6])))

    # Delete another row, append two rows and modify row 5; the second
    # manager sees both deletions (ids 2 and 4) at once.
    s._run_number += 1
    del table.loc[4]
    table.append({"a": [7, 8], "b": [0.7, 0.8]})
    col_a[5] = 0.55
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(6, 8))
    # new behaviour, prev. slice(5, 6)
    self.assertEqual(cm2.updated.next(), bitmap([0, 5]))
    self.assertEqual(list(cm2.deleted.next(as_slice=False)), [2, 4])

    # TODO test reset
    # After reset the manager reports a last update of 0 again.
    changemanager.reset(mid1)
    self.assertEqual(changemanager.last_update(), 0)
class Histogram1D(TableModule):
    """
    Progressively compute a 1D histogram of one column of the input table.

    The histogram range comes from the ``min`` and ``max`` input slots,
    widened by a margin (see ``get_delta``). Each step appends a row
    ``{array, min, max, time}`` to the output table.
    """
    parameters = [('bins', np.dtype(int), 128),
                  ('delta', np.dtype(float), -5)]  # 5%

    schema = "{ array: var * int32, min: float64, max: float64, time: int64 }"

    def __init__(self, column, **kwds):
        """Declare the ``table``, ``min`` and ``max`` input slots and create the output table."""
        self._add_slots(kwds, 'input_descriptors', [
            SlotDescriptor('table', type=Table, required=True),
            SlotDescriptor('min', type=Table, required=True),
            SlotDescriptor('max', type=Table, required=True)
        ])
        super(Histogram1D, self).__init__(dataframe_slot='table', **kwds)
        self.column = column
        self.total_read = 0
        self.default_step_size = 1000
        self._histo = None   # accumulated bin counts
        self._edges = None   # bin edges returned by np.histogram
        self._bounds = None  # (min, max) range currently used
        self._table = Table(self.generate_table_name('Histogram1D'),
                            dshape=Histogram1D.schema,
                            chunks={'array': (16384, 128)},
                            create=True)

    def is_ready(self):
        """Ready when bounds are known and created rows are pending; else defer to the base class."""
        if self._bounds and self.get_input_slot('table').created.any():
            return True
        return super(Histogram1D, self).is_ready()

    def run_step(self, run_number, step_size, howlong):
        """Add up to ``step_size`` created rows to the histogram and append the result."""
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        min_slot = self.get_input_slot('min')
        min_slot.update(run_number)
        max_slot = self.get_input_slot('max')
        max_slot.update(run_number)

        if dfslot.updated.any() or dfslot.deleted.any():
            # Updates/deletions cannot be applied incrementally: start over.
            logger.debug('reseting histogram')
            dfslot.reset()
            self._histo = None
            self._edges = None
            dfslot.update(run_number)

        if not (dfslot.created.any() or min_slot.created.any()
                or max_slot.created.any()):
            logger.info('Input buffers empty')
            return self._return_run_step(self.state_blocked, steps_run=0)

        bounds = self.get_bounds(min_slot, max_slot)
        if bounds is None:
            logger.debug('No bounds yet at run %d', run_number)
            return self._return_run_step(self.state_blocked, steps_run=0)

        bound_min, bound_max = bounds
        if self._bounds is None:
            delta = self.get_delta(*bounds)
            self._bounds = (bound_min - delta, bound_max + delta)
            logger.info("New bounds at run %d: %s", run_number, self._bounds)
        else:
            (old_min, old_max) = self._bounds
            delta = self.get_delta(*bounds)
            # Recompute when the data leaves the current range, or when the
            # range has become wider than the data by more than delta.
            if (bound_min < old_min or bound_max > old_max) \
               or bound_min > (old_min + delta) \
               or bound_max < (old_max - delta):
                self._bounds = (bound_min - delta, bound_max + delta)
                logger.info('Updated bounds at run %d: %s', run_number,
                            self._bounds)
                dfslot.reset()
                dfslot.update(run_number)
                self._histo = None
                self._edges = None

        (curr_min, curr_max) = self._bounds
        if curr_min >= curr_max:
            logger.error('Invalid bounds: %s', self._bounds)
            return self._return_run_step(self.state_blocked, steps_run=0)

        input_df = dfslot.data()
        indices = dfslot.created.next(step_size)  # returns a slice or ... ?
        steps = indices_len(indices)
        logger.info('Read %d rows', steps)
        self.total_read += steps
        column = input_df[self.column]
        column = column.loc[fix_loc(indices)]
        # Reuse the existing edges so successive counts can be summed.
        bins = self._edges if self._edges is not None else self.params.bins
        histo = None
        if len(column) > 0:
            # `normed` was removed in NumPy 1.24 and raises TypeError there;
            # density=False alone yields the raw counts.
            histo, self._edges = np.histogram(column,
                                              bins=bins,
                                              range=[curr_min, curr_max],
                                              density=False)
        if self._histo is None:
            self._histo = histo
        elif histo is not None:
            self._histo += histo
        values = {
            'array': [self._histo],
            'min': [curr_min],
            'max': [curr_max],
            'time': [run_number]
        }
        with self.lock:
            self._table['array'].set_shape((self.params.bins, ))
            self._table.append(values)
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)

    def get_bounds(self, min_slot, max_slot):
        """Return ``(min, max)`` for the column from the two slots.

        Returns None when either table is empty and no bounds are known yet.
        """
        min_slot.created.next()
        with min_slot.lock:
            min_df = min_slot.data()
            if len(min_df) == 0 and self._bounds is None:
                return None
            min_ = min_df.last(self.column)
        max_slot.created.next()
        with max_slot.lock:
            max_df = max_slot.data()
            if len(max_df) == 0 and self._bounds is None:
                return None
            max_ = max_df.last(self.column)
        return (min_, max_)

    def get_delta(self, min_, max_):
        """Return the margin to apply around ``[min_, max_]``.

        A negative ``delta`` parameter is a percentage of the extent
        (-5 means 5%). A non-negative value is returned unchanged as an
        absolute margin; previously that case returned None and broke the
        bounds arithmetic in ``run_step``.
        """
        delta = self.params['delta']
        extent = max_ - min_
        if delta < 0:
            return extent * delta / -100.0
        return delta

    def get_histogram(self):
        """Return the histogram as plain lists: edges, values, min and max."""
        min_ = self._bounds[0] if self._bounds else None
        max_ = self._bounds[1] if self._bounds else None
        edges = self._edges
        if edges is None:
            edges = []
        elif isinstance(edges, integer_types):
            edges = [edges]
        else:
            edges = edges.tolist()
        return {
            "edges": edges,
            "values": self._histo.tolist() if self._histo is not None else [],
            "min": min_,
            "max": max_
        }

    def is_visualization(self):
        """This module provides a visualization."""
        return True

    def get_visualization(self):
        """Return the visualization name."""
        return "histogram1d"

    def to_json(self, short=False):
        """Return the base JSON, with the histogram added unless ``short`` is set."""
        json = super(Histogram1D, self).to_json(short)
        if short:
            return json
        return self._hist_to_json(json)

    def _hist_to_json(self, json):
        """Store the current histogram under the ``histogram`` key of ``json``."""
        json['histogram'] = self.get_histogram()
        return json
def test_tablechangemanager(self):
    """Main test: drive several TableChangeManager instances over one table.

    The scheduler run number is bumped by hand between steps. Two managers
    follow the table itself and a third one follows a view on ids 1..2.
    After each ``update`` call the created/updated/deleted buffers are
    checked.
    """
    # pylint: disable=protected-access,too-many-locals,too-many-statements
    table = Table('test_changemanager_table',
                  data={
                      'a': [1, 2, 3],
                      'b': [10.1, 0.2, 0.3]
                  })
    col_a = table['a']
    col_b = table['b']
    s = self.scheduler
    table.changes = TableChanges()
    s._run_number = 1
    last = s._run_number
    slot = FakeSlot(table)

    # Three managers on the same slot; all start empty with last_update == 0.
    mid1 = 1
    changemanager = TableChangeManager(slot,
                                       buffer_updated=True,
                                       buffer_deleted=True)
    self.assertEqual(changemanager.last_update(), 0)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    mid2 = 2
    cm2 = TableChangeManager(slot,
                             buffer_updated=True,
                             buffer_deleted=True)
    self.assertEqual(cm2.last_update(), 0)
    self.assertEqual(cm2.created.length(), 0)
    self.assertEqual(cm2.updated.length(), 0)
    self.assertEqual(cm2.deleted.length(), 0)

    mid3 = 3
    cm3 = TableChangeManager(slot,
                             buffer_updated=True,
                             buffer_deleted=True)
    self.assertEqual(cm3.last_update(), 0)
    self.assertEqual(cm3.created.length(), 0)
    self.assertEqual(cm3.updated.length(), 0)
    self.assertEqual(cm3.deleted.length(), 0)

    # First update of the first manager: the three initial rows are created.
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(0, 3))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Append one row: only that row is reported as created.
    s._run_number += 1
    last = s._run_number
    table.append({'a': [4], 'b': [0.5]})
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(3, 4))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Append another row.
    s._run_number += 1
    last = s._run_number
    table.append({'a': [5], 'b': [0.5]})
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(4, 5))
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # Modify rows 3 and 4: they are reported as updated.
    s._run_number += 1
    col_a[3] = 42
    col_b[3] = 0.42
    col_b[4] = 0.52
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.next(), slice(3, 5))
    self.assertEqual(changemanager.deleted.length(), 0)

    # No change between two updates: every buffer stays empty.
    s._run_number += 1
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.length(), 0)

    # First update of the second manager: all five rows are created and
    # the modifications made so far are not reported as updates.
    s._run_number += 1
    last2 = 0
    col_a[2] = 22
    col_b[2] = 0.22
    col_b[1] = 0.12
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(0, 5))
    self.assertEqual(cm2.updated.length(), 0)
    self.assertEqual(cm2.deleted.length(), 0)

    # Modify rows 0 and 2, append a row, and let the third manager follow
    # a view restricted to ids 1..2.
    s._run_number += 1
    col_a[0] = 11
    col_b[0] = 0.11
    col_b[2] = 0.32
    table.append({'a': [6], 'b': [0.6]})
    tableview = table.loc[1:2]
    last3 = s._run_number
    cm3.update(last3, tableview, mid=mid3)
    self.assertEqual(cm3.created.next(), slice(1, 3))  # ids, not indices
    # NOTE(review): the next two assertions inspect cm2 right after cm3 was
    # updated -- possibly a copy-paste slip for cm3; confirm.
    self.assertEqual(cm2.updated.length(), 0)
    self.assertEqual(cm2.deleted.length(), 0)

    # The first manager reports rows 0..2 as updated: it also catches up
    # on the writes to rows 1 and 2 made before cm2's first update.
    s._run_number += 1
    last = s._run_number
    # with self.assertRaises(ValueError):
    #     changemanager.update(last+1, table, mid=mid1)
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.next(), slice(5, 6))
    self.assertEqual(changemanager.updated.next(), slice(0, 3))
    self.assertEqual(changemanager.deleted.length(), 0)

    # The second manager only reports rows 0 and 2 as updated.
    s._run_number += 1
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(5, 6))
    self.assertEqual(list(cm2.updated.next()), [0, 2])
    self.assertEqual(cm2.deleted.length(), 0)

    # Of the rows written here, only id 2 lies inside the view.
    s._run_number += 1
    col_a[0] = 1
    col_b[0] = 0.11
    col_b[2] = 0.22
    last3 = s._run_number
    cm3.update(last3, tableview, mid=mid3)
    self.assertEqual(cm3.last_update(), last3)
    self.assertEqual(cm3.created.length(), 0)
    self.assertEqual(cm3.updated.next(), slice(2, 3))
    self.assertEqual(cm3.deleted.length(), 0)

    # test deletes
    s._run_number += 1
    del table.loc[2]
    last = s._run_number
    changemanager.update(last, table, mid=mid1)
    self.assertEqual(changemanager.last_update(), last)
    self.assertEqual(changemanager.created.length(), 0)
    self.assertEqual(changemanager.updated.length(), 0)
    self.assertEqual(changemanager.deleted.next(), slice(2, 3))
    # A deleted id can no longer be looked up.
    with self.assertRaises(KeyError):
        table.loc[2]
    # Not sure we want to specify what happens inside a deleted slot?
    # self.assertTrue(np.all(a[:]==np.array([1,2,a.fillvalue,42,5,6])))
    # self.assertTrue(np.all(b[:]==np.array([0.11,0.12,a.fillvalue,0.42,.52,0.6])))

    # Delete another row, append two rows and modify row 5; the second
    # manager sees both deletions (ids 2 and 4) at once.
    s._run_number += 1
    del table.loc[4]
    table.append({'a': [7, 8], 'b': [0.7, 0.8]})
    col_a[5] = 0.55
    last2 = s._run_number
    cm2.update(last2, table, mid=mid2)
    self.assertEqual(cm2.last_update(), last2)
    self.assertEqual(cm2.created.next(), slice(6, 8))
    self.assertEqual(cm2.updated.next(), slice(5, 6))
    self.assertEqual(list(cm2.deleted.next()), [2, 4])

    # TODO test reset
    # After reset the manager reports a last update of 0 again.
    changemanager.reset()
    self.assertEqual(changemanager.last_update(), 0)
def test_tablechangemanager(self) -> None:
    """Check change tracking through a TableSelectedChangeManager.

    The scenario runs on a selected view of a small table and covers, in
    order: the initial exposure of the selection, an append that falls
    outside the selection, an append combined with a wider selection,
    deletions combined with a narrower selection, a slice selection
    assigned directly to the view, and finally a reset of the manager.
    """
    # pylint: disable=protected-access

    def new_manager():
        """Build a change manager on `slot` with every buffer enabled."""
        return TableSelectedChangeManager(
            slot,
            buffer_exposed=True,
            buffer_updated=True,
            buffer_deleted=True,
            buffer_masked=True,
        )

    def assert_no_changes(manager) -> None:
        """Assert that `manager` reports no created/updated/deleted item."""
        self.assertEqual(manager.created.length(), 0)
        self.assertEqual(manager.updated.length(), 0)
        self.assertEqual(manager.deleted.length(), 0)

    table = Table(
        "test_changemanager_table_selected",
        data={"a": [1, 2, 3], "b": [10.1, 0.2, 0.3]},
    )
    selection = bitmap([1, 2])
    table_selected: TableSelectedView = TableSelectedView(table, selection)
    s = self.s
    s._run_number = 1
    last = s._run_number
    slot = FakeSlot(table_selected)
    mid1 = "m1"

    # A freshly built manager has never been updated and holds no change.
    cm = new_manager()
    self.assertEqual(cm.last_update(), 0)
    assert_no_changes(cm)
    # Two more managers on the same slot start from the same clean state.
    for _ in range(2):
        other = new_manager()
        self.assertEqual(other.last_update(), 0)
        assert_no_changes(other)

    # First update: the selected ids are reported as created.
    cm.update(last, table_selected, mid=mid1)
    self.assertEqual(cm.last_update(), last)
    self.assertEqual(cm.created.next(), slice(1, 3))  # without the mask
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.deleted.length(), 0)

    # Append a row that is not part of the selection.
    s._run_number += 1
    last = s._run_number
    table.append({"a": [4], "b": [0.5]})  # invisible since id=3
    cm.update(last, table_selected, mid=mid1)
    self.assertEqual(cm.last_update(), last)
    assert_no_changes(cm)

    # Append more rows and widen the selection to cover ids 1..7.
    s._run_number += 1
    last = s._run_number
    table.append({"a": [5, 6, 7, 8], "b": [0.5, 0.6, 0.7, 0.8]})
    table_selected.selection = bitmap(range(1, 8))
    cm.update(last, table_selected, mid=mid1)
    self.assertEqual(cm.last_update(), last)
    self.assertEqual(cm.created.next(), slice(3, 8))
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.deleted.length(), 0)

    # Delete rows from the base table and narrow the selection.
    s._run_number += 1
    last = s._run_number
    del table.loc[[1, 2, 3]]
    table_selected.selection = bitmap(
        [3, 4]
    )  # i.e 1,2,5,6,7 were deleted in selection
    cm.update(last, table_selected, mid=mid1)
    self.assertEqual(cm.last_update(), last)
    self.assertEqual(cm.created.length(), 0)
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.base.deleted.length(), 3)  # 1, 2, 3
    self.assertEqual(
        cm.selection.deleted.length(), 6
    )  # 1, 2, 5, 6, 7[+3 removed because it was perm.deleted]
    self.assertEqual(cm.deleted.length(), 6)  # 1, 2, 3, 5, 6, 7
    # Consume the pending deletions of both underlying buffers.
    cm.base.deleted.next()
    cm.selection.deleted.next()

    # Append again and replace the selection by an open-ended slice.
    s._run_number += 1
    last = s._run_number
    table.append({"a": [15, 16, 17, 18], "b": [0.51, 0.61, 0.71, 0.81]})
    table_selected._selection = slice(5, None)
    cm.update(last, table_selected, mid=mid1)
    self.assertEqual(cm.last_update(), last)
    self.assertEqual(cm.base.created.changes, bitmap([8, 9, 10, 11]))
    self.assertEqual(
        cm.selection.created.changes, bitmap([5, 6, 7, 8, 9, 10, 11])
    )
    self.assertEqual(cm.selection.deleted.changes, bitmap([4]))
    self.assertEqual(cm.updated.length(), 0)
    self.assertEqual(cm.base.deleted.length(), 0)
    self.assertEqual(cm.deleted.length(), 1)
    cm.deleted.next()
    self.assertEqual(cm.deleted.length(), 0)
    cm.created.next()
    self.assertEqual(cm.base.created.length(), 0)
    self.assertEqual(cm.selection.created.length(), 0)

    # TODO: add scenarios for in-place value updates and for several
    # managers observing the same view; none is exercised here yet.

    # TODO test reset
    cm.reset(mid=mid1)
    self.assertEqual(cm.last_update(), 0)
class IdxMin(TableModule):
    """Module tracking, per column, the minimum value and the row id holding it.

    Two tables are maintained, both indexed by run number:

    * ``self._table`` stores, for each column, the id of the row holding the
      minimum seen so far;
    * ``self._min`` (served on the optional ``min`` output slot) stores the
      corresponding minimum values.

    Only the last ``history`` entries of each table are kept.
    """

    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, **kwds):
        """Declare the ``table`` input slot and the optional ``min`` output slot."""
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        self._add_slots(kwds, 'output_descriptors',
                        [SlotDescriptor('min', type=Table, required=False)])
        super().__init__(**kwds)
        self._min = None
        self.default_step_size = 10000

    def min(self):
        """Return the table of minimum values (``None`` until the first step)."""
        return self._min

    def get_data(self, name):
        """Return the data served on output slot `name`."""
        if name == 'min':
            return self.min()
        return super().get_data(name)

    def is_ready(self):
        """Report readiness as soon as the input slot has created rows pending."""
        if self.get_input_slot('table').created.any():
            return True
        return super().is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Process up to `step_size` newly created input rows.

        Returns the value of ``_return_run_step``: blocked with zero steps
        when no new row is available, otherwise the next state computed
        from the input slot with the number of rows processed.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            # Updated or deleted rows invalidate the running minimum: start
            # over.  Both state tables must be dropped together, otherwise
            # the incremental branch below would dereference a missing
            # index table while the min table is still present.
            dfslot.reset()
            self._table = None
            self._min = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_table = dfslot.data()
        # Per-column row id of the minimum within the new chunk.
        op = self.filter_columns(input_table, fix_loc(indices)).idxmin()

        if self._min is None:
            # First chunk: its minima are the running minima.
            min_ = OrderedDict(
                (col, input_table.at[ix, col])  # look the value up by row id
                for col, ix in op.items())
            self._min = Table(self.generate_table_name('_min'),
                              dshape=input_table.dshape,
                              create=True)
            self._min.append(min_, indices=[run_number])
            self._table = Table(self.generate_table_name('_table'),
                                dshape=input_table.dshape,
                                create=True)
            self._table.append(op, indices=[run_number])
        else:
            prev_min = self._min.last()
            prev_idx = self._table.last()
            min_ = OrderedDict(prev_min.items())
            # Iterate over a snapshot because `op` is modified in the loop.
            for col, ix in list(op.items()):
                val = input_table.at[ix, col]
                improved = not np.isnan(val) and (
                    np.isnan(min_[col]) or val < min_[col])
                if improved:
                    # The chunk holds a new minimum: keep its row id in `op`.
                    min_[col] = val
                else:
                    # Minimum unchanged: carry the previous row id forward so
                    # the index table stays consistent with the min table.
                    op[col] = prev_idx[col]
            with self.lock:
                self._table.append(op, indices=[run_number])
                self._min.append(min_, indices=[run_number])
                if len(self._table) > self.params.history:
                    # Rebuild both tables with only the most recent entries.
                    data = self._table.loc[
                        self._table.index[-self.params.history:]]
                    self._table = Table(self.generate_table_name('_table'),
                                        data=data,
                                        create=True)
                    data = self._min.loc[
                        self._min.index[-self.params.history:]]
                    self._min = Table(self.generate_table_name('_min'),
                                      data=data,
                                      create=True)
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
def run_step(self, run_number: int, step_size: int,
             howlong: float) -> ReturnRunStep:
    """Read up to `step_size` lines from the parser and append them to the result.

    The step:

    1. validates the parser state (may block, or stop the module);
    2. reads a list of data frames from the parser, closing it at end of
       input;
    3. applies the optional row filter and the id-column sanitizing;
    4. creates the result table on first use (or reopens the recovered
       one), otherwise appends to the existing table;
    5. when the parser has just flushed and saving is requested, records a
       recovery snapshot.

    Returns the value of ``_return_run_step`` with the number of rows
    created during this step.

    Raises:
        ProgressiveStopIteration: when there are no more filenames, when
            ``validate_parser`` returns an unexpected state, or when the
            parser delivers frames containing no row at all.
    """
    if step_size == 0:  # unexpected: log it and read nothing
        logger.error("Received a step_size of 0")
        return self._return_run_step(self.state_ready, steps_run=0)
    status = self.validate_parser(run_number)
    if status == self.state_terminated:
        raise ProgressiveStopIteration("no more filenames")
    elif status == self.state_blocked:
        return self._return_run_step(status, steps_run=0)
    elif status != self.state_ready:
        logger.error("Invalid state returned by validate_parser: %d", status)
        self.close()
        raise ProgressiveStopIteration("Unexpected situation")
    logger.info("loading %d lines", step_size)
    needs_save = self._needs_save()
    assert self.parser
    df_list: List[pd.DataFrame]
    try:
        df_list = self.parser.read(
            step_size, flush=needs_save)  # raises StopIteration at EOF
        if not df_list:
            raise ProgressiveStopIteration
    except ProgressiveStopIteration:
        # End of the current input: release the parser and stay ready so a
        # later step can re-validate (e.g. move on to another filename).
        self.close()
        if self.has_input_slot("filenames"):
            fn_slot = self.get_input_slot("filenames")
            assert fn_slot.output_module is not None
        self.parser = None
        return self._return_run_step(self.state_ready, 0)

    creates = sum(len(df) for df in df_list)
    if creates == 0:  # should not happen
        logger.error("Received 0 elements")
        raise ProgressiveStopIteration
    if self._filter is not None:
        df_list = [self._filter(df) for df in df_list]
    # Recount: the filter may have removed rows.
    creates = sum(len(df) for df in df_list)
    if creates == 0:
        logger.info("frame has been filtered out")
    else:
        self._rows_read += creates
        logger.info("Loaded %d lines", self._rows_read)
        if self.force_valid_ids:
            for df in df_list:
                force_valid_id_columns(df)
        if self.result is None:
            table = self.table
            data, dshape = self._data_as_array(pd.concat(df_list))
            if not self._recovery:
                self._table_params["name"] = self.generate_table_name(
                    "table")
                self._table_params["data"] = data
                self._table_params["dshape"] = dshape
                self._table_params["create"] = True
                self.result = Table(**self._table_params)
            else:
                # Reopen the table recovered from a previous run and append
                # the freshly read rows to it.
                self._table_params["name"] = self._recovered_csv_table_name
                self._table_params["create"] = False
                table = Table(**self._table_params)
                self.result = table
                # `_data_as_array` returns a (data, dshape) pair; only the
                # data part must be appended.
                table.append(data)
        else:
            table = self.table
            for df in df_list:
                data, _ = self._data_as_array(df)
                table.append(data)
        if (self.parser.is_flushed() and needs_save
                and self._recovery_table is None and self._save_context):
            # First snapshot: create the recovery table and its companion
            # table recording which table/input the snapshots belong to.
            table = self.table
            snapshot = self.parser.get_snapshot(
                run_number=run_number,
                table_name=table.name,
                last_id=table.last_id,
            )
            self._recovery_table = Table(
                name=self._recovery_table_name,
                data=pd.DataFrame(snapshot, index=[0]),
                create=True,
            )
            self._recovery_table_inv = Table(
                name=self._recovery_table_inv_name,
                data=pd.DataFrame(
                    dict(
                        table_name=table.name,
                        csv_input=self.filepath_or_buffer,
                    ),
                    index=[0],
                ),
                create=True,
            )
            self._last_saved_id = table.last_id
        elif self.parser.is_flushed() and needs_save and self._save_context:
            snapshot = self.parser.get_snapshot(
                run_number=run_number,
                last_id=table.last_id,
                table_name=table.name,
            )
            assert self._recovery_table
            self._recovery_table.add(snapshot)
            if len(self._recovery_table) > self._recovery_table_size:
                # Keep the recovery table bounded: drop the entry with the
                # smallest offset.
                oldest = self._recovery_table.argmin()["offset"]
                self._recovery_table.drop(oldest)
            self._last_saved_id = table.last_id
    return self._return_run_step(self.state_ready, steps_run=creates)