def test_filtering(self) -> None:
    """Check Table.eval filtering against pandas DataFrame.eval."""
    # Build a 20-row table: random int column 'a', random float column 'b'.
    t = Table("table_filtering", dshape="{a: int, b: float32}", create=True)
    t.resize(20)
    ivalues = np.random.randint(100, size=20)
    t["a"] = ivalues
    fvalues: np.ndarray[Any, Any] = np.random.rand(20) * 100
    t["b"] = fvalues
    # Mirror the table in a DataFrame to compare eval() results against pandas.
    df = pd.DataFrame(t.to_dict())

    def small_fun(expr: str, r: Any) -> None:
        # eval() with a table/view result object must select the same rows
        # as pandas boolean-mask filtering.
        te = t.eval(expr, result_object=r)
        dfe = df.eval(expr)
        self.assertTrue(np.array_equal(te["a"].loc[:], df[dfe]["a"]))
        # allclose rather than array_equal: 'b' is float32 in the table
        # but float64 on the pandas side.
        self.assertTrue(np.allclose(te["b"].loc[:], df[dfe]["b"]))

    def small_fun_ne(expr: str) -> None:
        # raw_numexpr returns the bare boolean mask; compare to pandas values.
        r = "raw_numexpr"
        te = t.eval(expr, result_object=r)
        dfe: pd.PandasObject = df.eval(expr)
        self.assertTrue(np.array_equal(te, dfe.values))

    small_fun_ne("(a>10) & (a <80)")
    small_fun_ne("(b>10) & (b <80)")
    small_fun_ne("a>=b")
    small_fun("(a>10) & (a <80)", "table")
    small_fun("(b>10) & (b <80)", "table")
    small_fun("a>=b", "table")
    small_fun("(a>10) & (a <80)", "view")
def test_filtering(self):
    """Verify Table.eval filtering agrees with pandas DataFrame.eval."""
    t = Table('table_filtering', dshape="{a: int, b: float32}", create=True)
    t.resize(20)
    t['a'] = np.random.randint(100, size=20)
    t['b'] = np.random.rand(20) * 100
    # pandas mirror of the table, used as the reference implementation.
    df = pd.DataFrame(t.to_dict())

    def check_result(expr, result_object):
        # A table/view result must select the same rows as pandas filtering.
        evaluated = t.eval(expr, result_object=result_object)
        mask = df.eval(expr)
        self.assertTrue(np.array_equal(evaluated['a'], df[mask]['a']))
        self.assertTrue(np.allclose(evaluated['b'], df[mask]['b']))

    def check_raw_mask(expr):
        # raw_numexpr yields the bare boolean array.
        evaluated = t.eval(expr, result_object='raw_numexpr')
        mask = df.eval(expr)
        self.assertTrue(np.array_equal(evaluated, mask.values))

    expressions = ('(a>10) & (a <80)', '(b>10) & (b <80)', 'a>=b')
    for expression in expressions:
        check_raw_mask(expression)
    for expression in expressions:
        check_result(expression, 'table')
    check_result('(a>10) & (a <80)', 'view')
def test_combine_first_nan(self):
    """CombineFirst must replace NaN values with data from later inputs."""
    s = self.scheduler()
    # Three constant one-row tables; the second holds NaNs that the third
    # must override.
    table_x = Table(name='tcf_xmin_xmax_nan',
                    data=pd.DataFrame({'xmin': [1], 'xmax': [2]}),
                    create=True)
    table_y_nan = Table(name='tcf_ymin_ymax_nan',
                        data=pd.DataFrame({'ymin': [np.nan], 'ymax': [np.nan]}),
                        create=True)
    table_y = Table(name='tcf_ymin_ymax2_nan',
                    data=pd.DataFrame({'ymin': [3], 'ymax': [4]}),
                    create=True)
    cst1 = Constant(table_x, scheduler=s)
    cst2 = Constant(table_y_nan, scheduler=s)
    cst3 = Constant(table_y, scheduler=s)
    cf = CombineFirst(scheduler=s)
    cf.input.table = cst1.output.table
    cf.input.table = cst2.output.table
    cf.input.table = cst3.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = cf.output.table
    s.start()
    s.join()
    last = cf.table().last().to_dict()
    # The NaN ymin/ymax from cst2 must have been filled by cst3's values.
    self.assertTrue(last['xmin'] == 1 and last['xmax'] == 2
                    and last['ymin'] == 3 and last['ymax'] == 4)
def run_step(self, run_number, step_size, howlong):
    """Count newly created rows of the 'table' slot into a one-row table.

    The single 'counter' cell accumulates the number of rows seen so far;
    updates or deletions on the input reset the count.
    """
    dfslot = self.get_input_slot('table')
    dfslot.update(run_number)
    if dfslot.updated.any() or dfslot.deleted.any():
        # In-place changes invalidate the running count: reset and restart.
        dfslot.reset()
        if self._table is not None:
            self._table.resize(0)
        dfslot.update(run_number)
    indices = dfslot.created.next(step_size)  # returns a slice
    steps = indices_len(indices)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)
    # NOTE: the original fetched dfslot.data() into an unused local;
    # removed since only the chunk size matters here.
    data = pd.DataFrame(dict(counter=steps), index=[0])
    if self._table is None:
        self._table = Table(
            self.generate_table_name('counter'),
            data=data,
            # scheduler=self.scheduler(),
            create=True)
    elif len(self._table) == 0:  # has been resetted
        self._table.append(data)
    else:
        # Accumulate into the single existing row.
        self._table['counter'].loc[0] += steps
    return self._return_run_step(self.next_state(dfslot), steps_run=steps)
def test_last_row_simple(self):
    """Joining two one-row constant tables yields all four columns."""
    s = self.scheduler()
    left = Table(name=get_random_name("cst1"), data={'xmin': [1], 'xmax': [2]})
    right = Table(name=get_random_name("cst2"), data={'ymin': [3], 'ymax': [4]})
    cst1 = Constant(left, scheduler=s)
    cst2 = Constant(right, scheduler=s)
    join = Join(scheduler=s)
    join.input.table = cst1.output.table
    join.input.table = cst2.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = join.output.table
    s.start()
    s.join()
    last = join.table().last()
    # The joined row carries the values from both constants.
    self.assertTrue(last['xmin'] == 1 and last['xmax'] == 2
                    and last['ymin'] == 3 and last['ymax'] == 4)
def test_merge_simple(self) -> None:
    """Index-based Merge of two one-row constant tables."""
    s = self.scheduler()
    cst1 = Constant(Table(name=None, data=pd.DataFrame({
        "xmin": [1],
        "xmax": [2]
    })), scheduler=s)
    cst2 = Constant(Table(name=None, data=pd.DataFrame({
        "ymin": [3],
        "ymax": [4]
    })), scheduler=s)
    merge = Merge(left_index=True, right_index=True, scheduler=s)
    # Both constants are connected to the same multi-input slot 0.
    merge.input[0] = cst1.output.result
    merge.input[0] = cst2.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = merge.output.result
    aio.run(s.start())
    _ = merge.trace_stats(max_runs=1)
    # pd.set_option('display.expand_frame_repr', False)
    # print(res)
    df = merge.table
    last = df.loc[df.index[-1]]
    assert last is not None
    # The merged row must contain the columns of both inputs.
    self.assertTrue(last["xmin"] == 1 and last["xmax"] == 2
                    and last["ymin"] == 3 and last["ymax"] == 4)
def test_paging_helper_t(self) -> None:
    """Exercise PagingHelper on a table, after a deletion, and on a view."""
    t = Table("table_for_paging", dshape="{a: int, b: float32}", create=True)
    t.resize(200)
    _ = np.arange(200)
    ivalues = np.random.randint(100, size=200)
    t["a"] = ivalues
    fvalues = np.array(np.random.rand(200), np.float32)
    t["b"] = fvalues
    # import pdb; pdb.set_trace()
    ph_t = PagingHelper(t)
    page = ph_t.get_page(0, 10)
    # First page of a dense table holds indices 0..9.
    self.assertEqual(page[0][0], 0)
    self.assertEqual(page[-1][0], 9)
    del t.loc[5]
    ph_t = PagingHelper(t)
    page = ph_t.get_page(0, 10)
    # After deleting id 5, the 10th row of the first page is id 10.
    self.assertEqual(page[0][0], 0)
    self.assertEqual(page[-1][0], 10)
    sel = bitmap(range(10, 75, 2))
    print(sel)
    view = t.loc[sel, :]
    self.assertTrue(view is not None)
    assert view is not None
    ph_t = PagingHelper(view)
    page = ph_t.get_page(10, 20)
    # Paging over a view follows the selection: every 2nd id starting at 10,
    # so page offset 10 maps to id 30 and the last entry to id 48.
    self.assertEqual(page[0][0], 30)
    self.assertEqual(page[-1][0], 48)
    print(page)
def test_join_simple(self):
    """A Reduce-expanded BinJoin over two constants produces the joined row."""
    s = self.scheduler()
    cst1 = Constant(
        Table(name='test_join_simple_cst1',
              data=pd.DataFrame({'xmin': [1], 'xmax': [2]}),
              create=True),
        scheduler=s)
    cst2 = Constant(
        Table(name='test_join_simple_cst2',
              data=pd.DataFrame({'ymin': [3], 'ymax': [4]}),
              create=True),
        scheduler=s)
    reduce_ = Reduce(BinJoin, "first", "second", "table", scheduler=s)
    reduce_.input.table = cst1.output.table
    reduce_.input.table = cst2.output.table
    # expand() materializes the binary-join tree over the connected inputs.
    join = reduce_.expand()
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = join.output.table
    s.start()
    s.join()
    res = join.trace_stats(max_runs=1)
    print(res)
    df = join.table()
    last = df.loc[df.index[-1]]
    self.assertTrue(last['xmin'] == 1 and last['xmax'] == 2
                    and last['ymin'] == 3 and last['ymax'] == 4)
def test_intersection(self) -> None:
    """Intersecting two Bisect selections must equal the conjunctive filter."""
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    # Constant bounds for the range query: 0.3 < _1 < 0.8.
    t_min = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    hist_index = HistogramIndex(column="_1", scheduler=s)
    hist_index.create_dependent_modules(random, "result")
    bisect_min = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=s)
    bisect_min.input[0] = hist_index.output.result
    # bisect_.input[0] = random.output.result
    bisect_min.input.limit = min_value.output.result
    bisect_max = Bisect(column="_1", op="<", hist_index=hist_index, scheduler=s)
    bisect_max.input[0] = hist_index.output.result
    # bisect_.input[0] = random.output.result
    bisect_max.input.limit = max_value.output.result
    inter = Intersection(scheduler=s)
    inter.input[0] = bisect_min.output.result
    inter.input[0] = bisect_max.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = inter.output.result
    aio.run(s.start())
    assert hist_index.input_module is not None
    # Reference: evaluate the combined predicate directly on the source data.
    idx = (hist_index.input_module.output["result"].data().eval(
        "(_1>0.3)&(_1<0.8)", result_object="index"))
    self.assertEqual(inter.table.index, bitmap(idx))
def _2_csv_2_const_scenario(module: Module, s: Scheduler) -> Callable[[Scheduler, int], None]:
    """Wire *module* to two CSV loaders and two constant tables.

    Returns a scheduler callback that stops the run after ten iterations.
    """
    csv_a = CSVLoader(get_dataset("smallfile"), index_col=False, header=None, scheduler=s)
    csv_b = CSVLoader(get_dataset("smallfile"), index_col=False, header=None, scheduler=s)
    table_c = Table("const_c_2_csv_2_const_scenario", dshape="{a: int}", create=True)
    const_c = Constant(table=table_c, scheduler=s)
    table_d = Table("const_d_2_csv_2_const_scenario", dshape="{a: int}", create=True)
    const_d = Constant(table=table_d, scheduler=s)
    module.input.a = csv_a.output.result
    module.input.b = csv_b.output.result
    module.input.c = const_c.output.result
    module.input.d = const_d.output.result

    def _fun(s: Scheduler, r: int) -> None:
        # Stop the scheduler once more than ten runs have completed.
        if r > 10:
            s.task_stop()

    return _fun
def create_table(self) -> None:
    """Create, reopen and introspect tables backed by self.storagegroup."""
    t = Table(
        "table",
        storagegroup=self.storagegroup,
        dshape="{a: int, b: float32, c: string, d: 10*int}",
        create=True,
    )
    self.assertTrue(t is not None)
    self.assertEqual(t.ncol, 4)
    # Column access by name and by position must return the same object.
    col1 = t["a"]
    col2 = t[0]
    self.assertTrue(col1 is col2)
    # Reopening with the same dshape is allowed.
    t = Table(
        "table",
        storagegroup=self.storagegroup,
        dshape="{a: int, b: float32, c: string, d: 10*int}",
    )
    self.assertTrue(t is not None)
    # Reopening without a dshape must recover the stored one.
    t = Table("table", storagegroup=self.storagegroup)
    self.assertEqual(
        t.dshape,
        ds.dshape("{a: int, b: float32, c: string, d: 10 * int}"))
    # fillvalues propagate to the corresponding column.
    t2 = Table(
        "bar_table",
        dshape="{a: int64, b: float64}",
        fillvalues={"a": -1},
        create=True,
    )
    self.assertEqual(t2.dshape, ds.dshape("{a: int64, b: float64}"))
    self.assertEqual(t2[0].fillvalue, -1)
def test_merge_simple(self):
    """Index-based Merge of two one-row constant tables (old slot API)."""
    s = self.scheduler()
    left = Table(name=None, data=pd.DataFrame({'xmin': [1], 'xmax': [2]}))
    right = Table(name=None, data=pd.DataFrame({'ymin': [3], 'ymax': [4]}))
    cst1 = Constant(left, scheduler=s)
    cst2 = Constant(right, scheduler=s)
    merge = Merge(left_index=True, right_index=True, scheduler=s)
    merge.input.table = cst1.output.table
    merge.input.table = cst2.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = merge.output.table
    s.start()
    s.join()
    res = merge.trace_stats(max_runs=1)
    df = merge.table()
    last = df.loc[df.index[-1]]
    # The merged row carries the columns of both inputs.
    self.assertTrue(last['xmin'] == 1 and last['xmax'] == 2
                    and last['ymin'] == 3 and last['ymax'] == 4)
def test_hist_index_min_max(self):
    "Test min_out and max_out on HistogramIndex"
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    # Fixed query bounds, fed through Constant modules.
    t_min = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    range_qry = RangeQuery(column='_1', scheduler=s)
    range_qry.create_dependent_modules(random,
                                       'table',
                                       min_value=min_value,
                                       max_value=max_value)
    prt = Print(proc=self.terse, scheduler=s)
    prt.input.df = range_qry.output.table
    # Tap the histogram index's min_out/max_out outputs through Min/Max.
    hist_index = range_qry.hist_index
    min_ = Min(name='min_' + str(hash(hist_index)), scheduler=s)
    min_.input.table = hist_index.output.min_out
    prt2 = Print(proc=self.terse, scheduler=s)
    prt2.input.df = min_.output.table
    max_ = Max(name='max_' + str(hash(hist_index)), scheduler=s)
    max_.input.table = hist_index.output.max_out
    pr3 = Print(proc=self.terse, scheduler=s)
    pr3.input.df = max_.output.table
    s.start()
    s.join()
    # The index's min/max must match the true column min/max of the data.
    res1 = random.table().min()['_1']
    res2 = min_.table().last().to_dict()['_1']
    self.assertAlmostEqual(res1, res2)
    res1 = random.table().max()['_1']
    res2 = max_.table().last().to_dict()['_1']
    self.assertAlmostEqual(res1, res2)
def run_step(self, run_number, step_size, howlong):
    """Incrementally fold per-column maxima of the 'table' input slot."""
    dfslot = self.get_input_slot('table')
    dfslot.update(run_number)
    if dfslot.updated.any() or dfslot.deleted.any():
        # In-place changes invalidate the running max: reset and start over.
        dfslot.reset()
        if self._table is not None:
            self._table.resize(0)
        dfslot.update(run_number)
    indices = dfslot.created.next(step_size)  # returns a slice
    steps = indices_len(indices)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)
    input_df = dfslot.data()
    # Max over the new chunk only; keepdims yields a one-row result.
    op = self.filter_columns(input_df, fix_loc(indices)).max(keepdims=True)
    if self._table is None:
        self._table = Table(
            self.generate_table_name('max'),
            data=op,
            # scheduler=self.scheduler(),
            create=True)
    elif len(self._table) == 0:  # has been resetted
        self._table.append(op)
    else:
        # Combine the chunk max with the previous row's max, then append.
        last = self._table.last()
        for colname in last:
            current_max = op[colname]
            current_max[0] = np.maximum(current_max, last[colname])
        self._table.append(op)
    #TODO manage the history in a more efficient way
    #if len(self._table) > self.params.history:
    #    self._table = self._table.loc[self._df.index[-self.params.history:]]
    return self._return_run_step(self.next_state(dfslot), steps_run=steps)
def test_intersection(self):
    """Intersection of two Bisect selections equals the conjunctive filter."""
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    # Constant bounds: keep rows where 0.3 < _1 < 0.8.
    t_min = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    hist_index = HistogramIndex(column='_1', scheduler=s)
    hist_index.create_dependent_modules(random, 'table')
    bisect_min = Bisect(column='_1', op='>', hist_index=hist_index, scheduler=s)
    bisect_min.input.table = hist_index.output.table
    #bisect_.input.table = random.output.table
    bisect_min.input.limit = min_value.output.table
    bisect_max = Bisect(column='_1', op='<', hist_index=hist_index, scheduler=s)
    bisect_max.input.table = hist_index.output.table
    #bisect_.input.table = random.output.table
    bisect_max.input.limit = max_value.output.table
    inter = Intersection(scheduler=s)
    inter.input.table = bisect_min.output.table
    inter.input.table = bisect_max.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = inter.output.table
    s.start()
    s.join()
    # Reference: evaluate the combined predicate directly on the source data.
    idx = hist_index.input_module.output['table']\
        .data().eval('(_1>0.3)&(_1<0.8)', result_object='index')
    self.assertEqual(inter.table().selection, bitmap(idx))
def test_mmap5(self):
    #pylint: disable=protected-access
    """Repeated identical short strings share storage: only 2 distinct entries."""
    self._rmtree()
    t = Table('table_mmap_5', dshape='{anint: int, atext: string}')
    for i in range(100):
        t.add(dict(anint=i, atext="abc"))
        t.add(dict(anint=i, atext="xyz"))
    distinct = set(t._column("atext").storagegroup["atext"].view)
    self.assertEqual(len(distinct), 2)
def test_mmap6(self):
    #pylint: disable=protected-access
    """Long strings are stored individually: 100 rows yield 100 entries."""
    long_text = "a" * LONG_SIZE
    self._rmtree()
    t = Table('table_mmap_6', dshape='{anint: int, atext: string}')
    for i in range(100):
        t.add(dict(anint=i, atext=long_text))
    distinct = set(t._column("atext").storagegroup["atext"].view)
    self.assertEqual(len(distinct), 100)
def test_join_simple(self) -> None:
    """Reduce.expand chains BinJoin over three constants into one joined row."""
    s = self.scheduler()
    cst1 = Constant(
        Table(
            name="test_join_simple_cst1",
            data=pd.DataFrame({"xmin": [1], "xmax": [2]}),
            create=True,
        ),
        scheduler=s,
    )
    cst2 = Constant(
        Table(
            name="test_join_simple_cst2",
            data=pd.DataFrame({"ymin": [3], "ymax": [4]}),
            create=True,
        ),
        scheduler=s,
    )
    cst3 = Constant(
        Table(
            name="test_join_simple_cst3",
            data=pd.DataFrame({"zmin": [5], "zmax": [6]}),
            create=True,
        ),
        scheduler=s,
    )
    # join=Join(scheduler=s)
    # reduce_ = Reduce(BinJoin, "first", "second", "table", scheduler=s)
    # reduce_.input[0] = cst1.output.result
    # reduce_.input[0] = cst2.output.result
    # reduce_.input[0] = cst3.output.result
    # join = reduce_.expand()
    # Reduce.expand builds the binary-join tree over the three inputs.
    join = Reduce.expand(
        BinJoin,
        "first",
        "second",
        "result",
        [cst1.output.result, cst2.output.result, cst3.output.result],
        scheduler=s,
    )
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = join.output.result
    aio.run(s.start())
    res = join.trace_stats(max_runs=1)
    print(res)
    df = join.table
    last = df.loc[df.index[-1]]
    assert last is not None
    # All six columns from the three inputs must appear in the last row.
    self.assertTrue(
        last["xmin"] == 1
        and last["xmax"] == 2
        and last["ymin"] == 3
        and last["ymax"] == 4
        and last["zmin"] == 5
        and last["zmax"] == 6
    )
class Var(TableModule):
    """
    Compute the variance of the columns of an input dataframe.
    """
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Var, self).__init__(dataframe_slot='table', **kwds)
        self._columns = columns
        # One OnlineVariance accumulator per column, keyed by column name.
        self._data = {}
        self.default_step_size = 1000

    def is_ready(self):
        # Ready as soon as the input slot has newly created rows.
        if self.get_input_slot('table').created.any():
            return True
        return super(Var, self).is_ready()

    def op(self, chunk):
        """Feed *chunk* into the per-column accumulators; return variances."""
        cols = chunk.columns
        ret = {}
        for c in cols:
            data = self._data.get(c)
            if data is None:
                data = OnlineVariance()
                self._data[c] = data
            data.add(chunk[c])
            ret[c] = data.variance
        return ret

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Process up to *step_size* new rows and append current variances."""
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            # In-place changes invalidate the output table.
            # NOTE(review): self._data accumulators are NOT reset here —
            # confirm whether that is intended.
            dfslot.reset()
            self._table = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.op(self.filter_columns(input_df, fix_loc(indices)))
        if self._table is None:
            self._table = Table(self.generate_table_name('var'),
                                dshape=input_df.dshape,
                                # scheduler=self.scheduler(),
                                create=True)
        self._table.append(op, indices=[run_number])
        # Removed a leftover debug print(self._table) here.
        # Keep only the most recent `history` rows.
        if len(self._table) > self.params.history:
            self._table = self._table.loc[self._table.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
def test_to_array(self) -> None:
    """to_array must densify columns (and key selections) into float64."""
    t = Table("table_to_array", dshape="{a: int, b: float32, c: real}", create=True)
    t.resize(10)
    ivalues = np.random.randint(100, size=10)
    t["a"] = ivalues
    fvalues = np.random.rand(10)
    t["b"] = fvalues
    dvalues = np.random.rand(10)
    t["c"] = dvalues
    a = t["a"]
    b = t["b"]
    c = t["c"]
    # Full conversion: one float64 column per table column.
    arr = t.to_array()
    self.assertEqual(arr.dtype, np.float64)
    self.assertEqual(arr.shape[0], t.nrow)
    self.assertEqual(arr.shape[1], t.ncol)
    self.assertTrue(np.allclose(a[:], arr[:, 0]))
    self.assertTrue(np.allclose(b[:], arr[:, 1]))
    self.assertTrue(np.allclose(c[:], arr[:, 2]))
    # Columns: restricting to a subset keeps only those columns.
    arr = t.to_array(columns=["a", "b"])
    self.assertEqual(arr.dtype, np.float64)
    self.assertEqual(arr.shape[0], t.nrow)
    self.assertEqual(arr.shape[1], 2)
    self.assertTrue(np.allclose(a[:], arr[:, 0]))
    self.assertTrue(np.allclose(b[:], arr[:, 1]))
    # Keys: a slice of row ids selects the corresponding rows.
    key1 = slice(2, 7)
    arr = t.to_array(key1)
    key = t.id_to_index(
        key1).to_slice_maybe()  # slices contain their bounds
    self.assertEqual(arr.dtype, np.float64)
    self.assertEqual(arr.shape[0], key.stop - key.start)
    self.assertEqual(arr.shape[1], 3)
    self.assertTrue(np.allclose(a[key], arr[:, 0]))
    self.assertTrue(np.allclose(b[key], arr[:, 1]))
    self.assertTrue(np.allclose(c[key], arr[:, 2]))
    # Keys with fancy indexing: an explicit id list works too.
    key2 = [2, 4, 6, 8]
    arr = t.to_array(key2)
    indices = t.id_to_index(key2)  # slices contain their bounds
    self.assertEqual(arr.dtype, np.float64)
    self.assertEqual(arr.shape[0], len(indices))
    self.assertEqual(arr.shape[1], 3)
    self.assertTrue(np.allclose(a[indices], arr[:, 0]))
    self.assertTrue(np.allclose(b[indices], arr[:, 1]))
    self.assertTrue(np.allclose(c[indices], arr[:, 2]))
def _create_table(tname: str, columns: Parameters) -> Table:
    """Build a Table named *tname* from (name, dtype, value) column triples."""
    dshape = ""
    data = {}
    for (name, dtype, val) in columns:
        # Assemble the "{name: type, ...}" dshape string incrementally.
        if dshape:
            dshape += ","
        dshape += "%s: %s" % (name, dshape_from_dtype(dtype))
        data[name] = val
    dshape = "{" + dshape + "}"
    assert Group.default_internal
    table = Table(tname, dshape=dshape, storagegroup=Group.default_internal(tname))
    # Seed the table with the provided column values.
    table.add(data)
    return table
def test_range_query_min_max(self):
    "Test min and max on RangeQuery output"
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    lower = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    upper = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    range_qry = self._query_min_max_impl(random, lower, upper, s)
    s.start()
    s.join()
    # The query's min/max outputs must echo the constant bounds.
    min_data = range_qry.output.min.data()
    max_data = range_qry.output.max.data()
    self.assertAlmostEqual(min_data['_1'].loc[0], 0.3)
    self.assertAlmostEqual(max_data['_1'].loc[0], 0.8)
def run_step(self, run_number, step_size, howlong):
    """Lazily create the output table from the last row of the 'like' slot."""
    if self._table is None:
        slot = self.get_input_slot('like')
        if slot is not None:
            like = slot.data()
            if like is not None:
                # Clone the schema of the 'like' table and seed it with
                # that table's last row at index 0.
                with slot.lock:
                    self._table = Table(self.generate_table_name('like'),
                                        dshape=like.dshape,
                                        create=True)
                    self._table.append(like.last().to_dict(ordered=True),
                                       indices=[0])
    # Unconditionally report blocked with one step run, whether or not
    # the table was (re)created above.
    return self._return_run_step(self.state_blocked, steps_run=1)
def test_last(self) -> None:
    """Table.last returns the final row, whole or per selected column(s)."""
    t = Table("table_last", dshape="{a: int, b: float32}", create=True)
    t.resize(10)
    t["a"] = np.random.randint(100, size=10)
    t["b"] = np.random.rand(10)
    # Expected final row, taken column by column.
    expected = [t._column(0)[-1], t._column(1)[-1]]
    full_row = list(notNone(t.last()).values())
    self.assertEqual(full_row, expected)
    # A single column name returns just its last value.
    self.assertEqual(t.last("a"), t._column(0)[-1])
    # A list of names returns the last values in the requested order.
    self.assertEqual(list(t.last(["a", "b"])), full_row)
def test_last(self):
    """Table.last returns the final row, for all or selected columns."""
    t = Table('table_last', dshape="{a: int, b: float32}", create=True)
    t.resize(10)
    t['a'] = np.random.randint(100, size=10)
    t['b'] = np.random.rand(10)
    expected = [t._column(0)[-1], t._column(1)[-1]]
    whole_row = list(t.last().values())
    self.assertEqual(whole_row, expected)
    # Single column and multi-column selections.
    self.assertEqual(t.last('a'), t._column(0)[-1])
    self.assertEqual(list(t.last(['a', 'b'])), whole_row)
def _create_table(tname, columns):
    """Build a Table named *tname* from (name, dtype, value) column triples."""
    fields = []
    data = {}
    for (name, dtype, val) in columns:
        fields.append('%s: %s' % (name, dshape_from_dtype(dtype)))
        data[name] = val
    # Assemble the "{name: type,...}" dshape from the collected fields.
    dshape = '{' + ','.join(fields) + '}'
    table = Table(tname, dshape=dshape,
                  storagegroup=Group.default_internal(tname))
    table.add(data)
    return table
class Max(TableModule):
    """Incrementally compute the per-column maxima of its input table."""
    parameters = [('history', np.dtype(int), 3)]

    def __init__(self, columns=None, **kwds):
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('table', type=Table, required=True)])
        super(Max, self).__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000

    def is_ready(self):
        # Ready as soon as the input slot has newly created rows.
        if self.get_input_slot('table').created.any():
            return True
        return super(Max, self).is_ready()

    @synchronized
    def run_step(self, run_number, step_size, howlong):
        """Fold up to *step_size* new rows into the running maxima."""
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            # In-place changes invalidate the running max: start over.
            dfslot.reset()
            if self._table is not None:
                self._table.resize(0)
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        # Max over the new chunk only; keepdims yields a one-row result.
        op = self.filter_columns(input_df, fix_loc(indices)).max(keepdims=True)
        if self._table is None:
            self._table = Table(
                self.generate_table_name('max'),
                data=op,
                # scheduler=self.scheduler(),
                create=True)
        elif len(self._table) == 0:  # has been resetted
            self._table.append(op)
        else:
            # Combine the chunk max with the previous row's max, then append.
            last = self._table.last()
            for colname in last:
                current_max = op[colname]
                current_max[0] = np.maximum(current_max, last[colname])
            self._table.append(op)
        #TODO manage the history in a more efficient way
        #if len(self._table) > self.params.history:
        #    self._table = self._table.loc[self._df.index[-self.params.history:]]
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
def test_mmap3(self):
    #pylint: disable=protected-access
    """A Table built from a DataFrame mirrors it; append doubles the rows."""
    self._rmtree()
    frame = pd.DataFrame({'a': [1, 2, 3],
                          'b': [0.1, 0.2, 0.3],
                          'c': ['a', 'b', 'cd']})
    t = Table('table_2', data=frame)
    self.assertEqual(len(t), len(frame))
    for name in frame:
        # Every column must round-trip with identical length and values.
        expected = frame[name]
        actual = t[name]
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(np.all(expected.values == actual.values))
    t.append(frame)
    self.assertEqual(len(t), 2 * len(frame))
    self._rmtree()
def _delete_table(self, t: Table) -> None:
    """Delete row id 2 from *t* and check lookup, length and iteration."""
    self.assertEqual(t.index_to_id(2), 2)
    a = t["a"]
    # Before deletion, row 2 holds the column's fill value.
    self.assertEqual(a[2], a.fillvalue)
    del t.loc[2]
    # Accessing a deleted id must raise KeyError.
    with self.assertRaises(KeyError):
        c = t.loc[2]
        print(c)
    self.assertEqual(len(t), a.size - 1)
    # Iteration must skip the deleted row and yield exactly len(t) rows.
    cnt = 0
    for row in t.iterrows():
        assert row is not None
        self.assertTrue("a" in row)
        cnt += 1
    self.assertEqual(len(t), cnt)
def test_merge1(self):
    """Index-aligned merge of two tables; prints reprs for visual inspection."""
    table_left = Table(name='table_left', data=df_left1, create=True)
    print(repr(table_left))
    table_right = Table(name='table_right', data=df_right1, create=True,
                        indices=df_right1.index.values)
    print(repr(table_right))
    table_merge = merge(table_left, table_right, name='table_merge',
                        left_index=True, right_index=True)
    print(repr(table_merge))