def test_ps_dict_new_ids(self) -> None:
    """Keys added after copying a PsDict must be reported as created indices."""
    before = PsDict(a=1, b=2, c=3)
    after = copy.copy(before)
    # Two brand new keys; the test expects them at indices 3 and 4.
    after["x"] = 10
    after["y"] = 20
    self.assertEqual(bitmap(after.created_indices(before)), bitmap([3, 4]))
Beispiel #2
0
 def test_to_dict2(self):
     """Compare ``to_dict`` of a selected view with the equivalent DataFrame.

     Rows 3 and 4 are dropped from the DataFrame and excluded from the
     view's selection; every orient must then give the same result on both.
     """
     # index=[1,2,3,8,11],
     df = pd.DataFrame(
         data={
             'a': [1, 2, 3, 4, 5, 6, 7, 8],
             'b': [10, 20, 30, 40, 50, 60, 70, 80],
             'c': ['a', 'b', 'cd', 'ef', 'fg', 'gh', 'hi', 'ij']
         })
     t_ = Table(name=None, data=df)
     # Remove the same two rows from the reference DataFrame ...
     df = df.drop(df.index[[3, 4]])
     # ... and from the selection used to build the view.
     sel = bitmap(t_.index) - bitmap([3, 4])
     #del t.loc[[3,4]]
     t = TableSelectedView(t_, sel)
     #del t.loc[3]
     #print(df.to_dict(orient='records'))
     #print(df.to_dict(orient='records'))
     # NOTE(review): this assertion compares the DataFrame with itself, and
     # 'rows' is not a documented pandas orient -- presumably it relied on
     # older pandas accepting it as an abbreviation of 'records'; confirm.
     self.assertEqual(df.to_dict(orient='rows'),
                      df.to_dict(orient='records'))
     #print(t.to_dict(orient='records'))
     # orient : {'dict', 'list', 'split', 'rows', 'record', 'index'}
     self.assertEqual(df.to_dict(orient='dict'), t.to_dict(orient='dict'))
     self.assertEqual(df.to_dict(orient='list'), t.to_dict(orient='list'))
     self.assertEqual(df.to_dict(orient='split'), t.to_dict(orient='split'))
     self.assertEqual(df.to_dict(orient='rows'), t.to_dict(orient='rows'))
     self.assertEqual(df.to_dict(orient='index'), t.to_dict(orient='index'))
 def test_to_dict2(self) -> None:
     """A ``loc``-selected view must export the same dicts as the trimmed DataFrame."""
     frame = pd.DataFrame(
         data={
             "a": [1, 2, 3, 4, 5, 6, 7, 8],
             "b": [10, 20, 30, 40, 50, 60, 70, 80],
             "c": ["a", "b", "cd", "ef", "fg", "gh", "hi", "ij"],
         })
     full_table = Table(name=None, data=frame)
     # Drop rows 3 and 4 from the reference frame and from the selection.
     frame = frame.drop(frame.index[[3, 4]])
     kept = bitmap(full_table.index) - bitmap([3, 4])
     view = full_table.loc[kept, :]
     assert view is not None
     # Same orients, same order as the individual assertions they replace.
     for orient in ("dict", "list", "split", "records", "index"):
         self.assertEqual(frame.to_dict(orient=orient),
                          view.to_dict(orient=orient))
Beispiel #4
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Add the newly created input rows to the selection, then optionally
     remove some previously selected rows according to ``self._delete_rows``.

     Returns a blocked state with zero steps when the input slot has no
     created rows pending.
     """
     # The trailing `and False` keeps this override permanently disabled.
     if self.params.fixed_step_size and False:
         step_size = self.params.fixed_step_size
     slot = self.get_input_slot("table")
     assert slot is not None
     if not slot.created.any():
         return self._return_run_step(self.state_blocked, steps_run=0)
     fresh = slot.created.next(length=step_size, as_slice=False)
     steps = indices_len(fresh)
     source = slot.data()
     if self.result is None:
         # First run: start from an empty view over the input table.
         self.result = TableSelectedView(source, bitmap([]))
     # Snapshot of the rows present before this step's additions.
     existing = bitmap(self.table.index)
     self.selected.selection |= fresh
     rule = self._delete_rows
     if rule and self.test_delete_threshold(existing):
         if isinstance(rule, int):
             doomed = random.sample(tuple(existing),
                                    min(rule, len(existing)))
         elif rule == "half":
             doomed = random.sample(tuple(existing), len(existing) // 2)
         elif rule == "all":
             doomed = existing
         else:
             # Anything else is used as the collection of ids to delete.
             doomed = rule
         self.selected.selection -= bitmap(doomed)
     return self._return_run_step(self.next_state(slot),
                                  steps_run=steps)
Beispiel #5
0
    def run_step_progress(self, run_number: int, step_size: int,
                          howlong: float) -> ReturnRunStep:
        """Progressively grow the selection from rows created on the inputs.

        Every input slot must expose a table backed by the same physical
        table. If any slot reports updated or deleted rows, that slot and
        all following ones are reset and the selection is rebuilt from
        scratch. Rows created on all inputs are added directly; rows created
        on only some inputs are added when every input table's index
        contains them.

        Returns a blocked state; ``steps_run`` is 0 when nothing was pending.
        """
        _b = bitmap.asbitmap
        # Must be bound to a real list: a bare annotation does not create the
        # variable, so the first use below would raise UnboundLocalError.
        to_create: List[bitmap] = []
        steps = 0
        tables = []
        ph_table = None
        reset_ = False
        for name in self.get_input_slot_multiple():
            slot = self.get_input_slot(name)
            t = slot.data()
            assert isinstance(t, BaseTable)
            if ph_table is None:
                ph_table = _get_physical_table(t)
            else:
                assert ph_table is _get_physical_table(t)
            tables.append(t)
            # Once one slot needs a reset, all remaining slots are reset too.
            if reset_ or slot.updated.any() or slot.deleted.any():
                slot.reset()
                reset_ = True
                steps += 1
            if slot.created.any():
                created = slot.created.next(step_size)
                to_create.append(_b(created))
                steps += indices_len(created)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Ids created on every input belong to the intersection for sure.
        to_create_4sure = bitmap()
        if len(to_create) == len(tables):
            to_create_4sure = bitmap.intersection(*to_create)

        # A reset-only step leaves nothing to union.
        to_create_maybe = bitmap.union(*to_create) if to_create else bitmap()

        if not self.result:
            self.result = TableSelectedView(ph_table, bitmap([]))
        if reset_:
            self.selected.selection = bitmap([])
        self.selected.selection = self.selected.index | to_create_4sure
        # Remaining candidates are kept only if every input table has them.
        to_create_maybe -= to_create_4sure
        eff_create = to_create_maybe
        for t in tables:
            eff_create &= t.index
        self.selected.selection = self.selected.index | eff_create
        return self._return_run_step(self.state_blocked, steps)
 def test_ps_dict_updated_ids(self) -> None:
     """Only values modified since the copy are reported as updated indices."""
     before = PsDict(a=1, b=2, c=3, d=4, e=5)
     after = copy.copy(before)
     # An untouched copy reports no update.
     self.assertEqual(bitmap(after.updated_indices(before)), bitmap())
     after["b"] += 1
     after["d"] *= 2
     changed = after.updated_indices(before)
     # "b" and "d" are expected at indices 1 and 3.
     self.assertEqual(bitmap(changed), bitmap([1, 3]))
Beispiel #7
0
 def run_step(self, run_number, step_size, howlong):
     """Copy newly created input rows into the output table, then optionally
     delete and/or update rows that were already there before this step.

     Deletions are driven by ``self._delete_rows`` (an int count, 'half',
     'all', or an explicit collection of ids) and updates by
     ``self._update_rows`` (an int count or an explicit collection of ids).
     Returns a blocked state with zero steps when no created rows are pending.
     """
     # NOTE: `and False` makes this branch unreachable, so step_size is
     # never overridden by the fixed_step_size parameter.
     if self.params.fixed_step_size and False:
         step_size = self.params.fixed_step_size
     input_slot = self.get_input_slot('table')
     input_slot.update(run_number)
     steps = 0
     if not input_slot.created.any():
         return self._return_run_step(self.state_blocked, steps_run=0)
     created = input_slot.created.next(step_size)
     steps = indices_len(created)
     with input_slot.lock:
         input_table = input_slot.data()
     # NOTE(review): `p` is never used below.
     p = self.params
     if self._table is None:
         # Lazily create the output table with the input's dshape.
         self._table = Table(
             self.generate_table_name('dummy'),
             dshape=input_table.dshape,
         )
     raw_ids = self._table.index.values
     # Ids present before this step; negative entries are filtered out.
     before_ = bitmap(raw_ids[raw_ids >= 0])
     v = input_table.loc[fix_loc(created), :]
     #print("creations: ", created)
     self._table.append(v)  # indices=bitmap(created))
     delete = []
     if self._delete_rows and self.test_delete_threshold(before_):
         if isinstance(self._delete_rows, int):
             # Random sample of at most `_delete_rows` pre-existing ids.
             delete = random.sample(tuple(before_),
                                    min(self._delete_rows, len(before_)))
         elif self._delete_rows == 'half':
             delete = random.sample(tuple(before_), len(before_) // 2)
         elif self._delete_rows == 'all':
             delete = before_
         else:
             # Anything else is used as the collection of ids to delete.
             delete = self._delete_rows
         #print("deletions: ", delete)
         if self.params.del_twice:
             # Perform the deletion in two halves instead of one call.
             mid = len(delete) // 2
             del self._table.loc[delete[:mid]]
             del self._table.loc[delete[mid:]]
         else:
             del self._table.loc[delete]
     if self._update_rows and len(before_):
         # Never try to update a row that was just deleted.
         before_ -= bitmap(delete)
         if isinstance(self._update_rows, int):
             updated = random.sample(tuple(before_),
                                     min(self._update_rows, len(before_)))
         else:
             updated = self._update_rows
         # One fresh random value per row to update.
         v = np.random.rand(len(updated))
         if updated:
             self._table.loc[fix_loc(updated), [self._update_column]] = [v]
     return self._return_run_step(self.next_state(input_slot),
                                  steps_run=steps)
Beispiel #8
0
 def compute_updates(self, start, now, mid=None, cleanup=True):
     """Return the changes recorded between `start` and `now` for `mid`.

     Returns None when ``self._changes`` is falsy. When the change manager
     itself yields None, every id currently stored in the dataset is
     reported as created.
     """
     if not self._changes:
         return None
     self._flush_cache()
     delta = self._changes.compute_updates(start,
                                           now,
                                           mid,
                                           cleanup=cleanup)
     if delta is not None:
         return delta
     try:  # EAFP
         return IndexUpdate(created=bitmap(self.dataset[:]))
     except OverflowError:
         # Rows could be created then removed in the same step; keep only
         # the non-negative ids.
         stored = self.dataset[:]
         return IndexUpdate(created=bitmap(stored[stored >= 0]))
Beispiel #9
0
    def test_bitmapchangemanager(self) -> None:
        """Walk a BitmapChangeManager through four successive bitmap states
        and check the created/updated/deleted buffers after each update."""
        mid1 = "m1"
        bm = bitmap([1, 2, 3])
        slot = FakeSlot(bm)

        # Freshly built manager: nothing recorded yet.
        cm = BitmapChangeManager(slot)
        self.assertEqual(cm.last_update(), 0)
        self.assertEqual(cm.created.length(), 0)
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        # ts=1: the three initial ids are reported as created.
        cm.update(1, bm, mid1)
        self.assertEqual(cm.last_update(), 1)
        self.assertEqual(cm.created.length(), 3)
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 0)

        # ts=2: id 1 disappears and id 4 appears.
        bm = bitmap([2, 3, 4])
        cm.update(2, bm, mid1)
        self.assertEqual(cm.last_update(), 2)
        # 1 should be removed because deleted at ts=2
        self.assertEqual(cm.created.next(), slice(2, 5))
        self.assertEqual(cm.updated.length(), 0)
        # the removed id was created then deleted before it got consumed,
        # so no deletion is reported
        self.assertEqual(cm.deleted.length(), 0)

        # ts=3: id 2 disappears and id 5 appears.
        bm = bitmap([3, 4, 5])
        cm.update(3, bm, mid1)
        self.assertEqual(cm.last_update(), 3)
        self.assertEqual(cm.created.next(), slice(5, 6))
        self.assertEqual(cm.updated.length(), 0)
        self.assertEqual(cm.deleted.length(), 1)  # 2 is deleted but buffered

        # ts=4: id 2 comes back and id 5 disappears.
        bm = bitmap([2, 3, 4])
        cm.update(4, bm, mid1)
        self.assertEqual(cm.last_update(), 4)
        # 2 has been created before it was consumed so it becomes updated
        self.assertEqual(cm.created.length(), 0)
        self.assertEqual(cm.created.length(), len(cm.created))
        self.assertEqual(cm.updated.length(),
                         0)  # updates are ignored by default
        # 2 should be removed because added at ts=4
        self.assertEqual(cm.deleted.next(), slice(5, 6))

        # With buffering switched off, next() is expected to return None.
        cm.created.clear()
        self.assertEqual(cm.created.length(), 0)
        cm.created.set_buffered(False)
        self.assertIsNone(cm.created.next())
Beispiel #10
0
 def divide_bin(self, i):
     """Split bin `i` in two around the median of (a sample of) its values.

     The median is inserted into ``self.bins`` at position `i`; the ids whose
     value is below it form a new bitmap inserted at `i`, and the remaining
     ids replace the old bitmap, now at `i + 1`.

     Raises
     ------
     AssertionError
         If the median does not fall strictly inside the bin's bounds, or
         if the bin edges are no longer strictly increasing afterwards.
     """
     ids = np.array(self.bitmaps[i], np.int64)
     # Estimate the median on a random sample when the bin is clearly
     # larger than the sampling size.
     if self._sampling_size * 1.2 < len(ids):
         samples = np.random.choice(ids, self._sampling_size, replace=False)
     else:
         samples = ids
     s_vals = self.column.loc[samples]
     v = np.median(s_vals)
     assert self.bins[i -
                      1] < v < self.bins[i] if i > 0 else v < self.bins[i]
     values = self.column.loc[ids]
     lower_bin = bitmap(ids[values < v])
     upper_bin = self.bitmaps[i] - lower_bin
     lower_len = len(lower_bin)
     upper_len = len(upper_bin)
     # Report splits more unbalanced than the tolerated deviation.
     t = len(ids) * self._perm_deviation
     if abs(lower_len - upper_len) > t:
         print("DIFF: ", lower_len, upper_len,
               float(abs(lower_len - upper_len)) / len(ids))
     self.bins = np.insert(self.bins, i, v)
     # Fail loudly instead of dropping into an interactive debugger: a
     # non-monotonic edge list means the index is corrupted.
     assert (self.bins[i - 1] < self.bins[i] < self.bins[i + 1]
             if i > 0 else self.bins[i] < self.bins[i + 1])
     self.bitmaps.insert(i, lower_bin)
     self.bitmaps[i + 1] = upper_bin
     print('*', end='')
Beispiel #11
0
 def range_query_aslist(
     self, lower: float, upper: float, approximate: bool = APPROX
 ) -> List[bitmap]:
     """
     Return the bitmaps covering values in range [`lower`, `upper`[ as a list.

     The list holds the bins lying strictly between the two boundary bins;
     unless `approximate` is set, one extra bitmap with the matching rows
     of the boundary bins is appended.
     """
     if lower > upper:
         lower, upper = upper, lower
     pos_lo, pos_up = np.digitize([lower, upper], self.bins)  # type: ignore
     inner = self.bitmaps[pos_lo + 1 : pos_up]
     if approximate:
         return inner
     exact = bitmap()
     lo_ids = np.array(self.bitmaps[pos_lo], np.int64)
     lo_values = self.column.loc[lo_ids]
     if pos_lo == pos_up:
         # Both bounds fall in the same bin: filter on both sides at once.
         exact.update(lo_ids[(lower <= lo_values) & (lo_values < upper)])
     else:
         exact.update(lo_ids[lower <= lo_values])
         up_ids = np.array(self.bitmaps[pos_up], np.int64)
         up_values = self.column.loc[up_ids]
         exact.update(up_ids[up_values < upper])
     inner.append(exact)
     return inner
Beispiel #12
0
 def run_step_seq(self, run_number: int, step_size: int,
                  howlong: float) -> ReturnRunStep:
     """Recompute the selection as the intersection of all input indices.

     Only input slots whose name starts with "table" are considered. Each
     pending change buffer (deleted, updated, created) is consumed and
     counts for one step; with no pending change the module stays blocked
     and nothing is recomputed.
     """
     steps = 0
     inputs = []
     ph_table = None
     for slot_name in self.get_input_slot_multiple():
         if not slot_name.startswith("table"):
             continue
         slot = self.get_input_slot(slot_name)
         data = slot.data()
         assert isinstance(data, BaseTable)
         # All inputs must share one physical table.
         physical = _get_physical_table(data)
         if ph_table is None:
             ph_table = physical
         else:
             assert ph_table is physical
         inputs.append(data)
         for kind in ("deleted", "updated", "created"):
             pending = getattr(slot, kind)
             if pending.any():
                 pending.next()
                 steps += 1
     if steps == 0:
         return self._return_run_step(self.state_blocked, 0)
     if not self.result:
         self.result = TableSelectedView(ph_table, bitmap([]))
     indices = [tbl.index for tbl in inputs]
     self.selected.selection = bitmap.intersection(*indices)
     return self._return_run_step(self.state_blocked, steps)
Beispiel #13
0
 def range_query(
     self, lower: float, upper: float, all_ids: bitmap, approximate: bool = APPROX
 ) -> bitmap:
     """
     Return the bitmap of all rows with values in range [`lower`, `upper`[

     When the range spans more than half of the bins, the result is built
     by removing the outer bins from `all_ids` rather than by uniting the
     inner ones. Unless `approximate` is set, the boundary bins are
     filtered value by value and added to the result.
     """
     if lower > upper:
         lower, upper = upper, lower
     assert self.bins is not None
     pos_lo, pos_up = np.digitize([lower, upper], self.bins)
     if pos_up - pos_lo > len(self.bins) // 2:
         outside = self.bitmaps[: pos_lo + 1] + self.bitmaps[pos_up:]
         result = all_ids - bitmap.union(*outside)
     else:
         result = bitmap.union(*self.bitmaps[pos_lo + 1 : pos_up])
     if approximate:
         return result
     exact = bitmap()
     lo_ids = np.array(self.bitmaps[pos_lo], np.int64)
     lo_values = self.column.loc[lo_ids]
     if pos_lo == pos_up:
         # Both bounds fall in the same bin: filter on both sides at once.
         exact.update(lo_ids[(lower <= lo_values) & (lo_values < upper)])
     else:
         exact.update(lo_ids[lower <= lo_values])
         up_ids = np.array(self.bitmaps[pos_up], np.int64)
         up_values = self.column.loc[up_ids]
         exact.update(up_ids[up_values < upper])
     result.update(exact)
     return result
Beispiel #14
0
    def restricted_query(
        self,
        operator_: Callable[[Any, Any], int],
        limit: Any,
        only_locs: Any,
        approximate: bool = APPROX,
    ) -> bitmap:  # blocking...
        """
        Return the rows of `only_locs` for which `operator_(value, limit)` holds.

        The bin containing `limit` is filtered value by value unless
        `approximate` is set; the bins entirely on the matching side are
        added after intersecting them with `only_locs`.
        """
        only_locs = bitmap.asbitmap(only_locs)
        assert self.bins is not None
        pos = np.digitize(limit, self.bins)
        matches = bitmap()
        if not approximate:
            edge_ids = np.array(self.bitmaps[pos] & only_locs, np.int64)
            edge_values = self.column.loc[edge_ids]
            matches.update(edge_ids[operator_(edge_values, limit)])

        # "Less than" style operators take the bins below the limit, any
        # other operator takes the bins above it.
        if operator_ in (operator.lt, operator.le):
            whole_bins = self.bitmaps[:pos]
        else:
            whole_bins = self.bitmaps[pos + 1 :]
        for bin_ids in whole_bins:
            matches.update(bin_ids & only_locs)
        return matches
Beispiel #15
0
    def query(
        self,
        operator_: Callable[[Any, Any], int],
        limit: Any,
        approximate: bool = APPROX,
    ) -> bitmap:  # blocking...
        """
        Return the bitmap of rows for which `operator_(value, limit)` holds.
        For example, returning all values less than 10 (< 10) would be
        `query(operator.__lt__, 10)`

        The bin containing `limit` is filtered value by value unless
        `approximate` is set; the bins entirely on the matching side are
        added whole.
        """
        assert self.bins is not None
        pos = np.digitize(limit, self.bins)
        matches = bitmap()
        if not approximate:
            edge_ids = np.array(self.bitmaps[pos], np.int64)
            edge_values = self.column.loc[edge_ids]
            matches.update(edge_ids[operator_(edge_values, limit)])

        # "Less than" style operators take the bins below the limit, any
        # other operator takes the bins above it.
        if operator_ in (operator.lt, operator.le):
            whole_bins = self.bitmaps[:pos]
        else:
            whole_bins = self.bitmaps[pos + 1 :]
        for bin_ids in whole_bins:
            matches.update(bin_ids)
        return matches
Beispiel #16
0
 def __contains__(self, loc):
     """Tell whether every id designated by `loc` exists in this column.

     `loc` may be an integer, a slice, a bitmap or an iterable of ids, as
     classified by ``Loc.dispatch``. In identity mode, ids are valid when
     they lie in ``[0, self.size)``; otherwise they are looked up in
     ``self._ids_dict``.

     Raises
     ------
     ValueError
         If `loc` is of a kind that cannot be tested.
     """
     v = Loc.dispatch(loc)
     end = self.size
     ids = self._ids_dict
     if v == Loc.INT:
         if self._is_identity:
             # Id 0 is valid, consistently with the iterable branch below.
             return 0 <= loc < end
         return loc in ids
     if v == Loc.SLICE:
         if self._is_identity:
             # Slices expose start/stop (not "end"); both may be None.
             start = 0 if loc.start is None else loc.start
             return start >= 0 and (loc.stop is None or loc.stop <= end)
         # slice.indices() clamps the slice to the column length.
         loc = range(*loc.indices(end))
         v = Loc.ITERABLE
     elif v == Loc.BITMAP:
         if self._is_identity:
             inside = bitmap(range(0, end))
             return loc.difference_cardinality(inside) == 0
         v = Loc.ITERABLE
     if not Loc.isiterable(v):
         raise ValueError('Unsupported data for "in" %s' % (loc,))
     if self._is_identity:
         return all(0 <= item < end for item in loc)
     return all(item in ids for item in loc)
Beispiel #17
0
 def test_bisect2(self) -> None:
     """Bisect on a stirred table (with deletions) must match a direct eval."""
     sched = self.scheduler()
     source = RandomTable(2, rows=100_000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1",
         delete_rows=100,
         scheduler=sched,
     )
     stirrer.input[0] = source.output.result
     limit_table = Table(name=None,
                         dshape="{value: string}",
                         data={"value": [0.5]})
     limit = Constant(table=limit_table, scheduler=sched)
     index_mod = HistogramIndex(column="_1", scheduler=sched)
     index_mod.create_dependent_modules(stirrer, "result")
     bisect_mod = Bisect(column="_1",
                         op=">",
                         hist_index=index_mod,
                         scheduler=sched)
     bisect_mod.input[0] = index_mod.output.result
     bisect_mod.input.limit = limit.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = bisect_mod.output.result
     aio.run(sched.start())
     # Reference result computed directly on the stirrer's final table.
     expected = stirrer.table.eval("_1>0.5", result_object="index")
     self.assertEqual(bisect_mod.table.index, bitmap(expected))
Beispiel #18
0
 def resize(self,
            newsize: int,
            index: Optional[Union[bitmap, List[int]]] = None) -> None:
     """Resize the table so that it contains `newsize` active rows.

     Parameters
     ----------
     newsize:
         Requested number of active rows. When `index` is given and its
         largest id does not fit, `newsize` is raised to ``index.max() + 1``
         and a warning is logged.
     index:
         Optional explicit ids, forwarded to ``_resize_rows``.
     """
     # NB: newsize means how many active rows the table must contain
     if index is not None:
         index = bitmap.asbitmap(index)
         # Smallest size able to hold the largest requested id.
         newsize_ = index.max() + 1 if index else 0
         if newsize < newsize_:
             logger.warning(f"Wrong newsize={newsize}, fixed to {newsize_}")
             newsize = newsize_
     assert newsize is not None
     # Number of rows to add (positive) or remove (negative).
     delta = newsize - len(self.index)
     # if delta < 0:
     #    return
     # From here on, newsize is expressed relative to the last allocated id.
     newsize = self.last_id + delta + 1
     # Snapshot of the index, to detect the ids removed by the resize.
     crt_index = bitmap(self._index)
     self._resize_rows(newsize, index)
     del_index = crt_index - self._index
     if del_index:
         self.add_deleted(del_index)
     if delta < 0:
         # Shrinking: the stored row count and the columns are left as is.
         return
     self._storagegroup.attrs[metadata.ATTR_NROWS] = newsize
     assert newsize is not None
     for column in self._columns:
         col = cast(Column, column)
         col._resize(newsize)
Beispiel #19
0
 def restricted_range_query(self,
                            lower,
                            upper,
                            only_locs,
                            approximate=APPROX):
     """
     Return the bitmap of only_locs rows in range [`lower`, `upper`[

     The bins lying strictly between the two boundary bins are added after
     intersecting them with `only_locs`. Unless `approximate` is set, the
     boundary bins are filtered value by value and added as well.
     """
     if lower > upper:
         lower, upper = upper, lower
     only_locs = bitmap.asbitmap(only_locs)
     pos = np.digitize([lower, upper], self.bins)
     detail = bitmap()
     if not approximate:
         ids = np.array(self.bitmaps[pos[0]] & only_locs, np.int64)
         values = self.column.loc[ids]
         if pos[0] == pos[1]:
             # Both bounds fall in the same bin. The matching rows must be
             # recorded here too, otherwise the query returns nothing.
             selected = ids[(lower <= values) & (values < upper)]
             detail.update(selected)
         else:
             selected = ids[lower <= values]
             detail.update(selected)
             ids = np.array(self.bitmaps[pos[1]] & only_locs, np.int64)
             values = self.column.loc[ids]
             selected = ids[values < upper]
             detail.update(selected)
     for bm in self.bitmaps[pos[0] + 1:pos[1]]:
         detail.update(bm & only_locs)
     return detail
Beispiel #20
0
    def compute_updates(self,
                        start: int,
                        now: int,
                        mid: str,
                        cleanup: bool = True) -> Optional[IndexUpdate]:
        """Compute the delta that happened to this table since the last call.

        Parameters
        ----------
        start: integer
            Virtual time standing for `last time`
        now: integer
            Virtual time standing for `now`
        mid: hashable object
            An identifier for the object that will ask for updates,
            usually the name of a slot.
        cleanup: boolean
            Forwarded unchanged to the change manager.
        Returns
        -------
        updates: None when ``self._changes`` is falsy, otherwise an
             IndexUpdate structure which describes the list of rows
             created, updated, and deleted.
        """
        if not self._changes:
            return None
        self._flush_cache()
        delta = self._changes.compute_updates(start,
                                              now,
                                              mid,
                                              cleanup=cleanup)
        if delta is not None:
            return delta
        # The change manager returned nothing: report the whole index as
        # created.
        return IndexUpdate(created=bitmap(self.index))
Beispiel #21
0
 def restricted_range_query(
     self, lower: float, upper: float, only_locs: Any, approximate: bool = APPROX
 ) -> bitmap:
     """
     Return the bitmap of only_locs rows in range [`lower`, `upper`[

     The bins lying strictly between the two boundary bins are intersected
     with `only_locs` and united. Unless `approximate` is set, the boundary
     bins are filtered value by value and added to the result.
     """
     if lower > upper:
         lower, upper = upper, lower
     only_locs = bitmap.asbitmap(only_locs)
     pos_lo, pos_up = np.digitize([lower, upper], self.bins)  # type: ignore
     inner = [(bm & only_locs) for bm in self.bitmaps[pos_lo + 1 : pos_up]]
     result = bitmap.union(*inner)
     if approximate:
         return result
     exact = bitmap()
     lo_ids = np.array(self.bitmaps[pos_lo] & only_locs, np.int64)
     lo_values = self.column.loc[lo_ids]
     if pos_lo == pos_up:
         # Both bounds fall in the same bin: filter on both sides at once.
         exact.update(lo_ids[(lower <= lo_values) & (lo_values < upper)])
     else:
         exact.update(lo_ids[lower <= lo_values])
         up_ids = np.array(self.bitmaps[pos_up] & only_locs, np.int64)
         up_values = self.column.loc[up_ids]
         exact.update(up_ids[up_values < upper])
     result.update(exact)
     return result
Beispiel #22
0
 def test_intersection(self) -> None:
     """Intersecting `_1 > 0.3` and `_1 < 0.8` must match a direct eval."""
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     low_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
     low_limit = Constant(table=low_table, scheduler=sched)
     high_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
     high_limit = Constant(table=high_table, scheduler=sched)
     index_mod = HistogramIndex(column="_1", scheduler=sched)
     index_mod.create_dependent_modules(source, "result")
     above = Bisect(column="_1",
                    op=">",
                    hist_index=index_mod,
                    scheduler=sched)
     above.input[0] = index_mod.output.result
     above.input.limit = low_limit.output.result
     below = Bisect(column="_1",
                    op="<",
                    hist_index=index_mod,
                    scheduler=sched)
     below.input[0] = index_mod.output.result
     below.input.limit = high_limit.output.result
     inter = Intersection(scheduler=sched)
     # Both bisect outputs are plugged through input[0], one after the other.
     inter.input[0] = above.output.result
     inter.input[0] = below.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = inter.output.result
     aio.run(sched.start())
     assert index_mod.input_module is not None
     # Reference result computed directly on the indexed table.
     indexed = index_mod.input_module.output["result"].data()
     expected = indexed.eval("(_1>0.3)&(_1<0.8)", result_object="index")
     self.assertEqual(inter.table.index, bitmap(expected))
Beispiel #23
0
 def test_paging_helper_t(self) -> None:
     """Check PagingHelper pages on a full table, after a deletion, and on a view."""
     table = Table("table_for_paging",
                   dshape="{a: int, b: float32}",
                   create=True)
     table.resize(200)
     _ = np.arange(200)
     table["a"] = np.random.randint(100, size=200)
     table["b"] = np.array(np.random.rand(200), np.float32)
     # Full table: the first page runs from id 0 to id 9.
     first_page = PagingHelper(table).get_page(0, 10)
     self.assertEqual(first_page[0][0], 0)
     self.assertEqual(first_page[-1][0], 9)
     # Once row 5 is deleted the same page reaches id 10.
     del table.loc[5]
     first_page = PagingHelper(table).get_page(0, 10)
     self.assertEqual(first_page[0][0], 0)
     self.assertEqual(first_page[-1][0], 10)
     # View restricted to the even ids from 10 to 74.
     selection = bitmap(range(10, 75, 2))
     print(selection)
     view = table.loc[selection, :]
     self.assertTrue(view is not None)
     assert view is not None
     view_page = PagingHelper(view).get_page(10, 20)
     self.assertEqual(view_page[0][0], 30)
     self.assertEqual(view_page[-1][0], 48)
     print(view_page)
Beispiel #24
0
    def test_intersection(self):
        """Intersecting `_1 > 0.3` and `_1 < 0.8` must match a direct eval."""
        sched = self.scheduler()
        source = RandomTable(2, rows=100000, scheduler=sched)
        low_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
        low_limit = Constant(table=low_table, scheduler=sched)
        high_table = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
        high_limit = Constant(table=high_table, scheduler=sched)
        index_mod = HistogramIndex(column='_1', scheduler=sched)
        index_mod.create_dependent_modules(source, 'table')
        above = Bisect(column='_1',
                       op='>',
                       hist_index=index_mod,
                       scheduler=sched)
        above.input.table = index_mod.output.table
        above.input.limit = low_limit.output.table

        below = Bisect(column='_1',
                       op='<',
                       hist_index=index_mod,
                       scheduler=sched)
        below.input.table = index_mod.output.table
        below.input.limit = high_limit.output.table
        inter = Intersection(scheduler=sched)
        # Both bisect outputs are plugged on the same slot, one after the other.
        inter.input.table = above.output.table
        inter.input.table = below.output.table
        printer = Print(proc=self.terse, scheduler=sched)
        printer.input.df = inter.output.table
        sched.start()
        sched.join()
        # Reference result computed directly on the indexed table.
        indexed = index_mod.input_module.output['table'].data()
        expected = indexed.eval('(_1>0.3)&(_1<0.8)', result_object='index')
        self.assertEqual(inter.table().selection, bitmap(expected))
Beispiel #25
0
 def resize(self, newsize, indices=None):
     """
     Change the size of the IdColumn.

     When the column grows, return the new identifiers allocated; return
     None when the size is unchanged or when the column shrinks.

     Parameters
     ----------
     newsize: requested size of the column.
     indices: optional explicit identifiers for the new rows; when given
         while growing, there must be exactly one per added row.

     Raises
     ------
     ValueError
         If `indices` contains an identifier already known to the column.
     """
     # pylint: disable=arguments-differ
     oldsize = self.size
     if oldsize == newsize:
         assert (indices is None or len(indices) == 0)
         return None
     elif oldsize > newsize:
         # Shrinking: delete the identifiers stored past the new size.
         todelete = self[newsize:]
         try:  #EAFP
             newsize_bm = bitmap(todelete)
             newsize = self._delete_ids(newsize_bm)
         except OverflowError:
             # Retry with the non-negative identifiers only.
             newsize_ = todelete[todelete >= 0]
             newsize = self._delete_ids(newsize_)
         if newsize is not None:
             super(IdColumn, self).resize(newsize)
             self._flush_cache()
         return None
     else:  # oldsize < newsize
         incr = newsize - oldsize
         assert indices is None or len(indices) == incr
         self._flush_cache()
         if self._is_identity:
             newindices = np.arange(oldsize, newsize)
             # if the new indices are not the same
             # as expected, allocate the hashtable-based storage.
             if (indices is not None
                     and not np.array_equal(indices, newindices)):
                 self._really_create_dataset()  #  indices=indices)
                 # Start over; presumably _really_create_dataset leaves
                 # identity mode so the generic path below runs -- confirm.
                 return self.resize(newsize, indices)
             # indices is None or == newindices, super.resize works
             super(IdColumn, self).resize(newsize)
             indices = newindices
             self.add_created(indices)
             self._last_id += incr
             self.dataset.attrs[IdColumn.ATTR_LAST_ID] = self._last_id
             return indices
         # not _is_identity, code using full dataset/hash table
         if indices is None:
             # Allocate `incr` consecutive identifiers after the last one.
             last_id = self._last_id + incr
             indices = np.arange(self._last_id, last_id, dtype=np.int64)
         else:
             indices = np.asarray(indices, dtype=np.int64)
             if (self._ids_dict is not None
                     and self._ids_dict.contains_any(indices)):
                 raise ValueError('Indices would contain duplicates')
             last_id = max(self._last_id, int(np.max(indices) + 1))
         # TODO reuse free list
         super(IdColumn, self).resize(newsize)
         self.dataset[oldsize:] = indices
         self._update_ids_dict(oldsize, oldsize + incr, indices)
         # The array is overwritten in place with the row positions
         # oldsize .. oldsize + incr - 1 before being returned.
         # NOTE(review): np.asarray may return the caller's own array, which
         # would then be modified too -- confirm this is intended.
         indices[:] = np.arange(oldsize, oldsize + incr)
         self._last_id = last_id
         self.dataset.attrs[IdColumn.ATTR_LAST_ID] = self._last_id
         return indices
Beispiel #26
0
 def _normalize_locs(self, locs):
     """Turn `locs` into a bitmap.

     A single integer becomes a one-element bitmap. None selects every
     entry: the column is iterated when the free list is non-empty,
     the raw dataset otherwise.
     """
     if isinstance(locs, integer_types):
         return bitmap([locs])
     if locs is None:
         locs = iter(self) if bool(self._freelist) else iter(self.dataset)
     return bitmap(locs)
Beispiel #27
0
 def test_filter3(self) -> None:
     """Filtering a stirred table must match both Table.eval and pandas eval."""
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(update_column="_1",
                       update_rows=5,
                       fixed_step_size=100,
                       scheduler=sched)
     stirrer.input[0] = source.output.result
     filter_mod = FilterMod(expr="_1 > 0.5", scheduler=sched)
     filter_mod.input[0] = stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = filter_mod.output.result
     aio.run(sched.start())
     filtered_input = filter_mod.get_input_slot("table").data()
     # First reference: the table's own eval.
     expected = filtered_input.eval("_1>0.5", result_object="index")
     self.assertEqual(filter_mod.table.index, bitmap(expected))
     # Second reference: the same expression evaluated by pandas.
     frame = pd.DataFrame(filtered_input.to_dict(),
                          index=filtered_input.index.to_array())
     mask = frame.eval("_1>0.5")
     self.assertEqual(filter_mod.table.index, bitmap(frame.index[mask]))
Beispiel #28
0
    def _combine_updates(self, update: IndexUpdate, start: int) -> IndexUpdate:
        """Merge `update` with the updates of the bookmarks from `start` on.

        A copy of `update` is made first, so the argument is not modified;
        the update of each bookmark at position `start` or later is then
        combined into that copy, which is returned.
        """
        # TODO reuse cached results if it matches
        new_u = IndexUpdate(
            created=bitmap(update.created),
            deleted=bitmap(update.deleted),
            updated=bitmap(update.updated),
        )

        last_u = None
        # Since bookmarks can share their update slots,
        # search for a bookmark with a different value
        for i in range(start, len(self._bookmarks)):
            upd = self._bookmarks[i].update
            if upd is last_u:
                continue
            new_u.combine(upd)
            # Remember the update just merged (not the accumulator), so that
            # a following bookmark sharing the same object is skipped.
            last_u = upd
        # TODO cache results to reuse it if possible
        return new_u
Beispiel #29
0
 def _slice_to_bitmap(self,
                      sl: slice,
                      fix_loc: bool = True,
                      existing_only: bool = True) -> bitmap:
     """Convert the slice `sl` into a bitmap of ids.

     A missing (or zero) stop is replaced by ``self.last_xid``. With
     `existing_only`, the result is restricted to ids present in the index.
     """
     upper = sl.stop if sl.stop else self.last_xid
     selected = bitmap(norm_slice(sl, fix_loc, stop=upper))
     if not existing_only:
         return selected
     selected &= self.index
     return selected
Beispiel #30
0
 def test_filter(self) -> None:
     """FilterMod on a random table must match Table.eval on its input."""
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     filter_mod = FilterMod(expr="_1 > 0.5", scheduler=sched)
     filter_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = filter_mod.output.result
     aio.run(sched.start())
     filtered_input = filter_mod.get_input_slot("table").data()
     expected = filtered_input.eval("_1>0.5", result_object="index")
     self.assertEqual(filter_mod.table.index, bitmap(expected))