Beispiel #1
0
 def update_histogram(
     self,
     created: Optional[bitmap] = None,
     updated: Optional[bitmap] = None,
     deleted: Optional[bitmap] = None,
 ) -> None:
     """Refresh the per-bin bitmaps of the histogram index.

     Ids listed in ``updated`` or ``deleted`` are first removed from every
     bin. Ids listed in ``created`` or ``updated`` are then inserted into
     the bin that ``np.digitize`` assigns to their value in ``self.column``.
     """
     created = bitmap.asbitmap(created)
     updated = bitmap.asbitmap(updated)
     deleted = bitmap.asbitmap(deleted)
     if deleted or updated:
         stale = updated | deleted
         for slot, bin_bm in enumerate(self.bitmaps):
             self.bitmaps[slot] = bin_bm - stale
     if not (created or updated):
         return
     fresh = created | updated
     fresh_ids = np.array(fresh, np.int64)
     bin_of = np.digitize(self.column.loc[fresh], self.bins)  # type: ignore
     # Only visit the bins that actually received at least one id.
     for bin_no in np.flatnonzero(np.bincount(bin_of)):
         in_this_bin = bin_of == bin_no  # boolean mask over fresh_ids
         self.bitmaps[bin_no].update(fresh_ids[in_this_bin])
Beispiel #2
0
 def _process_created(ret: Dict[str, Any]) -> None:
     """Merge the ids listed in ``created`` into ``join_table``.

     This is a closure: besides ``ret`` it reads ``created``, ``how``,
     ``first_key``, ``second_key``, ``first``, ``second``, ``first_cols``,
     ``second_cols``, ``join_table`` and ``dialog`` from the enclosing scope.

     * Returns immediately when ``created`` is empty.
     * For ``how == "outer"`` the work is delegated to
       ``_process_created_outer(ret)``; ``ret`` is not used otherwise here.
     * Otherwise rows for the first-table ids are added to ``join_table``,
       the second-table columns are filled for the ids returned as
       ``common``, and the ``"first_orphans"`` / ``"second_orphans"``
       bitmaps kept in ``dialog.bag`` are paired with the new ids and
       updated.
     """
     b = dialog.bag
     if not created:
         return
     if how == "outer":
         return _process_created_outer(ret)
     # if first_key not in created: return
     first_ids = created.get(first_key, None)
     second_ids = created.get(second_key, None)
     # NOTE(review): inter_slice presumably splits the two id sets into
     # first-only / shared / second-only parts -- confirm against its definition.
     only_1st, common, only_2nd = inter_slice(first_ids, second_ids)
     assert isinstance(join_table, Table)
     if first_ids is not None:
         new_size = _len(first_ids)
         if (
             isinstance(first_ids, slice)
             and join_table.is_identity
             and (
                 join_table.last_id + 1 == first_ids.start or join_table.last_id == 0
             )
         ):
             # the nice case (no gaps)
             join_table.resize(new_size)
         else:  # there are gaps ...we have to keep trace of existing ids
             join_table.resize(new_size, index=bitmap.asbitmap(first_ids))
             # "existing_ids" is initialised from the whole join_table index the
             # first time, and extended with the new first-table ids afterwards.
             if b.get("existing_ids", None) is None:
                 b["existing_ids"] = bitmap.asbitmap(join_table.index)
             else:
                 b["existing_ids"] = bitmap.union(
                     b["existing_ids"], bitmap.asbitmap(first_ids)
                 )
         # copy the first-table columns for the new ids
         join_table.loc[_fix(first_ids), first_cols] = first.loc[
             _fix(first_ids), first.columns
         ]
     if not _void(common):
         # copy the second-table columns for the ids in ``common``
         join_table.loc[_fix(common), second_cols] = second.loc[
             _fix(common), second.columns
         ]
     # first matching: older orphans on the second table with new orphans on the first
     only_1st_bm = bitmap.asbitmap(only_1st)
     paired = b["second_orphans"] & only_1st_bm
     if paired:
         join_table.loc[paired, second_cols] = second.loc[paired, second.columns]
         b["second_orphans"] = b["second_orphans"] - paired
         only_1st_bm -= paired
     # whatever is still unpaired on the first side is recorded as a first orphan;
     # this happens before the 2nd matching, which therefore also sees these ids
     b["first_orphans"] = bitmap.union(b["first_orphans"], only_1st_bm)
     # 2nd matching: older orphans on the first table with new orphans on the second
     only_2nd_bm = bitmap.asbitmap(only_2nd)
     paired = b["first_orphans"] & only_2nd_bm
     if paired:
         # fill the second-table columns for the paired ids
         join_table.loc[paired, second_cols] = second.loc[paired, second.columns]
         b["first_orphans"] = b["first_orphans"] - paired
         only_2nd_bm -= paired
     # whatever is still unpaired on the second side is recorded as a second orphan
     b["second_orphans"] = bitmap.union(b["second_orphans"], only_2nd_bm)
Beispiel #3
0
 def resize(self,
            newsize: int,
            index: Optional[Union[bitmap, List[int]]] = None) -> None:
     """Resize the table so that it holds ``newsize`` active rows.

     When ``index`` is given, ``newsize`` is raised (with a warning) to
     ``index.max() + 1`` if it is smaller than that. Ids that are no longer
     in ``self._index`` after the row resize are reported through
     ``add_deleted``. The ``ATTR_NROWS`` attribute and the columns are only
     resized when the table does not shrink.
     """
     # NB: newsize means how many active rows the table must contain
     if index is not None:
         index = bitmap.asbitmap(index)
         required = index.max() + 1 if index else 0
         if newsize < required:
             logger.warning(f"Wrong newsize={newsize}, fixed to {required}")
             newsize = required
     assert newsize is not None
     delta = newsize - len(self.index)
     # from here on newsize is expressed relative to the last allocated id
     newsize = self.last_id + delta + 1
     before = bitmap(self._index)  # snapshot taken prior to the row resize
     self._resize_rows(newsize, index)
     dropped = before - self._index
     if dropped:
         self.add_deleted(dropped)
     if delta >= 0:
         self._storagegroup.attrs[metadata.ATTR_NROWS] = newsize
         assert newsize is not None
         for column in self._columns:
             cast(Column, column)._resize(newsize)
Beispiel #4
0
 def restricted_range_query(
     self, lower: float, upper: float, only_locs: Any, approximate: bool = APPROX
 ) -> bitmap:
     """
     Return the bitmap of only_locs rows in range [`lower`, `upper`[

     The limits are swapped when given in the wrong order. Bins lying
     strictly between the two boundary bins are taken as a whole. The
     boundary bins are only inspected, value by value, when ``approximate``
     is false; otherwise they are left out.
     """
     if lower > upper:
         lower, upper = upper, lower
     only_locs = bitmap.asbitmap(only_locs)
     lo_bin, hi_bin = np.digitize([lower, upper], self.bins)  # type: ignore
     inner = [(bm & only_locs) for bm in self.bitmaps[lo_bin + 1 : hi_bin]]
     result = bitmap.union(*inner)
     if approximate:
         return result

     def _rows_and_values(bin_no: Any) -> Any:
         # ids of the requested rows stored in that bin, with their values
         rows = np.array(self.bitmaps[bin_no] & only_locs, np.int64)
         return rows, self.column.loc[rows]

     exact = bitmap()
     rows, vals = _rows_and_values(lo_bin)
     if lo_bin == hi_bin:
         # both limits fall in the same bin: apply them together
         exact.update(rows[(lower <= vals) & (vals < upper)])
     else:
         exact.update(rows[lower <= vals])
         rows, vals = _rows_and_values(hi_bin)
         exact.update(rows[vals < upper])
     result.update(exact)
     return result
Beispiel #5
0
    def restricted_query(
        self,
        operator_: Callable[[Any, Any], int],
        limit: Any,
        only_locs: Any,
        approximate: bool = APPROX,
    ) -> bitmap:  # blocking...
        """
        Returns the subset of only_locs matching the query.

        The bin containing ``limit`` is filtered value by value with
        ``operator_`` unless ``approximate`` is set, in which case it is
        skipped. Bins below it (for ``operator.lt`` / ``operator.le``) or
        above it (for any other operator) are taken as a whole.
        """
        only_locs = bitmap.asbitmap(only_locs)
        assert self.bins is not None
        pivot = np.digitize(limit, self.bins)
        matches = bitmap()
        if not approximate:
            rows = np.array(self.bitmaps[pivot] & only_locs, np.int64)
            keep = operator_(self.column.loc[rows], limit)
            matches.update(rows[keep])

        if operator_ in (operator.lt, operator.le):
            whole_bins = self.bitmaps[:pivot]
        else:
            whole_bins = self.bitmaps[pivot + 1 :]
        for bin_bm in whole_bins:
            matches.update(bin_bm & only_locs)
        return matches
Beispiel #6
0
 def restricted_range_query(self,
                            lower,
                            upper,
                            only_locs,
                            approximate=APPROX):
     """
     Return the bitmap of only_locs rows in range [`lower`, `upper`[

     The limits are swapped when given in the wrong order. Bins lying
     strictly between the two boundary bins are taken as a whole. The
     boundary bins are only inspected, value by value, when ``approximate``
     is false; otherwise they are left out.
     """
     if lower > upper:
         lower, upper = upper, lower
     only_locs = bitmap.asbitmap(only_locs)
     pos_lo, pos_up = np.digitize([lower, upper], self.bins)
     detail = bitmap()
     if not approximate:
         ids = np.array(self.bitmaps[pos_lo] & only_locs, np.int64)
         values = self.column.loc[ids]
         if pos_lo == pos_up:
             # Both limits fall in the same bin: apply them together.
             # The selection must be recorded here as well, otherwise the
             # matching rows of that single bin are silently dropped.
             selected = ids[(lower <= values) & (values < upper)]
             detail.update(selected)
         else:
             # lower boundary bin: keep the values at or above `lower`
             selected = ids[lower <= values]
             detail.update(selected)
             # upper boundary bin: keep the values strictly below `upper`
             ids = np.array(self.bitmaps[pos_up] & only_locs, np.int64)
             values = self.column.loc[ids]
             selected = ids[values < upper]
             detail.update(selected)
     # bins strictly between the two boundary bins are included as a whole
     for bm in self.bitmaps[pos_lo + 1:pos_up]:
         detail.update(bm & only_locs)
     return detail
Beispiel #7
0
 def update_histogram(self, created, updated=(), deleted=()):
     """Update the histogram index.

     Ids in ``updated`` or ``deleted`` are subtracted from every bin bitmap.
     Ids in ``created`` or ``updated`` are then added to the bin that
     ``np.digitize`` assigns to their value in ``self.column``.
     """
     created = bitmap.asbitmap(created)
     updated = bitmap.asbitmap(updated)
     deleted = bitmap.asbitmap(deleted)
     if deleted or updated:
         obsolete = updated | deleted
         for bin_bm in self.bitmaps:
             bin_bm -= obsolete
     if created or updated:
         incoming = created | updated
         incoming_ids = np.array(incoming, np.int64)
         incoming_values = self.column.loc[incoming]
         bin_numbers = np.digitize(incoming_values, self.bins)
         # bins that received at least one of the incoming ids
         touched = np.flatnonzero(np.bincount(bin_numbers))
         for bin_no in touched:
             members = incoming_ids[bin_numbers == bin_no]
             self.bitmaps[bin_no].update(members)
Beispiel #8
0
 def _allocate(self,
               count: int,
               index: Optional[Union[bitmap, List[int]]] = None) -> bitmap:
     """Allocate new rows and return the bitmap of their ids.

     When ``index`` is None, ``count`` consecutive ids starting right after
     ``self.last_id`` are allocated; otherwise the ids given in ``index``
     are used. The allocated ids are registered with ``add_created``, then
     the ``ATTR_NROWS`` attribute, every column and the row index are
     resized to hold them.

     An empty allocation (``count == 0`` or an empty ``index``) returns the
     empty bitmap and leaves the table untouched.
     """
     if index is None:
         start = self.last_id + 1
         index = bitmap(range(start, start + count))
     else:
         index = bitmap.asbitmap(index)
     if not index:
         # Nothing to allocate. An empty bitmap has no maximum, so the
         # size computation below would fail on it.
         return index
     newsize = max(index.max(), self.last_id) + 1
     self.add_created(index)
     self._storagegroup.attrs[metadata.ATTR_NROWS] = newsize
     for column in self._columns:
         col = cast(Column, column)
         col._resize(newsize)
     self._resize_rows(newsize, index)
     return index
Beispiel #9
0
 def __getitem__(self, key):
     """Return the row, cell or view selected by ``key``.

     ``key`` is split by ``self.parse_key`` into a row index and a column
     key:

     * an integer index returns the row, or ``row[col_key]`` when a column
       key other than ``slice(None)`` is given;
     * a slice with step ``None`` or 1 returns a ``TableSlicedView``;
     * any other iterable returns a ``TableSelectedView`` over the
       matching ids.

     Raises:
         ValueError: for any other kind of index (including slices with
             another step).
     """
     index, col_key, _ = self.parse_key(key)
     if isinstance(index, integer_types):
         row = self._table.row(index)
         if col_key != slice(None):
             return row[col_key]
         return row
     if isinstance(index, slice) and index.step in (None, 1):
         from .table_sliced import TableSlicedView
         return TableSlicedView(self._table, index, col_key)
     if isinstance(index, Iterable):
         from .table_selected import TableSelectedView
         selection = bitmap.asbitmap(self._table.index[index])
         return TableSelectedView(self._table, selection, col_key,
                                  self._table.name)
     # Format the message explicitly: exceptions do not apply %-formatting
     # to extra arguments the way logging calls do.
     raise ValueError('getitem not implemented for index "%s"' % (index,))
Beispiel #10
0
    def restricted_query(self,
                         operator_,
                         limit,
                         only_locs,
                         approximate=APPROX):  # blocking...
        """
        Returns the subset of only_locs matching the query.

        The bin holding ``limit`` is checked value by value with
        ``operator_`` unless ``approximate`` is set, in which case it is
        skipped. The bins on the matching side of it are included as a
        whole: those below for ``operator.lt`` / ``operator.le``, those
        above for any other operator.
        """
        only_locs = bitmap.asbitmap(only_locs)
        limit_bin = np.digitize(limit, self.bins)
        result = bitmap()
        if not approximate:
            candidates = np.array(self.bitmaps[limit_bin] & only_locs, np.int64)
            mask = operator_(self.column.loc[candidates], limit)
            result.update(candidates[mask])

        looks_below = operator_ in (operator.lt, operator.le)
        full_bins = (self.bitmaps[:limit_bin] if looks_below
                     else self.bitmaps[limit_bin + 1:])
        for bin_bm in full_bins:
            result.update(bin_bm & only_locs)
        return result