def range_query(
    self, lower: float, upper: float, all_ids: bitmap, approximate: bool = APPROX
) -> bitmap:
    """
    Collect the ids of the rows whose value lies in [`lower`, `upper`[.

    The bounds are swapped when given in decreasing order. Bins located
    strictly between the two bins holding the bounds are taken as a whole.
    When that span covers more than half of the bins, the result is computed
    as `all_ids` minus the union of the remaining bins instead.

    Unless `approximate` is set, the bins holding the bounds themselves are
    filtered row by row against the actual column values; in approximate
    mode those boundary bins contribute nothing to the result.
    """
    lo, hi = (upper, lower) if lower > upper else (lower, upper)
    assert self.bins is not None
    bin_lo, bin_hi = np.digitize([lo, hi], self.bins)
    if bin_hi - bin_lo > len(self.bins) // 2:
        # Wide range: cheaper to subtract what is outside than to union
        # everything that is inside.
        outside = self.bitmaps[: bin_lo + 1] + self.bitmaps[bin_hi:]
        result = all_ids - bitmap.union(*outside)
    else:
        result = bitmap.union(*self.bitmaps[bin_lo + 1 : bin_hi])
    if approximate:
        return result
    # Exact mode: inspect the values stored in the boundary bin(s).
    refined = bitmap()
    low_ids = np.array(self.bitmaps[bin_lo], np.int64)
    low_values = self.column.loc[low_ids]
    if bin_lo == bin_hi:
        # Both bounds fall in the same bin: apply both tests at once.
        refined.update(low_ids[(lo <= low_values) & (low_values < hi)])
    else:
        refined.update(low_ids[lo <= low_values])
        high_ids = np.array(self.bitmaps[bin_hi], np.int64)
        high_values = self.column.loc[high_ids]
        refined.update(high_ids[high_values < hi])
    result.update(refined)
    return result
def _process_created(ret: Dict[str, Any]) -> None:
    """
    Propagate newly created rows of both inputs into the join table.

    Nothing happens when `created` is empty. For an "outer" join the work is
    delegated to `_process_created_outer`, which is the only consumer of
    `ret`. Otherwise the new rows of the first table extend the join table,
    rows present on both sides get their second-table columns filled, and
    the orphan sets kept in the dialog bag are updated on both sides.
    """
    bag = dialog.bag
    if not created:
        return
    if how == "outer":
        return _process_created_outer(ret)
    first_ids = created.get(first_key, None)
    second_ids = created.get(second_key, None)
    only_1st, common, only_2nd = inter_slice(first_ids, second_ids)
    assert isinstance(join_table, Table)
    if first_ids is not None:
        new_size = _len(first_ids)
        appends_without_gap = (
            isinstance(first_ids, slice)
            and join_table.is_identity
            and (
                join_table.last_id + 1 == first_ids.start
                or join_table.last_id == 0
            )
        )
        if appends_without_gap:
            # The new ids directly follow the existing ones.
            join_table.resize(new_size)
        else:
            # There are gaps: the ids actually present must be tracked.
            first_bm = bitmap.asbitmap(first_ids)
            join_table.resize(new_size, index=first_bm)
            known_ids = bag.get("existing_ids", None)
            if known_ids is None:
                bag["existing_ids"] = bitmap.asbitmap(join_table.index)
            else:
                bag["existing_ids"] = bitmap.union(
                    known_ids, bitmap.asbitmap(first_ids)
                )
        join_table.loc[_fix(first_ids), first_cols] = first.loc[
            _fix(first_ids), first.columns
        ]
    if not _void(common):
        join_table.loc[_fix(common), second_cols] = second.loc[
            _fix(common), second.columns
        ]
    # First matching: orphans already recorded for the second table against
    # the ids that are new on the first table only.
    new_first_only = bitmap.asbitmap(only_1st)
    matched = bag["second_orphans"] & new_first_only
    if matched:
        join_table.loc[matched, second_cols] = second.loc[matched, second.columns]
        bag["second_orphans"] = bag["second_orphans"] - matched
        new_first_only -= matched
    bag["first_orphans"] = bitmap.union(bag["first_orphans"], new_first_only)
    # Second matching: orphans already recorded for the first table against
    # the ids that are new on the second table only.
    new_second_only = bitmap.asbitmap(only_2nd)
    matched = bag["first_orphans"] & new_second_only
    if matched:
        join_table.loc[matched, second_cols] = second.loc[matched, second.columns]
        bag["first_orphans"] = bag["first_orphans"] - matched
        new_second_only -= matched
    bag["second_orphans"] = bitmap.union(bag["second_orphans"], new_second_only)
def restricted_range_query(
    self, lower: float, upper: float, only_locs: Any, approximate: bool = APPROX
) -> bitmap:
    """
    Collect the ids among `only_locs` whose value lies in [`lower`, `upper`[.

    The bounds are swapped when given in decreasing order and `only_locs` is
    converted to a bitmap. Bins located strictly between the two bins holding
    the bounds are intersected with `only_locs` and taken as a whole.

    Unless `approximate` is set, the bins holding the bounds themselves are
    restricted to `only_locs` and then filtered row by row against the
    actual column values; in approximate mode those boundary bins contribute
    nothing to the result.
    """
    lo, hi = (upper, lower) if lower > upper else (lower, upper)
    allowed = bitmap.asbitmap(only_locs)
    bin_lo, bin_hi = np.digitize([lo, hi], self.bins)  # type: ignore
    inner_bins = [(bm & allowed) for bm in self.bitmaps[bin_lo + 1 : bin_hi]]
    result = bitmap.union(*inner_bins)
    if approximate:
        return result
    # Exact mode: inspect the values stored in the boundary bin(s).
    refined = bitmap()
    low_ids = np.array(self.bitmaps[bin_lo] & allowed, np.int64)
    low_values = self.column.loc[low_ids]
    if bin_lo == bin_hi:
        # Both bounds fall in the same bin: apply both tests at once.
        refined.update(low_ids[(lo <= low_values) & (low_values < hi)])
    else:
        refined.update(low_ids[lo <= low_values])
        high_ids = np.array(self.bitmaps[bin_hi] & allowed, np.int64)
        high_values = self.column.loc[high_ids]
        refined.update(high_ids[high_values < hi])
    result.update(refined)
    return result
def run_step_progress(
    self, run_number: int, step_size: int, howlong: float
) -> ReturnRunStep:
    """
    Fold the rows newly created on the input slots into the selection.

    Every input slot returned by `get_input_slot_multiple()` must expose a
    `BaseTable` built on the same physical table. As soon as one slot
    reports updated or deleted rows, that slot and all the following ones
    are reset and the current selection is cleared before new ids are added.

    Ids created on every input during this step are added to the selection
    directly. Ids created on only some inputs are added only if they are
    present in the index of every input table.

    Args:
        run_number: not used by this implementation.
        step_size: maximum number of created ids consumed per slot.
        howlong: not used by this implementation.

    Returns:
        The run-step result, always in the blocked state, carrying the
        number of steps performed (0 when no slot had anything to process).
    """
    # This list must be bound to a value: a bare annotation
    # (`to_create: List[bitmap]`) does not create the variable, and the
    # `append`/`len` calls below would raise UnboundLocalError.
    to_create: List[bitmap] = []
    steps = 0
    tables = []
    ph_table = None
    reset_ = False
    for name in self.get_input_slot_multiple():
        slot = self.get_input_slot(name)
        t = slot.data()
        assert isinstance(t, BaseTable)
        if ph_table is None:
            ph_table = _get_physical_table(t)
        else:
            assert ph_table is _get_physical_table(t)
        tables.append(t)
        # Updates and deletions are not handled incrementally: they trigger
        # a reset, which then propagates to every remaining slot.
        if reset_ or slot.updated.any() or slot.deleted.any():
            slot.reset()
            reset_ = True
            steps += 1
        if slot.created.any():
            created = slot.created.next(step_size)
            to_create.append(bitmap.asbitmap(created))
            steps += indices_len(created)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)
    # Ids created on all inputs in this step certainly belong to the result.
    to_create_4sure = bitmap()
    if len(to_create) == len(tables):
        to_create_4sure = bitmap.intersection(*to_create)
    to_create_maybe = bitmap.union(*to_create)
    if not self.result:
        self.result = TableSelectedView(ph_table, bitmap([]))
    if reset_:
        self.selected.selection = bitmap([])
    self.selected.selection = self.selected.index | to_create_4sure
    # The remaining candidates are kept only when every table contains them.
    to_create_maybe -= to_create_4sure
    eff_create = to_create_maybe
    for t in tables:
        eff_create &= t.index
    self.selected.selection = self.selected.index | eff_create
    return self._return_run_step(self.state_blocked, steps)