Beispiel #1
0
 def __init__(self, scheduler=None, **kwds):
     """Declare the two required Table inputs and set up the join state.

     Registers the required ``first`` and ``second`` input slots, forwards
     ``scheduler`` and the remaining keywords to the parent constructor,
     then stores the keywords selected by ``_filter_kwds`` for ``join``
     and creates the module's ``Dialog``.
     """
     input_slots = [
         SlotDescriptor(slot_name, type=Table, required=True)
         for slot_name in ('first', 'second')
     ]
     self._add_slots(kwds, 'input_descriptors', input_slots)
     super(Paste, self).__init__(scheduler=scheduler, **kwds)
     self.join_kwds = self._filter_kwds(kwds, join)
     self._dialog = Dialog(self)
Beispiel #2
0
 def __init__(self, **kwds):
     """Register the module's slots and initialise its state.

     Declares a required ``table`` input slot and an optional ``min``
     output slot (both typed ``Table``) before handing the remaining
     keywords to the parent constructor.
     """
     table_input = SlotDescriptor('table', type=Table, required=True)
     self._add_slots(kwds, 'input_descriptors', [table_input])
     min_output = SlotDescriptor('min', type=Table, required=False)
     self._add_slots(kwds, 'output_descriptors', [min_output])
     super(IdxMin, self).__init__(**kwds)
     self._min = None
     self.default_step_size = 10000
Beispiel #3
0
 def __init__(self, **kwds):
     """Register the ``inp``/``out`` slots and validate the parameters.

     Declares a required ``inp`` input slot and an optional ``out``
     output slot, then initialises the parent module.

     Raises:
         ProgressiveError: when ``params.delay`` is NaN and
             ``params.reads`` is -1, i.e. neither parameter was set.
     """
     self._add_slots(kwds, 'input_descriptors',
                     [SlotDescriptor('inp', required=True)])
     self._add_slots(kwds, 'output_descriptors',
                     [SlotDescriptor('out', required=False)])
     super(Wait, self).__init__(**kwds)
     if np.isnan(self.params.delay) and self.params.reads == -1:
         # Interpolate the module name here: an exception constructor does
         # not expand %-placeholders, so passing the name as a separate
         # argument would leave a literal "%s" in the message.
         raise ProgressiveError(
             'Module %s needs either a delay or '
             'a number of reads, not both' % (self.pretty_typename(),))
Beispiel #4
0
 def __init__(self, columns=None, **kwds):
     """Declare the ``min``/``max`` slots and initialise the module state.

     ``min`` and ``max`` are registered both as required inputs and as
     optional outputs, all typed ``BaseTable``. ``columns`` is stored
     as given.
     """
     slot_names = ('min', 'max')
     required_inputs = [
         SlotDescriptor(slot_name, type=BaseTable, required=True)
         for slot_name in slot_names
     ]
     optional_outputs = [
         SlotDescriptor(slot_name, type=BaseTable, required=False)
         for slot_name in slot_names
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     self._add_slots(kwds, 'output_descriptors', optional_outputs)
     super(Histograms, self).__init__(**kwds)
     self.default_step_size = 1
     self._histogram = {}
     self._columns = columns
Beispiel #5
0
class Wait(Module):
    """Module that stays not-ready until a delay or a read count is reached.

    Parameters:
        delay: compared against the number of entries in the trace of the
            module feeding ``inp``; NaN means "not set".
        reads: compared against the length of the data on ``inp``;
            -1 means "not set".
    """

    parameters = [("delay", np.dtype(float), np.nan), ("reads", np.dtype(int), -1)]
    inputs = [SlotDescriptor("inp", required=True)]
    outputs = [SlotDescriptor("out", required=False)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module and check that one parameter is set.

        Raises:
            ProgressiveError: when ``delay`` is NaN and ``reads`` is -1.
        """
        super(Wait, self).__init__(**kwds)
        if np.isnan(self.params.delay) and self.params.reads == -1:
            # Interpolate the module name here: an exception constructor
            # does not expand %-placeholders, so passing the name as a
            # separate argument would leave a literal "%s" in the message.
            raise ProgressiveError(
                "Module %s needs either a delay or a number of reads, not both"
                % (self.pretty_typename(),)
            )

    def is_ready(self) -> bool:
        """Tell whether the configured delay or read count has been reached."""
        if not super(Wait, self).is_ready():
            return False
        if self.is_zombie():
            return True  # give it a chance to run before it dies
        delay = self.params.delay
        reads = self.params.reads
        if np.isnan(delay) and reads < 0:
            return False
        inslot = self.get_input_slot("inp")
        assert inslot is not None
        trace = inslot.output_module.tracer.trace_stats()
        if len(trace) == 0:
            return False
        # ``delay`` takes precedence over ``reads`` when both are set.
        if not np.isnan(delay):
            return bool(len(trace) >= delay)
        elif reads >= 0:
            return bool(len(inslot.data()) >= reads)
        return False

    def get_data(self, name: str) -> Any:
        """Return the data of the ``inp`` slot for ``name == "inp"``, else None."""
        if name == "inp":
            slot = self.get_input_slot("inp")
            if slot is not None:
                return slot.data()
        return None

    def predict_step_size(self, duration: float) -> int:
        """Always predict a step size of 1, whatever ``duration`` is."""
        return 1

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Clear the buffers of ``inp`` (if connected) and report one blocked step."""
        slot = self.get_input_slot("inp")
        if slot is not None:
            slot.clear_buffers()
        return self._return_run_step(self.state_blocked, steps_run=1)
Beispiel #6
0
 def __init__(self, columns=None, **kwds):
     """Declare the required ``table`` input and initialise the state.

     The parent constructor is called with ``dataframe_slot='table'``;
     ``columns`` is stored as given.
     """
     table_slot = SlotDescriptor('table', type=Table, required=True)
     self._add_slots(kwds, 'input_descriptors', [table_slot])
     super(Var, self).__init__(dataframe_slot='table', **kwds)
     self.default_step_size = 1000
     self._data = {}
     self._columns = columns
Beispiel #7
0
 def __init__(self, nary='table', **kwds):
     """Declare the required ``table`` input and record the n-ary slot name.

     ``nary`` is stored on the instance and also becomes the only entry
     of ``self.inputs``.
     """
     table_slot = SlotDescriptor('table', type=BaseTable, required=True)
     self._add_slots(kwds, 'input_descriptors', [table_slot])
     super(NAry, self).__init__(**kwds)
     self.nary = nary
     self.inputs = [self.nary]
Beispiel #8
0
 def __init__(self, columns=None, **kwds):
     """Declare the required ``table`` input and build the ``CxxMax`` helper.

     ``columns`` is stored as given; the ``CxxMax`` object is created last,
     once the other attributes are in place.
     """
     table_slot = SlotDescriptor('table', type=Table, required=True)
     self._add_slots(kwds, 'input_descriptors', [table_slot])
     super(Max, self).__init__(**kwds)
     self.default_step_size = 10000
     self._columns = columns
     self.cxx_module = CxxMax(self)
Beispiel #9
0
class Sink(Module):
    """Base class for modules supporting a variable number of input slots.

    Declares a single required ``inp`` slot flagged ``multiple=True``.
    ``is_ready`` always marks the module terminated and returns ``False``;
    ``run_step`` is left for subclasses to implement.
    """

    inputs = [SlotDescriptor("inp", type=None, required=True, multiple=True)]

    def __init__(self, slot_name: str = "inp", **kwds: Any) -> None:
        """Initialise the module and remember ``slot_name`` as default slot."""
        super().__init__(**kwds)
        self.slot_name = slot_name

    def predict_step_size(self, duration: float) -> int:
        """Always predict a step size of 1, whatever ``duration`` is."""
        return 1

    def get_input_slot_multiple(self, name: Optional[str] = None) -> List[str]:
        """Delegate to the parent lookup, defaulting ``name`` to ``slot_name``."""
        effective_name = self.slot_name if name is None else name
        return super().get_input_slot_multiple(effective_name)

    def prepare_run(self, run_number: int) -> None:
        """Move a zombie module to the terminated state."""
        is_zombie = self.state == Module.state_zombie
        if is_zombie:
            self.state = Module.state_terminated

    def is_ready(self) -> bool:
        """Mark the module as terminated and report it as not ready."""
        self.state = Module.state_terminated
        return False

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:  # pragma no cover
        """Not implemented in the base class."""
        raise NotImplementedError("run_step not defined")
Beispiel #10
0
class LastRow(TableModule):
    """Keep a one-row result table holding the last row of the ``table`` input.

    With ``reset_index`` truthy the row is written at location 0;
    otherwise it is added under the index of the source row.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, reset_index: Optional[bool] = True, **kwds: Any) -> None:
        """Initialise the module and store the ``reset_index`` flag."""
        super().__init__(**kwds)
        self._reset_index = reset_index

    def predict_step_size(self, duration: float) -> int:
        """Always predict a step size of 1, whatever ``duration`` is."""
        return 1

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Refresh the result with the current last row of the input."""
        slot = self.get_input_slot("table")
        assert slot is not None
        slot.clear_buffers()
        source = slot.data()

        if source is not None:
            last_row = source.last()
            first_time = self.result is None
            if first_time:
                self.result = Table(
                    self.generate_table_name("LastRow"), dshape=source.dshape
                )
            if not self._reset_index:
                # Keep the source index: drop the previous row (if any)
                # and add the new one under its own index.
                if not first_time:
                    del self.table.loc[0]
                self.table.add(last_row, last_row.index)
            elif first_time:
                self.table.add(last_row)
            else:
                self.table.loc[0] = last_row

        return self._return_run_step(self.state_blocked, steps_run=1)
Beispiel #11
0
class MyStirrer(TableModule):
    """Copy created input rows to the result and alter some of them once.

    The rows to alter are chosen relative to the ``_sensitive_ids`` of the
    module named ``watched`` in the scheduler: either those ids themselves
    (``proc_sensitive`` truthy) or, among the first ten index entries of
    the result, the ones that are not in that set. ``mode == "delete"``
    removes the rows; any other mode writes ``value`` into column 0.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self,
                 watched: str,
                 proc_sensitive: bool = True,
                 mode: str = "delete",
                 value: float = 9999.0,
                 **kwds: Any):
        """Initialise the module and store its configuration."""
        super().__init__(**kwds)
        self.watched = watched
        self.proc_sensitive = proc_sensitive
        self.mode = mode
        self.default_step_size = 100
        self.value = value
        self.done = False

    def _alter_rows(self, locs: Any) -> None:
        """Delete ``locs`` from the result, or overwrite column 0 with ``value``."""
        if self.mode == "delete":
            del self.table.loc[locs]
        else:
            self.table.loc[locs, 0] = self.value

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Append the newly created rows, then alter rows until ``done``."""
        input_slot = self.get_input_slot("table")
        if not input_slot.created.any():
            return self._return_run_step(self.state_blocked, steps_run=0)
        created = input_slot.created.next(step_size)
        steps = indices_len(created)
        input_table = input_slot.data()
        if self.result is None:
            self.result = Table(
                self.generate_table_name("stirrer"),
                dshape=input_table.dshape,
            )
        new_rows = input_table.loc[fix_loc(created), :]
        self.table.append(new_rows)
        if not self.done:
            watched_module = self.scheduler()[self.watched]
            sensitive = bitmap(getattr(watched_module, "_sensitive_ids").values())
            if sensitive and self.proc_sensitive:
                self._alter_rows(sensitive)
                self.done = True
            elif sensitive and len(self.result) > 10:
                # Non-sensitive mode: look at the first ten index entries.
                for position in range(10):
                    row_id = self.table.index[position]
                    if row_id in sensitive:
                        continue
                    self._alter_rows(row_id)
                    self.done = True

        return self._return_run_step(self.next_state(input_slot),
                                     steps_run=steps)
Beispiel #12
0
 def __init__(self, column, **kwds):
     """Declare the ``table``/``min``/``max`` inputs and create the output table.

     All three input slots are required and typed ``Table``. The output
     table uses ``Histogram1D.schema`` and an explicit chunk shape for
     its ``array`` column.
     """
     required_inputs = [
         SlotDescriptor(slot_name, type=Table, required=True)
         for slot_name in ('table', 'min', 'max')
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     super(Histogram1D, self).__init__(dataframe_slot='table', **kwds)
     self.column = column
     self.total_read = 0
     self.default_step_size = 1000
     # Histogram state, filled in later.
     self._histo = self._edges = self._bounds = None
     table_name = self.generate_table_name('Histogram1D')
     self._table = Table(table_name,
                         dshape=Histogram1D.schema,
                         chunks={'array': (16384, 128)},
                         create=True)
Beispiel #13
0
class MergeDict(TableModule):
    """
    Binary module merging two dicts into a third one.

    Slots:
        first : module producing the first dict to merge
        second : module producing the second dict to merge
    Args:
        kwds : keywords forwarded to the parent constructor
    """

    inputs = [
        SlotDescriptor("first", type=PsDict, required=True),
        SlotDescriptor("second", type=PsDict, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module and create its ``Dialog``."""
        super().__init__(**kwds)
        self._dialog = Dialog(self)

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Consume all pending changes and merge both dicts into the result.

        Entries of the second dict are applied after those of the first.
        """
        first_slot = self.get_input_slot("first")
        second_slot = self.get_input_slot("second")
        assert first_slot is not None and second_slot is not None
        first_dict = first_slot.data()
        second_dict = second_slot.data()
        if first_dict is None or second_dict is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Drain every change buffer of both slots, first slot before second.
        for change_kind in ("created", "updated", "deleted"):
            for slot in (first_slot, second_slot):
                getattr(slot, change_kind).next()
        if self.result is None:
            self.result = PsDict(**first_dict, **second_dict)
        else:
            for source in (first_dict, second_dict):
                self.psdict.update(source)
        return self._return_run_step(self.next_state(first_slot), steps_run=1)
Beispiel #14
0
 def __init__(self, scheduler=None, **kwds):
     """Declare the required ``table`` input and cache the parameters.

     After the parent constructor has run, each listed parameter is
     copied from ``self.params`` to a private attribute of the same name
     prefixed with an underscore.
     """
     table_slot = SlotDescriptor('table', type=Table, required=True)
     self._add_slots(kwds, 'input_descriptors', [table_slot])
     super(DummyMod, self).__init__(scheduler=scheduler, **kwds)
     cached_params = ('update_column', 'update_rows', 'delete_rows',
                      'delete_threshold', 'update_threshold', 'mode')
     for param_name in cached_params:
         setattr(self, '_' + param_name, getattr(self.params, param_name))
Beispiel #15
0
class StirrerView(TableModule):
    """Expose created input rows through a selected view and drop some of them.

    Parameters:
        update_column: stored on the instance; not read by ``run_step``.
        delete_rows: ``None`` to delete nothing, an ``int`` to delete that
            many randomly chosen rows, ``"half"`` to delete a random half,
            ``"all"`` to delete every row present before the step, or any
            other value that ``bitmap`` accepts to delete exactly those
            rows.
        delete_threshold: when not ``None``, deletion only happens once the
            view held more rows than this before the step.
        fixed_step_size: currently ignored (see ``run_step``).
        mode: stored on the instance; not read by ``run_step``.
    """

    parameters = [
        ("update_column", np.dtype(object), ""),
        ("delete_rows", np.dtype(object), None),
        ("delete_threshold", np.dtype(object), None),
        ("fixed_step_size", np.dtype(np.int_), 0),
        ("mode", np.dtype(object), "random"),
    ]
    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module and cache its parameters."""
        super().__init__(**kwds)
        self._update_column: str = self.params.update_column
        # Keep the raw parameter value. Reducing it to a bool made every
        # non-None setting behave like the integer 1 (bool is an int), so
        # the "half", "all" and explicit-list modes could never be reached.
        self._delete_rows: Any = self.params.delete_rows
        self._delete_threshold: Optional[int] = self.params.delete_threshold
        self._mode: str = self.params.mode

    def test_delete_threshold(self, val: bitmap) -> bool:
        """Return True when no threshold is set or ``val`` is longer than it."""
        if self._delete_threshold is None:
            return True
        return len(val) > self._delete_threshold

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Add the created rows to the selection, then apply ``delete_rows``."""
        # NOTE(review): the trailing ``and False`` deliberately disables
        # fixed_step_size; kept as found.
        if self.params.fixed_step_size and False:
            step_size = self.params.fixed_step_size
        input_slot = self.get_input_slot("table")
        assert input_slot is not None
        if not input_slot.created.any():
            return self._return_run_step(self.state_blocked, steps_run=0)
        created = input_slot.created.next(length=step_size, as_slice=False)
        steps = indices_len(created)
        input_table = input_slot.data()
        if self.result is None:
            self.result = TableSelectedView(input_table, bitmap([]))
        # Rows present before this step: only these are deletion candidates.
        before_ = bitmap(self.table.index)
        self.selected.selection |= created
        delete_rows = self._delete_rows
        if delete_rows is not None and self.test_delete_threshold(before_):
            if isinstance(delete_rows, str):
                if delete_rows == "half":
                    delete = random.sample(tuple(before_), len(before_) // 2)
                elif delete_rows == "all":
                    delete = before_
                else:
                    delete = delete_rows
            elif isinstance(delete_rows, int):
                delete = random.sample(tuple(before_),
                                       min(delete_rows, len(before_)))
            else:
                delete = delete_rows
            self.selected.selection -= bitmap(delete)
        return self._return_run_step(self.next_state(input_slot),
                                     steps_run=steps)
class FooABC(TableModule):
    """Module with four required Table inputs that logs each step.

    Every call to ``run_step_impl`` appends one row
    ``(run_number, step_size)`` to the result table.
    """

    inputs = [
        SlotDescriptor("a", type=Table, required=True),
        SlotDescriptor("b", type=Table, required=True),
        SlotDescriptor("c", type=Table, required=True),
        SlotDescriptor("d", type=Table, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module with ``output_required=False``."""
        super().__init__(output_required=False, **kwds)

    def run_step_impl(self, ctx: _CtxImpl, run_number: int,
                      step_size: int) -> ReturnRunStep:
        """Consume the created buffers of all slots and record the call."""
        if self.result is None:
            table_name = self.generate_table_name("Foo")
            self.result = Table(table_name,
                                dshape="{a: int, b: int}",
                                create=True)
        for slot_name in ("a", "b", "c", "d"):
            slot = getattr(ctx, slot_name)
            slot.created.next()
        row = {"a": [run_number], "b": [step_size]}
        self.table.append(row)
        return self._return_run_step(self.state_blocked, steps_run=0)
Beispiel #17
0
    def __init__(self, colormap=None, **kwds):
        """Declare the ``array`` input and create the heatmap output table.

        The parent constructor is called with ``table_slot='heatmap'``.
        ``colormap`` is stored as given, and the output table is created
        with ``Heatmap.schema``.
        """
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('array', type=Table)])
        super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
        self.colormap = colormap
        self.default_step_size = 1

        name = self.generate_table_name('Heatmap')
        self._table = Table(name, dshape=Heatmap.schema, create=True)
Beispiel #18
0
 def __init__(self, x_column, y_column, with_output=True, **kwds):
     """Declare the ``table``/``min``/``max`` inputs and create the output table.

     All three input slots are required and typed ``Table``. The output
     table uses ``Histogram2D.schema`` and an explicit chunk shape for
     its ``array`` column.
     """
     required_inputs = [
         SlotDescriptor(slot_name, type=Table, required=True)
         for slot_name in ('table', 'min', 'max')
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     super(Histogram2D, self).__init__(dataframe_slot='table', **kwds)
     self.x_column = x_column
     self.y_column = y_column
     self.default_step_size = 10000
     self.total_read = 0
     # Histogram state, filled in later.
     self._histo = None
     self._xedges = self._yedges = None
     self._bounds = None
     self._with_output = with_output
     self._heatmap_cache = None
     table_name = self.generate_table_name('Histogram2D')
     self._table = Table(table_name,
                         dshape=Histogram2D.schema,
                         chunks={'array': (1, 64, 64)},
                         create=True)
class Hadamard(TableModule):
    """Multiply, column by column, the created rows of two input tables.

    Both inputs (``x1`` and ``x2``) are required. Any update or deletion
    on either input restarts the computation from an empty result.
    """

    inputs = [
        SlotDescriptor("x1", type=Table, required=True),
        SlotDescriptor("x2", type=Table, required=True),
    ]

    def reset(self) -> None:
        """Empty the result table, if it exists."""
        if self.result is not None:
            self.table.resize(0)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Append the element-wise products of the next created rows."""
        x1 = self.get_input_slot("x1")
        x2 = self.get_input_slot("x2")
        must_restart = (
            x1.updated.any()
            or x1.deleted.any()
            or x2.updated.any()
            or x2.deleted.any()
        )
        if must_restart:
            x1.reset()
            x2.reset()
            if self.result is not None:
                self.table.resize(0)
            x1.update(run_number)
            x2.update(run_number)
        # Consume the same number of rows from both inputs.
        step_size = min(x1.created.length(), x2.created.length(), step_size)
        x1_indices = x1.created.next(step_size)
        x2_indices = x2.created.next(step_size)
        left = x1.data().loc[fix_loc(x1_indices)]
        right = x2.data().loc[fix_loc(x2_indices)]
        assert left.columns == right.columns
        products = {
            col: np.multiply(left[col].value, right[col].value)
            for col in left.columns
        }
        if self.result is None:
            self.result = Table(name="simple_hadamard", data=products, create=True)
        else:
            self.table.append(products)
        return self._return_run_step(self.next_state(x1), steps_run=step_size)
Beispiel #20
0
    def __init__(self, column, scheduler=None, **kwds):
        """Declare the slots and initialise the index state.

        Registers a required ``table`` input and two optional outputs,
        ``min_out`` and ``max_out`` (all typed ``Table``), then forwards
        ``scheduler`` and the remaining keywords to the parent constructor.
        """
        input_slots = [
            SlotDescriptor('table', type=Table, required=True),
            # SlotDescriptor('min', type=Table, required=True),
            # SlotDescriptor('max', type=Table, required=True)
        ]
        output_slots = [
            SlotDescriptor(slot_name, type=Table, required=False)
            for slot_name in ('min_out', 'max_out')
        ]
        self._add_slots(kwds, 'input_descriptors', input_slots)
        self._add_slots(kwds, 'output_descriptors', output_slots)

        super(HistogramIndex, self).__init__(scheduler=scheduler, **kwds)
        self.column = column
        self._impl = None  # will be created when the init_threshold is reached
        # will be filled when the table is read
        # so realistic initial values for min and max were available
        self.selection = bitmap()
        self.input_module = None
        self.input_slot = None
        self._input_table = None
        self._min_table = self._max_table = None
Beispiel #21
0
class FilterMod(TableModule):
    """Select the input rows for which the ``expr`` parameter evaluates true.

    The result is a ``TableSelectedView`` over the input table; its
    selection grows with created rows that match and shrinks with
    deleted rows. Any update on the input restarts the selection.
    """

    parameters = [
        ("expr", np.dtype(object), "unknown"),
        ("user_dict", np.dtype(object), None),
    ]

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module."""
        super().__init__(**kwds)

    def reset(self) -> None:
        """Empty the selection of the result view, if it exists."""
        if self.result is not None:
            self.selected.selection = bitmap([])

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Process pending deletions and creations on the input table."""
        input_slot = self.get_input_slot("table")
        assert input_slot is not None
        input_table = input_slot.data()
        if input_table is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        if self.result is None:
            self.result = TableSelectedView(input_table, bitmap([]))
        if input_slot.updated.any():
            # Updates invalidate previous evaluations: start over.
            input_slot.reset()
            input_slot.update(run_number)
            self.reset()
        steps = 0
        if input_slot.deleted.any():
            removed = input_slot.deleted.next(length=step_size, as_slice=False)
            self.selected.selection -= removed
            steps += indices_len(removed)
        if input_slot.created.any():
            added = input_slot.created.next(length=step_size, as_slice=False)
            locs = np.array(fix_loc(added))
            steps += indices_len(added)
            matching = input_table.eval(
                expr=self.params.expr,
                locs=locs,
                as_slice=False,
                result_object="index",
            )
            self.selected.selection |= bitmap(matching)
        if steps:
            return self._return_run_step(self.next_state(input_slot), steps)
        return self._return_run_step(self.state_blocked, steps_run=0)
Beispiel #22
0
class MyResetter(TableModule):
    """Publish a ``reset`` flag that turns False once the input is big enough.

    The result is a ``PsDict`` initialised to ``{"reset": True}``.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, threshold: int, **kwds: Any) -> None:
        """Initialise the module with the length ``threshold`` to watch for."""
        super().__init__(**kwds)
        self._threshold = threshold
        self.result = PsDict({"reset": True})

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Clear the input buffers and lower the flag when the threshold is met."""
        input_slot = self.get_input_slot("table")
        input_slot.clear_buffers()
        data = input_slot.data()
        threshold_reached = data and len(data) >= self._threshold
        if threshold_reached:
            self.psdict["reset"] = False
        next_state = self.next_state(input_slot)
        return self._return_run_step(next_state, steps_run=step_size)
Beispiel #23
0
    def __init__(self, column, min_column=None, max_column=None, reset_index=False, **kwds):
        """Declare the required ``table`` input and create the stats table.

        When ``min_column`` or ``max_column`` is ``None`` it defaults to
        ``'_<column>_min'`` / ``'_<column>_max'``. The output table has one
        ``float64`` column for each of the two names.
        """
        table_slot = SlotDescriptor('table', type=Table, required=True)
        self._add_slots(kwds, 'input_descriptors', [table_slot])
        super(Stats, self).__init__(table_slot='stats', **kwds)
        self._column = column
        self.default_step_size = 10000

        self._min_column = (
            '_' + str(column) + '_min' if min_column is None else min_column)
        self._max_column = (
            '_' + str(column) + '_max' if max_column is None else max_column)
        self._reset_index = reset_index
        self.schema = ''.join([
            '{', self._min_column, ': float64, ',
            self._max_column, ': float64}',
        ])
        self._table = Table(get_random_name('stats_'), dshape=self.schema)
Beispiel #24
0
class Sample(TableModule):
    """Module whose steps are delegated to a ``CxxSample`` object."""

    parameters = [("history", np.dtype(int), 3)]
    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, columns: Optional[List[str]] = None, **kwds: Any) -> None:
        """Initialise the module, store ``columns`` and build the delegate."""
        super().__init__(**kwds)
        self._columns = columns
        self.default_step_size = 10000
        self.cxx_module = CxxSample(self)

    def is_ready(self) -> bool:
        """Be ready when rows were created on ``table``, else ask the parent."""
        has_created = self.get_input_slot("table").created.any()
        return True if has_created else super().is_ready()

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Forward the step to ``cxx_module.run`` and return its result."""
        delegate = self.cxx_module
        return delegate.run(run_number, step_size, howlong)  # type: ignore
Beispiel #25
0
class NAry(TableModule):
    """Base class for modules supporting a variable number of input slots.

    Declares a single required ``table`` slot flagged ``multiple=True``;
    ``run_step`` is left for subclasses to implement.
    """

    inputs = [SlotDescriptor("table", type=BaseTable, required=True, multiple=True)]

    def __init__(self, nary: str = "table", **kwds: Any) -> None:
        """Initialise the module and remember ``nary`` as the default slot."""
        super().__init__(**kwds)
        self.nary = nary

    def predict_step_size(self, duration: float) -> int:
        """Always predict a step size of 1, whatever ``duration`` is."""
        return 1

    def get_input_slot_multiple(self, name: Optional[str] = None) -> List[str]:
        """Delegate to the parent lookup, defaulting ``name`` to ``nary``."""
        effective_name = self.nary if name is None else name
        return super().get_input_slot_multiple(effective_name)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:  # pragma no cover
        """Not implemented in the base class."""
        raise NotImplementedError("run_step not defined")
class Max(TableModule):
    """
    Simplified Max, adapted for documentation

    Keeps in the result (a ``PsDict``) the running per-key maximum of the
    rows created on the required ``table`` input. Any update or deletion
    on the input restarts the computation.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module with a default step size of 10000."""
        super().__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self) -> bool:
        """Be ready when rows were created on ``table``, else ask the parent."""
        if self.get_input_slot("table").created.any():
            return True
        return super().is_ready()

    def reset(self) -> None:
        """Fill the existing result entries with ``-inf``, if a result exists."""
        if self.result is not None:
            self.psdict.fill(-np.inf)

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Merge the maxima of the next created rows into the result."""
        slot = self.get_input_slot("table")
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            # Refill the existing keys with -inf instead of emptying the
            # dict: the merge loop below only visits keys that are already
            # present, so a cleared result would never be repopulated.
            self.reset()
            slot.update(run_number)
        indices = slot.created.next(step_size)
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        data = slot.data()
        op = data.loc[fix_loc(indices)].max(keepdims=False)
        if self.result is None:
            self.result = PsDict(op)
        else:
            for k, v in self.psdict.items():
                self.result[k] = np.maximum(op[k], v)
        return self._return_run_step(self.next_state(slot), steps_run=steps)
Beispiel #27
0
class Dict2Table(TableModule):
    """
    dict to table converter

    Slots:
        dict_ : module producing the dict to mirror as a one-row table
    Args:
        kwds : keywords forwarded to the parent constructor
    """

    inputs = [SlotDescriptor("dict_", type=PsDict, required=True)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module."""
        super().__init__(**kwds)

    def run_step(
        self, run_number: int, step_size: int, howlong: float
    ) -> ReturnRunStep:
        """Write the current content of the input dict into row 0."""
        dict_slot = self.get_input_slot("dict_")
        assert dict_slot is not None
        source = dict_slot.data()
        if source is None:
            return self._return_run_step(self.state_blocked, steps_run=0)
        has_changes = (
            dict_slot.created.any()
            or dict_slot.updated.any()
            or dict_slot.deleted.any()
        )
        if not has_changes:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Drain all three change buffers; only the current values matter.
        for change_kind in ("created", "updated", "deleted"):
            getattr(dict_slot, change_kind).next()
        if self.result is None:
            self.result = Table(name=None, dshape=source.dshape)
        if len(self.result) != 0:
            self.table.loc[0] = source.array
        else:  # or history:
            self.table.append(source.as_row)
        return self._return_run_step(self.next_state(dict_slot), steps_run=1)
class MaxDec(TableModule):
    """
    Simplified Max with decorated run_step(), adapted for documentation

    Keeps in the result (a ``PsDict``) the running per-key maximum of the
    rows created on the required ``table`` input.
    """

    inputs = [SlotDescriptor("table", type=Table, required=True)]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module with a default step size of 10000."""
        super().__init__(**kwds)
        self.default_step_size = 10000

    def is_ready(self) -> bool:
        """Be ready when rows were created on ``table``, else ask the parent."""
        has_created = self.get_input_slot("table").created.any()
        return True if has_created else super().is_ready()

    def reset(self) -> None:
        """Fill the existing result entries with ``-inf``, if a result exists."""
        if self.result is None:
            return
        self.psdict.fill(-np.inf)

    @process_slot("table", reset_cb="reset")
    @run_if_any
    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Merge the maxima of the next created rows into the result."""
        assert self.context
        with self.context as ctx:
            table_slot = ctx.table
            indices = table_slot.created.next(step_size)  # returns a slice
            steps = indices_len(indices)
            input_df = ctx.table.data()
            chunk_max = input_df.loc[fix_loc(indices)].max(keepdims=False)
            if self.result is None:
                self.result = PsDict(chunk_max)
            else:
                for key, current in self.psdict.items():
                    self.result[key] = np.maximum(chunk_max[key], current)
            next_state = self.next_state(ctx.table)
            return self._return_run_step(next_state, steps_run=steps)
Beispiel #29
0
class BinJoin(TableModule):
    """
    Binary join module to join two tables and return a third one.

    Slots:
        first : Table module producing the first table to join
        second : Table module producing the second table to join
    Args:
        kwds : argument to pass to the join function
    """

    inputs = [
        SlotDescriptor("first", type=Table, required=True),
        SlotDescriptor("second", type=Table, required=True),
    ]

    def __init__(self, **kwds: Any) -> None:
        """Initialise the module, keep the join keywords and create the dialog."""
        super().__init__(**kwds)
        self.join_kwds = filter_kwds(kwds, join)
        self._dialog = Dialog(self)

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Collect pending created/updated rows and start or continue the join.

        Any deletion on either input restarts the join from scratch.
        """
        first_slot = self.get_input_slot("first")
        second_slot = self.get_input_slot("second")
        steps = 0
        if first_slot.deleted.any() or second_slot.deleted.any():
            first_slot.reset()
            second_slot.reset()
            if self.result is not None:
                self.table.resize(0)
                join_reset(self._dialog)
            first_slot.update(run_number)
            second_slot.update(run_number)
        created = {}
        updated = {}
        # The join functions key the first input as "table" and the second
        # as "other"; created rows are gathered before updated ones.
        keyed_slots = (("table", first_slot), ("other", second_slot))
        for change_kind, pending in (("created", created), ("updated", updated)):
            for key, slot in keyed_slots:
                changes = getattr(slot, change_kind)
                if changes.any():
                    indices = changes.next(length=step_size)
                    steps += indices_len(indices)
                    pending[key] = indices
        first_table = first_slot.data()
        second_table = second_slot.data()
        if self._dialog.is_started:
            join_cont(
                first_table,
                second_table,
                dialog=self._dialog,
                created=created,
                updated=updated,
            )
        else:
            join_start(first_table,
                       second_table,
                       dialog=self._dialog,
                       created=created,
                       updated=updated,
                       **self.join_kwds)
        return self._return_run_step(self.next_state(first_slot),
                                     steps_run=steps)
Beispiel #30
0
class RangeQuery(TableModule):
    """Range selection over one column of the ``table`` input.

    On each step the module reads a lower and an upper bound from the
    ``lower``/``upper`` inputs, replaces bounds that are missing, NaN or
    outside the values read from the ``min``/``max`` inputs, and hands the
    effective bounds to a ``RangeQueryImpl``. The values of the
    implementation's result become the selection of the module's result
    view, and the effective bounds are served on the ``min``/``max``
    outputs.
    """

    parameters = [
        # Column the query applies to.
        ("column", np.dtype(object), "unknown"),
        # Keys looked up in the bound inputs; an empty value means
        # "use ``column``" (see the watched_key_* properties).
        ("watched_key_lower", np.dtype(object), ""),
        ("watched_key_upper", np.dtype(object), ""),
        # ('hist_index', object, None) # to improve ...
    ]
    inputs = [
        # Data being queried.
        SlotDescriptor("table", type=Table, required=True),
        # Requested bounds.
        # NOTE(review): declared as Table, but run_step reads them with
        # ``.get(key)`` like a dictionary -- confirm the intended type.
        SlotDescriptor("lower", type=Table, required=False),
        SlotDescriptor("upper", type=Table, required=False),
        # Limits used to validate the requested bounds.
        SlotDescriptor("min", type=PsDict, required=False),
        SlotDescriptor("max", type=PsDict, required=False),
        # Slot whose producing module is passed to RangeQueryImpl.
        SlotDescriptor("hist", type=Table, required=True),
    ]
    outputs = [
        # Effective bounds, served by get_data().
        # NOTE(review): declared as Table while the backing attributes are
        # annotated Optional[PsDict] -- confirm.
        SlotDescriptor("min", type=Table, required=False),
        SlotDescriptor("max", type=Table, required=False),
    ]

    def __init__(self, approximate: bool = False, **kwds: Any) -> None:
        """Initialise the module state.

        Args:
            approximate: flag kept on the instance and handed, together
                with the ``column`` parameter, to ``RangeQueryImpl``.
            **kwds: forwarded unchanged to the parent constructor.
        """
        super(RangeQuery, self).__init__(**kwds)
        self._impl: RangeQueryImpl = RangeQueryImpl(
            self.params.column,
            approximate,
        )
        self._approximate = approximate
        self.default_step_size = 1000
        # Both are filled in by create_dependent_modules().
        self.input_module: Optional[Module] = None
        self.hist_index: Optional[HistogramIndex] = None
        # Dictionaries returned by get_data() for the "min"/"max" outputs.
        self._min_table: Optional[PsDict] = None
        self._max_table: Optional[PsDict] = None

    # @property
    # def hist_index(self) -> Optional[HistogramIndex]:
    #     return self._hist_index

    # @hist_index.setter
    # def hist_index(self, hi: HistogramIndex) -> None:
    #     self._hist_index = hi
    #     self._impl = RangeQueryImpl(self._column, hi, approximate=self._approximate)
    @property
    def column(self) -> str:
        """The ``column`` parameter converted to ``str``."""
        column_param = self.params.column
        return str(column_param)

    @property
    def watched_key_lower(self) -> str:
        """Key used for the lower bound.

        Returns the ``watched_key_lower`` parameter when it is truthy and
        ``column`` otherwise (the parameter defaults to an empty string).
        """
        key = self.params.watched_key_lower
        return key if key else self.column

    @property
    def watched_key_upper(self) -> str:
        """Key used for the upper bound.

        Returns the ``watched_key_upper`` parameter when it is truthy and
        ``column`` otherwise (the parameter defaults to an empty string).
        """
        key = self.params.watched_key_upper
        return key if key else self.column

    def create_dependent_modules(self,
                                 input_module: Module,
                                 input_slot: str,
                                 min_: Optional[Module] = None,
                                 max_: Optional[Module] = None,
                                 min_value: Optional[Module] = None,
                                 max_value: Optional[Module] = None,
                                 hist_index: Optional[HistogramIndex] = None,
                                 **kwds: Any) -> RangeQuery:
        """Create the missing helper modules and connect everything.

        A second call is a no-op: once ``input_module`` has been recorded
        the method returns immediately.

        Args:
            input_module: module providing the data to query.
            input_slot: name of the output slot of ``input_module``; it is
                connected to the ``table`` input of both the histogram
                index and this module.
            min_: module whose ``result`` feeds the ``min`` input. When
                ``None`` a ``Min`` over ``column`` is created and fed from
                the histogram index ``min_out`` output.
            max_: same as ``min_`` for the ``max`` input (``Max``,
                ``max_out``).
            min_value: module whose ``result`` feeds the ``lower`` input.
                When ``None`` a ``Variable`` is created with its ``like``
                input connected to ``min_``. Any other falsy value leaves
                ``lower`` unconnected.
            max_value: same as ``min_value`` for the ``upper`` input.
            hist_index: histogram index to use; created on ``column`` when
                ``None``.
            **kwds: accepted but not used here.

        Returns:
            This module.
        """
        if self.input_module is not None:
            # An earlier call already did the wiring.
            return self

        sched = self.scheduler()
        self.input_module = input_module
        self.input_slot = input_slot

        with sched:
            # Histogram index on the queried column.
            if hist_index is None:
                hist_index = HistogramIndex(column=self.params.column,
                                            group=self.name,
                                            scheduler=sched)
            hist_index.input.table = input_module.output[input_slot]

            # Min/max of the column, computed from the index outputs.
            if min_ is None:
                min_ = Min(group=self.name,
                           columns=[self.column],
                           scheduler=sched)
                min_.input.table = hist_index.output.min_out
            if max_ is None:
                max_ = Max(group=self.name,
                           columns=[self.column],
                           scheduler=sched)
                max_.input.table = hist_index.output.max_out

            # Variables holding the requested bounds.
            if min_value is None:
                min_value = Variable(group=self.name, scheduler=sched)
                min_value.input.like = min_.output.result
            if max_value is None:
                max_value = Variable(group=self.name, scheduler=sched)
                max_value.input.like = max_.output.result

            # Connect this module's own inputs.
            self.hist_index = hist_index
            self.input.hist = hist_index.output.result
            self.input.table = input_module.output[input_slot]
            if min_value:
                self.input.lower = min_value.output.result
            if max_value:
                self.input.upper = max_value.output.result
            self.input.min = min_.output.result
            self.input.max = max_.output.result

        # Keep handles on the helpers for later access.
        self.min = min_
        self.max = max_
        self.min_value = min_value
        self.max_value = max_value
        return self

    def _create_min_max(self) -> None:
        """Create the ``min``/``max`` output dictionaries if still missing.

        A missing min table starts at ``+inf`` and a missing max table at
        ``-inf``, both stored under ``column``.
        """
        initial_values = (("_min_table", np.inf), ("_max_table", -np.inf))
        for attr_name, start in initial_values:
            if getattr(self, attr_name) is None:
                setattr(self, attr_name, PsDict({self.column: start}))

    def _set_minmax_out(self, attr_: str, val: float) -> None:
        """Store ``val`` under ``column`` in the dictionary named ``attr_``.

        Args:
            attr_: name of the instance attribute holding the dictionary.
            val: value to record.

        The attribute is replaced by a fresh ``PsDict`` when it is ``None``
        and updated in place otherwise.
        """
        entry = {self.column: val}
        table = getattr(self, attr_)
        if table is not None:
            table.update(entry)
            return
        setattr(self, attr_, PsDict(entry))

    def _set_min_out(self, val: float) -> None:
        """Record ``val`` in ``_min_table``, the data of the ``min`` output."""
        return self._set_minmax_out(attr_="_min_table", val=val)

    def _set_max_out(self, val: float) -> None:
        """Record ``val`` in ``_max_table``, the data of the ``max`` output."""
        return self._set_minmax_out(attr_="_max_table", val=val)

    def get_data(self, name: str) -> Any:
        """Return the data behind the output slot ``name``.

        ``"min"`` and ``"max"`` are answered from the local dictionaries;
        every other name is delegated to the parent class.
        """
        if name == "min":
            data = self._min_table
        elif name == "max":
            data = self._max_table
        else:
            data = super(RangeQuery, self).get_data(name)
        return data

    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Refresh the query bounds and process pending ``table`` changes.

        Args:
            run_number: not used by this implementation.
            step_size: passed as ``length`` when taking indices from the
                deleted/created/updated buffers of the ``table`` slot.
            howlong: not used by this implementation.

        Returns:
            A blocked state with zero steps when the bound or min/max
            inputs have no data yet, or when nothing is buffered on
            ``table`` and the bounds did not change. Otherwise the state
            computed by ``next_state`` for the ``table`` slot together with
            the number of indices taken from its buffers.
        """
        input_slot = self.get_input_slot("table")
        # Make sure the min/max output dictionaries exist.
        self._create_min_max()
        #
        # lower/upper
        #
        # Call next() on every pending change buffer of the bound slots.
        # Created or updated entries flag the limits as changed; deleted
        # entries are taken without setting the flag.
        lower_slot = self.get_input_slot("lower")
        # lower_slot.update(run_number)
        upper_slot = self.get_input_slot("upper")
        limit_changed = False
        if lower_slot.deleted.any():
            lower_slot.deleted.next()
        if lower_slot.updated.any():
            lower_slot.updated.next()
            limit_changed = True
        if lower_slot.created.any():
            lower_slot.created.next()
            limit_changed = True
        # Skip the second pass when both names resolve to the same slot
        # object, whose buffers were just handled above.
        if not (lower_slot is upper_slot):
            # upper_slot.update(run_number)
            if upper_slot.deleted.any():
                upper_slot.deleted.next()
            if upper_slot.updated.any():
                upper_slot.updated.next()
                limit_changed = True
            if upper_slot.created.any():
                upper_slot.created.next()
                limit_changed = True
        #
        # min/max
        #
        # Individual changes on min/max are not inspected: the buffers are
        # cleared and the current data is read further down.
        min_slot = self.get_input_slot("min")
        min_slot.clear_buffers()
        # min_slot.update(run_number)
        # min_slot.created.next()
        # min_slot.updated.next()
        # min_slot.deleted.next()
        max_slot = self.get_input_slot("max")
        max_slot.clear_buffers()
        # max_slot.update(run_number)
        # max_slot.created.next()
        # max_slot.updated.next()
        # max_slot.deleted.next()
        # Nothing can be done until both bound inputs hold some data.
        if (lower_slot.data() is None or upper_slot.data() is None
                or len(lower_slot.data()) == 0 or len(upper_slot.data()) == 0):
            return self._return_run_step(self.state_blocked, steps_run=0)
        lower_value = lower_slot.data().get(self.watched_key_lower)
        upper_value = upper_slot.data().get(self.watched_key_upper)
        # Same requirement for the min/max inputs.
        # NOTE(review): the lower/upper None tests below repeat the check
        # made a few lines above.
        if (lower_slot.data() is None or upper_slot.data() is None
                or min_slot.data() is None or max_slot.data() is None
                or len(min_slot.data()) == 0 or len(max_slot.data()) == 0):
            return self._return_run_step(self.state_blocked, steps_run=0)
        # NOTE(review): min/max are looked up with the watched keys, not
        # with ``column``; if the producers key their data differently,
        # get() yields None and the comparisons below would fail -- confirm.
        minv = min_slot.data().get(self.watched_key_lower)
        maxv = max_slot.data().get(self.watched_key_upper)
        # "*" selects the current minimum without flagging a change. A
        # missing, NaN or out-of-range lower bound also falls back to the
        # minimum, and is reported as a limit change.
        if lower_value == "*":
            lower_value = minv
        elif (lower_value is None or np.isnan(lower_value)
              or lower_value < minv or lower_value >= maxv):
            lower_value = minv
            limit_changed = True
        # Same treatment for the upper bound, which must additionally be
        # strictly greater than the effective lower bound.
        if upper_value == "*":
            upper_value = maxv
        elif (upper_value is None or np.isnan(upper_value)
              or upper_value > maxv or upper_value <= minv
              or upper_value <= lower_value):
            upper_value = maxv
            limit_changed = True
        # Record the effective bounds in the min/max output dictionaries.
        self._set_min_out(lower_value)
        self._set_max_out(upper_value)
        # input_slot.update(run_number)
        # No buffered change on the table and unchanged bounds: no work.
        if not input_slot.has_buffered() and not limit_changed:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # ...
        # Take up to ``step_size`` indices from each change buffer of the
        # table slot; ``steps`` accumulates how many were taken overall.
        steps = 0
        deleted: Optional[bitmap] = None
        if input_slot.deleted.any():
            deleted = input_slot.deleted.next(length=step_size, as_slice=False)
            steps += indices_len(deleted)
        created: Optional[bitmap] = None
        if input_slot.created.any():
            created = input_slot.created.next(length=step_size, as_slice=False)
            steps += indices_len(created)
        updated: Optional[bitmap] = None
        if input_slot.updated.any():
            updated = input_slot.updated.next(length=step_size, as_slice=False)
            steps += indices_len(updated)
        input_table = input_slot.data()
        # First useful step: create the result as a view over the input
        # table with an empty selection.
        if self.result is None:
            self.result = TableSelectedView(input_table, bitmap([]))
        assert self._impl
        # Only the module producing the "hist" slot is needed; its change
        # buffers are cleared without being read.
        hist_slot = self.get_input_slot("hist")
        hist_slot.clear_buffers()
        # start() on the first call (it also receives the input table),
        # resume() on the following ones.
        if not self._impl.is_started:
            self._impl.start(
                input_table,
                cast(HistogramIndex, hist_slot.output_module),
                lower_value,
                upper_value,
                limit_changed,
                created=created,
                updated=updated,
                deleted=deleted,
            )
        else:
            self._impl.resume(
                cast(HistogramIndex, hist_slot.output_module),
                lower_value,
                upper_value,
                limit_changed,
                created=created,
                updated=updated,
                deleted=deleted,
            )
        assert self._impl.result
        # Use the values of the implementation's result as the selection.
        self.selected.selection = self._impl.result._values
        return self._return_run_step(self.next_state(input_slot), steps)