Example #1
0
 def run_step(self, run_number, step_size, howlong):
     """Consume pending changes from the 'table' slot and refresh the selection.

     The deleted, created and updated buffers of the 'table' slot are each
     drained by at most ``step_size`` entries. If nothing was consumed, or
     the 'limit' slot has no data yet, the step returns in the blocked state.
     Otherwise the current limit value is handed to ``self._impl`` (started
     on first use, resumed afterwards) and the resulting values become the
     selection of the output view.
     """
     table_slot = self.get_input_slot('table')
     table_slot.update(run_number)
     # Drain the three change buffers in the order deleted/created/updated.
     steps = 0
     changes = {}
     for kind in ('deleted', 'created', 'updated'):
         buffer = getattr(table_slot, kind)
         chunk = None
         if buffer.any():
             chunk = buffer.next(step_size)
             steps += indices_len(chunk)
         changes[kind] = chunk
     with table_slot.lock:
         input_table = table_slot.data()
     if not self._table:
         self._table = TableSelectedView(input_table, bitmap([]))
     if steps == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     params = self.params
     limit_slot = self.get_input_slot('limit')
     limit_slot.update(run_number)
     # Deletions on the limit slot are consumed but do not count as a change.
     if limit_slot.deleted.any():
         limit_slot.deleted.next()
     limit_changed = False
     for kind in ('updated', 'created'):
         buffer = getattr(limit_slot, kind)
         if buffer.any():
             buffer.next()
             limit_changed = True
     if len(limit_slot.data()) == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     if params.limit_key:
         limit_value = limit_slot.data().last(params.limit_key)
     else:
         limit_value = limit_slot.data().last()[0]
     if self._impl.is_started:
         self._impl.resume(limit_value,
                           limit_changed,
                           created=changes['created'],
                           updated=changes['updated'],
                           deleted=changes['deleted'])
     else:
         self._impl.start(input_table,
                          limit_value,
                          limit_changed,
                          created=changes['created'],
                          updated=changes['updated'],
                          deleted=changes['deleted'])
     # Both paths publish the implementation's result as the new selection.
     self._table.selection = self._impl.result._values
     return self._return_run_step(self.next_state(table_slot),
                                  steps_run=steps)
Example #2
0
    def run_step(self, run_number, step_size, howlong):
        """Extend the buffered pairwise-distance matrix with newly created rows.

        Reads up to ``step_size`` created indices from the "df" slot, computes
        the distances among the new rows (and between previously processed
        rows and the new ones when a matrix already exists), writes them into
        the matrix held by ``self._buf`` and records the processed indices in
        ``self._last_index``.
        """
        dfslot = self.get_input_slot("df")
        df = dfslot.data()
        dfslot.update(run_number)
        if dfslot.has_updated() or dfslot.has_deleted():
            dfslot.reset()
            logger.info("Reseting history because of changes in the input df")
            dfslot.update(run_number, df)
            # TODO: be smarter with changed values

        m = step_size

        indices = dfslot.next_created(m)
        # m now holds the number of indices actually obtained
        m = indices_len(indices)

        # i: data of already-processed rows, j: data of the new rows
        i = None
        j = None
        # Si is the current matrix; None is handled below as "nothing yet"
        Si = self._buf.matrix()

        # Prefer the "array" slot as the data source when it provides data
        arrayslot = self.get_input_slot("array")
        if arrayslot is not None and arrayslot.data() is not None:
            array = arrayslot.data()
            logger.debug("Using array instead of DataFrame columns")
            if Si is not None:
                i = array[self._last_index]
            j = array[indices]
        if j is None:
            # Fall back to the DataFrame columns; by default every column
            # except Module.UPDATE_COLUMN is used
            if self.columns is None:
                self.columns = df.columns.delete(np.where(df.columns == Module.UPDATE_COLUMN))
            elif not isinstance(self.columns, pd.Index):
                self.columns = pd.Index(self.columns)
            rows = df[self.columns]
            if Si is not None:
                i = rows.loc[self._last_index]
                assert len(i) == len(self._last_index)
            j = rows.loc[fix_loc(indices)]
            assert len(j) == indices_len(indices)

        # Distances among the new rows
        Sj = pairwise_distances(j, metric=self._metric, n_jobs=self._n_jobs)
        if Si is None:
            # First batch: the matrix is exactly Sj
            mat = self._buf.resize(Sj.shape[0])
            mat[:, :] = Sj
            self._last_index = dfslot.last_index[indices]
        else:
            # Distances between previously processed rows and the new rows
            Sij = pairwise_distances(i, j, metric=self._metric, n_jobs=self._n_jobs)
            n0 = i.shape[0]
            n1 = n0 + j.shape[0]
            mat = self._buf.resize(n1)
            # Fill the two off-diagonal blocks and the new diagonal block
            mat[0:n0, n0:n1] = Sij
            mat[n0:n1, 0:n0] = Sij.T
            mat[n0:n1, n0:n1] = Sj
            self._last_index = self._last_index.append(df.index[indices])
            # truth = pairwise_distances(array[0:n1], metric=self._metric)
            # import pdb
            # pdb.set_trace()
            # assert np.allclose(mat,truth)
        return self._return_run_step(dfslot.next_state(), steps_run=m)
Example #3
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Feed pending created/updated indices of both inputs to the join.

     When either input reports deletions, both slots are reset, the output
     table (if any) is emptied, the join dialog is reset and both slots are
     updated again. Pending created and updated indices of the "first" and
     "second" slots are then collected under the keys "table" and "other"
     and passed to ``join_start`` (first call) or ``join_cont``.
     """
     first_slot = self.get_input_slot("first")
     second_slot = self.get_input_slot("second")
     if first_slot.deleted.any() or second_slot.deleted.any():
         first_slot.reset()
         second_slot.reset()
         if self.result is not None:
             self.table.resize(0)
             join_reset(self._dialog)
         first_slot.update(run_number)
         second_slot.update(run_number)
     steps = 0
     created = {}
     updated = {}
     keyed_slots = (("table", first_slot), ("other", second_slot))
     # Created buffers are drained before updated ones, first slot first.
     for kind, sink in (("created", created), ("updated", updated)):
         for key, slot in keyed_slots:
             buffer = getattr(slot, kind)
             if buffer.any():
                 indices = buffer.next(length=step_size)
                 steps += indices_len(indices)
                 sink[key] = indices
     first_table = first_slot.data()
     second_table = second_slot.data()
     if self._dialog.is_started:
         join_cont(
             first_table,
             second_table,
             dialog=self._dialog,
             created=created,
             updated=updated,
         )
     else:
         join_start(first_table,
                    second_table,
                    dialog=self._dialog,
                    created=created,
                    updated=updated,
                    **self.join_kwds)
     return self._return_run_step(self.next_state(first_slot),
                                  steps_run=steps)
Example #4
0
 def run_step(self, run_number, step_size, howlong):
     """Feed pending created/updated indices of both inputs to the join.

     Both slots are updated first. If either reports deletions, both are
     reset, the output table (if any) is emptied, the join dialog is reset
     and the slots are updated again. Pending indices are gathered under the
     keys "table" (first slot) and "other" (second slot) and passed to
     ``join_start`` on the first call or ``join_cont`` afterwards.
     """
     first_slot = self.get_input_slot('first')
     first_slot.update(run_number)
     second_slot = self.get_input_slot('second')
     second_slot.update(run_number)
     if first_slot.deleted.any() or second_slot.deleted.any():
         first_slot.reset()
         second_slot.reset()
         if self._table is not None:
             self._table.resize(0)
             join_reset(self._dialog)
         first_slot.update(run_number)
         second_slot.update(run_number)

     counted = [0]  # running total of consumed indices

     def _drain(kind):
         """Collect pending indices of the given kind from both slots."""
         found = {}
         for key, slot in (("table", first_slot), ("other", second_slot)):
             buffer = getattr(slot, kind)
             if buffer.any():
                 chunk = buffer.next(step_size)
                 counted[0] += indices_len(chunk)
                 found[key] = chunk
         return found

     created = _drain('created')
     updated = _drain('updated')
     steps = counted[0]
     with first_slot.lock:
         first_table = first_slot.data()
     with second_slot.lock:
         second_table = second_slot.data()
     if self._dialog.is_started:
         join_cont(first_table,
                   second_table,
                   dialog=self._dialog,
                   created=created,
                   updated=updated)
     else:
         join_start(first_table,
                    second_table,
                    dialog=self._dialog,
                    created=created,
                    updated=updated,
                    **self.join_kwds)
     return self._return_run_step(self.next_state(first_slot),
                                  steps_run=steps)
Example #5
0
 def run_step(self, run_number, step_size, howlong):
     """Add the number of newly created input rows to the 'counter' column.

     Updates or deletions on the input reset the slot and empty the output
     table. The step is blocked when no created index is available.
     """
     slot = self.get_input_slot('table')
     slot.update(run_number)
     if slot.updated.any() or slot.deleted.any():
         slot.reset()
         if self._table is not None:
             self._table.resize(0)
         slot.update(run_number)
     chunk = slot.created.next(step_size)  # returns a slice
     steps = indices_len(chunk)
     if steps == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     input_df = slot.data()  # fetched but not used below
     data = pd.DataFrame({'counter': steps}, index=[0])
     table = self._table
     if table is None:
         # First batch: create the output table holding a single row.
         self._table = Table(
             self.generate_table_name('counter'),
             data=data,
             create=True)
     elif len(table) == 0:  # emptied by a reset
         table.append(data)
     else:
         table['counter'].loc[0] += steps
     return self._return_run_step(self.next_state(slot), steps_run=steps)
Example #6
0
    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Update the running min/max of ``self._column`` with new input rows.

        Up to ``step_size`` created indices are read from the "table" slot.
        Their NaN-aware min and max are merged with the previous values (taken
        from the row at ``last_id`` of the output table, unless the input was
        just reset) and stored in the output table at index ``run_number``.

        Returns the run-step result built from the slot's next state and the
        number of rows consumed.
        """
        prev_min = prev_max = np.nan
        dfslot = self.get_input_slot("table")
        assert dfslot is not None
        # Bind the output table before branching: it is written below on both
        # paths, and binding it only in the non-reset branch left it undefined
        # (UnboundLocalError) after a reset.
        df = self.table
        if dfslot.updated.any() or dfslot.deleted.any():
            # Input changed: restart from scratch, ignoring previous extrema.
            dfslot.reset()
            dfslot.update(run_number)
        else:
            prev = df.last_id
            if prev > 0:
                prev_min = df.at[prev, self._min_column]
                prev_max = df.at[prev, self._max_column]

        indices = dfslot.created.next(length=step_size)  # returns a slice
        input_df = dfslot.data()
        steps = indices_len(indices)
        if steps > 0:
            x = input_df.to_array(locs=fix_loc(indices),
                                  columns=[self._column])
            new_min = np.nanmin(x)  # type: ignore
            new_max = np.nanmax(x)  # type: ignore
            # nanmin/nanmax ignore the NaN placeholders used when there is
            # no previous value.
            row = {
                self._min_column: np.nanmin([prev_min,
                                             new_min]),  # type: ignore
                self._max_column: np.nanmax([prev_max,
                                             new_max]),  # type: ignore
            }
            if run_number in df.index:
                df.loc[run_number] = row
            else:
                df.add(row, index=run_number)
        return self._return_run_step(self.next_state(dfslot), steps)
Example #7
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Add newly created input ids to the selection, then optionally drop some.

     The rows to drop are chosen from the ids present before this step,
     according to ``self._delete_rows``: an int (random sample of at most
     that many), "half", "all", or any other value used directly as the ids.
     """
     # The `and False` keeps this override permanently disabled.
     if self.params.fixed_step_size and False:
         step_size = self.params.fixed_step_size
     input_slot = self.get_input_slot("table")
     assert input_slot is not None
     if not input_slot.created.any():
         return self._return_run_step(self.state_blocked, steps_run=0)
     new_ids = input_slot.created.next(length=step_size, as_slice=False)
     steps = indices_len(new_ids)
     input_table = input_slot.data()
     if self.result is None:
         self.result = TableSelectedView(input_table, bitmap([]))
     # Snapshot of the ids visible before the new ones are added.
     existing = bitmap(self.table.index)
     self.selected.selection |= new_ids
     spec = self._delete_rows
     if spec and self.test_delete_threshold(existing):
         if isinstance(spec, int):
             sample_size = min(spec, len(existing))
             doomed = random.sample(tuple(existing), sample_size)
         elif spec == "half":
             doomed = random.sample(tuple(existing), len(existing) // 2)
         elif spec == "all":
             doomed = existing
         else:
             doomed = spec
         self.selected.selection -= bitmap(doomed)
     return self._return_run_step(self.next_state(input_slot),
                                  steps_run=steps)
Example #8
0
    def run_step(self, run_number, step_size, howlong):
        """Fold the column-wise max of newly created rows into the history.

        Updates or deletions on the input reset the slot and discard the
        history frame. The merged maximum is stored at index ``run_number``
        and the history is trimmed to ``self.params.history`` rows.
        """
        slot = self.get_input_slot('df')
        slot.update(run_number)
        if slot.has_updated() or slot.has_deleted():
            slot.reset()
            self._df = None
            slot.update(run_number)
        chunk = slot.next_created(step_size)  # returns a slice
        steps = indices_len(chunk)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()
        chunk_max = self.filter_columns(source, fix_loc(chunk)).max()
        if not chunk_max.index.equals(self._columns):
            # some columns are not numerical
            self._columns = chunk_max.index

        chunk_max[self.UPDATE_COLUMN] = run_number
        if self._df is None:
            self._df = pd.DataFrame([chunk_max], index=[run_number])
        else:
            previous = last_row(self._df)
            merged = pd.concat([previous, chunk_max], axis=1).max(axis=1)
            # The max was also taken over the update column, so restore it.
            merged[self.UPDATE_COLUMN] = run_number
            self._df.loc[run_number] = merged

        if len(self._df) > self.params.history:
            kept = self._df.index[-self.params.history:]
            self._df = self._df.loc[kept]
        return self._return_run_step(slot.next_state(), steps_run=steps)
Example #9
0
    def run_step(self, run_number, step_size, howlong):
        """Fit the mini-batch k-means model on newly created input rows.

        The step is blocked while fewer rows than ``self.mbk.n_clusters`` are
        buffered, or when no index could be read. Otherwise the rows are fed
        to ``partial_fit`` in batches of ``self.mbk.batch_size`` (100 when
        that is falsy); when a label buffer exists, ``self.mbk.labels_`` is
        appended to it after each batch. The cluster centers are finally
        published in ``self._df`` under ``self.lock``.
        """
        dfslot = self.get_input_slot('df')
        dfslot.update(run_number)

        if dfslot.has_deleted() or dfslot.has_updated():
            logger.debug('has deleted or updated, reseting')
            self.reset()
            dfslot.update(run_number)

        buffered = dfslot.created_length()
        # Use the module logger, not print, so library code stays off stdout.
        logger.debug('dfslot has buffered %d elements', buffered)
        if buffered < self.mbk.n_clusters:
            # Should add more than k items per loop
            return self._return_run_step(self.state_blocked, steps_run=0)
        indices = dfslot.next_created(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        X = self.filter_columns(input_df, fix_loc(indices)).values
        batch_size = self.mbk.batch_size or 100
        for batch in gen_batches(steps, batch_size):
            self.mbk.partial_fit(X[batch])
            if self._buffer is not None:
                df = pd.DataFrame({'labels': self.mbk.labels_})
                df[self.UPDATE_COLUMN] = run_number
                self._buffer.append(df)

        with self.lock:
            self._df = pd.DataFrame(self.mbk.cluster_centers_,
                                    columns=self.columns)
            self._df[self.UPDATE_COLUMN] = run_number
            if self._buffer is not None:
                logger.debug('Setting the labels')
                self._labels = self._buffer.df()
        return self._return_run_step(dfslot.next_state(), steps_run=steps)
Example #10
0
    def run_step(self, run_number, step_size, howlong):
        """Feed newly created rows to the t-digest and record the percentiles.

        Updates or deletions on the input reset the slot and replace the
        digest with a fresh one. After the digest is updated, one row holding
        each requested percentile followed by ``run_number`` is written at
        index ``run_number``, and the history is trimmed to
        ``self.params.history`` rows.
        """
        slot = self.get_input_slot('df')
        slot.update(run_number)
        if slot.has_updated() or slot.has_deleted():
            slot.reset()
            slot.update(run_number)
            self.tdigest = TDigest()  # reset

        chunk = slot.next_created(step_size)
        steps = indices_len(chunk)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=steps)
        source = slot.data()
        with slot.lock:
            selected = self.filter_columns(source, fix_loc(chunk))
            self.tdigest.batch_update(selected)
        history = self._df
        # Percentiles are stored as fractions and queried as percentages.
        row = [self.tdigest.percentile(p * 100) for p in self._percentiles]
        row.append(run_number)
        with self.lock:
            history.loc[run_number] = row
            if len(history) > self.params.history:
                kept = history.index[-self.params.history:]
                self._df = history.loc[kept]
        return self._return_run_step(slot.next_state(),
                                     steps_run=steps,
                                     reads=steps,
                                     updates=len(self._df))
Example #11
0
    def run_step(self, run_number, step_size, howlong):
        """Append the running column-wise maximum of the input to the output.

        Updates or deletions on the input reset the slot and empty the output
        table. The max of the newly created rows is combined with the last
        output row (when there is one) and appended as a new row.
        """
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            if self._table is not None:
                self._table.resize(0)
            slot.update(run_number)
        chunk = slot.created.next(step_size)  # returns a slice
        steps = indices_len(chunk)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()
        op = self.filter_columns(source, fix_loc(chunk)).max(keepdims=True)
        table = self._table
        if table is None:
            self._table = Table(
                self.generate_table_name('max'),
                data=op,
                create=True)
        else:
            if len(table) != 0:
                # Merge with the previous maximum; after a reset the table
                # is empty and the chunk maximum is appended unchanged.
                last = table.last()
                for colname in last:
                    current_max = op[colname]
                    current_max[0] = np.maximum(current_max, last[colname])
            table.append(op)

        # TODO: manage the history in a more efficient way (trim the table
        # to self.params.history rows).
        return self._return_run_step(self.next_state(slot), steps_run=steps)
Example #12
0
    def run_step_progress(self, run_number: int, step_size: int,
                          howlong: float) -> ReturnRunStep:
        """Update the selection with ids created in the input tables.

        Every input slot must expose a view over the same physical table.
        When any slot reports updates or deletions, that slot and every
        following one is reset and the selection is cleared. Ids created in
        all inputs during this step are added directly; ids created in only
        some inputs are added only if present in the index of every table.

        Returns a blocked-state result carrying the number of steps done.
        """
        _b = bitmap.asbitmap
        # Must be bound to a list: a bare annotation creates no variable, so
        # the append below would fail with an unbound local.
        to_create: List[bitmap] = []
        steps = 0
        tables = []
        ph_table = None
        reset_ = False
        for name in self.get_input_slot_multiple():
            slot = self.get_input_slot(name)
            t = slot.data()
            assert isinstance(t, BaseTable)
            if ph_table is None:
                ph_table = _get_physical_table(t)
            else:
                assert ph_table is _get_physical_table(t)
            tables.append(t)
            # Once one slot triggers a reset, all later slots are reset too.
            if reset_ or slot.updated.any() or slot.deleted.any():
                slot.reset()
                reset_ = True
                steps += 1
            if slot.created.any():
                created = slot.created.next(step_size)
                bm = _b(created)
                to_create.append(bm)
                steps += indices_len(created)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Ids created in every input in this step are certainly in the result.
        to_create_4sure = bitmap()
        if len(to_create) == len(tables):
            to_create_4sure = bitmap.intersection(*to_create)

        # A reset alone yields steps > 0 with nothing created; avoid calling
        # union without arguments in that case.
        to_create_maybe = bitmap.union(*to_create) if to_create else bitmap()

        if not self.result:
            self.result = TableSelectedView(ph_table, bitmap([]))
        if reset_:
            self.selected.selection = bitmap([])
        self.selected.selection = self.selected.index | to_create_4sure
        to_create_maybe -= to_create_4sure
        # Remaining candidates are kept only if every table contains them.
        eff_create = to_create_maybe
        for t in tables:
            eff_create &= t.index
        self.selected.selection = self.selected.index | eff_create
        return self._return_run_step(self.state_blocked, steps)
Example #13
0
    def to_array(
        self,
        locs: Indexer = None,
        columns: Optional[List[str]] = None,
        ret: Optional[np.ndarray[Any, Any]] = None,
    ) -> np.ndarray[Any, Any]:
        """Convert this table to a numpy array.

        Parameters
        ----------
        locs: a list of ids or None
            The rows to extract.  Locs can be specified with multiple formats:
            integer, list, numpy array, Iterable, or slice.  If None, all the
            rows of the table index are extracted.
        columns: a list or None
            The columns to extract or, if None, all the table columns.
        ret: array or None
            If None, the returned array is allocated, otherwise ret is reused.
            It must be an array of the right dtype and shape, otherwise it is
            ignored and a new array is allocated.

        Returns
        -------
        numpy.ndarray
            A 2D array with one row per selected index and the requested
            columns laid out side by side, in the common dtype of those
            columns.
        """
        if columns is None:
            columns = self.columns
        assert columns is not None
        shapes = [self[c].shape for c in columns]
        offsets = self.column_offsets(columns, shapes)
        dtype = self.columns_common_dtype(columns)
        indices = None
        # TODO split the copy in chunks
        if locs is None:
            indices = self.index
        elif isinstance(locs, slice):
            indices = self._slice_to_bitmap(locs)
        else:
            indices = locs
        shape: Shape = (indices_len(indices), offsets[-1])
        arr: np.ndarray[Any, Any]
        if isinstance(
                ret, np.ndarray) and ret.shape == shape and ret.dtype == dtype:
            arr = ret
        else:
            arr = np.empty(shape, dtype=dtype)
        for i, column in enumerate(columns):
            col = self._column(column)
            # Separate name so the annotated result `shape` is not rebound.
            col_shape = shapes[i]
            if len(col_shape) == 1:
                # One-dimensional column: fills a single destination column.
                col.read_direct(arr, indices, dest_sel=np.s_[:, offsets[i]])
            else:
                # Multi-dimensional column: fills a range of columns.
                col.read_direct(arr,
                                indices,
                                dest_sel=np.s_[:, offsets[i]:offsets[i + 1]])
        return arr
Example #14
0
    def run_step(self, run_number, step_size, howlong):
        """Track, per column, the running minimum and the index where it occurs.

        ``self._min`` stores the minimum values and ``self._table`` the
        matching row indices; each step appends one row to both, indexed by
        ``run_number``. Updates or deletions on the input reset the slot and
        discard both tables. Once the history exceeds
        ``self.params.history`` rows, both tables are rebuilt from their most
        recent rows.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():
            dfslot.reset()
            # Drop both tables together: the branch below keys on self._min,
            # so clearing only self._table led to `.last()` on None.
            self._table = None
            self._min = None
            dfslot.update(run_number)
        indices = dfslot.created.next(step_size)  # returns a slice
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_table = dfslot.data()
        # Per column: index of the minimum among the newly created rows.
        op = self.filter_columns(input_table, fix_loc(indices)).idxmin()

        if self._min is None:
            min_ = OrderedDict(zip(op.keys(), [np.nan] * len(op.keys())))
            for col, ix in op.items():
                min_[col] = input_table.at[
                    ix, col]  # lookup value, is there a better way?
            self._min = Table(self.generate_table_name('_min'),
                              dshape=input_table.dshape,
                              create=True)
            self._min.append(min_, indices=[run_number])
            self._table = Table(self.generate_table_name('_table'),
                                dshape=input_table.dshape,
                                create=True)
            self._table.append(op, indices=[run_number])
        else:
            prev_min = self._min.last()
            prev_idx = self._table.last()
            min_ = OrderedDict(prev_min.items())
            for col, ix in op.items():
                val = input_table.at[ix, col]
                is_new_min = not np.isnan(val) and (np.isnan(min_[col])
                                                    or val < min_[col])
                if is_new_min:
                    # Chunk minimum wins: keep its index (already in op)
                    # and record its value.
                    min_[col] = val
                else:
                    # Previous minimum still holds: carry its index forward.
                    op[col] = prev_idx[col]
            with self.lock:
                self._table.append(op, indices=[run_number])
                self._min.append(min_, indices=[run_number])
                if len(self._table) > self.params.history:
                    data = self._table.loc[
                        self._table.index[-self.params.history:]]
                    self._table = Table(self.generate_table_name('_table'),
                                        data=data,
                                        create=True)
                    data = self._min.loc[
                        self._min.index[-self.params.history:]]
                    self._min = Table(self.generate_table_name('_min'),
                                      data=data,
                                      create=True)

        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Example #15
0
    def run_step(self, run_number, step_size, howlong):
        """Filter newly created rows of the 'df' slot with the current query.

        The query expression is read from the last row of the 'query' slot
        (column ``self._query_column``); a newly created query resets the
        'df' slot and the output buffer so the data is filtered again. With
        no query, or when the expression fails to evaluate, the input frame
        is passed through unchanged. Every path returns the blocked state.
        """
        query_slot = self.get_input_slot('query')
        df_slot = self.get_input_slot('df')
        if not query_slot:
            query = None
        else:
            query_df = query_slot.data()
            query_slot.update(run_number)
            if query_slot.has_created():  # ignore deleted and updated
                df_slot.reset()  # re-filter
                self._buffer.reset()
            # Consume the whole creation buffer; the returned indices are
            # not needed since only the last row is read below.
            query_slot.next_created()
            with query_slot.lock:
                # get the query expression
                query = last_row(query_df)[self._query_column]
            if query is not None:
                if len(query) == 0:
                    query = None
                else:
                    query = unicode(query)  # make sure we have a string

        df_slot.update(run_number)
        if df_slot.has_deleted() or df_slot.has_updated():
            df_slot.reset()
            self._buffer.reset()
            df_slot.update(run_number)

        indices = df_slot.next_created(step_size)
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=steps)

        if query is None:  # nothing to query, just pass through
            logger.info('No query, passing data through')
            self._df = df_slot.data()
            return self._return_run_step(self.state_blocked, steps_run=steps)

        with df_slot.lock:
            new_df = df_slot.data().loc[fix_loc(indices)]
            try:
                # NOTE(review): the expression comes from the 'query' slot and
                # is evaluated as-is; confirm that slot is never fed untrusted
                # input.
                selected_df = new_df.eval(query)
                if isinstance(selected_df, pd.Series):
                    if selected_df.index.has_duplicates:
                        # Log and continue; a debugger breakpoint here would
                        # stall the whole pipeline.
                        logger.warning(
                            'Query result index contains duplicates')
                    selected_df = new_df.loc[selected_df]
            except Exception as e:
                # Deliberate best effort: fall back to passing data through.
                logger.error('Probably a syntax error in query expression: %s', e)
                self._df = df_slot.data()
                return self._return_run_step(self.state_blocked, steps_run=steps)
            selected_df.loc[:, self.UPDATE_COLUMN] = run_number
            self._buffer.append(selected_df)  # , ignore_index=False) TODO later
            self._df = self._buffer.df()
        return self._return_run_step(self.state_blocked, steps_run=steps)
Example #16
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Copy newly created input rows to the output, then delete/update rows.

     After appending the new rows, rows chosen according to
     ``self._delete_rows`` may be deleted and rows chosen according to
     ``self._update_rows`` may receive random values in
     ``self._update_column``. The reported step count includes the created
     rows, the updated rows and, except in the ``del_twice`` case, the
     deleted rows.
     """
     # The `and False` makes this override unreachable.
     if self.params.fixed_step_size and False:
         step_size = self.params.fixed_step_size
     input_slot = self.get_input_slot("table")
     assert input_slot is not None
     steps = 0
     if not input_slot.created.any():
         return self._return_run_step(self.state_blocked, steps_run=0)
     created = input_slot.created.next(length=step_size)
     steps = indices_len(created)
     # Running total of created rows consumed by this module
     self._steps += steps
     input_table = input_slot.data()
     if self.result is None:
         # Output table shares the dshape of the input table
         self.result = Table(
             self.generate_table_name("stirrer"),
             dshape=input_table.dshape,
         )
     raw_ids = self.table.index
     # NOTE(review): before_ is the very object returned by self.table.index
     # (no copy is made here); the in-place `-=` further down acts on it.
     # Confirm whether `index` returns a fresh object.
     before_ = raw_ids  # bitmap(raw_ids[raw_ids >= 0])
     v = input_table.loc[fix_loc(created), :]
     self.table.append(v)  # indices=bitmap(created))
     delete = []
     if self._delete_rows and self.test_delete_threshold(before_):
         # _delete_rows may be an int (sample size), "half", "all", or
         # any other value, which is used directly as the ids to delete
         if isinstance(self._delete_rows, int):
             delete = random.sample(tuple(before_),
                                    min(self._delete_rows, len(before_)))
         elif self._delete_rows == "half":
             delete = random.sample(tuple(before_), len(before_) // 2)
         elif self._delete_rows == "all":
             delete = before_
         else:
             delete = self._delete_rows
         if delete and self.params.del_twice:
             # Delete in two halves; steps is not incremented on this path
             mid = len(delete) // 2
             del self.table.loc[delete[:mid]]
             del self.table.loc[delete[mid:]]
         elif delete:
             steps += len(delete)
             del self.table.loc[delete]
     if self._update_rows and len(before_):
         # Only rows that were not just deleted are candidates for update
         before_ -= bitmap(delete)
         if isinstance(self._update_rows, int):
             updated = random.sample(tuple(before_),
                                     min(self._update_rows, len(before_)))
         else:
             updated = self._update_rows
         # One random value per updated row
         v = np.random.rand(len(updated))
         if updated:
             steps += len(updated)
             self.table.loc[fix_loc(updated), [self._update_column]] = [v]
     return self._return_run_step(self.next_state(input_slot),
                                  steps_run=steps)
Example #17
0
 def run_step(self, run_number, step_size, howlong):
     """Copy newly created input rows to the output, then delete/update rows.

     After appending the new rows, rows chosen according to
     ``self._delete_rows`` may be deleted and rows chosen according to
     ``self._update_rows`` may receive random values in
     ``self._update_column``. Only the created rows count as steps.
     """
     # The `and False` makes this override unreachable.
     if self.params.fixed_step_size and False:
         step_size = self.params.fixed_step_size
     input_slot = self.get_input_slot('table')
     input_slot.update(run_number)
     steps = 0
     if not input_slot.created.any():
         return self._return_run_step(self.state_blocked, steps_run=0)
     created = input_slot.created.next(step_size)
     steps = indices_len(created)
     with input_slot.lock:
         input_table = input_slot.data()
     # p is not used in the rest of this method
     p = self.params
     if self._table is None:
         # Output table shares the dshape of the input table
         self._table = Table(
             self.generate_table_name('dummy'),
             dshape=input_table.dshape,
         )
     raw_ids = self._table.index.values
     # Ids present before this step's rows are appended; negative
     # entries are filtered out
     before_ = bitmap(raw_ids[raw_ids >= 0])
     v = input_table.loc[fix_loc(created), :]
     #print("creations: ", created)
     self._table.append(v)  # indices=bitmap(created))
     delete = []
     if self._delete_rows and self.test_delete_threshold(before_):
         # _delete_rows may be an int (sample size), 'half', 'all', or
         # any other value, which is used directly as the ids to delete
         if isinstance(self._delete_rows, int):
             delete = random.sample(tuple(before_),
                                    min(self._delete_rows, len(before_)))
         elif self._delete_rows == 'half':
             delete = random.sample(tuple(before_), len(before_) // 2)
         elif self._delete_rows == 'all':
             delete = before_
         else:
             delete = self._delete_rows
         #print("deletions: ", delete)
         # NOTE(review): unlike the typed variant of this method, the
         # deletion below is not guarded against an empty `delete`.
         if self.params.del_twice:
             # Delete in two halves
             mid = len(delete) // 2
             del self._table.loc[delete[:mid]]
             del self._table.loc[delete[mid:]]
         else:
             del self._table.loc[delete]
     if self._update_rows and len(before_):
         # Only rows that were not just deleted are candidates for update
         before_ -= bitmap(delete)
         if isinstance(self._update_rows, int):
             updated = random.sample(tuple(before_),
                                     min(self._update_rows, len(before_)))
         else:
             updated = self._update_rows
         # One random value per updated row
         v = np.random.rand(len(updated))
         if updated:
             self._table.loc[fix_loc(updated), [self._update_column]] = [v]
     return self._return_run_step(self.next_state(input_slot),
                                  steps_run=steps)
Example #18
0
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Expose the ``table`` input restricted to the ``labels`` selection.

     Created entries are consumed from both slots and their buffers are
     cleared.  When at least one entry was consumed, the result view is
     created (first time) or its selection is replaced with the current
     selection of the ``labels`` data.
     """
     assert self.context
     with self.context as ctx:
         created_rows = ctx.table.created.next(
             length=step_size)  # returns a slice
         n_rows = indices_len(created_rows)
         ctx.table.clear_buffers()
         created_labels = ctx.labels.created.next(
             length=step_size)  # returns a slice
         n_labels = indices_len(created_labels)
         ctx.labels.clear_buffers()
         steps = n_rows + n_labels
         if not steps:
             # Nothing new on either input.
             return self._return_run_step(self.state_blocked, steps_run=0)
         if self.result is not None:
             self.selected.selection = ctx.labels.data().selection
         else:
             self.result = TableSelectedView(ctx.table.data(),
                                             ctx.labels.data().selection)
         return self._return_run_step(self.next_state(ctx.table),
                                      steps_run=steps)
Example #19
0
 def run_step(self,run_number,step_size,howlong):
     """Mirror the ``df`` input, propagating only sufficiently large updates.

     Created rows are appended to the internal buffer and stamped with
     ``run_number``.  Updated rows are copied over only when the norm of
     their change exceeds ``self._delta * self.get_scale()``.  Any
     deletion on the slot resets both the module and the slot.
     """
     slot = self.get_input_slot('df')
     slot.update(run_number, buffer_created=True, buffer_updated=True)
     if slot.has_deleted():
         self.reset()
         slot.reset()
         slot.update(run_number)
     source = slot.data()
     # Return value is unused; presumably called for its side effect on
     # ``self._columns`` -- TODO confirm.
     self.get_columns(source)
     if source is None or len(source) == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)

     created = slot.next_created(step_size)
     n_created = indices_len(created)
     budget = step_size - n_created  # what is left for updated rows
     steps_run = n_created
     if n_created != 0:
         created = fix_loc(created)
         self._buffer.append(source.loc[created])
         self._df = self._buffer.df()
         self._df.loc[created, self.UPDATE_COLUMN] = run_number

     if budget > 0 and slot.has_updated():
         changed = slot.next_updated(budget, as_slice=False)
         n_changed = indices_len(changed)
         if n_changed != 0:
             steps_run += n_changed
             changed = fix_loc(changed)  # no need, but stick to the stereotype
             incoming = self.filter_columns(source, changed)
             current = self.filter_columns(self._df, changed)
             distance = row_norms(incoming - current)
             moved = distance > (self._delta * self.get_scale())
             changed = changed[moved]
             if moved.any():
                 logger.debug('updating at %d', run_number)
                 self._df.loc[changed, self._columns] = incoming.loc[changed, self._columns]
                 self._df.loc[changed, self.UPDATE_COLUMN] = run_number
             else:
                 logger.debug('Not updating at %d', run_number)
     return self._return_run_step(slot.next_state(), steps_run=steps_run)
Example #20
0
    def run_step(self,run_number,step_size,howlong):
        """Render the latest histogram of the ``array`` slot as an image.

        The last ``'array'`` cell of the input is cube-rooted and converted
        to an image, flipped top to bottom.  When ``params.filename`` is
        set the image is saved as a PNG and only the file name is recorded;
        otherwise the image object itself is recorded.  The record is
        written to ``self._df`` at ``run_number`` and the frame is trimmed
        to ``params.history`` rows.

        Raises whatever the save step raises (after logging it).  A failed
        conversion is logged and recorded as ``(None, None)``.
        """
        dfslot = self.get_input_slot('array')
        input_df = dfslot.data()
        dfslot.update(run_number)
        indices = dfslot.next_created()
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=1)
        with dfslot.lock:
            histo = input_df.at[input_df.index[-1], 'array']
        if histo is None:
            return self._return_run_step(self.state_blocked, steps_run=1)
        p = self.params
        # NaN parameters are passed to the converter as None.
        cmax = p.cmax
        if np.isnan(cmax):
            cmax = None
        cmin = p.cmin
        if np.isnan(cmin):
            cmin = None
        high = p.high
        low = p.low
        try:
            image = sp.misc.toimage(sp.special.cbrt(histo), cmin=cmin, cmax=cmax, high=high, low=low, mode='I')
            image = image.transpose(Image.FLIP_TOP_BOTTOM)
            filename = p.filename
        except Exception:
            # Best effort: keep running, but do not hide the failure and do
            # not swallow KeyboardInterrupt/SystemExit as a bare except did.
            logger.warning('Cannot convert histogram to image at run %s',
                           run_number, exc_info=True)
            image = None
            filename = None
        if filename is not None:
            try:
                # A %d-style placeholder in the name receives the run number.
                if re.search(r'%(0[\d])?d', filename):
                    filename = filename % (run_number)
                filename = self.storage.fullname(self, filename)
                image.save(filename, format='PNG', bits=16)
                logger.debug('Saved image %s', filename)
                image = None  # saved to disk: record the file name only
            except Exception:
                logger.error('Cannot save image %s', filename)
                raise

        values = [image, filename, run_number]
        with self.lock:
            df = self._df
            df.loc[run_number] = values
            if len(df) > p.history:
                self._df = df.loc[df.index[-p.history:]]
        return self._return_run_step(self.state_blocked,
                                     steps_run=1,
                                     reads=1,
                                     updates=1)
Example #21
0
    def to_array(self, locs=None, columns=None):
        """Convert this table to a numpy array

        Parameters
        ----------
        locs: a list of ids or None
            The rows to extract.  Locs can be specified with multiple formats:
            integer, list, numpy array, Iterable, or slice.
        columns: a list or None
            the columns to extract

        Returns
        -------
        numpy.ndarray
            A 2-D array with one row per selected index and
            ``offsets[-1]`` columns, in the common dtype of the selected
            columns.
        """
        if columns is None:
            columns = self.columns

        shapes = [self[c].shape for c in columns]
        offsets = self.column_offsets(columns, shapes)
        dtypes = [self[c].dtype for c in columns]
        dtype = np.find_common_type(dtypes, [])
        indices = None
        #TODO split the copy in chunks
        if locs is None:
            if self._ids.has_freelist():
                indices = self._ids[:]
                # The mask must be shaped like ``indices``: ``locs`` is None
                # in this branch and has no shape.
                # NOTE(review): numpy masked arrays treat True as "masked";
                # as written every entry except the free list is masked.
                # Polarity kept as in the original -- confirm against what
                # ``read_direct`` expects.
                mask = np.ones(indices.shape, dtype=bool)
                mask[self._ids.freelist()] = False
                indices = np.ma.masked_array(indices, mask)
            else:
                indices = slice(0, self.size)
        elif isinstance(locs, (list, np.ndarray)):
            indices = np.asarray(locs, np.int64)
            indices = self.id_to_index(indices)
        elif isinstance(locs, Iterable):
            indices = self.id_to_index(locs)
        elif isinstance(locs, integer_types):
            # A single id is turned into a one-element slice.
            indices = self.id_to_index(slice(locs, locs + 1, 1))
        elif isinstance(locs, slice):
            indices = self.id_to_index(locs)

        arr = np.empty((indices_len(indices), offsets[-1]), dtype=dtype)
        for i, column in enumerate(columns):
            col = self._column(column)
            shape = shapes[i]
            if len(shape) == 1:
                # 1-D column: fills a single destination column.
                col.read_direct(arr, indices, dest_sel=np.s_[:, offsets[i]])
            else:
                # Multi-dimensional column: fills a block of columns.
                col.read_direct(arr,
                                indices,
                                dest_sel=np.s_[:, offsets[i]:offsets[i + 1]])
        return arr
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Fold the maximum of newly created rows into the running result.

     The maxima of the created rows of the ``table`` input are computed;
     the first time they become the result, afterwards every stored entry
     is replaced by the larger of the stored and the new value.
     """
     assert self.context
     with self.context as ctx:
         created = ctx.table.created.next(step_size)  # returns a slice
         steps = indices_len(created)
         chunk = ctx.table.data().loc[fix_loc(created)]
         chunk_max = chunk.max(keepdims=False)
         if self.result is not None:
             for key, current in self.psdict.items():
                 self.result[key] = np.maximum(chunk_max[key], current)
         else:
             self.result = PsDict(chunk_max)
         return self._return_run_step(self.next_state(ctx.table),
                                      steps_run=steps)
Example #23
0
    def run_step(self,run_number,step_size,howlong):
        """Track, per column, the index of the minimum value seen so far.

        For each step the ``idxmin`` of the created rows is computed.  It
        is merged with the previous state: a column keeps the new index
        only when the new value is a strictly smaller, non-NaN minimum;
        otherwise the previously recorded index is retained.  One row is
        appended to ``self._df`` (indices) and to ``self._min`` (values)
        per step, both trimmed to ``params.history`` rows.

        Updates or deletions on the input reset the slot and the state.
        """
        dfslot = self.get_input_slot('df')
        dfslot.update(run_number)
        if dfslot.has_updated() or dfslot.has_deleted():
            dfslot.reset()
            # Both histories are created together and must be dropped
            # together, otherwise the merge branch reads a None ``_df``.
            self._df = None
            self._min = None
            dfslot.update(run_number)
        indices = dfslot.next_created(step_size) # returns a slice
        steps = indices_len(indices)
        if steps==0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.filter_columns(input_df, fix_loc(indices)).idxmin()
        if not op.index.equals(self._columns):
            # some columns are not numerical
            self._columns = op.index

        op[self.UPDATE_COLUMN] = run_number
        if self._min is None:
            # Scalar NaN broadcasts over the index (a one-element list
            # would not match its length).
            min_row = pd.Series(np.nan, index=op.index) # the UPDATE_COLUMN is included
            min_row[self.UPDATE_COLUMN] = run_number
            for col in op.index:
                if col == self.UPDATE_COLUMN:
                    continue
                min_row[col] = input_df.loc[op[col], col] # lookup value, is there a better way?
            self._min = pd.DataFrame([min_row], columns=op.index)
            self._df = pd.DataFrame([op], columns=op.index)
        else:
            prev_min = last_row(self._min)
            prev_idx = last_row(self._df)
            min_row = pd.Series(prev_min)
            min_row[self.UPDATE_COLUMN] = run_number
            for col in op.index:
                if col == self.UPDATE_COLUMN:
                    continue
                val = input_df.loc[op[col], col]
                improved = (not np.isnan(val)
                            and (np.isnan(min_row[col]) or val < min_row[col]))
                if improved:
                    # New minimum: keep the index just computed in ``op``.
                    min_row[col] = val
                else:
                    # No improvement: the previous index stays the answer.
                    op[col] = prev_idx[col]
            op[self.UPDATE_COLUMN] = run_number
            with self.lock:
                # NOTE(review): DataFrame.append no longer exists in
                # pandas >= 2.0; kept to match the pandas this file targets.
                self._df = self._df.append(op, ignore_index=True)
                self._min = self._min.append(min_row, ignore_index=True)
                if len(self._df) > self.params.history:
                    self._df = self._df.loc[self._df.index[-self.params.history:]]
                    self._min = self._min.loc[self._min.index[-self.params.history:]]

        return self._return_run_step(dfslot.next_state(), steps_run=steps)
Example #24
0
    def run_step(self, run_number, step_size, howlong):
        """Feed newly created rows to the mini-batch estimator ``self.mbk``.

        Created rows of the ``table`` input are converted to an array and
        passed to ``partial_fit`` in batches; when a labels table exists
        the labels of each batch are appended to it.  The output table is
        created on first use and always receives the current cluster
        centers.  Updates or deletions on the input trigger ``reset()``.
        """
        slot = self.get_input_slot('table')
        slot.update(run_number)

        if slot.deleted.any() or slot.updated.any():
            logger.debug('has deleted or updated, reseting')
            self.reset()
            slot.update(run_number)

        def blocked():
            return self._return_run_step(self.state_blocked, steps_run=0)

        source = slot.data()
        no_data = source is None or len(source) == 0
        if no_data and slot.created_length() < self.mbk.n_clusters:
            # Should add more than k items per loop
            return blocked()
        created = slot.created.next(step_size)  # returns a slice
        steps = indices_len(created)
        if steps == 0:
            return blocked()
        cols = self.get_columns(source)
        if len(cols) == 0:
            return blocked()
        locs = fix_loc(created)
        if self._labels is not None and isinstance(created, slice):
            # Labels are appended per batch, which needs explicit indices.
            created = np.arange(created.start, created.stop)

        X = source.to_array(columns=cols, locs=locs)

        for batch in gen_batches(steps, self.mbk.batch_size or 100):
            self.mbk.partial_fit(X[batch])
            if self._labels is None:
                continue
            self._labels.append({'labels': self.mbk.labels_},
                                indices=created[batch])
        if self._table is None:
            centers_dshape = self.dshape_from_columns(
                source, cols, dshape_from_dtype(X.dtype))
            self._table = Table(self.generate_table_name('centers'),
                                dshape=centers_dshape,
                                create=True)
            self._table.resize(self.mbk.cluster_centers_.shape[0])
        self._table[cols] = self.mbk.cluster_centers_
        return self._return_run_step(self.next_state(slot), steps_run=steps)
Example #25
0
    def run_step(self, run_number: int, step_size: int,
                 howlong: float) -> ReturnRunStep:
        """Copy created rows to the result and perturb some rows once.

        After appending the created rows, and until ``self.done`` is set,
        the ``_sensitive_ids`` of the watched module are read.  Depending
        on ``self.proc_sensitive`` either those ids, or non-sensitive ids
        among the first ten rows, are deleted (``mode == "delete"``) or
        have column 0 overwritten with ``self.value``.
        """
        input_slot = self.get_input_slot("table")
        # input_slot.update(run_number)
        if not input_slot.created.any():
            return self._return_run_step(self.state_blocked, steps_run=0)
        created = input_slot.created.next(step_size)
        steps = indices_len(created)
        input_table = input_slot.data()
        if self.result is None:
            self.result = Table(
                self.generate_table_name("stirrer"),
                dshape=input_table.dshape,
            )
        self.table.append(input_table.loc[fix_loc(created), :])

        def perturb(target):
            """Delete *target* or overwrite its first column, per mode."""
            if self.mode == "delete":
                del self.table.loc[target]
            else:
                self.table.loc[target, 0] = self.value

        if not self.done:
            watched = self.scheduler()[self.watched]
            sensitive_ids = bitmap(getattr(watched, "_sensitive_ids").values())
            if sensitive_ids:
                if self.proc_sensitive:
                    perturb(sensitive_ids)
                    self.done = True
                elif len(self.result) > 10:
                    # non sensitive: look at the first ten rows only
                    for pos in range(10):
                        id_ = self.table.index[pos]
                        if id_ not in sensitive_ids:
                            perturb(id_)
                            self.done = True

        return self._return_run_step(self.next_state(input_slot),
                                     steps_run=steps)
 def run_step(self, run_number: int, step_size: int,
              howlong: float) -> ReturnRunStep:
     """Maintain the running maximum of the ``table`` input.

     Updates or deletions reset the slot and clear the stored maxima.
     Otherwise the maxima of the created rows are merged into the result
     with ``np.maximum``.  Blocks when no created rows are available.
     """
     slot = self.get_input_slot("table")
     if slot.updated.any() or slot.deleted.any():
         slot.reset()
         if self.result is not None:
             self.psdict.clear()  # resize(0)
         slot.update(run_number)
     created = slot.created.next(step_size)
     steps = indices_len(created)
     if not steps:
         return self._return_run_step(self.state_blocked, steps_run=0)
     chunk_max = slot.data().loc[fix_loc(created)].max(keepdims=False)
     if self.result is not None:
         for key, current in self.psdict.items():
             self.result[key] = np.maximum(chunk_max[key], current)
     else:
         self.result = PsDict(chunk_max)
     return self._return_run_step(self.next_state(slot), steps_run=steps)
Example #27
0
 def run_step(self, run_number, step_size, howlong):
     """Record the running min and max of ``self._column``.

     The NaN-aware min/max of the created rows is combined with the
     values of the last stored row and written to ``self._table`` at
     index ``run_number``.  After an update or deletion on the input the
     slot is reset and the previous values are ignored for this step.
     """
     prev_min = prev_max = np.nan
     dfslot = self.get_input_slot('table')
     dfslot.update(run_number)
     # Bound up front: the write below needs it on the reset path too.
     df = self._table
     if dfslot.updated.any() or dfslot.deleted.any():
         dfslot.reset()
         dfslot.update(run_number)
     else:
         prev = len(df)-1
         if prev > 0:
             prev_min = df.iat[prev, self._min_column]
             prev_max = df.iat[prev, self._max_column]

     indices = dfslot.created.next(step_size) # returns a slice
     input_df = dfslot.data()
     steps = indices_len(indices)
     if steps > 0:
         x = input_df.to_array(locs=fix_loc(indices), columns=[self._column])
         new_min = np.nanmin(x)
         new_max = np.nanmax(x)

         # nanmin/nanmax ignore the NaN placeholder when there is no
         # previous value.
         row = {self._min_column: np.nanmin([prev_min, new_min]),
                self._max_column: np.nanmax([prev_max, new_max])}
         with self.lock:
             if run_number in df.index:
                 df.loc[run_number] = row
             else:
                 df.add(row, index=run_number)
             # TODO: trim the table to ``params.history`` rows.
     return self._return_run_step(self.next_state(dfslot),
                                  steps_run=steps, reads=steps, updates=len(self._table))
Example #28
0
    def run_step(self,run_number,step_size,howlong):
        """Append ``self.op`` of the created rows to the history table.

        Updates or deletions on the input reset the slot and drop the
        table.  The table is created on demand with the input's dshape,
        receives one row per step at index ``run_number``, and is trimmed
        to the last ``params.history`` rows.
        """
        slot = self.get_input_slot('table')
        slot.update(run_number)
        if slot.updated.any() or slot.deleted.any():
            slot.reset()
            self._table = None
            slot.update(run_number)
        created = slot.created.next(step_size) # returns a slice
        steps = indices_len(created)
        if not steps:
            return self._return_run_step(self.state_blocked, steps_run=0)
        source = slot.data()
        chunk = self.filter_columns(source, fix_loc(created))
        result = self.op(chunk)
        if self._table is None:
            self._table = Table(self.generate_table_name('var'),
                                dshape=source.dshape,
                                create=True)
        self._table.append(result, indices=[run_number])
        print(self._table)

        history = self.params.history
        if len(self._table) > history:
            self._table = self._table.loc[self._table.index[-history:]]
        return self._return_run_step(self.next_state(slot), steps_run=steps)
Example #29
0
    def run_step(self,run_number,step_size,howlong):
        """Store ``self.op`` of the created rows in the history frame.

        Updates or deletions on the ``df`` input reset the slot and drop
        the frame.  Each step writes one row at index ``run_number``
        (stamped in ``UPDATE_COLUMN``) and the frame is trimmed to the
        last ``params.history`` rows.
        """
        dfslot = self.get_input_slot('df')
        dfslot.update(run_number)
        if dfslot.has_updated() or dfslot.has_deleted():
            dfslot.reset()
            self._df = None
            dfslot.update(run_number)
        indices = dfslot.next_created(step_size) # returns a slice
        steps = indices_len(indices)
        if steps==0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.op(self.filter_columns(input_df,fix_loc(indices)))
        op[self.UPDATE_COLUMN] = run_number
        if self._df is None:
            self._df = pd.DataFrame([op], index=[run_number])
        else:
            self._df.loc[run_number] = op
        # Parenthesised form: same output on Python 2, valid on Python 3.
        print(self._df)

        if len(self._df) > self.params.history:
            self._df = self._df.loc[self._df.index[-self.params.history:]]
        return self._return_run_step(dfslot.next_state(), steps_run=steps)
    def run_step(self,run_number,step_size,howlong):
        """Incrementally fit ``y = coef * x + intercept`` by least squares.

        The running sums stored in row 0 of ``self._df`` are extended with
        the created rows of the ``df`` input, then slope (``coef``) and
        intercept are recomputed from the cumulative sums and written back
        together with ``run_number``.

        Raises ``ProgressiveError`` when the input reports updated or
        deleted rows, which this module does not handle.
        """
        dfslot = self.get_input_slot('df')
        input_df = dfslot.data()
        dfslot.update(run_number)
        if dfslot.has_updated() or dfslot.has_deleted():
            raise ProgressiveError('%s module does not manage updates or deletes'
                                   % self.__class__.__name__)

        indices = dfslot.next_created(step_size)
        steps = indices_len(indices)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=steps)

        # Select the columns by label: tuple-unpacking the frame would
        # iterate over its column labels, not its data.
        chunk = input_df.loc[fix_loc(indices),[self._x, self._y]]
        x = chunk[self._x]
        y = chunk[self._y]
        df = self._df
        # The sums are cumulative, so the sample count must be cumulative
        # too.  It is kept on the instance because the summary row has no
        # count column.
        count = getattr(self, '_count', 0) + len(x)
        self._count = count
        sum_x     = df.at[0, 'sum_x']     + x.sum()
        sum_x_sqr = df.at[0, 'sum_x_sqr'] + (x*x).sum()
        sum_y     = df.at[0, 'sum_y']     + y.sum()
        sum_xy    = df.at[0, 'sum_xy']    + (x*y).sum()
        denom = count * sum_x_sqr - sum_x*sum_x
        if denom == 0:
            # Degenerate fit (e.g. all x equal): no defined line.
            coef = intercept = float('nan')
        else:
            coef = (count*sum_xy - sum_x*sum_y) / denom
            intercept = (sum_y*sum_x_sqr - sum_x*sum_xy) / denom
        df.loc[0] = [coef, intercept, sum_x, sum_x_sqr, sum_y, sum_xy, run_number]
        return self._return_run_step(dfslot.next_state(),
                                     steps_run=steps, reads=steps, updates=1)
Example #31
0
 def run_step(self, run_number, step_size, howlong):
     """Refresh the 2-D range selection from table changes and new bounds.

     Pending created/updated/deleted indices of the ``table`` input are
     consumed (up to ``step_size`` each).  The requested lower/upper
     bounds are read from the ``lower``/``upper`` slots and replaced by
     the values of the ``min``/``max`` slots whenever they are missing,
     NaN or outside the valid range.  The effective bounds are published
     through ``_set_min_out``/``_set_max_out`` and handed to
     ``self._impl`` (``start`` the first time, ``resume`` afterwards),
     whose result becomes the selection of the output view.

     The module blocks when a bound slot has no data yet, or when nothing
     changed on the table and the bounds did not change.
     """
     def _drain(slot):
         """Consume pending changes of *slot*; True if created/updated."""
         changed = False
         if slot.deleted.any():
             slot.deleted.next()
         if slot.updated.any():
             slot.updated.next()
             changed = True
         if slot.created.any():
             slot.created.next()
             changed = True
         return changed

     input_slot = self.get_input_slot('table')
     input_slot.update(run_number)
     steps = 0
     deleted = None
     if input_slot.deleted.any():
         deleted = input_slot.deleted.next(step_size)
         steps += indices_len(deleted)
     created = None
     if input_slot.created.any():
         created = input_slot.created.next(step_size)
         steps += indices_len(created)
     updated = None
     if input_slot.updated.any():
         updated = input_slot.updated.next(step_size)
         steps += indices_len(updated)
     with input_slot.lock:
         input_table = input_slot.data()
     if not self._table:
         self._table = TableSelectedView(input_table, bitmap([]))
     self._create_min_max()
     #
     # lower/upper
     #
     lower_slot = self.get_input_slot('lower')
     lower_slot.update(run_number)
     upper_slot = self.get_input_slot('upper')
     limit_changed = _drain(lower_slot)
     if lower_slot is not upper_slot:
         # Both bounds may come from the same slot; drain it only once.
         upper_slot.update(run_number)
         limit_changed = _drain(upper_slot) or limit_changed
     #
     # min/max
     #
     min_slot = self.get_input_slot('min')
     min_slot.update(run_number)
     min_slot.created.next()
     min_slot.updated.next()
     min_slot.deleted.next()
     max_slot = self.get_input_slot('max')
     max_slot.update(run_number)
     max_slot.created.next()
     max_slot.updated.next()
     max_slot.deleted.next()
     if (lower_slot.data() is None or upper_slot.data() is None
             or len(lower_slot.data()) == 0 or len(upper_slot.data()) == 0):
         return self._return_run_step(self.state_blocked, steps_run=0)
     # X ...
     lower_value_x = lower_slot.data().last(self._watched_key_lower_x)
     upper_value_x = upper_slot.data().last(self._watched_key_upper_x)
     # Y ...
     lower_value_y = lower_slot.data().last(self._watched_key_lower_y)
     upper_value_y = upper_slot.data().last(self._watched_key_upper_y)
     # Guard the min/max slots themselves before taking their length.
     if (min_slot.data() is None or max_slot.data() is None
             or len(min_slot.data()) == 0 or len(max_slot.data()) == 0):
         return self._return_run_step(self.state_blocked, steps_run=0)
     # X ...
     minv_x = min_slot.data().last(self._watched_key_lower_x)
     maxv_x = max_slot.data().last(self._watched_key_upper_x)
     # Y ...
     minv_y = min_slot.data().last(self._watched_key_lower_y)
     maxv_y = max_slot.data().last(self._watched_key_upper_y)
     # Fall back to the data extent for any unusable requested bound.
     # X ...
     if (lower_value_x is None or np.isnan(lower_value_x)
             or lower_value_x < minv_x or lower_value_x >= maxv_x):
         lower_value_x = minv_x
         limit_changed = True
     if (upper_value_x is None or np.isnan(upper_value_x)
             or upper_value_x > maxv_x or upper_value_x <= minv_x
             or upper_value_x <= lower_value_x):
         upper_value_x = maxv_x
         limit_changed = True
     # Y ...
     if (lower_value_y is None or np.isnan(lower_value_y)
             or lower_value_y < minv_y or lower_value_y >= maxv_y):
         lower_value_y = minv_y
         limit_changed = True
     if (upper_value_y is None or np.isnan(upper_value_y)
             or upper_value_y > maxv_y or upper_value_y <= minv_y
             or upper_value_y <= lower_value_y):
         upper_value_y = maxv_y
         limit_changed = True
     self._set_min_out(lower_value_x, lower_value_y)
     self._set_max_out(upper_value_x, upper_value_y)
     if steps == 0 and not limit_changed:
         return self._return_run_step(self.state_blocked, steps_run=0)
     # ...
     if not self._impl.is_started:
         self._impl.start(input_table,
                          lower_value_x,
                          upper_value_x,
                          lower_value_y,
                          upper_value_y,
                          limit_changed,
                          created=created,
                          updated=updated,
                          deleted=deleted)
     else:
         self._impl.resume(lower_value_x,
                           upper_value_x,
                           lower_value_y,
                           upper_value_y,
                           limit_changed,
                           created=created,
                           updated=updated,
                           deleted=deleted)
     self._table.selection = self._impl.result._values
     return self._return_run_step(self.next_state(input_slot),
                                  steps_run=steps)
Example #32
0
    def run_step_progress(self, run_number, step_size, howlong):
        """Progressively combine the selections of all ``table*`` inputs.

        Every input whose name starts with ``'table'`` must carry a
        ``TableSelectedView`` over the same physical table.  Ids created on
        all inputs in this step are added directly; ids created on only
        some inputs are added when every input's current selection
        contains them.  An update or deletion on any input resets that
        slot and every following one, and clears the output selection.
        """
        as_bitmap = bitmap.asbitmap
        pending_delete = []
        created_sets = []
        steps = 0
        views = []
        physical = None
        assert len(self.inputs) > 0
        must_reset = False
        for name in self.inputs:
            if not name.startswith('table'):
                continue
            slot = self.get_input_slot(name)
            view = slot.data()
            assert isinstance(view, TableSelectedView)
            if physical is None:
                physical = _get_physical_table(view)
            else:
                assert physical is _get_physical_table(view)
            views.append(view)
            slot.update(run_number)
            # Once one slot needed a reset, all later slots are reset too.
            if must_reset or slot.updated.any() or slot.deleted.any():
                slot.reset()
                must_reset = True
                steps += 1

            if slot.created.any():
                created = slot.created.next(step_size)
                created_sets.append(as_bitmap(created))
                steps += indices_len(created)
        if steps == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        # Deletions are not collected any more; the call is kept as is.
        pending_delete = bitmap.union(*pending_delete)
        if len(created_sets) == len(views):
            # Every input produced ids: the common ones are certain.
            certain = bitmap.intersection(*created_sets)
        else:
            certain = bitmap()

        candidates = bitmap.union(*created_sets)

        if not self._table:
            self._table = TableSelectedView(physical, bitmap([]))
        if must_reset:
            self._table.selection = bitmap([])
        self._table.selection |= certain
        candidates -= certain
        # Keep only the candidates that every input currently selects.
        for view in views:
            candidates &= view.selection
        self._table.selection |= candidates
        first_slot = self.get_input_slot(self.inputs[0])
        return self._return_run_step(self.next_state(first_slot),
                                     steps_run=steps)
Example #33
0
    def run_step(self,run_number,step_size,howlong):
        """Accumulate a 2-D histogram of the created rows of ``df``.

        Bounds come from the ``min``/``max`` slots via ``get_bounds`` and
        are padded by ``get_delta``.  When the data extent leaves the
        padded bounds, or shrinks by more than the padding, the bounds are
        recomputed and the histogram is rebuilt from scratch.  Each step
        writes the accumulated histogram, its maximum and the bounds to
        ``self._df`` at ``run_number``, trimmed to ``params.history`` rows.

        Blocks when no input has created rows, when no bounds are
        available yet, or when the bounds are invalid.
        """
        dfslot = self.get_input_slot('df')
        dfslot.update(run_number)
        min_slot = self.get_input_slot('min')
        min_slot.update(run_number)
        max_slot = self.get_input_slot('max')
        max_slot.update(run_number)

        if dfslot.has_updated() or dfslot.has_deleted():
            logger.debug('reseting histogram')
            dfslot.reset()
            self._histo = None
            self._xedges = None
            self._yedges = None
            dfslot.update(run_number)

        if not (dfslot.has_created() or min_slot.has_created() or max_slot.has_created()):
            # nothing to do, just wait
            logger.info('Input buffers empty')
            return self._return_run_step(self.state_blocked, steps_run=0)

        bounds = self.get_bounds(min_slot, max_slot)
        if bounds is None:
            print('No bounds yet at run %d'%run_number)
            logger.debug('No bounds yet at run %d', run_number)
            return self._return_run_step(self.state_blocked, steps_run=0)
        xmin, xmax, ymin, ymax = bounds
        if self._bounds is None:
            (xdelta, ydelta) = self.get_delta(*bounds)
            self._bounds = (xmin-xdelta,xmax+xdelta,ymin-ydelta,ymax+ydelta)
            print('New bounds at run %d: %s'%(run_number,self._bounds))
        else:
            (dxmin, dxmax, dymin, dymax) = self._bounds
            (xdelta, ydelta) = self.get_delta(*bounds)
            # Either the min/max has extended, or it has shrunk beyond the deltas
            if (xmin<dxmin or xmax>dxmax or ymin<dymin or ymax>dymax) \
              or (xmin>(dxmin+xdelta) or xmax<(dxmax-xdelta) or ymin>(dymin+ydelta) or ymax<(dymax-ydelta)):
                self._bounds = (xmin-xdelta,xmax+xdelta,ymin-ydelta,ymax+ydelta)
                print('Updated bounds at run %d: %s'%(run_number,self._bounds))
                logger.info('Updated bounds at run %s: %s', run_number, self._bounds)
                dfslot.reset()
                dfslot.update(run_number) # should recompute the histogram from scratch
                self._histo = None
                self._xedges = None
                self._yedges = None

        xmin, xmax, ymin, ymax = self._bounds
        if xmin>=xmax or ymin>=ymax:
            logger.error('Invalid bounds: %s', self._bounds)
            return self._return_run_step(self.state_blocked, steps_run=0)

        # Now, we know we have data and bounds, proceed to create a new histogram

        input_df = dfslot.data()
        indices = dfslot.next_created(step_size)
        steps = indices_len(indices)
        logger.info('Read %d rows', steps)
        self.total_read += steps

        filtered_df = input_df.loc[fix_loc(indices)]
        x = filtered_df[self.x_column]
        y = filtered_df[self.y_column]
        p = self.params
        if self._xedges is not None:
            # Reuse the edges of the previous call so histograms add up.
            bins = [self._xedges, self._yedges]
        else:
            bins = [p.ybins, p.xbins]
        if len(x)>0:
            # ``normed`` is omitted: False was its default and the keyword
            # no longer exists in current NumPy.
            histo, xedges, yedges = np.histogram2d(y, x,
                                                   bins=bins,
                                                   range=[[ymin, ymax], [xmin, xmax]])
            self._xedges = xedges
            self._yedges = yedges
        else:
            histo = None
            cmax = 0

        if self._histo is None:
            self._histo = histo
        elif histo is not None:
            self._histo += histo

        if self._histo is not None:
            cmax = self._histo.max()
        # Parenthesised form: same output on Python 2, valid on Python 3.
        print('cmax=%d'%cmax)
        values = [self._histo, 0, cmax, xmin, xmax, ymin, ymax, run_number]
        with self.lock:
            self._df.loc[run_number] = values
            if len(self._df) > p.history:
                self._df = self._df.loc[self._df.index[-p.history:]]
        return self._return_run_step(dfslot.next_state(), steps_run=steps)
Example #34
0
    def run_step(self, run_number, step_size, howlong):
        """Grow the pairwise-distance matrix with the rows created upstream.

        At most ``step_size`` newly created indices are read from the 'table'
        slot.  Distances among the new rows are always computed.  When
        ``self._table['document']`` is not None, distances between the rows
        at ``self._last_index`` and the new rows are computed as well and
        written into the off-diagonal blocks of the resized buffer.

        If the 'array' slot exists and holds data, rows are taken from that
        array instead of from the DataFrame columns.

        ``howlong`` is not used by this method.  Returns the result of
        ``self._return_run_step`` with ``steps_run`` set to the number of
        indices read.
        """
        table_slot = self.get_input_slot('table')
        df = table_slot.data()
        table_slot.update(run_number)
        if table_slot.updated.any() or table_slot.deleted.any():
            # Updates/deletions are not handled incrementally: reset the slot
            # and read its changes again.
            table_slot.reset()
            logger.info(
                'Reseting history because of changes in the input table')
            table_slot.update(run_number)
            #TODO: be smarter with changed values

        indices = table_slot.created.next(step_size)
        steps = indices_len(indices)

        old_rows = None
        new_rows = None
        # NOTE(review): a non-None 'document' column is used below as the
        # signal that earlier rows exist -- confirm against the table setup.
        has_previous = self._table['document'] is not None

        arrayslot = self.get_input_slot('array')
        if arrayslot is not None and arrayslot.data() is not None:
            array = arrayslot.data()
            logger.debug('Using array instead of DataFrame columns')
            if has_previous:
                old_rows = array[self._last_index]
            new_rows = array[indices]
        if new_rows is None:
            # Fall back to the DataFrame columns.
            if self.columns is None:
                # Default to every column except UPDATE_COLUMN.
                self.columns = df.columns.delete(
                    np.where(df.columns == UPDATE_COLUMN))
            elif not isinstance(self.columns, pd.Index):
                self.columns = pd.Index(self.columns)
            rows = df[self.columns]
            if has_previous:
                old_rows = rows.loc[self._last_index]
                assert len(old_rows) == len(self._last_index)
            new_rows = rows.loc[fix_loc(indices)]
            assert len(new_rows) == indices_len(indices)

        new_dist = pairwise_distances(new_rows,
                                      metric=self._metric,
                                      n_jobs=self._n_jobs)
        if has_previous:
            cross_dist = pairwise_distances(old_rows,
                                            new_rows,
                                            metric=self._metric,
                                            n_jobs=self._n_jobs)
            n_old = old_rows.shape[0]
            n_all = n_old + new_rows.shape[0]
            mat = self._buf.resize(n_all)
            # Fill the two off-diagonal blocks and the new diagonal block.
            mat[0:n_old, n_old:n_all] = cross_dist
            mat[n_old:n_all, 0:n_old] = cross_dist.T
            mat[n_old:n_all, n_old:n_all] = new_dist
            self._last_index = self._last_index.append(df.index[indices])
        else:
            mat = self._buf.resize(new_dist.shape[0])
            mat[:, :] = new_dist
            self._last_index = table_slot.last_index[indices]
        return self._return_run_step(self.next_state(table_slot),
                                     steps_run=steps)
Example #35
0
    def run_step(self, run_number, step_size, howlong):
        """Render the latest histogram of the 'array' slot as a PNG image.

        The last 'array' value of the input is converted to an image.  When
        ``params.filename`` is set, the image is saved under the name
        returned by ``self.storage.fullname``; otherwise it is encoded as a
        base64 ``data:`` URI.  The resulting name/URI is added to
        ``self._table`` unless the last row already carries ``run_number``.

        ``step_size`` and ``howlong`` are not used.  The method always
        returns in the blocked state.

        Raises:
            Exception: whatever saving the image to ``filename`` raises, after
                the failure has been logged.
        """
        dfslot = self.get_input_slot('array')
        input_df = dfslot.data()
        dfslot.update(run_number)
        indices = dfslot.created.next()
        steps = indices_len(indices)
        if steps == 0:
            indices = dfslot.updated.next()
            steps = indices_len(indices)
            if steps == 0:
                return self._return_run_step(self.state_blocked, steps_run=1)
        with dfslot.lock:
            histo = input_df.last()['array']
        if histo is None:
            return self._return_run_step(self.state_blocked, steps_run=1)
        params = self.params
        # A NaN cmin/cmax parameter is passed on to toimage as None.
        cmax = params.cmax
        if np.isnan(cmax):
            cmax = None
        cmin = params.cmin
        if np.isnan(cmin):
            cmin = None
        high = params.high
        low = params.low
        try:
            # The image is built from the cube root of the histogram values.
            image = sp.misc.toimage(sp.special.cbrt(histo),
                                    cmin=cmin,
                                    cmax=cmax,
                                    high=high,
                                    low=low,
                                    mode='I')
            image = image.transpose(Image.FLIP_TOP_BOTTOM)
        except Exception:
            # Without an image there is nothing to save or encode; previously
            # this path fell through and failed on ``None.save``.
            logger.exception('Cannot create image from histogram')
            return self._return_run_step(self.state_blocked, steps_run=1)
        filename = params.filename
        if filename is not None:
            try:
                # A '%d'-style placeholder in the name receives the run number.
                if re.search(r'%(0[\d])?d', filename):
                    filename = filename % (run_number)
                filename = self.storage.fullname(self, filename)
                #TODO should do it atomically since it will be called 4 times with the same fn
                image.save(filename, format='PNG')  #, bits=16)
                logger.debug('Saved image %s', filename)
            except Exception:
                logger.error('Cannot save image %s', filename)
                raise
        else:
            buffered = six.BytesIO()
            image.save(buffered, format='PNG', bits=16)
            res = base64.b64encode(buffered.getvalue())
            if six.PY3:
                # b64encode returns bytes on Python 3; decode for the URI.
                res = str(res, "ascii")
            filename = "data:image/png;base64," + res

        if len(self._table) == 0 or self._table.last()['time'] != run_number:
            values = {'filename': filename, 'time': run_number}
            with self.lock:
                self._table.add(values)
        return self._return_run_step(self.state_blocked,
                                     steps_run=1,
                                     reads=1,
                                     updates=1)
Example #36
0
 def run_step(
     self, run_number: int, step_size: int, howlong: float
 ) -> ReturnRunStep:
     """Resolve the query bounds and pass pending input changes on.

     The 'lower' and 'upper' slots supply the requested bounds, read under
     ``self.watched_key_lower`` / ``self.watched_key_upper``.  The 'min' and
     'max' slots supply the values they are checked against.  A bound equal
     to ``"*"`` is replaced by the corresponding min/max value; a bound that
     is None, NaN or outside the min/max interval is replaced too and marks
     the limits as changed.  Pending deletions, creations and updates of the
     'table' slot are then given to ``self._impl`` (``start`` on first use,
     ``resume`` afterwards) and its result becomes the current selection.

     ``run_number`` and ``howlong`` are not used.  Returns the blocked state
     with zero steps when bounds or extrema are not available, or when there
     is nothing buffered and the limits did not change.
     """
     input_slot = self.get_input_slot("table")
     self._create_min_max()
     #
     # lower/upper: drain their change buffers; creations and updates
     # (not deletions) mean the limits changed.
     #
     lower_slot = self.get_input_slot("lower")
     upper_slot = self.get_input_slot("upper")
     if lower_slot is upper_slot:
         bound_slots = (lower_slot,)
     else:
         bound_slots = (lower_slot, upper_slot)
     limit_changed = False
     for bound_slot in bound_slots:
         if bound_slot.deleted.any():
             bound_slot.deleted.next()
         if bound_slot.updated.any():
             bound_slot.updated.next()
             limit_changed = True
         if bound_slot.created.any():
             bound_slot.created.next()
             limit_changed = True
     #
     # min/max: only their current data is read, so drop buffered changes.
     #
     min_slot = self.get_input_slot("min")
     min_slot.clear_buffers()
     max_slot = self.get_input_slot("max")
     max_slot.clear_buffers()
     bounds_missing = (
         lower_slot.data() is None
         or upper_slot.data() is None
         or len(lower_slot.data()) == 0
         or len(upper_slot.data()) == 0
     )
     if bounds_missing:
         return self._return_run_step(self.state_blocked, steps_run=0)
     lower_value = lower_slot.data().get(self.watched_key_lower)
     upper_value = upper_slot.data().get(self.watched_key_upper)
     extrema_missing = (
         lower_slot.data() is None
         or upper_slot.data() is None
         or min_slot.data() is None
         or max_slot.data() is None
         or len(min_slot.data()) == 0
         or len(max_slot.data()) == 0
     )
     if extrema_missing:
         return self._return_run_step(self.state_blocked, steps_run=0)
     minv = min_slot.data().get(self.watched_key_lower)
     maxv = max_slot.data().get(self.watched_key_upper)
     if lower_value == "*":
         lower_value = minv
     else:
         lower_invalid = (
             lower_value is None
             or np.isnan(lower_value)
             or lower_value < minv
             or lower_value >= maxv
         )
         if lower_invalid:
             lower_value = minv
             limit_changed = True
     if upper_value == "*":
         upper_value = maxv
     else:
         # Checked against the lower bound as resolved just above.
         upper_invalid = (
             upper_value is None
             or np.isnan(upper_value)
             or upper_value > maxv
             or upper_value <= minv
             or upper_value <= lower_value
         )
         if upper_invalid:
             upper_value = maxv
             limit_changed = True
     self._set_min_out(lower_value)
     self._set_max_out(upper_value)
     if not (input_slot.has_buffered() or limit_changed):
         return self._return_run_step(self.state_blocked, steps_run=0)
     # Collect up to step_size indices from each change buffer.
     deleted: Optional[bitmap] = None
     created: Optional[bitmap] = None
     updated: Optional[bitmap] = None
     steps = 0
     if input_slot.deleted.any():
         deleted = input_slot.deleted.next(length=step_size, as_slice=False)
         steps += indices_len(deleted)
     if input_slot.created.any():
         created = input_slot.created.next(length=step_size, as_slice=False)
         steps += indices_len(created)
     if input_slot.updated.any():
         updated = input_slot.updated.next(length=step_size, as_slice=False)
         steps += indices_len(updated)
     input_table = input_slot.data()
     if self.result is None:
         self.result = TableSelectedView(input_table, bitmap([]))
     assert self._impl
     hist_slot = self.get_input_slot("hist")
     hist_slot.clear_buffers()
     hist_index = cast(HistogramIndex, hist_slot.output_module)
     changes = dict(created=created, updated=updated, deleted=deleted)
     if self._impl.is_started:
         self._impl.resume(
             hist_index,
             lower_value,
             upper_value,
             limit_changed,
             **changes,
         )
     else:
         self._impl.start(
             input_table,
             hist_index,
             lower_value,
             upper_value,
             limit_changed,
             **changes,
         )
     assert self._impl.result
     self.selected.selection = self._impl.result._values
     return self._return_run_step(self.next_state(input_slot), steps)
Example #37
0
    def run_step(self, run_number, step_size, howlong):
        """Update the 2D histogram with pending deletions and creations.

        The bounds are taken from the 'min' and 'max' slots and padded by the
        deltas returned by ``self.get_delta``.  When the observed bounds leave
        the stored ones, or shrink by more than one delta, the stored bounds
        are recomputed and the module is reset.  Up to ``step_size`` deleted
        rows are subtracted from the histogram and up to ``step_size`` created
        rows are added.  The flipped histogram and its bounds are written to
        ``self._table`` (when ``self._with_output`` is set) and given to
        ``self.build_heatmap``.

        ``howlong`` is not used.  Returns the blocked state with zero steps
        when there is no input, no bounds yet, or the bounds are invalid.
        When no histogram has been accumulated yet, nothing is published and
        the method returns with the number of steps consumed.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        min_slot = self.get_input_slot('min')
        min_slot.update(run_number)
        max_slot = self.get_input_slot('max')
        max_slot.update(run_number)
        if dfslot.updated.any():
            logger.debug('reseting histogram')
            self.reset()
            dfslot.update(run_number)

        if not (dfslot.created.any() or min_slot.created.any()
                or max_slot.created.any()):
            logger.info('Input buffers empty')
            return self._return_run_step(self.state_blocked, steps_run=0)

        bounds = self.get_bounds(min_slot, max_slot)
        if bounds is None:
            logger.debug('No bounds yet at run %d', run_number)
            return self._return_run_step(self.state_blocked, steps_run=0)
        xmin, xmax, ymin, ymax = bounds
        if self._bounds is None:
            (xdelta, ydelta) = self.get_delta(*bounds)
            self._bounds = (xmin - xdelta, xmax + xdelta, ymin - ydelta,
                            ymax + ydelta)
            logger.info("New bounds at run %d: %s", run_number, self._bounds)
        else:
            (dxmin, dxmax, dymin, dymax) = self._bounds
            (xdelta, ydelta) = self.get_delta(*bounds)
            assert xdelta >= 0 and ydelta >= 0

            # Either the min/max has extended, or it has shrunk beyond the deltas
            if ((xmin < dxmin or xmax > dxmax or ymin < dymin or ymax > dymax)
                    or (xmin > (dxmin + xdelta) or xmax <
                        (dxmax - xdelta) or ymin > (dymin + ydelta) or ymax <
                        (dymax - ydelta))):
                self._bounds = (xmin - xdelta, xmax + xdelta, ymin - ydelta,
                                ymax + ydelta)
                logger.info('Updated bounds at run %s: %s', run_number,
                            self._bounds)
                self.reset()
                dfslot.update(run_number)

        # From here on, work with the stored (padded) bounds.
        xmin, xmax, ymin, ymax = self._bounds
        if xmin >= xmax or ymin >= ymax:
            logger.error('Invalid bounds: %s', self._bounds)
            return self._return_run_step(self.state_blocked, steps_run=0)

        # Now, we know we have data and bounds, proceed to create a new histogram
        # or to update the previous if is still exists (i.e. no reset)
        p = self.params
        steps = 0
        # if there are new deletions, build the histogram of the deleted pairs
        # then subtract it from the main histogram
        if dfslot.deleted.any() and self._histo is not None:
            input_df = get_physical_base(dfslot.data())
            indices = dfslot.deleted.next(step_size)
            steps += indices_len(indices)
            logger.info('Read %d rows', steps)
            x = input_df[self.x_column]
            y = input_df[self.y_column]
            idx = input_df.id_to_index(fix_loc(indices))
            x = x[idx]
            y = y[idx]
            bins = [p.ybins, p.xbins]
            if len(x) > 0:
                histo = histogram2d(y,
                                    x,
                                    bins=bins,
                                    range=[[ymin, ymax], [xmin, xmax]])
                self._histo -= histo
        # if there are new creations, build a partial histogram with them then
        # add it to the main histogram
        input_df = dfslot.data()
        indices = dfslot.created.next(step_size)
        steps += indices_len(indices)
        logger.info('Read %d rows', steps)
        self.total_read += steps

        x = input_df[self.x_column]
        y = input_df[self.y_column]
        idx = input_df.id_to_index(fix_loc(indices))
        x = x[idx]
        y = y[idx]
        if self._xedges is not None:
            bins = [self._xedges, self._yedges]
        else:
            bins = [p.ybins, p.xbins]
        if len(x) > 0:
            histo = histogram2d(y,
                                x,
                                bins=bins,
                                range=[[ymin, ymax], [xmin, xmax]])
        else:
            histo = None

        if self._histo is None:
            self._histo = histo
        elif histo is not None:
            self._histo += histo

        if self._histo is None:
            # No rows have been accumulated yet, so there is no array to flip
            # or publish (np.flip cannot handle None).
            return self._return_run_step(self.next_state(dfslot),
                                         steps_run=steps)

        cmax = self._histo.max()
        values = {
            'array': np.flip(self._histo, axis=0),
            'cmin': 0,
            'cmax': cmax,
            'xmin': xmin,
            'xmax': xmax,
            'ymin': ymin,
            'ymax': ymax,
            'time': run_number
        }
        if self._with_output:
            with self.lock:
                table = self._table
                table['array'].set_shape([p.ybins, p.xbins])
                nrows = len(table)
                last = table.last()
                # One row per run: overwrite the row written earlier in the
                # same run, otherwise add a new one.
                if nrows == 0 or last['time'] != run_number:
                    table.add(values)
                else:
                    table.iloc[last.row] = values
        self.build_heatmap(values)
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)
Example #38
0
 def run_step(
     self, run_number: int, step_size: int, howlong: float
 ) -> ReturnRunStep:
     """Feed pending input changes and the current limit to ``self._impl``.

     Pending deletions (all at once), creations and updates (up to
     ``step_size`` each) are read from the 'table' slot.  The limit is the
     last value of the 'limit' slot, read under ``params.limit_key`` when
     that is set and from the first field otherwise.  The implementation is
     started on first use and resumed afterwards; its result becomes the
     current selection.

     ``run_number`` and ``howlong`` are not used.  Returns the blocked state
     with zero steps when the input table is missing, when no change is
     pending, or when the limit slot has no data yet.
     """
     self._run_once = True
     input_slot = self.get_input_slot("table")
     # input_slot.update(run_number)
     steps = 0
     deleted = None
     if input_slot.deleted.any():
         # All pending deletions are taken at once and count as one step.
         deleted = input_slot.deleted.next(as_slice=False)
         steps += 1  # indices_len(deleted)
     created = None
     if input_slot.created.any():
         created = input_slot.created.next(length=step_size, as_slice=False)
         steps += indices_len(created)
     updated = None
     if input_slot.updated.any():
         updated = input_slot.updated.next(length=step_size, as_slice=False)
         steps += indices_len(updated)
     input_table = input_slot.data()
     if input_table is None:
         return self._return_run_step(self.state_blocked, steps_run=0)
     if self.result is None:
         self.result = TableSelectedView(input_table, bitmap([]))
     if steps == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     param = self.params
     limit_slot = self.get_input_slot("limit")
     # limit_slot.update(run_number)
     # Drain the limit's change buffers; creations and updates (not
     # deletions) mean the limit changed.
     limit_changed = False
     if limit_slot.deleted.any():
         limit_slot.deleted.next()
     if limit_slot.updated.any():
         limit_slot.updated.next()
         limit_changed = True
     if limit_slot.created.any():
         limit_slot.created.next()
         limit_changed = True
     limit_data = limit_slot.data()
     # The slot may not have produced anything yet: treat None like empty
     # instead of failing in len().
     # NOTE(review): the table changes read above are not passed on when
     # blocking here -- confirm they are recovered on a later run.
     if limit_data is None or len(limit_data) == 0:
         return self._return_run_step(self.state_blocked, steps_run=0)
     if param.limit_key:
         limit_value = limit_data.last(param.limit_key)
     else:
         limit_value = limit_data.last()[0]
     if not self._impl.is_started:
         self._impl.start(
             input_table,
             limit_value,
             limit_changed,
             created=created,
             updated=updated,
             deleted=deleted,
         )
     else:
         self._impl.resume(
             limit_value,
             limit_changed,
             created=created,
             updated=updated,
             deleted=deleted,
         )
     self.selected.selection = self._impl.result._values
     return self._return_run_step(self.next_state(input_slot), steps)
Example #39
0
   def run_step(self, run_number, step_size, howlong):
       """Update the 1D histogram of ``self.column`` with newly created rows.

       The bounds come from the 'min' and 'max' slots, padded by the delta
       returned by ``self.get_delta``.  Updates or deletions in the 'df'
       slot, and any change of the stored bounds, discard the histogram and
       its bin edges and reset the slot.  Up to ``step_size`` created rows
       are then binned and added to the histogram, and the result is stored
       as the single row kept in ``self._df``.

       ``howlong`` is not used.  Returns the blocked state with zero steps
       when there is no input, no bounds yet, or the bounds are invalid.
       """
       dfslot = self.get_input_slot('df')
       dfslot.update(run_number)
       min_slot = self.get_input_slot('min')
       min_slot.update(run_number)
       max_slot = self.get_input_slot('max')
       max_slot.update(run_number)

       if dfslot.has_updated() or dfslot.has_deleted():
           logger.debug('resetting histogram')
           dfslot.reset()
           self._histo = None
           self._edges = None
           dfslot.update(run_number)

       if not (dfslot.has_created() or min_slot.has_created() or max_slot.has_created()):
           logger.info('input buffers empty')
           return self._return_run_step(self.state_blocked, steps_run=0)

       bounds = self.get_bounds(min_slot, max_slot)
       if bounds is None:
           logger.debug('No bounds yet at run %d', run_number)
           return self._return_run_step(self.state_blocked, steps_run=0)

       bound_min, bound_max = bounds
       if self._bounds is None:
           delta = self.get_delta(*bounds)
           self._bounds = (bound_min - delta, bound_max + delta)
           logger.info("New bounds at run %d: %s", run_number, self._bounds)
       else:
           (old_min, old_max) = self._bounds
           delta = self.get_delta(*bounds)

           # Either the min/max has extended, or it has shrunk beyond delta.
           if(bound_min < old_min or bound_max > old_max) \
             or bound_min > (old_min + delta) or bound_max < (old_max - delta):
               self._bounds = (bound_min - delta, bound_max + delta)
               logger.info('Updated bounds at run %d: %s', run_number, self._bounds)
               dfslot.reset()
               dfslot.update(run_number)
               self._histo = None
               # The old edges belong to the old bounds; reusing them as
               # ``bins`` would make np.histogram ignore the new range.
               self._edges = None

       (curr_min, curr_max) = self._bounds
       if curr_min >= curr_max:
           logger.error('Invalid bounds: %s', self._bounds)
           return self._return_run_step(self.state_blocked, steps_run=0)

       input_df = dfslot.data()
       indices = dfslot.next_created(step_size) # returns a slice or ... ?
       steps = indices_len(indices)
       logger.info('Read %d rows', steps)
       self.total_read += steps
       filtered_df = input_df.loc[fix_loc(indices)]
       column = filtered_df[self.column]
       # Reuse the edges of the previous call so partial histograms line up.
       bins = self._edges if self._edges is not None else self.params.bins
       histo = None
       if len(column) > 0:
           # ``density`` replaces the ``normed`` keyword dropped by NumPy.
           histo, self._edges = np.histogram(column, bins=bins, range=[curr_min, curr_max], density=False)
       if self._histo is None:
           self._histo = histo
       elif histo is not None:
           self._histo += histo
       values = [self._histo, curr_min, curr_max, run_number]
       with self.lock:
           self._df.loc[run_number] = values
           # Keep only the most recent row.
           self._df = self._df.loc[self._df.index[-1:]]
       return self._return_run_step(dfslot.next_state(), steps_run=steps)
Example #40
0
    def run_step(self, run_number, step_size, howlong):
        """Update the 1D histogram of ``self.column`` with newly created rows.

        The bounds come from the 'min' and 'max' slots, padded by the delta
        returned by ``self.get_delta``.  Updates or deletions in the 'table'
        slot, and any change of the stored bounds, discard the histogram and
        its bin edges and reset the slot.  Up to ``step_size`` created rows
        are then binned and added to the histogram, and a row holding the
        histogram, the bounds and ``run_number`` is appended to
        ``self._table``.

        ``howlong`` is not used.  Returns the blocked state with zero steps
        when there is no input, no bounds yet, or the bounds are invalid.
        """
        dfslot = self.get_input_slot('table')
        dfslot.update(run_number)
        min_slot = self.get_input_slot('min')
        min_slot.update(run_number)
        max_slot = self.get_input_slot('max')
        max_slot.update(run_number)

        if dfslot.updated.any() or dfslot.deleted.any():
            logger.debug('reseting histogram')
            dfslot.reset()
            self._histo = None
            self._edges = None
            dfslot.update(run_number)

        if not (dfslot.created.any() or min_slot.created.any()
                or max_slot.created.any()):
            logger.info('Input buffers empty')
            return self._return_run_step(self.state_blocked, steps_run=0)

        bounds = self.get_bounds(min_slot, max_slot)
        if bounds is None:
            logger.debug('No bounds yet at run %d', run_number)
            return self._return_run_step(self.state_blocked, steps_run=0)

        bound_min, bound_max = bounds
        if self._bounds is None:
            delta = self.get_delta(*bounds)
            self._bounds = (bound_min - delta, bound_max + delta)
            logger.info("New bounds at run %d: %s", run_number, self._bounds)
        else:
            (old_min, old_max) = self._bounds
            delta = self.get_delta(*bounds)

            # Either the min/max has extended, or it has shrunk beyond delta.
            if(bound_min < old_min or bound_max > old_max) \
              or bound_min > (old_min + delta) or bound_max < (old_max - delta):
                self._bounds = (bound_min - delta, bound_max + delta)
                logger.info('Updated bounds at run %d: %s', run_number,
                            self._bounds)
                dfslot.reset()
                dfslot.update(run_number)
                self._histo = None
                self._edges = None

        (curr_min, curr_max) = self._bounds
        if curr_min >= curr_max:
            logger.error('Invalid bounds: %s', self._bounds)
            return self._return_run_step(self.state_blocked, steps_run=0)

        input_df = dfslot.data()
        indices = dfslot.created.next(step_size)  # returns a slice or ... ?
        steps = indices_len(indices)
        logger.info('Read %d rows', steps)
        self.total_read += steps
        column = input_df[self.column]
        column = column.loc[fix_loc(indices)]
        # Reuse the edges of the previous call so partial histograms line up.
        bins = self._edges if self._edges is not None else self.params.bins
        histo = None
        if len(column) > 0:
            # Only ``density`` is passed: the redundant ``normed`` keyword
            # has been dropped by NumPy and would raise a TypeError.
            histo, self._edges = np.histogram(column,
                                              bins=bins,
                                              range=[curr_min, curr_max],
                                              density=False)
        if self._histo is None:
            self._histo = histo
        elif histo is not None:
            self._histo += histo
        values = {
            'array': [self._histo],
            'min': [curr_min],
            'max': [curr_max],
            'time': [run_number]
        }
        with self.lock:
            self._table['array'].set_shape((self.params.bins, ))
            self._table.append(values)
        return self._return_run_step(self.next_state(dfslot), steps_run=steps)