Example #1
0
 def get_bounds(self, min_slot, max_slot):
     """Return the current ``(xmin, xmax, ymin, ymax)`` tuple, or ``None``.

     The last row of each slot's data supplies the values read from
     ``self.x_column`` and ``self.y_column``.  ``None`` is returned when a
     slot holds no rows and ``self._bounds`` has not been set.  When a
     minimum is larger than the matching maximum, the two are swapped and
     a warning is logged.
     """
     min_slot.next_created()  # return value intentionally ignored
     with min_slot.lock:
         min_df = min_slot.data()
         if len(min_df) == 0 and self._bounds is None:
             return None
         # NOTE(review): an empty frame with self._bounds already set still
         # reaches last_row(); confirm last_row() copes with empty input.
         low = last_row(min_df)
         xmin = low[self.x_column]
         ymin = low[self.y_column]

     max_slot.next_created()  # return value intentionally ignored
     with max_slot.lock:
         max_df = max_slot.data()
         if len(max_df) == 0 and self._bounds is None:
             return None
         high = last_row(max_df)
         xmax = high[self.x_column]
         ymax = high[self.y_column]

     if xmax < xmin:
         xmax, xmin = xmin, xmax
         logger.warning('xmax < xmin, swapped')
     if ymax < ymin:
         ymax, ymin = ymin, ymax
         logger.warning('ymax < ymin, swapped')
     return (xmin, xmax, ymin, ymax)
Example #2
0
 def test_idxmin(self):
     """IdxMin's ``min`` output must equal the Min module's last row."""
     sched = Scheduler()
     table = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idx_module = IdxMin(scheduler=sched)
     idx_module.input.df = table.output.df
     min_module = Min(scheduler=sched)
     min_module.input.df = table.output.df
     printer = Print(scheduler=sched)
     printer.input.df = idx_module.output.min
     sched.start()
     # Compare the final rows with the update column stripped.
     expected = last_row(min_module.df(), remove_update=True)
     actual = last_row(idx_module.min(), remove_update=True)
     matches = (expected == actual)
     self.assertTrue(matches.all())
Example #3
0
 def test_idxmax(self):
     """IdxMax's ``max`` output must equal the Max module's last row."""
     sched = Scheduler()
     table = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idx_module = IdxMax(scheduler=sched)
     idx_module.input.df = table.output.df
     max_module = Max(scheduler=sched)
     max_module.input.df = table.output.df
     printer = Print(scheduler=sched)
     printer.input.df = idx_module.output.max
     sched.start()
     # Compare the final rows with the update column stripped.
     expected = last_row(max_module.df(), remove_update=True)
     actual = last_row(idx_module.max(), remove_update=True)
     matches = (expected == actual)
     self.assertTrue(matches.all())
Example #4
0
    def run_step(self,run_number,step_size,howlong):
        """Fold the next chunk of the 'df' input into the running idxmin/min.

        ``self._df`` gains one row per step holding, for each column, the
        index label of the smallest value seen so far; ``self._min`` holds
        the matching values.  Both frames are trimmed to the last
        ``self.params.history`` rows.

        Returns the result of ``self._return_run_step``: blocked with zero
        steps when no new rows are available, otherwise the slot's next
        state together with the number of rows consumed.
        """
        dfslot = self.get_input_slot('df')
        dfslot.update(run_number)
        if dfslot.has_updated() or dfslot.has_deleted():
            dfslot.reset()
            # Clear both accumulators together: leaving self._min set while
            # self._df is None would send the next step into the "append"
            # branch below with no frame to append to.
            self._df = None
            self._min = None
            dfslot.update(run_number)
        indices = dfslot.next_created(step_size) # returns a slice
        steps = indices_len(indices)
        if steps==0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        input_df = dfslot.data()
        op = self.filter_columns(input_df, fix_loc(indices)).idxmin()
        if not op.index.equals(self._columns):
            # some columns are not numerical
            self._columns = op.index

        op[self.UPDATE_COLUMN] = run_number
        if self._min is None:
            # First chunk: its per-column minima are the running minima.
            values = pd.Series(np.nan, index=op.index) # the UPDATE_COLUMN is included
            values[self.UPDATE_COLUMN] = run_number
            for col in op.index:
                if col == self.UPDATE_COLUMN:
                    continue
                values[col] = input_df.loc[op[col], col] # look up the value at the idxmin label
            self._min = pd.DataFrame([values], columns=op.index)
            self._df = pd.DataFrame([op], columns=op.index)
        else:
            prev_min = last_row(self._min)
            prev_idx = last_row(self._df)
            values = pd.Series(prev_min)
            values[self.UPDATE_COLUMN] = run_number
            for col in op.index:
                if col == self.UPDATE_COLUMN:
                    continue
                val = input_df.loc[op[col], col]
                if not np.isnan(val) and (np.isnan(values[col]) or val < values[col]):
                    # This chunk holds a new minimum; op[col] already
                    # carries its index label, so only the value changes.
                    values[col] = val
                else:
                    # The previous minimum still stands (or the chunk value
                    # is NaN): keep the previously recorded index label.
                    op[col] = prev_idx[col]
            with self.lock:
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # switch to pd.concat if the project moves to a recent pandas.
                self._df = self._df.append(op, ignore_index=True)
                self._min = self._min.append(values, ignore_index=True)
                if len(self._df) > self.params.history:
                    self._df = self._df.loc[self._df.index[-self.params.history:]]
                    self._min = self._min.loc[self._min.index[-self.params.history:]]

        return self._return_run_step(dfslot.next_state(), steps_run=steps)
Example #5
0
    def run_step(self,run_number,step_size,howlong):
        """Fold the next chunk of the 'df' input into the running maximum.

        A row indexed by ``run_number`` is stored in ``self._df`` holding the
        column-wise maximum seen so far; the frame is trimmed to the last
        ``self.params.history`` rows.  Returns the result of
        ``self._return_run_step``.
        """
        slot = self.get_input_slot('df')
        slot.update(run_number)
        if slot.has_updated() or slot.has_deleted():
            # Start over when existing rows were modified or removed.
            slot.reset()
            self._df = None
            slot.update(run_number)

        created = slot.next_created(step_size)  # a slice of new rows
        n_rows = indices_len(created)
        if n_rows == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)

        frame = slot.data()
        chunk_max = self.filter_columns(frame, fix_loc(created)).max()
        if not chunk_max.index.equals(self._columns):
            # Non-numerical columns were dropped by max().
            self._columns = chunk_max.index
        chunk_max[self.UPDATE_COLUMN] = run_number

        if self._df is not None:
            previous = last_row(self._df)
            merged = pd.concat([previous, chunk_max], axis=1).max(axis=1)
            # The row-wise max also covered the update column; restore it.
            merged[self.UPDATE_COLUMN] = run_number
            self._df.loc[run_number] = merged
        else:
            self._df = pd.DataFrame([chunk_max], index=[run_number])

        if len(self._df) > self.params.history:
            kept = self._df.index[-self.params.history:]
            self._df = self._df.loc[kept]
        return self._return_run_step(slot.next_state(), steps_run=n_rows)
Example #6
0
   def get_bounds(self, min_slot, max_slot):
       """Return ``(min, max)`` read from ``self.column``, or ``None``.

       Each value comes from the last row of the corresponding slot's data.
       ``None`` is returned when a slot holds no rows and ``self._bounds``
       has not been set.
       """
       extremes = []
       # Both slots are read the same way, minimum first.
       for slot in (min_slot, max_slot):
           slot.next_created()
           with slot.lock:
               frame = slot.data()
               if len(frame) == 0 and self._bounds is None:
                   return None
               extremes.append(last_row(frame)[self.column])

       lower, upper = extremes
       return (lower, upper)
Example #7
0
 def get_scale(self):
     """Return the first value of the 'scale' slot's last row, or 1.

     The fallback of 1 applies when the slot is absent, has no data, or
     its data is empty.
     """
     slot = self.get_input_slot('scale')
     frame = None if slot is None else slot.data()
     if frame is None or len(frame) == 0:
         return 1
     newest = last_row(frame)
     return newest.iloc[0]
Example #8
0
    def run_step(self,run_number,step_size,howlong):
        """Filter newly created rows of the 'df' input with the current query.

        The query expression is read from ``self._query_column`` of the last
        row of the optional 'query' slot.  Rows selected by the expression
        are stamped with ``run_number``, appended to ``self._buffer`` and
        published through ``self._df``.  With no query (no slot, ``None`` or
        an empty expression), or when evaluation raises, ``self._df`` is set
        to the whole input frame instead.

        Returns the result of ``self._return_run_step``, always in the
        blocked state.
        """
        query_slot = self.get_input_slot('query')
        df_slot = self.get_input_slot('df')
        if not query_slot:
            query = None
        else:
            query_df = query_slot.data()
            query_slot.update(run_number)
            if query_slot.has_created(): # ignore deleted and updated
                df_slot.reset() # re-filter
                self._buffer.reset()
            query_slot.next_created() # read it all; the returned indices are not needed
            with query_slot.lock:
                query = last_row(query_df)[self._query_column] # get the query expression
            if query is not None:
                if len(query)==0:
                    query=None
                else:
                    # NOTE(review): `unicode` is a Python 2 builtin.
                    query = unicode(query) # make sure we have a string

        df_slot.update(run_number)
        if df_slot.has_deleted() or df_slot.has_updated():
            df_slot.reset()
            self._buffer.reset()
            df_slot.update(run_number)

        indices = df_slot.next_created(step_size)
        steps = indices_len(indices)
        if steps==0:
            return self._return_run_step(self.state_blocked, steps_run=steps)

        if query is None: # nothing to query, just pass through
            logger.info('No query, passing data through')
            self._df = df_slot.data()
            return self._return_run_step(self.state_blocked, steps_run=steps)

        with df_slot.lock:
            new_df = df_slot.data().loc[fix_loc(indices)]
            try:
                # NOTE(security): the expression comes from the 'query' slot
                # and is handed to DataFrame.eval as-is; make sure whatever
                # feeds that slot is trusted.
                selected_df = new_df.eval(query)
                if isinstance(selected_df, pd.Series):
                    if selected_df.index.has_duplicates:
                        # Previously a pdb breakpoint; report and carry on.
                        logger.warning('Query result has duplicate index labels')
                    selected_df = new_df.loc[selected_df]
            except Exception as e:
                # Deliberate best effort: fall back to passing data through.
                logger.error('Probably a syntax error in query expression: %s', e)
                self._df = df_slot.data()
                return self._return_run_step(self.state_blocked, steps_run=steps)
            selected_df.loc[:,self.UPDATE_COLUMN] = run_number
            self._buffer.append(selected_df) #, ignore_index=False) TODO later
            self._df = self._buffer.df()
        return self._return_run_step(self.state_blocked, steps_run=steps)
Example #9
0
 def test_max(self):
     """The Max module's last row must match pandas' column-wise max."""
     sched = Scheduler()
     table = RandomTable(10, rows=10000, scheduler=sched)
     max_module = Max(scheduler=sched)
     max_module.input.df = table.output.df
     printer = Print(scheduler=sched)
     printer.input.df = max_module.output.df
     sched.start()
     # Reference value: max over every column except the update column.
     frame = table.df()
     data_columns = table.columns.difference([table.UPDATE_COLUMN])
     expected = frame[data_columns].max()
     actual = last_row(max_module.df(), remove_update=True)
     self.assertTrue(np.allclose(expected, actual))
Example #10
0
 def _ranges_to_json(self, json):
     #join the min and max input slots, and the min and max output slots by name
     #example:
     #ranges = [{"name": "xRange", "in_min": 0, "in_max": 1, "out_min": 0, "out_max": 1},
     #    {"name": "yRange", "in_min": 0, "in_max": 1, "out_min": 0, "out_max": 1}]
     in_min = self.get_input_slot('min').data()
     in_max = self.get_input_slot('max').data()
     out_min = self.get_data('min')
     out_max = self.get_data('max')
     if all(x is not None for x in [in_min, in_max, out_min, out_max]):
         in_min_final = last_row(in_min, remove_update=True)
         in_max_final = last_row(in_max, remove_update=True)
         out_min_final = last_row(out_min, remove_update=True)
         out_max_final = last_row(out_max, remove_update=True)
         ranges = pd.DataFrame({'in_min': in_min_final,
                                'in_max': in_max_final,
                                'out_min': out_min_final,
                                'out_max': out_max_final})
         ranges.index.name = "name"
         json['ranges'] = ranges.reset_index().to_dict(orient='records')
     return json
Example #11
0
    def run_step(self,run_number,step_size,howlong):
        """Publish the last row of the 'df' input as ``self._df``.

        The row is stamped with ``run_number`` and, when
        ``self._reset_index`` is set, re-indexed to ``[0]``.  Nothing is
        stored while the slot has no data.  Always returns a blocked state
        with one step run.
        """
        slot = self.get_input_slot('df')
        if slot.data() is not None:
            # Re-read under the slot lock to take the row itself.
            with slot.lock:
                newest = last_row(slot.data(), as_series=False)
            newest[self.UPDATE_COLUMN] = run_number
            if self._reset_index:
                newest.index = [0]
            with self.lock:
                self._df = newest
        return self._return_run_step(self.state_blocked, steps_run=1)
Example #12
0
 def test_var(self):
     """The Var module's last row must match pandas' variance of column 1."""
     sched = Scheduler()
     table = RandomTable(1, rows=1000, scheduler=sched)
     var_module = Var(scheduler=sched)
     var_module.input.df = table.output.df
     printer = Print(scheduler=sched)
     printer.input.df = var_module.output.df
     sched.start()
     column = table.df()[1]
     expected = column.var()
     actual = last_row(var_module.df(), remove_update=True)
     self.assertTrue(np.allclose(expected, actual))
Example #13
0
 def heatmap_to_json(self, json, short):
     """Fill ``json`` with the heatmap's columns, bounds and image URL.

     'columns' is always set from the module feeding the 'array' slot.
     'bounds' is added only when that slot has at least one row and none
     of its last row's xmin/xmax/ymin/ymax is NaN.  'image' is added when
     this module has a frame and ``self._last_update`` is set.

     ``short`` is not used by this method.  Returns ``json``.
     """
     dfslot = self.get_input_slot('array')
     histo = dfslot.output_module
     json['columns'] = [histo.x_column, histo.y_column]
     with dfslot.lock:
         histo_df = dfslot.data()
         # The length check keeps index[-1] from raising IndexError on an
         # empty frame.
         if (histo_df is not None and len(histo_df) != 0
                 and histo_df.index[-1] is not None):
             row = last_row(histo_df)
             if not (np.isnan(row.xmin) or np.isnan(row.xmax)
                     or np.isnan(row.ymin) or np.isnan(row.ymax)):
                 json['bounds'] = {
                     'xmin': row.xmin,
                     'ymin': row.ymin,
                     'xmax': row.xmax,
                     'ymax': row.ymax
                 }
     with self.lock:
         df = self.df()
         if df is not None and self._last_update is not None:
             row = last_row(df)
             json['image'] = "/progressivis/module/image/%s?run_number=%d"%(self.id,row[self.UPDATE_COLUMN])
     return json
Example #14
0
 def from_input(self, input):
     """Store a new row built from the last row overridden by ``input``.

     Keys of ``input`` that match a known column replace that column's
     value; other keys are reported in the returned error text.  The new
     row is stamped with the run number obtained from the scheduler and
     written to ``self._df`` under that run number.

     Returns an error string, empty when every key was accepted.
     Raises ProgressiveError when ``input`` is not a dictionary.
     """
     if not isinstance(input,dict):
         raise ProgressiveError('Expecting a dictionary')
     if self._df is None:
         if self.get_input_slot('like') is None:
             error = 'Variable %s with no initial value and no input slot'%self.id
         else:
             # A 'like' slot is connected but no frame exists yet; there is
             # nothing to update, so report it rather than dereferencing
             # None further down.
             error = 'Variable %s has no value yet, input ignored'%self.id
         logger.error(error)
         return error
     last = last_row(self._df)
     if last is None:
         last = {v: None for v in self._df.columns}
     else:
         last = last.to_dict()
     rejected = []
     # items() is available on both Python 2 and Python 3 dictionaries.
     for (k, v) in input.items():
         if k in last:
             last[k] = v
         else:
             rejected.append('Invalid key %s ignored. '%k)
     run_number = self.scheduler().for_input(self)
     last[self.UPDATE_COLUMN] = run_number
     self._df.loc[run_number] = last
     return ''.join(rejected)
Example #15
0
    def run_step(self, run_number, step_size, howlong):
        """Build a range query from the min/max and min_value/max_value slots.

        The 'min' and 'max' slots are assumed to come from applying Min and
        Max to a DataFrame (e.g. with columns 'a' and 'b'), so each holds
        those columns.  'min_value' and 'max_value' probably come from an
        interaction, so they may hold no value or NaN for some columns.

        ``self._min`` and ``self._max`` receive the tightened bounds, and a
        new row is written to ``self._df`` only when the query text differs
        from the previous one.  Always returns a blocked state with one
        step run.
        """
        def read_last(name):
            # Refresh the named slot and take its last row, under its lock.
            slot = self.get_input_slot(name)
            with slot.lock:
                slot.update(run_number)
                return last_row(slot.data(), remove_update=True)

        lower = read_last('min')
        upper = read_last('max')
        lower_sel = read_last('min_value')
        if lower_sel is None:
            lower_sel = lower
        upper_sel = read_last('max_value')
        if upper_sel is None:
            upper_sel = upper

        # Need to align the series to create queries
        aligned = pd.DataFrame({'min': lower, 'max': upper,
                                'min_value': lower_sel, 'max_value': upper_sel})
        above_min = aligned['min_value'] > aligned['min']
        below_max = aligned['max_value'] < aligned['max']
        both = above_min & below_max
        only_min = above_min & (~ both)
        only_max = below_max & (~ both)

        clauses = ['({} < {})'.format(lower_sel[row], row)
                   for row in aligned.index[only_min]]
        clauses += ['({} < {})'.format(row, upper_sel[row])
                    for row in aligned.index[only_max]]
        clauses += ['({} < {} < {})'.format(lower_sel[row], row, upper_sel[row])
                    for row in aligned.index[both]]
        query = ' and '.join(clauses)

        # compute the new min/max columns
        new_min = aligned.loc[:, ['min', 'min_value']].max(axis=1)
        new_min[self.UPDATE_COLUMN] = run_number
        new_min.name = 'min'
        self._min = pd.DataFrame([new_min], index=[run_number])

        new_max = aligned.loc[:, ['max', 'max_value']].min(axis=1)
        new_max[self.UPDATE_COLUMN] = run_number
        new_max.name = 'max'
        self._max = pd.DataFrame([new_max], index=[run_number])

        with self.lock:
            if len(self._df) != 0:
                previous = self._df.at[self._df.index[-1], 'query']
                if previous == query:
                    # do not repeat the query to allow optimizing downstream
                    return self._return_run_step(self.state_blocked, steps_run=1)
                logger.info('New query: "%s"', query)
            self._df.loc[run_number] = pd.Series({'query': query, self.UPDATE_COLUMN: run_number})
        return self._return_run_step(self.state_blocked, steps_run=1)