def get_bounds(self, min_slot, max_slot):
    """Return the current ``(xmin, xmax, ymin, ymax)`` bounds, or None.

    The last row of each slot's data is read under that slot's lock and
    the ``self.x_column`` / ``self.y_column`` entries are extracted from it.

    Returns None when a slot holds no rows while ``self._bounds`` is None.
    On each axis, a maximum below the minimum is swapped with it and a
    warning is logged.
    """
    min_slot.next_created()
    with min_slot.lock:
        min_df = min_slot.data()
        if len(min_df) == 0 and self._bounds is None:
            return None
        # Local names avoid shadowing the min/max builtins.
        lower = last_row(min_df)
        xmin = lower[self.x_column]
        ymin = lower[self.y_column]
    max_slot.next_created()
    with max_slot.lock:
        max_df = max_slot.data()
        if len(max_df) == 0 and self._bounds is None:
            return None
        upper = last_row(max_df)
        xmax = upper[self.x_column]
        ymax = upper[self.y_column]
    # Logger.warning is the supported spelling; Logger.warn is a deprecated
    # alias that newer Python versions no longer provide.
    if xmax < xmin:
        xmax, xmin = xmin, xmax
        logger.warning('xmax < xmin, swapped')
    if ymax < ymin:
        ymax, ymin = ymin, ymax
        logger.warning('ymax < ymin, swapped')
    return (xmin, xmax, ymin, ymax)
def test_idxmin(self):
    # Feed one random table to both IdxMin and Min, run the scheduler, and
    # require the last row of IdxMin's min output to equal Min's last row
    # (update column removed on both sides).
    scheduler = Scheduler()
    table = RandomTable(10, rows=10000, throttle=1000, scheduler=scheduler)
    idx_module = IdxMin(scheduler=scheduler)
    idx_module.input.df = table.output.df
    min_module = Min(scheduler=scheduler)
    min_module.input.df = table.output.df
    printer = Print(scheduler=scheduler)
    printer.input.df = idx_module.output.min
    scheduler.start()
    expected = last_row(min_module.df(), remove_update=True)
    actual = last_row(idx_module.min(), remove_update=True)
    same = (expected == actual)
    self.assertTrue(same.all())
def test_idxmax(self):
    # Feed one random table to both IdxMax and Max, run the scheduler, and
    # require the last row of IdxMax's max output to equal Max's last row
    # (update column removed on both sides).
    scheduler = Scheduler()
    table = RandomTable(10, rows=10000, throttle=1000, scheduler=scheduler)
    idx_module = IdxMax(scheduler=scheduler)
    idx_module.input.df = table.output.df
    max_module = Max(scheduler=scheduler)
    max_module.input.df = table.output.df
    printer = Print(scheduler=scheduler)
    printer.input.df = idx_module.output.max
    scheduler.start()
    expected = last_row(max_module.df(), remove_update=True)
    actual = last_row(idx_module.max(), remove_update=True)
    same = (expected == actual)
    self.assertTrue(same.all())
def run_step(self,run_number,step_size,howlong):
    """Fold the next chunk of created input rows into the running idxmin/min.

    ``self._df`` accumulates, per column, the row label of the smallest
    value seen so far; ``self._min`` accumulates the matching values.
    Both get one new row per step and are trimmed to the last
    ``self.params.history`` rows.

    ``step_size`` caps how many created rows are consumed; ``howlong`` is
    not used here. Returns whatever ``self._return_run_step`` returns.
    """
    dfslot = self.get_input_slot('df')
    dfslot.update(run_number)
    if dfslot.has_updated() or dfslot.has_deleted():
        dfslot.reset()
        # Drop BOTH running results. Clearing only _df left a stale _min
        # behind, which sent the next step into the incremental branch
        # with _df set to None.
        self._df = None
        self._min = None
        dfslot.update(run_number)
    indices = dfslot.next_created(step_size)  # returns a slice
    steps = indices_len(indices)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)
    input_df = dfslot.data()
    op = self.filter_columns(input_df, fix_loc(indices)).idxmin()
    if not op.index.equals(self._columns):
        # some columns are not numerical
        self._columns = op.index

    op[self.UPDATE_COLUMN] = run_number
    if self._min is None or self._df is None:
        # First chunk: the chunk-local result is the overall result.
        current_min = pd.Series([np.nan], index=op.index)  # the UPDATE_COLUMN is included
        current_min[self.UPDATE_COLUMN] = run_number
        for col in op.index:
            if col == self.UPDATE_COLUMN:
                continue
            # Look up the value sitting at the idxmin label of this column.
            current_min[col] = input_df.loc[op[col], col]
        self._min = pd.DataFrame([current_min], columns=op.index)
        self._df = pd.DataFrame([op], columns=op.index)
    else:
        prev_min = last_row(self._min)
        prev_idx = last_row(self._df)
        current_min = pd.Series(prev_min)
        current_min[self.UPDATE_COLUMN] = run_number
        for col in op.index:
            if col == self.UPDATE_COLUMN:
                continue
            val = input_df.loc[op[col], col]
            improved = (not np.isnan(val)) and (
                np.isnan(current_min[col]) or val < current_min[col])
            if improved:
                # New overall minimum: op[col] already holds its row label,
                # so only the value needs recording.
                current_min[col] = val
            else:
                # The earlier minimum still stands, so its label must be
                # carried forward instead of the chunk-local one.
                op[col] = prev_idx[col]
        with self.lock:
            # NOTE(review): DataFrame.append no longer exists in pandas 2.x;
            # kept because this file appears to target an older pandas.
            self._df = self._df.append(op, ignore_index=True)
            self._min = self._min.append(current_min, ignore_index=True)
            if len(self._df) > self.params.history:
                self._df = self._df.loc[self._df.index[-self.params.history:]]
                self._min = self._min.loc[self._min.index[-self.params.history:]]
    return self._return_run_step(dfslot.next_state(), steps_run=steps)
def run_step(self,run_number,step_size,howlong):
    """Fold the next chunk of created input rows into the running maximum.

    ``self._df`` gets a row labelled ``run_number`` holding the column-wise
    maximum seen so far, and is trimmed to the last
    ``self.params.history`` rows. ``howlong`` is not used here.
    """
    slot = self.get_input_slot('df')
    slot.update(run_number)
    if slot.has_updated() or slot.has_deleted():
        # Start over whenever existing rows were changed or removed.
        slot.reset()
        self._df = None
        slot.update(run_number)
    created = slot.next_created(step_size)  # returns a slice
    n_steps = indices_len(created)
    if n_steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=0)

    frame = slot.data()
    chunk = self.filter_columns(frame, fix_loc(created))
    maxima = chunk.max()
    if not maxima.index.equals(self._columns):
        # some columns are not numerical
        self._columns = maxima.index
    maxima[self.UPDATE_COLUMN] = run_number

    if self._df is not None:
        side_by_side = pd.concat([last_row(self._df), maxima], axis=1)
        maxima = side_by_side.max(axis=1)
        # The row-wise max also ran over the update column, so restore it.
        maxima[self.UPDATE_COLUMN] = run_number
        self._df.loc[run_number] = maxima
    else:
        self._df = pd.DataFrame([maxima], index=[run_number])

    if len(self._df) > self.params.history:
        kept = self._df.index[-self.params.history:]
        self._df = self._df.loc[kept]
    return self._return_run_step(slot.next_state(), steps_run=n_steps)
def get_bounds(self, min_slot, max_slot):
    """Return ``(min, max)`` read from ``self.column`` of each slot's last row.

    Each slot is read under its own lock, the minimum slot first. Returns
    None as soon as a slot holds no rows while ``self._bounds`` is None.
    """
    extremes = []
    for slot in (min_slot, max_slot):
        slot.next_created()
        with slot.lock:
            frame = slot.data()
            if not len(frame) and self._bounds is None:
                return None
            extremes.append(last_row(frame)[self.column])
    return tuple(extremes)
def get_scale(self):
    """Return the first entry of the 'scale' slot's last row, defaulting to 1.

    The default applies when the slot is missing, or when its data is
    None or empty.
    """
    slot = self.get_input_slot('scale')
    frame = None if slot is None else slot.data()
    if frame is not None and len(frame) != 0:
        return last_row(frame).iloc[0]
    return 1
def run_step(self,run_number,step_size,howlong):
    """Filter the next chunk of the 'df' slot with the current query expression.

    The expression is read from ``self._query_column`` of the 'query'
    slot's last row. A new query resets the 'df' slot and the buffer so
    everything is filtered again. Without a query (no slot, None or an
    empty expression) the input frame is passed through unchanged; the
    same happens when evaluating the expression raises. Otherwise the
    selected rows are stamped with ``run_number`` and appended to
    ``self._buffer``.

    ``howlong`` is not used here. Every path returns through
    ``self._return_run_step`` with ``self.state_blocked``.
    """
    query_slot = self.get_input_slot('query')
    df_slot = self.get_input_slot('df')
    if not query_slot:
        query = None
    else:
        query_df = query_slot.data()
        query_slot.update(run_number)
        if query_slot.has_created():  # ignore deleted and updated
            df_slot.reset()  # re-filter
            self._buffer.reset()
        # Consume every pending creation; the returned indices are not needed.
        query_slot.next_created()
        with query_slot.lock:
            query = last_row(query_df)[self._query_column]  # get the query expression
        if query is not None:
            if len(query) == 0:
                query = None
            else:
                # make sure we have a string, on Python 2 and Python 3 alike
                try:
                    text_type = unicode
                except NameError:
                    text_type = str
                query = text_type(query)

    df_slot.update(run_number)
    if df_slot.has_deleted() or df_slot.has_updated():
        df_slot.reset()
        self._buffer.reset()
        df_slot.update(run_number)

    indices = df_slot.next_created(step_size)
    steps = indices_len(indices)
    if steps == 0:
        return self._return_run_step(self.state_blocked, steps_run=steps)

    if query is None:  # nothing to query, just pass through
        logger.info('No query, passing data through')
        self._df = df_slot.data()
        return self._return_run_step(self.state_blocked, steps_run=steps)

    with df_slot.lock:
        new_df = df_slot.data().loc[fix_loc(indices)]
        try:
            # NOTE(review): the expression comes from the 'query' input slot
            # and is handed to DataFrame.eval as is; confirm that only
            # trusted expressions can reach this point.
            selected_df = new_df.eval(query)
            if isinstance(selected_df, pd.Series):
                if selected_df.index.has_duplicates:
                    # This used to stop in an interactive debugger, which
                    # hangs any non-interactive run; report it instead.
                    logger.warning('Duplicate index labels in the result of query: %s', query)
                selected_df = new_df.loc[selected_df]
        except Exception as e:
            logger.error('Probably a syntax error in query expression: %s', e)
            self._df = df_slot.data()
            return self._return_run_step(self.state_blocked, steps_run=steps)
        selected_df.loc[:,self.UPDATE_COLUMN] = run_number
        self._buffer.append(selected_df)  # , ignore_index=False) TODO later
        self._df = self._buffer.df()
    return self._return_run_step(self.state_blocked, steps_run=steps)
def test_max(self):
    # Run Max over a random table and compare its last row (update column
    # removed) with pandas' own column-wise max over the same table.
    scheduler = Scheduler()
    table = RandomTable(10, rows=10000, scheduler=scheduler)
    max_module = Max(scheduler=scheduler)
    max_module.input.df = table.output.df
    printer = Print(scheduler=scheduler)
    printer.input.df = max_module.output.df
    scheduler.start()
    full_table = table.df()
    value_columns = table.columns.difference([table.UPDATE_COLUMN])
    expected = full_table[value_columns].max()
    actual = last_row(max_module.df(), remove_update=True)
    self.assertTrue(np.allclose(expected, actual))
def _ranges_to_json(self, json): #join the min and max input slots, and the min and max output slots by name #example: #ranges = [{"name": "xRange", "in_min": 0, "in_max": 1, "out_min": 0, "out_max": 1}, # {"name": "yRange", "in_min": 0, "in_max": 1, "out_min": 0, "out_max": 1}] in_min = self.get_input_slot('min').data() in_max = self.get_input_slot('max').data() out_min = self.get_data('min') out_max = self.get_data('max') if all(x is not None for x in [in_min, in_max, out_min, out_max]): in_min_final = last_row(in_min, remove_update=True) in_max_final = last_row(in_max, remove_update=True) out_min_final = last_row(out_min, remove_update=True) out_max_final = last_row(out_max, remove_update=True) ranges = pd.DataFrame({'in_min': in_min_final, 'in_max': in_max_final, 'out_min': out_min_final, 'out_max': out_max_final}) ranges.index.name = "name" json['ranges'] = ranges.reset_index().to_dict(orient='records') return json
def run_step(self,run_number,step_size,howlong):
    """Publish the last row of the 'df' slot as this module's ``_df``.

    Nothing is stored while the slot's data is None. ``step_size`` and
    ``howlong`` are not used here; the step always reports one step run in
    the blocked state.
    """
    input_slot = self.get_input_slot('df')
    if input_slot.data() is not None:
        with input_slot.lock:
            # Read the data again now that the slot lock is held.
            newest = last_row(input_slot.data(), as_series=False)
            newest[self.UPDATE_COLUMN] = run_number
            if self._reset_index:
                newest.index = [0]
        with self.lock:
            self._df = newest
    return self._return_run_step(self.state_blocked, steps_run=1)
def test_var(self):
    # Run Var over a one-column random table and compare its last row
    # (update column removed) with pandas' variance of column 1.
    scheduler = Scheduler()
    table = RandomTable(1, rows=1000, scheduler=scheduler)
    var_module = Var(scheduler=scheduler)
    var_module.input.df = table.output.df
    printer = Print(scheduler=scheduler)
    printer.input.df = var_module.output.df
    scheduler.start()
    expected = table.df()[1].var()
    actual = last_row(var_module.df(), remove_update=True)
    self.assertTrue(np.allclose(expected, actual))
def heatmap_to_json(self, json, short):
    """Fill ``json`` with the heatmap's columns, bounds and image URL.

    'columns' is always set from the module feeding the 'array' slot.
    'bounds' is added only when that slot's last row has no NaN among
    xmin/xmax/ymin/ymax. 'image' is added only when this module has a
    frame and ``self._last_update`` is set. ``short`` is not used here.

    Returns the same ``json`` mapping.
    """
    dfslot = self.get_input_slot('array')
    histo = dfslot.output_module
    json['columns'] = [histo.x_column, histo.y_column]
    with dfslot.lock:
        histo_df = dfslot.data()
        # Check the length first: index[-1] on an empty frame raises
        # IndexError instead of letting the guard skip the bounds.
        if (histo_df is not None and len(histo_df) != 0
                and histo_df.index[-1] is not None):
            row = last_row(histo_df)
            bounds = {
                'xmin': row.xmin,
                'ymin': row.ymin,
                'xmax': row.xmax,
                'ymax': row.ymax
            }
            if not any(np.isnan(value) for value in bounds.values()):
                json['bounds'] = bounds
    with self.lock:
        df = self.df()
        if df is not None and self._last_update is not None:
            row = last_row(df)
            json['image'] = "/progressivis/module/image/%s?run_number=%d"%(self.id,row[self.UPDATE_COLUMN])
    return json
def from_input(self, input):
    """Store a new row built from the last row overridden by ``input``.

    ``input`` must be a dict, otherwise ProgressiveError is raised. Keys
    present in the last row (or among the frame's columns when there is no
    last row) replace the matching values; any other key is reported in
    the returned string and ignored. The row is stored in ``self._df``
    under the run number handed out by the scheduler.

    Returns an error description, or '' when every key was accepted.
    """
    if not isinstance(input, dict):
        raise ProgressiveError('Expecting a dictionary')
    if self._df is None and self.get_input_slot('like') is None:
        error = 'Variable %s with no initial value and no input slot'%self.id
        logger.error(error)
        return error
    # NOTE(review): when _df is None but a 'like' slot exists, execution
    # still reaches the lines below with _df set to None -- confirm that
    # _df is always populated before this can be called.
    last = last_row(self._df)
    if last is None:
        last = {v: None for v in self._df.columns}
    else:
        last = last.to_dict()
    rejected = []
    # dict.items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for (k, v) in input.items():
        if k in last:
            last[k] = v
        else:
            rejected.append('Invalid key %s ignored. '%k)
    run_number = self.scheduler().for_input(self)
    last[self.UPDATE_COLUMN] = run_number
    self._df.loc[run_number] = last
    return ''.join(rejected)
def run_step(self, run_number, step_size, howlong):
    """Turn the min/max inputs and the requested range into a query string.

    'min' and 'max' are expected to come from applying Min and Max to a
    DataFrame with e.g. columns 'a' and 'b', so each holds those columns.
    'min_value' and 'max_value' probably come from an interaction, so they
    may hold no value at all or NaN for some columns; a missing one falls
    back to the matching 'min' / 'max'.

    ``self._min`` and ``self._max`` receive the narrowed bounds, and
    ``self._df`` receives the query only when it differs from the last one
    stored. ``step_size`` and ``howlong`` are not used here.
    """
    def read_last(slot_name):
        # Refresh the named slot and read its last row, all under its lock.
        slot = self.get_input_slot(slot_name)
        with slot.lock:
            slot.update(run_number)
            return last_row(slot.data(), remove_update=True)

    lower = read_last('min')
    upper = read_last('max')
    lower_value = read_last('min_value')
    if lower_value is None:
        lower_value = lower
    upper_value = read_last('max_value')
    if upper_value is None:
        upper_value = upper

    # Need to align the series to create queries
    aligned = pd.DataFrame({'min': lower,
                            'max': upper,
                            'min_value': lower_value,
                            'max_value': upper_value})
    above_min = aligned['min_value'] > aligned['min']
    below_max = aligned['max_value'] < aligned['max']
    both_sides = above_min & below_max
    only_min = above_min & (~ both_sides)
    only_max = below_max & (~ both_sides)

    clauses = ['({} < {})'.format(lower_value[row], row)
               for row in aligned.index[only_min]]
    clauses.extend('({} < {})'.format(row, upper_value[row])
                   for row in aligned.index[only_max])
    clauses.extend('({} < {} < {})'.format(lower_value[row], row, upper_value[row])
                   for row in aligned.index[both_sides])
    query = ' and '.join(clauses)

    # compute the new min/max columns
    new_min = aligned.loc[:, ['min', 'min_value']].max(axis=1)
    new_min[self.UPDATE_COLUMN] = run_number
    new_min.name = 'min'
    self._min = pd.DataFrame([new_min], index=[run_number])

    new_max = aligned.loc[:, ['max', 'max_value']].min(axis=1)
    new_max[self.UPDATE_COLUMN] = run_number
    new_max.name = 'max'
    self._max = pd.DataFrame([new_max], index=[run_number])

    with self.lock:
        repeated = False
        if len(self._df) != 0:
            previous = self._df.at[self._df.index[-1], 'query']
            repeated = bool(previous == query)
        # do not repeat the query to allow optimizing downstream
        if not repeated:
            logger.info('New query: "%s"', query)
            self._df.loc[run_number] = pd.Series({'query': query,
                                                  self.UPDATE_COLUMN: run_number})
    return self._return_run_step(self.state_blocked, steps_run=1)