def _parse_parameters(self, kwds):  # pylint: disable=no-member
    """Build the parameter storage and apply overrides found in ``kwds``.

    A dataframe is created from ``self.all_parameters`` extended with
    ``self.UPDATE_COLUMN_DESC`` and stored in ``self._params``; a
    ``DataFrameAsDict`` wrapper around it is exposed as ``self.params``.
    Every declared parameter whose name is a key of ``kwds`` is then set
    to the corresponding value.  Entries of ``kwds`` that do not match a
    declared parameter are left untouched.
    """
    descriptors = self.all_parameters + [self.UPDATE_COLUMN_DESC]
    self._params = create_dataframe(descriptors)
    self.params = DataFrameAsDict(self._params)

    # Each parameter descriptor is a 3-tuple; only its first item (the
    # name) is needed to look up an override.
    for name, _, _ in self.all_parameters:
        if name not in kwds:
            continue
        self.params[name] = kwds[name]
def __init__(self, column, percentiles=None, **kwds):
    """Set up a percentiles module for one column.

    Args:
        column: name of the column to process; must be truthy.
        percentiles: optional array-like of fractions in [0, 1].  When
            omitted, 0.25, 0.5 and 0.75 are used.  If 0.5 is absent it is
            inserted between the values below and above it.
        **kwds: forwarded to the parent constructor after the 'df' input
            slot descriptor has been registered in it.

    Raises:
        ProgressiveError: if ``column`` is falsy.
        ValueError: if any requested percentile is greater than 1; the
            message suggests the values divided by 100.
    """
    if not column:
        raise ProgressiveError('Need a column name')

    input_slots = [SlotDescriptor('df', type=pd.DataFrame)]
    self._add_slots(kwds, 'input_descriptors', input_slots)
    super(Percentiles, self).__init__(dataframe_slot='percentiles', **kwds)

    self._column = column
    self.default_step_size = 1000
    self.tdigest = TDigest()

    if percentiles is not None:
        requested = np.asarray(percentiles)
        if (requested > 1).any():
            # Values look like percentages: rescale them only to build a
            # helpful suggestion, then reject the input.
            suggestion = requested / 100.0
            template = ("percentiles should all be in the interval [0, 1]. "
                        "Try {0} instead.")
            raise ValueError(template.format(list(suggestion)))
    else:
        requested = np.array([0.25, 0.5, 0.75])

    if (requested != 0.5).all():
        # The median is missing: splice it in between the lower and the
        # upper halves.
        below = requested[requested < .5]
        above = requested[requested > .5]
        requested = np.hstack([below, 0.5, above])

    self._percentiles = requested
    self.schema = [(_pretty_name(x), np.dtype(float), np.nan)
                   for x in self._percentiles]
    self.schema.append(DataFrameModule.UPDATE_COLUMN_DESC)
    self._df = create_dataframe(self.schema)
def __init__(self, colormap=None, **kwds):
    """Set up a heatmap module.

    Registers an 'array' input slot descriptor in ``kwds``, initialises
    the parent with ``dataframe_slot='heatmap'``, stores ``colormap``,
    sets the default step size to 1 and creates the output dataframe
    from ``Heatmap.schema``.
    """
    array_slot = SlotDescriptor('array', type=pd.DataFrame)
    self._add_slots(kwds, 'input_descriptors', [array_slot])
    super(Heatmap, self).__init__(dataframe_slot='heatmap', **kwds)

    self.colormap = colormap
    self.default_step_size = 1
    self._df = create_dataframe(Heatmap.schema)
def __init__(self, x_column, y_column, **kwds):
    """Set up a linear regression module over two columns.

    ``x_column`` and ``y_column`` are stored in ``self._x`` and
    ``self._y``.  An 'inp' input slot descriptor is registered in
    ``kwds`` before the parent is initialised with
    ``dataframe_slot='inp'``.  The default step size is 10000 and the
    output dataframe is created from ``LinearRegression.schema``.
    """
    self._x, self._y = x_column, y_column

    inp_slot = SlotDescriptor('inp', type=pd.DataFrame)
    self._add_slots(kwds, 'input_descriptors', [inp_slot])
    super(LinearRegression, self).__init__(dataframe_slot='inp', **kwds)

    self.default_step_size = 10000
    self._df = create_dataframe(LinearRegression.schema)
def __init__(self, column, **kwds):
    """Set up a one-dimensional histogram module for ``column``.

    Three required input slot descriptors ('df', 'min', 'max') are
    registered in ``kwds`` before the parent is initialised with
    ``dataframe_slot='df'``.  The read counter starts at 0, the
    histogram, edges and bounds start as ``None``, and the output
    dataframe is created from ``Histogram1D.schema``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
        for slot_name in ('df', 'min', 'max')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)
    super(Histogram1D, self).__init__(dataframe_slot='df', **kwds)

    self.column = column
    self.total_read = 0
    # Nothing has been computed yet.
    self._histo = self._edges = self._bounds = None
    self._df = create_dataframe(Histogram1D.schema)
def __init__(self, **kwds):
    """Set up a range query module.

    Registers four required input slot descriptors ('min', 'max',
    'min_value', 'max_value') and two optional output slot descriptors
    ('min', 'max') in ``kwds``, then initialises the parent with
    ``dataframe_slot='query'``.  The default step size is 1, the output
    dataframe is created empty from ``RangeQuery.schema`` and the
    ``_min`` / ``_max`` attributes start as ``None``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
        for slot_name in ('min', 'max', 'min_value', 'max_value')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)

    optional_outputs = [
        SlotDescriptor(slot_name, type=pd.DataFrame, required=False)
        for slot_name in ('min', 'max')
    ]
    self._add_slots(kwds, 'output_descriptors', optional_outputs)

    super(RangeQuery, self).__init__(dataframe_slot='query', **kwds)

    self.default_step_size = 1
    self._df = create_dataframe(RangeQuery.schema, empty=True)
    self._min = self._max = None
def __init__(self, x_column, y_column, **kwds):
    """Set up a two-dimensional histogram module over two columns.

    Three required input slot descriptors ('df', 'min', 'max') are
    registered in ``kwds`` before the parent is initialised with
    ``dataframe_slot='df'``.  The default step size is 10000, the read
    counter starts at 0, the histogram, both edge arrays and the bounds
    start as ``None``, and the output dataframe is created from
    ``Histogram2D.schema``.
    """
    required_inputs = [
        SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
        for slot_name in ('df', 'min', 'max')
    ]
    self._add_slots(kwds, 'input_descriptors', required_inputs)
    super(Histogram2D, self).__init__(dataframe_slot='df', **kwds)

    self.x_column = x_column
    self.y_column = y_column
    self.default_step_size = 10000
    self.total_read = 0
    # Nothing has been computed yet.
    self._histo = self._xedges = self._yedges = self._bounds = None
    self._df = create_dataframe(Histogram2D.schema)
def __init__(self, **kwds):
    """Set up an input module.

    The parent is initialised with ``kwds`` unchanged.  The output
    dataframe is created empty from ``Input.schema``, ``self._last``
    records its length at creation time, and the default step size is
    set to 1000000.
    """
    super(Input, self).__init__(**kwds)

    empty_frame = create_dataframe(Input.schema, empty=True)
    self._df = empty_frame
    self._last = len(self._df)
    self.default_step_size = 1000000