Beispiel #1
0
 def _parse_parameters(self, kwds):
     """Build the parameter table and fill it from keyword arguments.

     A dataframe is created from ``self.all_parameters`` extended with
     ``self.UPDATE_COLUMN_DESC`` and is exposed through ``self.params``
     (a ``DataFrameAsDict`` wrapper around ``self._params``).  Every
     declared parameter whose name appears in ``kwds`` then receives the
     value supplied there.  ``kwds`` itself is only read, never modified.
     """
     # pylint: disable=no-member
     descriptors = self.all_parameters + [self.UPDATE_COLUMN_DESC]
     self._params = create_dataframe(descriptors)
     self.params = DataFrameAsDict(self._params)
     for descriptor in self.all_parameters:
         # Each descriptor is a 3-tuple; only its first item, the name, is used.
         name, _, _ = descriptor
         if name not in kwds:
             continue
         self.params[name] = kwds[name]
Beispiel #2
0
    def __init__(self, column, percentiles=None, **kwds):
        """Create a module that maintains percentiles for one column.

        Args:
            column: Name of the column to summarise; stored as
                ``self._column``.  Must be truthy.
            percentiles: Optional sequence of fractions in [0, 1].  Defaults
                to the quartiles ``[0.25, 0.5, 0.75]``.  When the median
                (0.5) is absent it is inserted between the values below
                and the values above it.
            **kwds: Passed to ``self._add_slots`` together with the ``df``
                input slot descriptor, then forwarded to the parent
                constructor.

        Raises:
            ProgressiveError: If ``column`` is empty or otherwise falsy.
            ValueError: If any requested percentile lies outside [0, 1].
        """
        if not column:
            raise ProgressiveError('Need a column name')
        self._add_slots(kwds,'input_descriptors',
                        [SlotDescriptor('df', type=pd.DataFrame)])
        super(Percentiles, self).__init__(dataframe_slot='percentiles', **kwds)
        self._column = column
        self.default_step_size = 1000
        self.tdigest = TDigest()

        if percentiles is None:
            percentiles = np.array([0.25, 0.5, 0.75])
        else:
            # Every requested value has to be a fraction in [0, 1].
            percentiles = np.asarray(percentiles)
            if (percentiles > 1).any():
                # Values above 1 were most likely given on a 0-100 scale, so
                # the error message suggests the rescaled equivalents.
                percentiles = percentiles / 100.0
                msg = ("percentiles should all be in the interval [0, 1]. "
                       "Try {0} instead.")
                raise ValueError(msg.format(list(percentiles)))
            if (percentiles < 0).any():
                # The lower bound used to go unchecked, letting negative
                # values through to the schema despite the documented range.
                raise ValueError("percentiles should all be in the "
                                 "interval [0, 1].")
            if (percentiles != 0.5).all():  # median isn't included
                lh = percentiles[percentiles < .5]
                uh = percentiles[percentiles > .5]
                percentiles = np.hstack([lh, 0.5, uh])

        self._percentiles = percentiles

        # One float column per percentile (NaN as its default value),
        # followed by the shared update column descriptor.
        self.schema = [(_pretty_name(x), np.dtype(float), np.nan) for x in self._percentiles]
        self.schema.append(DataFrameModule.UPDATE_COLUMN_DESC)
        self._df = create_dataframe(self.schema)
Beispiel #3
0
    def __init__(self, colormap=None, **kwds):
        """Initialise the heatmap module.

        ``colormap`` is stored unchanged on the instance.  The ``array``
        input slot descriptor is handed to ``self._add_slots`` before the
        parent constructor runs with ``dataframe_slot='heatmap'``.
        Afterwards ``self._df`` is created from ``Heatmap.schema``.
        """
        input_slots = [SlotDescriptor('array', type=pd.DataFrame)]
        self._add_slots(kwds, 'input_descriptors', input_slots)
        super(Heatmap, self).__init__(dataframe_slot='heatmap', **kwds)

        self.colormap = colormap
        self.default_step_size = 1
        self._df = create_dataframe(Heatmap.schema)
    def __init__(self, x_column, y_column, **kwds):
        """Initialise the linear regression module.

        The two column names are kept as ``self._x`` and ``self._y``.  The
        ``inp`` input slot descriptor is handed to ``self._add_slots``
        before the parent constructor runs with ``dataframe_slot='inp'``.
        Afterwards ``self._df`` is created from ``LinearRegression.schema``.
        """
        self._x, self._y = x_column, y_column

        input_slots = [SlotDescriptor('inp', type=pd.DataFrame)]
        self._add_slots(kwds, 'input_descriptors', input_slots)
        super(LinearRegression, self).__init__(dataframe_slot='inp', **kwds)

        self.default_step_size = 10000
        self._df = create_dataframe(LinearRegression.schema)
Beispiel #5
0
 def __init__(self, column, **kwds):
     """Initialise the one-dimensional histogram module.

     Three required input slots (``df``, ``min`` and ``max``) are handed
     to ``self._add_slots`` before the parent constructor runs with
     ``dataframe_slot='df'``.  ``column`` is stored on the instance, the
     read counter starts at zero, the histogram, edges and bounds start
     as ``None``, and ``self._df`` is created from ``Histogram1D.schema``.
     """
     required_inputs = [
         SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
         for slot_name in ('df', 'min', 'max')
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     super(Histogram1D, self).__init__(dataframe_slot='df', **kwds)

     self.column = column
     self.total_read = 0
     # Nothing has been computed yet.
     self._histo = self._edges = self._bounds = None
     self._df = create_dataframe(Histogram1D.schema)
Beispiel #6
0
 def __init__(self, **kwds):
     """Initialise the range query module.

     Four required input slots (``min``, ``max``, ``min_value`` and
     ``max_value``) and two optional output slots (``min`` and ``max``)
     are handed to ``self._add_slots`` before the parent constructor runs
     with ``dataframe_slot='query'``.  ``self._df`` is created from
     ``RangeQuery.schema`` with ``empty=True``, and the cached ``_min``
     and ``_max`` start as ``None``.
     """
     required_inputs = [
         SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
         for slot_name in ('min', 'max', 'min_value', 'max_value')
     ]
     optional_outputs = [
         SlotDescriptor(slot_name, type=pd.DataFrame, required=False)
         for slot_name in ('min', 'max')
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     self._add_slots(kwds, 'output_descriptors', optional_outputs)
     super(RangeQuery, self).__init__(dataframe_slot='query', **kwds)

     self.default_step_size = 1
     self._df = create_dataframe(RangeQuery.schema, empty=True)
     self._min = self._max = None
Beispiel #7
0
 def __init__(self, x_column, y_column, **kwds):
     """Initialise the two-dimensional histogram module.

     Three required input slots (``df``, ``min`` and ``max``) are handed
     to ``self._add_slots`` before the parent constructor runs with
     ``dataframe_slot='df'``.  The two column names are stored on the
     instance, the read counter starts at zero, the histogram, both edge
     arrays and the bounds start as ``None``, and ``self._df`` is created
     from ``Histogram2D.schema``.
     """
     required_inputs = [
         SlotDescriptor(slot_name, type=pd.DataFrame, required=True)
         for slot_name in ('df', 'min', 'max')
     ]
     self._add_slots(kwds, 'input_descriptors', required_inputs)
     super(Histogram2D, self).__init__(dataframe_slot='df', **kwds)

     self.x_column = x_column
     self.y_column = y_column
     self.default_step_size = 10000
     self.total_read = 0
     # Nothing has been computed yet.
     self._histo = self._xedges = self._yedges = self._bounds = None
     self._df = create_dataframe(Histogram2D.schema)
Beispiel #8
0
 def __init__(self, **kwds):
     """Initialise the input module.

     All keyword arguments go to the parent constructor.  ``self._df`` is
     created from ``Input.schema`` with ``empty=True``, and ``self._last``
     records the length of that dataframe at construction time.
     """
     super(Input, self).__init__(**kwds)

     schema = Input.schema
     self._df = create_dataframe(schema, empty=True)
     self._last = len(self._df)
     self.default_step_size = 10 ** 6