Example #1
0
    def discretize(self, n_bins=3, inplace=False):
        """Return a discretized (piecewise) version of this series.

        An L-BFGS-B optimization minimizes ``rmse`` over a vector made of
        ``n_bins + 1`` bin edges followed by ``n_bins + 1`` scaling factors.
        The fitted vector is handed to ``piecewise`` to build the result.
        When ``self.archetypes`` is truthy, the fit is run separately on each
        level-0 group and the per-group results are concatenated.

        As a side effect, the fitted values are stored on ``self`` as
        ``bin_edges_`` and ``bin_scaling_factors_``.

        Args:
            n_bins (int): Number of bins or steps to discretize the function
            inplace (bool): if True, perform operation in-place

        Returns:
            EnergySeries: The discretized series, or None when ``inplace`` is
                True (``self`` is updated instead).

        Raises:
            ImportError: If scipy is not installed.
        """
        try:
            from scipy.optimize import minimize
        except ImportError as err:
            raise ImportError(
                "The scipy package must be installed to "
                "use this optional feature."
            ) from err

        # The solution vector is [edges..., scaling factors...]; each half
        # holds n_bins + 1 entries.
        split = n_bins + 1

        def _fit(series, hour_of_min, label=None):
            """Fit edges and scaling factors for one series; return ``res.x``."""
            # Initial guess for the scaling factors: a decreasing sequence
            # followed by the series minimum.
            sf = [1 / (i * 1.01) for i in range(1, split)]
            sf.append(series.min())
            sf_bounds = [(0, series.max())] * split

            # Initial guess for the edges, spread below ``hour_of_min`` and
            # closed by the series length.
            hours = [
                hour_of_min - hour_of_min * 1 / (i * 1.01)
                for i in range(1, split)
            ]
            # Todo hours need to work for datetime index
            hours.append(len(series))
            hours_bounds = [(0, len(series))] * split

            start_time = time.time()
            if label is not None:
                log("discretizing EnergySeries {}".format(label), lg.DEBUG)
            res = minimize(
                rmse,
                np.array(hours + sf),
                # Explicit one-element tuple: ``(values)`` is just ``values``.
                args=(series.values,),
                method="L-BFGS-B",
                bounds=hours_bounds + sf_bounds,
                options=dict(disp=True),
            )
            log(
                "Completed discretization in {:,.2f} seconds".format(
                    time.time() - start_time),
                lg.DEBUG,
            )
            return res.x

        if self.archetypes:
            # if multiindex, group and apply operation on each group.
            # combine at the end
            results = {}
            edges = {}
            ampls = {}
            for name, sub in self.groupby(level=0):
                fitted = _fit(sub, sub.time_at_min[1], label=name)
                edges[name] = fitted[:split]
                ampls[name] = fitted[split:]
                results[name] = Series(piecewise(fitted))
            self.bin_edges_ = Series(edges).apply(Series)
            self.bin_scaling_factors_ = DataFrame(ampls)

            result = concat(results)
        else:
            fitted = _fit(self, self.time_at_min)
            edges = fitted[:split]
            ampls = fitted[split:]
            result = Series(piecewise(fitted))

            bin_edges = Series(edges).apply(Series)
            # NOTE(review): ``self.bin_edges_`` refers to the same object that
            # is modified in place just below, so it presumably also receives
            # the extra leading row -- confirm this is intended.
            self.bin_edges_ = bin_edges
            # Prepend a zero edge so that diff() yields each bin's duration.
            bin_edges.loc[-1, 0] = 0
            bin_edges.sort_index(inplace=True)
            bin_edges = bin_edges.diff().dropna()
            bin_edges = bin_edges.round()
            self.bin_scaling_factors_ = DataFrame({
                "duration": bin_edges[0],
                "scaling factor": ampls
            })
            self.bin_scaling_factors_.index = np.round(edges).astype(int)

        if inplace:
            self.update(result)
            self.__class__ = EnergySeries
            self.__finalize__(result)
        else:
            result.__class__ = EnergySeries
            return result.__finalize__(self)