def discretize(self, n_bins=3, inplace=False):
    """Return a discretized (piecewise) version of this series.

    The bin edges and the per-bin scaling factors are fitted jointly by
    minimizing ``rmse`` with SciPy's L-BFGS-B optimizer. When
    ``self.archetypes`` is truthy the series is grouped on index level 0
    and every group is fitted independently; otherwise the series is
    fitted as a whole.

    As a side effect the fitted parameters are stored on ``self`` as
    ``bin_edges_`` and ``bin_scaling_factors_``.

    Args:
        n_bins (int): Number of bins or steps to discretize the function
        inplace (bool): if True, perform operation in-place

    Returns:
        EnergySeries or None: the discretized series when ``inplace`` is
        False; None when ``inplace`` is True (``self`` is updated
        instead).

    Raises:
        ImportError: if SciPy is not installed.
    """
    try:
        from scipy.optimize import minimize
    except ImportError as err:
        raise ImportError(
            "The scipy package must be installed to "
            "use this optional feature."
        ) from err

    def _fit(series, hour_of_min, label=None):
        """Fit edges and scaling factors for one series; return the result."""
        steps = range(1, n_bins + 1)
        n_params = n_bins + 1

        # Initial guess for the scaling factors: a decreasing sequence,
        # closed by the series minimum. Each one is bounded by
        # [0, series.max()].
        sf = [1 / (i * 1.01) for i in steps]
        sf.append(series.min())
        sf_bounds = [(0, series.max())] * n_params

        # Initial guess for the bin edges, derived from the position of the
        # minimum and closed by the series length. Each one is bounded by
        # [0, len(series)].
        # TODO: hours need to work for a datetime index
        hours = [hour_of_min - hour_of_min * 1 / (i * 1.01) for i in steps]
        hours.append(len(series))
        hours_bounds = [(0, len(series))] * n_params

        start_time = time.time()
        if label is not None:
            log("discretizing EnergySeries {}".format(label), lg.DEBUG)
        # The solution vector is laid out as [edges..., scaling factors...].
        res = minimize(
            rmse,
            np.array(hours + sf),
            args=(series.values,),
            method="L-BFGS-B",
            bounds=hours_bounds + sf_bounds,
            options=dict(disp=True),
        )
        log(
            "Completed discretization in {:,.2f} seconds".format(
                time.time() - start_time
            ),
            lg.DEBUG,
        )
        return res

    if self.archetypes:
        # if multiindex, group and apply operation on each group.
        # combine at the end
        results = {}
        edges = {}
        ampls = {}
        for name, sub in self.groupby(level=0):
            res = _fit(sub, sub.time_at_min[1], label=name)
            edges[name] = res.x[0:n_bins + 1]
            ampls[name] = res.x[n_bins + 1:]
            # NOTE(review): piecewise presumably rebuilds the step function
            # from the solution vector -- confirm against its definition.
            results[name] = Series(piecewise(res.x))
        self.bin_edges_ = Series(edges).apply(Series)
        self.bin_scaling_factors_ = DataFrame(ampls)

        result = concat(results)
    else:
        res = _fit(self, self.time_at_min)
        edges = res.x[0:n_bins + 1]
        ampls = res.x[n_bins + 1:]
        result = Series(piecewise(res.x))

        bin_edges = Series(edges).apply(Series)
        # NOTE(review): self.bin_edges_ refers to the same object that is
        # mutated by the next two statements, so it also receives the
        # extra leading row -- confirm this aliasing is intended.
        self.bin_edges_ = bin_edges
        bin_edges.loc[-1, 0] = 0
        bin_edges.sort_index(inplace=True)
        # Differences between consecutive edges give each bin's duration.
        bin_edges = bin_edges.diff().dropna()
        bin_edges = bin_edges.round()
        self.bin_scaling_factors_ = DataFrame({
            "duration": bin_edges[0],
            "scaling factor": ampls,
        })
        self.bin_scaling_factors_.index = np.round(edges).astype(int)

    if inplace:
        self.update(result)
        self.__class__ = EnergySeries
        self.__finalize__(result)
    else:
        result.__class__ = EnergySeries
        return result.__finalize__(self)