def soiling_seperation_algorithm(observed, iterations=5, weights=None,
                                 index_set=None, tau=0.85):
    """Split ``observed`` into three additive components by repeatedly
    solving a reweighted convex program.

    The model is ``observed == s1 + s2 + s3`` on ``index_set``, where

    * ``s1`` carries an l1 penalty on its (reweighted) second difference,
    * ``s2`` carries an l2 penalty on its second difference, repeats with a
      period of 365 samples and sums to zero over its first 365 samples,
    * ``s3`` is the residual, charged with a tilted (quantile) cost
      controlled by ``tau``.

    :param observed: 1d numpy array; NaN entries are excluded by default
    :param iterations: number of solve / reweight passes
    :param weights: accepted for backward compatibility; only a disabled
        cost term ever referenced it, so it has no effect
    :param index_set: boolean mask of entries tied to the model; defaults to
        the non-NaN entries of ``observed``
    :param tau: quantile level of the residual cost
    :return: tuple ``(s1.value, s2.value, s3.value)``
    """
    if index_set is None:
        index_set = ~np.isnan(observed)
    n = len(observed)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same dtype.
    zero_set = np.zeros(n - 1, dtype=bool)
    eps = .01
    s1 = cvx.Variable(n)
    s2 = cvx.Variable(n)
    s3 = cvx.Variable(n)
    w = cvx.Parameter(n - 2, nonneg=True)
    w.value = np.ones(n - 2)
    for _ in range(iterations):
        cost = (
            .1 * cvx.sum(tau * cvx.pos(s3) + (1 - tau) * cvx.neg(s3))
            + 10 * cvx.norm(cvx.diff(s2, k=2), p=2)
            + .2 * cvx.norm(cvx.multiply(w, cvx.diff(s1, k=2)), p=1)
        )
        objective = cvx.Minimize(cost)
        constraints = [
            observed[index_set] == s1[index_set] + s2[index_set] + s3[index_set],
            s2[365:] - s2[:-365] == 0,
            cvx.sum(s2[:365]) == 0,
        ]
        if np.sum(zero_set) > 0:
            # Pin first differences that were (nearly) zero on the previous pass.
            constraints.append(cvx.diff(s1, k=1)[zero_set] == 0)
        problem = cvx.Problem(objective, constraints)
        problem.solve(solver='MOSEK')
        # Reweight the L1 penalty
        w.value = 1 / (eps + 1e2 * np.abs(cvx.diff(s1, k=2).value))
        # Make nearly flat regions exactly flat (sparse 1st diff)
        zero_set = np.abs(cvx.diff(s1, k=1).value) <= 5e-5
    return s1.value, s2.value, s3.value
def plot_pdf(self, figsize=(8, 6)):
    """Draw a histogram of the positive entries of ``self.clip_stat_1`` with
    the rescaled first difference of ``self.y_hat`` overlaid, and a dotted
    vertical line at every entry of ``self.point_mass_locations``.

    :param figsize: figure size forwarded to ``plt.figure``
    :return: the created matplotlib figure
    """
    samples = self.clip_stat_1
    grid = self.cdf_x
    cdf_fit = self.y_hat
    locations = self.point_mass_locations
    fig = plt.figure(figsize=figsize)
    positive = samples[samples > 0]
    plt.hist(positive, bins=100, alpha=0.5, label="histogram")
    # Rescale the difference curve so its peak matches the tallest histogram bin.
    pdf_estimate = cvx.diff(cdf_fit, k=1).value
    tallest_bin = np.histogram(positive, bins=100)[0].max()
    scale = tallest_bin / pdf_estimate.max()
    plt.plot(
        grid[:-1],
        scale * pdf_estimate,
        color="orange",
        linewidth=1,
        label="piecewise constant PDF estimate",
    )
    for position, location in enumerate(locations):
        line_style = dict(linewidth=1, linestyle=":", color="green")
        if position == 0:
            # Label only the first line so the legend gets a single entry.
            line_style["label"] = "detected point mass"
        plt.axvline(location, **line_style)
    return fig
def total_variation_plus_seasonal_filter(signal, c1=10, c2=500):
    '''
    Total variation filtering with an additional seasonal baseline.

    The signal is modeled on its non-NaN entries as the sum of a component
    with an l1 penalty on its first difference, a smooth component that
    repeats every 365 samples and sums to zero over its first 365 samples,
    and a Huber-penalized error term.

    :param signal: A 1d numpy array (must support boolean indexing) containing the signal of interest
    :param c1: The regularization parameter to control the total variation in the final output signal
    :param c2: The regularization parameter to control the smoothness of the seasonal signal
    :return: A tuple of two 1d numpy arrays: the filtered signal and the seasonal signal
    '''
    n = len(signal)
    s_hat = cvx.Variable(n)
    s_seas = cvx.Variable(n)
    s_error = cvx.Variable(n)
    c1 = cvx.Constant(value=c1)
    c2 = cvx.Constant(value=c2)
    index_set = ~np.isnan(signal)
    # Rescale the fit term by (total samples / usable samples).
    w = n / np.sum(index_set)
    fit_term = (365 * 3 / n) * w * cvx.sum(cvx.huber(s_error))
    tv_term = c1 * cvx.norm1(cvx.diff(s_hat, k=1))
    curvature_term = c2 * cvx.norm(cvx.diff(s_seas, k=2))
    slope_term = c2 * .1 * cvx.norm(cvx.diff(s_seas, k=1))
    objective = cvx.Minimize(fit_term + tv_term + curvature_term + slope_term)
    constraints = [
        signal[index_set] == s_hat[index_set] + s_seas[index_set] + s_error[index_set],
        s_seas[365:] - s_seas[:-365] == 0,
        cvx.sum(s_seas[:365]) == 0,
    ]
    problem = cvx.Problem(objective=objective, constraints=constraints)
    problem.solve()
    return s_hat.value, s_seas.value
def test_diff(self) -> None:
    """Check that ``cp.diff`` yields the same shape as ``np.diff`` along each axis.
    """
    A = cp.Variable((20, 10))
    B = np.zeros((20, 10))
    for axis in (0, 1):
        self.assertEqual(
            cp.diff(A, axis=axis).shape,
            np.diff(B, axis=axis).shape,
        )
def local_quantile_regression_with_seasonal(signal, use_ixs=None, tau=0.75,
                                            c1=1e3, solver='ECOS',
                                            residual_weights=None,
                                            tv_weights=None):
    '''
    Fit a smooth curve to the ``tau`` quantile of ``signal``; the curve is
    constrained to repeat every 365 samples when the signal is longer than 365.

    https://colab.research.google.com/github/cvxgrp/cvx_short_course/blob/master/applications/quantile_regression.ipynb

    :param signal: 1d numpy array
    :param use_ixs: optional index set to apply cost function to
    :param tau: float, parameter for quantile regression
    :param c1: float, weight on the norm of the second difference of the fit
    :param solver: string
    :param residual_weights: not used by this implementation
    :param tv_weights: not used by this implementation
    :return: the fitted curve as a 1d numpy array
    '''
    n = len(signal)
    if use_ixs is None:
        use_ixs = np.arange(n)
    x = cvx.Variable(n)
    r = signal[use_ixs] - x[use_ixs]
    tilted_cost = cvx.sum(0.5 * cvx.abs(r) + (tau - 0.5) * r)
    smoothness = c1 * cvx.norm(cvx.diff(x, k=2))
    constraints = [x[365:] == x[:-365]] if n > 365 else []
    prob = cvx.Problem(cvx.Minimize(tilted_cost + smoothness),
                       constraints=constraints)
    prob.solve(solver=solver)
    return x.value
def local_median_regression_with_seasonal(signal, use_ixs=None, c1=1e3,
                                          solver='ECOS'):
    '''
    Fit a smooth curve to ``signal`` under an l1 misfit; the curve is
    constrained to repeat every 365 samples when the signal is longer than 365.

    for a list of available solvers, see:
    https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options

    :param signal: 1d numpy array
    :param use_ixs: optional index set to apply cost function to
    :param c1: float, weight on the norm of the second difference of the fit
    :param solver: string
    :return: the fitted curve as a 1d numpy array
    '''
    n = len(signal)
    if use_ixs is None:
        use_ixs = np.arange(n)
    x = cvx.Variable(n)
    misfit = cvx.norm1(signal[use_ixs] - x[use_ixs])
    smoothness = c1 * cvx.norm(cvx.diff(x, k=2))
    constraints = [x[365:] == x[:-365]] if n > 365 else []
    prob = cvx.Problem(cvx.Minimize(misfit + smoothness),
                       constraints=constraints)
    prob.solve(solver=solver)
    return x.value
def obtain_component_r0(self, initial_r_cs_value, index_set=None):
    """
    Smooth the first row of ``initial_r_cs_value`` with a tilted (0.9)
    quantile fit and a penalty on the second difference of the fit.

    Arguments
    -----------------
    initial_r_cs_value : numpy array
        Initial low dimension right matrix; only row 0 is fitted.
    index_set : boolean mask, optional
        Entries of row 0 that enter the fit. Defaults to the entries larger
        than 1e-3 times the 95th percentile of the row.
    Returns
    -------
    numpy array
        The fitted values, one per column of ``initial_r_cs_value``.
    """
    first_row = initial_r_cs_value[0]
    if index_set is None:
        cutoff = 1e-3 * np.percentile(first_row, 95)
        index_set = first_row > cutoff
    x = cvx.Variable(initial_r_cs_value.shape[1])
    residual = first_row[index_set] - x[index_set]
    tilted_cost = cvx.sum(0.5 * cvx.abs(residual) + (.9 - 0.5) * residual)
    smoothness = 1e3 * cvx.norm(cvx.diff(x, k=2))
    problem = cvx.Problem(cvx.Minimize(tilted_cost + smoothness))
    problem.solve(solver=self._solver_type)
    return x.value
def tl1_l2d2p365(
    signal,
    use_ixs=None,
    tau=0.75,
    c1=1e3,
    solver=None,
    yearly_periodic=True,
    verbose=False,
    residual_weights=None,
    tv_weights=None,
):
    """
    Fit a smooth curve to the ``tau`` quantile of ``signal``, optionally
    constrained to repeat every 365 samples.

    https://colab.research.google.com/github/cvxgrp/cvx_short_course/blob/master/applications/quantile_regression.ipynb

    :param signal: 1d numpy array
    :param use_ixs: optional index set to apply cost function to; defaults
        to the non-NaN entries of ``signal``
    :param tau: float, parameter for quantile regression
    :param c1: float, weight on the norm of the second difference of the fit
    :param solver: string
    :param yearly_periodic: apply the 365-sample periodicity constraint
        (only when the signal is longer than 365 samples)
    :param verbose: forwarded to the solver
    :param residual_weights: not used by this implementation
    :param tv_weights: not used by this implementation
    :return: the fitted curve as a 1d numpy array
    """
    n = len(signal)
    if use_ixs is None:
        use_ixs = ~np.isnan(signal)
    x = cvx.Variable(n)
    r = signal[use_ixs] - x[use_ixs]
    tilted_cost = cvx.sum(0.5 * cvx.abs(r) + (tau - 0.5) * r)
    smoothness = c1 * cvx.norm(cvx.diff(x, k=2))
    constraints = []
    if n > 365 and yearly_periodic:
        constraints = [x[365:] == x[:-365]]
    prob = cvx.Problem(cvx.Minimize(tilted_cost + smoothness),
                       constraints=constraints)
    prob.solve(solver=solver, verbose=verbose)
    return x.value
def l1_l2d2p365(signal, use_ixs=None, c1=1e3, yearly_periodic=True,
                solver=None, verbose=False):
    """
    Fit a smooth curve to ``signal`` under an l1 misfit, optionally
    constrained to repeat every 365 samples.

    for a list of available solvers, see:
    https://www.cvxpy.org/tutorial/advanced/index.html#solve-method-options

    :param signal: 1d numpy array
    :param use_ixs: optional index set to apply cost function to; defaults
        to the non-NaN entries of ``signal`` (matching ``tl1_l2d2p365``), so
        missing values no longer leak NaNs into the objective
    :param c1: float, weight on the norm of the second difference of the fit
    :param yearly_periodic: apply the 365-sample periodicity constraint
        (only when the signal is longer than 365 samples)
    :param solver: string
    :param verbose: forwarded to the solver
    :return: the fitted curve as a 1d numpy array
    """
    if use_ixs is None:
        # For NaN-free input this mask selects every entry, exactly like the
        # former np.arange(len(signal)) default.
        use_ixs = ~np.isnan(signal)
    x = cvx.Variable(len(signal))
    objective = cvx.Minimize(
        cvx.norm1(signal[use_ixs] - x[use_ixs])
        + c1 * cvx.norm(cvx.diff(x, k=2))
    )
    if len(signal) > 365 and yearly_periodic:
        constraints = [x[365:] == x[:-365]]
    else:
        constraints = []
    prob = cvx.Problem(objective, constraints=constraints)
    # Currently seems to work with SCS or MOSEK
    prob.solve(solver=solver, verbose=verbose)
    return x.value
def quad_solve(y, lambd):
    """Solve a least-squares fit to ``y`` with an l1 penalty (weight
    ``lambd``) on the first difference, pinning the first sample to ``y[0]``.

    Returns the first column of the solved variable's value.
    """
    x = cp.Variable(len(y))
    fit = 0.5 * cp.sum_entries(cp.square(y - x))
    penalty = lambd * cp.norm1(cp.diff(x))
    problem = cp.Problem(cp.Minimize(fit + penalty), [x[0] == y[0]])
    problem.solve()
    return x.value.A[:, 0]
def find_clear_days(data, th=0.1, boolean_out=True):
    '''
    This function quickly finds clear days in a PV power data set. The input to this function is a 2D array containing
    standardized time series power data. This will typically be the output from
    `solardatatools.data_transforms.make_2d`. The filter relies on two estimates of daily "clearness": the smoothness
    of each daily signal as measured by the l1-norm of the 2nd order difference, and seasonally-adjusted daily
    energy. Seasonal adjustment of the daily energy is obtained by solving a local quantile regression problem, which
    is a convex optimization problem and is solvable with cvxpy. The parameter `th` controls the relative weighting of
    the daily smoothness and daily energy in the final filter in a geometric mean. A value of 0 will rely entirely on
    the daily energy and a value of 1 will rely entirely on daily smoothness.

    :param data: A 2D numpy array containing a solar power time series signal (one column per day).
    :param th: A parameter that tunes the filter between relying of daily smoothness and daily energy
    :param boolean_out: When true return a boolean mask, otherwise return the underlying weights
    :return: A 1D boolean array, with `True` values corresponding to clear days in the data set (or the 1D array of
        weights when `boolean_out` is false)
    '''
    # Smoothness metric: norm of each day's second difference, shifted so the
    # median sits at zero, scaled so the maximum is one, negatives clipped.
    second_diff = data[:-2] - 2 * data[1:-1] + data[2:]
    tc = np.linalg.norm(second_diff, ord=1, axis=0)
    tc = np.percentile(tc, 50) - tc
    tc /= np.max(tc)
    tc = np.clip(tc, 0, None)

    # Daily energy, normalized by a smooth fit of its local 90th percentile.
    de = np.sum(data, axis=0)
    x = cvx.Variable(len(tc))
    residual = de - x
    quantile_cost = cvx.sum(0.5 * cvx.abs(residual) + (.9 - 0.5) * residual)
    smoothness = 1e3 * cvx.norm(cvx.diff(x, k=2))
    prob = cvx.Problem(cvx.Minimize(quantile_cost + smoothness))
    try:
        prob.solve(solver='MOSEK')
    except Exception as e:
        print(e)
        print('Trying ECOS solver')
        prob.solve(solver='ECOS')
    de = np.clip(de / x.value, 0, 1)

    # Geometric mean of the two metrics; small values are zeroed out.
    weights = np.power(tc, th) * np.power(de, 1. - th)
    weights[weights < 0.6] = 0.

    # Apply filter for sparsity to catch data errors related to non-zero nighttime data
    try:
        msk = filter_for_sparsity(data, solver='MOSEK')
    except Exception as e:
        print(e)
        print('Trying ECOS solver')
        msk = filter_for_sparsity(data, solver='ECOS')
    weights = weights * msk.astype(int)

    if not boolean_out:
        return weights
    return weights >= 1e-3
def make_l2_ll1d1(y, weight=1e1):
    """Build (without solving) a parametrized fitting problem: squared error
    to the data plus an l1 penalty on the second difference of the fit, with
    both end points of the fit pinned to the data.

    :param y: 1d array used as the initial value of the data parameter
    :param weight: initial value of the regularization parameter
    :return: tuple ``(problem, y_param, y_hat, mu)`` where ``y_param`` is the
        data parameter, ``y_hat`` the fit variable and ``mu`` the nonnegative
        regularization parameter
    """
    y_hat = cvx.Variable(len(y))
    y_param = cvx.Parameter(len(y), value=y)
    mu = cvx.Parameter(nonneg=True)
    mu.value = weight
    error = cvx.sum_squares(y_param - y_hat)
    reg = cvx.norm(cvx.diff(y_hat, k=2), p=1)
    objective = cvx.Minimize(error + mu * reg)
    # Both end-point constraints go through y_param, so they follow the data
    # when a caller re-assigns y_param.value. The last one previously used the
    # raw ``y`` and stayed frozen at the construction-time value.
    constraints = [y_param[0] == y_hat[0], y_param[-1] == y_hat[-1]]
    problem = cvx.Problem(objective, constraints)
    return problem, y_param, y_hat, mu
def lasso_solve(input_file, lambdas, pnorm=2):
    """Denoise a 2D array read from a CSV file for each value in ``lambdas``
    and plot the results.

    The objective is half the squared error to the data plus ``lambda`` times
    the sum, over interior positions, of the ``pnorm``-norm of the pair
    (difference along axis 0, difference along axis 1). For each lambda the
    objective value is printed and the solution is drawn next to a histogram
    of its values.

    :param input_file: path of a comma-delimited text file holding the array
    :param lambdas: sequence of regularization weights to solve for
    :param pnorm: norm applied to each pair of differences
    """
    # Passing the path lets numpy open and close the file itself; the former
    # open(...) handle was never closed.
    toy = np.loadtxt(input_file, delimiter=",")
    m, n = toy.shape
    nval = len(lambdas)
    panels = 1 + nval * 2

    fig = plt.figure(figsize=(8, 8))
    figr = math.ceil(panels / 5)
    figc = min(5, panels)
    plt.subplots_adjust(hspace=0.3)
    plot = fig.add_subplot(figr, figc, 1)
    plot.set_title("Original")
    plt.imshow(toy)

    theta = cp.Variable(toy.shape)
    lambd = cp.Parameter(nonneg=True)
    hdiff = cp.diff(theta, axis=0)  # shape (m - 1, n)
    vdiff = cp.diff(theta, axis=1)  # shape (m, n - 1)
    # Crop both to the common (m - 1, n - 1) window. Rows are bounded by m and
    # columns by n; the previous slices had them swapped, which only worked
    # for square inputs.
    common = (m - 1) * (n - 1)
    hdiff_flat = cp.reshape(hdiff[0:m - 1, 0:n - 1], (common, ))
    vdiff_flat = cp.reshape(vdiff[0:m - 1, 0:n - 1], (common, ))
    stack = cp.vstack([hdiff_flat, vdiff_flat])
    objective = cp.sum_squares(toy - theta) / 2 + lambd * cp.sum(
        cp.norm(stack, p=pnorm, axis=0))
    problem = cp.Problem(cp.Minimize(objective))

    for idx, val in enumerate(lambdas):
        lambd.value = val
        problem.solve(warm_start=True)
        print("Lambda: {}, objective value: {:.2f}".format(val, problem.value))
        plot = fig.add_subplot(figr, figc, 2 + 2 * idx)
        plot.set_title("Lambda {}".format(val))
        plt.imshow(theta.value)
        # histogram
        plot = fig.add_subplot(figr, figc, 3 + 2 * idx)
        plt.hist(theta.value, bins=100, histtype='step')
    plt.show()
def constraints(self):
    """Return the list of constraints tying the stored energy to the power
    at the first terminal.

    On first use, ``self.energy`` is created as a variable with the same
    shape as the terminal's power variable. The list holds the energy
    update relations, the power bounds (``-discharge_max`` and
    ``charge_max``), the energy bounds (``0`` and ``energy_max``) and, when
    ``energy_final`` is set, a lower bound on the last energy entry.
    """
    terminal = self.terminals[0]
    P = terminal.power_var
    if self.energy is None:
        self.energy = cvx.Variable(terminal.power_var.shape)
    e_init = cvx.reshape(self.energy_init, ())
    energy = self.energy
    interval = self.len_interval
    constraint_list = [
        cvx.diff(energy.T) == P[1:, :] * interval,
        energy[0, :] - e_init - P[0, :] * interval == 0,
        terminal.power_var >= -self.discharge_max,
        terminal.power_var <= self.charge_max,
        energy <= self.energy_max,
        energy >= 0,
    ]
    if self.energy_final is not None:
        constraint_list.append(energy[-1] >= self.energy_final)
    return constraint_list
def baseline_rope(y, lam=1):
    """
    Baseline drift correction based on [1]

    Inputs:
        y: row signal to be cleaned (array, numpy array)
        lam: reg. parameter (int)

    Problem solved by the code:
        min ||y - b||_2 + lam * sum(diff(b)^2),  s.t. b <= y

    Returns the fitted baseline ``b``.

    [1] Xie, Z., Schwartz, O., & Prasad, A. (2018). Decoding of finger
    trajectory from ECoG using deep learning. Journal of neural engineering,
    15(3), 036009.
    """
    baseline = cp.Variable(y.shape)
    misfit = cp.norm(y - baseline, 2)
    roughness = lam * cp.sum_squares(cp.diff(baseline, 1))
    problem = cp.Problem(cp.Minimize(misfit + roughness), [baseline <= y])
    problem.solve(solver="SCS")
    return baseline.value
def plot_diffs(self, figsize=(8, 6)):
    """Plot the first difference of ``self.y_hat`` (top) and ``self.metric``
    (bottom) against ``self.cdf_x``, with the decision threshold drawn on the
    bottom panel and the detected point masses marked on both.

    :param figsize: figure size forwarded to ``plt.subplots``
    :return: the created matplotlib figure
    """
    grid = self.cdf_x
    second_order = self.metric
    threshold = self.threshold
    cdf_fit = self.y_hat
    mass_mask = self.point_masses
    mass_locations = self.point_mass_locations

    fig, (top, bottom) = plt.subplots(nrows=2, sharex=True, figsize=figsize)
    first_order = cvx.diff(cdf_fit, k=1).value
    top.plot(grid[:-1], first_order)
    bottom.plot(grid[1:-1], second_order)
    bottom.axhline(threshold, linewidth=1, color="r", ls=":",
                   label="decision boundary")
    if len(mass_locations) > 0:
        marker_style = dict(color="red", marker="o",
                            label="detected point mass")
        # The difference curves are shorter than the grid, so the mask is
        # trimmed to line up with them.
        top.scatter(grid[mass_mask], first_order[mass_mask[1:]],
                    **marker_style)
        bottom.scatter(grid[mass_mask], second_order[mass_mask[1:-1]],
                       **marker_style)
    top.set_title("1st order difference of CDF fit")
    bottom.set_title("2nd order difference of CDF fit")
    bottom.legend()
    plt.tight_layout()
    return fig
def total_variation_filter(signal, C=5):
    '''
    Total variation filtering (denoising) of a 1D signal, posed as a convex
    optimization problem and solved with cvxpy: a Huber misfit on the non-NaN
    entries plus ``C`` times the l1 norm of the first difference. MOSEK is
    tried first and ECOS is used if that attempt raises.
    (https://en.wikipedia.org/wiki/Total_variation_denoising)

    :param signal: A 1d numpy array (must support boolean indexing) containing the signal of interest
    :param C: The regularization parameter to control the total variation in the final output signal
    :return: A 1d numpy array containing the filtered signal
    '''
    s_hat = cvx.Variable(len(signal))
    mu = cvx.Constant(value=C)
    index_set = ~np.isnan(signal)
    misfit = cvx.sum(cvx.huber(signal[index_set] - s_hat[index_set]))
    variation = mu * cvx.norm1(cvx.diff(s_hat, k=1))
    problem = cvx.Problem(objective=cvx.Minimize(misfit + variation))
    try:
        problem.solve(solver='MOSEK')
    except Exception as e:
        print(e)
        print('Trying ECOS solver')
        problem.solve(solver='ECOS')
    return s_hat.value
def updateSourceObj(self, sourcename):
    """Rebuild the objective stored under ``model['obj']`` for one model, or
    for every model when ``sourcename`` is ``'all'`` (case-insensitive).

    The objective is the sum of three terms read from the model dictionary:
    the fit of the source to its regressors (``costFunction``, ``alpha``),
    the regularization of theta (``regularizeTheta``, ``beta``) and the
    regularization of the source signal (``regularizeSource``, ``gamma``).

    Raises ValueError for an unknown ``costFunction`` or ``regularizeTheta``,
    a wrongly sized ``beta`` or a failing / non-scalar custom theta
    regularizer; NameError when ``gamma`` is not scalar; Exception for an
    unknown ``regularizeSource``.
    """
    # Imported lazily so cvxpy is only required by models that use this solver.
    import cvxpy as cvp

    if sourcename.lower() == 'all':
        # Snapshot the keys: the recursive call re-assigns self.models[name].
        for name in list(self.models.keys()):
            self.updateSourceObj(name)
        return

    model = self.models[sourcename]

    ## Define objective function to fit model to regressors
    ## alpha sits inside the norms so that it can be time varying
    ## (a scalar or a vector of length N).
    cost_function = model['costFunction']
    cost_kind = cost_function.lower()
    if cost_kind not in ('sse', 'l1', 'l2'):
        # The message used to format an undefined name (``costFunction``),
        # which turned this error path into a NameError.
        raise ValueError(
            '{} wrong option, use "sse","l2" or "l1"'.format(cost_function))
    residuals = (model['source'] - model['regressor'] * model['theta'])
    if cost_kind == 'sse':
        modelObj = cvp.sum_squares(
            cvp.mul_elemwise(model['alpha'] ** .5, residuals))
    elif cost_kind == 'l1':
        modelObj = cvp.norm(cvp.mul_elemwise(model['alpha'], residuals), 1)
    else:
        modelObj = cvp.norm(cvp.mul_elemwise(model['alpha'], residuals), 2)

    ## Define cost function to regularize theta
    # Check that beta is scalar or of length of number of parameters.
    model['beta'] = np.array(model['beta'])
    if model['beta'].size not in [1, model['order']]:
        raise ValueError('Beta must be scalar or vector with one element for each regressor')

    theta_reg = model['regularizeTheta']
    if theta_reg is None:
        regThetaObj = 0
    elif callable(theta_reg):
        ## User can input their own function to regularize theta.
        # Must input a cvxpy variable vector and output a scalar
        # or a vector with one element for each parameter.
        try:
            regThetaObj = theta_reg(model['theta']) * model['beta']
        except Exception:
            # ``except Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            raise ValueError('Check custom regularizer for model {}'.format(model['name']))
        if regThetaObj.size[0] * regThetaObj.size[1] != 1:
            raise ValueError('Check custom regularizer for model {}, make sure it returns a scalar'.format(model['name']))
    elif theta_reg.lower() == 'l2':
        regThetaObj = cvp.norm(model['theta'] * model['beta'])
    elif theta_reg.lower() == 'l1':
        regThetaObj = cvp.norm(model['theta'] * model['beta'], 1)
    else:
        # Previously fell through and failed later with an unbound local.
        raise ValueError(
            'regularizeTheta must be a callable method, "l2", "l1", or None')

    ## Define cost function to regularize source signal
    # Check that gamma is scalar
    model['gamma'] = np.array(model['gamma'])
    if model['gamma'].size != 1:
        raise NameError('Gamma must be scalar')

    ## Calculate regularization.
    source_reg = model['regularizeSource']
    if source_reg is None:
        regSourceObj = 0
    elif callable(source_reg):
        ## User can input their own function to regularize the source signal.
        # Must input a cvxpy variable vector and output a scalar.
        regSourceObj = source_reg(model['source']) * model['gamma']
    elif source_reg.lower() == 'diff1_ss':
        regSourceObj = cvp.sum_squares(cvp.diff(model['source'])) * model['gamma']
    else:
        raise Exception('regularizeSource must be a callable method, \\`diff1_ss\\`, or None')

    ## Sum total model objective
    model['obj'] = modelObj + regThetaObj + regSourceObj

    ## Append model to models list
    self.models[sourcename] = model
[[[20, 8, 5, 2], [8, 16, 2, 4], [5, 2, 5, 2], [2, 4, 2, 4]]], Constant([7.7424020218157814])), (cp.geo_mean, tuple(), [[4, 1]], Constant([2])), (cp.geo_mean, tuple(), [[0.01, 7]], Constant([0.2645751311064591])), (cp.geo_mean, tuple(), [[63, 7]], Constant([21])), (cp.geo_mean, tuple(), [[1, 10]], Constant([math.sqrt(10)])), (lambda x: cp.geo_mean(x, [1, 1]), tuple(), [[1, 10]], Constant([math.sqrt(10)])), (lambda x: cp.geo_mean(x, [.4, .8, 4.9]), tuple(), [[.5, 1.8, 17]], Constant([10.04921378316062])), (cp.harmonic_mean, tuple(), [[1, 2, 3]], Constant([1.6363636363636365])), (cp.harmonic_mean, tuple(), [[2.5, 2.5, 2.5, 2.5]], Constant([2.5])), (cp.harmonic_mean, tuple(), [[0, 1, 2]], Constant([0])), (lambda x: cp.diff(x, 0), (3,), [[1, 2, 3]], Constant([1, 2, 3])), (cp.diff, (2,), [[1, 2, 3]], Constant([1, 1])), (cp.diff, tuple(), [[1.1, 2.3]], Constant([1.2])), (lambda x: cp.diff(x, 2), tuple(), [[1, 2, 3]], Constant([0])), (cp.diff, (3,), [[2.1, 1, 4.5, -.1]], Constant([-1.1, 3.5, -4.6])), (lambda x: cp.diff(x, 2), (2,), [[2.1, 1, 4.5, -.1]], Constant([4.6, -8.1])), (lambda x: cp.diff(x, 1, axis=0), (1, 2), [[[-5, -3], [2, 1]]], Constant([[7], [4]])), (lambda x: cp.diff(x, 1, axis=1), (2, 1), [[[-5, -3], [2, 1]]], Constant([[2, -1]])), (lambda x: cp.pnorm(x, .5), tuple(), [[1.1, 2, .1]], Constant([7.724231543909264])), (lambda x: cp.pnorm(x, -.4), tuple(), [[1.1, 2, .1]], Constant([0.02713620334])), (lambda x: cp.pnorm(x, -1), tuple(), [[1.1, 2, .1]], Constant([0.0876494023904])), (lambda x: cp.pnorm(x, -2.3), tuple(), [[1.1, 2, .1]], Constant([0.099781528576])),
(cp.entr, (2, 2), [[[1, math.e], [math.e**2, 1.0 / math.e]]], Constant([[0, -math.e], [-2 * math.e**2, 1.0 / math.e]])), (cp.log_det, tuple(), [[[20, 8, 5, 2], [8, 16, 2, 4], [5, 2, 5, 2], [2, 4, 2, 4]]], Constant([7.7424020218157814])), (cp.geo_mean, tuple(), [[4, 1]], Constant([2])), (cp.geo_mean, tuple(), [[0.01, 7]], Constant([0.2645751311064591])), (cp.geo_mean, tuple(), [[63, 7]], Constant([21])), (cp.geo_mean, tuple(), [[1, 10]], Constant([math.sqrt(10)])), (lambda x: cp.geo_mean(x, [1, 1]), tuple(), [[1, 10]], Constant([math.sqrt(10)])), (lambda x: cp.geo_mean(x, [.4, .8, 4.9]), tuple(), [[.5, 1.8, 17]], Constant([10.04921378316062])), (cp.harmonic_mean, tuple(), [[1, 2, 3]], Constant([1.6363636363636365])), (cp.harmonic_mean, tuple(), [[2.5, 2.5, 2.5, 2.5]], Constant([2.5])), (cp.harmonic_mean, tuple(), [[0, 1, 2]], Constant([0])), (lambda x: cp.diff(x, 0), (3, ), [[1, 2, 3]], Constant([1, 2, 3])), (cp.diff, (2, ), [[1, 2, 3]], Constant([1, 1])), (cp.diff, tuple(), [[1.1, 2.3]], Constant([1.2])), (lambda x: cp.diff(x, 2), tuple(), [[1, 2, 3]], Constant([0])), (cp.diff, (3, ), [[2.1, 1, 4.5, -.1]], Constant([-1.1, 3.5, -4.6])), (lambda x: cp.diff(x, 2), (2, ), [[2.1, 1, 4.5, -.1]], Constant([4.6, -8.1])), (lambda x: cp.diff(x, 1, axis=0), (1, 2), [np.array([[-5, -3], [2, 1]])], Constant([[7], [4]])), (lambda x: cp.diff(x, 1, axis=1), (2, 1), [np.array([[-5, -3], [2, 1]])], Constant([[2, -1]])), (lambda x: cp.pnorm(x, .5), tuple(), [[1.1, 2, .1]], Constant([7.724231543909264])), (lambda x: cp.pnorm(x, -.4), tuple(), [[1.1, 2, .1]], Constant([0.02713620334])), (lambda x: cp.pnorm(x, -1), tuple(), [[1.1, 2,
l = cvx.Variable(T) v = cvx.Variable(T) Val = cvx.Variable(T) Bal = cvx.Variable(T) a = cvx.Variable(1) b = cvx.Variable(1) turb = cvx.Variable(T) const = [ l[1:] == l[:-1] + dl[:-1] + Bal[:-1] + Val[:-1], v[1:] == v[:-1] + 1 / klin / 4 * 60 * (l[:-1] - v[:-1]) + turb[:-1], h[1:] == h[:-1] + v[:-1] ] const.append(Bal == a * bal2) const.append(Val == a * val2) obj = cvx.Minimize(0.001 * cvx.sum_squares(turb) + 100 * cvx.sum_squares(cvx.diff(dl))) prob = cvx.Problem(obj, const) r = prob.solve( ) #solver = 'ECOS',verbose=True,reltol=1e-10,abstol=1e-10,max_iters=200) print(r) temps = df.atmo_temp_dp[::intv] l = np.asarray(l.value.T)[0, :] dl = np.asarray(dl.value.T)[0, :] #plt.plot(temps, np.sign(l)*)dl) #m, b = np.polyfit(temps, np.sign(l)*(np.asarray(l.value.T).T), 1) #plt.plot(np.unique(temps), m*np.unique(temps)+b) plt.xlabel('gradient (dK/dPa)') plt.ylabel('dl/dt') plt.show()
def l1filter(t, y, lam=1200, rho=80, periods=(365.25, 182.625),
             solver=cvx.MOSEK, verbose=False):
    """
    Do l1 regularize for given time series.

    The series is modeled as ``y ~ x + w (+ seasonal)``: ``x`` is penalized by
    ``lam`` times the l1 norm of ``D * x`` (``D`` from ``gen_d2``), ``w`` by
    ``rho`` times its total variation, and the seasonal part is a sum of one
    sine/cosine pair per entry of ``periods``.

    :param t: np.array, time (shifted internally so that it starts at 0)
    :param y: np.array, time series value, same shape as ``t``
    :param lam: lambda value
    :param rho: rho value
    :param periods: list, periods, same unit as t; falsy disables the
        seasonal part
    :param solver: cvx.solver
    :param verbose: bool, show verbose or not
    :return: always a 3-tuple ``(x, w, s)``; ``s`` is the seasonal component,
        or ``None`` when ``periods`` is falsy
    """
    # NOTE(review): a second implementation used to follow the return
    # statements of this function. It could never execute and was removed.
    t = np.asarray(t, dtype=np.float64)
    t = t - t[0]
    y = np.asarray(y, dtype=np.float64)
    assert y.shape == t.shape

    n = len(t)
    D = gen_d2(n)
    x = cvx.Variable(n)
    w = cvx.Variable(n)
    errs = y - x - w

    seasonal = None
    if periods:
        tpi_t = 2 * np.pi * t
        for period in periods:
            a = cvx.Variable()
            b = cvx.Variable()
            temp = a * np.sin(tpi_t / period) + b * np.cos(tpi_t / period)
            if seasonal is None:
                seasonal = temp
            else:
                seasonal += temp
        errs = errs - seasonal

    obj = cvx.Minimize(0.5 * cvx.sum_squares(errs)
                       + lam * cvx.norm(D * x, 1)
                       + rho * cvx.tv(w))
    prob = cvx.Problem(obj)
    prob.solve(solver=solver, verbose=verbose)

    if periods:
        return np.array(x.value), np.array(w.value), np.array(seasonal.value)
    return np.array(x.value), np.array(w.value), None
# --- problem data ---
N = 100

# Increasing input signal: cumulative sum of a sparse, nonnegative step profile.
xtrue = np.zeros((N, 1))
xtrue[1:40] = 0.1
xtrue[50] = 2
xtrue[70:80] = 0.15
xtrue[80] = 1
xtrue = np.cumsum(xtrue)

# Pass the increasing input through a moving-average filter
# and add Gaussian noise.
h = np.array([1, -0.85, 0.7, -0.3])
k = h.shape[0]
yhat = np.convolve(h, xtrue)
y = yhat[:-3] + np.random.randn(N)

# --- estimate: nonnegative, nondecreasing x that best explains y ---
x = cp.Variable((N,), nonneg=True)
z = y[:, None] - cp.conv(h, x)[:-3]
objective = cp.Minimize(cp.sum_squares(z))
constraints = [cp.diff(x) >= 0]
prob = cp.Problem(objective, constraints=constraints)
prob.solve()

# --- plot the true signal against the estimate ---
t = list(range(xtrue.size))
plt.plot(t, list(xtrue), color='red', label='x_true')
plt.plot(t, list(x.value), color='blue', label='x_hat')
plt.legend(loc="upper left")
plt.savefig('prob_66.png')
plt.show()
def addSource(
        self,
        regressor,
        name=None,
        costFunction='sse',
        alpha=1,  # Cost function for fit to regressors, alpha is a scalar multiplier
        regularizeTheta=None,
        beta=1,  # Cost function for parameter regularization, beta is a scalar multiplier
        regularizeSource=None,
        gamma=1,  # Cost function for signal smoothing, gamma is a scalar multiplier
        lb=None,  # Lower bound on source
        ub=None,  # Upper bound on source
        idxScrReg=None,  # indices used to break the source signal into smaller ones to apply regularization
        numWind=1,  # number of time windows (relevant for time-varying regressors)
):
    """Add a new source model and store it in ``self.models[name]``.

    The stored dictionary holds the name, alpha, bounds, regressor matrix,
    order (number of regressor columns), the ``source`` and ``theta``
    decision variables, the cost function name and the objective ``obj``
    (fit term + theta regularization + source regularization).

    ``regressor`` is coerced to an ``(N, order)`` array: a 0-d input becomes
    an empty ``(N, 0)`` array, a 1-d input becomes a column, and a 2-d input
    whose second axis has length ``N`` is transposed.

    ``regularizeTheta`` may be None, a callable, ``'l2'``, ``'l1'``,
    ``'diff_l2'`` or ``'diff_l1'``; ``regularizeSource`` may be None, a
    callable, ``'diff1_ss'``, ``'diff_l1'`` or ``'diff_l2'``.

    Raises NameError for a regressor of wrong rank/length or a non-scalar
    gamma; ValueError for an unknown ``costFunction``, ``regularizeTheta`` or
    ``regularizeSource``, a wrongly sized ``beta``, or a failing / non-scalar
    custom theta regularizer.
    """
    ### This is a method to add a new source
    self.modelcounter += 1  # Increment model counter

    ## Write model name if it doesn't exist.
    if name is None:
        name = str(self.modelcounter)

    ## Instantiate a dictionary of model terms
    model = {}
    model['name'] = name
    model['alpha'] = alpha
    model['lb'] = lb
    model['ub'] = ub

    ## Check regressor shape
    regressor = np.array(regressor)
    if regressor.ndim == 0:  ## If no regressors are included, set them an empty array
        regressor = np.zeros((self.N, 0))
    if regressor.ndim == 1:
        regressor = np.expand_dims(regressor, 1)
    if regressor.ndim > 2:
        raise NameError('Regressors cannot have more than 2 dimensions')

    ## Check that regressors have the correct shape (Nobs, Nregressors)
    if regressor.shape[0] != self.N:
        if regressor.shape[1] == self.N:
            regressor = regressor.transpose()
        else:
            raise NameError(
                'Lengths of regressors and aggregate signal must match')

    ## Define model regressors and order
    model['regressor'] = regressor
    model['order'] = regressor.shape[1]

    ## Define decision variables and cost function style
    model['source'] = cvp.Variable(self.N, 1)
    #model['source'] = cvp.Variable((self.N,1)) # required for cvxpy 1.0.1
    model['theta'] = cvp.Variable(model['order'], 1)
    #model['theta'] = cvp.Variable((model['order'],1)) # required for cvxpy 1.0.1
    model['costFunction'] = costFunction

    ## Define objective function to fit model to regressors
    cost_kind = costFunction.lower()
    if cost_kind not in ('sse', 'l1', 'l2'):
        raise ValueError(
            '{} wrong option, use "sse","l2" or "l1"'.format(costFunction))
    residuals = (model['source'] - model['regressor'] * model['theta'])
    if cost_kind == 'sse':
        modelObj = cvp.sum_squares(residuals) * model['alpha']
    elif cost_kind == 'l1':
        modelObj = cvp.norm(residuals, 1) * model['alpha']
    else:
        modelObj = cvp.norm(residuals, 2) * model['alpha']

    ## Define cost function to regularize theta
    # Check that beta is scalar or of length of number of parameters.
    beta = np.array(beta)
    if beta.size not in [1, model['order']]:
        raise ValueError(
            'Beta must be scalar or vector with one element for each regressor'
        )

    if regularizeTheta is None:
        regThetaObj = 0
    elif callable(regularizeTheta):
        ## User can input their own function to regularize theta.
        # Must input a cvxpy variable vector and output a scalar
        # or a vector with one element for each parameter.
        try:
            regThetaObj = regularizeTheta(model['theta']) * beta
        except Exception:
            # ``except Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            raise ValueError(
                'Check custom regularizer for model {}'.format(
                    model['name']))
        if regThetaObj.size[0] * regThetaObj.size[1] != 1:
            raise ValueError(
                'Check custom regularizer for model {}, make sure it returns a scalar'
                .format(model['name']))
    elif regularizeTheta.lower() == 'l2':
        regThetaObj = cvp.norm(model['theta'] * beta)
    elif regularizeTheta.lower() == 'l1':
        regThetaObj = cvp.norm(model['theta'] * beta, 1)
    elif regularizeTheta.lower() in ('diff_l2', 'diff_l1'):
        if numWind == 1:
            regThetaObj = 0
        else:
            if regressor.shape[1] == numWind:
                # this actually corresponds to the solar model (no intercept)
                thetaDiffVec = cvp.diff(model['theta'])
            else:
                # Difference the first two blocks of numWind parameters
                # separately, so no difference spans the block boundary.
                thetaDiffVec = cvp.vstack(
                    cvp.diff(model['theta'][0:numWind]),
                    cvp.diff(model['theta'][numWind:2 * numWind]))
            norm_order = 2 if regularizeTheta.lower() == 'diff_l2' else 1
            regThetaObj = cvp.norm(thetaDiffVec, norm_order) * beta
    else:
        # Previously fell through and failed later with an unbound local.
        raise ValueError(
            'regularizeTheta must be a callable method, "l2", "l1", '
            '"diff_l2", "diff_l1", or None')

    ## Define cost function to regularize source signal
    # Check that gamma is scalar
    gamma = np.array(gamma)
    if gamma.size != 1:
        raise NameError('Gamma must be scalar')

    ## Calculate regularization.
    if regularizeSource is not None:
        if idxScrReg is not None:
            # NOTE(review): the first segment is stacked as-is (not
            # differenced), unlike every later segment -- confirm intended.
            scrVec = model['source'][0:idxScrReg[0]]
            idxStart = idxScrReg[0] + 1
            for idxEnd in idxScrReg[1:]:
                scrCur = cvp.diff(model['source'][idxStart:idxEnd])
                scrVec = cvp.vstack(scrVec, scrCur)
                idxStart = idxEnd + 1
            scrVec = cvp.vstack(scrVec,
                                cvp.diff(model['source'][idxStart:]))
        else:
            scrVec = cvp.diff(model['source'])

        if callable(regularizeSource):
            ## User can input their own function to regularize the source signal.
            # Must input a cvxpy variable vector and output a scalar.
            regSourceObj = regularizeSource(scrVec) * gamma
        elif regularizeSource.lower() == 'diff1_ss':
            regSourceObj = cvp.sum_squares(scrVec) * gamma
        elif regularizeSource.lower() == 'diff_l1':
            regSourceObj = cvp.norm(scrVec, 1) * gamma
        elif regularizeSource.lower() == 'diff_l2':
            regSourceObj = cvp.norm(scrVec, 2) * gamma
        else:
            # Previously fell through and failed later with an unbound local.
            raise ValueError(
                'regularizeSource must be a callable method, "diff1_ss", '
                '"diff_l1", "diff_l2", or None')
    else:
        regSourceObj = 0

    ## Sum total model objective
    model['obj'] = modelObj + regThetaObj + regSourceObj

    ## Append model to models list
    self.models[name] = model
def objective_constraints(self, variables, mask, reservations, mpc_ene=None):
    """Build the master constraint list for the subset of time-series data being optimized.

    Args:
        variables (Dict): Dictionary of variables being optimized. This method
            reads 'ene', 'dis', 'ch', 'on_c', 'on_d', 'ene_max_slack',
            'ene_min_slack', 'ch_max_slack' and 'dis_max_slack'; with
            ``self.incl_slack`` also 'ch_min_slack' and 'dis_min_slack'; with
            startup modelling also 'start_c' and 'start_d'.
        mask (DataFrame): A boolean array that is true for indices corresponding
            to time_series data included in the subs data set
        reservations (Dict): Dictionary of energy and power reservations required
            by the services being performed with the current optimization subset.
            Keys read here: 'E', 'E_upper', 'E_lower', 'C_min', 'C_max',
            'D_min', 'D_max'.
        mpc_ene (float): value of energy at end of last opt step (for mpc opt).
            When None the window starts at the target energy instead.

    Returns:
        A list of constraints that corresponds the battery's physical constraints
        and its service constraints
    """
    constraint_list = []

    size = int(np.sum(mask))

    curr_e_cap = self.physical_constraints['ene_max_rated'].value
    ene_target = self.soc_target * curr_e_cap

    # optimization variables
    ene = variables['ene']
    dis = variables['dis']
    ch = variables['ch']
    on_c = variables['on_c']
    on_d = variables['on_d']

    # Masked minimum-power limits are needed twice (as cvx parameters and for the
    # forced-on check at the bottom), so slice them out of pandas only once.
    ch_min_values = self.control_constraints['ch_min'].value[mask].values
    dis_min_values = self.control_constraints['dis_min'].value[mask].values

    # create cvx parameters of control constraints (this improves readability in cvx costs and better handling)
    ene_max = cvx.Parameter(
        size,
        value=self.control_constraints['ene_max'].value[mask].values,
        name='ene_max')
    ene_min = cvx.Parameter(
        size,
        value=self.control_constraints['ene_min'].value[mask].values,
        name='ene_min')
    ch_max = cvx.Parameter(
        size,
        value=self.control_constraints['ch_max'].value[mask].values,
        name='ch_max')
    ch_min = cvx.Parameter(size, value=ch_min_values, name='ch_min')
    dis_max = cvx.Parameter(
        size,
        value=self.control_constraints['dis_max'].value[mask].values,
        name='dis_max')
    dis_min = cvx.Parameter(size, value=dis_min_values, name='dis_min')

    # energy at the end of the last time step (makes sure that the end of the last time step is ENE_TARGET
    # TODO: rewrite this if MPC_ENE is not None
    constraint_list += [
        cvx.Zero((ene_target - ene[-1])
                 - (self.dt * ch[-1] * self.rte)
                 + (self.dt * dis[-1])
                 - reservations['E'][-1]
                 + (self.dt * ene[-1] * self.sdr * 0.01))
    ]

    # energy generally for every time step
    constraint_list += [
        cvx.Zero(ene[1:] - ene[:-1]
                 - (self.dt * ch[:-1] * self.rte)
                 + (self.dt * dis[:-1])
                 - reservations['E'][:-1]
                 + (self.dt * ene[:-1] * self.sdr * 0.01))
    ]

    # energy at the beginning of the optimization window -- handles rolling window
    if mpc_ene is None:
        constraint_list += [cvx.Zero(ene[0] - ene_target)]
    else:
        constraint_list += [cvx.Zero(ene[0] - mpc_ene)]

    # Keep energy in bounds determined in the constraints configuration function
    # -- making sure our storage meets control constraints
    constraint_list += [
        cvx.NonPos(ene_target - ene_max[-1]
                   + reservations['E_upper'][-1]
                   - variables['ene_max_slack'][-1])
    ]
    constraint_list += [
        cvx.NonPos(ene[:-1] - ene_max[:-1]
                   + reservations['E_upper'][:-1]
                   - variables['ene_max_slack'][:-1])
    ]
    constraint_list += [
        cvx.NonPos(-ene_target + ene_min[-1]
                   + reservations['E_lower'][-1]
                   - variables['ene_min_slack'][-1])
    ]
    # NOTE(review): the lower bound pairs ene/ene_min[1:] with reservation and
    # slack [:-1], unlike the upper bound above -- confirm the offset is intended.
    constraint_list += [
        cvx.NonPos(ene_min[1:] - ene[1:]
                   + reservations['E_lower'][:-1]
                   - variables['ene_min_slack'][:-1])
    ]

    # Keep charge and discharge power levels within bounds
    constraint_list += [
        cvx.NonPos(-ch_max + ch - dis
                   + reservations['D_min'] + reservations['C_max']
                   - variables['ch_max_slack'])
    ]
    constraint_list += [
        cvx.NonPos(-ch + dis
                   + reservations['C_min'] + reservations['D_max']
                   - dis_max - variables['dis_max_slack'])
    ]
    constraint_list += [cvx.NonPos(ch - cvx.multiply(ch_max, on_c))]
    constraint_list += [cvx.NonPos(dis - cvx.multiply(dis_max, on_d))]

    # removing the band in between ch_min and dis_min that the battery will not operate in
    constraint_list += [
        cvx.NonPos(cvx.multiply(ch_min, on_c) - ch + reservations['C_min'])
    ]
    constraint_list += [
        cvx.NonPos(cvx.multiply(dis_min, on_d) - dis + reservations['D_min'])
    ]

    # the constraint below limits energy throughput and total discharge to less than or equal to
    # (number of cycles * energy capacity) per day, for technology warranty purposes
    # this constraint only applies when optimization window is equal to or greater than 24 hours
    # NOTE(review): `size` counts time steps, so "24" equals 24 hours only if dt is one hour -- confirm.
    if self.daily_cycle_limit and size >= 24:
        sub = mask.loc[mask]
        for day in sub.index.dayofyear.unique():
            day_mask = (day == sub.index.dayofyear)
            constraint_list += [
                cvx.NonPos(
                    cvx.sum(dis[day_mask] * self.dt
                            + cvx.pos(reservations['E'][day_mask]))
                    - self.ene_max_rated * self.daily_cycle_limit)
            ]
    elif self.daily_cycle_limit and size < 24:
        e_logger.info(
            'Daily cycle limit did not apply as optimization window is less than 24 hours.'
        )

    # constraints to keep slack variables positive
    if self.incl_slack:
        constraint_list += [cvx.NonPos(-variables['ch_max_slack'])]
        constraint_list += [cvx.NonPos(-variables['ch_min_slack'])]
        constraint_list += [cvx.NonPos(-variables['dis_max_slack'])]
        constraint_list += [cvx.NonPos(-variables['dis_min_slack'])]
        constraint_list += [cvx.NonPos(-variables['ene_max_slack'])]
        constraint_list += [cvx.NonPos(-variables['ene_min_slack'])]

    if self.incl_binary:
        # when dis_min or ch_min has been overwritten (read: increased) by predispatch services,
        # need to force technology to be on
        # TODO better way to do this???
        dis_min_rated = self.physical_constraints['dis_min_rated'].value
        ch_min_rated = self.physical_constraints['ch_min_rated'].value
        ind_d = [i for i in range(size) if dis_min_values[i] > dis_min_rated]
        ind_c = [i for i in range(size) if ch_min_values[i] > ch_min_rated]
        if len(ind_d) > 0:
            constraint_list += [on_d[ind_d] == 1]  # np.ones(len(ind_d))
        if len(ind_c) > 0:
            constraint_list += [on_c[ind_c] == 1]  # np.ones(len(ind_c))

        # note: cannot operate startup without binary
        # NOTE(review): nesting under incl_binary is inferred from the comment above -- confirm.
        if self.incl_startup:
            # startup variables are positive
            constraint_list += [cvx.NonPos(-variables['start_d'])]
            constraint_list += [cvx.NonPos(-variables['start_c'])]
            # difference between binary variables determine if started up in previous interval
            constraint_list += [
                cvx.NonPos(cvx.diff(on_d) - variables['start_d'][1:])
            ]  # first variable not constrained
            constraint_list += [
                cvx.NonPos(cvx.diff(on_c) - variables['start_c'][1:])
            ]  # first variable not constrained

    return constraint_list
def get_params(data):
    r"""Correct a signal estimated as numerator/denominator for weekday effects.

    The ordinary estimate would be numerator_t/denominator_t for each time point
    t. Instead, model

        log(numerator_t/denominator_t) = alpha_{wd(t)} + phi_t

    where alpha is a vector of fixed effects for each weekday. For
    identifiability, we constrain \sum alpha_j = 0, and to enforce this we set
    Sunday's fixed effect to be the negative sum of the other weekdays.

    We estimate this as a penalized Poisson GLM problem with log link. We
    rewrite the problem as

        log(numerator_t) = alpha_{wd(t)} + phi_t + log(denominator_t)

    and set a design matrix X with one row per time point. The first six columns
    of X are weekday indicators; the remaining columns are the identity matrix,
    so that each time point gets a unique phi. Using this X, we write

        log(numerator_t) = X beta + log(denominator_t)

    Hence the first six entries of beta correspond to alpha, and the remaining
    entries to phi.

    The penalty is on the L1 norm of third differences of phi (so the third
    differences of the corresponding columns of beta), to enforce smoothness.
    Third differences ensure smoothness without removing peaks or valleys.

    Objective function is negative mean Poisson log likelihood plus penalty:

        ll = (numerator * (X*b + log(denominator))
              - sum(exp(X*b + log(denominator)))) / num_days

    Args:
        data: table that can be grouped by ``Config.DATE_COL`` and that contains
            a "Denominator" column plus the numerator columns
            ``Config.CLI_COLS + Config.FLU1_COL``.

    Returns:
        A matrix of parameters: the entire vector of betas, one row for each
        numerator column in the data.
    """
    # Daily totals; both the denominators and the numerators come from the same
    # aggregation, so run it once.
    daily_totals = data.groupby(Config.DATE_COL).sum()
    denoms = daily_totals["Denominator"]
    nums = daily_totals[Config.CLI_COLS + Config.FLU1_COL]

    # Construct design matrix to have weekday indicator columns and then day
    # indicators.
    X = np.zeros((nums.shape[0], 6 + nums.shape[0]))
    not_sunday = np.where(nums.index.dayofweek != 6)[0]
    X[not_sunday, np.array(nums.index.dayofweek)[not_sunday]] = 1
    # Sunday rows carry -1 in every weekday column: alpha_Sunday = -sum(others).
    X[np.where(nums.index.dayofweek == 6)[0], :6] = -1
    X[:, 6:] = np.eye(X.shape[0])

    npnums, npdenoms = np.array(nums), np.array(denoms)
    log_denoms = np.log(npdenoms)
    params = np.zeros((nums.shape[1], X.shape[1]))

    # Loop over the available numerator columns and smooth each separately.
    for i in range(nums.shape[1]):
        b = cp.Variable((X.shape[1]))

        lmbda = cp.Parameter(nonneg=True)
        lmbda.value = 10  # Hard-coded for now, seems robust to changes

        log_rate = cp.matmul(X, b) + log_denoms
        ll = (cp.matmul(npnums[:, i], log_rate)
              - cp.sum(cp.exp(log_rate))) / X.shape[0]
        # L-1 Norm of third differences, rewards smoothness
        penalty = (lmbda * cp.norm(cp.diff(b[6:], 3), 1) / (X.shape[0] - 2))
        # NOTE(review): lmbda multiplies the penalty here and again below, so the
        # effective weight is lmbda**2. Kept as-is to preserve fitted values --
        # confirm whether this is intentional.
        objective = -ll + lmbda * penalty

        try:
            prob = cp.Problem(cp.Minimize(objective))
            prob.solve()
        except Exception:
            # If the magnitude of the objective function is too large, an error is
            # thrown; Rescale the objective function
            prob = cp.Problem(cp.Minimize(objective / 1e5))
            prob.solve()
        params[i, :] = b.value

    return params
def constraints(self, mask, sizing_for_rel=False, find_min_soe=False):
    """Assemble the default list of storage constraints for one optimization window.

    Used by services that do not have constraints of their own.

    Args:
        mask (DataFrame): A boolean array that is true for indices corresponding
            to time_series data included in the subs data set
        sizing_for_rel (bool): when True, use the reliability-sizing form of the
            energy balance and pin the sub-dt energy ``uene`` to zero.
        find_min_soe (bool): accepted but not read by this method.

    Returns:
        A list of constraints that corresponds the battery's physical
        constraints and its service constraints
    """
    n_steps = int(np.sum(mask))
    soe_target = self.soc_target * self.effective_soe_max  # this is init_ene

    # optimization variables, looked up in a fixed order
    (ene, dis, ch, uene, udis, uch,
     on_c, on_d, start_c, start_d) = (
        self.variables_dict[key]
        for key in ('ene', 'dis', 'ch', 'uene', 'udis', 'uch',
                    'on_c', 'on_d', 'start_c', 'start_d'))

    built = []

    if sizing_for_rel:
        # first step measured against the target, later steps against the
        # previous step; flows are taken at the same index as the energy
        built.append(
            cvx.Zero(ene[0] - soe_target + (self.dt * dis[0])
                     - (self.rte * self.dt * ch[0]) - uene[0]
                     + (ene[0] * self.sdr * 0.01)))
        built.append(
            cvx.Zero(ene[1:] - ene[:-1] + (self.dt * dis[1:])
                     - (self.rte * self.dt * ch[1:]) - uene[1:]
                     + (ene[1:] * self.sdr * 0.01)))
    else:
        # energy at beginning of time step must be the target energy value
        built.append(cvx.Zero(ene[0] - soe_target))
        # energy evolution generally for every time step
        built.append(
            cvx.Zero(ene[1:] - ene[:-1] + (self.dt * dis[:-1])
                     - (self.rte * self.dt * ch[:-1]) - uene[:-1]
                     + (ene[:-1] * self.sdr * 0.01)))
        # energy at the end of the last time step (makes sure that the end of
        # the last time step is ENE_TARGET
        # NOTE(review): assumed to belong to this non-sizing branch -- confirm.
        built.append(
            cvx.Zero(soe_target - ene[-1] + (self.dt * dis[-1])
                     - (self.rte * self.dt * ch[-1]) - uene[-1]
                     + (ene[-1] * self.sdr * 0.01)))

    # constraints on the ch/dis power
    built.extend([
        cvx.NonPos(ch - (on_c * self.ch_max_rated)),
        cvx.NonPos((on_c * self.ch_min_rated) - ch),
        cvx.NonPos(dis - (on_d * self.dis_max_rated)),
        cvx.NonPos((on_d * self.dis_min_rated) - dis),
    ])

    # constraints on the state of energy
    built.extend([
        cvx.NonPos(self.effective_soe_min - ene),
        cvx.NonPos(ene - self.effective_soe_max),
    ])

    # account for -/+ sub-dt energy -- this is the change in energy that the
    # battery experiences as a result of energy option
    if sizing_for_rel:
        built.append(cvx.Zero(uene))
    else:
        built.append(
            cvx.Zero(uene + (self.dt * udis) - (self.dt * uch * self.rte)))

    # the constraint below limits energy throughput and total discharge to less
    # than or equal to (number of cycles * energy capacity) per day, for
    # technology warranty purposes; it only applies when the optimization
    # window is equal to or greater than 24 hours
    if self.daily_cycle_limit:
        if n_steps >= 24:
            selected = mask.loc[mask]
            day_of_year = selected.index.dayofyear
            for day in day_of_year.unique():
                in_day = (day == day_of_year)
                built.append(
                    cvx.NonPos(
                        cvx.sum(dis[in_day] + udis[in_day]) * self.dt
                        - self.ene_max_rated * self.daily_cycle_limit))
        else:
            TellUser.info(
                'Daily cycle limit did not apply as optimization window is less than 24 hours.'
            )

    # note: cannot operate startup without binary
    if self.incl_startup and self.incl_binary:
        # startup variables are positive
        built.append(cvx.NonPos(-start_c))
        built.append(cvx.NonPos(-start_d))
        # difference between binary variables determine if started up in
        # previous interval
        built.append(cvx.NonPos(cvx.diff(on_d) - start_d[1:]))
        built.append(cvx.NonPos(cvx.diff(on_c) - start_c[1:]))

    return built
def total_variation_plus_seasonal_quantile_filter(signal, use_ixs=None, tau=0.995,
                                                  c1=1e3, c2=1e2, c3=1e2,
                                                  solver='ECOS',
                                                  residual_weights=None,
                                                  tv_weights=None):
    '''
    This performs total variation filtering with the addition of a seasonal
    baseline fit, using a quantile (tilted absolute value) loss on the
    residual. The seasonal signal is smooth and repeats every 365 samples up
    to a constant offset ``beta`` per period, where ``beta`` is constrained to
    [-0.1, 0.01] and penalized quadratically.

    :param signal: A 1d numpy array (must support boolean indexing) containing
        the signal of interest
    :param use_ixs: Boolean index of the entries of ``signal`` that the fit
        must reproduce. Defaults to all entries.
    :param tau: Quantile level used in the residual loss
    :param c1: The regularization parameter to control the total variation in
        the final output signal
    :param c2: The regularization parameter to control the smoothness of the
        seasonal signal
    :param c3: The regularization parameter on the squared yearly offset
        ``beta``
    :param solver: Name of the solver passed to ``problem.solve``
    :param residual_weights: Element-wise weights on the residual. Defaults to
        ones.
    :param tv_weights: Element-wise weights on the first difference of the
        piecewise constant signal (length ``len(signal) - 1``). Defaults to
        ones.
    :return: A tuple ``(s_hat, s_seas)`` of 1d numpy arrays: the piecewise
        constant component and the seasonal component, both of length
        ``len(signal)``
    '''
    n = len(signal)
    if residual_weights is None:
        residual_weights = np.ones_like(signal)
    if tv_weights is None:
        tv_weights = np.ones(len(signal) - 1)
    if use_ixs is None:
        # The builtin bool replaces np.bool, which NumPy 1.24 removed.
        use_ixs = np.ones(n, dtype=bool)

    s_hat = cvx.Variable(n)
    # At least 366 entries so the 365-sample shift below is never empty.
    s_seas = cvx.Variable(max(n, 366))
    s_error = cvx.Variable(n)
    c1 = cvx.Parameter(value=c1, nonneg=True)
    c2 = cvx.Parameter(value=c2, nonneg=True)
    c3 = cvx.Parameter(value=c3, nonneg=True)
    tau = cvx.Parameter(value=tau)
    beta = cvx.Variable()

    weighted_error = cvx.multiply(residual_weights, s_error)
    objective = cvx.Minimize(
        2 * cvx.sum(0.5 * cvx.abs(weighted_error)
                    + (tau - 0.5) * weighted_error)
        + c1 * cvx.norm1(cvx.multiply(tv_weights, cvx.diff(s_hat, k=1)))
        + c2 * cvx.norm(cvx.diff(s_seas, k=2))
        + c3 * beta**2)

    constraints = [
        signal[use_ixs] == s_hat[use_ixs] + s_seas[:n][use_ixs] + s_error[use_ixs],
        cvx.sum(s_seas[:365]) == 0,
        # yearly periodicity up to a constant shift
        s_seas[365:] - s_seas[:-365] == beta,
        beta <= 0.01,
        beta >= -0.1,
    ]

    problem = cvx.Problem(objective=objective, constraints=constraints)
    # Honor the caller's solver choice (this was previously hard-coded to MOSEK).
    problem.solve(solver=solver)
    return s_hat.value, s_seas.value[:n]
def total_variation_plus_seasonal_filter(signal, c1=10, c2=500,
                                         residual_weights=None, tv_weights=None,
                                         use_ixs=None, periodic_detector=False,
                                         transition_locs=None, seas_max=None):
    '''
    This performs total variation filtering with the addition of a seasonal
    baseline fit. This introduces a new signal to the model that is smooth and
    periodic on a yearly time frame. This does a better job of describing real,
    multi-year solar PV power data sets, and therefore does an improved job of
    estimating the discretely changing signal.

    :param signal: A 1d numpy array (must support boolean indexing) containing
        the signal of interest
    :param c1: The regularization parameter to control the total variation in
        the final output signal
    :param c2: The regularization parameter to control the smoothness of the
        seasonal signal
    :param residual_weights: Element-wise weights on the residual. Defaults to
        ones.
    :param tv_weights: Element-wise weights on the first difference of the
        piecewise constant signal (length ``len(signal) - 1``). Defaults to
        ones.
    :param use_ixs: Optional boolean index; only entries that are selected here
        and are not NaN in ``signal`` are fit
    :param periodic_detector: If True (and the signal is longer than 365
        samples) the piecewise constant signal is also forced to repeat every
        365 samples
    :param transition_locs: Optional indices into the first difference of the
        piecewise constant signal where a change is allowed; the difference is
        constrained to zero everywhere else and the total variation term is
        dropped from the objective
    :param seas_max: Optional upper bound on the seasonal signal
    :return: A tuple ``(s_hat, s_seas)`` of 1d numpy arrays: the piecewise
        constant component and the seasonal component
    '''
    if residual_weights is None:
        residual_weights = np.ones_like(signal)
    if tv_weights is None:
        tv_weights = np.ones(len(signal) - 1)
    if use_ixs is None:
        index_set = ~np.isnan(signal)
    else:
        index_set = np.logical_and(use_ixs, ~np.isnan(signal))

    s_hat = cvx.Variable(len(signal))
    s_seas = cvx.Variable(len(signal))
    s_error = cvx.Variable(len(signal))
    c1 = cvx.Constant(value=c1)
    c2 = cvx.Constant(value=c2)

    if transition_locs is None:
        objective = cvx.Minimize(
            10 * cvx.norm(cvx.multiply(residual_weights, s_error))
            + c1 * cvx.norm1(cvx.multiply(tv_weights, cvx.diff(s_hat, k=1)))
            + c2 * cvx.norm(cvx.diff(s_seas, k=2))
        )
    else:
        # Change points are fixed by a hard constraint below, so no TV penalty.
        objective = cvx.Minimize(
            10 * cvx.norm(cvx.multiply(residual_weights, s_error))
            + c2 * cvx.norm(cvx.diff(s_seas, k=2))
        )

    constraints = [
        signal[index_set] == s_hat[index_set] + s_seas[index_set] + s_error[index_set],
        cvx.sum(s_seas[:365]) == 0
    ]
    if len(signal) > 365:
        constraints.append(s_seas[365:] - s_seas[:-365] == 0)
        # NOTE(review): nested here because the 365-shift needs more than 365
        # samples -- confirm against the upstream layout.
        if periodic_detector:
            constraints.append(s_hat[365:] - s_hat[:-365] == 0)
    if transition_locs is not None:
        # The builtin bool replaces np.bool, which NumPy 1.24 removed.
        loc_mask = np.ones(len(signal) - 1, dtype=bool)
        loc_mask[transition_locs] = False
        constraints.append(cvx.diff(s_hat, k=1)[loc_mask] == 0)
    if seas_max is not None:
        constraints.append(s_seas <= seas_max)

    problem = cvx.Problem(objective=objective, constraints=constraints)
    problem.solve()
    return s_hat.value, s_seas.value
def addSource(self, regressor, name=None,
              costFunction='sse', alpha=1,        # Cost function for fit to regressors, alpha is a scalar multiplier
              regularizeTheta=None, beta=1,       # Cost function for parameter regularization, beta is a scalar multiplier
              regularizeSource=None, gamma=1,     # Cost function for signal smoothing, gamma is a scalar multiplier
              lb=None,                            # Lower bound on source
              ub=None,                            # Upper bound on source
              idxScrReg=None,                     # indices used to break the source signal into smaller ones to apply regularization
              numWind=1,                          # number of time windows (relevant for time-varying regressors)
              ):
    """Add a new source model and store it in ``self.models`` under ``name``.

    The model is a dict holding the regressors, the cvxpy decision variables
    ('source', 'theta') and the objective 'obj', which is the sum of a
    regressor-fit term, an optional theta regularizer and an optional source
    regularizer.

    All option checking happens before any cvxpy object is created, so an
    invalid call fails without building variables.

    Args:
        regressor: array-like of regressors. A 0-d input means "no regressors";
            a 1-d input is treated as a single column; a 2-d input is
            transposed if only its second dimension matches ``self.N``.
        name: key for ``self.models``; defaults to the model counter as string.
        costFunction: 'sse', 'l1' or 'l2' (case-insensitive).
        alpha: multiplier on the regressor-fit term.
        regularizeTheta: None, a callable taking the theta variable, or one of
            'l1', 'l2', 'diff_l1', 'diff_l2' (case-insensitive).
        beta: scalar, or vector with one element per regressor.
        regularizeSource: None, a callable taking the (differenced) source
            vector, or one of 'diff1_ss', 'diff_l1', 'diff_l2'.
        gamma: scalar multiplier on the source regularizer.
        lb, ub: stored on the model as lower / upper bound on the source.
        idxScrReg: indices at which the source is split before differencing.
        numWind: number of time windows for the 'diff_*' theta regularizers.

    Raises:
        NameError: regressor has more than 2 dimensions or the wrong length, or
            gamma is not scalar (exception types kept for existing callers).
        ValueError: unknown costFunction / regularizeTheta / regularizeSource
            option, wrong beta size, or a failing custom theta regularizer.
    """
    self.modelcounter += 1  # Increment model counter

    ## Write model name if it doesn't exist.
    if name is None:
        name = str(self.modelcounter)

    ## Instantiate a dictionary of model terms
    model = {}
    model['name'] = name
    model['alpha'] = alpha
    model['lb'] = lb
    model['ub'] = ub

    ## Check regressor shape
    regressor = np.array(regressor)
    if regressor.ndim == 0:  ## If no regressors are included, set them an empty array
        regressor = np.zeros((self.N, 0))
    if regressor.ndim == 1:
        regressor = np.expand_dims(regressor, 1)
    if regressor.ndim > 2:
        raise NameError('Regressors cannot have more than 2 dimensions')

    ## Check that regressors have the correct shape (Nobs, Nregressors)
    if regressor.shape[0] != self.N:
        if regressor.shape[1] == self.N:
            regressor = regressor.transpose()
        else:
            raise NameError('Lengths of regressors and aggregate signal must match')

    ## Define model regressors and order
    model['regressor'] = regressor
    model['order'] = regressor.shape[1]

    ## Validate every option before any solver object is built.
    costKey = costFunction.lower()
    if costKey not in ('sse', 'l1', 'l2'):
        raise ValueError('{} wrong option, use "sse","l2" or "l1"'.format(costFunction))

    # Check that beta is scalar or of length of number of parameters.
    beta = np.array(beta)
    if beta.size not in [1, model['order']]:
        raise ValueError('Beta must be scalar or vector with one element for each regressor')

    thetaKey = None
    if regularizeTheta is not None and not callable(regularizeTheta):
        thetaKey = regularizeTheta.lower()
        if thetaKey not in ('l1', 'l2', 'diff_l1', 'diff_l2'):
            # An unknown name used to leave the theta term unbound and crash later.
            raise ValueError(
                '{} wrong option for regularizeTheta, use a callable, '
                '"l1", "l2", "diff_l1", "diff_l2" or None'.format(regularizeTheta))

    # Check that gamma is scalar
    gamma = np.array(gamma)
    if gamma.size != 1:
        raise NameError('Gamma must be scalar')

    sourceKey = None
    if regularizeSource is not None and not callable(regularizeSource):
        sourceKey = regularizeSource.lower()
        if sourceKey not in ('diff1_ss', 'diff_l1', 'diff_l2'):
            # An unknown name used to leave the source term unbound and crash later.
            raise ValueError(
                '{} wrong option for regularizeSource, use a callable, '
                '"diff1_ss", "diff_l1", "diff_l2" or None'.format(regularizeSource))

    ## Define decision variables and cost function style
    model['source'] = cvp.Variable(self.N, 1)
    #model['source'] = cvp.Variable((self.N,1)) # required for cvxpy 1.0.1
    model['theta'] = cvp.Variable(model['order'], 1)
    #model['theta'] = cvp.Variable((model['order'],1)) # required for cvxpy 1.0.1
    model['costFunction'] = costFunction

    ## Define objective function to fit model to regressors
    residuals = (model['source'] - model['regressor'] * model['theta'])
    if costKey == 'sse':
        modelObj = cvp.sum_squares(residuals) * model['alpha']
    elif costKey == 'l1':
        modelObj = cvp.norm(residuals, 1) * model['alpha']
    else:  # 'l2'
        modelObj = cvp.norm(residuals, 2) * model['alpha']

    ## Define cost function to regularize theta
    if regularizeTheta is None:
        regThetaObj = 0
    elif callable(regularizeTheta):
        ## User can input their own function to regularize theta.
        # Must input a cvxpy variable vector and output a scalar
        # or a vector with one element for each parameter.
        try:
            regThetaObj = regularizeTheta(model['theta']) * beta
        except Exception:
            raise ValueError('Check custom regularizer for model {}'.format(model['name']))
        if regThetaObj.size[0] * regThetaObj.size[1] != 1:
            raise ValueError('Check custom regularizer for model {}, make sure it returns a scalar'.format(model['name']))
    elif thetaKey == 'l2':
        regThetaObj = cvp.norm(model['theta'] * beta)
    elif thetaKey == 'l1':
        regThetaObj = cvp.norm(model['theta'] * beta, 1)
    elif numWind == 1:
        # 'diff_l1' / 'diff_l2' with a single window: nothing to difference.
        regThetaObj = 0
    else:
        if regressor.shape[1] == numWind:
            # this actually corresponds to the solar model (no intercept)
            thetaDiffVec = cvp.diff(model['theta'])
        else:
            thetaDiffVec = cvp.vstack(cvp.diff(model['theta'][0:numWind]),
                                      cvp.diff(model['theta'][numWind:2 * numWind]))
        normOrder = 2 if thetaKey == 'diff_l2' else 1
        regThetaObj = cvp.norm(thetaDiffVec, normOrder) * beta

    ## Define cost function to regularize source signal
    if regularizeSource is None:
        regSourceObj = 0
    else:
        if idxScrReg is not None:
            # NOTE(review): the first segment is stacked without differencing,
            # unlike every later segment -- confirm this is intended.
            scrVec = model['source'][0:idxScrReg[0]]
            idxStart = idxScrReg[0] + 1
            for idxEnd in idxScrReg[1:]:
                scrCur = cvp.diff(model['source'][idxStart:idxEnd])
                scrVec = cvp.vstack(scrVec, scrCur)
                idxStart = idxEnd + 1
            scrVec = cvp.vstack(scrVec, cvp.diff(model['source'][idxStart:]))
        else:
            scrVec = cvp.diff(model['source'])

        if callable(regularizeSource):
            ## User can input their own function to regularize the source signal.
            # Must input a cvxpy variable vector and output a scalar.
            regSourceObj = regularizeSource(scrVec) * gamma
        elif sourceKey == 'diff1_ss':
            regSourceObj = cvp.sum_squares(scrVec) * gamma
        elif sourceKey == 'diff_l1':
            regSourceObj = cvp.norm(scrVec, 1) * gamma
        else:  # 'diff_l2'
            regSourceObj = cvp.norm(scrVec, 2) * gamma

    ## Sum total model objective
    model['obj'] = modelObj + regThetaObj + regSourceObj

    ## Append model to models list
    self.models[name] = model
def __analyze_distribution(self, data, plot=None, figsize=(8, 6)):
    """Fit a piecewise-linear CDF to ``data`` and locate point masses in it.

    The positive values of ``data`` are turned into an empirical CDF on [0, 1],
    resampled on 5000 evenly spaced points, and fit with a curve whose second
    difference is sparse. Sharp relative drops in the slope of that fit are
    flagged as point masses, and each run of adjacent flags is reduced to one
    point.

    :param data: 1d numpy array; only entries ``> 0`` are used
    :param plot: None, 'pdf', 'cdf' or 'diffs'
    :param figsize: figure size used when a plot is requested
    :return: the x-locations of the detected point masses when ``plot`` is
        None; the matplotlib figure for 'pdf', 'cdf' and 'diffs'; None for any
        other value of ``plot``
    """
    # Calculate empirical CDF
    x = np.sort(np.copy(data))
    x = x[x > 0]
    x = np.concatenate([[0.], x, [1.]])
    y = np.linspace(0, 1, len(x))
    # Resample the CDF to get an even spacing of points along the x-axis
    f = interp1d(x, y)
    x_rs = np.linspace(0, 1, 5000)
    y_rs = f(x_rs)
    # Fit statistical model to resampled CDF that has sparse 2nd order difference
    y_hat = cvx.Variable(len(y_rs))
    mu = cvx.Parameter(nonneg=True)
    mu.value = 1e1
    error = cvx.sum_squares(y_rs - y_hat)
    reg = cvx.norm(cvx.diff(y_hat, k=2), p=1)
    objective = cvx.Minimize(error + mu * reg)
    constraints = [
        y_rs[0] == y_hat[0],
        y[-1] == y_hat[-1]
    ]
    problem = cvx.Problem(objective, constraints)
    problem.solve(solver='MOSEK')

    # Evaluate the differences of the fit once; they are reused for detection
    # and for every plot below.
    slope = cvx.diff(y_hat, k=1).value
    local_curv = cvx.diff(y_hat, k=2).value
    # Look for outliers in the 2nd order difference to identify point masses from clipping
    ref_slope = slope[:-1]
    threshold = -0.5
    # Smallest relative slope change over windows of one, two and three steps;
    # the tails of the longer windows fall back to the single-step ratio.
    metric = np.min([
        local_curv / ref_slope,
        np.concatenate([
            (local_curv[:-1] + local_curv[1:]) / ref_slope[:-1],
            [local_curv[-1] / ref_slope[-1]]
        ]),
        np.concatenate([
            (local_curv[:-2] + local_curv[1:-1] + local_curv[2:]) / ref_slope[:-2],
            [local_curv[-2:] / ref_slope[-2:]]
        ], axis=None)
    ], axis=0)
    point_masses = np.concatenate(
        [[False],
         np.logical_and(metric <= threshold, ref_slope > 3e-4),
         [False]])
    # Catch if the PDF ends in a point mass at the high value
    if np.logical_or(slope[-1] > 1e-3,
                     np.allclose(slope[-1], np.max(slope))):
        point_masses[-2] = True
    # Reduce clusters of detected points to single points
    # (the builtin bool replaces np.bool, which NumPy 1.24 removed)
    pm_reduce = np.zeros_like(point_masses, dtype=bool)
    for ix in range(len(point_masses) - 1):
        if ~point_masses[ix] and point_masses[ix + 1]:
            begin_cluster = ix + 1
        elif point_masses[ix] and ~point_masses[ix + 1]:
            end_cluster = ix
            try:
                ix_select = np.argmax(metric[begin_cluster:end_cluster + 1])
            except ValueError:
                # empty slice of metric: fall back to the start of the cluster
                pm_reduce[begin_cluster] = True
            else:
                pm_reduce[begin_cluster + ix_select] = True
    point_masses = pm_reduce
    point_mass_values = x_rs[point_masses]

    if plot is None:
        return point_mass_values
    elif plot == 'pdf':
        fig = plt.figure(figsize=figsize)
        plt.hist(data[data > 0], bins=100, alpha=0.5, label='histogram')
        # Rescale the slope so its peak matches the tallest histogram bin.
        scale = np.histogram(data[data > 0], bins=100)[0].max() \
            / slope.max()
        plt.plot(x_rs[:-1], scale * slope, color='orange', linewidth=1,
                 label='piecewise constant PDF estimate')
        for count, val in enumerate(point_mass_values):
            if count == 0:
                plt.axvline(val, linewidth=1, linestyle=':', color='green',
                            label='detected point mass')
            else:
                plt.axvline(val, linewidth=1, linestyle=':', color='green')
        return fig
    elif plot == 'cdf':
        fig = plt.figure(figsize=figsize)
        plt.plot(x_rs, y_rs, linewidth=1, label='empirical CDF')
        plt.plot(x_rs, y_hat.value, linewidth=3, color='orange', alpha=0.57,
                 label='estimated CDF')
        if len(point_mass_values) > 0:
            plt.scatter(x_rs[point_masses], y_rs[point_masses], color='red',
                        marker='o', label='detected point mass')
        return fig
    elif plot == 'diffs':
        fig, ax = plt.subplots(nrows=2, sharex=True, figsize=figsize)
        y1 = slope
        y2 = metric
        ax[0].plot(x_rs[:-1], y1)
        ax[1].plot(x_rs[1:-1], y2)
        ax[1].axhline(threshold, linewidth=1, color='r', ls=':',
                      label='decision boundary')
        if len(point_mass_values) > 0:
            ax[0].scatter(x_rs[point_masses], y1[point_masses[1:]],
                          color='red', marker='o',
                          label='detected point mass')
            ax[1].scatter(x_rs[point_masses], y2[point_masses[1:-1]],
                          color='red', marker='o',
                          label='detected point mass')
        ax[0].set_title('1st order difference of CDF fit')
        ax[1].set_title('2nd order difference of CDF fit')
        ax[1].legend()
        plt.tight_layout()
        return fig
def updateSourceObj(self, sourcename):
    """Rebuild the objective ``model['obj']`` of one stored source model.

    With ``sourcename`` equal to 'all' (case-insensitive) every model in
    ``self.models`` is rebuilt in turn. Otherwise the model stored under
    ``sourcename`` is read, its 'beta' and 'gamma' entries are converted to
    numpy arrays, its objective is recomputed from its 'costFunction',
    'regularizeTheta' and 'regularizeSource' entries, and it is written back.

    All option checks run before any cvxpy expression is built.

    Raises:
        ValueError: unknown 'costFunction' or 'regularizeTheta' option, wrong
            beta size, or a failing custom theta regularizer.
        NameError: gamma is not scalar (type kept for existing callers).
        Exception: unknown 'regularizeSource' option.
    """
    if sourcename.lower() == 'all':
        # Snapshot the keys: each recursive call writes back into self.models.
        for name in list(self.models.keys()):
            self.updateSourceObj(name)
        return

    model = self.models[sourcename]

    ## Validate the cost function option.
    costKey = model['costFunction'].lower()
    if costKey not in ('sse', 'l1', 'l2'):
        # The message previously formatted an undefined local name, which
        # turned this ValueError into a NameError.
        raise ValueError('{} wrong option, use "sse","l2" or "l1"'.format(model['costFunction']))

    # Check that beta is scalar or of length of number of parameters.
    model['beta'] = np.array(model['beta'])
    if model['beta'].size not in [1, model['order']]:
        raise ValueError('Beta must be scalar or vector with one element for each regressor')

    thetaReg = model['regularizeTheta']
    thetaKey = None
    if thetaReg is not None and not callable(thetaReg):
        thetaKey = thetaReg.lower()
        if thetaKey not in ('l1', 'l2'):
            # An unknown name used to leave the theta term unbound and crash later.
            raise ValueError('regularizeTheta must be a callable method, `l1`, `l2`, or None')

    # Check that gamma is scalar
    model['gamma'] = np.array(model['gamma'])
    if model['gamma'].size != 1:
        raise NameError('Gamma must be scalar')

    sourceReg = model['regularizeSource']
    if (sourceReg is not None and not callable(sourceReg)
            and sourceReg.lower() != 'diff1_ss'):
        raise Exception('regularizeSource must be a callable method, `diff1_ss`, or None')

    ## Define objective function to fit model to regressors
    ## **CHANGE MT: I moved the alpha variable to be inside the norms so that
    ## it can be time varying.
    residuals = (model['source'] - model['regressor'] * model['theta'])
    if costKey == 'sse':
        # square root of alpha so that alpha scales the squared residuals
        modelObj = cvp.sum_squares(cvp.mul_elemwise(model['alpha'] ** .5, residuals))
    elif costKey == 'l1':
        modelObj = cvp.norm(cvp.mul_elemwise(model['alpha'], residuals), 1)
    else:  # 'l2'
        modelObj = cvp.norm(cvp.mul_elemwise(model['alpha'], residuals), 2)

    ## Define cost function to regularize theta
    if thetaReg is None:
        regThetaObj = 0
    elif callable(thetaReg):
        ## User can input their own function to regularize theta.
        # Must input a cvxpy variable vector and output a scalar
        # or a vector with one element for each parameter.
        try:
            regThetaObj = thetaReg(model['theta']) * model['beta']
        except Exception:
            raise ValueError('Check custom regularizer for model {}'.format(model['name']))
        if regThetaObj.size[0] * regThetaObj.size[1] != 1:
            raise ValueError('Check custom regularizer for model {}, make sure it returns a scalar'.format(model['name']))
    elif thetaKey == 'l2':
        regThetaObj = cvp.norm(model['theta'] * model['beta'])
    else:  # 'l1'
        regThetaObj = cvp.norm(model['theta'] * model['beta'], 1)

    ## Define cost function to regularize source signal
    if sourceReg is None:
        regSourceObj = 0
    elif callable(sourceReg):
        ## User can input their own function to regularize the source signal.
        # Must input a cvxpy variable vector and output a scalar.
        regSourceObj = sourceReg(model['source']) * model['gamma']
    else:  # 'diff1_ss'
        regSourceObj = cvp.sum_squares(cvp.diff(model['source'])) * model['gamma']

    ## Sum total model objective
    model['obj'] = modelObj + regThetaObj + regSourceObj

    ## Append model to models list
    self.models[sourcename] = model