def test_all_stats(self):
    """Smoke-test the statistics helpers for degree 0, 2 and 3 fits.

    Nothing is asserted on the returned values; the test passes when none
    of the calls raise.
    """
    np.random.seed(121)
    # The degrees are visited in this fixed order so that the random
    # prediction points are drawn in the same sequence for every run.
    for degree in (0, 2, 3):
        model = pwlf.PiecewiseLinFit(self.x_sin, self.y_sin, degree=degree)
        model.fitfast(3)
        model.standard_errors()
        model.prediction_variance(np.random.random(20))
        model.p_values()
        model.r_squared()
        model.calc_slopes()
def test_one_segment_fits(self):
    """Check that ``fitfast`` and ``fit`` return matching breaks for one segment."""
    fast_model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    fast_breaks = fast_model.fitfast(1)

    full_model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    full_breaks = full_model.fit(1)

    matches = np.isclose(fast_breaks, full_breaks)
    # Only the first two entries of the comparison are checked.
    for idx in (0, 1):
        self.assertTrue(matches[idx])
def test_random_seed_fitfast(self):
    """Check that two ``seed=123`` models give matching ``fitfast`` breaks.

    The global NumPy seed is deliberately set to a different value before
    each model is built.
    """
    fits = []
    for global_seed in (1, 2):
        np.random.seed(global_seed)
        model = pwlf.PiecewiseLinFit(self.x_small, self.y_small, seed=123)
        fits.append(model.fitfast(2))

    agreement = np.isclose(fits[0], fits[1])
    # Every break location has to agree.
    self.assertTrue(agreement.all())
def test_fit(self):
    """Fit degree 0, 1 and 2 models and bound their sum of squared residuals."""
    models = [
        pwlf.PiecewiseLinFit(self.x_sin, self.y_sin, degree=degree)
        for degree in (0, 1, 2)
    ]
    # Fit each model with 5 segments, in degree order, after seeding.
    np.random.seed(123123)
    for model in models:
        model.fit(5)
    # Upper limits on ssr for degree 0, 1 and 2 respectively.
    for model, ssr_limit in zip(models, (10., 7.0, 0.5)):
        self.assertTrue(model.ssr <= ssr_limit)
def linearization(self, ele_capacity=2):
    """Fit three-segment piecewise-linear models to two device curves.

    The converter curve maps input power ``p`` to ``p * con.eta(p)``; the
    electrolyser curve maps input power to ``ele.m_H2() * 3600`` after the
    group has been set to that power.

    Parameters
    ----------
    ele_capacity : number, optional
        Upper end of the sampled power range; also passed to
        ``converter.converter``.

    Sets on ``self``
    ----------------
    con_breaks, ele_breaks
        Break locations returned by the two fits.
    con_f_breaks, ele_f_breaks
        The respective curve evaluated at those break locations.
    k1_c..k3_c, b1_c..b3_c
        Slopes and intercepts of the three converter segments.
    k1_e..k3_e, b1_e..b3_e
        Slopes and intercepts of the three electrolyser segments.
    """
    # ----- converter: piecewise linearization of power_in -> power_out -----
    con = converter.converter(ele_capacity)
    # 20 samples up to 10 % of capacity, 50 samples from 11 % to capacity.
    x = np.append(np.linspace(0, 0.1 * ele_capacity, 20),
                  np.linspace(0.11 * ele_capacity, ele_capacity, 50))
    y = np.array([p * con.eta(p) for p in x])
    con_pwlf = pwlf.PiecewiseLinFit(x, y)
    con_breaks = con_pwlf.fit(3)  # the argument is the number of segments
    self.con_breaks = con_breaks
    self.con_f_breaks = np.array([p * con.eta(p) for p in con_breaks])
    self.k1_c, self.k2_c, self.k3_c = con_pwlf.slopes[:3]
    self.b1_c, self.b2_c, self.b3_c = con_pwlf.intercepts[:3]

    # ----- electrolyser: same procedure on the hydrogen output curve -----
    ele = electrolyser.electrolyser_group(I_min=0)

    def hydrogen_output(power):
        # Set the group to `power`, then read its output.
        # NOTE(review): the 3600 factor presumably converts a per-second
        # rate to per-hour -- confirm against electrolyser.m_H2.
        electrolyser.set_power_group(ele, power)
        return ele.m_H2() * 3600

    y = np.array([hydrogen_output(p) for p in x])
    ele_pwlf = pwlf.PiecewiseLinFit(x, y)
    ele_breaks = ele_pwlf.fit(3)
    self.ele_breaks = ele_breaks
    self.ele_f_breaks = np.array([hydrogen_output(p) for p in ele_breaks])
    self.k1_e, self.k2_e, self.k3_e = ele_pwlf.slopes[:3]
    self.b1_e, self.b2_e, self.b3_e = ele_pwlf.intercepts[:3]
def test_weighted_same_as_ols(self):
    """Weighted least squares with unit weights must match the unweighted fit."""
    n_segments = 2
    plain = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    guess = np.random.random()
    plain_breaks = plain.fit_guess([guess])

    unit_weights = np.ones_like(self.x_small)
    weighted = pwlf.PiecewiseLinFit(self.x_small, self.y_small,
                                    weights=unit_weights)
    weighted_breaks = weighted.fit_guess([guess])

    self.assertTrue(np.isclose(plain.ssr, weighted.ssr))
    # n_segments segments give n_segments + 1 break locations to compare.
    for idx in range(n_segments + 1):
        self.assertTrue(np.isclose(plain_breaks[idx], weighted_breaks[idx]))
def test_nonlinear_p_and_se(self):
    """Non-linear p-values on exactly piecewise-linear data must be <= 0.05."""
    np.random.seed(1)
    n_data = 20
    x = np.linspace(0, 1, num=n_data)
    # A throw-away model on random data is used only as a generator: its
    # ``predict`` is called with known parameters to produce exact data.
    generator = pwlf.PiecewiseLinFit(x, np.random.random(n_data))
    known_beta = np.array((1.0, 0.2, 0.2))
    known_breaks = np.array((0.0, 0.5, 1.0))
    y = generator.predict(x, beta=known_beta, breaks=known_breaks)

    model = pwlf.PiecewiseLinFit(x, y)
    model.fitfast(2)
    p_vals = model.p_values(method='non-linear', step_size=1e-4)
    self.assertTrue(p_vals.max() <= 0.05)
def test_lapack_driver(self):
    """Fit with ``lapack_driver='gelsy'`` using the x data as break points.

    The resulting sum of squared residuals is expected to be close to zero.
    """
    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small,
                                 lapack_driver='gelsy')
    breaks = self.x_small.copy()
    residual = model.fit_with_breaks(breaks)
    self.assertTrue(np.isclose(residual, 0.0))
def test_y_c_not_supplied(self):
    """``fit`` called with ``y_c`` but no ``x_c`` must raise ``ValueError``."""
    mf = pwlf.PiecewiseLinFit(self.x_small, self.y_small, degree=1)
    # Only the fit call is guarded, so an unexpected ValueError from the
    # constructor cannot make this test pass by accident.
    with self.assertRaises(ValueError):
        mf.fit(2, y_c=[1.0])
def test_pvalue_no_fit(self):
    """``p_values`` must raise ``AttributeError`` when no fit has been run."""
    my_pwlf_0 = pwlf.PiecewiseLinFit(self.x_sin, self.y_sin)
    with self.assertRaises(AttributeError):
        my_pwlf_0.p_values()
def test_r2_no_fit(self):
    """``r_squared`` must raise ``AttributeError`` when no fit has been run."""
    my_pwlf_0 = pwlf.PiecewiseLinFit(self.x_sin, self.y_sin)
    with self.assertRaises(AttributeError):
        my_pwlf_0.r_squared()
def fit(self, X, y, **kwargs):
    """Fit the wrapped ``pwlf.PiecewiseLinFit`` model on the first column of X.

    ``self.fit_option`` selects how break points are found:
    ``'auto'`` uses ``fit``, ``'fast'`` uses ``fitfast`` and ``'arrm'``
    takes break points from ``arrm_breakpoints`` and fits with them.
    Extra keyword arguments are forwarded to the pwlf fitting call.

    Sets ``model_``, ``fit_breaks_``, ``X_`` and ``y_`` and returns ``self``.

    Raises
    ------
    ValueError
        If ``self.fit_option`` is not one of the supported values.
    """
    # Validate the inputs before anything is stored on the estimator.
    X, y = check_X_y(X, y, y_numeric=True)
    X = check_max_features(X)

    self.model_ = pwlf.PiecewiseLinFit(
        X[:, 0], y, **default_none_kwargs(self.pwlf_kwargs))

    option = self.fit_option
    if option == 'auto':
        self.fit_breaks_ = self.model_.fit(self.n_segments, **kwargs)
    elif option == 'arrm':
        self.fit_breaks_ = arrm_breakpoints(X, y, 0.05, self.n_segments)
        self.model_.fit_with_breaks(self.fit_breaks_, **kwargs)
    elif option == 'fast':
        self.fit_breaks_ = self.model_.fitfast(self.n_segments, **kwargs)
    else:
        raise ValueError(f"unsupported fit_option '{self.fit_option}'")

    self.X_ = X
    self.y_ = y
    # Returning the fitted estimator allows call chaining.
    return self
def test_pv(self):
    """Smoke test: prediction variance can be computed for random data."""
    x_rand = np.random.random(20)
    y_rand = np.random.random(20)
    model = pwlf.PiecewiseLinFit(x_rand, y_rand)
    model.fitfast(2)
    # The result is not inspected; the call only has to complete.
    model.prediction_variance(np.random.random(20))
def pw_changepoint(f, max_cpt=6, window=None):
    """Fit piecewise-linear models with 2..max_cpt segments and tabulate them.

    Original author notes: the series is meant to be an STL trend from an
    additive time series; the best number of change points is where the
    AIC drop is largest as the segment count grows; the approach is not
    recommended because it is unclear how to regularize it.

    Parameters
    ----------
    f : pandas.DataFrame
        Must provide the columns ``'dtrend_diff'`` and ``'period'``.
    max_cpt : int, optional
        Largest number of segments to try.
    window : int or None, optional
        Rolling-mean window. It is overridden by the first row's ``period``
        whenever the ``period`` column contains no nulls.

    Returns
    -------
    pandas.DataFrame
        One row per segment count with the columns ``nb`` (segments minus
        one), ``ssr``, ``npars``, ``sig2``, ``aic`` and ``cpt`` (interior
        break locations truncated to int).
    """
    series = f['dtrend_diff'].values
    if f['period'].isnull().sum() == 0:
        window = f.loc[f.index[0], 'period']
    if window is not None:
        # Smooth with a rolling mean; the leading NaNs it produces are
        # filled with the raw values at the same positions.
        smoothed = pd.Series(series).rolling(window).mean()
        smoothed = smoothed.fillna(pd.Series(series[:window]))
        series = smoothed.values

    n_obs = len(series)
    model = pwlf.PiecewiseLinFit(np.arange(n_obs), series)
    table = defaultdict(list)
    previous_aic = None
    for n_seg in range(2, max_cpt + 1):
        breaks = model.fitfast(n_seg, pop=5)
        ssr = model.ssr
        npars = model.n_parameters
        sig2 = ssr / (n_obs - npars)
        aic = aicc_sigma(sig2, n_obs, n_seg, islog=False)
        cpt = [int(b) for b in breaks]

        table['nb'].append(n_seg - 1)
        table['ssr'].append(ssr)
        table['npars'].append(npars)
        table['sig2'].append(sig2)
        table['aic'].append(aic)
        table['cpt'].append(cpt[1:-1])

        # Change in AIC relative to the previous segment count.
        aic_change = np.inf if previous_aic is None else aic - previous_aic
        previous_aic = aic
        print(f'nb: {n_seg - 1!s} pars: {npars!s} ssr: {ssr!s}'
              f' aic: {aic!s} cpts: {cpt!s} aicdiff: {aic_change!s}')
    return pd.DataFrame(table)
def test_n_parameters_correct(self):
    """Regression-matrix width must equal ``n_parameters`` for degree 0-2.

    Also checks that five break locations give ``n_segments == 4``.
    """
    breaks = np.array([0., 1.03913513, 3.04676334, 4.18647526, 10.])
    models = [
        pwlf.PiecewiseLinFit(self.x_sin, self.y_sin, degree=degree)
        for degree in (0, 1, 2)
    ]
    matrices = [
        model.assemble_regression_matrix(breaks, self.x_sin)
        for model in models
    ]
    for matrix, model in zip(matrices, models):
        self.assertTrue(matrix.shape[1] == model.n_parameters)
    # Also check n_segments is correct.
    for model in models:
        self.assertTrue(4 == model.n_segments)
def test_single_force_break_point_degree_zero(self):
    """A degree-0 fit forced through one point must predict that point."""
    forced_x, forced_y = [2.0], [1.5]
    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small, degree=0)
    model.fit_with_breaks_force_points([0.2, 0.7], forced_x, forced_y)
    prediction = model.predict(forced_x)
    self.assertTrue(np.isclose(forced_y, prediction))
def test_custom_opt_with_con(self):
    """Smoke test: a custom optimizer can run with a forced point."""
    from scipy.optimize import minimize

    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    model.use_custom_opt(3, x_c=[0.], y_c=[0.])
    initial_breaks = np.array((0.9, 1.1))
    # The optimizer result is not inspected; finishing without an
    # exception is the success criterion.
    minimize(model.fit_with_breaks_opt, initial_breaks)
    self.assertTrue(True)
def pwlf_test(x, y, breaks):
    """Fit ``breaks + 1`` segments, plot the fit and print its MSE."""
    model = pwlf.PiecewiseLinFit(x, y)
    model.fit(breaks + 1)
    fitted = model.predict(x)
    vis(x, y, fitted, '-', title='pwlf')
    print(f"pwlf mean square error is {mean_squared_error(y, fitted)}")
def piecewise_regression(time, k_lat, l_lon, n_lines):
    """Fit ``n_lines`` linear segments to the profile that ``checkdate`` returns.

    Returns ``(x, y_hat, slopes, breaks)``: the x values from ``checkdate``,
    the fitted prediction at those x values, the segment slopes and the
    break locations.
    """
    profile = checkdate(time, k_lat, l_lon)
    # Per the original notes: element 0 holds the ERA5 model layer heights
    # (z levels) and element 1 the potential temperature.
    heights, pottemp = profile[0], profile[1]

    model = pwlf.PiecewiseLinFit(heights, pottemp)
    # Forced start points could be passed here as x_c / y_c if required.
    breaks = model.fit(n_lines)
    slopes = model.calc_slopes()
    print("Z values:", breaks)
    print("a1,a2,a3 values:", slopes)

    # Predict at the input heights themselves.
    fitted = model.predict(heights)
    print(f"--- day processed:{time!s}---")
    return heights, fitted, slopes, breaks
def test_break_point_spot_on_r2(self):
    """R-squared must be close to 1 when the x data are used as break points."""
    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    breaks = self.x_small.copy()
    model.fit_with_breaks(breaks)
    r_squared = model.r_squared()
    self.assertTrue(np.isclose(r_squared, 1.0))
def test_degree_not_supported(self):
    """Constructing a model with ``degree=100`` must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        pwlf.PiecewiseLinFit(self.x_small, self.y_small, degree=100)
def test_custom_opt(self):
    """A custom optimizer must drive the objective close to zero."""
    from scipy.optimize import minimize

    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    model.use_custom_opt(3)
    initial_breaks = np.array((0.9, 1.1))
    outcome = minimize(model.fit_with_breaks_opt, initial_breaks)
    self.assertTrue(np.isclose(outcome['fun'], 0.0))
def piece_linear(x, y, num_pieces):
    """Return the fitted model's ``predict`` if it passes ``is_monotonic``.

    A ``num_pieces``-segment model is fitted to ``(x, y)``. ``None`` is
    returned when ``is_monotonic(predict, x, y)`` is falsy.
    """
    model = pwlf.PiecewiseLinFit(x, y)
    model.fit(num_pieces)
    predictor = model.predict
    return predictor if is_monotonic(predictor, x, y) else None
def nonlin_corr_metric(x: np.ndarray, y: np.ndarray, t_shift: int):
    """Scan time shifts of ``y`` against ``x`` with linear and piecewise fits.

    ``x`` is trimmed by ``t_shift`` samples at both ends. For each of the
    ``2 * t_shift`` shifts an equally long window of ``y`` is taken, both
    signals are standardised, and two scores are computed: the squared
    Pearson correlation (``r2``) and a score based on a piecewise-linear
    fit of ``y`` on ``x`` with 50 fixed breaks in ``[-5, 5]`` (``h2``).

    :param x: first signal (1-D array)
    :param y: second signal; indexed as if it has the same length as ``x``
    :param t_shift: maximum shift in samples; must be at least 1
    :return: ``(r2, h2, r2_dt, h2_dt, r2_shift, h2_shift)`` where ``r2`` and
        ``h2`` are the best scores, ``r2_dt`` and ``h2_dt`` are the positions
        of those scores minus ``t_shift`` (so they range over
        ``-t_shift .. t_shift - 1``), and the two lists hold every score.
    :raises ValueError: if ``t_shift`` is smaller than 1
    """
    if t_shift < 1:
        # No shifts would be scanned; fail with a clear message instead of
        # an argmax-of-empty-sequence error further down.
        raise ValueError("t_shift must be at least 1 sample")

    break_locs = np.linspace(-5, 5, 50)

    # The x window does not depend on the shift, so normalise it once.
    x_data = x[t_shift: -t_shift]
    x_norm = (x_data - np.mean(x_data)) / np.std(x_data)

    n_y = len(y)
    h2_shift = []  # piecewise-fit score per time shift
    r2_shift = []  # squared linear correlation per time shift
    for i in range(2 * t_shift):
        # Cut y to the window matching this shift and standardise it.
        y_data = y[2 * t_shift - i: n_y - i]
        y_norm = (y_data - np.mean(y_data)) / np.std(y_data)

        # Non-linear fit
        g = pwlf.PiecewiseLinFit(x_norm, y_norm)
        g.fit_with_breaks(break_locs)
        expect = np.mean(np.abs(y_norm - g.predict(x_norm)))
        h2_shift.append(1 - expect**2 / np.var(y_norm))

        # Linear fit
        r2_shift.append(np.corrcoef(x_norm, y_norm)[0, 1]**2)

    h2_best = int(np.argmax(np.asarray(h2_shift)))
    r2_best = int(np.argmax(np.asarray(r2_shift)))
    h2 = h2_shift[h2_best]
    r2 = r2_shift[r2_best]
    h2_dt = h2_best - t_shift  # dt for best correlation from x to y
    r2_dt = r2_best - t_shift  # dt for best correlation from x to y
    return r2, h2, r2_dt, h2_dt, r2_shift, h2_shift
def test_predict_with_custom_param(self):
    """Smoke test: ``predict`` accepts explicit ``beta`` and ``breaks``."""
    x = np.random.random(20)
    model = pwlf.PiecewiseLinFit(x, np.random.random(20))
    custom_beta = np.array((1e-4, 1e-2, 1e-3))
    custom_breaks = np.array((0.0, 0.5, 1.0))
    # No fit is run; the model parameters come from the arguments.
    model.predict(x, beta=custom_beta, breaks=custom_breaks)
def findPiecewiseFit(data):
    """Fit three linear segments to ``1 / y**2`` as a function of ``x``.

    Parameters
    ----------
    data : sequence
        ``data[0]`` holds the x values and ``data[1]`` the y values.

    Returns
    -------
    pwlf.PiecewiseLinFit
        The fitted model.
    """
    x = data[0]
    # Cast to float first: np.reciprocal on integer input performs integer
    # division and would return zeros for every |y| > 1.
    y = np.asarray(data[1], dtype=float)
    y = np.reciprocal(y * y)
    piecewiseFit = pwlf.PiecewiseLinFit(x, y)
    # The original author noted this gets very slow at about 6 segments.
    piecewiseFit.fit(3)
    return piecewiseFit
def test_break_point_diff_x0_4(self):
    """Fit with the x data as breaks, third break moved to 1.4; expect ssr ~ 0.

    Original note: checks a duplicate break point in a different location.
    """
    breaks = self.x_small.copy()
    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    breaks[2] = 1.4
    residual = model.fit_with_breaks(breaks)
    self.assertTrue(np.isclose(residual, 0.0))
def test_se_no_fit(self):
    """``standard_errors`` must raise ``AttributeError`` when no fit has been run."""
    my_pwlf_0 = pwlf.PiecewiseLinFit(self.x_sin, self.y_sin)
    with self.assertRaises(AttributeError):
        my_pwlf_0.standard_errors()
def test_single_force_break_point1(self):
    """A fit forced through ``(-0.5, -0.5)`` must predict that point."""
    forced_x, forced_y = [-0.5], [-0.5]
    model = pwlf.PiecewiseLinFit(self.x_small, self.y_small)
    model.fit_with_breaks_force_points([0.2, 0.7], forced_x, forced_y)
    prediction = model.predict(forced_x)
    self.assertTrue(np.isclose(forced_y, prediction))
def test_pv_no_fit(self):
    """``prediction_variance`` must raise ``AttributeError`` when no fit has been run."""
    my_pwlf_0 = pwlf.PiecewiseLinFit(self.x_sin, self.y_sin)
    with self.assertRaises(AttributeError):
        my_pwlf_0.prediction_variance(self.x_sin)