def train(self, data, **kwargs):
    """Fit the quantile auto-regression coefficients on ``data``.

    The series is lagged ``self.order`` times and one quantile regression is
    fitted per required quantile: the median always, ``alpha`` and
    ``1 - alpha`` when ``self.alpha`` is set, and a 0.05-spaced grid of
    symmetric quantile pairs when ``self.dist`` is truthy.  Results are
    stored on the instance (``mean_qt``, ``upper_qt``, ``lower_qt``,
    ``dist_qt``) and ``shortname`` is refreshed.  ``kwargs`` is accepted but
    not used.
    """
    if isinstance(data, pd.DataFrame) and self.indexer is not None:
        data = self.indexer.get_data(data)

    lagdata, ndata = lagmat(data, maxlag=self.order, trim="both",
                            original='sep')

    def coefficients(quantile):
        # One independent fit per quantile, returned as a plain list.
        return list(QuantReg(ndata, lagdata).fit(quantile).params)

    interval_requested = self.alpha is not None

    median = coefficients(0.5)
    if interval_requested:
        upper = coefficients(1 - self.alpha)
        lower = coefficients(self.alpha)

    self.mean_qt = median
    if interval_requested:
        self.upper_qt = upper
        self.lower_qt = lower

    if self.dist:
        self.dist_qt = []
        for level in np.arange(0.05, 0.5, 0.05):
            below = coefficients(level)
            above = coefficients(1 - level)
            self.dist_qt.append([below, above])

    self.shortname = " - ".join(["QAR(" + str(self.order) + ")",
                                 str(self.alpha)])
def fit_predict(self, train, val=None, test=None, **kwa):
    """Fit a median regression on ``train`` and predict on ``val``/``test``.

    Each dataset is indexed as ``(features, target)``.  Returns the test
    predictions alone when ``val`` is ``None``, otherwise the pair
    ``(val_predictions, test_predictions)``.  ``kwa`` is accepted but unused.
    """
    endog, exog = train[1], train[0]
    median_fit = QuantReg(endog, exog).fit(q=0.5, max_iter=10000)

    if val is not None:
        val_pred = median_fit.predict(val[0])
        test_pred = median_fit.predict(test[0])
        return val_pred, test_pred

    return median_fit.predict(test[0])
def fit(self):
    """Build the (unfitted) quantile regression model on the training split.

    Reads both hyper-parameter attributes and the ``"kernelName"`` entry, as
    the original code does, although none of them is used to build the model.
    Only the training part of ``self.dataset.getDataset(2)`` is used; the
    result is stored in ``self.model``.
    """
    tuned = self.optimizedHyperParameters
    _fixed = self.fixedHyperParameters
    _kernel_name = tuned["kernelName"]

    splits = self.dataset.getDataset(2)
    train_x, train_y, _validation_x, _validation_y = splits

    self.model = QuantReg(train_y, train_x)
def test_alpha_summary():
    """``summary(alpha=.2)`` must label the interval with the 0.1/0.9 bounds."""
    design = np.array([[1, 0],
                       [0, 1],
                       [0, 2.1],
                       [0, 3.1]], dtype=np.float64)
    target = np.array([0, 1, 2, 3], dtype=np.float64)

    fitted = QuantReg(target, design).fit(0.5, bandwidth='chamberlain',
                                          use_t=True)
    rendered = str(fitted.summary(alpha=.2))

    assert '[0.025 0.975]' not in rendered
    assert '[0.1 0.9]' in rendered
def test_use_t_summary():
    """With ``use_t=True`` the summary must report t-based p-values."""
    design = np.array([[1, 0],
                       [0, 1],
                       [0, 2.1],
                       [0, 3.1]], dtype=np.float64)
    target = np.array([0, 1, 2, 3], dtype=np.float64)

    fitted = QuantReg(target, design).fit(0.5, bandwidth='chamberlain',
                                          use_t=True)
    rendered = str(fitted.summary())

    assert 'P>|t|' in rendered
    assert 'P>|z|' not in rendered
def test_fitted_residuals():
    """Fitted values, predictions and residuals must match the R reference."""
    engel = sm.datasets.engel.load_pandas().data
    y, X = dmatrices('foodexp ~ income', engel, return_type='dataframe')
    res = QuantReg(y, X).fit(q=.1)

    # Note: maxabs relative error with fitted is 1.789e-09
    comparisons = (
        (res.fittedvalues, Rquantreg.fittedvalues),
        (res.predict(), Rquantreg.fittedvalues),
        (res.resid, Rquantreg.residuals),
    )
    for actual, expected in comparisons:
        assert_almost_equal(np.array(actual), expected, 5)
def train_LAD(x, y):
    """
    Train a LAD (least absolute deviation) linear regression model and
    return the model's predictions on the training inputs.
    """
    design = sm.add_constant(x)
    median_fit = QuantReg(y, design).fit(q=0.5)
    return median_fit.predict(design)
def test_zero_resid():
    """Smoke/regression test for fits that produce (near-)zero residuals."""
    # smoke and regression tests
    y = np.array([0, 1, 2, 3], dtype=np.float64)

    # (design rows, expected params, expected bse, expected residuals)
    cases = [
        (
            [[1, 0], [0, 1], [0, 2.1], [0, 3.1]],
            [0.0, 0.96774163],
            [0.0447576, 0.01154867],
            [0.0, 3.22583680e-02, -3.22574272e-02, 9.40732912e-07],
        ),
        (
            [[1, 0], [0.1, 1], [0, 2.1], [0, 3.1]],
            [9.99982796e-08, 9.67741630e-01],
            [0.04455029, 0.01155251],
            [-9.99982796e-08, 3.22583598e-02, -3.22574234e-02,
             9.46361860e-07],
        ),
    ]

    for rows, want_params, want_bse, want_resid in cases:
        X = np.array(rows, dtype=np.float64)
        res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain')  # 'bofinger')
        res.summary()

        checks = (
            (res.params, want_params),
            (res.bse, want_bse),
            (res.resid, want_resid),
        )
        for got, want in checks:
            assert_allclose(got, np.array(want), rtol=1e-4, atol=1e-20)
def calcuSlope(i, LogData, SeqDepth, Genes, Tau):
    """Return the quantile-regression slope of one gene against log depth.

    Parameters
    ----------
    i : int
        Position of the gene in ``Genes``.
    LogData : object indexable with ``.loc[gene].values``
        Per-gene expression values; NaN entries are ignored.
    SeqDepth : object exposing ``.values``
        Sequencing depths; only strictly positive entries are used.
    Genes : sequence
        Gene labels; its length drives the progress report.
    Tau : float
        Quantile passed to ``QuantReg.fit``.

    Returns
    -------
    The fitted coefficient at position 1, i.e. the one for log depth (the
    constant is prepended by ``add_constant``).
    """
    # Progress is printed roughly every 10% of the genes.  The step is
    # clamped to 1 because round(len(Genes) / 10) is 0 for short gene lists,
    # which previously raised ZeroDivisionError in the modulo below.
    step = max(1, round(len(Genes) / 10))
    if i % step == 0:
        print(i / step * 10, '%')

    X = Genes[i]
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        expression = LogData.loc[X].values
        depth = SeqDepth.values
        validIdx = np.logical_not(np.isnan(expression)) & (depth > 0)
        mod = QuantReg(expression[validIdx],
                       tools.add_constant(np.log(depth[validIdx])))
        # Formula-based alternative kept from the original for reference:
        # mod = smf.quantreg('response ~ variable',
        #     pd.DataFrame({'response': LogData.loc[X], 'variable': np.log(SeqDepth)}))
        slope = mod.fit(q=Tau).params[1]
    return slope
def forecaster(returns, ff, loss='MSE'):
    """Produce one prediction per date from '200201' onwards.

    For every date the model selected by ``loss`` is fitted on the rows from
    '199801' up to and including that date, using the factor columns
    ``MktPremium``, ``HML`` and ``Mom``, and then evaluated on that date's
    factors.  Returns ``(name, output)`` where ``name`` is the first column
    of ``returns`` and ``output`` the list of predictions.

    Unknown ``loss`` values fall back to ordinary least squares.
    """
    # NOTE(review): the training slice ends at the date being predicted, so
    # each prediction is in-sample - confirm this is intended.
    sklearn_models = {
        'MSE': LinearRegression,
        'Ridge': Ridge,
        'Lasso': Lasso,
        'Hub': HuberRegressor,
        'ElasticNet': ElasticNet,
    }
    quantile_levels = {'LAD': 0.5, '1Q': 0.25, '3Q': 0.75}
    # Losses whose predictions come back one-dimensional.
    flat_prediction_losses = ['Lasso', 'Hub', 'ElasticNet', 'LAD', '1Q', '3Q']

    columnNames = ['MktPremium', 'HML', 'Mom']
    name = returns.columns.tolist()[0]
    dataset = ff.merge(returns, left_index=True, right_index=True)
    dates = sorted(list(ff.index))
    first = dates.index('200201')

    output = []
    for current in dates[first:]:
        window = dataset.loc['199801':current, :]
        trainX = window[columnNames]
        trainY = window[[name]]

        # The scikit-learn estimator is always fitted first, exactly as in
        # the original, even when a quantile loss replaces its prediction.
        estimator = sklearn_models.get(loss, LinearRegression)()
        estimator.fit(trainX, trainY)

        testX = pd.DataFrame(dataset.loc[current, :]).T[columnNames]
        prediction = estimator.predict(testX)

        if loss in quantile_levels:
            quantile_model = QuantReg(endog=trainY, exog=trainX)
            res = quantile_model.fit(q=quantile_levels[loss])
            prediction = quantile_model.predict(res.params, exog=testX)

        head = prediction[0]
        output.append(head if loss in flat_prediction_losses else head[0])

    return (name, output)
def calcuTD(x, O, Y, SeqDepth, Grid, Tau):
    """Evaluate one ``(quantile, degree)`` grid point and return its slope.

    ``Grid[x]`` supplies the quantile used for the polynomial quantile fit
    and the polynomial degree.  The fitted curve is predicted at
    ``SeqDepth`` and, when all predictions are positive, a second quantile
    regression (at ``Tau``) of those predictions on ``SeqDepth`` gives the
    returned slope.  ``-50`` is returned whenever the polynomial basis or
    the fit fails, or when a prediction is not positive.
    """
    failure = -50

    TauGroup, degree = Grid[x]
    degree = int(degree)

    try:
        polyX, centre, scale, alpha, beta = poly.poly(O, degree)
    except Exception:
        return failure
    if polyX is None:
        return failure

    colVars = ['var_' + str(j) for j in range(degree)]
    response = pd.DataFrame({'Y': Y})
    basis = pd.DataFrame(polyX, columns=colVars)
    polydata = pd.concat([response, basis], axis=1)
    formula = 'Y~' + '+'.join(colVars)

    try:
        rqfit = smf.quantreg(formula, polydata).fit(q=TauGroup)
        revX = poly.predict_poly(polyX, centre, scale, alpha, beta, SeqDepth)
        revX = pd.DataFrame(revX, columns=colVars)
        pdvalsrq = rqfit.predict(revX)
        if min(pdvalsrq) > 0:
            design = tools.add_constant(SeqDepth)
            return QuantReg(pdvalsrq.values, design).fit(q=Tau).params[1]
    except Exception:
        return failure

    return failure
def test_collinear_matrix():
    """A design with an all-zero column must still yield one param per column."""
    rows = [
        [1, 0, .5],
        [1, 0, .8],
        [1, 0, 1.5],
        [1, 0, .25],
    ]
    X = np.array(rows, dtype=np.float64)
    y = np.array([0, 1, 2, 3], dtype=np.float64)

    res_collinear = QuantReg(y, X).fit(0.5)

    n_columns = X.shape[1]
    assert len(res_collinear.params) == n_columns
def setup_class(cls):
    """Fit the q=0.75 Engel model (Epanechnikov kernel, Hall-Sheather
    bandwidth) and pair it with its stored reference results."""
    engel = sm.datasets.engel.load_pandas().data
    y, X = dmatrices('foodexp ~ income', engel, return_type='dataframe')

    fit_options = {
        'q': .75,
        'vcov': 'iid',
        'kernel': 'epa',
        'bandwidth': 'hsheather',
    }
    cls.res1 = QuantReg(y, X).fit(**fit_options)
    cls.res2 = epanechnikov_hsheather_q75
def test_nontrivial_singular_matrix():
    """A duplicated regressor must not break the fit or its predictions."""
    n_obs = 1000
    # Draw order is kept identical to the original so seeded runs match.
    x_one = np.random.random(n_obs)
    x_two = np.random.random(n_obs) * 10
    x_three = np.random.random(n_obs)
    intercept = np.ones(n_obs)
    y = np.random.random(n_obs) * 5

    # x_one appears twice, which makes the design rank deficient.
    X = np.column_stack((intercept, x_one, x_two, x_three, x_one))
    n_columns = X.shape[1]
    assert np.linalg.matrix_rank(X) < n_columns

    res_singular = QuantReg(y, X).fit(0.5)
    assert len(res_singular.params) == n_columns
    covariance_rank = np.linalg.matrix_rank(res_singular.cov_params())
    assert covariance_rank == n_columns - 1

    # prediction is correct even with singular exog
    res_ns = QuantReg(y, X[:, :-1]).fit(0.5)
    assert_allclose(res_singular.fittedvalues, res_ns.fittedvalues,
                    rtol=0.01)
class SkQuantReg:
    """Minimal fit/predict wrapper around ``QuantReg`` for quantile ``tau``."""

    def __init__(self, tau):
        self.tau = tau

    def fit(self, X, y):
        """Fit the regression of ``y`` on ``X`` and return ``self``."""
        unfitted = QuantReg(y, X)
        self.m = unfitted.fit(self.tau)
        return self

    def predict(self, X):
        """Return the fitted model's predictions for ``X``."""
        fitted = self.m
        return fitted.predict(X)
class QuantileRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style estimator backed by statsmodels ``QuantReg``.

    Parameters
    ----------
    q : float, default 0.5
        Quantile to estimate.

    Attributes
    ----------
    model_ : the unfitted ``QuantReg`` model built by ``fit``.
    model_result_ : the fitted results object used by ``predict``.
    """

    def __init__(self, q=0.5):
        self.q = q

    @staticmethod
    def _with_intercept(X):
        # has_constant='add' forces the intercept column to be added.  With
        # the default ('skip') statsmodels silently omits it whenever X
        # already looks constant - e.g. when predicting a single row - which
        # makes the design width differ between fit and predict.
        return smapi.add_constant(X, has_constant='add')

    def fit(self, X, y):
        """Fit the quantile regression of ``y`` on ``X`` plus an intercept.

        Returns ``self``.
        """
        self.model_ = QuantReg(y, self._with_intercept(X))
        self.model_result_ = self.model_.fit(q=self.q)
        return self

    def predict(self, X):
        """Return predictions for ``X`` (an intercept column is added)."""
        return self.model_result_.predict(self._with_intercept(X))
def get_quantreg(_y, what="slope", q=0.5):
    """Quantile-regress ``_y`` on its own index and return one statistic.

    Parameters
    ----------
    _y : array-like
        Values regressed on ``0 .. len(_y) - 1`` plus a constant.
    what : {"slope", "pval", "intercept"}
        Statistic to return: the trend coefficient, its p-value, or the
        constant term.
    q : float, default 0.5
        Quantile to fit.  The original code accepted this argument but always
        fitted the median; it is now honoured.

    Returns
    -------
    The requested statistic; ``np.nan`` when ``_y`` contains masked values;
    ``None`` when ``what`` is not one of the recognised names.
    """
    if np.ma.is_masked(_y):
        return np.nan

    _x = sm.add_constant(np.arange(len(_y)))
    res = QuantReg(_y, _x).fit(q=q)

    if what == "slope":
        return res.params[1]
    if what == "pval":
        return res.pvalues[1]
    if what == "intercept":
        return res.params[0]
    # Unrecognised selector: kept as an implicit-None result for
    # backward compatibility with existing callers.
    return None
def plot_ar1_coef(series, granularity='30Min'):
    """
    Plot the first-order autoregression coefficient across quantiles.

    The price series is resampled to ``granularity`` (last value per bucket,
    forward filled), converted to returns, and the next-period return is
    quantile-regressed on the current return plus a constant for quantiles
    0.01 .. 0.98.  The coefficient and its confidence bounds are plotted
    against the quantile.

    input: pandas series of prices (time-indexed)
    output: plots graph

    Relies on a module-level ``vol_window`` for the rolling volatility window.
    """
    from statsmodels.regression.quantile_regression import QuantReg
    import matplotlib.pyplot as plt

    # Groupby granularity.  pd.Grouper replaces the removed pd.TimeGrouper
    # and .ffill() replaces the deprecated fillna(method='ffill').
    series = series.groupby(pd.Grouper(freq=granularity)).last().ffill()

    # returns and volatility
    ret = series.pct_change()
    vol = ret.rolling(vol_window).std()
    dff = pd.concat([ret, vol], axis=1, join='inner')
    dff.columns = ['ret', 'vol']

    # add constant
    dff = sm.add_constant(dff)

    # y-variable: next period's return
    dff = dff.assign(y=dff.ret.shift(-1))

    # Drop rows with missing or infinite values.  replace() returns a new
    # frame; the original discarded that result, so infinities were never
    # removed before fitting.
    dff = dff.replace([np.inf, -np.inf], np.nan).dropna()

    mod = QuantReg(endog=dff.y, exog=dff.loc[:, ['const', 'ret']])
    quantiles = np.arange(.01, .99, .01)

    def fit_model(q):
        # One row per quantile: [q, coefficient, lower bound, upper bound].
        res = mod.fit(q=q)
        return [q, res.params['ret']] + res.conf_int().loc['ret'].tolist()

    models = pd.DataFrame([fit_model(x) for x in quantiles],
                          columns=['q', 'b', 'lb', 'ub'])

    # plot the quantile regression params
    plt.title('AR1 Coefficient with {} granularity'.format(granularity))
    plt.plot(models.q, models.b, color='b', label='1st Order AutoRegression')
    plt.plot(models.q, models.ub, linestyle='dotted', color='b')
    plt.plot(models.q, models.lb, linestyle='dotted', color='b')
    plt.axhline(y=0, color='black', linestyle='--')
    plt.legend()
    plt.show()
class QuantileRegression:
    """Quantile regression wrapper

    It can work on sklearn pipelines

    Parameters
    ----------
    quantile : float, default 0.5
        Quantile to estimate.
    add_intercept : bool, default True
        Whether a constant column is added to ``X`` before fitting and
        predicting.

    Example
    -------
    >>> from sktools import QuantileRegression
    >>> from sklearn.datasets import load_boston
    >>> boston = load_boston()['data']
    >>> y = load_boston()['target']
    >>> qr = QuantileRegression(quantile=0.9)
    >>> qr.fit(boston, y)
    >>> qr.predict(boston)[0:5].round(2)
    array([34.87, 28.98, 34.86, 32.67, 32.52])
    """

    def __init__(self, quantile=0.5, add_intercept=True):
        self.quantile = quantile
        self.add_intercept = add_intercept
        self.regressor = None
        self.regressor_fit = None

    def preprocess(self, X):
        """Return a copy of ``X``, with an intercept column when enabled."""
        X = X.copy()
        if self.add_intercept:
            # has_constant='add' forces the column to be added.  The default
            # ('skip') omits it whenever X already looks constant - e.g. a
            # single-row X at predict time - so the design matrix would not
            # match the one used for fitting.
            X = sm.add_constant(X, has_constant='add')
        return X

    def fit(self, X, y):
        """Fit the quantile regression of ``y`` on ``X``.

        Returns ``self`` so the estimator can be chained and used where the
        scikit-learn ``fit`` contract is expected (previously returned None).
        """
        X = self.preprocess(X)
        self.regressor = QuantReg(y, X)
        self.regressor_fit = self.regressor.fit(q=self.quantile)
        return self

    def predict(self, X, y=None):
        """Return predictions for ``X``; ``y`` is accepted but ignored."""
        X = self.preprocess(X)
        return self.regressor_fit.predict(X)
# Script (Python 2 syntax): sample a noisy function, build a feature matrix
# with FeaturesNG over the centres/widths returned by ConstructRBF, fit three
# quantile regressions on it and dump the raw samples to /tmp/data.dat.
import time

# Bounds of the 2-D input domain.
xmin = [-1., -1.]
xmax = [2., 3.]
mu, invSig = ConstructRBF(xmin, xmax, [3, 3])

t0 = time.time()
data_x, data_f = GenerateSample(xmin, xmax, N_sample=300, Func=Func, NoiseFunc=NoiseFunc)
print 'GenerateSample/Computation time:', time.time() - t0

t0 = time.time()
# One feature row per sample; used as the regression design matrix.
Theta = np.array([FeaturesNG(x, mu, invSig) for x in data_x])
quant_reg = QuantReg(data_f, Theta)
fit1 = quant_reg.fit(q=0.1)
fit5 = quant_reg.fit(q=0.5)
# NOTE(review): named "9" but fitted at q=0.95 - confirm which is intended.
fit9 = quant_reg.fit(q=0.95)
w1 = fit1.params
w5 = fit5.params
w9 = fit9.params
print fit9.summary()
print 'Parameters w1:', w1
print 'Parameters w5:', w5
print 'Parameters w9:', w9
print 'QuantReg/Computation time:', time.time() - t0

# Write "x0 x1 f" per sample.
# NOTE(review): no close() for fp is visible in this part of the file.
fp = file('/tmp/data.dat', 'w')
for x, f in zip(data_x, data_f):
    fp.write('%f %f %f\n' % (x[0], x[1], f))
def forecaster(returns, ff, loss='MSE'):
    """Rolling 120-row factor-model forecast of excess returns.

    Parameters
    ----------
    returns : pandas.DataFrame
        Its first column holds the asset returns; the column name is
        returned as ``name``.
    ff : pandas.DataFrame
        Factor data containing ``RF`` and the regressors ``Mkt.Rf``, ``HML``,
        ``Mom``, ``RMW`` and ``CMA``; merged with ``returns`` on the index.
    loss : str, default 'MSE'
        'MSE', 'Ridge', 'Lasso' or 'Hub' select a scikit-learn estimator;
        'LAD', '1Q' and '3Q' select a quantile regression at 0.5, 0.25 and
        0.75.  Any other value falls back to ordinary least squares.

    Returns
    -------
    tuple
        ``(name, output, factorLoadings, varianceOfErrors)`` with one entry
        per forecast row.  Windows whose target contains a missing value
        yield ``nan``, a ``(1, 5)`` zero array and ``nan`` respectively.

    Notes
    -----
    Residuals are flattened to one dimension before the variance is taken.
    The original subtracted an ``(n,)`` prediction from an ``(n, 1)`` target
    for the Lasso, Huber and quantile losses, which broadcast to ``(n, n)``
    and produced a wrong variance.  Only the selected model is fitted; the
    original also fitted an unused least-squares model before every quantile
    regression.
    """
    quantile_levels = {'LAD': 0.5, '1Q': 0.25, '3Q': 0.75}
    sklearn_models = {
        'MSE': LinearRegression,
        'Ridge': Ridge,
        'Lasso': Lasso,
        'Hub': HuberRegressor,
    }
    regressors = ['Mkt.Rf', 'HML', 'Mom', 'RMW', 'CMA']
    window = 120

    output = []
    factorLoadings = []
    varianceOfErrors = []

    df = ff.merge(returns, left_index=True, right_index=True)
    name = returns.columns.tolist()[0]
    df[name] = df[name] - df['RF']  # work with excess returns

    for j in range(window, len(df.index)):
        trainData = df.iloc[(j - window):j, :]
        trainX = trainData[regressors]
        trainY = trainData[[name]]

        if trainY.isnull().values.any():
            output.append(np.nan)
            factorLoadings.append(np.zeros((1, 5)))
            varianceOfErrors.append(np.nan)
            continue

        testX = pd.DataFrame(df.iloc[j, :]).T[regressors]

        if loss in quantile_levels:
            model = QuantReg(endog=trainY, exog=trainX)
            res = model.fit(q=quantile_levels[loss])
            loadings = np.array(res.params)
            fitted = model.predict(res.params, exog=trainX)
            prediction = model.predict(res.params, exog=testX)
        else:
            model = sklearn_models.get(loss, LinearRegression)()
            model.fit(trainX, trainY)
            loadings = model.coef_
            fitted = model.predict(trainX)
            prediction = model.predict(testX)

        factorLoadings.append(loadings)

        # Flatten both sides so the subtraction is element-wise whatever
        # shape the estimator returns.
        residuals = np.asarray(trainY).ravel() - np.asarray(fitted).ravel()
        varianceOfErrors.append(float(np.var(residuals)))

        # Predictions are 1-D or 2-D depending on the estimator; take the
        # single forecast value either way.
        output.append(np.asarray(prediction).ravel()[0])

    return (name, output, factorLoadings, varianceOfErrors)
def train_predict_stacking_linear_regression(df_learning, df_prod, l_tuple_strategy_normalised):
    """Stack per-strategy quantile forecasts with one quantile regression per quantile.

    For every quantile in ``constants.LIST_QUANTILE`` the matching strategy
    columns are selected, columns that contain nulls, are constant in the
    weighted sample, or duplicate another column are dropped, and a
    ``QuantReg`` is fitted on a weighted bootstrap sample of ``df_learning``
    with ``sales`` as target.  Predictions are written to a
    ``quantile_<q>`` column of both frames; when every remaining column has
    a single unique value in the sample, 0 is written instead.

    :param df_learning: learning frame; must hold the strategy columns,
        ``weight`` and ``sales``. Modified in place.
    :param df_prod: production frame holding the same strategy columns.
        Modified in place.
    :param l_tuple_strategy_normalised: iterable of
        ``(strategy, normalize_by)`` pairs; ``normalize_by`` may be None.
    :return: ``(df_learning, df_prod)``
    """
    for quantile in constants.LIST_QUANTILE:
        # Build the candidate feature names for this quantile.
        to_keep = []
        for strategy, normalize_by in l_tuple_strategy_normalised:
            str_normalized = '_normed_by_' + normalize_by if normalize_by is not None else ''
            to_keep.append('{}{}_quantile_{:.3f}'.format(
                strategy, str_normalized, quantile))

        # Remove NA columns
        to_keep = df_learning[to_keep].notnull().all()
        to_keep = to_keep[to_keep].index.tolist()

        # We need to remove constants columns from the sampled data
        # (fixed random_state, so the same sample is drawn for every quantile)
        df_learning_weighted = df_learning.sample(10000, weights='weight', replace=True, random_state=1)

        # Remove constants columns
        cols_constants = df_learning_weighted[to_keep].std() == 0
        cols_constants = cols_constants[cols_constants].index.tolist()
        for col in cols_constants:
            to_keep.remove(col)

        # # Remove correlated features
        # # Create correlation matrix
        # corr_matrix = df_learning[to_keep].corr().abs().fillna(1)
        # # Select upper triangle of correlation matrix
        # upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
        # # Find index of feature columns with correlation greater than 0.95
        # to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
        # to_keep.remove(to_drop)

        # Drop duplicates columns
        def getDuplicateColumns(df):
            '''
            Get a list of duplicate columns.

            It iterates over all the columns in the dataframe and finds the
            columns whose contents duplicate those of an earlier column.

            :param df: Dataframe object
            :return: List of columns whose contents are duplicates.
            '''
            duplicateColumnNames = set()
            # Iterate over all the columns in dataframe
            for x in range(df.shape[1]):
                # Select column at xth index.
                col = df.iloc[:, x]
                # Iterate over all the columns in DataFrame from (x+1)th index till end
                for y in range(x + 1, df.shape[1]):
                    # Select column at yth index.
                    otherCol = df.iloc[:, y]
                    # Check if the two columns at index x and y are equal
                    if col.equals(otherCol):
                        duplicateColumnNames.add(df.columns.values[y])
            return list(duplicateColumnNames)

        cols_duplicate = getDuplicateColumns(df_learning_weighted[to_keep])
        for cols in cols_duplicate:
            to_keep.remove(cols)
        # to_keep = df_learning_weighted[to_keep].T.drop_duplicates().T.columns

        # Not efficient but ok
        X_learning_weighted = df_learning_weighted[to_keep].fillna(0)
        X_learning = df_learning[to_keep].fillna(0)
        X_prod = df_prod[to_keep].fillna(0)
        y_learning_weighted = df_learning_weighted['sales']
        # weight_learning = df_learning['weight']

        # Only fit when at least one feature takes more than one value.
        if X_learning_weighted.nunique().max() != 1:
            linear_model = QuantReg(y_learning_weighted, X_learning_weighted)
            linear_model = linear_model.fit(q=quantile)
            # print(linear_model.summary())
            df_learning['quantile_{:.3f}'.format(
                quantile)] = linear_model.predict(X_learning)
            df_prod['quantile_{:.3f}'.format(quantile)] = linear_model.predict(
                X_prod)
        else:
            df_learning['quantile_{:.3f}'.format(quantile)] = 0
            df_prod['quantile_{:.3f}'.format(quantile)] = 0
    return df_learning, df_prod
#rankg2 = df['V3.%s' % first_min_second[1]].argsort().values #response = np.array((rankg1 - rankg2), dtype='d')# ** 3 #response = MinMaxScaler().fit_transform(response[:, None])[:, 0] response = pd.DataFrame(response, columns=['Rank%s-Rank%s' % first_min_second]) explanatory = df[features].copy() #explanatory = pd.DataFrame(MinMaxScaler().fit_transform(explanatory.copy().values), # columns=explanatory.columns) #explanatory['intercept'] = np.ones(len(explanatory), dtype='d') explanatory['is_catole'] = np.array(df['bairro'] == 'catole', dtype='d') explanatory['is_centro'] = np.array(df['bairro'] == 'centro', dtype='d') explanatory['is_liberdade'] = np.array(df['bairro'] == 'liberdade', dtype='d') model = QuantReg(response, explanatory) max_left = 0.0 max_left_q = 0 max_right = 0.0 max_right_q = 0 rsqs = [] qs = [] util = [] values = {} for name in explanatory.columns: values[name] = np.zeros(10) i = 0 for q in np.linspace(0.05, 0.95, 5): values[name][i] = 0
def QuantileRegression(X, Y, quantile):
    """Return the coefficients of the ``quantile`` regression of ``Y`` on ``X``."""
    return QuantReg(Y, X).fit(q=quantile).params
def fit(self, X, y):
    """Fit the quantile regression of ``y`` on ``X`` at ``self.tau``.

    The fitted results are stored in ``self.m``; returns ``self``.
    """
    unfitted = QuantReg(y, X)
    self.m = unfitted.fit(self.tau)
    return self
def find_knee(X, Y, q=0.75, conf_level=0.999, q_init=0.5, n_knees=1):
    """
    Finds the knee of the XY curve (i.e. where Y shoots up in "non-linear" fashion with respect to X)
    Assumes that Y is noisily increasing with X.

    A quantile regression at level q is fitted on the first q_init fraction of the points (sorted by X).
    The remaining points are then scanned in X order and a knee is reported at the first point where the
    running count of observations above the regression line is too large, at the given confidence level,
    under a Binomial(n, 1 - q) model.

    The choice of q_init, q and conf_level reflects the subjectivity of the problem.
    - larger q_init will detect knees 'later' (i.e. for higher values of X or miss them altogether)
    - larger conf_level will detect knees 'later'
    - larger q will detect knees 'earlier'

    Example (M/M/1):
        X = np.random.uniform(low=0, high=1, size=100)
        Y = np.maximum(0, 1.0 / (1-X) + np.random.normal(0, 1, size=100))
        plt.scatter(X, Y)
        find_knee(X, Y, q=0.5, conf_level=0.999, q_init = 0.5)
        find_knee(X, Y, q=0.25, conf_level=0.999, q_init = 0.5)
        find_knee(X, Y, q=0.75, conf_level=0.999, q_init = 0.5)

    :param X: independent values (n x 1 list or np array)
    :param Y: dependent values (n x 1 list or np array)
    :param q: knee quantile level. The lower q, the less sensitive to knee detection,
              i.e. the knee, if any, will be detected at higher values of X.
    :param q_init: the percentile value where we start looking for the knee,
                   e.g. if q_init = 0.5, we look for knees past the median of X.
    :param conf_level: knee detection confidence level. Set very high if we want knee certainty.
    :param n_knees: number of knees to detect
    :return: knee list (X values of the detected knees; empty if none is found)
    """
    # Input validation.
    # NOTE(review): invalid input prints a message and terminates the process
    # with exit status 0 instead of raising.
    if len(X) != len(Y):
        print 'invalid input lengths. X: ' + str(len(X)) + ' Y: ' + str(len(Y))
        sys.exit(0)
    check_prob(q, 'q')
    check_prob(q_init, 'q_init')
    check_prob(conf_level, 'conf_level')
    if not(isinstance(n_knees, int)) or n_knees < 0:
        print 'invalid n_knees: ' + str(n_knees)
        sys.exit(0)

    # close recursion
    if n_knees == 0:
        return []

    # sort by increasing X and add 1's for the intercept
    x0 = np.ones(len(X))  # add 1's for intercept
    Z = zip(x0, X, Y)  # Python 2: zip returns a list, sorted in place below
    Z.sort(key=itemgetter(1))

    # Z_q: points used to fit the regression; Z_k: points scanned for a knee.
    init_cnt = int(q_init * len(Z))
    Z_q, Z_k = Z[:init_cnt], Z[init_cnt:]
    X_q, Y_q = np.array([z[:-1] for z in Z_q]), np.array([z[-1] for z in Z_q])
    q_reg_obj = QuantReg(endog=Y_q, exog=X_q)
    mdl = q_reg_obj.fit(q=q)

    ones, X_k, Y_k = zip(*Z_k)  # already sorted!
    Y_preds = mdl.predict(zip(ones, X_k))  # predict all values from q-itle onwards
    signs = np.sign(Y_k - Y_preds)  # 1 if positive, -1 if negative, 0 if equal
    upr = np.maximum(0, signs)
    cum_upr = int((1.0 - q) * init_cnt) + np.cumsum(upr)  # cum_upr: count of points over regression line
    ttl_cnt = range(init_cnt, len(Z))  # total running count
    rv = sp.binom(n=ttl_cnt, p=1.0 - q)
    diffs = 1.0 - conf_level - rv.sf(x=cum_upr - 1)
    knee_idx = find_ge_idx(diffs, 0.0)  # knee: the first time we have binom_test(p_val) < 1-conf_level
    x_knee = X_k[knee_idx] if knee_idx < len(X_k) else None

    if x_knee is not None:
        if n_knees > 1:
            # Look for further knees among the points at or beyond this one,
            # provided more than 10 of them remain.
            Z_n = [zn for zn in Z_k if zn[1] >= x_knee]
            if len(Z_n) > 10:
                ones, X_n, Y_n = zip(*Z_n)
                return [x_knee] + find_knee(X_n, Y_n, q=q, conf_level=conf_level, q_init=q_init, n_knees=n_knees - 1)
            else:
                return [x_knee]
        else:
            return [x_knee]
    else:
        return []
class QuantileRegressionPOD(POD):
    """
    Quantile regression based POD.

    **Available constructor:**

    QuantileRegressionPOD(*inputSample, outputSample, detection, noiseThres,
    saturationThres, boxCox*)

    Parameters
    ----------
    inputSample : 2-d sequence of float
        Vector of the defect sizes, of dimension 1.
    outputSample : 2-d sequence of float
        Vector of the signals, of dimension 1.
    detection : float
        Detection value of the signal.
    noiseThres : float
        Value for low censored data. Default is None.
    saturationThres : float
        Value for high censored data. Default is None
    boxCox : bool or float
        Enable or not the Box Cox transformation. If boxCox is a float, the Box
        Cox transformation is enabled with the given value. Default is False.

    Notes
    -----
    This class aims at building the POD based on a quantile regression
    model. The return POD model corresponds with an interpolate function built
    with the defect values computed for the given quantile as parameters.
    The default is 21 quantile values from 0.05 to 0.98. They can be
    user-defined using the method *setQuantile*.

    The confidence level is computed by bootstrap. The POD model at the given
    confidence level is also an interpolate function based on the defect
    quantile value computed at the given confidence level.

    The computeDetectionSize method calls the real quantile regression
    at the given probability level.

    A progress bar is shown if the verbosity is enabled. It can be disabled
    using the method *setVerbose*.
    """

    def __init__(self, inputSample=None, outputSample=None, detection=None, noiseThres=None, saturationThres=None, boxCox=False):

        # Default grid of quantile levels and verbosity flag.
        self._quantile = np.linspace(0.05, 0.98, 21)
        self._verbose = True

        # initialize the POD class
        super(QuantileRegressionPOD, self).__init__(inputSample, outputSample, detection, noiseThres, saturationThres, boxCox)
        # inherited attributes
        # self._simulationSize
        # self._detection
        # self._inputSample
        # self._outputSample
        # self._noiseThres
        # self._saturationThres
        # self._lambdaBoxCox
        # self._boxCox
        # self._size
        # self._dim
        # self._censored

        # assertion input dimension is 1
        assert (self._dim == 1), "Dimension of inputSample must be 1."

        if self._censored:
            logging.info('Censored data are not taken into account : the quantile ' + \
                         'regression model is only performed on filtered data.')

    def run(self):
        """
        Build the POD models.

        Notes
        -----
        This method build the quantile regression model. First the censored data
        are filtered if needed. The Box Cox transformation is performed if it is
        enabled. Then it builds the POD model for given data and computes using
        bootstrap all the defects quantile needed to build the POD model at the
        confidence level.
        """

        # Run the preliminary run of the POD class
        result = self._run(self._inputSample, self._outputSample, self._detection, self._noiseThres, self._saturationThres, self._boxCox, self._censored)

        # get some results
        self._defects = result['inputSample']
        self._signals = result['signals']
        self._detectionBoxCox = result['detectionBoxCox']
        defectsSize = self._defects.getSize()

        # create the quantile regression object
        # (design matrix: a column of ones followed by the defect sizes)
        X = ot.NumericalSample(defectsSize, [1, 0])
        X[:, 1] = self._defects
        self._algoQuantReg = QuantReg(np.array(self._signals), np.array(X))

        # Compute the defect quantile
        defectMax = self._defects.getMax()[0]
        defectList = []
        for probLevel in self._quantile:
            # fit the quantile regression and return the NMF
            model = self._buildModel(1. - probLevel)
            # Solve the model == detectionBoxCox with defects
            # boundaries = [0, defectMax]
            defectList.append(ot.Brent().solve(model, self._detectionBoxCox, 0, defectMax))
        # create support of the interpolating function including
        # point (0, 0) and point (defectMax, max(quantile))
        xvalue = np.hstack([0, defectList, defectMax])
        yvalue = np.hstack([0., self._quantile, self._quantile.max()])
        interpModel = interp1d(xvalue, yvalue, kind='linear')
        self._PODmodel = ot.PythonFunction(1, 1, interpModel)

        ############ Confidence interval with bootstrap ########################
        # Compute a NsimulationSize defect sizes for all quantiles
        data = ot.NumericalSample(self._size, 2)
        data[:, 0] = self._inputSample
        data[:, 1] = self._outputSample
        # bootstrap of the data
        bootstrapExp = ot.BootstrapExperiment(data)
        # create a numerical sample which contains for all simulations the
        # defect quantile value. The goal is to compute the QuantilePerComponent
        # of the simulation for each defect quantile (columns)
        self._defectsPerQuantile = ot.NumericalSample(self._simulationSize, self._quantile.size)
        for i in range(self._simulationSize):
            # generate a sample with replacement within data of the same size
            bootstrapData = bootstrapExp.generate()
            # run the preliminary analysis : censore checking and box cox
            result = self._run(bootstrapData[:,0], bootstrapData[:,1], self._detection, self._noiseThres, self._saturationThres, self._boxCox, self._censored)

            # get some results
            defects = result['inputSample']
            signals = result['signals']
            detectionBoxCox = result['detectionBoxCox']
            defectsSize = defects.getSize()

            # new quantile regression algorithm
            X = ot.NumericalSample(defectsSize, [1, 0])
            X[:, 1] = defects
            algoQuantReg = QuantReg(np.array(signals), np.array(X))

            # compute the quantile defects
            defectMax = defects.getMax()[0]
            defectList = []
            for probLevel in self._quantile:
                fit = algoQuantReg.fit(1. - probLevel, max_iter=300, p_tol=1e-2)
                # The closure is used immediately by the solver below, before
                # `fit` is rebound by the next iteration.
                def model(x):
                    X = ot.NumericalPoint([1, x[0]])
                    return ot.NumericalPoint(fit.predict(X))
                model = ot.PythonFunction(1, 1, model)
                # Solve the model == detectionBoxCox with defects
                # boundaries = [-infinity, defectMax] : it allows negative defects
                # when for small prob level, there is no intersection with
                # the detection threshold for positive defects
                defectList.append(ot.Brent().solve(model, detectionBoxCox, -ot.SpecFunc.MaxNumericalScalar, defectMax))
            # add the quantile in the numerical sample as the ith simulation
            self._defectsPerQuantile[i, :] = defectList
            if self._verbose:
                updateProgress(i, self._simulationSize, 'Computing defect quantile')

    def getPODModel(self):
        """
        Accessor to the POD model.

        Returns
        -------
        PODModel : :py:class:`openturns.NumericalMathFunction`
            The function which computes the probability of detection for a
            given defect value.
        """
        return self._PODmodel

    def getPODCLModel(self, confidenceLevel=0.95):
        """
        Accessor to the POD model at a given confidence level.

        Parameters
        ----------
        confidenceLevel : float
            The confidence level the POD must be computed. Default is 0.95

        Returns
        -------
        PODModelCl : :py:class:`openturns.NumericalMathFunction`
            The function which computes the probability of detection for a
            given defect value at the confidence level given as parameter.
        """
        # Compute the quantile at the given confidence level for each
        # defect quantile and build the interpolate function.
        defectsQuantile = self._defectsPerQuantile.computeQuantilePerComponent(
                            confidenceLevel)

        xvalue = np.hstack([0, np.array(defectsQuantile), self._defects.getMax()[0]])
        yvalue = np.hstack([0., self._quantile, self._quantile.max()])
        interpModel = interp1d(xvalue, yvalue, kind='linear')
        PODmodelCl = ot.PythonFunction(1, 1, interpModel)

        return PODmodelCl

    def getR2(self, quantile):
        """
        Accessor to the pseudo R2 value.

        Parameters
        ----------
        quantile : float
            The quantile value for which the regression is performed.

        Returns
        -------
        R2 : float
            The pseudo R2 value.
        """
        return self._algoQuantReg.fit(quantile).prsquared

    def getQuantile(self):
        """
        Accessor to the quantile list for the regression.
        """
        return self._quantile

    def setQuantile(self, quantile):
        """
        Accessor to the quantile list for the regression.

        Parameters
        ----------
        quantile : sequence of float
            The quantile value for which the regression is performed and the
            corresponding defect size is computed.

        Raises
        ------
        ValueError
            If any value is not strictly between 0 and 1.
        """
        # Flatten to a 1-d array and store the values in increasing order.
        quantile = np.hstack(np.array(quantile))
        quantile.sort()
        if quantile.max() >= 1 or quantile.min() <= 0:
            raise ValueError('Quantile values must range between ]0, 1[.')
        self._quantile = quantile

    @DocInherit # decorator to inherit the docstring from POD class
    @keepingArgs # decorator to keep the real signature
    def computeDetectionSize(self, probabilityLevel, confidenceLevel=None):

        defectMin = self._defects.getMin()[0]
        defectMax = self._defects.getMax()[0]
        # compute 'a90'
        model = self._buildModel(1. - probabilityLevel)
        # NOTE(review): the bare except below turns any failure, not only a
        # solver failure, into the generic Exception.
        try:
            detectionSize = ot.NumericalPointWithDescription(1, ot.Brent().solve(
                                        model, self._detectionBoxCox,
                                        defectMin, defectMax))
        except:
            raise Exception('The POD model does not contain, for the given ' + \
                             'defect interval, the wanted probability level.')
        description = ['a'+str(int(probabilityLevel*100))]

        # compute 'a90_95'
        if confidenceLevel is not None:
            modelCl = self.getPODCLModel(confidenceLevel)
            # The requested level must lie between the model values at both
            # ends of the defect interval for the solve below to be attempted.
            if not (modelCl([defectMin])[0] <= probabilityLevel <= modelCl([defectMax])[0]):
                raise Exception('The POD model at the confidence level does not '+\
                                 'contain, for the given defect interval, the '+\
                                 'wanted probability level.')
            detectionSize.add(ot.Brent().solve(modelCl, probabilityLevel, defectMin, defectMax))
            description.append('a'+str(int(probabilityLevel*100))+'/'\
                                                +str(int(confidenceLevel*100)))
        # add description to the NumericalPoint
        detectionSize.setDescription(description)
        return detectionSize

    @DocInherit # decorator to inherit the docstring from POD class
    @keepingArgs # decorator to keep the real signature
    def drawPOD(self, probabilityLevel=None, confidenceLevel=None, defectMin=None, defectMax=None, nbPt=100, name=None):

        # Default bounds are the extremes of the defect sizes; user-supplied
        # bounds must lie within them.
        if defectMin is None:
            defectMin = np.min(self._defects)
        else:
            if defectMin < np.min(self._defects):
                raise ValueError('DefectMin must be greater than the minimum ' + \
                                 'of the given defect sizes.')
            if defectMin > np.max(self._defects):
                raise ValueError('DefectMin must be lower than the maximum ' + \
                                 'of the given defect sizes.')
        if defectMax is None:
            defectMax = np.max(self._defects)
        else:
            if defectMax > np.max(self._defects):
                raise ValueError('DefectMax must be lower than the maximum ' + \
                                 'of the given defect sizes.')
            if defectMax < np.min(self._defects):
                raise ValueError('DefectMax must be greater than the minimum ' + \
                                 'of the given defect sizes.')

        if confidenceLevel is None:
            fig, ax = self._drawPOD(self.getPODModel(), None, probabilityLevel, confidenceLevel, defectMin, defectMax, nbPt, name)
        elif confidenceLevel is not None:
            fig, ax = self._drawPOD(self.getPODModel(), self.getPODCLModel(confidenceLevel), probabilityLevel, confidenceLevel, defectMin, defectMax, nbPt, name)

        ax.set_title('POD - Quantile regression model')
        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawLinearModel(self, probabilityLevel, name=None):
        """
        Draw the quantile regression prediction versus the true data.

        Parameters
        ----------
        probabilityLevel : float
            The probability level for which the quantile regression is performed
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # The regression is fitted at the complementary level.
        model = self._algoQuantReg.fit(1. - probabilityLevel)
        defects = self._defects
        signals = self._signals
        fittedSignals = model.fittedvalues

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(defects, signals, 'b.', label='Data', ms=9)
        ax.plot(defects, fittedSignals, 'r-', label='Linear regression model')
        ax.set_xlabel('Defects')
        ax.set_ylabel('Signals')
        ax.set_title('Quantile regression model at level (1 - ' + \
                     str(probabilityLevel) + ')')
        ax.grid()
        ax.legend(loc='upper left')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def getVerbose(self):
        """
        Accessor to the verbosity.

        Returns
        -------
        verbose : bool
            Enable or disable the verbosity. Default is True.
        """
        return self._verbose

    def setVerbose(self, verbose):
        """
        Accessor to the verbosity.

        Parameters
        ----------
        verbose : bool
            Enable or disable the verbosity.

        Raises
        ------
        TypeError
            If ``verbose`` is not exactly of type bool.
        """
        if type(verbose) is not bool:
            raise TypeError('The parameter is not a bool.')
        else:
            self._verbose = verbose

    def _buildModel(self, probabilityLevel):
        """
        Build the NumericalMathFunction at the given probabilityLevel.
        It is used in the run and in computeDetectionSize in order to do not
        use the interpolate function.
        """
        fit = self._algoQuantReg.fit(probabilityLevel, max_iter=300, p_tol=1e-2)
        # Wrap the fitted regression as a 1-d to 1-d function: the input
        # value is paired with a leading 1 for the intercept column.
        def model(x):
            X = ot.NumericalPoint([1, x[0]])
            return ot.NumericalPoint(fit.predict(X))
        return ot.PythonFunction(1, 1, model)
def run(self):
    """
    Build the POD models.

    Notes
    -----
    This method builds the quantile regression model. The data first go
    through the preliminary ``self._run`` step (according to the original
    notes: filtering of the censored data if needed and Box Cox
    transformation if it is enabled). The POD model is then built for the
    given data, and the defect quantiles needed for the POD model at a
    confidence level are computed by bootstrap.
    """
    # Preliminary run of the POD class on the full data set
    prelim = self._run(self._inputSample, self._outputSample,
                       self._detection, self._noiseThres,
                       self._saturationThres, self._boxCox, self._censored)
    self._defects = prelim['inputSample']
    self._signals = prelim['signals']
    self._detectionBoxCox = prelim['detectionBoxCox']
    nbDefects = self._defects.getSize()

    # Design matrix [1, defect] and the quantile regression object
    design = ot.NumericalSample(nbDefects, [1, 0])
    design[:, 1] = self._defects
    self._algoQuantReg = QuantReg(np.array(self._signals), np.array(design))

    # For every quantile, find the defect size at which the fitted model
    # equals the detection threshold, searching within [0, largestDefect].
    largestDefect = self._defects.getMax()[0]
    thresholdDefects = []
    for level in self._quantile:
        levelModel = self._buildModel(1. - level)
        crossing = ot.Brent().solve(levelModel, self._detectionBoxCox,
                                    0, largestDefect)
        thresholdDefects.append(crossing)

    # Support of the interpolating function, including point (0, 0) and
    # point (largestDefect, max(quantile))
    support = np.hstack([0, thresholdDefects, largestDefect])
    podValues = np.hstack([0., self._quantile, self._quantile.max()])
    interpolant = interp1d(support, podValues, kind='linear')
    self._PODmodel = ot.PythonFunction(1, 1, interpolant)

    ############ Confidence interval with bootstrap ########################
    # Gather inputs and outputs in one sample so they are resampled jointly
    pairedData = ot.NumericalSample(self._size, 2)
    pairedData[:, 0] = self._inputSample
    pairedData[:, 1] = self._outputSample
    resampler = ot.BootstrapExperiment(pairedData)

    # One row per simulation, one column per defect quantile. The goal is
    # to compute the QuantilePerComponent of the simulations for each
    # defect quantile (columns).
    self._defectsPerQuantile = ot.NumericalSample(self._simulationSize,
                                                  self._quantile.size)
    for i in range(self._simulationSize):
        # sample with replacement, same size as the data
        resampled = resampler.generate()

        # preliminary analysis of the resampled data
        prelim = self._run(resampled[:,0], resampled[:,1],
                           self._detection, self._noiseThres,
                           self._saturationThres, self._boxCox,
                           self._censored)
        bootDefects = prelim['inputSample']
        bootSignals = prelim['signals']
        bootDetection = prelim['detectionBoxCox']

        # quantile regression on the resampled data
        design = ot.NumericalSample(bootDefects.getSize(), [1, 0])
        design[:, 1] = bootDefects
        bootQuantReg = QuantReg(np.array(bootSignals), np.array(design))

        bootLargest = bootDefects.getMax()[0]
        bootCrossings = []
        for level in self._quantile:
            bootFit = bootQuantReg.fit(1. - level, max_iter=300, p_tol=1e-2)

            def predictSignal(x):
                designRow = ot.NumericalPoint([1, x[0]])
                return ot.NumericalPoint(bootFit.predict(designRow))

            bootModel = ot.PythonFunction(1, 1, predictSignal)

            # Lower bound is -infinity here: negative defects are allowed
            # because, for small probability levels, the model may not
            # cross the detection threshold at any positive defect.
            bootCrossings.append(ot.Brent().solve(
                bootModel, bootDetection,
                -ot.SpecFunc.MaxNumericalScalar, bootLargest))

        # store the quantiles of the ith simulation
        self._defectsPerQuantile[i, :] = bootCrossings
        if self._verbose:
            updateProgress(i, self._simulationSize,
                           'Computing defect quantile')
def setup_fun(kernel='gau', bandwidth='bofinger'):
    """Fit the Engel ``foodexp ~ income`` quantile regression.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``QuantReg.fit``. Default is 'gau'.
    bandwidth : str
        Bandwidth rule forwarded to ``QuantReg.fit``. Default is
        'bofinger'.

    Returns
    -------
    tuple
        The fitted statsmodels results and the entry of the module-level
        mapping ``d`` stored under ``(kernel, bandwidth)`` (presumably the
        matching reference results -- confirm where ``d`` is defined).
    """
    engel = sm.datasets.engel.load_pandas().data
    endog, exog = dmatrices('foodexp ~ income', engel,
                            return_type='dataframe')
    fitted = QuantReg(endog, exog).fit(vcov='iid', kernel=kernel,
                                       bandwidth=bandwidth)
    return fitted, d[(kernel, bandwidth)]
def fit(self, X, y):
    """Fit a quantile regression of ``y`` on the preprocessed ``X``.

    ``X`` first goes through ``self.preprocess``. The unfitted model is
    stored on ``self.regressor`` and the results of fitting it at quantile
    ``self.quantile`` on ``self.regressor_fit``.
    """
    design = self.preprocess(X)
    regressor = QuantReg(y, design)
    self.regressor = regressor
    self.regressor_fit = regressor.fit(q=self.quantile)
from scipy import stats
import statsmodels.api as sm
from statsmodels.regression.quantile_regression import QuantReg

# --- simulated data -------------------------------------------------------
# Sorted uniform regressor, polynomial design matrix with columns
# x**0 .. x**k_vars, four narrow Gaussian bumps and a skewed noise term.
sige = 0.1
nobs, k_vars = 500, 3
x = np.sort(np.random.uniform(-1, 1, size=nobs))
exog = np.vander(x, k_vars + 1)[:, ::-1]
bump_locs = np.linspace(-0.5, 0.75, 4)
mix = 0.1 * stats.norm.pdf(x[:, None], loc=bump_locs, scale=0.01).sum(1)
noise = sige * (np.random.randn(nobs) / 2 + 1)**3
y = exog.sum(1) + mix + noise

# --- fits: three quantile regressions and OLS for comparison ---------------
p = 0.5
res_qr = QuantReg(y, exog).fit(p)
res_qr2 = QuantReg(y, exog).fit(0.1)
res_qr3 = QuantReg(y, exog).fit(0.75)
res_ols = sm.OLS(y, exog).fit()

# Labels and coefficient vectors are kept in the same order.
labels = ['ols', 'qr 0.1', 'qr 0.5', 'qr 0.75']
params = [res_ols.params, res_qr2.params, res_qr.params, res_qr3.params]

# --- report and plot --------------------------------------------------------
plt.figure()
plt.plot(x, y, '.', alpha=0.5)
for lab, beta in zip(labels, params):
    print('%-8s' % lab, np.round(beta, 4))
    fitted = np.dot(exog, beta)
    lw = 2
    plt.plot(x, fitted, lw=lw, label=lab)
plt.legend()
# Train with early stopping, reporting on both the training and the
# validation set every 10 rounds.
evals = [(dtrain, 'train'), (dvalid_xy, 'eval')]
model = xgb.train(xgb_params, dtrain, num_boost_round=num_boost_rounds,
                  evals=evals, early_stopping_rounds=early_stopping_rounds,
                  verbose_eval=10)

# Predict the validation set with the trees up to the best iteration.
# NOTE(review): `ntree_limit` is an older xgboost argument -- confirm it is
# still accepted by the xgboost version in use.
valid_pred = model.predict(dvalid_x, ntree_limit=model.best_ntree_limit)
print("XGBoost validation set predictions:")
print(pd.DataFrame(valid_pred).head())
print("\nMean absolute validation error:")
# Print the score: as a bare expression statement it would be computed and
# then thrown away, leaving the header above without a value.
print(mean_absolute_error(y_valid, valid_pred))

if OPTIMIZE_FUDGE_FACTOR:
    # Median (q=.5) regression of the targets on the predictions alone, with
    # no intercept: its single coefficient is the multiplicative "fudge
    # factor" of the least-absolute-deviation fit.
    mod = QuantReg(y_valid, valid_pred)
    res = mod.fit(q=.5)
    print("\nLAD Fit for Fudge Factor:")
    print(res.summary())

    fudge = res.params[0]
    print("Optimized fudge factor:", fudge)
    print("\nMean absolute validation error with optimized fudge factor: ")
    print(mean_absolute_error(y_valid, fudge * valid_pred))

    # Shrink the factor by raising it to the scale-down power.
    fudge **= FUDGE_FACTOR_SCALEDOWN
    print("Scaled down fudge factor:", fudge)
    print("\nMean absolute validation error with scaled down fudge factor: ")
    print(mean_absolute_error(y_valid, fudge * valid_pred))
else:
    fudge = 1.0
# Huber
reg2 = HuberRegressor(epsilon = 1)
model2 = reg2.fit(x, y)
y_pred2 = model2.predict(x_test)

# L1 (median regression)
dfx = pd.DataFrame(x, columns = ['x'])
dfy = pd.DataFrame(y, columns = ['y'])
exog = sm.add_constant(dfx['x'])
endog = dfy['y']
dft = pd.DataFrame(x_test, columns = ['test'])
qrmodel = QuantReg(endog, exog)
result = qrmodel.fit(q=0.5)
# The estimates are read by position (0: constant, 1: slope) from a plain
# array: integer keys on a label-indexed pandas Series rely on a deprecated
# positional fallback. np.asarray also works if params is already an array.
qr_params = np.asarray(result.params)
ypred_qr = np.dot(dft, qr_params[1]) + qr_params[0]

# Student-t
tmodel = TLinearModel(endog, exog)
results = tmodel.fit(df=0.6)
# Same hand-written prediction: intercept + slope * test points.
t_params = np.asarray(results.params)
ypred_t = np.dot(dft, t_params[1]) + t_params[0]

# Plot
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
#     prod = orig[f1].values * orig[f2].values
#     orig[f1 + '_times_' + f2] = prod

# One float64 0/1 indicator column per neighbourhood of interest.
for _hood in ('catole', 'centro', 'liberdade'):
    orig['is_' + _hood] = np.array(df['bairro'] == _hood, dtype='d')

# Standardised copy of the design matrix, with the same column names.
_standardised = StandardScaler().fit_transform(orig.copy().values)
scaled = pd.DataFrame(_standardised, columns=orig.columns)

print(orig.shape)
assert orig.shape == scaled.shape


# In[5]:

# The model is built on the unscaled columns.
model = QuantReg(response, orig)


# In[6]:

# Print the fit summary at ten evenly spaced quantiles from 0.05 to 0.95,
# each followed by two blank lines.
for q in np.linspace(0.05, 0.95, 10):
    print(q)
    print(model.fit(q=q).summary())
    print()
    print()


# In[ ]:
#rankg1 = df['V3.%s' % first_min_second[0]].argsort().values#Increasing order! #rankg2 = df['V3.%s' % first_min_second[1]].argsort().values #response = np.array((rankg1 - rankg2), dtype='d')# ** 3 #response = MinMaxScaler().fit_transform(response[:, None])[:, 0] response = pd.DataFrame(response, columns=['Rank%s-Rank%s' % first_min_second]) explanatory = df[features].copy() #explanatory = pd.DataFrame(MinMaxScaler().fit_transform(explanatory.copy().values), # columns=explanatory.columns) #explanatory['intercept'] = np.ones(len(explanatory), dtype='d') explanatory['is_catole'] = np.array(df['bairro'] == 'catole', dtype='d') explanatory['is_centro'] = np.array(df['bairro'] == 'centro', dtype='d') explanatory['is_liberdade'] = np.array(df['bairro'] == 'liberdade', dtype='d') model = QuantReg(response, explanatory) max_left = 0.0 max_left_q = 0 max_right = 0.0 max_right_q = 0 rsqs = [] qs = [] util = [] values = {} for name in explanatory.columns: values[name] = np.zeros(10) i = 0 for q in np.linspace(0.05, 0.95, 5): values[name][i] = 0
def fit(self,X,*args,**kwargs):
    """
    Fit a projection pursuit dimension reduction model.

    Parameters
    ----------
    X : matrix or data frame
        Input data with n rows and p columns.
    *args
        Optional. When ``y`` is not passed by keyword, ``args[0]`` is used
        as the response (vector or 1D matrix); this keeps the call
        sklearn consistent.
    **kwargs
        y : vector or 1D matrix
            Response data; its presence switches to the 'two-block' mode.
        h : int
            Overrules the class's ``n_components`` (and updates it).
            Convenient on the command line, yet should not be used in
            automated loops, e.g. cross-validation.
        biascorr : bool
            Forwarded to the projection index. Defaults to False.
        dmetric : str
            Accepted for compatibility; the distance metric handed to the
            projection index is fixed to 'euclidean' in this method.
        mixing : bool
            Also store the mixing matrix (only relevant for ICA).
            Defaults to False.
        quantile : float
            Quantile for ``regopt == 'quantile'``. Defaults to 0.5.
        fun, probp1, probp2, probp3
            Options of the robust regression, read only when
            ``regopt == 'robust'``. Default to 'Hampel', 0.95, 0.975 and
            0.99.
        option : int
            Read only for the dicomo index in modes 'M3', 'cos' or 'c*k';
            a message is printed when it exceeds 3. Defaults to 1.

    Returns
    -------
    self
        With the estimates stored as attributes: ``x_weights_``,
        ``x_loadings_``, ``x_rotations_``, ``x_scores_``, ``x_ev_``,
        ``crit_values_``, ``Maxobjf_``, ``x_loc_``, ``x_sca_`` and
        ``scaling``; in two-block mode also ``coef_``, ``intercept_``,
        ``coef_scaled_``, ``intercept_scaled_``, ``residuals_``,
        ``fitted_``, ``y_loadings_``, ``y_loc_`` and ``y_sca_``.

    Raises
    ------
    MyException
        If ``h`` exceeds ``min(n, p)`` or if X and y differ in their number
        of rows.
    """

    # Collect optional fit arguments
    biascorr = kwargs.pop('biascorr',False)

    if 'h' in kwargs:
        h = kwargs.pop('h')
        self.n_components = h
    else:
        h = self.n_components

    mixing = kwargs.get('mixing',False)

    # y may be given by keyword or, sklearn style, as first positional arg
    if 'y' in kwargs:
        flag = 'two-block'
        y = kwargs.get('y')
    elif len(args) > 0:
        flag = 'two-block'
        y = args[0]
    else:
        flag = 'one-block'
        y = 0 # placeholder: no real y argument present

    quantile = kwargs.get('quantile',.5)

    if self.regopt == 'robust':
        fun = kwargs.get('fun','Hampel')
        probp1 = kwargs.get('probp1',0.95)
        probp2 = kwargs.get('probp2',0.975)
        probp3 = kwargs.get('probp3',0.99)

    if self.projection_index == dicomo:
        if self.pi_arguments['mode'] in ('M3','cos','c*k'):
            option = kwargs.get('option',1)
            if option > 3:
                print('Option value >3 will compute results, but meaning may be questionable')

    # Initiate projection index
    self.most = self.projection_index(**self.pi_arguments)

    # Initiate some parameters and data frames
    if self.copy:
        X0 = copy.deepcopy(X)
        self.X0 = X0
    else:
        X0 = X
    X = convert_X_input(X0)
    n,p = X0.shape
    trimming = self.trimming

    # Check dimensions
    if h > min(n,p):
        raise MyException('number of components cannot exceed number of samples')

    if (self.projection_index == dicomo and self.pi_arguments['mode'] == 'kurt' and self.whiten_data==False):
        warnings.warn('Whitening step is recommended for ICA')

    # Pre-processing adjustment if whitening
    if self.whiten_data:
        self.center_data = True
        self.scale_data = False
        self.compression = False
        print('All results produced are for whitened data')

    # Centring and scaling
    # NOTE(review): `scale` stays undefined when scale_data is set and
    # `center` is none of 'mean', 'median', 'l1median' -- confirm that the
    # constructor rules this out.
    if self.scale_data:
        if self.center=='mean':
            scale = 'std'
        elif ((self.center=='median')|(self.center=='l1median')):
            scale = 'mad'
    else:
        scale = 'None'
        warnings.warn('Without scaling, convergence to optima is not given')

    # Data Compression for flat tables if required
    if ((p>n) and self.compression):
        V,S,U = np.linalg.svd(X.T,full_matrices=False)
        X = np.matmul(U.T,np.diag(S))
        n,p = X.shape

        if (srs.mad(X)==0).any():
            warnings.warn('Due to low scales in data, compression would induce zero scales.'
                          + '\n' + 'Proceeding without compression.')
            dimensions = False
            # Fall back to the uncompressed data. The test is on the
            # estimator's own `copy` flag (the `copy` module is always
            # truthy), and n, p are reset to the uncompressed shape so that
            # everything allocated below matches X again.
            if self.copy:
                X = copy.deepcopy(X0)
            else:
                X = X0
            n,p = X0.shape
        else:
            dimensions = True
    else:
        dimensions = False

    # Initiate centring object and scale X data
    centring = VersatileScaler(center=self.center,scale=scale,trimming=trimming)

    if self.center_data:
        Xs = centring.fit_transform(X)
        mX = centring.col_loc_
        sX = centring.col_sca_
    else:
        Xs = X
        mX = np.zeros((1,p))
        sX = np.ones((1,p))

    fit_arguments = {}

    # Data whitening (best practice for ICA)
    if self.whiten_data:
        V,S,U = np.linalg.svd(Xs.T,full_matrices=False)
        del U
        K = (V/S)[:,:p]
        del V,S
        Xs = np.matmul(Xs, K)
        Xs *= np.sqrt(p)

    # Presently, X and y need to be matrices
    # Will be changed to use regular np.ndarray
    Xs = np.matrix(Xs)

    # Pre-process y data when available
    if flag != 'one-block':

        ny = y.shape[0]
        y = convert_y_input(y)
        if len(y.shape) < 2:
            y = np.matrix(y).reshape((ny,1))
        # py = y.shape[1]
        if ny != n:
            raise MyException('X and y number of rows must agree')
        if self.copy:
            y0 = copy.deepcopy(y)
            self.y0 = y0

        if self.center_data:
            # Re-uses the centring object; the X location and scale were
            # already saved in mX and sX above.
            ys = centring.fit_transform(y)
            my = centring.col_loc_
            sy = centring.col_sca_
        else:
            ys = y
            my = 0
            sy = 1
        ys = np.matrix(ys).astype('float64')

    else:
        ys = None

    # Initializing output matrices
    W = np.zeros((p,h))
    T = np.zeros((n,h))
    P = np.zeros((p,h))
    B = np.zeros((p,h))
    R = np.zeros((p,h))
    B_scaled = np.zeros((p,h))
    C = np.zeros((h,1))
    Xev = np.zeros((h,1))
    assovec = np.zeros((h,1))
    Maxobjf = np.zeros((h,1))

    # Initialize deflation matrices
    E = copy.deepcopy(Xs)
    f = ys

    bi = np.zeros((p,1))

    opt_args = {
                'alpha': self.alpha,
                'trimming': self.trimming,
                'biascorr': biascorr,
                'dmetric' : 'euclidean',
                }

    if self.optimizer=='grid':
        # Define grid optimization ranges
        if 'ndir' not in self.optimizer_options:
            self.optimizer_options['ndir'] = 1000
        optrange = np.sign(self.optrange)
        optmax = self.optrange[1]
        stop0s = np.arcsin(optrange[0])
        stop1s = np.arcsin(optrange[1])
        stop1c = np.arccos(optrange[0])
        stop0c = np.arccos(optrange[1])
        anglestart = max(stop0c,stop0s)
        anglestop = max(stop1c,stop1s)
        nangle = np.linspace(anglestart,anglestop,self.optimizer_options['ndir'],endpoint=False)
        alphamat = np.matrix([np.cos(nangle), np.sin(nangle)])
        opt_args['_stop0c'] = stop0c
        opt_args['_stop0s'] = stop0s
        opt_args['_stop1c'] = stop1c
        opt_args['_stop1s'] = stop1s
        opt_args['optmax'] = optmax
        opt_args['optrange'] = self.optrange
        opt_args['square_pi'] = self.square_pi
        if optmax != 1:
            alphamat *= optmax

        if p>2:
            anglestart = min(opt_args['_stop0c'],opt_args['_stop0s'])
            anglestop = min(opt_args['_stop1c'],opt_args['_stop1s'])
            nangle = np.linspace(anglestart,anglestop,self.optimizer_options['ndir'],endpoint=True)
            alphamat2 = np.matrix([np.cos(nangle), np.sin(nangle)])
            if optmax != 1:
                alphamat2 *= opt_args['optmax']

        # Arguments for grid plane
        # NOTE(review): 'alphamat' is stored wrapped in a 1-tuple, exactly
        # as before -- confirm that gridplane expects (or unwraps) it.
        opt_args['alphamat'] = (alphamat,)
        # 'ndir' is stored as the plain value; a type check written as
        # type(x is tuple) is always true and so cannot guard anything.
        opt_args['ndir'] = self.optimizer_options['ndir']
        opt_args['maxiter'] = self.optimizer_options['maxiter']

        # Arguments for grid plane #2. They need alphamat2, which only
        # exists for p > 2, and are only used on that path.
        if p>2:
            grid_args_2 = {
                           'alpha': self.alpha,
                           'alphamat': alphamat2,
                           'ndir': self.optimizer_options['ndir'],
                           'trimming': self.trimming,
                           'biascorr': biascorr,
                           'dmetric' : 'euclidean',
                           '_stop0c' : stop0c,
                           '_stop0s' : stop0s,
                           '_stop1c' : stop1c,
                           '_stop1s' : stop1s,
                           'optmax' : optmax,
                           'optrange' : self.optrange,
                           'square_pi' : self.square_pi
                           }
            if flag=='two-block':
                grid_args_2['y'] = f

    if flag=='two-block':
        opt_args['y'] = f

    # Iterative coefficient estimation
    for i in range(0,h):

        if self.optimizer=='grid':
            if p==2:
                wi,maximo = gridplane(E,self.most,
                                      pi_arguments=opt_args
                                      )

            elif p>2:

                afin = np.zeros((p,1)) # final parameters for linear combinations
                Z = copy.deepcopy(E)
                # sort variables according to criterion
                # NOTE(review): `meas` is not used afterwards; the calls are
                # kept in case self.most.fit has side effects.
                meas = [self.most.fit(E[:,k],
                        **opt_args)
                        for k in np.arange(0,p)]
                if self.square_pi:
                    meas = np.square(meas)
                wi,maximo = gridplane(Z[:,0:2],self.most,opt_args)
                Zopt = Z[:,0:2]*wi
                afin[0:2]=wi
                for j in np.arange(2,p):
                    projmat = np.matrix([np.array(Zopt[:,0]).reshape(-1),
                                         np.array(Z[:,j]).reshape(-1)]).T
                    wi,maximo = gridplane(projmat,self.most,
                                          opt_args
                                          )
                    Zopt = Zopt*float(wi[0]) + Z[:,j]*float(wi[1])
                    afin[0:(j+1)] = afin[0:(j+1)]*float(wi[0])
                    afin[j] = float(wi[1])

                tj = Z*afin
                objf = self.most.fit(tj,
                                     **{**fit_arguments,**opt_args}
                                     )
                if self.square_pi:
                    objf *= objf

                # outer loop to run until convergence
                objfold = copy.deepcopy(objf)
                objf = -1000
                afinbest = afin
                ii = 0
                maxiter_2j = 2**round(np.log2(self.optimizer_options['maxiter']))

                while ((ii < self.optimizer_options['maxiter'] + 1) and (abs(objfold - objf)/abs(objf) > 1e-4)):
                    for j in np.arange(0,p):
                        projmat = np.matrix([np.array(Zopt[:,0]).reshape(-1),
                                             np.array(Z[:,j]).reshape(-1)]).T
                        if j > 16:
                            divv = maxiter_2j
                        else:
                            divv = min(2**j,maxiter_2j)

                        wi,maximo = gridplane_2(projmat,
                                                self.most,
                                                q=afin[j],
                                                div=divv,
                                                pi_arguments=grid_args_2
                                                )
                        Zopt = Zopt*float(wi[0,0]) + Z[:,j]*float(wi[1,0])
                        afin *= float(wi[0,0])
                        afin[j] += float(wi[1,0])

                    # evaluate the objective function:
                    tj = Z*afin

                    objfold = copy.deepcopy(objf)
                    objf = self.most.fit(tj,
                                         q=afin,
                                         **opt_args
                                         )
                    if self.square_pi:
                        objf *= objf

                    if objf!=objfold:
                        if self.constraint == 'norm':
                            afinbest = afin/np.sqrt(np.sum(np.square(afin)))
                        else:
                            afinbest = afin

                    ii +=1
                    if self.verbose:
                        print(str(ii))
                #endwhile

                # NOTE(review): this overrides the candidate selected inside
                # the loop (including its 'norm' rescaling) -- confirm intent.
                afinbest = afin
                wi = afinbest
                Maxobjf[i] = objf
            # endif;%if p>2;
        else: # do not optimize by the grid algorithm
            if self.trimming > 0:
                warnings.warn('Optimization that involves a trimmed objective is not a quadratic program. The scipy-optimize result will be off!!')
            if 'center' in self.pi_arguments:
                if (self.pi_arguments['center']=='median'):
                    warnings.warn('Optimization that involves a median in the objective is not a quadratic program. The scipy-optimize result will be off!!')
            # Unit-norm equality constraint on the direction
            constraint = {'type':'eq',
                          'fun': lambda x: np.linalg.norm(x) -1,
                          }
            if len(self.optimizer_constraints)>0:
                constraint = [constraint,self.optimizer_constraints]
            wi = minimize(pp_objective,
                          E[0,:].transpose(),
                          args=(self.most,E,opt_args),
                          method=self.optimizer,
                          constraints=constraint,
                          options=self.optimizer_options).x
            wi = np.matrix(wi).reshape((p,1))
            wi /= np.sqrt(np.sum(np.square(wi)))

        # Computing projection weights and scores
        ti = E*wi
        if self.optimizer != 'grid':
            Maxobjf[i] = self.most.fit(E*wi,**opt_args)
        nti = np.linalg.norm(ti)
        pi = E.T*ti / (nti**2)
        if self.whiten_data:
            wi /= np.sqrt((wi**2).sum())
            wi = K*wi
        wi0 = wi
        wi = np.array(wi)
        if len(W[:,i].shape) == 1:
            wi = wi.reshape(-1)
        W[:,i] = wi
        T[:,i] = np.array(ti).reshape(-1)
        P[:,i] = np.array(pi).reshape(-1)

        if flag != 'one-block':
            criteval = self.most.fit(E*wi0,
                                     **opt_args
                                     )
            if self.square_pi:
                criteval *= criteval

            assovec[i] = criteval

        # Deflation of the datamatrix guaranteeing orthogonality restrictions
        E -= ti*pi.T

        # Calculate R-Weights
        # NOTE(review): pinv2 is not available in recent SciPy releases
        # (scipy.linalg.pinv replaces it) -- confirm the supported range.
        R = np.dot(W[:,0:(i+1)],pinv2(np.dot(P[:,0:(i+1)].T,W[:,0:(i+1)]),check_finite=False))

        # Execute regression y~T if y is present. Generate regression estimates.
        if flag != 'one-block':
            if self.regopt=='OLS':
                ci = np.dot(ti.T,ys)/(nti**2)
            elif self.regopt == 'robust':
                linfit = rm(fun=fun,probp1=probp1,probp2=probp2,probp3=probp3,
                            centre=self.center,scale=scale,
                            start_cutoff_mode='specific',verbose=self.verbose)
                linfit.fit(ti,ys)
                ci = linfit.coef_
            elif self.regopt == 'quantile':
                linfit = QuantReg(y,ti)
                model = linfit.fit(q=quantile)
                ci = model.params
            # end regression if

            C[i] = ci
            bi = np.dot(R,C[0:(i+1)])
            bi_scaled = bi
            bi = np.multiply(np.reshape(sy/sX,(p,1)),bi)
            B[:,i] = bi[:,0]
            B_scaled[:,i] = bi_scaled[:,0]

    # endfor; Loop for latent dimensions

    # Re-adjust estimates to original dimensions if data have been compressed
    if dimensions:
        B = np.matmul(V[:,0:p],B)
        B_scaled = np.matmul(V[:,0:p],B_scaled)
        R = np.matmul(V[:,0:p],R)
        W = np.matmul(V[:,0:p],W)
        P = np.matmul(V[:,0:p],P)
        bi = B[:,h-1]
        if self.center_data:
            Xs = centring.fit_transform(X0)
            mX = centring.col_loc_
            sX = centring.col_sca_
        else:
            Xs = X0
            mX = np.zeros((1,p))
            sX = np.ones((1,p))

    bi = bi.astype("float64")
    if flag != 'one-block':

        # Calculate scaled and unscaled intercepts
        if dimensions:
            X = convert_X_input(X0)
        if(self.center == "mean"):
            intercept = sps.trim_mean(y - np.matmul(X,bi),trimming)
        else:
            intercept = np.median(np.reshape(y - np.matmul(X,bi),(-1)))
        yfit = np.matmul(X,bi) + intercept
        if not(scale == 'None'):
            if (self.center == "mean"):
                b0 = np.mean(ys - np.matmul(Xs.astype("float64"),bi))
            else:
                b0 = np.median(np.array(ys.astype("float64") - np.matmul(Xs.astype("float64"),bi)))
        else:
            b0 = intercept

        # Calculate residuals and store the regression estimates
        r = y - yfit
        self.coef_ = B
        self.intercept_ = intercept
        self.coef_scaled_ = B_scaled
        self.intercept_scaled_ = b0
        self.residuals_ = r
        self.fitted_ = yfit
        self.y_loadings_ = C
        self.y_loc_ = my
        self.y_sca_ = sy

    self.x_weights_ = W
    self.x_loadings_ = P
    self.x_rotations_ = R
    self.x_scores_ = T
    self.x_ev_ = Xev
    self.crit_values_ = assovec
    self.Maxobjf_ = Maxobjf

    if self.whiten_data:
        self.whitening_ = K

    if mixing:
        self.mixing_ = np.linalg.pinv(W)

    self.x_loc_ = mX
    self.x_sca_ = sX

    self.scaling = scale
    if self.return_scaling_object:
        self.scaling_object_ = centring

    return self