def calcOLSWeights(df, sourceModels, targetModel, tLabel, DROP_FIELDS):
    """Fit an OLS meta-model over source- and target-model predictions.

    Each source model and the target model predict ``tLabel`` from the
    feature columns of ``df`` (minus ``DROP_FIELDS`` and the label itself);
    the meta-model's coefficients become per-model weights.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and the ``tLabel`` column.
    sourceModels : dict
        Mapping of source key -> fitted estimator exposing ``predict``.
    targetModel : fitted estimator for the target domain.
    tLabel : str
        Name of the label column in ``df``.
    DROP_FIELDS : list
        Columns to drop from ``df`` before predicting.

    Returns
    -------
    dict
        'sourceR2s' (per-source coefficient), 'targetR2', 'totalR2'
        (their sum), 'metaModel', 'metaXColumns', and 'coeffs'.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    # Bug fix: .items()/.values() instead of the Python-2-only
    # .iteritems()/.itervalues(), so this also runs on Python 3.
    for k, model in sourceModels.items():
        metaX[k] = model.predict(X)
    metaX['target'] = targetModel.predict(X)
    metaModel = OLS()
    metaModel.fit(metaX, Y)
    # Pair each meta-feature with its fitted coefficient.
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    targetOLS = sourceOLS.pop('target')
    totalOLS = targetOLS + sum(sourceOLS.values())
    weights = {
        'sourceR2s': sourceOLS,
        'targetR2': targetOLS,
        'totalR2': totalOLS,
        'metaModel': metaModel,
        'metaXColumns': metaX.columns,
        'coeffs': metaModel.coef_,
    }
    return weights
def calcOLSFEWeights(df, sourceModels, targetModel, tLabel, DROP_FIELDS):
    """OLS meta-model weights with elimination of weak source models.

    Like ``calcOLSWeights``, but any source model whose predictions have
    non-positive R^2 against the true labels is excluded from the
    meta-regression and assigned a zero weight.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and the ``tLabel`` column.
    sourceModels : dict
        Mapping of source key -> fitted estimator exposing ``predict``.
    targetModel : fitted estimator for the target domain.
    tLabel : str
        Name of the label column in ``df``.
    DROP_FIELDS : list
        Columns to drop from ``df`` before predicting.

    Returns
    -------
    dict
        'sourceR2s', 'targetR2', 'totalR2', 'metaModel', 'metaXColumns'.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    dropKeys = []
    # Bug fix: .items() instead of the Python-2-only .iteritems().
    for k, model in sourceModels.items():
        pred = model.predict(X)
        metaX[k] = pred
        # Mark sources with no explanatory power (R^2 <= 0) for exclusion.
        if metrics.r2_score(Y, pred) <= 0:
            dropKeys.append(k)
    metaX['target'] = targetModel.predict(X)
    # The 'target' column is always kept, so even dropping every weak
    # source leaves at least one regressor for the meta-model.
    if dropKeys:
        metaX = metaX.drop(dropKeys, axis=1)
    metaModel = OLS()
    metaModel.fit(metaX, Y)
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    # Excluded sources get zero weight.
    for k in dropKeys:
        sourceOLS[k] = 0
    targetOLS = sourceOLS.pop('target')
    totalOLS = targetOLS + sum(sourceOLS.values())
    weights = {
        'sourceR2s': sourceOLS,
        'targetR2': targetOLS,
        'totalR2': totalOLS,
        'metaModel': metaModel,
        'metaXColumns': metaX.columns,
    }
    return weights
def get_OLS(alpha):
    """Lasso-based feature selection followed by an OLS refit.

    Fits a Lasso at the given ``alpha`` on the module-level ``X_norm`` and
    ``Y``, keeps only the features with non-zero Lasso coefficients, and
    returns an OLS model fitted on that reduced feature set.
    """
    selector = Lasso(random_state=0, max_iter=3000000, alpha=alpha)
    selector.fit(X_norm, Y)
    kept = selector.coef_ != 0
    refit = OLS()
    refit.fit(X_norm[:, kept], Y)
    return refit
def CAPM(portfolio, benchmark, model='OLS', check_pvals=False):
    """Estimate CAPM alpha and beta via linear regression.

    Parameters
    ----------
    portfolio : 1-D array of returns, used as the regressor.
    benchmark : 1-D array of returns, used as the response.
    model : str
        Only 'OLS' is implemented.
    check_pvals : bool
        P-value checking is not implemented yet.

    Returns
    -------
    (alpha, beta) : intercept and slope of the fitted regression.

    Raises
    ------
    NotImplementedError
        For any ``model`` other than 'OLS', or when ``check_pvals`` is True.
    """
    if model == 'OLS':
        from sklearn.linear_model import LinearRegression as OLS
        # NOTE(review): this regresses benchmark on portfolio; textbook CAPM
        # regresses portfolio excess returns on the benchmark — confirm the
        # intended orientation with callers.
        reg = OLS(n_jobs=-1, fit_intercept=True)
        reg.fit(X=portfolio.reshape(-1, 1), y=benchmark)
        if check_pvals:
            # Bug fix: ``NotImplemented`` is a constant, not an exception
            # class — raising it fails with a TypeError. Use the proper
            # NotImplementedError exception instead.
            raise NotImplementedError("Not yet!")
        alpha, beta = reg.intercept_, reg.coef_[0]
    else:
        raise NotImplementedError("Not yet!")
    return alpha, beta
def updateInitialOLSWeights(df, sourceModels, tLabel, DROP_FIELDS):
    """Fit an OLS meta-model over source-model predictions only.

    Like ``calcOLSWeights`` but with no target model: each source model
    predicts ``tLabel`` from the features of ``df`` and the meta-model's
    coefficients become per-source weights.

    Returns
    -------
    dict
        Each source key mapped to its coefficient, plus the fitted
        'metaModel' and its 'metaXColumns'.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    # Bug fix: .items() instead of the Python-2-only .iteritems().
    for k, model in sourceModels.items():
        metaX[k] = model.predict(X)
    metaModel = OLS()
    metaModel.fit(metaX, Y)
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    sourceOLS['metaModel'] = metaModel
    sourceOLS['metaXColumns'] = metaX.columns
    return sourceOLS
def test_refit_nochange_reg(sim_nochange):
    """Refit with ``keep_regularized=False`` (zero coefficients are kept)."""
    from sklearn.linear_model import LinearRegression as OLS
    refit = refit_record(sim_nochange, 'ols', OLS(), keep_regularized=False)
    # Both refit fields must be present in the record.
    for field in ('ols_coef', 'ols_rmse'):
        assert field in refit.dtype.names
    expected_coef = np.array([[-3.83016528e+03, -3.83016528e+03],
                              [5.24635240e-03, 5.24635240e-03]])
    expected_rmse = np.array([0.96794599, 0.96794599])
    np.testing.assert_allclose(refit[0]['ols_coef'], expected_coef)
    np.testing.assert_allclose(refit[0]['ols_rmse'], expected_rmse)
def sklearn_reg(self, X):
    """Fit the scikit-learn model named by ``self.model`` and predict.

    The estimator is fitted on ``self.X``/``self.z`` and evaluated at ``X``.

    Parameters
    ----------
    X : array-like
        Design matrix to predict on.

    Returns
    -------
    y_pred : predictions from the chosen, freshly fitted model.

    Raises
    ------
    ValueError
        If ``self.model`` is not one of 'OLS', 'Ridge', 'Lasso'.
    """
    if self.model == 'OLS':
        clf = OLS()
    elif self.model == 'Ridge':
        clf = Ridge(alpha=self.lamb)
    elif self.model == 'Lasso':
        clf = Lasso(alpha=self.lamb, max_iter=10000, normalize=False, tol=0.0001)
    else:
        # Previously an unknown model name fell through and crashed with an
        # UnboundLocalError on ``y_pred``; fail explicitly instead.
        raise ValueError("unknown model: %r" % (self.model,))
    # Bug fix: the Ridge branch never called ``fit`` before predicting,
    # which raises NotFittedError; fit once for every branch.
    clf.fit(self.X, self.z)
    return clf.predict(X)
def f(alpha):
    """Cross-validated total MSE of Lasso-select-then-OLS at ``alpha``.

    For each fold of the module-level ``kf`` split: fit a Lasso on the
    training fold, keep the features with non-zero coefficients, refit a
    plain OLS on those features, and accumulate the test-fold MSE.

    NOTE(review): folds are generated from ``X_norm`` but rows are indexed
    from ``X``/``Y`` — consistent only if they share row order; confirm.
    """
    lasso = Lasso(random_state=0, max_iter=3000000, alpha=alpha)
    total = 0
    for tr, te in kf.split(X_norm):
        Xtr, Ytr = X[tr], Y[tr]
        Xte, Yte = X[te], Y[te]
        lasso.fit(Xtr, Ytr)
        keep = lasso.coef_ != 0
        ols = OLS().fit(Xtr[:, keep], Ytr)
        total += mean_squared_error(Yte, ols.predict(Xte[:, keep]))
    return total
def updateInitialOLSFEWeights(df, sourceModels, tLabel, DROP_FIELDS):
    """Source-only OLS meta-weights with elimination of weak sources.

    Like ``updateInitialOLSWeights``, but any source model whose predictions
    have non-positive R^2 against the labels is excluded from the
    meta-regression and assigned a zero weight.

    Returns
    -------
    dict
        Each source key mapped to its coefficient (0 for excluded sources),
        plus the fitted 'metaModel' and its 'metaXColumns'.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    dropKeys = []
    # Bug fix: .items() instead of the Python-2-only .iteritems().
    for k, model in sourceModels.items():
        pred = model.predict(X)
        metaX[k] = pred
        # Mark sources with no explanatory power (R^2 <= 0) for exclusion.
        if metrics.r2_score(Y, pred) <= 0:
            dropKeys.append(k)
    # Drop weak sources, but never all of them — the meta-model needs at
    # least one regressor.
    if 0 < len(dropKeys) < len(metaX.columns):
        metaX = metaX.drop(dropKeys, axis=1)
    metaModel = OLS()
    metaModel.fit(metaX, Y)
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    # Excluded sources get zero weight.
    for k in dropKeys:
        sourceOLS[k] = 0
    sourceOLS['metaModel'] = metaModel
    sourceOLS['metaXColumns'] = metaX.columns
    return sourceOLS
# Classifier feature-engineering demo: the two concentric circles are not
# linearly separable in (X1, X2) but become separable after the polar
# (r, theta) transform shown on the second axis.
axes[0].scatter(inner_circle.X1, inner_circle.X2, s=3, c='red', label='class 1')
axes[0].scatter(outer_circle.X1, outer_circle.X2, s=3, c='blue', label='class 2')
axes[1].scatter(inner_circle.r, inner_circle.theta, s=3, c='red')
axes[1].scatter(outer_circle.r, outer_circle.theta, s=3, c='blue')
axes[0].legend(markerscale=3, ncol=2)
for i, ax in enumerate(axes):
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_ylabel('height' if i==0 else r'$\theta$')
    ax.set_xlabel('width' if i==0 else r'$r$')
plt.tight_layout()

# Regressor FE
# Regression feature-engineering demo: an exponentially skewed signal is a
# poor fit for a straight OLS line; after a log1p transform the linear fit
# tracks the data.
fig, axes = plt.subplots(1, 2, figsize=(6,3))
time = np.linspace(0, 3, 200).reshape(-1, 1)
skewed_data = np.exp(time.ravel() + 0.5*np.random.randn(200)).reshape(-1, 1)
axes[0].scatter(time, skewed_data, s=3, c='green')
axes[0].scatter(time, OLS().fit(time, skewed_data).predict(time), s=3, c='orange', ls=':')
axes[1].scatter(time, np.log1p(skewed_data), s=3, c='magenta')
axes[1].scatter(time, OLS().fit(time, np.log1p(skewed_data)).predict(time), s=3, c='orange', ls=':')
for i, ax in enumerate(axes):
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_ylabel('signal' if i==0 else 'log signal')
    ax.set_xlabel('regressor')
plt.tight_layout()
plt.show()
# Strip tick marks from the previous axis (set up above this view).
ax.set_xticks(())
ax.set_yticks(())

## Regression
n = 20
# generate data
# A linear trend plus noise; ``weak_signal`` is a noisier copy stacked as a
# second feature. NOTE(review): ``X`` is built but not used in this view.
time = np.linspace(0, 1, n).reshape(-1, 1) + 3
signal = time + 0.2*np.random.randn(n).reshape(-1, 1)
weak_signal = time + 0.3*np.random.randn(n).reshape(-1, 1)
X = np.hstack([time, weak_signal])
ax = axes[1]
ax.scatter(time, signal, s=10, c='#0000FF')
# Linear (OLS) fit vs. a 2-nearest-neighbour fit on the same points.
ax.plot(time, OLS().fit(time, signal).predict(time), c=dark_green_hex)
ax.plot(time, KNeighborsRegressor(n_neighbors=2).fit(time, signal).predict(time), c=bright_green_hex)
ax.set_xticks(())
ax.set_yticks(())
plt.tight_layout()
plt.show()
# NOTE(review): the lines below are the tail of a function whose definition
# starts above this view; the if/else nesting is reconstructed from a
# collapsed line — confirm against the original file before relying on it.
        else:
            tree = swap(tree)
    else:
        #print(Prob)
        Prob = [1, 1, 1]
        tree = grow(tree)
    return tree
#%%
df = pd.read_csv('SkillCraft1_Dataset.csv').dropna()
# First 2000 rows are used for fitting; the remainder is held out.
df0 = df.iloc[:2000]
df1 = df.iloc[2000:]
y = df0.iloc[:, 0]
dfd = pd.get_dummies(df0)
# Residuals of an OLS fit of the first column on all remaining columns.
e = y - OLS().fit(dfd.iloc[:, 1:], dfd.iloc[:, 0]).predict(dfd.iloc[:, 1:])
#df0 = df1
#df0 = genData()
#df0['x3'] = 22
#%%
# Algorithm hyper-parameters (semantics not visible in this view).
v = 3
q = 1 - 0.9
k = 2
m = 50  # mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm is here
alpha = 0.95
beta = 2
minObs = 3
n0, p = df0.shape
var = e.var()
def simple_share_model(n=10):
    """Load lambda-experiment results and regress optimal flow shares.

    Reads ``n`` pickled result/parameter files (skipping index 9), stacks
    them into outcome matrix ``Y`` (four flow-share scenarios) and parameter
    matrix ``X``, then fits two simple OLS lines of the CS and CS-HL shares
    on the number of high-reliability users (column 2 of ``X``), saving a
    scatter-plus-fit figure for each.

    Parameters
    ----------
    n : int
        Number of result files to read; ``nn = (n - 1) * 10`` observations.

    Returns
    -------
    [CS_c, CS_b, CSHL_c, CSHL_b] : intercept/slope pairs of the two fits.
    """
    home = '/home/nealbob'
    folder = '/Dropbox/Model/results/chapter6/lambda/'
    model = '/Dropbox/Model/'
    out = '/Dropbox/Thesis/IMG/chapter7/'
    img_ext = '.pdf'
    table_out = '/Dropbox/Thesis/STATS/chapter7/'
    results = []
    paras = []
    for i in range(n):
        if i != 9:  # file 9 is skipped — presumably missing/corrupt; confirm
            with open(home + folder + 'lambda_result_' + str(i) +'.pkl', 'rb') as f:
                results.extend(pickle.load(f))
                f.close()
            with open(home + folder + 'lambda_para_' + str(i) + '.pkl', 'rb') as f:
                paras.extend(pickle.load(f))
                f.close()
    nn = (n - 1) * 10
    Y = np.zeros([nn, 4])
    X = np.zeros([nn, 12])
    for i in range(nn):
        Y[i, 0] = results[i][0][1][0]
        Y[i, 1] = results[i][0][1][1]
        Y[i, 2] = results[i][1][1][0]
        Y[i, 3] = results[i][1][1][1]
        X[i, :] = np.array([paras[i][p] for p in paras[i]])
    """ tree = Tree(min_samples_split=3, min_samples_leaf=2, n_estimators = 300) tree.fit(X, Y) with open(home + model + 'sharemodel.pkl', 'wb') as f: pickle.dump(tree, f) f.close() scen = ['RS-O', 'CS-O', 'RS-HL-O', 'CS-HL-O'] for i in range(4): chart = {'OUTFILE' : (home + out + 'lambda_' + scen[i] + img_ext), 'XLABEL' : 'Optimal flow share', 'XMIN' : min(Y[:,i]), 'XMAX' : max(Y[:,i]), 'BINS' : 10} data = [Y[:,i]] build_chart(chart, data, chart_type='hist') chart = {'OUTFILE' : (home + out + 'lambda_scat_' + scen[i] + img_ext), 'XLABEL' : 'Number of high reliability users', 'YLABEL' : 'Optimal flow share'} data = [[X[:, 2], Y[:,i]]] build_chart(chart, data, chart_type='scatter') rank = tree.feature_importances_ * 100 data0 = [] for i in range(len(paras[0])): record = {} record['Importance'] = rank[i] data0.append(record) tab = pandas.DataFrame(data0) tab.index = [p for p in paras[i]] tab = tab.sort(columns=['Importance'], ascending=False) with open(home + table_out + 'lambda' + '.txt', 'w') as f: f.write(tab.to_latex(float_format='{:,.2f}'.format)) f.close() """
    from sklearn.linear_model import LinearRegression as OLS
    ols = OLS()
    # Bug fix: the regressor length was hard-coded as 190 (only valid when
    # n == 20); use nn so the function works for any n.
    ols.fit(X[:,2].reshape([nn, 1]), Y[:,1])
    CS_c = ols.intercept_
    CS_b = ols.coef_[0]
    xp = np.linspace(30, 70, 300)
    yp = CS_c + CS_b * xp
    chart_params()
    pylab.figure()
    pylab.plot(X[:,2], Y[:, 1], 'o')
    pylab.plot(xp, yp)
    pylab.xlabel('Number of high reliability users')
    pylab.ylabel('Optimal flow share')
    pylab.ylim(0, 0.8)
    pylab.savefig(home + out + 'sharemodel1.pdf')
    pylab.show()
    # Same regression for the CS-HL scenario (column 3 of Y).
    ols.fit(X[:,2].reshape([nn, 1]), Y[:,3])
    CSHL_c = ols.intercept_
    CSHL_b = ols.coef_[0]
    xp = np.linspace(30, 70, 300)
    yp = CSHL_c + CSHL_b * xp
    chart_params()
    pylab.figure()
    pylab.plot(X[:,2], Y[:, 3], 'o')
    pylab.plot(xp, yp)
    pylab.xlabel('Number of high reliability users')
    pylab.ylabel('Optimal flow share')
    pylab.ylim(0, 0.8)
    pylab.savefig(home + out + 'sharemodel2.pdf')
    pylab.show()
    return [CS_c, CS_b, CSHL_c, CSHL_b]
# NOTE(review): the line below is the tail of a function defined above this
# view (returns the Pearson correlation coefficient of y and z).
return pearsonr(y,z)[0]
# #############################################################################
# Generate sample data
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = 3-np.sin(X).ravel()
# #############################################################################
# Add noise to targets
# Every 5th target gets uniform noise drawn from [-1.5, 1.5).
y[::5] += 3 * (0.5 - np.random.rand(8))
# #############################################################################
# Fit regression model
svr_rbf = SVR(kernel='rbf', C=1000, gamma='auto', epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, epsilon=.1)
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=2, epsilon=.1, coef0=1)
# NOTE(review): the polynomial SVR above is immediately discarded —
# svr_poly is rebound to a plain OLS model (matching the 'OLS' label
# below). Confirm whether the poly-SVR line should be removed.
svr_poly = OLS()
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)
# #############################################################################
# Look at the results
lw = 2
svrs = [svr_poly, svr_lin, svr_rbf]
kernel_label = ['OLS', 'Linear','RBF' ]
model_color = ['m', 'c', 'g']
plt.close("all")
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
# NOTE(review): this loop body continues past the end of this view.
for ix, svr in enumerate(svrs):
    axes[ix].plot(X, svr.fit(X, y).predict(X), color=model_color[ix], lw=lw,
# Initial wealth N = len(WealthDistribution) # Number of bins N_bins = (int(N * delta_m)) # Making the initial histogram w_m, bins = np.histogram(WealthDistribution, N_bins, density=True) # Finding the bin centers and converting to m array m = 0.5 * (bins[1:] + bins[:-1]) m_fit = m[w_m > 0][:-2] w_fit = w_m[w_m > 0][:-2] model = OLS(fit_intercept=False) # Fitting the wealth distirbution to a Gibbs distribution model.fit(np.c_[np.ones_like(m_fit), m_fit], np.log(w_fit)) print(np.exp(model.coef_[0]), model.coef_[1]) m_ = np.linspace(min(m_fit), max(m_fit), 10) print(np.sum(w_m * m)) # Plotting the wealth distribution plt.semilogy(m_, 0.01 * np.exp(-0.01 * m_)) plt.semilogy(m, w_m, "bo") plt.legend(["Ordinary Least squares", "Computed distribution"]) plt.grid(linestyle="--")
# Initial wealth m_0 = np.average(WealthDistribution) N = len(WealthDistribution) # Number of bins N_bins = (int(N * delta_m)) # Making the initial histogram w_m, bins = np.histogram(WealthDistribution, N_bins, density=True) # Finding the bin centers and converting to m array m = 0.5 * (bins[1:] + bins[:-1]) w_m = w_m[m > 600] m = m[m > 600] model = OLS(fit_intercept=True) model.fit(np.c_[m**(-1 - float(alpha[i]))], w_m) m_ = np.linspace(min(m), max(m), 1000) # Plotting the wealth distribution and parametrized solution plt.loglog(m, w_m, "-", color=color_list[i], label=r"$\alpha =$ %.2f" % float(alpha[i])) plt.loglog(m_, model.predict(np.c_[m_**(-1 - float(alpha[i]))]), "--", color=color_list[i])
def OlsFromPoints(xvals, yvals):
    """Fit an OLS model of ``yvals`` against ``xvals``.

    ``xvals`` is coerced to an (n, 1) column vector before fitting.

    Returns
    -------
    The fitted OLS model.
    """
    design = np.array(xvals).reshape(len(xvals), 1)
    fitted = OLS()
    fitted.fit(design, yvals)
    return fitted