コード例 #1
0
def calcOLSWeights(df, sourceModels, targetModel, tLabel, DROP_FIELDS):
    """Learn OLS meta-weights over source- and target-model predictions.

    Parameters
    ----------
    df : pandas.DataFrame holding the features, the target column ``tLabel``
        and the columns listed in ``DROP_FIELDS``.
    sourceModels : dict mapping source name -> fitted regressor (``.predict``).
    targetModel : fitted regressor for the target domain.
    tLabel : str, name of the target column in ``df``.
    DROP_FIELDS : list of column names excluded from the feature matrix.

    Returns
    -------
    dict with per-source coefficients ('sourceR2s'), the target model's
    coefficient ('targetR2'), their sum ('totalR2'), the fitted meta model,
    the meta feature column order and the raw coefficient vector.
    NOTE(review): despite the key names, the values stored are OLS
    coefficients, not R^2 scores — kept for caller compatibility.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()

    # Meta feature matrix: one column of predictions per source model,
    # plus one column for the target model.
    metaX = pd.DataFrame(columns=sourceModels.keys())
    # BUG FIX: dict.iteritems() is Python 2 only (AttributeError on
    # Python 3); items() behaves the same on both versions.
    for k, model in sourceModels.items():
        metaX[k] = model.predict(X)
    metaX['target'] = targetModel.predict(X)

    metaModel = OLS()
    metaModel.fit(metaX, Y)

    # Map every meta feature (source name or 'target') to its learned weight.
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    targetOLS = sourceOLS.pop('target')
    # BUG FIX: itervalues() is Python 2 only; values() works everywhere.
    totalOLS = targetOLS + sum(sourceOLS.values())
    weights = {
        'sourceR2s': sourceOLS,
        'targetR2': targetOLS,
        'totalR2': totalOLS,
        'metaModel': metaModel,
        'metaXColumns': metaX.columns,
        'coeffs': metaModel.coef_
    }
    return weights
コード例 #2
0
def calcOLSFEWeights(df, sourceModels, targetModel, tLabel, DROP_FIELDS):
    """OLS meta-weights with feature elimination of weak source models.

    Like ``calcOLSWeights`` but any source whose R^2 on this data is
    non-positive (no better than predicting the mean) is excluded from the
    meta-model fit and assigned a weight of 0.

    Parameters
    ----------
    df : pandas.DataFrame with features, ``tLabel`` and ``DROP_FIELDS``.
    sourceModels : dict mapping source name -> fitted regressor.
    targetModel : fitted regressor for the target domain.
    tLabel : str, target column name.
    DROP_FIELDS : list of columns excluded from the feature matrix.

    Returns
    -------
    dict with per-source coefficients ('sourceR2s', zeros for eliminated
    sources), the target coefficient ('targetR2'), their sum ('totalR2'),
    the fitted meta model and the meta feature column order.
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    dropKeys = []
    # BUG FIX: dict.iteritems() is Python 2 only; items() works on 2 and 3.
    for k, model in sourceModels.items():
        pred = model.predict(X)
        metaX[k] = pred
        # Mark sources that do no better than a constant predictor.
        if metrics.r2_score(Y, pred) <= 0:
            dropKeys.append(k)
    metaX['target'] = targetModel.predict(X)

    if dropKeys:
        metaX = metaX.drop(dropKeys, axis=1)

    metaModel = OLS()
    metaModel.fit(metaX, Y)

    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    # Eliminated sources keep an explicit zero weight.
    for k in dropKeys:
        sourceOLS[k] = 0
    targetOLS = sourceOLS.pop('target')
    # BUG FIX: itervalues() is Python 2 only; values() works everywhere.
    totalOLS = targetOLS + sum(sourceOLS.values())
    weights = {
        'sourceR2s': sourceOLS,
        'targetR2': targetOLS,
        'totalR2': totalOLS,
        'metaModel': metaModel,
        'metaXColumns': metaX.columns
    }
    return weights
コード例 #3
0
ファイル: selection.py プロジェクト: EGimenez/stats-exam-t1
def get_OLS(alpha):
    """Select features via Lasso at the given ``alpha``, then refit with OLS.

    Uses the module-level ``X_norm`` and ``Y``; returns the fitted OLS model
    over the Lasso-selected (non-zero coefficient) columns.
    """
    selector = Lasso(random_state=0, max_iter=3000000, alpha=alpha)
    selector.fit(X_norm, Y)
    keep = selector.coef_ != 0
    return OLS().fit(X_norm[:, keep], Y)
コード例 #4
0
ファイル: measures.py プロジェクト: redjerdai/risk_utils
def CAPM(portfolio, benchmark, model='OLS', check_pvals=False):
    """Estimate CAPM alpha and beta by linear regression.

    Parameters
    ----------
    portfolio : 1-D array of portfolio returns (used as the regressor).
    benchmark : 1-D array of benchmark returns (used as the response).
    model : str, only 'OLS' is implemented.
    check_pvals : bool, p-value checking is not implemented yet.

    Returns
    -------
    (alpha, beta) : regression intercept and slope.

    Raises
    ------
    NotImplementedError : for ``check_pvals=True`` or any ``model`` other
        than 'OLS'.
    """
    if model == 'OLS':
        from sklearn.linear_model import LinearRegression as OLS
        reg = OLS(n_jobs=-1, fit_intercept=True)
        reg.fit(X=portfolio.reshape(-1, 1), y=benchmark)
        if check_pvals:
            # BUG FIX: `raise NotImplemented(...)` raises the NotImplemented
            # singleton (a value, not an exception), which is a TypeError on
            # Python 3. NotImplementedError is the correct exception.
            raise NotImplementedError("Not yet!")
        alpha, beta = reg.intercept_, reg.coef_[0]
    else:
        raise NotImplementedError("Not yet!")

    return alpha, beta
コード例 #5
0
def updateInitialOLSWeights(df, sourceModels, tLabel, DROP_FIELDS):
    """Fit an OLS meta-model over source-model predictions only.

    Unlike ``calcOLSWeights`` there is no target model yet; the returned
    dict maps each source name directly to its OLS coefficient.

    Parameters
    ----------
    df : pandas.DataFrame with features, ``tLabel`` and ``DROP_FIELDS``.
    sourceModels : dict mapping source name -> fitted regressor.
    tLabel : str, target column name.
    DROP_FIELDS : list of columns excluded from the feature matrix.

    Returns
    -------
    dict of source name -> coefficient, plus 'metaModel' (the fitted OLS
    object) and 'metaXColumns' (meta feature column order).
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()

    metaX = pd.DataFrame(columns=sourceModels.keys())
    # BUG FIX: dict.iteritems() is Python 2 only; items() works on 2 and 3.
    for k, model in sourceModels.items():
        metaX[k] = model.predict(X)

    metaModel = OLS()
    metaModel.fit(metaX, Y)

    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    sourceOLS['metaModel'] = metaModel
    sourceOLS['metaXColumns'] = metaX.columns
    return sourceOLS
コード例 #6
0
def test_refit_nochange_reg(sim_nochange):
    """ Test refit ``keep_regularized=False`` (i.e., not ignoring coef == 0)
    """
    from sklearn.linear_model import LinearRegression as OLS

    refit = refit_record(sim_nochange, 'ols', OLS(),
                         keep_regularized=False)
    # Both refit fields must be present in the record dtype.
    for field in ('ols_coef', 'ols_rmse'):
        assert field in refit.dtype.names

    expected_coef = np.array([[-3.83016528e+03, -3.83016528e+03],
                              [5.24635240e-03, 5.24635240e-03]])
    expected_rmse = np.array([0.96794599, 0.96794599])
    np.testing.assert_allclose(refit[0]['ols_coef'], expected_coef)
    np.testing.assert_allclose(refit[0]['ols_rmse'], expected_rmse)
コード例 #7
0
    def sklearn_reg(self, X):
        """Fit the configured scikit-learn regressor and predict at ``X``.

        Fits on ``self.X``/``self.z`` using the model named by
        ``self.model`` ('OLS', 'Ridge' or 'Lasso'; Ridge/Lasso use
        ``self.lamb`` as the regularisation strength) and returns the
        predictions for ``X``.

        Raises
        ------
        ValueError : if ``self.model`` is not one of the supported names.
        """
        if self.model == 'OLS':
            clf = OLS()
        elif self.model == 'Ridge':
            clf = Ridge(alpha=self.lamb)
        elif self.model == 'Lasso':
            clf = Lasso(alpha=self.lamb,
                        max_iter=10000,
                        normalize=False,
                        tol=0.0001)
        else:
            # BUG FIX: the original fell through with `clf`/`y_pred` unbound
            # (UnboundLocalError) for an unknown model name.
            raise ValueError("unknown model: %r" % (self.model,))

        # BUG FIX: the original 'Ridge' branch called predict() without ever
        # fitting the model; fit once here for every branch.
        clf.fit(self.X, self.z)
        return clf.predict(X)
コード例 #8
0
ファイル: selection.py プロジェクト: EGimenez/stats-exam-t1
def f(alpha):
    """Cross-validated MSE of Lasso feature selection followed by OLS refit.

    For each fold of the module-level splitter ``kf``: fit Lasso at the
    given ``alpha`` on the training rows, keep the columns with non-zero
    coefficients, refit OLS on those columns and accumulate the test MSE.

    NOTE(review): folds are split on ``X_norm`` but rows are taken from
    ``X`` — this assumes the two arrays are row-aligned; confirm upstream.
    """
    lasso = Lasso(random_state=0, max_iter=3000000, alpha=alpha)
    total_error = 0

    for train_idx, test_idx in kf.split(X_norm):
        X_tr, X_te = X[train_idx], X[test_idx]
        Y_tr, Y_te = Y[train_idx], Y[test_idx]

        lasso.fit(X_tr, Y_tr)
        keep = lasso.coef_ != 0

        refit = OLS().fit(X_tr[:, keep], Y_tr)
        total_error += mean_squared_error(Y_te, refit.predict(X_te[:, keep]))

    return total_error
コード例 #9
0
def updateInitialOLSFEWeights(df, sourceModels, tLabel, DROP_FIELDS):
    """OLS meta-weights over source predictions with feature elimination.

    Sources whose R^2 on this data is non-positive are dropped from the
    meta-model fit (unless that would drop every source) and get weight 0.

    Parameters
    ----------
    df : pandas.DataFrame with features, ``tLabel`` and ``DROP_FIELDS``.
    sourceModels : dict mapping source name -> fitted regressor.
    tLabel : str, target column name.
    DROP_FIELDS : list of columns excluded from the feature matrix.

    Returns
    -------
    dict of source name -> coefficient (0 for eliminated sources), plus
    'metaModel' (fitted OLS) and 'metaXColumns' (meta column order).
    """
    X = df.drop(DROP_FIELDS, axis=1).copy()
    X = X.drop(tLabel, axis=1)
    Y = df[tLabel].copy()
    metaX = pd.DataFrame(columns=sourceModels.keys())
    dropKeys = []
    # BUG FIX: dict.iteritems() is Python 2 only; items() works on 2 and 3.
    for k, model in sourceModels.items():
        pred = model.predict(X)
        metaX[k] = pred
        # Mark sources no better than a constant predictor.
        if metrics.r2_score(Y, pred) <= 0:
            dropKeys.append(k)
    # Drop weak sources, but never all of them — the meta-model needs at
    # least one feature to fit (there is no 'target' column here).
    if 0 < len(dropKeys) < len(metaX.columns):
        metaX = metaX.drop(dropKeys, axis=1)
    metaModel = OLS()
    metaModel.fit(metaX, Y)
    sourceOLS = dict(zip(metaX.columns, metaModel.coef_))
    for k in dropKeys:
        sourceOLS[k] = 0
    sourceOLS['metaModel'] = metaModel
    sourceOLS['metaXColumns'] = metaX.columns
    return sourceOLS
コード例 #10
0
# Left panel: raw circle data in (X1, X2); right panel: the same points
# after a polar (r, theta) feature transform, which separates the classes.
# NOTE(review): assumes `axes`, `inner_circle` and `outer_circle` are
# defined earlier in the script (not visible in this chunk).
axes[0].scatter(inner_circle.X1, inner_circle.X2, s=3, c='red', label='class 1')
axes[0].scatter(outer_circle.X1, outer_circle.X2, s=3, c='blue', label='class 2')
axes[1].scatter(inner_circle.r, inner_circle.theta, s=3, c='red')
axes[1].scatter(outer_circle.r, outer_circle.theta, s=3, c='blue')
axes[0].legend(markerscale=3, ncol=2)
# Strip ticks and label each panel by its coordinate system.
for i, ax in enumerate(axes):
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_ylabel('height' if i==0 else r'$\theta$')
    ax.set_xlabel('width' if i==0 else r'$r$')
plt.tight_layout()

# Regressor FE: a log transform turns an exponentially growing noisy
# signal into one that is (approximately) linear in time.
fig, axes = plt.subplots(1, 2, figsize=(6,3))

time = np.linspace(0, 3, 200).reshape(-1, 1)
skewed_data = np.exp(time.ravel() + 0.5*np.random.randn(200)).reshape(-1, 1)

# Left: raw skewed signal with an OLS fit; right: log1p-transformed
# signal with an OLS fit.
axes[0].scatter(time, skewed_data, s=3, c='green')
axes[0].scatter(time, OLS().fit(time, skewed_data).predict(time), s=3, c='orange', ls=':')
axes[1].scatter(time, np.log1p(skewed_data), s=3, c='magenta')
axes[1].scatter(time, OLS().fit(time, np.log1p(skewed_data)).predict(time), s=3, c='orange', ls=':')
for i, ax in enumerate(axes):
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_ylabel('signal' if i==0 else 'log signal')
    ax.set_xlabel('regressor')
plt.tight_layout()
plt.show()

# NOTE(review): `ax` here is whatever the loop above left bound (axes[1]);
# these two calls duplicate the tick-stripping already done in the loop.
ax.set_xticks(())
ax.set_yticks(())


## Regression

n = 20
# generate data: two noisy copies of a linear trend in time
time = np.linspace(0, 1, n).reshape(-1, 1) + 3
signal = time + 0.2*np.random.randn(n).reshape(-1, 1)
weak_signal = time + 0.3*np.random.randn(n).reshape(-1, 1)
X = np.hstack([time, weak_signal])

# Compare a global linear fit (OLS) against a local fit (2-NN regression).
# NOTE(review): `axes`, `dark_green_hex` and `bright_green_hex` come from
# earlier, unseen parts of the script.
ax = axes[1]
ax.scatter(time, signal, s=10, c='#0000FF')
ax.plot(time, OLS().fit(time, signal).predict(time), c=dark_green_hex)
ax.plot(time, KNeighborsRegressor(n_neighbors=2).fit(time, signal).predict(time), c=bright_green_hex)
ax.set_xticks(())
ax.set_yticks(())
plt.tight_layout()
plt.show()









コード例 #12
0
        else:
            tree = swap(tree)
    else:
        #print(Prob)
        Prob = [1, 1, 1]
        tree = grow(tree)
    return tree


#%%
# Load the SkillCraft dataset, keep only complete rows, and split:
# first 2000 rows for fitting, the remainder held out in df1.
df = pd.read_csv('SkillCraft1_Dataset.csv').dropna()
df0 = df.iloc[:2000]
df1 = df.iloc[2000:]
y = df0.iloc[:, 0]
dfd = pd.get_dummies(df0)
# Residuals of an OLS fit of column 0 on all remaining dummy-encoded
# columns. NOTE(review): assumes get_dummies leaves the target as column 0
# of dfd — confirm against the dataset's column order.
e = y - OLS().fit(dfd.iloc[:, 1:], dfd.iloc[:, 0]).predict(dfd.iloc[:, 1:])
#df0 = df1
#df0 = genData()
#df0['x3'] = 22

#%%
# Tuning constants for the tree procedure defined earlier in the file.
# NOTE(review): meanings are undocumented in the source — confirm with the
# tree-growing code before changing any of them.
v = 3
q = 1 - 0.9
k = 2
m = 50  # NOTE(review): original comment was noise; purpose of m unconfirmed
alpha = 0.95
beta = 2
minObs = 3

n0, p = df0.shape
var = e.var()  # residual variance of the OLS fit above
コード例 #13
0
def simple_share_model(n=10):
    """Fit linear models of optimal flow share vs. high-reliability users.

    Loads ``n`` pickled result/parameter chunks (chunk 9 is always skipped
    as a missing run), regresses the optimal flow share for the CS and
    CS-HL scenarios on the number of high reliability users (column 2 of
    the parameter matrix), saves two scatter-plus-fit plots and returns
    ``[CS_c, CS_b, CSHL_c, CSHL_b]`` (intercept/slope pairs).

    NOTE(review): assumes exactly 10 records per chunk, giving
    ``nn = (n - 1) * 10`` records after the skipped chunk.
    """
    from sklearn.linear_model import LinearRegression as OLS

    home = '/home/nealbob'
    folder = '/Dropbox/Model/results/chapter6/lambda/'
    out = '/Dropbox/Thesis/IMG/chapter7/'

    results = []
    paras = []

    # Load pickled result/parameter chunks; run 9 is skipped (missing).
    # (Redundant f.close() calls inside the with-blocks removed: the
    # context manager already closes the file.)
    for i in range(n):
        if i != 9:
            with open(home + folder + 'lambda_result_' + str(i) + '.pkl', 'rb') as f:
                results.extend(pickle.load(f))

            with open(home + folder + 'lambda_para_' + str(i) + '.pkl', 'rb') as f:
                paras.extend(pickle.load(f))

    nn = (n - 1) * 10  # records actually loaded (one chunk skipped)

    # Y: optimal flow shares for the four scenarios; X: model parameters.
    Y = np.zeros([nn, 4])
    X = np.zeros([nn, 12])

    for i in range(nn):
        Y[i, 0] = results[i][0][1][0]
        Y[i, 1] = results[i][0][1][1]
        Y[i, 2] = results[i][1][1][0]
        Y[i, 3] = results[i][1][1][1]
        X[i, :] = np.array([paras[i][p] for p in paras[i]])

    ols = OLS()

    def _fit_and_plot(ycol, outfile):
        """Regress Y[:, ycol] on the number of high reliability users
        (X[:, 2]), save a scatter + fitted-line plot, return (c, b)."""
        # BUG FIX: the reshape was hard-coded to [190, 1], which only
        # works when nn == 190 (i.e. n == 20); (-1, 1) works for any n.
        ols.fit(X[:, 2].reshape(-1, 1), Y[:, ycol])
        c = ols.intercept_
        b = ols.coef_[0]
        xp = np.linspace(30, 70, 300)
        yp = c + b * xp

        chart_params()
        pylab.figure()
        pylab.plot(X[:, 2], Y[:, ycol], 'o')
        pylab.plot(xp, yp)
        pylab.xlabel('Number of high reliability users')
        pylab.ylabel('Optimal flow share')
        pylab.ylim(0, 0.8)
        pylab.savefig(home + out + outfile)
        pylab.show()
        return c, b

    CS_c, CS_b = _fit_and_plot(1, 'sharemodel1.pdf')
    CSHL_c, CSHL_b = _fit_and_plot(3, 'sharemodel2.pdf')

    return [CS_c, CS_b, CSHL_c, CSHL_b]
コード例 #14
0
    return pearsonr(y,z)[0]
# #############################################################################
# Generate sample data: 40 sorted points in [0, 5) with a sine-based target.
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = 3-np.sin(X).ravel()

# #############################################################################
# Add noise to targets (every 5th sample -> 8 perturbed points out of 40)
y[::5] += 3 * (0.5 - np.random.rand(8))

# #############################################################################
# Fit regression model
svr_rbf = SVR(kernel='rbf', C=1000, gamma='auto', epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, epsilon=.1)
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=2, epsilon=.1, coef0=1)
# NOTE(review): the polynomial SVR above is immediately replaced by plain
# OLS (kernel_label below says 'OLS'), so the SVR(kernel='poly') line is
# dead — confirm whether it was meant to be kept.
svr_poly = OLS()
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)

# #############################################################################
# Look at the results
lw = 2  # shared line width for all panels

svrs = [svr_poly, svr_lin, svr_rbf]
kernel_label = ['OLS', 'Linear','RBF' ]
model_color = ['m', 'c', 'g']
plt.close("all")
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
for ix, svr in enumerate(svrs):
    axes[ix].plot(X, svr.fit(X, y).predict(X), color=model_color[ix], lw=lw,
コード例 #15
0
# Initial wealth
# NOTE(review): `WealthDistribution` and `delta_m` come from earlier,
# unseen parts of the script.
N = len(WealthDistribution)

# Number of bins
N_bins = (int(N * delta_m))

# Making the initial histogram (density-normalised)
w_m, bins = np.histogram(WealthDistribution, N_bins, density=True)

# Finding the bin centers and converting to m array
m = 0.5 * (bins[1:] + bins[:-1])

# Keep only non-empty bins (the log below requires w > 0); the final two
# bins are dropped — NOTE(review): presumably noisy tail bins, confirm.
m_fit = m[w_m > 0][:-2]
w_fit = w_m[w_m > 0][:-2]

# fit_intercept=False because the intercept is supplied explicitly as the
# column of ones in the design matrix below.
model = OLS(fit_intercept=False)

# Fitting the wealth distribution to a Gibbs distribution:
# log w = c0 + c1 * m, i.e. w = exp(c0) * exp(c1 * m).
model.fit(np.c_[np.ones_like(m_fit), m_fit], np.log(w_fit))
print(np.exp(model.coef_[0]), model.coef_[1])

m_ = np.linspace(min(m_fit), max(m_fit), 10)

# Sanity check: histogram mass weighted by bin centers (mean wealth).
print(np.sum(w_m * m))

# Plotting the wealth distribution against a reference exponential curve
plt.semilogy(m_, 0.01 * np.exp(-0.01 * m_))
plt.semilogy(m, w_m, "bo")

plt.legend(["Ordinary Least squares", "Computed distribution"])
plt.grid(linestyle="--")
コード例 #16
0
ファイル: nearest_neighbour.py プロジェクト: MetinSa/FYS4150
    # Initial wealth
    m_0 = np.average(WealthDistribution)
    N = len(WealthDistribution)

    # Number of bins
    N_bins = (int(N * delta_m))

    # Making the initial histogram
    w_m, bins = np.histogram(WealthDistribution, N_bins, density=True)

    # Finding the bin centers and converting to m array
    m = 0.5 * (bins[1:] + bins[:-1])
    w_m = w_m[m > 600]
    m = m[m > 600]

    model = OLS(fit_intercept=True)
    model.fit(np.c_[m**(-1 - float(alpha[i]))], w_m)

    m_ = np.linspace(min(m), max(m), 1000)

    # Plotting the wealth distribution and parametrized solution
    plt.loglog(m,
               w_m,
               "-",
               color=color_list[i],
               label=r"$\alpha =$ %.2f" % float(alpha[i]))
    plt.loglog(m_,
               model.predict(np.c_[m_**(-1 - float(alpha[i]))]),
               "--",
               color=color_list[i])
コード例 #17
0
def OlsFromPoints(xvals, yvals):
    """Return an OLS model fitted to the given x/y point sequences.

    ``xvals`` is reshaped into a single-feature column matrix before the
    fit; ``yvals`` is passed through unchanged.
    """
    n_points = len(xvals)
    features = np.array(xvals).reshape(n_points, 1)
    regressor = OLS()
    regressor.fit(features, yvals)
    return regressor