def prediction(train_features, train_targets, test_features, test_targets):
    """Ridge regression predictions."""
    # Test ridge regression predictions.
    rr = RidgeRegression(cv='loocv')
    reg = rr.find_optimal_regularization(X=train_features, Y=train_targets)
    coef = rr.RR(X=train_features, Y=train_targets, omega2=reg)[0]

    # Test the model.
    sumd = 0.
    for tf, tt in zip(test_features, test_targets):
        p = np.dot(coef, tf)
        sumd += (p - tt)**2
    error = (sumd / len(test_features))**0.5

    return error
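# For reference, below is a minimal numpy sketch of the closed-form ridge
# solution that a call such as rr.RR(X, Y, omega2=reg) is expected to
# return. This is an assumption about the library's internals (it may
# centre or decompose the data differently); the synthetic data is
# purely hypothetical.
import numpy as np


def ridge_coefficients(X, Y, omega2):
    """Closed-form ridge solution: w = (X.T X + omega2 * I)^-1 X.T Y."""
    n_features = X.shape[1]
    A = X.T @ X + omega2 * np.eye(n_features)
    return np.linalg.solve(A, X.T @ Y)


# Hypothetical synthetic data to exercise the helper.
rng = np.random.default_rng(0)
X = rng.normal(size=(50, 5))
Y = X @ np.arange(1., 6.) + rng.normal(scale=0.1, size=50)
print(ridge_coefficients(X, Y, omega2=1e-3))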
Example #2
    def test_rr_bootstrap(self):
        """Test ridge regression predictions with bootstrap fitting."""
        train_features, train_targets, test_features, test_targets = get_data()

        # Test ridge regression predictions.
        rr = RidgeRegression(cv='bootstrap')
        reg = rr.find_optimal_regularization(X=train_features, Y=train_targets)
        coef = rr.RR(X=train_features, Y=train_targets, omega2=reg)[0]

        # Test the model.
        sumd = 0.
        for tf, tt in zip(test_features, test_targets):
            p = np.dot(coef, tf)
            sumd += (p - tt)**2
        print('Ridge regression prediction:', (sumd / len(test_features))**0.5)
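# Conceptually, find_optimal_regularization searches for the omega2 that
# minimizes a cross-validation error. The sketch below is a generic grid
# search using the standard closed-form leave-one-out residuals for ridge,
# e_i / (1 - H_ii); it is an illustration only, not the library's actual
# implementation (which here resamples with the bootstrap).
import numpy as np


def loocv_rmse(X, Y, omega2):
    """Leave-one-out RMSE for ridge via the hat-matrix identity."""
    d = X.shape[1]
    H = X @ np.linalg.solve(X.T @ X + omega2 * np.eye(d), X.T)
    residuals = (Y - H @ Y) / (1. - np.diag(H))
    return np.sqrt(np.mean(residuals ** 2))


def grid_search_omega2(X, Y, grid=np.logspace(-6, 2, 17)):
    """Pick the regularization strength with the lowest LOOCV RMSE."""
    return min(grid, key=lambda w2: loocv_rmse(X, Y, w2))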
Example #3
def predict(train_features, train_targets, test_features, test_targets):
    """Function to perform the prediction."""
    data = {}

    # Set up the ridge regression function.
    rr = RidgeRegression(W2=None, Vh=None, cv='loocv')
    b = rr.find_optimal_regularization(X=train_features, Y=train_targets)
    coef = rr.RR(X=train_features, Y=train_targets, omega2=b)[0]

    # Test the model.
    sumd = 0.
    err = []
    for tf, tt in zip(test_features, test_targets):
        p = np.dot(coef, tf)
        sumd += (p - tt)**2
        err.append(abs(p - tt))  # absolute error per test point
    error = (sumd / len(test_features))**0.5

    data['result'] = error
    data['size'] = len(train_targets)

    return data
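# Because predict returns both the error and the training-set size, it
# lends itself to building a learning curve. A hypothetical usage sketch
# (the subset sizes are illustrative):
results = []
for n in (50, 100, 200, 400):
    data = predict(train_features[:n], train_targets[:n],
                   test_features, test_targets)
    results.append((data['size'], data['result']))
for size, rmse in results:
    print('n = {}: RMSE = {:.4f}'.format(size, rmse))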
Example #4
print(np.shape(test))
print(np.shape(target))

# Standardize the training and test data on the same scale.
std = standardize(train_matrix=train, test_matrix=test)

# Standardize the training targets.
train_targets = target_standardize(target)
# Note that predictions will now be made on the standardized scale.
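# For clarity, a sketch of the z-score convention assumed here: features
# are scaled per column with the training statistics, and predictions are
# mapped back with p * std + mean (as done below). The helper is an
# assumption; standardize in the library may differ in detail.
def z_score(train_matrix, test_matrix):
    mean = train_matrix.mean(axis=0)
    std = train_matrix.std(axis=0)
    return {'train': (train_matrix - mean) / std,
            'test': (test_matrix - mean) / std}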

# ## Model example 1 - Ridge regression.

# In[4]:

# Test ridge regression predictions.
rr = RidgeRegression()
reg = rr.find_optimal_regularization(X=std['train'], Y=train_targets['target'])
coef = rr.RR(X=std['train'], Y=train_targets['target'], omega2=reg)[0]
# Test the model.
sumd = 0.
rr_predictions = []
for tf, tt in zip(std['test'], afunc(test)):
    p = ((np.dot(coef, tf)) * train_targets['std']) + train_targets['mean']
    rr_predictions.append(p)
    sumd += (p - tt)**2
# Plotting.
plt3d = plt.figure(0).gca(projection='3d')
# Plot training data.
plt3d.scatter(train[:, 0], train[:, 1], target, color='b')
# Plot exact function.
plt3d.plot_surface(test_x1,
Example #5
# In[12]:

df_std = (df - df.mean()) / df.std()
df_std = df_std.fillna(0.)  # fillna returns a new frame; assign it back.
parallel_plot(df_std, num=4)
plt.savefig('std.png')

# ## Predictions <a name="predictions"></a>
# [(Back to top)](#head)
#
# We can now try predictions, starting with ridge regression. It clearly performs very well on this data; based on these results, it is unlikely you would need to move to more complex models.

# In[13]:

# Set up the ridge regression function.
rr = RidgeRegression(W2=None, Vh=None, cv='loocv')
b = rr.find_optimal_regularization(X=train_features, Y=train_targets)
coef = rr.RR(X=train_features, Y=train_targets, omega2=b)[0]

# Test the model.
sumd = 0.
err = []
pred = []
for tf, tt in zip(test_features, test_targets):
    p = np.dot(coef, tf)
    pred.append(p)
    sumd += (p - tt)**2
    err.append(abs(p - tt))  # absolute error per test point
error = (sumd / len(test_features))**0.5
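# The explicit loop above is equivalent to a vectorized computation once
# the predictions are collected into arrays; a short numpy sketch:
pred = np.asarray(pred)
targets = np.asarray(test_targets)
err = np.abs(pred - targets)  # per-point absolute error
error = np.sqrt(np.mean((pred - targets) ** 2))  # RMSE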
Example #6
    def get_data_scale(self, split, set_size=None, p_error=None, result=None):
        """Get the data for each sub-set of data and scales it accordingly.

        Parameters
        ----------
        split : int
            Which sub-set of data within the hierarchy level.
        set_size : list
            Size of the sub-set of data/features on which the model is based.
        p_error : list
            The prediction error for plain vanilla ridge regression.
        result : list
            Contains all the coefficients and omega2 for all training data.
        """
        ridge = RidgeRegression()
        # Don't want the targets to be scaled with the global statistics.
        self.s_tar, self.m_tar = None, None
        train_targets, train_features, _, _ = self.hv.get_subset_data(
            self.index_split, self.indicies)

        test_targets, test_features, _, _ =\
            self.hv.get_subset_data(self.index_split, self.indicies, split)
        # Get some statistics from the data.
        (self.s_tar, self.m_tar, self.s_feat, self.m_feat, train_targets,
         train_features, test_features) = self.PC.scaling_data(
             train_features, train_targets, test_features, self.s_tar,
             self.m_tar, self.s_feat, self.m_feat)

        if self.featselect_featconst or self.featselect_featvar:
            # Search for the subset of features.
            if self.selected_features is None:
                if self.glob_feat1 is None and self.glob_tar1 is None:
                    #  Search with minimum data.
                    FS = feature_selection(train_features, train_targets)
                else:
                    # Search with maximum data.
                    FS = feature_selection(self.glob_feat1, self.glob_tar1)
                self.selected_features = FS.selection(self.select_limit)
                if not bool(self.selected_features):
                    return (set_size, p_error, result)

        if self.featselect_featvar:
            (set_size, p_error,
             result) = self.reg_feat_var(train_features, train_targets,
                                         test_features, test_targets, ridge,
                                         set_size, p_error, result)
            return (set_size, p_error, result)
        else:
            if self.featselect_featconst:
                # Get the data for the chosen feature set.
                train_features = np.take(train_features,
                                         self.selected_features[str(
                                             self.feat_sub)][0],
                                         axis=1)
                test_features = np.take(test_features,
                                        self.selected_features[str(
                                            self.feat_sub)][0],
                                        axis=1)
            set_size, p_error, result \
                = self.reg_data_var(
                    train_features, train_targets,
                    test_features, test_targets, ridge,
                    set_size, p_error, result)
            return (set_size, p_error, result)