Esempio n. 1
0
 def __call__(self, model: LinearModel, tx, y, **kwargs):
     """
     Fit ``model`` with the closed-form least-squares solution.

     Solves the normal equations ``(tx^T tx) w = tx^T y`` and stores the
     resulting weights on the model through ``model.set_param``.

     :param model: model whose parameters are overwritten with the solution
     :param tx: sample
     :param y: labels
     :param kwargs: not used by this method
     :raises numpy.linalg.LinAlgError: if ``tx^T tx`` is singular
     """
     tx_t = tx.transpose()
     # Solve the linear system rather than forming the explicit inverse:
     # same solution, but cheaper and numerically more stable.
     w = np.linalg.solve(tx_t @ tx, tx_t @ y)
     model.set_param(w)
Esempio n. 2
0
 def __call__(self, model: LinearModel, tx, y, **kwargs):
     """
     Performs Ridge regression.

     Solves ``(tx^T tx + lambda_ / (2 * N) * I) w = tx^T y`` with
     ``N = len(y)`` and stores the weights through ``model.set_param``.

     :param model: model whose parameters are overwritten with the solution
     :param tx: sample
     :param y: labels
     :param lambda_: ridge hyper-parameter (keyword argument, default 10**-5)
     :raises numpy.linalg.LinAlgError: if the regularised matrix is singular
     """
     lambda_ = kwargs.get('lambda_', 10**-5)
     tx_t = np.transpose(tx)
     # Penalty added to the Gram matrix: lambda_ / (2 * N) on the diagonal.
     penalty = lambda_ / (2 * len(y)) * np.eye(tx.shape[1])
     # Solve the linear system rather than forming the explicit inverse:
     # same solution, but cheaper and numerically more stable.
     w = np.linalg.solve(tx_t @ tx + penalty, tx_t @ y)
     model.set_param(w)
Esempio n. 3
0
def least_squares(y, tx):
    """
    Fit a linear model with the LS optimizer and score it on held-out data.

    The labels and samples are stacked into one array, divided into two
    parts by ``split``, the model is fitted on the first part and the MSE
    callable is evaluated on the second part.

    Side effect: re-seeds NumPy's global random generator with 0.

    :param y: labels, either 1-D or a single column
    :param tx: sample, 2-D with one row per label
    :return: tuple ``(model.get_params(), error)``
    """
    np.random.seed(0)
    # Reshape once and slice on the reshaped width: the previous code
    # indexed ``y.shape[1]`` on the raw input, which raises IndexError for
    # 1-D labels even though they were reshaped for the stack below.
    y = np.reshape(y, (-1, 1))
    n_targets = y.shape[1]
    data = np.hstack([y, tx])

    model = LinearModel((tx.shape[1], n_targets))
    optimizer = LS()

    training, test = split(data)
    # Label columns come first in ``data``, sample columns after them.
    optimizer(model, training[:, n_targets:], training[:, :n_targets])
    error = MSE()(model(test[:, n_targets:]), test[:, :n_targets])

    return model.get_params(), error
Esempio n. 4
0
def ridge_regression(y, tx, lambda_):
    """
    Fit a linear model with the Ridge optimizer and score it on held-out data.

    The labels and samples are stacked into one array, divided into two
    parts by ``split``, the model is fitted on the first part and the MSE
    callable is evaluated on the second part.

    Side effect: re-seeds NumPy's global random generator with 0.

    :param y: labels, either 1-D or a single column
    :param tx: sample, 2-D with one row per label
    :param lambda_: ridge hyper-parameter, forwarded to the optimizer
    :return: tuple ``(model.get_params(), error)``
    """
    np.random.seed(0)
    # Reshape once and slice on the reshaped width: the previous code
    # indexed ``y.shape[1]`` on the raw input, which raises IndexError for
    # 1-D labels even though they were reshaped for the stack below.
    y = np.reshape(y, (-1, 1))
    n_targets = y.shape[1]
    data = np.hstack([y, tx])

    model = LinearModel((tx.shape[1], n_targets))
    optimizer = Ridge()

    training, test = split(data)

    # Label columns come first in ``data``, sample columns after them.
    optimizer(model,
              training[:, n_targets:],
              training[:, :n_targets],
              lambda_=lambda_)
    error = MSE()(model(test[:, n_targets:]), test[:, :n_targets])

    return model.get_params(), error
Esempio n. 5
0
 def __init__(self, x, epochs=0):
     """
     Train one linear model per column of ``x`` from the other columns.

     Rows containing the value -999 are discarded and the remaining rows
     are passed through a ``MinMaxNormalizer`` (kept on
     ``self.normalizer``). For every column a ``LinearModel`` stored in
     ``self.models`` is then trained with ``LinearSGD`` to predict that
     column from all the others.

     :param x: 2-D data array
     :param epochs: epoch budget per column; it is consumed in rounds of
         50 epochs and the learning rate (starting at 10**-1) is
         multiplied by 0.9 after each round. With the default of 0 no
         training round is run.
     """
     # Keep only the rows in which no entry equals -999.
     row_has_marker = np.any(x == -999, axis=1)
     x = x[~row_has_marker]
     self.normalizer = MinMaxNormalizer()
     x = self.normalizer(x)

     n_columns = x.shape[1]
     self.models = [
         LinearModel((n_columns - 1, 1)) for _ in range(n_columns)
     ]
     optimizer = LinearSGD()

     for target_col in range(n_columns):
         # Select every column except the one being predicted.
         inputs_mask = np.repeat(True, n_columns)
         inputs_mask[target_col] = False

         step_epoch = 50
         lr_reduc = 0.9
         lr = 10**-1
         n_rounds = int(epochs / step_epoch)
         for _ in range(n_rounds):
             optimizer(self.models[target_col],
                       x[:, inputs_mask],
                       np.reshape(x[:, target_col], (-1, 1)),
                       lr=lr,
                       epochs=step_epoch,
                       batch_size=20,
                       num_batches=100)
             lr *= lr_reduc
         sys.stdout.write("\rLearned %d\r" % target_col)
Esempio n. 6
0
    def __call__(self, model: LinearModel, tx, y, **kwargs):
        """
        Run gradient descent on ``model`` using the whole of ``tx`` / ``y``.

        Each epoch computes the gradient on all samples, takes one step and
        stops early once the summed absolute gradient drops below
        ``lr * 10**-2 / <size of the parameter array>``.

        :param model: model updated through ``get_params`` / ``set_param``
        :param tx: sample
        :param y: labels
        :param loss: loss object providing ``gradient(prediction, y)``
            (keyword argument, default ``LogCosh()``)
        :param lr: learning rate (keyword argument, default .01)
        :param epochs: maximum number of update steps (keyword argument,
            default 100)
        """
        # The default loss is only constructed when none was supplied.
        if 'loss' in kwargs:
            criterion = kwargs['loss']
        else:
            criterion = LogCosh()
        step_size = kwargs.get('lr', .01)
        max_epochs = kwargs.get('epochs', 100)

        for _ in range(max_epochs):
            tx_t = np.transpose(tx, (1, 0))
            grad = np.dot(tx_t, criterion.gradient(model(tx), y))
            model.set_param(model.get_params() - step_size * grad)

            # Convergence test, evaluated after the step has been applied.
            tolerance = step_size * 10**-2 / model.get_params().size
            if np.sum(np.abs(grad)) < tolerance:
                break
Esempio n. 7
0
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """
    Fit a linear model starting from ``initial_w`` and score it on held-out
    data.

    The labels and samples are stacked into one array, divided into two
    parts by ``split``, the model is fitted on the first part and the MSE
    callable is evaluated on the second part.

    Side effect: re-seeds NumPy's global random generator with 0.

    :param y: labels, either 1-D or a single column
    :param tx: sample, 2-D with one row per label
    :param initial_w: parameters set on the model before optimisation
    :param max_iters: forwarded to the optimizer as ``epochs`` and as the
        first entry of ``epoch_step``
    :param gamma: forwarded to the optimizer as ``lr``
    :return: tuple ``(model.get_params(), error)``
    """
    np.random.seed(0)
    # Reshape once and slice on the reshaped width: the previous code
    # indexed ``y.shape[1]`` on the raw input, which raises IndexError for
    # 1-D labels even though they were reshaped for the stack below.
    y = np.reshape(y, (-1, 1))
    n_targets = y.shape[1]
    data = np.hstack([y, tx])

    model = LinearModel((tx.shape[1], n_targets))
    # NOTE(review): ``LS`` is used here although the keyword arguments below
    # (epochs, epoch_step, batch_size, lr, ...) look like settings for an
    # iterative optimizer -- confirm that this is the intended optimizer.
    optimizer = LS()

    training, test = split(data)
    model.set_param(initial_w)
    # Label columns come first in ``data``, sample columns after them.
    optimizer(model,
              training[:, n_targets:],
              training[:, :n_targets],
              epochs=max_iters,
              epoch_step=(max_iters, 1),
              num_batches=1,
              batch_size=1,
              lr=gamma,
              regularize=0)
    error = MSE()(model(test[:, n_targets:]), test[:, :n_targets])

    return model.get_params(), error
Esempio n. 8
0
    def __call__(self, model: LinearModel, tx, y, **kwargs):
        """
        Performs Stochastic Gradient Descent.

        Training runs in steps of ``epoch_step[0]`` epochs; after each step
        the learning rate is multiplied by ``epoch_step[1]``. Only
        ``int(epochs / epoch_step[0])`` whole steps are run. Training stops
        early when the absolute gradient summed over one epoch falls below
        ``lr * 10**-2 / <size of the parameter array>``.

        :param model: model updated through ``get_params`` / ``set_param``
        :param tx: sample
        :param y: labels
        :param batch_size: size of the batches (default 1)
        :param num_batches: number of batches to learn per epoch; a supplied
            value is capped at ``tx.shape[0]`` (default 1000, not capped)
        :param loss: loss function (default ``LogCosh()``)
        :param lr: learning rate (default .01)
        :param epochs: number of times to go over the dataset (default 100)
        :param epoch_step: pair ``(epochs per step, learning-rate factor)``
            (default ``(50, 0.75)``)
        """
        batch_size = kwargs.get('batch_size', 1)
        num_batches = (min(kwargs['num_batches'], tx.shape[0])
                       if 'num_batches' in kwargs else 1000)
        # The default loss is only constructed when none was supplied.
        loss = kwargs['loss'] if 'loss' in kwargs else LogCosh()
        lr = kwargs.get('lr', .01)
        epochs = kwargs.get('epochs', 100)
        epoch_step = kwargs.get('epoch_step', (50, 0.75))

        for _step in range(int(epochs / epoch_step[0])):
            for _epoch in range(epoch_step[0]):
                acc_grad = 0

                for batch_y, batch_tx in batch_iter(y, tx, batch_size,
                                                    num_batches):
                    # One forward pass per batch; its output feeds the
                    # gradient directly instead of being recomputed.
                    out = model(batch_tx)
                    grad = np.dot(np.transpose(batch_tx, (1, 0)),
                                  loss.gradient(out, batch_y))
                    model.set_param(model.get_params() - lr * grad)
                    acc_grad += np.sum(np.abs(grad))

                if acc_grad < lr * 10**-2 / model.get_params().size:
                    return
            lr *= epoch_step[1]
Esempio n. 9
0
    ax.set_xticks(np.arange(data.shape[1] + 1) - .5, minor=True)
    ax.set_yticks(np.arange(data.shape[0] + 1) - .5, minor=True)
    ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
    ax.tick_params(which="minor", bottom=False, left=False)

    return im, cbar


# Script entry point: loads the training array and iterates over column pairs.
if __name__ == "__main__":
    # Directory two levels above the directory that contains this file.
    path = os.path.split(
        os.path.split(os.path.dirname(os.path.abspath(__file__)))[0])[0]
    # NOTE(review): the backslash separators only form a valid path on
    # Windows -- confirm whether other platforms need to be supported.
    data = np.load(file=path + '\\resources\\' + 'train.npy')

    loss = LogCosh()
    model = LinearModel((3, 1))
    # Presumably forwarded to the optimizer later in the script (the call is
    # not visible in this excerpt) -- TODO confirm.
    kwargs = {
        'batch_size': 25,
        'loss': loss,
        'lr': 10**-1,
        'epochs': 1000,
        'epoch_step': (100, .75)
    }
    optimizer = LinearSGD()
    n_models = 1

    # Visit every column pair (i, j) with j > i, starting at column 32.
    for i in range(32, data.shape[1]):
        for j in range(i + 1, data.shape[1]):
            # Skip pairs involving columns 8, 19 or 28. With i starting at 32
            # neither index can take those values, so this guard never fires
            # for the ranges above.
            if i in [8, 19, 28] or j in [8, 19, 28]:
                continue
            # Initial "best so far" bookkeeping for this pair.
            min_error, best = np.inf, None
Esempio n. 10
0
#
#     ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)
#     ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)
#     ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
#     ax.tick_params(which="minor", bottom=False, left=False)
#
#     return im, cbar


# Script entry point: loads the training array and iterates over its columns.
if __name__ == "__main__":
    # Directory two levels above the directory that contains this file.
    path = os.path.split(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0])[0]
    # NOTE(review): the backslash separators only form a valid path on
    # Windows -- confirm whether other platforms need to be supported.
    data = np.load(file=path + '\\resources\\' + 'train.npy')

    # Drop the first two columns, then apply MeanFilling followed by
    # MinMaxNormalizer to what remains.
    data = MinMaxNormalizer()(MeanFilling(data[:, 2:])(data[:, 2:]))
    loss = LogCosh()
    model = LinearModel((data.shape[1], 1))
    # Presumably forwarded to the optimizer later in the script (the call is
    # not visible in this excerpt) -- TODO confirm.
    kwargs = {'batch_size': 25, 'loss': loss, 'lr': 10**-1, 'epochs': 1000, 'epoch_step': (100, .75)}
    optimizer = LinearSGD()
    n_models = 2

    # One boolean flag per column, all initially True.
    mask = np.repeat(True, data.shape[1])

    for i in range(2, data.shape[1]):
        # Columns whose flag has already been cleared are skipped.
        if not mask[i]:
            continue
        # Re-seed so every column starts from the same random sequence.
        np.random.seed(0)
        min_error, best = np.inf, None
        mask[i] = False
        for it in range(n_models):
            # Random initial parameters in [-1, 1): one row per column still
            # flagged True, plus one extra row.
            model.set_param(np.random.uniform(-1, 1, (np.sum(mask) + 1, 1)))
            train, test = split(data)
Esempio n. 11
0
import numpy as np

from src.functions.loss import LogCosh
from src.model.regression.linear_model import LinearModel
from src.optimization.linear import LinearSGD
from src.preconditioning.feature_filling import MeanFilling

# Script entry point: prints a per-column class-separability score.
if __name__ == "__main__":
    # Directory two levels above the directory that contains this file.
    path = os.path.split(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0])[0]
    # os.path.join uses the platform's separator; the previous hard-coded
    # backslashes only produced a valid path on Windows.
    data = np.load(file=os.path.join(path, 'resources', 'train.npy'))

    # Drop the first two columns and apply MeanFilling to what remains.
    data = MeanFilling(data[:, 2:])(data[:, 2:])
    # data = MinMaxNormalizer()(data)
    # The objects below are not used by the visible part of the script; they
    # are kept in case code beyond this excerpt relies on them.
    loss = LogCosh()
    model = LinearModel((data.shape[1], 1))
    kwargs = {'batch_size': 25, 'loss': loss, 'lr': 10**-1, 'epochs': 1000, 'epoch_step': (100, .75)}
    optimizer = LinearSGD()
    n_models = 2

    # Row masks for the two groups, selected by the value in column 1.
    # They do not depend on the loop variable, so they are built once.
    # NOTE(review): column 1 is read after the first two columns were
    # dropped above -- confirm it still holds the intended 0/1 indicator.
    group0 = data[:, 1] == 0
    group1 = data[:, 1] == 1

    separability = []
    for i in range(2, data.shape[1]):
        values0 = data[group0, i]
        values1 = data[group1, i]
        # Floor the summed variance to avoid dividing by (almost) zero.
        sum_var = np.max([np.var(values0) + np.var(values1), 0.000001])
        print(sum_var)
        # Score: absolute difference of the group means over the summed
        # group variances.
        separability.append(np.abs(np.mean(values0) - np.mean(values1)) / sum_var)

    print(separability)