Example #1
    def fit(self, loader: DataLoader, optimizer=None, loss_function=None) -> None:
        """
        Fits the model to the data.
        If no optimizer is passed in, the default optimizer is SGD.
        If no loss function is passed in, the default loss function is MSE.

        :returns: None; self.params are fit to the data.
        """
        if optimizer is None:
            optimizer = SGD(0.01)

        if loss_function is None:
            loss_function = mean_squared_error

        for X, y in loader:
            # Parameters are initialized lazily on the first batch, once the
            # input feature dimension is known.
            if self.params is None:
                self.params = Matrix([[Variable(random.random())] for _ in range(len(X[0]))])
                self.bias = Matrix([[Variable(random.random())]])

            output = self._evaluate(X)
            loss = loss_function(output, y)
            loss += self._regularize()
            self.params = optimizer.step(self.params, loss.get_grad(self.params))
            self.bias = optimizer.step(self.bias, loss.get_grad(self.bias))
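The SGD, Matrix, and Variable classes used above are not shown in this example. As a rough illustration only, assuming the optimizer exposes a step(params, grads) method that returns the updated parameters, a plain-NumPy stand-in could look like this (names below are my own):

import numpy as np

class SGDSketch:
    """Hypothetical stand-in for the SGD optimizer assumed by fit()."""
    def __init__(self, lr=0.01):
        self.lr = lr

    def step(self, params, grads):
        # Vanilla gradient descent update: theta <- theta - lr * dL/dtheta
        return params - self.lr * np.asarray(grads)

params = np.array([[0.5], [-0.3]])
grads = np.array([[0.2], [0.1]])
params = SGDSketch(lr=0.01).step(params, grads)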
Example #2
def sgd_test():

    # Load data
    if args.data_set == 'SwissRollData':
        Xtrain, Xtest, Ytrain, Ytest = loadSwissRollData()
    elif args.data_set == 'GMMData':
        Xtrain, Xtest, Ytrain, Ytest = loadGMMData()
    else:
        Xtrain, Xtest, Ytrain, Ytest = loadPeaksData()

    # Define the set of batch sizes to sweep (used only for testing)
    batch_size = np.geomspace(2, 2**8, 8)
    batch_size = [round_(i) for i in batch_size]

    # preprocess data - shuffle and split into different batch sizes (using batch_size list)
    Xtrain, Ytrain, test_sets, train_sets = preprocess_data(
        Xtest, Xtrain, Ytest, Ytrain)

    # train loop

    all_batches, all_labels = train_sets

    softmax = Softmax(Xtrain.shape[0] + 1, Ytrain.shape[0])
    loss_func = CrossEntropy(softmax.W)
    opt = SGD(lr=args.lr)

    accs_hyper_params_train = []
    accs_hyper_params_test = []

    for e in range(args.iter):
        acc_train = []
        loss_l = []
        for batch, labels in tqdm(zip(all_batches, all_labels),
                                  total=len(all_batches),
                                  file=sys.stdout):
            labels = labels.T

            ones = np.ones((1, batch.shape[-1]), dtype=int)
            batch = np.concatenate((batch, ones), axis=0)

            loss = loss_func(batch, labels)
            loss_l.append(loss)

            loss_func.grad_w(batch, labels)
            softmax.W = opt.step(loss_func.grad_W, softmax.W)
            loss_func.W = softmax.W

            output = softmax(batch)
            # calculate train error
            labels = get_index(labels)
            prediction = predict(output)

            acc_train = np.append(acc_train, prediction == labels, axis=0)

        print('Epoch {} train acc: {}  train loss: {}'.format(
            e, np.mean(acc_train), np.mean(loss_l)))

        accs_hyper_params_train.append(np.mean(acc_train))
        accs_hyper_params_test.append(
            np.mean(test_accuracy(softmax, test_sets)))

    plt.plot(range(args.iter), accs_hyper_params_train, label='Train Accuracy')
    plt.plot(range(args.iter),
             accs_hyper_params_test,
             label='Validation Accuracy')
    plt.title('SGD test: {} Set, Acc of lr={} and batch size={}'.format(
        args.data_set, args.lr, args.batch_size))
    plt.legend()
    plt.savefig(
        './Test_Figures/{} Set, Acc of lr={} and batch size={}.png'.format(
            args.data_set, args.lr, args.batch_size),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0)
    plt.show()
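The row of ones concatenated to each batch is the usual bias trick: with inputs stored as (features, samples), appending a constant row lets the last row of softmax.W act as the bias term, which is why the model is created with Xtrain.shape[0] + 1 input dimensions. A small, self-contained illustration of that layout (the variable names here are my own, not from the example):

import numpy as np

X = np.random.randn(3, 5)                   # 3 features, 5 samples (features x samples layout)
ones = np.ones((1, X.shape[-1]), dtype=int)
X_aug = np.concatenate((X, ones), axis=0)   # shape (4, 5); the last row feeds the bias
W = np.random.randn(4, 2)                   # (n_features + 1, n_classes)
logits = W.T @ X_aug                        # W[-1, :] is added to every sample via the ones row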
Example #3
error_function = MSE()

repeats = 10
is_linearly_separable = []
for targets in all_targets:
    for idx in range(1, repeats + 1):
        data = DataHandler(inputs, targets)

        linearly_separable = False
        for jdx in range(updates):
            input, target = data.sample(batch_size)

            output = network.forward(input)
            train_error = error_function(target, output)
            network.backward(error=error_function.grad())
            optimizer.step()

            full_input, full_target = data.full
            full_output = network.forward(full_input)
            accuracy = (np.sign(full_output) == full_target).mean()
            print("repeat: [{}/{}]".format(idx, repeats))
            print("update: ({}/{})".format(jdx, updates))
            print("   error: {:.6f}".format(train_error))
            print("   accuracy: {}%\n".format(int(100 * accuracy)))
            if accuracy == 1:
                linearly_separable = True
                break

        if linearly_separable:
            break
    is_linearly_separable.append(linearly_separable)
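The stopping criterion treats a target set as linearly separable as soon as sign(output) matches every label, i.e. the accuracy reaches 1. A toy illustration of that check, assuming raw network outputs and +/-1 targets (the values below are made up):

import numpy as np

outputs = np.array([0.8, -1.2, 0.3, -0.1])   # raw scores from the network
targets = np.array([1, -1, 1, -1])           # +/-1 labels
accuracy = (np.sign(outputs) == targets).mean()
print(accuracy == 1)   # True: every point lies on the correct side of the boundary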
Example #4
class neural_net(object):
    def __init__(self, input_dims, layers_info, opts):
        self.layers_info = layers_info
        self.num_layers = len(layers_info)
        self.params = {}
        self.save_prefix = opts.save_prefix
        for ix in xrange(len(layers_info)):
            if ix == 0:
                input_dim = input_dims
            else:
                input_dim = layers_info[ix - 1][1]
            output_dim = layers_info[ix][1]
            if layers_info[ix][0] != "batchnorm":
                layer_object = DenseLayer(input_dim,
                                          output_dim,
                                          layers_info[ix][2],
                                          dropout=layers_info[ix][3])
            else:
                layer_object = BatchNormLayer(input_dim)
            self.params[layers_info[ix][0] +
                        "_{}".format(ix)] = layer_object.params
            setattr(self, 'layer_{}'.format(ix), layer_object)
        self.optimizer = SGD(self.params,
                             'categorical_cross_entropy',
                             lr=opts.lr,
                             l2_penalty=opts.l2,
                             momentum=opts.momentum)

    def forward(self, input_tensor, test=False):
        output = input_tensor
        for ix in xrange(self.num_layers):
            output = getattr(self, 'layer_{}'.format(ix))(output, test=test)
        return output

    def backward(self, loss_grad):
        back_grad = loss_grad
        for ix in xrange(self.num_layers - 1, -1, -1):
            back_grad = getattr(self,
                                'layer_{}'.format(ix)).backward(back_grad)

    def save_params(self, filename):
        params = {}
        for layer in self.params:
            params[layer] = {}
            for param in self.params[layer]:
                params[layer][param] = self.params[layer][param].value
                if "batchnorm" in layer:
                    params[layer]["buffers"] = {}
                    index = int(layer.split('_')[-1])
                    for buffer_key in getattr(
                            self, 'layer_{}'.format(index)).buffers:
                        params[layer]["buffers"][buffer_key] = getattr(
                            self, 'layer_{}'.format(index)).buffers[buffer_key]
        with open(filename, "wb") as f:
            cp.dump(params, f)

    def load_params(self, filename):
        with open(filename, "rb") as f:
            saved_params = cp.load(f)
        for layer in saved_params:
            assert layer in self.params, "Tried to load layer %s, but layer not found" % (
                layer)
            for param in saved_params[layer]:
                if param == "buffers":
                    assert "batchnorm" in layer, "Error. Only BatchNorm currently has registered params"
                    index = int(layer.split('_')[-1])
                    for buffer_key in saved_params[layer][param]:
                        getattr(self, 'layer_{}'.format(
                            index)).buffers[buffer_key] = saved_params[layer][
                                param][buffer_key]
                else:
                    self.params[layer][param].value = saved_params[layer][
                        param]

    def compute_numerical_grad(self, layer, param, i, j, X, y, eps=0.0001):
        original_params = deepcopy(self.params[layer][param].value)
        self.params[layer][param].value[i][j] = original_params[i][j] + eps
        loss_pos, _ = self.optimizer.loss(y, self.forward(X))
        self.params[layer][param].value[i][j] = original_params[i][j] - eps
        loss_neg, _ = self.optimizer.loss(y, self.forward(X))
        num_grad = (loss_pos - loss_neg) / (2 * eps)
        self.params[layer][param].value = original_params
        return num_grad

    def test_layer_gradient(self, layer, param, X, y):
        max_abs_difference = -1
        for i in xrange(self.params[layer][param].value.shape[0]):
            for j in xrange(self.params[layer][param].value.shape[1]):
                num_gradient = self.compute_numerical_grad(
                    layer, param, i, j, X, y)
                abs_difference = abs(
                    num_gradient - self.params[layer][param].grad[i][j]) / abs(
                        num_gradient + self.params[layer][param].grad[i][j] +
                        np.finfo(float).eps)
                max_abs_difference = max(abs_difference, max_abs_difference)
        return max_abs_difference

    def train_batch(self, X, y):
        self.optimizer.zero_grads()
        loss, loss_grad = self.optimizer.loss(y, self.forward(X))
        self.backward(loss_grad)
        # print self.test_layer_gradient('hidden_0', 'b', X, y)
        self.optimizer.step()
        return loss

    def predict(self, X):
        output = self.forward(X, test=True)
        output = np.argmax(output, axis=-1)
        return output

    def fit(self,
            X_train,
            y_train,
            X_val,
            y_val,
            n_epochs=200,
            batch_size=32,
            return_history=False):
        y_labels_val = np.argmax(y_val, axis=-1)
        y_labels_train = np.argmax(y_train, axis=-1)
        bar = Progbar(n_epochs)
        if return_history:
            history = {
                'train_loss': [],
                'val_loss': [],
                'train_acc': [],
                'val_acc': [],
                'best_val_acc': None,
                'Model_Save_Prefix': self.save_prefix
            }
        best_val_acc = None
        for epoch in xrange(n_epochs):
            # Shuffle the training data
            index = np.arange(X_train.shape[0])
            np.random.shuffle(index)
            X = X_train[index]
            y = y_train[index]
            train_loss = 0.
            for ix in xrange(0, X.shape[0], batch_size):
                batch_x = X[ix:ix + batch_size]
                batch_y = y[ix:ix + batch_size]
                loss_train = self.train_batch(batch_x, batch_y)
                train_loss += loss_train * batch_x.shape[0]
            train_loss /= X.shape[0]
            train_acc = accuracy_score(y_labels_train, self.predict(X_train))
            # Computing Validation Metrics
            val_loss, _ = self.optimizer.loss(y_val,
                                              self.forward(X_val, test=True))
            val_acc = accuracy_score(y_labels_val, self.predict(X_val))
            if best_val_acc is None or val_acc > best_val_acc:
                best_val_acc = val_acc
                model_file = self.save_prefix + "acc_%.4f_epoch_%d" % (
                    val_acc, epoch + 1)
                self.save_params(model_file)
            if return_history:
                history['train_loss'].append(train_loss)
                history['val_loss'].append(val_loss)
                history['train_acc'].append(train_acc)
                history['val_acc'].append(val_acc)
            bar.update(epoch + 1,
                       values=[("train_loss", train_loss),
                               ("val_loss", val_loss),
                               ("train_acc", train_acc), ("val_acc", val_acc)])
        if return_history:
            history['best_val_acc'] = best_val_acc
            return history
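compute_numerical_grad above is a per-entry central-difference check: each weight is nudged by +eps and -eps and the resulting loss difference approximates the analytic gradient. A standalone sketch of the same idea on a toy function (my own example, not part of the class):

import numpy as np

def numerical_grad(f, theta, eps=1e-4):
    # Central difference: dL/dtheta_ij ~= (f(theta + eps) - f(theta - eps)) / (2 * eps)
    grad = np.zeros_like(theta)
    for i in range(theta.shape[0]):
        for j in range(theta.shape[1]):
            original = theta[i, j]
            theta[i, j] = original + eps
            loss_pos = f(theta)
            theta[i, j] = original - eps
            loss_neg = f(theta)
            theta[i, j] = original
            grad[i, j] = (loss_pos - loss_neg) / (2 * eps)
    return grad

theta = np.array([[1.0, -2.0], [0.5, 3.0]])
numeric = numerical_grad(lambda t: np.sum(t ** 2), theta)
assert np.allclose(numeric, 2 * theta, atol=1e-6)   # analytic gradient of sum(theta**2)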
Example #5
import numpy as np
from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for i in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    loss.backward()
    optim.step()
    print(loss)
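The optimizers.SGD used here is not shown; in autograd-style toy frameworks like this one, step() typically walks the parameter list, applies the update, and clears the accumulated gradients. A hypothetical sketch of that pattern, assuming each parameter tensor exposes .data and .grad.data:

class SGDSketch:
    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha

    def step(self, zero=True):
        for p in self.parameters:
            p.data -= self.alpha * p.grad.data   # gradient descent update
            if zero:
                p.grad.data *= 0                 # reset gradients for the next backward pass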