Code Example #1
import typing

import numpy as np

# SoftmaxModel, cross_entropy_loss and calculate_accuracy are defined in the
# assignment's own task code, which this listing does not include.


def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Evaluate ~5 times per epoch; max() avoids modulo-by-zero on tiny datasets
    num_steps_per_val = max(num_batches_per_epoch // 5, 1)
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm: forward pass,
            # backward pass, then a plain SGD step for every layer
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            for l in range(len(model.ws)):
                model.ws[l] = model.ws[l] - learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch,
                                                 model.forward(X_batch))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
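
Below is a minimal, hypothetical usage sketch for the function above. load_datasets() and the bare SoftmaxModel() call are assumptions standing in for the assignment's own data-loading and model-construction code, which this listing does not show; the hyperparameter values are illustrative only.

# Hypothetical usage sketch. load_datasets() is assumed to return the six
# arrays (X_train, Y_train, X_val, Y_val, X_test, Y_test) unpacked by train().
datasets = load_datasets()
model = SoftmaxModel()  # constructor arguments omitted; assignment-specific
model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    model,
    datasets,
    num_epochs=50,
    learning_rate=0.01,
    batch_size=128,
    use_shuffle=True,
    use_momentum=False,
    momentum_gamma=0.9)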
Code Example #2
File: task2.py, Project: KelianB/CVDL-Assignment-1
import typing

import numpy as np

# SoftmaxModel, cross_entropy_loss and calculate_accuracy are defined in the
# assignment's own task code, which this listing does not include.


def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Evaluate ~5 times per epoch; max() avoids modulo-by-zero on tiny datasets
    num_steps_per_val = max(num_batches_per_epoch // 5, 1)

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping; start at infinity so the first
    # measured validation loss always counts as an improvement
    last_val_loss = np.inf
    best_val_loss = np.inf
    best_weights = None
    increased_last_time = False

    # Store the previous per-layer update term (used by the momentum step)
    last_weights_update = [np.zeros_like(w) for w in model.ws]

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                # Data augmentation: shift each image horizontally by a random
                # offset in [-2, 2]. np.roll sums an array-valued shift over a
                # single axis, so each image is rolled individually instead.
                shift = np.random.randint(low=-2, high=3, size=X_batch.shape[0])
                X_local = np.empty_like(X_batch[:, :784])
                for i in range(X_local.shape[0]):
                    X_local[i] = np.roll(X_batch[i, :784], shift[i])
                # Re-append the bias column that the slicing above dropped
                ones = np.ones((X_local.shape[0], 1))
                X_local = np.concatenate((X_local, ones), axis=1)

            # Train on the (possibly augmented) batch
            train_output = model.forward(X_local)
            model.backward(X_local, train_output, Y_batch)

            # Per-layer weight update: classical momentum or plain SGD
            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = (momentum_gamma * last_weights_update[l]
                                   - learning_rate * model.grads[l])
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # To keep inputs and labels aligned, shuffle an array of indices
        # and apply the same ordering to both
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy the weights: storing the list itself would alias arrays
            # that the in-place updates above keep mutating
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        # Stop once the validation loss has increased two epochs in a row
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss

    return model, train_loss, val_loss, train_accuracy, val_accuracy
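
For reference, the momentum branch of the update loop above implements the classical heavy-ball recurrence, with \gamma = momentum_gamma, \alpha = learning_rate, and v_t stored per layer in last_weights_update:

v_t = \gamma \, v_{t-1} - \alpha \, \nabla_w L(w_t)
w_{t+1} = w_t + v_t

With use_momentum disabled, the same loop reduces to plain gradient descent, w_{t+1} = w_t - \alpha \nabla_w L(w_t).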