Example 1
    def validation_step(self, batch, batch_idx):
        # `batch` is a mapping of dataset name -> (inputs, labels); the second
        # output of self.D is used as the class prediction.
        for key, data in batch.items():
            x, y = data
            _, y_pred, _ = self.D(x)

            loss = F.cross_entropy(y_pred, y)
            self.log(f"{key}/loss", loss)

            acc = accuracy(y_pred, y)
            self.log(f"{key}/accuracy", acc)
Example 2
def test_APU():
    v1 = numpy.array([
        0.865067, 0.467834, 0.006436, 0.822698, 0.500021, 0.625819, 0.685094,
        0.684385, 0.730635, 0.620578, 0.382865, 0.642284, 0.144894, 0.505433,
        0.421729, 0.986743, 0.961358, 0.841948, 0.801575, 0.937703, 0.255979,
        0.686074, 0.796511, 0.696359
    ])

    v2 = numpy.array([
        0.892189, 0.479804, 0.006296, 0.800723, 0.519695, 0.615373, 0.685457,
        0.711348, 0.721573, 0.616186, 0.372007, 0.615052, 0.151552, 0.499962,
        0.418653, 0.945880, 0.915512, 0.848146, 0.780696, 0.906468, 0.248680,
        0.696450, 0.834688, 0.687994
    ])

    delta = utilities.delta_sr(v1, v2)
    assert utilities.accuracy(delta) == pytest.approx(0.00415158)
    assert utilities.precision(delta) == pytest.approx(0.02098248)
    assert utilities.uncertainty(delta) == pytest.approx(0.02095605)
Example 3
def run(epoch,
        model,
        data_loader,
        criterion,
        print_logger,
        sr_scheduler=None,
        optimizer=None):
    global args
    is_train = optimizer is not None
    if is_train:
        model.train()
    else:
        model.eval()

    batch_time_avg = AverageMeter()
    loss_avg, top1_avg, top5_avg = AverageMeter(), AverageMeter(), AverageMeter()

    timestamp = time.time()
    for idx, (input, target) in enumerate(data_loader):
        # print('start batch training', time.time())
        if torch.cuda.is_available():
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
        # print('loaded data to cuda', time.time())
        if is_train:
            optimizer.zero_grad()

            # accumulate gradients over the slice-rate indices scheduled for this batch
            for args.sr_idx in next(sr_scheduler):
                # update slice rate idx on the wrapped model (DataParallel exposes it via .module)
                model.module.update_sr_idx(args.sr_idx)

                output = model(input)
                loss = criterion(output, target)
                loss.backward()

            optimizer.step()
        else:
            with torch.no_grad():
                output = model(input)
                loss = criterion(output, target)
        # print('finish batch training', time.time())
        err1, err5 = accuracy(output, target, topk=(1, 5))
        loss_avg.update(loss.item(), input.size()[0])
        top1_avg.update(err1, input.size()[0])
        top5_avg.update(err5, input.size()[0])

        batch_time_avg.update(time.time() - timestamp)
        timestamp = time.time()

        # print('start logging', time.time())
        if idx % args.log_freq == 0:
            print_logger.info(
                'Epoch: [{0}/{1}][{2}/{3}][SR-{4}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\tLoss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\tTop 5-err {top5.val:.4f} ({top5.avg:.4f})'
                .format(epoch,
                        args.epoch,
                        idx,
                        len(data_loader),
                        args.sr_list[args.sr_idx],
                        batch_time=batch_time_avg,
                        loss=loss_avg,
                        top1=top1_avg,
                        top5=top5_avg))

    print_logger.info(
        '* Epoch: [{0}/{1}]{2:>8s}  Total Time: {3}\tTop 1-err {top1.avg:.4f}  Top 5-err {top5.avg:.4f}\tTest Loss {loss.avg:.4f}'
        .format(epoch,
                args.epoch, ('[train]' if is_train else '[val]'),
                timeSince(s=batch_time_avg.sum),
                top1=top1_avg,
                top5=top5_avg,
                loss=loss_avg))
    return top1_avg.avg, top5_avg.avg
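The `run()` function assumes an `AverageMeter` helper that is not included here. The sketch below is one common definition (in the style of the PyTorch ImageNet example) consistent with the `.val`, `.avg` and `.sum` fields used in the logging calls above; the repository's actual class may differ:

class AverageMeter:
    """Tracks the most recent value and a running average."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count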
Example 4
    def train_hidden1(self,
                      max_epochs=100,
                      learning_rate_init=0.0001,
                      annealing=100,
                      batch_size=None,
                      shuffle=False,
                      gradient_checking=False,
                      momentum=False):
        assert self.hidden_layers == 1

        # Start a Timer for Training
        strt = time()
        print("Training...")

        W_hidden1 = self.weights[0]
        W_output = self.weights[1]

        momentum1 = 0
        momentum2 = 0

        # Iterate
        for epoch in range(max_epochs):
            # Decay Learning Rate
            alpha = learning_rate_init / (1 + epoch / annealing)

            # Mini-batching
            batches = []
            num_samples = self.xTrain.shape[0]

            if shuffle:
                indices = np.random.permutation(num_samples)
            else:
                indices = np.arange(num_samples)

            if batch_size is None:
                batch_size = num_samples

            for batch_num in range(num_samples // batch_size):
                indxs = indices[batch_num * batch_size:(batch_num + 1) *
                                batch_size]
                batches.append((self.xTrain[indxs], self.yTrain[indxs]))

            # Iterate Over Mini-Batches
            for x_batch, t_batch in batches:
                # Forward Prop
                net_input_h1 = np.dot(x_batch, W_hidden1)
                hidden_layer_out1 = self.hidden_activation(net_input_h1)
                hidden_layer_out1 = np.insert(hidden_layer_out1, 0, 1, axis=1)

                # Output Layer
                net_input_o = np.dot(hidden_layer_out1, W_output)
                y = utilities.softmax_activation(net_input_o)

                # Backprop (deltas)
                delta_output = (t_batch - y)
                if self.hidden_activation == utilities.sigmoid_activation:
                    delta_hidden1 = utilities.sigmoid_activation(
                        net_input_h1) * (
                            1 - utilities.sigmoid_activation(net_input_h1)
                        ) * np.dot(delta_output, W_output[1:, :].T)
                elif self.hidden_activation == utilities.tanh_activation:
                    delta_hidden1 = (2 / 3) * (
                        1.7159 - 1.0 / 1.7159 *
                        np.power(utilities.tanh_activation(net_input_h1), 2)
                    ) * np.dot(delta_output, W_output[1:, :].T)
                elif self.hidden_activation == utilities.relu_activation:
                    # ReLU derivative: 1 where the pre-activation is positive, 0 elsewhere
                    delta_hidden1 = (net_input_h1 > 0) * np.dot(
                        delta_output, W_output[1:, :].T)
                else:
                    raise ValueError("Unsupported hidden activation function")

                if gradient_checking:
                    # Tune which weight and which layer for gradient checking here!
                    weight_indices = (0, 3)
                    layer_tag = 'output'

                    if layer_tag == 'output':
                        numerical_grad = self.get_numerical_gradient(
                            x_batch, t_batch, layer_tag, weight_indices)
                        backprop_grad = -np.dot(hidden_layer_out1.T,
                                                delta_output)[weight_indices]

                        print('Numerical Gradient:', numerical_grad)
                        print('Backprop Gradient:', backprop_grad)
                        print('Difference between Gradient:',
                              numerical_grad - backprop_grad)

                    elif layer_tag == 'hidden':
                        numerical_grad = self.get_numerical_gradient(
                            x_batch, t_batch, layer_tag, weight_indices)
                        backprop_grad = -np.dot(x_batch.T,
                                                delta_hidden1)[weight_indices]

                        print('Numerical Gradient:', numerical_grad)
                        print('Backprop Gradient:', backprop_grad)
                        print('Difference between Gradient:',
                              numerical_grad - backprop_grad)

                    else:
                        print('Invalid Tag')
                    # stop after a single gradient check
                    sys.exit()

                # Gradient Descent
                if momentum:
                    current_grad1 = alpha * np.dot(hidden_layer_out1.T,
                                                   delta_output)
                    current_grad2 = alpha * np.dot(x_batch.T, delta_hidden1)

                    W_output = W_output + current_grad1 + (0.9 * momentum1)
                    W_hidden1 = W_hidden1 + current_grad2 + (0.9 * momentum2)

                    momentum1 = current_grad1 + (0.9 * momentum1)
                    momentum2 = current_grad2 + (0.9 * momentum2)

                else:
                    W_output = W_output + alpha * np.dot(
                        hidden_layer_out1.T, delta_output)
                    W_hidden1 = W_hidden1 + alpha * np.dot(
                        x_batch.T, delta_hidden1)

                # Store the Model
                self.weights[0] = W_hidden1
                self.weights[1] = W_output

            # Get model predictions
            predictions_train = self.get_model_predictions(self.xTrain)
            predictions_valid = self.get_model_predictions(self.xValid)
            predictions_test = self.get_model_predictions(self.xTest)

            # Compute accuracies over epochs
            self.accuracies['train_acc'].append(
                utilities.accuracy(self.yTrain, predictions_train))
            self.accuracies['valid_acc'].append(
                utilities.accuracy(self.yValid, predictions_valid))
            self.accuracies['test_acc'].append(
                utilities.accuracy(self.yTest, predictions_test))

            # Code Profiling
            if not self.train_stats and self.accuracies['valid_acc'][-1] >= 0.97:
                self.train_stats = (time() - strt, epoch)

            # Cross-Entropy Loss over epochs
            self.losses['train_loss'].append(
                utilities.cross_entropy_loss(self.yTrain, predictions_train))
            self.losses['valid_loss'].append(
                utilities.cross_entropy_loss(self.yValid, predictions_valid))
            self.losses['test_loss'].append(
                utilities.cross_entropy_loss(self.yTest, predictions_test))

            # Update best model so far
            if self.losses['valid_loss'][-1] < self.best_model[0]:
                self.best_model[0] = self.losses['valid_loss'][-1]
                self.best_model[1] = self.weights

            # Early Stopping
            if epoch > 4 and utilities.early_stopping(
                    self.losses['valid_loss']):
                print("\tEarly Stopping (3 consecutive increases) at epoch =",
                      epoch)
                break
            elif epoch > 2 and np.abs(self.losses['valid_loss'][-1] -
                                      self.losses['valid_loss'][-2]) < 0.00001:
                print("\tEarly Stopping, error below epsilon.",
                      self.losses['valid_loss'][-1])
                break

            # Debug statements
            if epoch % 10 == 0:
                print("Epoch:", epoch)
                # print("\tTraining Accuracy:", self.accuracies['train_acc'][-1])
                # print("\tValidation Accuracy:", self.accuracies['valid_acc'][-1])
                # print("\tTest Accuracy:", self.accuracies['train_acc'][-1])
                # print("\tLoss:", self.losses['train_loss'][-1])

        if not self.train_stats:
            self.train_stats = (time() - strt, epoch)

        print('\n\nTraining Done! Took', round(time() - strt, 3), " secs.")
        # print('Final Training Accuracy: ', self.accuracies['train_acc'][-1], " in ", epoch, " epochs.")
        # print('Final Validation Accuracy: ', self.accuracies['valid_acc'][-1], " in ", epoch, " epochs.")
        # print('Final Test Accuracy: ', self.accuracies['test_acc'][-1], " in ", epoch, " epochs.\n")

        return 1
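`train_hidden1` assumes a `utilities` module that provides the activation functions referenced above. The sketch below shows plausible definitions consistent with the derivatives used in the backprop step (in particular the 1.7159 * tanh(2z/3) scaled tanh, whose derivative is (2/3) * (1.7159 - f(z)^2 / 1.7159)); these are assumptions, not the module's actual code:

import numpy as np

def sigmoid_activation(z):
    # Elementwise logistic sigmoid.
    return 1.0 / (1.0 + np.exp(-z))

def tanh_activation(z):
    # Scaled tanh, f(z) = 1.7159 * tanh(2z / 3).
    return 1.7159 * np.tanh(2.0 * z / 3.0)

def relu_activation(z):
    # Elementwise rectified linear unit.
    return np.maximum(0.0, z)

def softmax_activation(z):
    # Row-wise softmax with max-subtraction for numerical stability.
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)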
Example 5
    def train_hidden2(self,
                      max_epochs=100,
                      learning_rate_init=0.0001,
                      annealing=100,
                      batch_size=None,
                      shuffle=False,
                      momentum=False):
        assert self.hidden_layers == 2

        # Start a Timer for Training
        strt = time()
        print("Training...")

        W_hidden1 = self.weights[0]
        W_hidden2 = self.weights[1]
        W_output = self.weights[2]

        momentum1 = 0
        momentum2 = 0
        momentum3 = 0

        # Iterate
        for epoch in range(max_epochs):
            # Decay Learning Rate
            alpha = learning_rate_init / (1 + epoch / annealing)

            # Mini-batching
            batches = []
            num_samples = self.xTrain.shape[0]

            if shuffle:
                indices = np.random.permutation(num_samples)
            else:
                indices = np.arange(num_samples)

            if batch_size is None:
                batch_size = num_samples

            for batch_num in range(num_samples // batch_size):
                indxs = indices[batch_num * batch_size:(batch_num + 1) *
                                batch_size]
                batches.append((self.xTrain[indxs], self.yTrain[indxs]))

            # Iterate Over Mini-Batches
            for x_batch, t_batch in batches:
                # Forward Prop
                net_input_h1 = np.dot(x_batch, W_hidden1)
                # use the configured hidden activation (matches the backprop branches below)
                hidden_layer_out1 = self.hidden_activation(net_input_h1)
                hidden_layer_out1 = np.insert(hidden_layer_out1, 0, 1, axis=1)

                net_input_h2 = np.dot(hidden_layer_out1, W_hidden2)
                hidden_layer_out2 = self.hidden_activation(net_input_h2)
                hidden_layer_out2 = np.insert(hidden_layer_out2, 0, 1, axis=1)

                net_input_o = np.dot(hidden_layer_out2, W_output)
                y = utilities.softmax_activation(net_input_o)

                # Back Prop (deltas)
                delta_output = (t_batch - y)
                if self.hidden_activation == utilities.sigmoid_activation:
                    delta_hidden2 = utilities.sigmoid_activation(
                        net_input_h2) * (
                            1 - utilities.sigmoid_activation(net_input_h2)
                        ) * np.dot(delta_output, W_output[1:, :].T)
                    delta_hidden1 = utilities.sigmoid_activation(
                        net_input_h1) * (
                            1 - utilities.sigmoid_activation(net_input_h1)
                        ) * np.dot(delta_hidden2, W_hidden2[1:, :].T)
                elif self.hidden_activation == utilities.tanh_activation:
                    delta_hidden2 = (2 / 3) * (
                        1.7159 - 1.0 / 1.7159 *
                        np.power(utilities.tanh_activation(net_input_h2), 2)
                    ) * np.dot(delta_output, W_output[1:, :].T)
                    delta_hidden1 = (2 / 3) * (
                        1.7159 - 1.0 / 1.7159 *
                        np.power(utilities.tanh_activation(net_input_h1), 2)
                    ) * np.dot(delta_hidden2, W_hidden2[1:, :].T)
                else:
                    raise ValueError("Unsupported hidden activation function")

                # Gradient Descent
                if momentum:
                    current_grad1 = alpha * np.dot(hidden_layer_out2.T,
                                                   delta_output)
                    current_grad2 = alpha * np.dot(hidden_layer_out1.T,
                                                   delta_hidden2)
                    current_grad3 = alpha * np.dot(x_batch.T, delta_hidden1)

                    W_output = W_output + current_grad1 + (0.9 * momentum1)
                    W_hidden2 = W_hidden2 + current_grad2 + (0.9 * momentum2)
                    W_hidden1 = W_hidden1 + current_grad3 + (0.9 * momentum3)

                    momentum1 = current_grad1 + (0.9 * momentum1)
                    momentum2 = current_grad2 + (0.9 * momentum2)
                    momentum3 = current_grad3 + (0.9 * momentum3)

                else:
                    W_output = W_output + alpha * np.dot(
                        hidden_layer_out2.T, delta_output)
                    W_hidden2 = W_hidden2 + alpha * np.dot(
                        hidden_layer_out1.T, delta_hidden2)
                    W_hidden1 = W_hidden1 + alpha * np.dot(
                        x_batch.T, delta_hidden1)

                # Store the Model
                self.weights[0] = W_hidden1
                self.weights[1] = W_hidden2
                self.weights[2] = W_output

            # Get model predictions
            predictions_train = self.get_model_predictions(self.xTrain)
            predictions_valid = self.get_model_predictions(self.xValid)
            predictions_test = self.get_model_predictions(self.xTest)

            # Compute accuracies over epochs
            self.accuracies['train_acc'].append(
                utilities.accuracy(self.yTrain, predictions_train))
            self.accuracies['valid_acc'].append(
                utilities.accuracy(self.yValid, predictions_valid))
            self.accuracies['test_acc'].append(
                utilities.accuracy(self.yTest, predictions_test))

            # Code Profiling
            if not self.train_stats and self.accuracies['valid_acc'][-1] >= 0.97:
                self.train_stats = (time() - strt, epoch)

            # Cross-Entropy Loss over epochs
            self.losses['train_loss'].append(
                utilities.cross_entropy_loss(self.yTrain, predictions_train))
            self.losses['valid_loss'].append(
                utilities.cross_entropy_loss(self.yValid, predictions_valid))
            self.losses['test_loss'].append(
                utilities.cross_entropy_loss(self.yTest, predictions_test))

            # Update best model so far
            if self.losses['valid_loss'][-1] < self.best_model[0]:
                self.best_model[0] = self.losses['valid_loss'][-1]
                self.best_model[1] = self.weights

            # Early Stopping
            # if epoch > 4 and utilities.early_stopping(self.losses['valid_loss']):
            #     print("\tEarly Stopping at epoch =", epoch)
            #     break
            # elif epoch > 2 and np.abs(self.losses['valid_loss'][-1] - self.losses['valid_loss'][-2]) < 0.00001:
            #     print("\tEarly Stopping, error below epsilon.", self.losses['valid_loss'][-1])
            #     break

            # Debug statements
            if epoch % 10 == 0:
                print("Epoch:", epoch)
                # print("\tAccuracy:", self.accuracies['train_acc'][-1])
                # print("\tLoss:", self.losses['train_loss'][-1])

        if not self.train_stats:
            self.train_stats = (time() - strt, epoch)

        print('\n\nTraining Done! Took', round(time() - strt, 3), " secs.")
        # print('Final Training Accuracy: ', self.accuracies['train_acc'][-1], " in ", epoch, " epochs.")

        return 1
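Both training methods reference a `utilities.early_stopping` helper whose definition is not shown. Judging from the "(3 consecutive increases)" message in Example 4, it likely flags three successive increases of the validation loss; a minimal sketch under that assumption:

import numpy as np

def early_stopping(valid_losses, patience=3):
    # Stop once the validation loss has increased for `patience` consecutive epochs.
    if len(valid_losses) <= patience:
        return False
    recent = np.asarray(valid_losses[-(patience + 1):])
    return bool(np.all(np.diff(recent) > 0))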