def validation_step(self, batch, batch_idx):
    # `batch` is a dict of (inputs, labels) pairs keyed by dataset name;
    # loss and accuracy are logged separately under each key.
    for key, data in batch.items():
        x, y = data
        _, y_pred, _ = self.D(x)
        loss = F.cross_entropy(y_pred, y)
        self.log(f"{key}/loss", loss)
        acc = accuracy(y_pred, y)
        self.log(f"{key}/accuracy", acc)
def test_APU():
    v1 = numpy.array([
        0.865067, 0.467834, 0.006436, 0.822698, 0.500021, 0.625819,
        0.685094, 0.684385, 0.730635, 0.620578, 0.382865, 0.642284,
        0.144894, 0.505433, 0.421729, 0.986743, 0.961358, 0.841948,
        0.801575, 0.937703, 0.255979, 0.686074, 0.796511, 0.696359
    ])
    v2 = numpy.array([
        0.892189, 0.479804, 0.006296, 0.800723, 0.519695, 0.615373,
        0.685457, 0.711348, 0.721573, 0.616186, 0.372007, 0.615052,
        0.151552, 0.499962, 0.418653, 0.945880, 0.915512, 0.848146,
        0.780696, 0.906468, 0.248680, 0.696450, 0.834688, 0.687994
    ])
    delta = utilities.delta_sr(v1, v2)
    assert utilities.accuracy(delta) == pytest.approx(0.00415158)
    assert utilities.precision(delta) == pytest.approx(0.02098248)
    assert utilities.uncertainty(delta) == pytest.approx(0.02095605)
def run(epoch, model, data_loader, criterion, print_logger,
        sr_scheduler=None, optimizer=None):
    global args
    # Training mode is inferred from whether an optimizer was supplied.
    is_train = optimizer is not None
    if is_train:
        model.train()
    else:
        model.eval()

    batch_time_avg = AverageMeter()
    loss_avg, top1_avg, top5_avg = AverageMeter(), AverageMeter(), AverageMeter()

    timestamp = time.time()
    for idx, (input, target) in enumerate(data_loader):
        if torch.cuda.is_available():
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        if is_train:
            optimizer.zero_grad()
            for args.sr_idx in next(sr_scheduler):  # update slice-rate index
                model.module.update_sr_idx(args.sr_idx)  # DataParallel .module
                output = model(input)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
        else:
            with torch.no_grad():
                output = model(input)
                loss = criterion(output, target)

        err1, err5 = accuracy(output, target, topk=(1, 5))
        loss_avg.update(loss.item(), input.size()[0])
        top1_avg.update(err1, input.size()[0])
        top5_avg.update(err5, input.size()[0])
        batch_time_avg.update(time.time() - timestamp)
        timestamp = time.time()

        if idx % args.log_freq == 0:
            print_logger.info(
                'Epoch: [{0}/{1}][{2}/{3}][SR-{4}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'
                'Top 5-err {top5.val:.4f} ({top5.avg:.4f})'.format(
                    epoch, args.epoch, idx, len(data_loader),
                    args.sr_list[args.sr_idx],
                    batch_time=batch_time_avg, loss=loss_avg,
                    top1=top1_avg, top5=top5_avg))

    print_logger.info(
        '* Epoch: [{0}/{1}]{2:>8s} Total Time: {3}\t'
        'Top 1-err {top1.avg:.4f} Top 5-err {top5.avg:.4f}\t'
        'Test Loss {loss.avg:.4f}'.format(
            epoch, args.epoch, ('[train]' if is_train else '[val]'),
            timeSince(s=batch_time_avg.sum),
            top1=top1_avg, top5=top5_avg, loss=loss_avg))

    return top1_avg.avg, top5_avg.avg
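# Hypothetical usage sketch for run(): the loader, scheduler, optimizer and logger
# names below are illustrative assumptions, not objects defined in this file.
#
#     for epoch in range(args.epoch):
#         # training pass (optimizer supplied, so is_train is True)
#         run(epoch, model, train_loader, criterion, print_logger,
#             sr_scheduler=sr_scheduler, optimizer=optimizer)
#         # validation pass (no optimizer, so the torch.no_grad() path is taken)
#         val_top1, val_top5 = run(epoch, model, val_loader, criterion, print_logger)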
def train_hidden1(self, max_epochs=100, learning_rate_init=0.0001, annealing=100,
                  batch_size=None, shuffle=False, gradient_checking=False,
                  momentum=False):
    assert self.hidden_layers == 1

    # Start a timer for training
    strt = time()
    print("Training...")

    W_hidden1 = self.weights[0]
    W_output = self.weights[1]
    momentum1 = 0
    momentum2 = 0

    for epoch in range(max_epochs):
        # Decay the learning rate
        alpha = learning_rate_init / (1 + epoch / annealing)

        # Mini-batching
        batches = []
        num_samples = self.xTrain.shape[0]
        if shuffle:
            indices = np.random.permutation(num_samples)
        else:
            indices = np.arange(num_samples)
        if batch_size is None:
            batch_size = num_samples
        for batch_num in range(num_samples // batch_size):
            indxs = indices[batch_num * batch_size:(batch_num + 1) * batch_size]
            batches.append((self.xTrain[indxs], self.yTrain[indxs]))

        # Iterate over mini-batches
        for x_batch, t_batch in batches:
            # Forward prop: hidden layer (bias unit prepended to its output)
            net_input_h1 = np.dot(x_batch, W_hidden1)
            hidden_layer_out1 = self.hidden_activation(net_input_h1)
            hidden_layer_out1 = np.insert(hidden_layer_out1, 0, 1, axis=1)

            # Output layer
            net_input_o = np.dot(hidden_layer_out1, W_output)
            y = utilities.softmax_activation(net_input_o)

            # Backprop (deltas)
            delta_output = t_batch - y
            if self.hidden_activation == utilities.sigmoid_activation:
                delta_hidden1 = utilities.sigmoid_activation(net_input_h1) * (
                    1 - utilities.sigmoid_activation(net_input_h1)
                ) * np.dot(delta_output, W_output[1:, :].T)
            elif self.hidden_activation == utilities.tanh_activation:
                delta_hidden1 = (2 / 3) * (
                    1.7159 - 1.0 / 1.7159 *
                    np.power(utilities.tanh_activation(net_input_h1), 2)
                ) * np.dot(delta_output, W_output[1:, :].T)
            elif self.hidden_activation == utilities.relu_activation:
                # ReLU derivative: 1 where the pre-activation is non-negative, 0 otherwise
                delta_hidden1 = (net_input_h1 >= 0) * np.dot(
                    delta_output, W_output[1:, :].T)
            else:
                raise Exception("ERROR: Not supported hidden activation function!")

            if gradient_checking:
                # Tune which weight and which layer for gradient checking here!
                weight_indices = (0, 3)
                layer_tag = 'output'
                if layer_tag == 'output':
                    numerical_grad = self.get_numerical_gradient(
                        x_batch, t_batch, layer_tag, weight_indices)
                    backprop_grad = -np.dot(hidden_layer_out1.T,
                                            delta_output)[weight_indices]
                    print('Numerical Gradient:', numerical_grad)
                    print('Backprop Gradient:', backprop_grad)
                    print('Difference between Gradient:',
                          numerical_grad - backprop_grad)
                elif layer_tag == 'hidden':
                    numerical_grad = self.get_numerical_gradient(
                        x_batch, t_batch, layer_tag, weight_indices)
                    backprop_grad = -np.dot(x_batch.T,
                                            delta_hidden1)[weight_indices]
                    print('Numerical Gradient:', numerical_grad)
                    print('Backprop Gradient:', backprop_grad)
                    print('Difference between Gradient:',
                          numerical_grad - backprop_grad)
                else:
                    print('Invalid Tag')
                    sys.exit()

            # Gradient descent (with optional 0.9 momentum)
            if momentum:
                current_grad1 = alpha * np.dot(hidden_layer_out1.T, delta_output)
                current_grad2 = alpha * np.dot(x_batch.T, delta_hidden1)
                W_output = W_output + current_grad1 + (0.9 * momentum1)
                W_hidden1 = W_hidden1 + current_grad2 + (0.9 * momentum2)
                momentum1 = current_grad1 + (0.9 * momentum1)
                momentum2 = current_grad2 + (0.9 * momentum2)
            else:
                W_output = W_output + alpha * np.dot(hidden_layer_out1.T,
                                                     delta_output)
                W_hidden1 = W_hidden1 + alpha * np.dot(x_batch.T, delta_hidden1)

        # Store the model
        self.weights[0] = W_hidden1
        self.weights[1] = W_output

        # Get model predictions
        predictions_train = self.get_model_predictions(self.xTrain)
        predictions_valid = self.get_model_predictions(self.xValid)
        predictions_test = self.get_model_predictions(self.xTest)

        # Compute accuracies over epochs
        self.accuracies['train_acc'].append(
            utilities.accuracy(self.yTrain, predictions_train))
        self.accuracies['valid_acc'].append(
            utilities.accuracy(self.yValid, predictions_valid))
        self.accuracies['test_acc'].append(
            utilities.accuracy(self.yTest, predictions_test))

        # Code profiling: record time-to-97%-validation-accuracy once
        if not self.train_stats and self.accuracies['valid_acc'][-1] >= 0.97:
            self.train_stats = (time() - strt, epoch)

        # Cross-entropy loss over epochs
        self.losses['train_loss'].append(
            utilities.cross_entropy_loss(self.yTrain, predictions_train))
        self.losses['valid_loss'].append(
            utilities.cross_entropy_loss(self.yValid, predictions_valid))
        self.losses['test_loss'].append(
            utilities.cross_entropy_loss(self.yTest, predictions_test))

        # Update best model so far
        if self.losses['valid_loss'][-1] < self.best_model[0]:
            self.best_model[0] = self.losses['valid_loss'][-1]
            self.best_model[1] = self.weights

        # Early stopping
        if epoch > 4 and utilities.early_stopping(self.losses['valid_loss']):
            print("\tEarly Stopping (3 consecutive increases) at epoch =", epoch)
            break
        elif epoch > 2 and np.abs(self.losses['valid_loss'][-1] -
                                  self.losses['valid_loss'][-2]) < 0.00001:
            print("\tEarly Stopping, error below epsilon.",
                  self.losses['valid_loss'][-1])
            break

        # Debug statements
        if epoch % 10 == 0:
            print("Epoch:", epoch)

    if not self.train_stats:
        self.train_stats = (time() - strt, epoch)
    print('\n\nTraining Done! Took', round(time() - strt, 3), " secs.")
    return 1
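# Hypothetical usage sketch for train_hidden1(): the network class and its
# constructor are assumptions for illustration; only the keyword arguments
# mirror the signature above.
#
#     net = NeuralNetwork(hidden_layers=1,
#                         hidden_activation=utilities.sigmoid_activation)  # hypothetical class
#     net.train_hidden1(max_epochs=100, learning_rate_init=1e-4, annealing=100,
#                       batch_size=128, shuffle=True, momentum=True)
#     print(net.accuracies['valid_acc'][-1], net.losses['valid_loss'][-1])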
def train_hidden2(self, max_epochs=100, learning_rate_init=0.0001, annealing=100,
                  batch_size=None, shuffle=False, momentum=False):
    assert self.hidden_layers == 2

    # Start a timer for training
    strt = time()
    print("Training...")

    W_hidden1 = self.weights[0]
    W_hidden2 = self.weights[1]
    W_output = self.weights[2]
    momentum1 = 0
    momentum2 = 0
    momentum3 = 0

    for epoch in range(max_epochs):
        # Decay the learning rate
        alpha = learning_rate_init / (1 + epoch / annealing)

        # Mini-batching
        batches = []
        num_samples = self.xTrain.shape[0]
        if shuffle:
            indices = np.random.permutation(num_samples)
        else:
            indices = np.arange(num_samples)
        if batch_size is None:
            batch_size = num_samples
        for batch_num in range(num_samples // batch_size):
            indxs = indices[batch_num * batch_size:(batch_num + 1) * batch_size]
            batches.append((self.xTrain[indxs], self.yTrain[indxs]))

        # Iterate over mini-batches
        for x_batch, t_batch in batches:
            # Forward prop: both hidden layers use the sigmoid here
            # (unlike train_hidden1, which calls self.hidden_activation).
            net_input_h1 = np.dot(x_batch, W_hidden1)
            hidden_layer_out1 = utilities.sigmoid_activation(net_input_h1)
            hidden_layer_out1 = np.insert(hidden_layer_out1, 0, 1, axis=1)
            net_input_h2 = np.dot(hidden_layer_out1, W_hidden2)
            hidden_layer_out2 = utilities.sigmoid_activation(net_input_h2)
            hidden_layer_out2 = np.insert(hidden_layer_out2, 0, 1, axis=1)
            net_input_o = np.dot(hidden_layer_out2, W_output)
            y = utilities.softmax_activation(net_input_o)

            # Back prop (deltas)
            delta_output = t_batch - y
            if self.hidden_activation == utilities.sigmoid_activation:
                delta_hidden2 = utilities.sigmoid_activation(net_input_h2) * (
                    1 - utilities.sigmoid_activation(net_input_h2)
                ) * np.dot(delta_output, W_output[1:, :].T)
                delta_hidden1 = utilities.sigmoid_activation(net_input_h1) * (
                    1 - utilities.sigmoid_activation(net_input_h1)
                ) * np.dot(delta_hidden2, W_hidden2[1:, :].T)
            elif self.hidden_activation == utilities.tanh_activation:
                delta_hidden2 = (2 / 3) * (
                    1.7159 - 1.0 / 1.7159 *
                    np.power(utilities.tanh_activation(net_input_h2), 2)
                ) * np.dot(delta_output, W_output[1:, :].T)
                delta_hidden1 = (2 / 3) * (
                    1.7159 - 1.0 / 1.7159 *
                    np.power(utilities.tanh_activation(net_input_h1), 2)
                ) * np.dot(delta_hidden2, W_hidden2[1:, :].T)
            else:
                raise Exception("ERROR: Not supported hidden activation function!")

            # Gradient descent (with optional 0.9 momentum)
            if momentum:
                current_grad1 = alpha * np.dot(hidden_layer_out2.T, delta_output)
                current_grad2 = alpha * np.dot(hidden_layer_out1.T, delta_hidden2)
                current_grad3 = alpha * np.dot(x_batch.T, delta_hidden1)
                W_output = W_output + current_grad1 + (0.9 * momentum1)
                W_hidden2 = W_hidden2 + current_grad2 + (0.9 * momentum2)
                W_hidden1 = W_hidden1 + current_grad3 + (0.9 * momentum3)
                momentum1 = current_grad1 + (0.9 * momentum1)
                momentum2 = current_grad2 + (0.9 * momentum2)
                momentum3 = current_grad3 + (0.9 * momentum3)
            else:
                W_output = W_output + alpha * np.dot(hidden_layer_out2.T,
                                                     delta_output)
                W_hidden2 = W_hidden2 + alpha * np.dot(hidden_layer_out1.T,
                                                       delta_hidden2)
                W_hidden1 = W_hidden1 + alpha * np.dot(x_batch.T, delta_hidden1)

        # Store the model
        self.weights[0] = W_hidden1
        self.weights[1] = W_hidden2
        self.weights[2] = W_output

        # Get model predictions
        predictions_train = self.get_model_predictions(self.xTrain)
        predictions_valid = self.get_model_predictions(self.xValid)
        predictions_test = self.get_model_predictions(self.xTest)

        # Compute accuracies over epochs
        self.accuracies['train_acc'].append(
            utilities.accuracy(self.yTrain, predictions_train))
        self.accuracies['valid_acc'].append(
            utilities.accuracy(self.yValid, predictions_valid))
        self.accuracies['test_acc'].append(
            utilities.accuracy(self.yTest, predictions_test))

        # Code profiling: record time-to-97%-validation-accuracy once
        if not self.train_stats and self.accuracies['valid_acc'][-1] >= 0.97:
            self.train_stats = (time() - strt, epoch)

        # Cross-entropy loss over epochs
        self.losses['train_loss'].append(
            utilities.cross_entropy_loss(self.yTrain, predictions_train))
        self.losses['valid_loss'].append(
            utilities.cross_entropy_loss(self.yValid, predictions_valid))
        self.losses['test_loss'].append(
            utilities.cross_entropy_loss(self.yTest, predictions_test))

        # Update best model so far
        if self.losses['valid_loss'][-1] < self.best_model[0]:
            self.best_model[0] = self.losses['valid_loss'][-1]
            self.best_model[1] = self.weights

        # Early stopping (disabled for this two-hidden-layer variant)
        # if epoch > 4 and utilities.early_stopping(self.losses['valid_loss']):
        #     print("\tEarly Stopping at epoch =", epoch)
        #     break
        # elif epoch > 2 and np.abs(self.losses['valid_loss'][-1] -
        #                           self.losses['valid_loss'][-2]) < 0.00001:
        #     print("\tEarly Stopping, error below epsilon.",
        #           self.losses['valid_loss'][-1])
        #     break

        # Debug statements
        if epoch % 10 == 0:
            print("Epoch:", epoch)

    if not self.train_stats:
        self.train_stats = (time() - strt, epoch)
    print('\n\nTraining Done! Took', round(time() - strt, 3), " secs.")
    return 1
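# Hypothetical usage sketch for train_hidden2(): same caveats as the sketch after
# train_hidden1(); the class name and constructor arguments are assumptions.
#
#     net = NeuralNetwork(hidden_layers=2,
#                         hidden_activation=utilities.tanh_activation)  # hypothetical class
#     net.train_hidden2(max_epochs=100, learning_rate_init=1e-4,
#                       batch_size=128, shuffle=True, momentum=True)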