def predict(model_path, im_path):
    '''
    Predict the letter shown in a single image and print it
    ---------------
    :param model_path: path of the saved model; the checkpoint must hold
                       a 'state_dict' entry and a 'configs' entry
    :param im_path: path of an image
    :raises OSError: if the image cannot be read by OpenCV
    '''

    # Inference below runs on the CPU (the model is never moved to another
    # device), so map every stored tensor to the CPU. Without this, a
    # checkpoint saved from a CUDA run cannot be loaded on a CPU-only machine.
    checkpoint = torch.load(model_path, map_location='cpu')

    # configurations stored next to the weights at training time
    configs = checkpoint['configs']
    norm_size = configs['norm_size']
    output_size = configs['output_size']
    hidden_size = configs['hidden_size']
    n_layers = configs['n_layers']
    act_type = configs['act_type']

    # rebuild the model with the saved configuration and restore its weights
    model = MLP(norm_size[0] * norm_size[1], output_size, hidden_size,
                n_layers, act_type)
    model.load_state_dict(checkpoint['state_dict'])

    # enter the evaluation mode
    model.eval()

    # image pre-processing, similar to what we do in ListDataset()
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(norm_size),
        transforms.ToTensor()
    ])

    im = cv2.imread(im_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising
        raise OSError(
            'Cannot read image (missing, unreadable or unsupported): {}'.format(im_path))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im = transform(im)
    # map pixel values from [0, 1] to [-1, 1]
    im.sub_(0.5).div_(0.5)

    # input im into the model as a batch holding one flattened image
    with torch.no_grad():
        flat_im = im.view(1, -1)
        out = model(flat_im)
        prediction = out.argmax(1)[0].item()

    # convert index of prediction to the corresponding character
    letters = string.ascii_letters[-26:]  # ABCD...XYZ
    prediction = letters[prediction]

    print('Prediction: {}'.format(prediction))
# epoch, step, len(train_loader), batch_time=batch_time, loss=losses, top1=top1)) train_loss = losses.avg train_acc = top1.avg train_losses.append(train_loss) train_accs.append(train_acc) # vis.plot_many({'train loss': train_loss, 'train acc': train_acc}) # validation phase print('validation phase') # vis.log('validation phase') batch_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() net.eval() end = time.time() for step, (images, labels) in enumerate(val_loader): images = images.to(device) labels = labels.to(device) with torch.no_grad(): logits = images # logits = clf(clf_norm(images)) # logits = logits - logits.max(dim=1, keepdim=True)[0] # Normalizing by reducing the maximum # logits = logits / logits.norm(p=2, dim=1, keepdim=True) # L2 normalization output = net(logits) loss = criterion(output, labels) _, pred = output.max(dim=1)
def test(model_path,
         im_dir='data/character_classification/images',
         test_file_path='data/character_classification/test.json',
         batch_size=8,
         device='cpu'):
    '''
    Test procedure
    ---------------
    Loads a saved model, measures its accuracy on the test set, and shows a
    2-D t-SNE plot of the model outputs where each point is drawn as its
    ground-truth letter.

    :param model_path: path of the saved model
    :param im_dir: path to directory with images
    :param test_file_path: file with test image paths and labels
    :param batch_size: test batch size
    :param device: 'cpu' or 'cuda'
    '''

    # Load the checkpoint straight onto the requested device, so a model
    # trained on a GPU can still be tested on a CPU-only machine.
    checkpoint = torch.load(model_path, map_location=device)
    configs = checkpoint['configs']
    norm_size = configs['norm_size']
    output_size = configs['output_size']
    hidden_size = configs['hidden_size']
    n_layers = configs['n_layers']
    act_type = configs['act_type']

    # initialize the model by MLP()
    model = MLP(norm_size[0] * norm_size[1], output_size, hidden_size,
                n_layers, act_type)

    # load model parameters we saved in model_path
    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)
    print('[Info] Load model from {}'.format(model_path))

    # enter the evaluation mode
    model.eval()

    # test loader
    testloader = dataLoader(im_dir, test_file_path, norm_size, batch_size)

    # run the test process
    n_correct = 0
    n_ims = 0
    logits = []
    all_labels = []

    with torch.no_grad():  # we do not need to compute gradients during test stage
        for ims, labels in testloader:
            ims, labels = ims.to(device), labels.type(torch.float).to(device)
            flat_ims = ims.view(ims.size(0), -1)
            out = model(flat_ims)
            predictions = out.argmax(1)

            # .item() keeps the counter a plain Python number rather than
            # letting it turn into a (possibly CUDA) tensor
            n_correct += torch.sum(predictions == labels).item()
            n_ims += ims.size(0)

            logits.append(out)
            all_labels.append(labels)

    logits = torch.cat(logits, dim=0).detach().cpu().numpy()
    all_labels = torch.cat(all_labels, dim=0).cpu().numpy()

    # embed the model outputs in 2-D and rescale each axis to [0, 1]
    tsne = TSNE(n_components=2, init='pca')
    Y = tsne.fit_transform(logits)
    letters = list(string.ascii_letters[-26:])  # ABCD...XYZ
    Y = (Y - Y.min(0)) / (Y.max(0) - Y.min(0))

    # draw every sample as its ground-truth letter, coloured by class
    for point, label in zip(Y, all_labels):
        if label < len(letters):
            c = plt.cm.rainbow(float(label) / len(letters))
            plt.text(point[0], point[1], s=letters[int(label)], color=c)
    plt.show()

    print('[Info] Test accuracy = {:.1f}%'.format(100 * n_correct / n_ims))
def train_val(im_dir,
              train_file_path,
              val_file_path,
              hidden_size,
              n_layers,
              act_type,
              norm_size,
              n_epochs,
              batch_size,
              n_letters,
              lr,
              optim_type,
              momentum,
              weight_decay,
              valInterval,
              device='cpu'):
    '''
    The main training procedure
    ----------------------------
    Trains an MLP, validates it every valInterval epochs, saves the trained
    parameters together with the model configuration in
    'saved_models/recognition.pth', and plots the per-epoch training loss.

    :param im_dir: path to directory with images
    :param train_file_path: file list of training image paths and labels
    :param val_file_path: file list of validation image paths and labels
    :param hidden_size: a list of hidden size for each hidden layer
    :param n_layers: number of layers in the MLP
    :param act_type: type of activation function, can be none, sigmoid, tanh, or relu
    :param norm_size: image normalization size, (height, width)
    :param n_epochs: number of training epochs
    :param batch_size: batch size of training and validation
    :param n_letters: number of classes, in this task it is 26 English letters
    :param lr: learning rate
    :param optim_type: optimizer, can be 'sgd', 'adagrad', 'rmsprop', 'adam', or 'adadelta'
    :param momentum: only used if optim_type == 'sgd'
    :param weight_decay: the factor of L2 penalty on network weights
    :param valInterval: the frequency of validation, e.g., if valInterval = 5,
                        then do validation after each 5 training epochs
    :param device: 'cpu' or 'cuda', we can use 'cpu' for our homework
                   if GPU with cuda support is not available
    :raises NotImplementedError: if optim_type is not one of the supported optimizers
    '''
    import os  # local import: only needed to create the checkpoint directory

    # training and validation data loader
    trainloader = dataLoader(im_dir, train_file_path, norm_size, batch_size)
    valloader = dataLoader(im_dir, val_file_path, norm_size, batch_size)

    # each image of shape [norm_size[0], norm_size[1]] is flattened into a
    # vector, so the MLP input size is the number of pixels
    model = MLP(norm_size[0] * norm_size[1], n_letters, hidden_size,
                n_layers, act_type)

    # loss function
    cal_loss = CrossEntropyLoss.apply

    # put the model on CPU or GPU
    model = model.to(device)

    # optimizer
    if optim_type == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr, momentum=momentum,
                              weight_decay=weight_decay)
    elif optim_type == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr,
                                  weight_decay=weight_decay)
    elif optim_type == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr,
                                  weight_decay=weight_decay)
    elif optim_type == 'adam':
        optimizer = optim.Adam(model.parameters(), lr,
                               weight_decay=weight_decay)
    elif optim_type == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(), lr,
                                   weight_decay=weight_decay)
    else:
        raise NotImplementedError(
            '[Error] optim_type should be one of sgd, adagrad, rmsprop, adam, or adadelta'
        )

    # training
    # to save loss of each training epoch in a python "list" data structure
    losses = []

    for epoch in range(n_epochs):
        # set the model in training mode
        model.train()

        # to save total loss in one epoch
        total_loss = 0.

        for ims, labels in trainloader:  # get a batch of data
            # set device
            ims = ims.to(device)
            labels = labels.to(device)

            # Flatten each image into a vector. Use the actual batch size:
            # the last batch may hold fewer than batch_size images, and
            # view(batch_size, ...) would then fail.
            ims = ims.view(ims.size(0), -1)

            # clear gradients left over from the previous step
            optimizer.zero_grad()

            # forward process
            pred = model(ims)

            # compute the loss and back-propagate
            loss = cal_loss(pred, labels)
            loss.backward()

            # loss.item() returns a standard python number, so the running
            # sum does not keep the autograd graph alive
            total_loss += loss.item()

            # update the parameters of the model
            optimizer.step()

        # average of the total loss for iterations
        avg_loss = total_loss / len(trainloader)
        losses.append(avg_loss)
        print('Epoch {:02d}: loss = {:.3f}'.format(epoch + 1, avg_loss))

        # validation
        if (epoch + 1) % valInterval == 0:
            # set the model in evaluation mode
            model.eval()

            n_correct = 0  # number of images that are correctly classified
            n_ims = 0  # number of total images

            with torch.no_grad():  # we do not need to compute gradients during validation
                for ims, labels in valloader:
                    # set data type and device
                    ims, labels = ims.to(device), labels.type(
                        torch.float).to(device)

                    # convert an image to a vector as the input of the MLP
                    flat_ims = ims.view(ims.size(0), -1)

                    # run the model which is the forward process
                    out = model(flat_ims)

                    # the predicted class is the index of the largest output
                    predictions = out.argmax(1)

                    # .item() keeps the counter a plain Python number
                    n_correct += torch.sum(predictions == labels).item()
                    n_ims += ims.size(0)

            # show prediction accuracy
            print('Epoch {:02d}: validation accuracy = {:.1f}%'.format(
                epoch + 1, 100 * n_correct / n_ims))

    # save model parameters, together with the configuration needed to
    # rebuild the model, once training has finished
    model_save_path = 'saved_models/recognition.pth'
    # torch.save does not create missing parent directories
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
    torch.save(
        {
            'state_dict': model.state_dict(),
            'configs': {
                'norm_size': norm_size,
                'output_size': n_letters,
                'hidden_size': hidden_size,
                'n_layers': n_layers,
                'act_type': act_type
            }
        }, model_save_path)
    print('Model saved in {}\n'.format(model_save_path))

    # draw the loss curve
    plot_loss(losses)