def main():
    """Train a model from module-level CLI settings and persist a checkpoint.

    NOTE(review): relies on module-level names (root_data, chosen_arch,
    dropout, hidden_layers, learning_rate, device_chosen, checkpoint_path)
    and the project `utilities` module — all defined elsewhere in this file.
    """
    # Build datasets and loaders from the data root.
    img_datasets,data_loaders = utilities.transform_load_data(root_data)
    # Construct the model together with its loss and optimizer.
    model,criterion,optimizer = utilities.model_constructor(chosen_arch,dropout,hidden_layers,learning_rate,device_chosen)
    utilities.train_model_process(model,criterion,optimizer)
    print("Training Done...")
    # Persist everything needed to rebuild the model later.
    utilities.save_checkpoint(model,checkpoint_path,chosen_arch,hidden_layers,dropout,learning_rate,device_chosen)
    print("Checkpoint Saved...")
def Main():
    """Train a network from module-level settings and save a checkpoint.

    NOTE(review): relies on module-level names (directory, structure, dropout,
    hidden_layer1, lr, device, epochs, path) and the project `utilities`
    module defined elsewhere in this file.
    """
    train_loader, validation_loader, test_loader = utilities.load_data(directory)
    model, optimizer, criterion = utilities.net_setup(structure, dropout, hidden_layer1, lr, device)
    # 20 is the print interval passed through to the training loop.
    utilities.train_network(model, optimizer, criterion, epochs, 20, train_loader, device)
    utilities.save_checkpoint(path, structure, hidden_layer1, dropout, lr)
    print("**************Training Complete !! Thanks for the patience******************")
def train(episodes=2000, print_interval=100):
    """Run the multi-agent episode training loop.

    NOTE(review): relies on module-level `env`, `agent`, `num_agents`,
    `brain_name` and `save_checkpoint` defined elsewhere in this file.

    :param episodes: maximum number of episodes to run
    :param print_interval: episodes between printed summaries / checkpoints
    :return: list of per-episode max scores across all agents
    """
    # Rolling 100-episode window used for the "solved" criterion below.
    agent_scores = deque(maxlen=100)
    all_scores = []
    for episode in range(1, episodes + 1):
        # reset all agent scores
        scores = np.zeros(num_agents)
        # reset unity env
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        # reset agent (noise)
        agent.reset()
        while True:
            # interact with env
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            # learn
            agent.step(states, actions, rewards, next_states, dones)
            # record next state cycle
            scores += rewards
            states = next_states
            # termination: stop the episode as soon as any agent is done
            if np.any(dones):
                break
        # compute max score over agents for this episode
        score = np.max(scores)
        agent_scores.append(score)
        all_scores.append(score)
        # print training scores (carriage return keeps it on one line)
        print(
            '\rEpisode {}\tAverage Score: {:.5f}\tMax Score: {:.5f} in 100 episodes'
            .format(episode, np.mean(agent_scores), np.max(agent_scores)),
            end="")
        if episode % print_interval == 0:
            print(
                '\rEpisode {}\tAverage Score: {:.5f}\tMax Score: {:.5f} in 100 episodes'
                .format(episode, np.mean(agent_scores), np.max(agent_scores)))
            save_checkpoint(agent.agents)
        # if solved: 100-episode average reaches the target score
        if np.mean(agent_scores) >= 0.5:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.5f}'
                .format(episode, np.mean(agent_scores)))
            save_checkpoint(agent.agents)
            break
    return all_scores
def main():
    """Train a network and save a checkpoint, driven by module-level CLI args.

    NOTE(review): relies on module-level names (root, structure, dropout,
    hiddenlayer1, learnrate, epochs, path) and the project `utilities` module.
    """
    #Run load_data function with command line file path
    trainloader, testloader, validloader = utilities.load_data(root)
    #Run network_setup with command line structure, dropout, hiddenlayer number, and learnrate
    model, criterion, optimizer = utilities.network_setup(structure, dropout, hiddenlayer1, learnrate)
    #run deep_learning training function with model, criterion, and optimizer from network_setup
    #and command line arguments; 40 is the print interval
    utilities.deep_learning(model, criterion, optimizer, trainloader, epochs, 40)
    #save the checkpoint of the trained model for later use.
    utilities.save_checkpoint(model, path, structure, hiddenlayer1, dropout, learnrate)
    print("Training complete. Model saved at {}".format(path))
def main():
    """Fine-tune a pretrained torchvision model and save a checkpoint.

    Relies on parse_args, load_datasets, device_type, train_model and
    save_checkpoint defined elsewhere in the project.
    """
    print("Start creating your own image classifier")  # fixed typo "creationg"
    args = parse_args()
    train_data, valid_data, test_data = load_datasets(args.data_dir)

    # Data batching
    train_loaders = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
    valid_loaders = torch.utils.data.DataLoader(valid_data, batch_size=64, shuffle=True)
    test_loaders = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)

    device = device_type(args.gpu)

    # Load the requested pretrained backbone and freeze its feature weights.
    model = getattr(models, args.arch)(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    if args.arch == 'vgg16':
        num_ftrs = model.classifier[0].in_features
        # NOTE(review): fc2 maps to args.hidden_units rather than a class
        # count (102 elsewhere in this project) — confirm this is intended.
        classifier = torch.nn.Sequential(OrderedDict([
            ('fc1', torch.nn.Linear(num_ftrs, 1024)),
            ('relu', torch.nn.ReLU()),
            ('dropout', torch.nn.Dropout(p=0.5)),
            ('fc2', torch.nn.Linear(1024, args.hidden_units)),
            ('output', nn.LogSoftmax(dim=1))]))
    elif args.arch == 'densenet121':
        classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(1024, 500)),
            ('dropout', nn.Dropout(p=0.6)),
            ('relu1', nn.ReLU()),
            ('fc2', nn.Linear(500, 102)),
            ('output', nn.LogSoftmax(dim=1))
        ]))
    else:
        # FIX: `classifier` used to be unbound here and would crash later
        # with a confusing NameError; fail fast instead.
        raise ValueError("Unsupported architecture: {}".format(args.arch))

    model.classifier = classifier
    # NLLLoss pairs with the LogSoftmax output above.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
    epochs = int(args.epochs)
    model = model.to(device)
    model = train_model(model, criterion, optimizer, train_loaders, device, epochs)
    # model.class_to_idx = train_data.class_to_idx
    # FIX: the original line ended with a stray ':' (SyntaxError).
    save_checkpoint(args.save_dir, model, optimizer, classifier)
def main():
    """Train a pretrained VGG classifier on the flowers dataset and save a
    checkpoint.

    Relies on parse_args, train and save_checkpoint defined elsewhere.
    """
    args = parse_args()

    data_dir = 'flowers'
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # Training uses augmentation; validation/testing use deterministic resizing.
    training_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    validataion_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    testing_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    image_datasets = [ImageFolder(train_dir, transform=training_transforms),
                      ImageFolder(valid_dir, transform=validataion_transforms),
                      ImageFolder(test_dir, transform=testing_transforms)]
    # Only the training loader shuffles.
    dataloaders = [torch.utils.data.DataLoader(image_datasets[0], batch_size=64, shuffle=True),
                   torch.utils.data.DataLoader(image_datasets[1], batch_size=64),
                   torch.utils.data.DataLoader(image_datasets[2], batch_size=64)]

    model = getattr(models, args.arch)(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False  # freeze the feature extractor

    # FIX: the vgg13 and vgg19 branches were byte-identical duplicates —
    # collapsed into one; also fail fast on unsupported architectures
    # (previously `classifier` would be unbound and crash later).
    if args.arch in ("vgg13", "vgg19"):
        feature_num = model.classifier[0].in_features
        classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(feature_num, 1024)),
            ('drop', nn.Dropout(p=0.5)),
            ('relu', nn.ReLU()),
            ('fc2', nn.Linear(1024, 102)),
            ('output', nn.LogSoftmax(dim=1))]))
    else:
        raise ValueError("Unsupported architecture: {}".format(args.arch))
    model.classifier = classifier

    # NOTE(review): CrossEntropyLoss applies log_softmax itself, so combined
    # with the LogSoftmax output layer above this double-applies it — NLLLoss
    # is the usual pairing. Left unchanged pending confirmation.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.classifier.parameters(), lr=float(args.learning_rate))
    epochs = int(args.epochs)
    class_index = image_datasets[0].class_to_idx
    gpu = args.gpu
    train(model, criterion, optimizer, dataloaders, epochs, gpu)
    model.class_to_idx = class_index
    save_checkpoint(model, optimizer, args, classifier)
def main(args):
    """Train, optionally validate, and test an MNIST classifier chosen by
    args.model ('linear' | 'neuralnet' | anything else -> ConvNet).

    Relies on mkdir_p, prepare_data, Softmax, TwoLayer, ConvNet, LoggerX,
    CalculateMetrics and save_checkpoint defined elsewhere in the project.
    """
    # reproducibility: seed numpy/torch random number generators
    if args.seed is not None:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # need directory with checkpoint files to recover previously trained models
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    checkpoint_file = args.checkpoint + args.model + str(datetime.now())[:-10]

    # decide which device to use; assumes at most one GPU is available
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")

    # decide if we're using a validation set;
    # if not, don't evaluate at end of epochs
    evaluate = args.train_split < 1.

    # prep data loaders
    if args.train_split == 1:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)

    # build model
    if args.model == 'linear':
        model = Softmax().to(device)
    elif args.model == 'neuralnet':
        model = TwoLayer().to(device)
    else:
        model = ConvNet().to(device)

    # build optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 amsgrad=args.amsgrad)

    # setup validation metrics we want to track for tracking best model
    best_val_loss = float('inf')
    best_val_acc = 0

    # set up tensorboard logger
    logger = LoggerX('test_mnist', 'mnist_data', 25)

    for epoch in range(args.epochs):
        print('\n================== TRAINING ==================')
        model.train()  # set model to training mode

        # metrics from logger (removed unused `correct`/`train_num` locals —
        # accuracy accounting now lives entirely in CalculateMetrics)
        model_metrics = CalculateMetrics(batch_size=args.batch_size,
                                         batches_per_epoch=len(train_loader))
        for ix, (img, label) in enumerate(train_loader):  # training batches
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()  # clear gradients from the previous update
            output = model(img)  # forward pass
            loss = F.cross_entropy(output, label)  # network loss
            loss.backward()  # backward pass
            optimizer.step()  # update model parameters
            pred = output.max(1, keepdim=True)[1]  # index of the max logit
            # convert this data to binary for the sake of testing the
            # metrics functionality
            label[label < 5] = 0
            label[label > 0] = 1
            pred[pred < 5] = 0
            pred[pred > 0] = 1
            scores_dict = model_metrics.update_scores(label, pred)
            if ix % args.log_interval == 0:
                # log the metrics to tensorboardX, tracking best model by
                # current weighted-average accuracy
                logger.log(model, optimizer, loss.item(),
                           track_score=scores_dict['weighted_acc'] / model_metrics.bn,
                           scores_dict=scores_dict,
                           epoch=epoch,
                           bn=model_metrics.bn,
                           batches_per_epoch=model_metrics.batches_per_epoch)
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, model_metrics.bn, model_metrics.batches_per_epoch,
                    (model_metrics.bn / model_metrics.batches_per_epoch) * 100,
                    loss.item()))
        # whole epoch's training accuracy; useful for monitoring overfitting
        print('Train Accuracy: ({:.0f}%)'.format(model_metrics.w_accuracy * 100))

        if evaluate:
            print('\n================== VALIDATION ==================')
            model.eval()  # set model to evaluate mode
            val_loss = 0.
            val_correct = 0
            val_num = len(val_loader.sampler)
            # disable autograd (replaces volatile flag from v0.3.1 and earlier)
            with torch.no_grad():
                for img, label in val_loader:
                    img, label = img.to(device), label.to(device)
                    output = model(img)  # forward pass
                    # sum up batch loss
                    # NOTE(review): size_average is deprecated in newer torch
                    # (use reduction='sum'); kept for compatibility.
                    val_loss += F.cross_entropy(output, label,
                                                size_average=False).item()
                    pred = output.max(1, keepdim=True)[1]
                    val_correct += pred.eq(label.view_as(pred)).sum().item()

            val_loss /= val_num
            val_acc = 100. * val_correct / val_num
            print(
                '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
                .format(val_loss, val_correct, val_num, val_acc))

            # check if best model according to accuracy; if so, replace metrics
            is_best = val_acc > best_val_acc
            if is_best:
                best_val_acc = val_acc
                # note: val_loss of best model w.r.t. accuracy, not the best
                # val_loss throughout training
                best_val_loss = val_loss

            # create checkpoint dictionary and save it; if is_best, copy the
            # file over to the file containing the best model for this run
            state = {
                'epoch': epoch,
                'model': args.model,
                'state_dict': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'val_loss': val_loss,
                'best_val_loss': best_val_loss,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            save_checkpoint(state, is_best, checkpoint_file)

    print('\n================== TESTING ==================')
    # load best model from training run (according to validation accuracy)
    check = torch.load(logger.best_path)
    model.load_state_dict(check['state_dict'])
    model.eval()

    test_loss = 0.
    test_correct = 0
    test_num = len(test_loader.sampler)
    test_metrics = CalculateMetrics(batch_size=args.batch_size,
                                    batches_per_epoch=test_num)
    test_scores = None
    with torch.no_grad():
        for img, label in test_loader:
            img, label = img.to(device), label.to(device)
            output = model(img)
            # sum up batch loss (size_average deprecated — see note above)
            test_loss += F.cross_entropy(output, label,
                                         size_average=False).item()
            pred = output.max(1, keepdim=True)[1]  # index of the max logit
            test_scores = test_metrics.update_scores(label, pred)
            logger.log(model, optimizer, test_loss,
                       test_scores['weighted_acc'], test_scores, phase='test')
    test_loss /= test_num
    # FIX: original printed test_metrics['weighted_acc'], subscripting the
    # CalculateMetrics object itself (TypeError); use the scores dict that
    # update_scores returns.
    print('Test set: Average loss: {:.4f}, Accuracy: ({:.0f}%)\n'.format(
        test_loss, test_scores['weighted_acc'] * 100))
    print('Final model stored at "{}".'.format(checkpoint_file + '-best.pth.tar'))
def train(self):
    """ Train the Network

    Runs self.FLAGS.iterations optimization steps over the full train
    loader, evaluating on the test loader every 50 steps, decaying the
    learning rate on a fixed schedule, and checkpointing periodically or
    whenever test accuracy improves.
    """
    # Successive multiplicative LR decay factors, consumed via pop() below.
    lr_decay_list = [0.5, 0.5, 0.5, 0.1, 0.1]
    best_acc, total_test_correct = 0.0, 0.0
    for step in range(self.start_step, self.FLAGS.iterations):
        train_loss, train_correct = [], []
        test_loss, test_correct = [], []
        start_time = time.time()
        for i, data in enumerate(self.train_loader, 0):
            # get the inputs
            images, labels = data
            loss, correct = self.model.train_step(images, labels,
                                                  forward_only=False)
            train_loss.append(loss.numpy())
            train_correct.append(correct.numpy())
        total_train_loss = np.mean(np.hstack(train_loss))
        total_train_correct = np.mean(np.hstack(train_correct))
        step_time = (time.time() - start_time)
        print("============================\n"
              "Data Type: %s\n"
              "Global step: %d\n"
              "Learning rate: %.4f\n"
              "Step-time (ms): %.4f\n"
              "Train loss avg: %.4f\n"
              "Train Accuracy: %.4f\n"
              "============================" %
              (self.data_type, step + 1, self.model.learning_rate,
               step_time * 1000, total_train_loss, total_train_correct))
        # Evaluate on the test loader every 50 steps.
        if (step + 1) % 50 == 0:
            start_time = time.time()
            for i, data in enumerate(self.test_loader, 0):
                images, labels = data
                # Only the first element of each batch is evaluated here.
                test_image, test_labels = images[0, :], labels[0, :]
                # test_image, test_labels = self.test_dataset[test_index]
                loss, correct = self.model.test_step(test_image, test_labels)
                test_loss.append(loss.numpy())
                test_correct.append(correct.numpy())
            total_test_loss = np.mean(np.hstack(test_loss))
            total_test_correct = np.mean(np.hstack(test_correct))
            step_time = (time.time() - start_time)
            print("Test-time (ms): %.4f\n"
                  "Test loss avg: %.4f\n"
                  "Test Accuracy: %.4f\n"
                  "============================" %
                  (step_time * 1000, total_test_loss, total_test_correct))
            self.logger.scalar_summary('test_loss', total_test_loss, step + 1)
            self.logger.scalar_summary('test_acc', total_test_correct, step + 1)
            print()
        self.logger.scalar_summary('train_loss', total_train_loss, step + 1)
        self.logger.scalar_summary('train_acc', total_train_correct, step + 1)
        self.logger.scalar_summary('learning_rate', self.model.learning_rate,
                                   step + 1)
        # Adjust Learning Rate
        if (step + 1) == 100:
            # Unfreeze the parameters
            self.model.set_optimizer(self.model.learning_rate, 1.0)
        if (step + 1) % self.FLAGS.learning_rate_step == 0:
            try:
                # pop() raises once the decay schedule is exhausted; the
                # except below deliberately stops further decay in that case.
                self.model.learning_rate = self.model.learning_rate * lr_decay_list.pop()
                self.model.set_optimizer(self.model.learning_rate, 1.0)
            except Exception as e:
                pass
        # Save Checkpoint: periodically, or whenever test accuracy improves.
        if ((step + 1) % self.FLAGS.print_freq == 0) or total_test_correct > best_acc:
            if total_test_correct > best_acc:
                best_acc = total_test_correct
            print("Saving the model...")
            start_time = time.time()
            save_checkpoint(
                {
                    'step': step,
                    'lr': self.model.learning_rate,
                    'state_dict': self.model.model.state_dict()
                }, self.train_dir, 100)
            print('Saving checkpoint at step: %d' % (step + 1))
            # model.saver.save(sess, os.path.normpath(os.path.join(train_dir, 'checkpoint')), global_step=current_step )
            print("done in {0:.2f} ms".format(
                (time.time() - start_time) * 1000))
# Train-mode entry: only runs when --train was passed on the command line.
# (idiom fix: `== True` comparison replaced by plain truthiness)
if results.train:
    #load data into image loaders (train, test, validation)
    trainloader, testloader, validloader, classes = utilities.load_transform(results.datadir)
    #load the classes into training process if exist
    model, optimizer = utilities.network(results.pre_model, results.hidden_units,
                                         results.output, results.lrate, results.gpu)
    utilities.train(trainloader, validloader, model, optimizer, results.epochs,
                    results.print_every, results.gpu)
    utilities.test(model, testloader, results.gpu)
    # Everything needed to rebuild the trained model later.
    saved_dict = {'arch' : results.pre_model,
                  'hidden_units' : results.hidden_units,
                  'class_labels' : classes,
                  'epochs': results.epochs,
                  'learning_rate': results.lrate,
                  'output_classes' : results.output,
                  'model_state_dict' : model.state_dict(),
                  'optimizer' : optimizer.state_dict()}
    utilities.save_checkpoint(saved_dict, results.save)
    print("the checkpoint was saved in {}".format(results.save))
'''
The average validation accuracy: 87.019
Accuracy of the network on the test images: 83.57 %
'''
'''
python train.py --data_dir flowers --class_file cat_to_name.json --gpu --arch vgg16 --learning_rate .001 --hidden_unit 512 --epochs 5 --output 102 --print_every 20 --train --save_dir checkpointvgg.pth
'''
def main(args):
    """Train, optionally validate, and test the Judge model on SparseMNIST.

    NOTE(review): relies on mkdir_p, prepare_data, Judge and save_checkpoint
    defined elsewhere in the project.
    """
    # reproducibility
    if args.seed is not None:
        torch.manual_seed(args.seed)  # don't think this works with SparseMNIST right now
        np.random.seed(args.seed)
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    if args.checkpoint_filename is None:
        checkpoint_file = args.checkpoint + str(datetime.now())[:-10]
    else:
        checkpoint_file = args.checkpoint + args.checkpoint_filename
    # cuda
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")
    # eval?
    args.evaluate = args.val_batches > 0
    # prep sparse mnist
    if not args.evaluate:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)
    # machinery
    model = Judge().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # setup validation metrics we want to track for tracking best model over training run
    best_val_loss = float('inf')
    best_val_acc = 0
    print('\n================== TRAINING ==================')
    model.train()  # set model to training mode
    # set up training metrics we want to track
    correct = 0
    train_num = args.batches * args.batch_size
    # timer
    time0 = time.time()
    for ix, (sparse, img, label) in enumerate(train_loader):  # iterate over training batches
        sparse, label = sparse.to(device), label.to(device)  # get data, send to gpu if needed
        optimizer.zero_grad()  # clear parameter gradients from previous training update
        logits = model(sparse)  # forward pass
        loss = F.cross_entropy(logits, label)  # calculate network loss
        loss.backward()  # backward pass
        optimizer.step()  # take an optimization step to update model's parameters
        pred = logits.max(1, keepdim=True)[1]  # get the index of the max logit
        correct += pred.eq(
            label.view_as(pred)).sum().item()  # add to running total of hits
        if ix % args.log_interval == 0:
            # maybe log current metrics to terminal
            # NOTE(review): '{:0f}' below looks like it was meant to be
            # '{:.0f}' (it prints full float precision) — confirm intent.
            print('Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t\
Accuracy: {:.2f}%\tTime: {:0f} min, {:.2f} s'.format(
                (ix + 1) * len(sparse), train_num,
                100. * ix / len(train_loader), loss.item(),
                100. * correct / ((ix + 1) * len(sparse)),
                (time.time() - time0) // 60,
                (time.time() - time0) % 60))
    print(
        'Train Accuracy: {}/{} ({:.2f}%)\tTrain Time: {:0f} minutes, {:2f} seconds\n'
        .format(correct, train_num, 100. * correct / train_num,
                (time.time() - time0) // 60,
                (time.time() - time0) % 60))
    if args.evaluate:
        print('\n================== VALIDATION ==================')
        model.eval()
        # set up validation metrics we want to track
        val_loss = 0.
        val_correct = 0
        val_num = args.eval_batch_size * args.val_batches
        # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
        with torch.no_grad():
            for sparse, img, label in val_loader:
                sparse, label = sparse.to(device), label.to(device)
                logits = model(sparse)
                # NOTE(review): size_average is deprecated in newer torch
                # (reduction='sum' is the modern spelling).
                val_loss += F.cross_entropy(logits, label,
                                            size_average=False).item()
                pred = logits.max(1, keepdim=True)[1]
                val_correct += pred.eq(label.view_as(pred)).sum().item()
        # update current evaluation metrics
        val_loss /= val_num
        val_acc = 100. * val_correct / val_num
        print(
            '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'
            .format(val_loss, val_correct, val_num, val_acc))
        is_best = val_acc > best_val_acc
        if is_best:
            best_val_acc = val_acc
            best_val_loss = val_loss
            # note this is val_loss of best model w.r.t. accuracy,
            # not the best val_loss throughout training
        # create checkpoint dictionary and save it;
        # if is_best, copy the file over to the file containing best model for this run
        state = {
            'state_dict': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }
        save_checkpoint(state, is_best, checkpoint_file)
    print('\n================== TESTING ==================')
    # NOTE(review): the '-best' file only exists if a validation pass ran and
    # save_checkpoint copied it — confirm behavior when args.evaluate is False.
    check = torch.load(checkpoint_file + '-best.pth.tar')
    model.load_state_dict(check['state_dict'])
    model.eval()
    test_loss = 0.
    test_correct = 0
    test_num = args.eval_batch_size * args.test_batches
    # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
    with torch.no_grad():
        for sparse, img, label in test_loader:
            sparse, label = sparse.to(device), label.to(device)
            logits = model(sparse)
            test_loss += F.cross_entropy(logits, label,
                                         size_average=False).item()
            pred = logits.max(1, keepdim=True)[1]  # get the index of the max logit
            test_correct += pred.eq(label.view_as(pred)).sum().item()
    test_loss /= test_num
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, test_correct, test_num, 100. * test_correct / test_num))
    print('Final model stored at "{}".'.format(checkpoint_file + '-best.pth.tar'))
action="store", default=400, type=int) ap.add_argument('--epochs', dest="epochs", action="store", default=1, type=int) ap.add_argument('--gpu', dest="gpu", action="store_true") arguments = ap.parse_args() data_dir = arguments.data_dir[0] save_dir = arguments.save_dir learning_rate = arguments.learning_rate arch = arguments.arch hidden_units = arguments.hidden_units use_gpu = arguments.gpu and torch.cuda.is_available() epochs = arguments.epochs dataloaders, image_datasets = utilities.load_data(data_dir) model, optimizer, criterion = utilities.define_model( arch=arch, hidden_layer1=hidden_units, use_gpu=use_gpu) utilities.train_network(model, optimizer, criterion, epochs=epochs, dataloaders=dataloaders, use_gpu=use_gpu) model.class_to_idx = image_datasets['train'].class_to_idx utilities.save_checkpoint(save_dir, model, optimizer, arch, hidden_units, learning_rate, epochs) print("Network has finished its training")
metrics = {"train_loss_by_epoch": train_loss_by_epoch, "valid_loss_by_epoch": valid_loss_by_epoch, "valid_accuracy_by_epoch": valid_accuracy_by_epoch} return metrics if __name__ == "__main__": parser = ArgumentParser(__file__, description="train network") parser.add_argument("path", type=str, help="path specifying dataset location") parser.add_argument("--save_directory", "-s", type=str, default=None, help="always following structure: directory_name/file_name") parser.add_argument("--arch", "-a", type=str, default="vgg11") parser.add_argument("--learning_rate", "-lr", type=int, default=0.003) parser.add_argument("--epochs", "-e", type=int, default=20) parser.add_argument("--hidden_units", "-u", nargs="*", type=int, default=[256, 128], help="specify each layer size like this (e.g 3 layers): 512 256 256") parser.add_argument("--gpu", "-g", action="store_true", default=False) args = parser.parse_args() data = load_data(args.path) idx_to_class = load_class_mapping("cat_to_name.json") model = build_model(args.arch, len(idx_to_class), args.hidden_units) criterion = nn.NLLLoss() optimizer = optim.Adam(model.parameters(), lr=args.learning_rate) device = "cuda" if args.gpu else "cpu" metrics = apply_model(model, device, criterion, optimizer, data["train"], data["validation"], epochs=args.epochs) save_checkpoint(model, arch=args.arch, epochs=args.epochs, class_mapping=idx_to_class, optimizer=optimizer.state_dict(), save_dir=args.save_directory)
def supervised_train(model, criterion, optimizer):
    """ Performs supervised training on the agent

    FIX: docstring now documents the real parameters (the old one described a
    nonexistent `nodes` argument).

    :param model: network being trained
    :param criterion: loss function
    :param optimizer: optimizer updating `model`'s parameters
    :return: per-epoch train/validation losses
    :rtype: dict
    """
    print("\nSupervised Training\n")
    losses = {'train': [], 'valid': []}

    # Get folders for training data: matching X_*.npy / y_*.npy pairs.
    x_list, y_list = [], []
    for npy in os.listdir('data'):
        ext = os.path.splitext(npy)[-1].lower()
        if not ext == '.npy':
            continue
        if npy[0] == 'X':
            x_list.append(npy)
        elif npy[0] == 'y':
            y_list.append(npy)
    x_list.sort()
    y_list.sort()

    train_iter = 0
    for epoch in range(1, conf.epochs + 1):
        print("\nEpoch %d\n" % epoch)
        train_loss, val_loss = 0, 0
        indexes = np.arange(len(x_list))
        if conf.shuffle:
            np.random.shuffle(indexes)
        for num, file_i in enumerate(indexes):
            batch_size = conf.batch_size
            x_d = x_list[file_i]
            y_d = y_list[file_i]
            # X and y files must belong to the same race.
            if not (x_d[2:] == y_d[2:]):
                print("File not the same. They are: " + x_d[2:] + " and " + y_d[2:])
                continue
            print("Loading " + str(num) + "th Race: " + x_d[2:])
            x_data, y_data = np.load(conf.data_dir + x_d), np.load(conf.data_dir + y_d)
            x_train, x_val, y_train, y_val = train_test_split(
                x_data, y_data, test_size=conf.val_split)
            if len(x_train) < batch_size:
                batch_size = len(x_train)
                num_batches = 1
                val_size = 1
            else:
                num_batches = len(x_train) // batch_size
                val_size = len(x_val) // batch_size
            for batch_i, (x_input, y_input) in enumerate(
                    utils.get_batches(x_train, y_train, batch_size)):
                # reorder axes so channels come first — assumes NHWC input;
                # TODO(review) confirm against the .npy layout
                x_input = np.swapaxes(x_input, 1, 3)
                print(np.shape(x_input))
                x_input, y_input = torch.from_numpy(x_input).float(), torch.from_numpy(y_input).float()
                x_input, y_input = Variable(x_input), Variable(y_input)
                # FIX: the original computed the loss but never called
                # zero_grad/backward/step, so the model was never updated.
                optimizer.zero_grad()
                output = model(x_input)
                loss = criterion(output, y_input)
                loss.backward()
                optimizer.step()
                train_loss += loss
                train_iter += 1
                if train_iter % 10 == 0:
                    print("Done with %d iterations of training:\tCurr Loss: %f" % (train_iter, loss))
                if train_iter % conf.save_freq == 0:
                    # FIX: originally referenced `config.save_dir` /
                    # `config.checkpoint` although the settings object is
                    # named `conf` everywhere else in this function.
                    if not os.path.isdir(conf.save_dir):
                        os.mkdir(conf.save_dir)
                    utils.save_checkpoint({'epoch': epoch,
                                           'state_dict': model.state_dict(),
                                           'optimizer': optimizer.state_dict()},
                                          conf.checkpoint)
            # Validation check
            if len(x_val) < batch_size:
                batch_size = len(x_val)
            for val_i, (val_x_inp, val_y_inp) in enumerate(
                    utils.get_batches(x_val, y_val, batch_size)):
                val_x_inp = np.swapaxes(val_x_inp, 1, 3)
                print(np.shape(val_x_inp))
                val_x_inp, val_y_inp = torch.from_numpy(val_x_inp).float(), torch.from_numpy(val_y_inp).float()
                val_x_inp, val_y_inp = Variable(val_x_inp), Variable(val_y_inp)
                output = model(val_x_inp)
                loss = criterion(output, val_y_inp)
                val_loss += loss
        # Append losses to generate plots in the future
        losses["train"].append(train_loss / num_batches)
        losses["valid"].append(val_loss / val_size)
    with open(conf.pickle_dir + 'losses.p', 'wb') as f:
        pkl.dump(losses, f)
    return losses
def main():
    """Fine-tune a pretrained torchvision network on the flowers dataset and
    save a checkpoint.

    Relies on parse_args, train and save_checkpoint defined elsewhere.
    """
    from collections import OrderedDict  # needed by both classifier branches

    print("Start training")  # message informing about the beginning of the training phase
    args = parse_args()

    # FIX: train_dir/valid_dir/test_dir were referenced but never defined in
    # the original (guaranteed NameError). 'flowers' matches the layout used
    # elsewhere in this project — TODO(review) confirm.
    data_dir = 'flowers'
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # Transforms for the training, validation, and testing sets
    training_data_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    validaiton_data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    test_data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # Load the datasets with ImageFolder
    train_image_dataset = datasets.ImageFolder(train_dir, transform=training_data_transforms)
    valid_image_dataset = datasets.ImageFolder(valid_dir, transform=validaiton_data_transforms)
    test_image_dataset = datasets.ImageFolder(test_dir, transform=test_data_transforms)

    # Using the image datasets and the transforms, define the dataloaders
    train_dataloader = torch.utils.data.DataLoader(train_image_dataset, batch_size=32, shuffle=True)
    valid_dataloader = torch.utils.data.DataLoader(valid_image_dataset, batch_size=32, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(test_image_dataset, batch_size=32, shuffle=True)

    # Use GPU if it's available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    class_names = train_image_dataset.classes

    model = getattr(models, args.arch)(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False  # freeze the feature extractor

    if args.arch == "vgg13":
        classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(25088, 1500)),
            ('relu', nn.ReLU()),
            ('dropout', nn.Dropout(.2)),
            ('fc2', nn.Linear(1500, 102)),
            ('output', nn.LogSoftmax(dim=1))
        ]))
    elif args.arch == "densenet121":
        classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(1024, 500)),
            ('drop', nn.Dropout(p=0.6)),
            ('relu', nn.ReLU()),
            ('fc2', nn.Linear(500, 102)),
            ('output', nn.LogSoftmax(dim=1))]))
    else:
        # FIX: `classifier` used to be unbound for other architectures.
        raise ValueError("Unsupported architecture: {}".format(args.arch))
    model.classifier = classifier

    # FIX: the original called train(...) with undefined names (criterion,
    # optimizer, dataloaders, epochs, gpu). Construct them here; NLLLoss
    # pairs with the LogSoftmax output above.
    # TODO(review): confirm parse_args exposes learning_rate/epochs/gpu.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=float(args.learning_rate))
    dataloaders = [train_dataloader, valid_dataloader, test_dataloader]
    epochs = int(args.epochs)
    gpu = args.gpu

    class_index = train_image_dataset.class_to_idx
    train(model, criterion, optimizer, dataloaders, epochs, gpu)
    model.class_to_idx = class_index
    path = args.save_dir
    save_checkpoint(path, model, optimizer, args, classifier)
# send the model to the device dnn_model.to(device) # use the negative log likelihood loss because the output of classifier is log softmax criterion = nn.NLLLoss() # only train model on classifier parameters, feature parameters are frozen optimizer = optim.Adam(dnn_model.classifier.parameters(), lr=FLAGS.learning_rate) # train the classifier of the model utilities.train_model(dnn_model, optimizer, criterion, dataloaders, device, num_epochs=FLAGS.epochs, print_every=2) # save the class to index dictionary to the model dnn_model.class_to_idx = image_datasets['train'].class_to_idx # save a checkpoint of the model utilities.save_checkpoint(dnn_model, model_arch, optimizer, num_input, num_hid, num_output, save_dir=FLAGS.save_dir)
def main():
    """Fine-tune a pretrained torchvision network on the dataset under
    in_args.data_directory and save a checkpoint.

    Relies on get_input_args and save_checkpoint defined elsewhere.
    """
    in_args = get_input_args()
    data_directory = in_args.data_directory
    train_directory = data_directory + '/train'
    valid_directory = data_directory + '/valid'
    test_directory = data_directory + '/test'
    save_checkpoint_path = in_args.save_checkpoint_path

    # Training uses augmentation.
    train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # FIX: validation/test previously used RandomResizedCrop, which makes
    # every evaluation pass non-deterministic; use the conventional
    # deterministic Resize + CenterCrop instead.
    validation_test_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load the datasets with ImageFolder
    image_datasets = {
        "train": datasets.ImageFolder(train_directory, transform=train_transforms),
        "validation": datasets.ImageFolder(valid_directory, transform=validation_test_transforms),
        "test": datasets.ImageFolder(test_directory, transform=validation_test_transforms)
    }
    # Using the image datasets and the transforms, define the dataloaders
    dataloaders = {
        "train": torch.utils.data.DataLoader(image_datasets["train"], batch_size=128, shuffle=True),
        "validation": torch.utils.data.DataLoader(image_datasets["validation"], batch_size=128, shuffle=True),
        "test": torch.utils.data.DataLoader(image_datasets["test"], batch_size=128, shuffle=True)
    }

    model = getattr(models, in_args.arch)(pretrained=True)
    class_to_idx = image_datasets['train'].class_to_idx
    for param in model.parameters():
        param.requires_grad = False  # freeze the feature extractor

    # Works for both Sequential classifiers (vgg-style) and a single Linear
    # classifier (densenet-style).
    fc1_input = model.classifier[0].in_features if hasattr(
        model.classifier, "__getitem__") else model.classifier.in_features
    fc1_output = in_args.hidden_units
    fc2_input = in_args.hidden_units
    fc2_output = len(class_to_idx)
    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(fc1_input, fc1_output)),
                     ('relu1', nn.ReLU()),
                     ('drop1', nn.Dropout(0.5)),
                     ('fc2', nn.Linear(fc2_input, fc2_output)),
                     ('output', nn.LogSoftmax(dim=1))]))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.classifier = classifier
    model.to(device)
    criterion = nn.NLLLoss()  # pairs with the LogSoftmax output
    optimizer = optim.Adam(model.classifier.parameters(), lr=in_args.learning_rate)

    epochs = in_args.epochs
    steps = 0
    running_loss = 0
    print_every = 20
    for epoch in range(epochs):
        for inputs, labels in dataloaders["train"]:
            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            logps = model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if steps % print_every == 0:
                test_loss = 0
                accuracy = 0
                model.eval()
                # NOTE(review): this evaluates on the *test* split during
                # training; the "validation" loader would avoid test leakage.
                with torch.no_grad():
                    for inputs, labels in dataloaders["test"]:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = model.forward(inputs)
                        batch_loss = criterion(logps, labels)
                        test_loss += batch_loss.item()
                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()
                print(
                    f"Epoch {epoch+1}/{epochs}.. "
                    f"Train loss: {running_loss/print_every:.3f}.. "
                    f"Test loss: {test_loss/len(dataloaders['test']):.3f}.. "
                    f"Test accuracy: {accuracy/len(dataloaders['test']):.3f}")
                running_loss = 0
                model.train()
    print("Training finished : ---------------------------")
    save_checkpoint(save_checkpoint_path, model, optimizer, epochs,
                    running_loss, class_to_idx)