def train(config):
    if not os.path.exists(config.model_path):
        os.mkdir(config.model_path)

    train_loader, num_class = utils.get_trainloader(config.dataset,
                                                    config.dataset_path,
                                                    config.img_size,
                                                    config.batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cnn = model.CNN(img_size=config.img_size, num_class=num_class).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(cnn.parameters(), lr=config.lr)
    min_loss = 999

    print("START TRAINING")
    for epoch in range(config.epoch):
        epoch_loss = 0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs, _ = cnn(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            if (i + 1) % config.log_step == 0:
                if config.save_model_in_epoch:
                    torch.save(cnn.state_dict(),
                               os.path.join(config.model_path, config.model_name))
                print('Epoch [%d/%d], Iter [%d/%d], Loss: %.4f'
                      % (epoch + 1, config.epoch, i + 1, len(train_loader), loss.item()))

        avg_epoch_loss = epoch_loss / len(train_loader)
        print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, config.epoch, avg_epoch_loss))
        if avg_epoch_loss < min_loss:
            min_loss = avg_epoch_loss
            torch.save(cnn.state_dict(),
                       os.path.join(config.model_path, config.model_name))
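# Illustrative call of train(config) above. This is a minimal sketch, not part of
# the original script: the field names (model_path, dataset, lr, ...) are taken
# from how `config` is accessed inside train(); the concrete values and the use
# of SimpleNamespace are assumptions for demonstration only.
from types import SimpleNamespace

config = SimpleNamespace(
    model_path='./checkpoints',      # directory created if it does not exist
    model_name='cnn.pth',            # filename for the saved weights
    dataset='cifar10',               # passed through to utils.get_trainloader
    dataset_path='./data',
    img_size=32,
    batch_size=64,
    lr=1e-3,
    epoch=10,
    log_step=100,                    # log (and optionally checkpoint) every log_step batches
    save_model_in_epoch=False,
)
train(config)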
print(model_name)
print('+' * 80)
print(args.__dict__)
print('+' * 80)

init_path = '{}/{}/{}_{}.init'.format(ckpt_path, 'init', args.dataset, args.clf)
best_path = os.path.join(ckpt_path, folder, 'models', model_name + '.best')
stop_path = os.path.join(ckpt_path, folder, 'models', model_name + '.stop')

if args.batch_size == 0:
    args.batch_size = args.num_train
    print("Resetting batch size: {}...".format(args.batch_size))

train_loader = get_trainloader(args.dataset, args.batch_size, False)
test_loader = get_testloader(args.dataset, args.test_batch_size, noise=args.noise)
print('+' * 80)

# Fire the engines
if args.clf == 'fcn':
    print('Initializing FCN...')
    model = FCN(args.input_size, args.output_size)
elif args.clf == 'svm':
    print('Initializing SVM...')
    model = SVM(args.input_size, args.output_size)
elif args.clf == 'resnet18':
    print('Initializing ResNet18...')
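# Hypothetical argparse setup that would produce the `args` object used above.
# It is inferred from the attributes the snippet reads (dataset, clf, batch_size,
# test_batch_size, noise, input_size, output_size, num_train); the defaults and
# choices are assumptions, not the original command-line interface.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='mnist')
parser.add_argument('--clf', default='fcn', choices=['fcn', 'svm', 'resnet18'])
parser.add_argument('--batch_size', type=int, default=0)        # 0 => use the full training set per batch
parser.add_argument('--test_batch_size', type=int, default=256)
parser.add_argument('--noise', action='store_true')
parser.add_argument('--input_size', type=int, default=784)
parser.add_argument('--output_size', type=int, default=10)
parser.add_argument('--num_train', type=int, default=60000)
args = parser.parse_args()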
if args.save_dir:
    save_dir = args.save_dir
else:
    save_dir = 'checkpoint.pth'

device = torch.device('cuda' if args.gpu else 'cpu')

if args.arch:
    model = build_network(
        'resnet50',
        hidden_layer1_units=args.hidden_units if args.hidden_units else 512)
else:
    model = build_network(
        hidden_layer1_units=args.hidden_units if args.hidden_units else 512)

# Load the datasets with ImageFolder
trainloader, train_dataset = get_trainloader()
valloader, _ = get_valloader()
testloader, _ = get_testloader()

# Using the image datasets and the transforms, define the dataloaders
# trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
# valloader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=True)
# testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True)


def train(epochs=3, lr=0.003, print_scores_in=40, gpu=True):
    # criterion
    criterion = nn.NLLLoss()
    # optimizer: only the classifier parameters are optimized, since the feature
    # extractor's parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
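# Minimal, self-contained sketch of the transfer-learning pattern the snippet
# above relies on: a pretrained backbone with frozen feature weights, a new
# LogSoftmax head (NLLLoss expects log-probabilities), and an optimizer over the
# new head's parameters only. The layer sizes, class count, and use of resnet50
# are assumptions for illustration; this is not the repository's build_network(),
# which presumably exposes the head as model.classifier.
import torch
from torch import nn, optim
from torchvision import models

net = models.resnet50(pretrained=True)
for param in net.parameters():
    param.requires_grad = False            # freeze the feature extractor

net.fc = nn.Sequential(                    # replace the final fully connected layer
    nn.Linear(net.fc.in_features, 512),
    nn.ReLU(),
    nn.Linear(512, 102),
    nn.LogSoftmax(dim=1),                  # log-probabilities to pair with NLLLoss
)

criterion = nn.NLLLoss()
optimizer = optim.Adam(net.fc.parameters(), lr=0.003)

# One dummy training step to show the loop body shape:
images, labels = torch.randn(4, 3, 224, 224), torch.randint(0, 102, (4,))
optimizer.zero_grad()
loss = criterion(net(images), labels)
loss.backward()
optimizer.step()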