def main():
    """Train a classifier on MNIST and evaluate on the held-out test split each epoch.

    Relies on module-level config (BATCH_SIZE, EPOCHS, LEARNING_RATE,
    WEIGHT_DECAY, USE_CUDA) and the project-defined ``Network``.
    Saves a state-dict checkpoint per epoch under ./checkpoint/.
    """
    train_dataset = MNIST(root='./data', train=True, download=True,
                          transform=transforms.ToTensor())
    test_dataset = MNIST(root='./data', train=False, download=True,
                         transform=transforms.ToTensor())
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=2)

    net = Network(1, 128, 10, 10)
    if USE_CUDA:
        net = net.cuda()

    opt = optim.SGD(net.parameters(), lr=LEARNING_RATE,
                    weight_decay=WEIGHT_DECAY, momentum=.9, nesterov=True)

    # Ensure the checkpoint directory exists before the first torch.save.
    import os
    os.makedirs('./checkpoint', exist_ok=True)

    for epoch in range(1, EPOCHS + 1):
        print('[Epoch %d]' % epoch)

        # ---- training ----
        net.train()
        train_loss = 0
        train_correct, train_total = 0, 0
        start_point = time.time()
        for inputs, labels in train_loader:
            # NOTE: the deprecated Variable wrapper was dropped; tensors are
            # autograd-aware directly since torch 0.4.
            if USE_CUDA:
                inputs, labels = inputs.cuda(), labels.cuda()
            opt.zero_grad()
            # BUG FIX: the original applied F.log_softmax (train) / F.softmax
            # (test) before F.cross_entropy, which itself applies log_softmax —
            # the loss was computed on doubly-normalised outputs. Feed raw logits.
            logits = net(inputs)
            loss = F.cross_entropy(logits, labels)
            loss.backward()
            opt.step()
            train_loss += loss.item()
            # argmax is invariant under (log_)softmax, so accuracy is unaffected.
            train_correct += (logits.argmax(dim=1) == labels).sum().item()
            train_total += len(logits)
        print('train-acc : %.4f%% train-loss : %.5f'
              % (100 * train_correct / train_total, train_loss / len(train_loader)))
        print('elapsed time: %ds' % (time.time() - start_point))

        # ---- evaluation ----
        net.eval()  # freeze dropout/batch-norm statistics during eval
        test_loss = 0
        test_correct, test_total = 0, 0
        with torch.no_grad():  # hoisted around the whole loop: no autograd graph
            for inputs, labels in test_loader:
                if USE_CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                logits = net(inputs)
                test_loss += F.cross_entropy(logits, labels).item()
                test_correct += (logits.argmax(dim=1) == labels).sum().item()
                test_total += len(logits)
        print('test-acc : %.4f%% test-loss : %.5f'
              % (100 * test_correct / test_total, test_loss / len(test_loader)))

        torch.save(net.state_dict(), './checkpoint/checkpoint-%04d.bin' % epoch)
def main(params):
    """Train a dense-block network on pickled image/annotation tensors.

    params keys used: 'train_data_pkl', 'train_anno_pkl', 'batch_size',
    'model_from', 'model_save', 'lr', 'l2_reg', 'max_epoch', 'num_answers'.
    Side effects: prints progress and saves a state dict every 10 epochs.
    """
    print("Loading dataset ... ")
    with open(params['train_data_pkl'], 'rb') as f:
        train_data = pkl.load(f)
    with open(params['train_anno_pkl'], 'rb') as f:
        train_anno = pkl.load(f)
    # NOTE(review): the original carried large commented-out blocks that loaded
    # and iterated a validation set; that dead code is dropped here rather than
    # kept as comments — restore from history if validation is re-enabled.

    # Train dataset and dataloader. NHWC -> NCHW, the layout torch conv
    # layers expect.
    train_data = np.transpose(train_data, (0, 3, 1, 2))
    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(train_data), torch.LongTensor(train_anno))
    train_loader = dataloader.DataLoader(train_dataset, params['batch_size'],
                                         shuffle=True, collate_fn=collate_fn)

    # The number of layers in each dense block.
    n_layers_list = [4, 5, 7, 10, 12, 15, 12, 10, 7, 5, 4]

    print("Constructing the network ... ")
    densenet = Network(n_layers_list, 5).to(device)

    # Resume from a checkpoint when one exists. map_location lets a GPU-saved
    # checkpoint load on a CPU-only host.
    if os.path.isfile(params['model_from']):
        print("Starting from the saved model")
        densenet.load_state_dict(
            torch.load(params['model_from'], map_location=device))
    else:
        print("Couldn't find the saved model")
        print("Starting from the bottom")

    print("Training the model ...")
    # Hyperparameters, optimizer, criterion.
    learning_rate = params['lr']
    optimizer = torch.optim.RMSprop(densenet.parameters(), learning_rate,
                                    weight_decay=params['l2_reg'])
    criterion = nn.CrossEntropyLoss()

    for epoch in range(params['max_epoch']):
        for i, (img, label) in enumerate(train_loader):
            img = img.to(device)
            label = label.to(device)

            # Forward pass; flatten so every pixel is scored independently
            # against its per-pixel label.
            pred = densenet(img)
            pred = pred.view((-1, params['num_answers']))
            label = label.view((-1))

            loss = criterion(pred, label)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # BUG FIX: loss.data (deprecated, returns a tensor) -> loss.item().
            print("Epoch: %d, Steps:[%d/%d], Loss: %.4f"
                  % (epoch, i, len(train_loader), loss.item()))

        # BUG FIX: the original re-created the RMSprop optimizer every epoch to
        # decay the learning rate, which silently discarded RMSprop's running
        # square-gradient statistics. Decay the LR in place instead.
        learning_rate *= 0.995
        for group in optimizer.param_groups:
            group['lr'] = learning_rate

        if (epoch + 1) % 10 == 0:
            print("Saved the model")
            torch.save(densenet.state_dict(), params['model_save'])