import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
# AlexNet here is the project's own model class (definition not shown).

test_loader = DataLoader(dataset,                # the opening of this call was
                         batch_size=batch_size,  # truncated; the loader and
                         sampler=test_sample)    # dataset names are assumed

# GPU setup
device = torch.device('cuda')
net = AlexNet(in_channel=2, classes=10).to(device=device)
criterion = nn.CrossEntropyLoss()
# momentum=0.9 leftover dropped: it belongs to SGD, Adam takes no momentum arg
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Train loop
num_epochs = 25
for epoch in range(num_epochs):
    print("Epoch: {} - Train".format(epoch))
    net.train()
    running_loss = 0.

    # Train:
    for batch_index, (signals, labels) in enumerate(tqdm(train_loader)):
        signals, labels = signals.to(device=device), labels.to(device=device)
        optimizer.zero_grad()
        outputs = net(signals)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # if batch_index > 50:  (truncated debugging leftover from the original)
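# The script above builds a test sampler but never evaluates on it. Below is a
# minimal evaluation pass to pair with the training loop; it reuses
# test_loader, net, criterion, and device from the code above, but the loop
# itself is a sketch added here, not part of the original script.
net.eval()
test_loss, correct, total = 0., 0, 0
with torch.no_grad():  # gradients are not needed for evaluation
    for signals, labels in test_loader:
        signals, labels = signals.to(device=device), labels.to(device=device)
        outputs = net(signals)
        test_loss += criterion(outputs, labels).item()
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)
print("Test loss: {:.4f} | Test accuracy: {:.4f}".format(
    test_loss / len(test_loader), correct / total))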
from datetime import datetime

import pandas as pd
import torch
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score

# AlexNet, DownpourSGD, get_dataset, and evaluate are project-local
# definitions that are not shown in this snippet.


def main(args):
    logs = []

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainloader, testloader = get_dataset(args, transform)
    net = AlexNet()

    if args.no_distributed:
        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.0)
    else:
        optimizer = DownpourSGD(net.parameters(), lr=args.lr,
                                n_push=args.num_push, n_pull=args.num_pull,
                                model=net)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1,
                                                     verbose=True, min_lr=1e-3)

    # train
    net.train()
    if args.cuda:
        net = net.cuda()

    for epoch in range(args.epochs):  # loop over the dataset multiple times
        print("Training for epoch {}".format(epoch))
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            if args.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs, 1)
            # move tensors off the GPU before handing them to scikit-learn,
            # and pass them in (y_true, y_pred) order
            accuracy = accuracy_score(labels.cpu(), predicted.cpu())

            log_obj = {
                'timestamp': datetime.now(),
                'iteration': i,
                'training_loss': loss.item(),
                'training_accuracy': accuracy,
            }
            if i % args.log_interval == 0 and i > 0:  # print every n mini-batches
                log_obj['test_loss'], log_obj['test_accuracy'] = evaluate(
                    net, testloader, args)
                print("Timestamp: {timestamp} | "
                      "Iteration: {iteration:6} | "
                      "Loss: {training_loss:6.4f} | "
                      "Accuracy: {training_accuracy:6.4f} | "
                      "Test Loss: {test_loss:6.4f} | "
                      "Test Accuracy: {test_accuracy:6.4f}".format(**log_obj))
            logs.append(log_obj)

        val_loss, val_accuracy = evaluate(net, testloader, args, verbose=True)
        scheduler.step(val_loss)

    df = pd.DataFrame(logs)
    print(df)
    if args.no_distributed:
        if args.cuda:
            df.to_csv('log/gpu.csv', index_label='index')
        else:
            df.to_csv('log/single.csv', index_label='index')
    else:
        df.to_csv('log/node{}.csv'.format(dist.get_rank()), index_label='index')

    print('Finished Training')
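# main() above calls an evaluate() helper whose definition is not shown. The
# sketch below is only a plausible reconstruction inferred from the call
# sites: it must accept (net, testloader, args, verbose=False) and return a
# (test_loss, test_accuracy) pair; everything in its body is an assumption.
def evaluate(net, testloader, args, verbose=False):
    net.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed while scoring the test set
        for inputs, labels in testloader:
            if args.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = net(inputs)
            total_loss += F.cross_entropy(outputs, labels,
                                          reduction='sum').item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    net.train()  # restore training mode for the caller's loop
    test_loss = total_loss / total
    test_accuracy = correct / total
    if verbose:
        print("Test Loss: {:6.4f} | Test Accuracy: {:6.4f}".format(
            test_loss, test_accuracy))
    return test_loss, test_accuracy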
# --- tail of an evaluation function; its opening (the def and the loop over
# --- the test loader) was truncated in the original snippet
        else:  # GoogLeNet
            output, _1, _2 = model(image)
        _, pred = torch.max(output.data, 1)
        correct = (pred == label).sum()
        loss = loss_func(output, label.long())
        total_loss += loss.item()
        total_acc += correct.cpu().numpy()
        step += 1
    total_loss /= step
    total_acc /= 10000  # hard-coded test-set size (10,000 images)
    return total_acc, total_loss


# train model
model.train()
steps = len(train_loader)
bar = ProgressBar(maxStep=steps)  # initialize progress bar (project-local helper)
for epoch in range(epochs):
    epoch_loss = 0.0
    epoch_acc = 0.0
    for i, (image, label) in enumerate(train_loader):
        image, label = image.to(Device), label.to(Device)
        # step 1.
        optimizer.zero_grad()
        # step 2.
        if modelName != 'GoogLeNet':  # Others
            output = model(image)
            # step 3.
            loss = loss_func(output, label.long())
        # step 4.
        else:  # GoogLeNet
            output, auxOut_1, auxOut_2 = model(image)
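            # The original snippet is cut off here. A plausible completion,
            # assuming the standard GoogLeNet recipe of adding each auxiliary
            # classifier's loss with a 0.3 weight; this weighting and the
            # update below are assumptions, not the original code.
            loss = (loss_func(output, label.long())
                    + 0.3 * loss_func(auxOut_1, label.long())
                    + 0.3 * loss_func(auxOut_2, label.long()))
        loss.backward()   # backward pass (presumably the "step 4." above)
        optimizer.step()  # parameter update
        epoch_loss += loss.item()  # assumed use of the accumulator defined above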