Example #1
def log_grad_norm_sum(optimizer):
  # Collect the gradient norm of every parameter that currently has a gradient.
  tensors = []
  for param_group in optimizer.param_groups:
    for p in param_group['params']:
      if p.requires_grad and p.grad is not None:
        tensors.append(torch.norm(p.grad))

  t = torch.stack(tensors)

  print('Rank:', dadt.rank(), 'Grad norm sum:', torch.sum(t))
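
As a hedged illustration (not part of the original listing), the helper above is meant to be called after loss.backward(), once gradients have been populated on the local rank. The loop below is a minimal sketch with hypothetical model, loss_fn, optimizer, and loader names.

# Minimal usage sketch (hypothetical names): log the per-rank gradient-norm
# sum after backward() and before the optimizer step.
for data, target in loader:
    optimizer.zero_grad()
    loss = loss_fn(model(data), target)
    loss.backward()
    log_grad_norm_sum(optimizer)  # gradients now exist, one norm per parameter
    optimizer.step()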
Example #2
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(
                output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(
                dim=1,
                keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        '\nRank:{}, Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
        .format(dadt.rank(), test_loss, correct, len(test_loader.dataset),
                100. * correct / len(test_loader.dataset)))
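
Because the test loader has no distributed sampler, every rank evaluates the full test set and prints its own summary. If a single summary is preferred, the print can be guarded on rank 0, mirroring the guard Example #6 uses before saving the model; a minimal sketch:

# Hedged sketch: report evaluation metrics from rank 0 only.
if dadt.rank() == 0:
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))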
Example #3
def train(args, model, device, train_loader, distributed_optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        start_time = int(time.time() * 1000)

        data, target = data.to(device), target.to(device)

        distributed_optimizer.zero_grad()

        output = model(data)
        loss = F.nll_loss(output, target)

        loss.backward()
        distributed_optimizer.step()

        end_time = int(time.time() * 1000)

        if batch_idx % args.log_interval == 0:
            print(
                'Rank:{}, Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, cost time:{}'
                .format(dadt.rank(), epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item(),
                        (end_time - start_time)))
Example #4
#coding=utf-8

import torch
import dadt.pytorch as dadt

dadt.init(broad_cast_executor='nccl', all_reduce_executor='nccl')

device = torch.device('cuda:{}'.format(dadt.local_rank()))
# device = torch.device('cpu')

if dadt.rank() == 0:
  x = torch.tensor([1, 2, 3, 4], device=device, dtype=torch.float)
else:
  x = torch.tensor([1, 1, 1, 1], device=device, dtype=torch.float)

y = dadt.all_reduce(x, "x")

print(dadt.rank(), y)

y = dadt.all_reduce(x, "x")

print(dadt.rank(), y)

dadt.shutdown()
Example #5
def train():
  # init dadt
  dadt.init(
    cycle_duration_ms=5,
    broad_cast_executor='nccl',
    all_reduce_executor='nccl',
    group_buffer_size=0)

  # Data augmentation and normalization for training
  # Just normalization for validation
  data_transforms = {
      'train': transforms.Compose([
          transforms.RandomResizedCrop(224),
          transforms.RandomHorizontalFlip(),
          transforms.ToTensor(),
          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
      ]),
      'val': transforms.Compose([
          transforms.Resize(256),
          transforms.CenterCrop(224),
          transforms.ToTensor(),
          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
      ]),
  }

  data_dir = 'hymenoptera_data'

  image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                    for x in ['train', 'val']}

  dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=4)
                 for x in ['train', 'val']}

  dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
  class_names = image_datasets['train'].classes

  device = torch.device("cuda:{}".format(dadt.local_rank()))

  model_ft = models.resnet101(pretrained=False)
  num_ftrs = model_ft.fc.in_features

  # Here the size of each output sample is set to 2.
  # Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
  model_ft.fc = nn.Linear(num_ftrs, 2)

  model_ft = model_ft.to(device)

  criterion = nn.CrossEntropyLoss()

  # Observe that all parameters are being optimized
  optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.002, momentum=0.9)

  # init distribute optimizer
  d_optimizer = dadt.DistributedOptimizer(optimizer=optimizer_ft)

  # Decay LR by a factor of 0.1 every 7 epochs
  exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

  model_ft.train()

  total_cost_time = 0.0
  total_count = 0.0

  for epoch in range(2500000):
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in dataloaders['train']:
      start_time = time.time()

      inputs = inputs.to(device)
      labels = labels.to(device)

      # zero the parameter gradients
      d_optimizer.zero_grad()

      outputs = model_ft(inputs)
      _, preds = torch.max(outputs, 1)

      loss = criterion(outputs, labels)

      loss.backward()
      d_optimizer.step()

      cost_time = int(round((time.time() - start_time) * 1000))

      total_cost_time += cost_time
      total_count += 1

      # statistics
      running_loss += loss.item() * inputs.size(0)
      running_corrects += torch.sum(preds == labels.data)

      print('Rank:{}, cost time:{}, avg time:{} epoch:{}, loss:{}'.format(dadt.rank(), cost_time, total_cost_time/total_count, epoch, loss.item()))
      print('--------------------------------------------------------------------------')

    # Step the epoch-level LR scheduler so the configured decay actually takes effect.
    exp_lr_scheduler.step()

    epoch_loss = running_loss / dataset_sizes['train']
    epoch_acc = running_corrects.double() / dataset_sizes['train']

    print('Rank:{}, {} Loss: {:.4f} Acc: {:.4f}'.format(dadt.rank(), 'train', epoch_loss, epoch_acc))
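
The snippet above relies on module-level imports that are not shown in this listing. A plausible set, assuming the torchvision-based pipeline used here:

# Imports assumed by the training snippet above (not shown in the original).
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

import dadt.pytorch as dadt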
Example #6
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=14,
                        metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    # initialize dadt
    dadt.init(broad_cast_executor='nccl',
              all_reduce_executor='nccl',
              all_reduce_buffer_size=64 * 1024 * 1024)

    torch.manual_seed(args.seed)

    # get device by rank
    device = torch.device("cuda:{}".format(dadt.local_rank()))

    # use both parsed batch sizes; --test-batch-size was otherwise unused
    train_kwargs = {'batch_size': args.batch_size,
                    'num_workers': 1, 'pin_memory': True, 'shuffle': True}
    test_kwargs = {'batch_size': args.test_batch_size,
                   'num_workers': 1, 'pin_memory': True, 'shuffle': True}

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    dataset1 = datasets.MNIST('./data{}'.format(dadt.local_rank()),
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('./data{}'.format(dadt.local_rank()),
                              train=False,
                              transform=transform)

    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
    distributed_optimizer = dadt.DistributedOptimizer(optimizer=optimizer)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, distributed_optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model and dadt.rank() == 0:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    # shut down background thread
    dadt.shutdown()
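
Example #6 instantiates a Net class that is not shown in this listing. As an assumption, a minimal definition compatible with the script (it must return log-probabilities, because train() and test() use F.nll_loss) is the standard PyTorch MNIST example CNN:

# Minimal Net sketch (an assumption; the original class is not in this listing).
# It outputs log-probabilities, as required by F.nll_loss in train() and test().
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)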