コード例 #1
0
def test_already_has_grad():
    """balance_by_time raises ValueError if a parameter already has a gradient."""
    net = nn.Sequential(nn.Conv2d(3, 3, 1))
    inputs = torch.rand(1, 3, 32, 32)

    # One backward pass populates ``.grad`` on the convolution's parameters.
    loss = net(inputs).norm()
    loss.backward()

    expected_message = 'some parameter already has gradient'
    with pytest.raises(ValueError, match=expected_message):
        balance_by_time(1, net, inputs, device='cpu')
コード例 #2
0
def test_balance_by_time_tuple():
    """balance_by_time runs on a model whose layers pass a tuple between them."""
    class Twin(nn.Module):
        # Emits the input paired with its ``detach()``-ed version.
        def forward(self, x):
            detached = x.detach()
            return x, detached

    class Add(nn.Module):
        # Unpacks the incoming pair and returns the sum of its members.
        def forward(self, a_b):
            first, second = a_b
            return first + second

    layers = [Twin(), Add()]
    net = nn.Sequential(*layers)
    inputs = torch.rand(1, requires_grad=True)
    balance_by_time(1, net, inputs, device='cpu')
コード例 #3
0
def test_sandbox_during_profiling(device):
    """The model's state_dict is unchanged after balance_by_time has run."""
    net = nn.Sequential(nn.BatchNorm2d(3))

    # Snapshot every state entry before profiling; clone so that later
    # in-place updates to the live tensors cannot alter the snapshot.
    snapshot = {}
    for name, tensor in net.state_dict().items():
        snapshot[name] = tensor.clone()

    inputs = torch.rand(1, 3, 10, 10)
    balance_by_time(1, net, inputs, device=device)

    current = net.state_dict()

    assert snapshot.keys() == current.keys()
    for name in snapshot:
        assert torch.allclose(current[name], snapshot[name]), name
コード例 #4
0
def test_not_training():
    """An eval-mode model is still in eval mode after balance_by_time.

    The only layer asserts ``self.training`` inside ``forward``, so the test
    also fails if that layer is ever called while not in training mode.
    """
    class AssertTraining(nn.Module):
        # Identity layer that insists on being in training mode when called.
        def forward(self, x):
            assert self.training
            return x

    net = nn.Sequential(AssertTraining())
    net.eval()
    assert not net.training

    inputs = torch.rand(1)
    balance_by_time(1, net, inputs, device='cpu')

    # The caller-visible mode must not have been flipped.
    assert not net.training
コード例 #5
0
def test_balance_by_time_loop_resets_input():
    """A conv -> flatten -> linear stack is balanced as [1, 2] over two parts.

    ``nn.Linear(128, 10)`` only accepts the flattened convolution output
    (2 channels * 8 * 8), so every layer must receive the tensor shape it
    expects each time the model is run.
    """
    # Local stand-in for nn.Flatten, which was introduced at PyTorch 1.2.0.
    class Flatten(nn.Module):
        def forward(self, x):
            return torch.flatten(x, 1)

    layers = [nn.Conv2d(3, 2, 1), Flatten(), nn.Linear(128, 10)]
    net = nn.Sequential(*layers)
    inputs = torch.rand(10, 3, 8, 8)

    result = balance_by_time(2, net, inputs, device='cpu')
    assert result == [1, 2]
コード例 #6
0
def test_balance_by_time(device):
    """Six layers sleeping 0.01 s to 0.06 s are split [4, 2] over two parts."""
    class Delay(nn.Module):
        # Identity layer whose forward pass blocks for ``seconds``.
        def __init__(self, seconds):
            super().__init__()
            self.seconds = seconds

        def forward(self, x):
            time.sleep(self.seconds)
            return x

    # Delays of 1/100 .. 6/100 seconds, in increasing order.
    delays = [hundredths / 100 for hundredths in range(1, 7)]
    net = nn.Sequential(*(Delay(seconds) for seconds in delays))
    inputs = torch.rand(1)

    result = balance_by_time(2, net, inputs, device=device)
    assert result == [4, 2]
コード例 #7
0
def main():
    """Run the GPipe ImageNet benchmark and print a one-line summary.

    Steps: parse the command line, look up the model named by ``--arch`` in
    ``model_names``, compute a partition balance with ``balance_by_time`` for
    all visible CUDA devices, wrap the model in ``GPipe``, train it with SGD
    for ``epochs`` epochs via ``run_epoch``, evaluate once with ``evaluate``
    and print the validation accuracy together with the per-epoch average
    throughput and elapsed time.
    """
    parser = argparse.ArgumentParser(description='D-DNN imagenet benchmark')
    arch_help = ('model architecture: ' + ' | '.join(model_names) +
                 ' (default: resnet50)')
    parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
                        choices=model_names, help=arch_help)
    parser.add_argument('--lr', '--learning-rate', dest='lr', metavar='LR',
                        type=float, default=0.1,
                        help='initial learning rate')
    parser.add_argument('--momentum', metavar='M', type=float, default=0.9,
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', dest='weight_decay',
                        metavar='W', type=float, default=1e-4,
                        help='weight decay (default: 1e-4)')
    # The value is forwarded from bash, hence the shell convention:
    # 0 means true and everything else means false. The special value -1
    # selects the high-resolution setting further down.
    parser.add_argument('-s', '--synthetic_data', type=int, default=0,
                        help="Use synthetic data")
    args = parser.parse_args()

    # Fixed seeds for the CPU and CUDA generators; cudnn autotuning enabled.
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    cudnn.benchmark = True

    # ------------------------------------------------------------------
    # Look up the requested model and move it to the GPU.
    print("=> creating model '{}'".format(args.arch))
    model = model_names[args.arch].cuda()

    highres = args.synthetic_data == -1

    # Dummy batch handed to balance_by_time: 512x512 for the high-res
    # setting, 224x224 otherwise. One partition per visible CUDA device.
    partitions = torch.cuda.device_count()
    side = 512 if highres else 224
    sample = torch.empty(batch_size, 3, side, side)
    balance = balance_by_time(partitions, model, sample)
    model = GPipe(model, balance, chunks=microbatches)

    # ------------------------------------------------------------------
    # First and last pipeline devices; the first becomes the current one.
    devices = list(model.devices)
    in_device, out_device = devices[0], devices[-1]
    torch.cuda.set_device(in_device)

    # ------------------------------------------------------------------
    # Optimizer.
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # ------------------------------------------------------------------
    # Input pipelines.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_comp = [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]
    val_comp = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]

    if highres:
        # High-res data: no cropping or resizing, only tensor conversion
        # and normalisation.
        traindir, valdir = datadir + '/HIGHRES/train', datadir + '/HIGHRES/val'
        train_comp = [transforms.ToTensor(), normalize]
        val_comp = [transforms.ToTensor(), normalize]
    elif args.synthetic_data:
        # Non-zero (shell "false"): normal data.
        traindir, valdir = datadir + '/train', datadir + '/val'
    else:
        # Zero (shell "true"): synthetic data.
        traindir, valdir = datadir + '/IMAGENET/train', datadir + '/IMAGENET/val'

    # Both loaders share the same batching/worker configuration.
    loader_kwargs = dict(batch_size=batch_size,
                         shuffle=True,
                         num_workers=cores_gpu,
                         pin_memory=True)

    train_set = datasets.ImageFolder(traindir, transforms.Compose(train_comp))
    train_loader = torch.utils.data.DataLoader(train_set, **loader_kwargs)

    val_set = datasets.ImageFolder(valdir, transforms.Compose(val_comp))
    val_loader = torch.utils.data.DataLoader(val_set, **loader_kwargs)

    # ------------------------------------------------------------------
    # Train, collecting one (throughput, elapsed time) pair per epoch.
    throughputs = []
    elapsed_times = []
    for epoch in range(epochs):
        throughput, elapsed_time = run_epoch(train_loader, val_loader, model,
                                             optimizer, epoch, args,
                                             in_device, out_device)
        throughputs.append(throughput)
        elapsed_times.append(elapsed_time)

    _, valid_accuracy = evaluate(val_loader, model, args, in_device,
                                 out_device)

    # Average over epochs; fall back to 0.0 when no epoch was run.
    n = len(throughputs)
    if n > 0:
        throughput = sum(throughputs) / n
        elapsed_time = sum(elapsed_times) / n
    else:
        throughput = 0.0
        elapsed_time = 0.0

    summary = 'valid accuracy: %.4f | %.3f samples/sec, %.3f sec/epoch (average)'
    print(summary % (valid_accuracy, throughput, elapsed_time))
コード例 #8
0
            ]),
            loader=grayloader,
        ),
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=cores_gpu,
                                                  pin_memory=True)

    #---------------------------------------------------------------------------------
    # Move model to GPU.
    print("=> creating model '{}'".format(args.arch))
    model = model_names[args.arch].cuda()

    partitions = torch.cuda.device_count()
    sample = torch.empty(batch_size, 1, 28, 28)
    balance = balance_by_time(partitions, model, sample)

    model = GPipe(model, balance, chunks=microbatches)

    #---------------------------------------------------------------------------------
    devices = list(model.devices)
    in_device = devices[0]
    out_device = devices[-1]
    torch.cuda.set_device(in_device)

    throughputs = []
    elapsed_times = []
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    for epoch in range(epochs):