Code Example #1
File: hyperopt_example.py Project: petuum/adaptdl
def train_mnist(config: Dict, checkpoint_dir: Optional[str] = None):
    # Data Setup
    mnist_transforms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = adl.AdaptiveDataLoader(datasets.MNIST(
        "~/data", train=True, download=True, transform=mnist_transforms),
                                          batch_size=64,
                                          shuffle=True)

    # Autoscale batch size
    train_loader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))

    test_loader = adl.AdaptiveDataLoader(datasets.MNIST(
        "~/data", train=False, transform=mnist_transforms),
                                         batch_size=64,
                                         shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ConvNet()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.get("lr", 0.01),
                          momentum=config.get("momentum", 0.79))

    model.to(device)
    model = adl.AdaptiveDataParallel(model, optimizer)

    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        train(model, optimizer, train_loader)
        acc = test(model, test_loader)
        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)
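This example only defines the trainable and reports results back with tune.report; it does not show how the function is launched. As a minimal, illustrative sketch (assuming plain Ray Tune and omitting the AdaptDL-specific trial scheduling used in the original hyperopt_example.py; the search-space values below are assumptions, not taken from the original), a launch might look like:

from ray import tune

# Illustrative search space; the key names match the config.get() lookups
# in train_mnist above, the value ranges are assumptions.
analysis = tune.run(
    train_mnist,
    config={
        "lr": tune.loguniform(1e-4, 1e-1),
        "momentum": tune.uniform(0.5, 0.95),
        "epochs": 10,
    },
    num_samples=4,
    metric="mean_accuracy",
    mode="max",
)
print(analysis.best_config)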
Code Example #2
def test_single_replica_parallel():
    adl.init_process_group("gloo")
    true_values = np.asarray([3.0, 4.0])
    dataset = LRIterableDataset(1000, true_values, 1.0)
    dataloader = adl.AdaptiveDataLoader(dataset,
                                        batch_size=32,
                                        shuffle=False,
                                        num_workers=1)
    model = torch.nn.Linear(1, 1, bias=True)
    params = [model.bias, model.weight]
    sgd = torch.optim.SGD([{"params": [param]} for param in params], lr=0.01)
    schedule = torch.optim.lr_scheduler.MultiStepLR(sgd, [50])
    model = adl.AdaptiveDataParallel(model, sgd, schedule)
    loss = torch.nn.MSELoss()
    for epoch in adl.remaining_epochs_until(100):
        for inputs, targets in dataloader:
            inputs = inputs.float()
            targets = targets.float()
            sgd.zero_grad()
            output = model(torch.reshape(inputs, (-1, 1)))
            targets = torch.reshape(targets, (-1, 1))
            loss_value = loss(output, targets)
            loss_value.backward()
            sgd.step()
        schedule.step()
    params = np.asarray([param.item() for param in params])
    assert(np.all(np.isclose(params, true_values, atol=0.1))), \
        (params, true_values)
Code Example #3
def _train_simple(config: Dict, checkpoint_dir: Optional[str] = None):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    H = config.get("H", 16)
    N = config.get("N", 16)

    # Wrap the dataset in an adaptive data loader and autoscale its batch size
    dataloader = adl.AdaptiveDataLoader(dataset, batch_size=N)
    dataloader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))

    loss_fn = nn.MSELoss()

    # Use the nn package to define our model and loss function.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    model = model.to(device)
    model = adl.AdaptiveDataParallel(model, optimizer)

    loss = torch.Tensor([0.0])
    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        for (x, y) in dataloader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()

        tune.report(mean_loss=loss.item())
Code Example #4
def _train_simple(config: Dict, checkpoint_dir: Optional[str] = None):
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import adaptdl.torch as adl
    from ray import tune

    class MyDataset:
        def __init__(self, xs, ys):
            self.xs = xs
            self.ys = ys

        def __getitem__(self, i):
            return self.xs[i], self.ys[i]

        def __len__(self):
            return len(self.xs)

    # N is batch size; D_in is input dimension;
    # H is hidden dimension; D_out is output dimension.
    N, D_in, H, D_out = 64, 5, 5, 5
    dataset = MyDataset(torch.randn(N, D_in), torch.randn(N, D_out))

    H = config.get("H", 16)
    N = config.get("N", 16)

    # Wrap the dataset in an adaptive data loader and autoscale its batch size
    dataloader = adl.AdaptiveDataLoader(dataset, batch_size=N)
    dataloader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))

    loss_fn = nn.MSELoss()

    # Use the nn package to define our model and loss function.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    model = adl.AdaptiveDataParallel(model, optimizer)

    loss = torch.Tensor([0.0])
    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        for (x, y) in dataloader:
            optimizer.zero_grad()
            output = model(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()

        tune.report(mean_loss=loss.item())
Code Example #5
class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, size):
        random = torch.randn(size)
        x = make_features(random)
        y = f(x) + 0.25 * torch.randn(1)
        self.data = list(zip(x, y))

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


dataset = SimpleDataset(10000)
dataloader = adl.AdaptiveDataLoader(dataset,
                                    batch_size=args.bs,
                                    shuffle=True,
                                    num_workers=2,
                                    drop_last=True)

optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=0.9,
                      weight_decay=5e-4)
lr_scheduler = MultiStepLR(optimizer, [30, 45], 0.1)

net = adl.AdaptiveDataParallel(net, optimizer, lr_scheduler)
trainer = Trainer(net, optimizer, lr_scheduler)

for epoch in adl.remaining_epochs_until(args.epochs):

    for inputs, targets in dataloader:
Code Example #6
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

adaptdl.torch.init_process_group(
    "nccl" if torch.cuda.is_available() else "gloo")

if adaptdl.env.replica_rank() == 0:
    trainset = torchvision.datasets.CIFAR10(root=adaptdl.env.share_path(),
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = adl.AdaptiveDataLoader(trainset,
                                         batch_size=args.bs,
                                         shuffle=True,
                                         num_workers=2,
                                         drop_last=True)
    # Barrier so that non-master replicas wait for the master replica
    # to finish downloading the data.
    dist.barrier()
else:
    dist.barrier()
    trainset = torchvision.datasets.CIFAR10(root=adaptdl.env.share_path(),
                                            train=True,
                                            download=False,
                                            transform=transform_train)
    trainloader = adl.AdaptiveDataLoader(trainset,
                                         batch_size=args.bs,
                                         shuffle=True,
                                         num_workers=2,
                                         drop_last=True)
Code Example #7
model_path = os.path.join(main_path, 'models')
GMF_model_path = os.path.join(model_path, 'GMF.pth')
MLP_model_path = os.path.join(model_path, 'MLP.pth')
NeuMF_model_path = os.path.join(model_path, 'NeuMF.pth')

############################## PREPARE DATASET ##########################
train_data, test_data, user_num, item_num, train_mat = \
    data_utils.load_all(main_path, train_rating, test_negative, dataset)

# construct the train and test datasets
train_dataset = data_utils.NCFData(
        train_data, item_num, train_mat, args.num_ng, True)
test_dataset = data_utils.NCFData(
        test_data, item_num, train_mat, 0, False)
train_loader = adl.AdaptiveDataLoader(
    train_dataset,
    batch_size=args.batch_size, shuffle=True, num_workers=4, drop_last=True)
test_loader = adl.AdaptiveDataLoader(
    test_dataset,
    batch_size=args.test_num_ng+1, shuffle=False, num_workers=0)

if args.autoscale_bsz:
    train_loader.autoscale_batch_size(
        8192, local_bsz_bounds=(32, 512),
        gradient_accumulation=args.gradient_accumulation)

########################### CREATE MODEL #################################
if model_type == 'NeuMF-pre':
    assert os.path.exists(GMF_model_path), 'lack of GMF model'
    assert os.path.exists(MLP_model_path), 'lack of MLP model'
    GMF_model = torch.load(GMF_model_path)
Code Example #8
File: dcgan.py Project: wintersurvival/adaptdl
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = dsets.CelebA(dataroot,
                       split='train',
                       target_type='attr',
                       transform=transform,
                       target_transform=None,
                       download=True)

dataloader = adl.AdaptiveDataLoader(dataset,
                                    batch_size=batch_size,
                                    num_workers=workers,
                                    shuffle=False)
dataloader.autoscale_batch_size(8 * batch_size, local_bsz_bounds=(8, 1024))

# Decide which device we want to run on
device = torch.device("cuda:0" if (
    torch.cuda.is_available() and ngpu > 0) else "cpu")

######################################################################
# Implementation
# --------------
#
# With our input parameters set and the dataset prepared, we can now get
# into the implementation. We will start with the weight initialization
# strategy, then talk about the generator, discriminator, loss functions,
# and training loop in detail.
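The weight initialization step referenced in the comment above is not included in this snippet. As a rough sketch, the conventional DCGAN recipe (on which dcgan.py appears to be based) draws convolutional weights from N(0, 0.02) and batch-norm scales from N(1, 0.02); the exact function in the original file may differ:

import torch.nn as nn

# Sketch of the usual DCGAN weight initializer; applied to a network with
# e.g. model.apply(weights_init).
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)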