def train_mnist(config: Dict, checkpoint_dir: Optional[str] = None):
    # Data Setup
    mnist_transforms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))])

    train_loader = adl.AdaptiveDataLoader(
        datasets.MNIST("~/data", train=True, download=True,
                       transform=mnist_transforms),
        batch_size=64, shuffle=True)
    # Autoscale batch size
    train_loader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))

    test_loader = adl.AdaptiveDataLoader(
        datasets.MNIST("~/data", train=False, transform=mnist_transforms),
        batch_size=64, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ConvNet()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.get("lr", 0.01),
                          momentum=config.get("momentum", 0.79))
    model.to(device)
    model = adl.AdaptiveDataParallel(model, optimizer)

    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        train(model, optimizer, train_loader)
        acc = test(model, test_loader)

        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)
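# A minimal, hedged sketch of how a trainable like train_mnist could be
# launched with Ray Tune. The search space below (lr, momentum) is an
# illustrative assumption, not part of the original example, and running
# AdaptDL trainables under Tune may require additional setup from the
# AdaptDL/Ray integration.
if __name__ == "__main__":
    from ray import tune

    analysis = tune.run(
        train_mnist,
        metric="mean_accuracy",
        mode="max",
        config={
            "lr": tune.grid_search([0.001, 0.01, 0.1]),
            "momentum": tune.uniform(0.1, 0.9),
            "epochs": 10,
        })
    print("Best config:", analysis.best_config)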
def test_single_replica_parallel():
    adl.init_process_group("gloo")
    true_values = np.asarray([3.0, 4.0])
    dataset = LRIterableDataset(1000, true_values, 1.0)
    dataloader = adl.AdaptiveDataLoader(
        dataset, batch_size=32, shuffle=False, num_workers=1)
    model = torch.nn.Linear(1, 1, bias=True)
    params = [model.bias, model.weight]
    sgd = torch.optim.SGD(
        [{"params": [param]} for param in params], lr=0.01)
    schedule = torch.optim.lr_scheduler.MultiStepLR(sgd, [50])
    model = adl.AdaptiveDataParallel(model, sgd, schedule)
    loss = torch.nn.MSELoss()
    for epoch in adl.remaining_epochs_until(100):
        for inputs, targets in dataloader:
            inputs = inputs.float()
            targets = targets.float()
            sgd.zero_grad()
            output = model(torch.reshape(inputs, (-1, 1)))
            targets = torch.reshape(targets, (-1, 1))
            loss_value = loss(output, targets)
            loss_value.backward()
            sgd.step()
        # Step the LR schedule once per epoch (decays at epoch 50).
        schedule.step()
    params = np.asarray([param.item() for param in params])
    assert np.all(np.isclose(params, true_values, atol=0.1)), \
        (params, true_values)
def _train_simple(config: Dict, checkpoint_dir: Optional[str] = None):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    H = config.get("H", 16)
    N = config.get("N", 16)

    # Create random Tensors to hold inputs and outputs
    dataloader = adl.AdaptiveDataLoader(dataset, batch_size=N)
    dataloader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))
    loss_fn = nn.MSELoss()

    # Use the nn package to define our model and loss function.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    model = model.to(device)
    model = adl.AdaptiveDataParallel(model, optimizer)

    loss = torch.Tensor([0.0])
    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        for (x, y) in dataloader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()
        # Report the last batch's loss back to Tune after each epoch.
        tune.report(mean_loss=loss.item())
from typing import Dict, Optional


def _train_simple(config: Dict, checkpoint_dir: Optional[str] = None):
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import adaptdl.torch as adl
    from ray import tune

    class MyDataset:
        def __init__(self, xs, ys):
            self.xs = xs
            self.ys = ys

        def __getitem__(self, i):
            return self.xs[i], self.ys[i]

        def __len__(self):
            return len(self.xs)

    # N is batch size; D_in is input dimension;
    # H is hidden dimension; D_out is output dimension.
    N, D_in, H, D_out = 64, 5, 5, 5

    # Create random Tensors to hold inputs and outputs
    dataset = MyDataset(torch.randn(N, D_in), torch.randn(N, D_out))

    H = config.get("H", 16)
    N = config.get("N", 16)

    dataloader = adl.AdaptiveDataLoader(dataset, batch_size=N)
    dataloader.autoscale_batch_size(4096, local_bsz_bounds=(16, 1024))
    loss_fn = nn.MSELoss()

    # Use the nn package to define our model and loss function.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    model = adl.AdaptiveDataParallel(model, optimizer)

    loss = torch.Tensor([0.0])
    for epoch in adl.remaining_epochs_until(config.get("epochs", 10)):
        for (x, y) in dataloader:
            optimizer.zero_grad()
            output = model(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()
        # Report the last batch's loss back to Tune after each epoch.
        tune.report(mean_loss=loss.item())
# NOTE: the SimpleDataset class header is reconstructed from context
# (SimpleDataset(10000) is instantiated below); make_features, f, args,
# net, and Trainer are defined elsewhere in the original script.
class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, size):
        random = torch.randn(size)
        x = make_features(random)
        y = f(x) + 0.25 * torch.randn(1)
        self.data = list(zip(x, y))

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


dataset = SimpleDataset(10000)
dataloader = adl.AdaptiveDataLoader(dataset, batch_size=args.bs, shuffle=True,
                                    num_workers=2, drop_last=True)

optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=0.9, weight_decay=5e-4)
lr_scheduler = MultiStepLR(optimizer, [30, 45], 0.1)
net = adl.AdaptiveDataParallel(net, optimizer, lr_scheduler)

trainer = Trainer(net, optimizer, lr_scheduler)

for epoch in adl.remaining_epochs_until(args.epochs):
    for inputs, targets in dataloader:
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

adaptdl.torch.init_process_group(
    "nccl" if torch.cuda.is_available() else "gloo")

if adaptdl.env.replica_rank() == 0:
    trainset = torchvision.datasets.CIFAR10(root=adaptdl.env.share_path(),
                                            train=True, download=True,
                                            transform=transform_train)
    trainloader = adl.AdaptiveDataLoader(trainset, batch_size=args.bs,
                                         shuffle=True, num_workers=2,
                                         drop_last=True)
    # Barrier so that non-master replicas wait until the master replica has
    # finished downloading the dataset before they try to read it.
    dist.barrier()
else:
    dist.barrier()
    trainset = torchvision.datasets.CIFAR10(root=adaptdl.env.share_path(),
                                            train=True, download=False,
                                            transform=transform_train)
    trainloader = adl.AdaptiveDataLoader(trainset, batch_size=args.bs,
                                         shuffle=True, num_workers=2,
                                         drop_last=True)
model_path = os.path.join(main_path, 'models')
GMF_model_path = os.path.join(model_path, 'GMF.pth')
MLP_model_path = os.path.join(model_path, 'MLP.pth')
NeuMF_model_path = os.path.join(model_path, 'NeuMF.pth')

############################## PREPARE DATASET ##############################
train_data, test_data, user_num, item_num, train_mat = \
    data_utils.load_all(main_path, train_rating, test_negative, dataset)

# construct the train and test datasets
train_dataset = data_utils.NCFData(
    train_data, item_num, train_mat, args.num_ng, True)
test_dataset = data_utils.NCFData(
    test_data, item_num, train_mat, 0, False)
train_loader = adl.AdaptiveDataLoader(
    train_dataset, batch_size=args.batch_size,
    shuffle=True, num_workers=4, drop_last=True)
test_loader = adl.AdaptiveDataLoader(
    test_dataset, batch_size=args.test_num_ng + 1,
    shuffle=False, num_workers=0)

if args.autoscale_bsz:
    train_loader.autoscale_batch_size(
        8192, local_bsz_bounds=(32, 512),
        gradient_accumulation=args.gradient_accumulation)

############################### CREATE MODEL ################################
if model_type == 'NeuMF-pre':
    assert os.path.exists(GMF_model_path), 'missing pretrained GMF model'
    assert os.path.exists(MLP_model_path), 'missing pretrained MLP model'
    GMF_model = torch.load(GMF_model_path)
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = dsets.CelebA(dataroot, split='train', target_type='attr',
                       transform=transform, target_transform=None,
                       download=True)

dataloader = adl.AdaptiveDataLoader(dataset, batch_size=batch_size,
                                    num_workers=workers, shuffle=False)
dataloader.autoscale_batch_size(8 * batch_size, local_bsz_bounds=(8, 1024))

# Decide which device we want to run on
device = torch.device("cuda:0" if (
    torch.cuda.is_available() and ngpu > 0) else "cpu")

######################################################################
# Implementation
# --------------
#
# With our input parameters set and the dataset prepared, we can now get
# into the implementation. We will start with the weight initialization
# strategy, then talk about the generator, discriminator, loss functions,
# and training loop in detail.
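######################################################################
# A minimal, hedged sketch of the custom weight initialization referred to
# above, following the common DCGAN convention (Conv weights drawn from
# N(0, 0.02), BatchNorm weights from N(1.0, 0.02) with zero bias). Treat it
# as an illustrative assumption, not necessarily the exact code used here.
import torch.nn as nn


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

# Example usage, assuming netG/netD are the generator and discriminator
# defined later:
#   netG.apply(weights_init)
#   netD.apply(weights_init)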