def data_creator(config):
    """Build the training and validation dataloaders.

    Args:
        config: Configuration dict; ``config[BATCH_SIZE]`` sets the batch
            size for both loaders.

    Returns:
        Tuple of (train_loader, validation_loader).
    """
    batch_size = config[BATCH_SIZE]
    training_set = LinearDataset(2, 5)
    validation_set = LinearDataset(2, 5, size=400)
    return (
        DataLoader(training_set, batch_size=batch_size),
        DataLoader(validation_set, batch_size=batch_size),
    )
def setup(self, config):
    """Set up every component needed for training: data, model,
    optimizer, loss, and scheduler, then register them with Ray SGD.

    Args:
        config (dict): must contain "batch_size"; may contain "lr"
            (learning rate, default 1e-4).
    """
    # Setup data loaders: identically shaped train/validation splits of
    # the synthetic linear dataset.
    train_dataset, val_dataset = LinearDataset(2, 5), LinearDataset(2, 5)
    train_loader = DataLoader(train_dataset, batch_size=config["batch_size"])
    val_loader = DataLoader(val_dataset, batch_size=config["batch_size"])

    # Setup model: single-feature linear regression.
    model = nn.Linear(1, 1)

    # Setup optimizer.
    optimizer = torch.optim.SGD(model.parameters(), lr=config.get("lr", 1e-4))

    # Setup loss. MSE, not BCE: the raw nn.Linear output is unbounded and
    # the regression targets lie outside [0, 1], so torch.nn.BCELoss would
    # raise "all elements of input should be between 0 and 1" on the very
    # first training step. MSELoss also matches the sibling setup() in
    # this file.
    criterion = torch.nn.MSELoss()

    # Setup scheduler.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5,
                                                gamma=0.9)

    # Register all of these components with Ray SGD.
    # This allows Ray SGD to do framework level setup like Cuda, DDP,
    # Distributed Sampling, FP16.
    # We also assign the return values of self.register to instance
    # attributes so we can access it in our custom training/validation
    # methods.
    self.model, self.optimizer, self.criterion, self.scheduler = \
        self.register(models=model, optimizers=optimizer,
                      criterion=criterion, schedulers=scheduler)

    # Register the data loaders as well — the original built them but
    # never handed them to Ray SGD, leaving them unused.
    self.register_data(train_loader=train_loader,
                       validation_loader=val_loader)
def data_creator(config):
    """Create training and validation dataloaders of batch size 1.

    Args:
        config: Configuration dict; ``config["data_size"]`` sets the
            number of samples in each split.

    Returns:
        Tuple of (train_loader, test_loader).
    """
    n = config["data_size"]
    train_loader = DataLoader(LinearDataset(2, 5, size=n), batch_size=1)
    test_loader = DataLoader(LinearDataset(2, 5, size=n), batch_size=1)
    return train_loader, test_loader
def data_creator(config):
    """Construct the raw torch.utils.data.Dataset objects.

    Even though two Dataset objects are returned, only the first is used
    for training; the second backs validation.

    Args:
        config: Configuration dictionary passed into ``TorchTrainer``.

    Returns:
        One or two Dataset objects. With only one Dataset provided,
        ``trainer.validate()`` will throw a ValueError.
    """
    train_set = LinearDataset(2, 5)
    val_set = LinearDataset(2, 5, size=400)
    return train_set, val_set
def data_creator(config):
    """Return the training dataloader (no validation loader).

    Args:
        config: Configuration dict; ``config["data_size"]`` sizes the
            dataset and ``config[BATCH_SIZE]`` sets the batch size.
    """
    return torch.utils.data.DataLoader(
        LinearDataset(2, 5, size=config["data_size"]),
        batch_size=config[BATCH_SIZE],
    )
def data_creator(config):
    """Construct the training and validation Iterables.

    Even though two Iterables are returned, only the first is used for
    training; the second backs ``trainer.validate()``.

    Args:
        config: Configuration dictionary passed into ``TorchTrainer``.

    Returns:
        One or two Iterable objects. With only one Iterable provided,
        ``trainer.validate()`` will throw a ValueError.
    """
    bs = config["batch_size"]
    return (
        DataLoader(LinearDataset(2, 5), batch_size=bs),
        DataLoader(LinearDataset(2, 5), batch_size=bs),
    )
def setup(self, config):
    """Build and register data loaders, model, optimizer, and loss.

    Args:
        config (dict): must contain "batch_size".
    """
    bs = config["batch_size"]
    # Load data.
    train_loader = DataLoader(LinearDataset(2, 5), bs)
    val_loader = DataLoader(LinearDataset(2, 5), bs)
    # Create model: single-feature linear regression.
    model = torch.nn.Linear(1, 1)
    # Create optimizer.
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    # Create loss.
    loss = torch.nn.MSELoss()
    # Register model, optimizer, and loss with Ray SGD.
    self.model, self.optimizer, self.criterion = self.register(
        models=model, optimizers=optimizer, criterion=loss)
    # Register data loaders with Ray SGD.
    self.register_data(
        train_loader=train_loader, validation_loader=val_loader)
def setup(self, config):
    """Build the benchmark model/optimizer and one fixed synthetic batch.

    NOTE(review): reads module-level ``args`` for batch_size/cuda — TODO
    confirm those flags are parsed before setup runs.

    Args:
        config (dict): contains "model" (torchvision model name) and
            "lr_scaler" (learning-rate multiplier).
    """
    model = getattr(models, config.get("model"))()
    optimizer = optim.SGD(model.parameters(), lr=0.01 * config["lr_scaler"])
    # Have to use dummy data for training; the real input is the fixed
    # random batch generated below.
    dummy_train = LinearDataset(4, 2)
    self.model, self.optimizer = self.register(
        models=model,
        optimizers=optimizer,
    )
    self.register_data(train_loader=dummy_train, validation_loader=None)
    # One fixed ImageNet-sized batch with random labels in [0, 1000),
    # reused every iteration.
    batch = torch.randn(args.batch_size, 3, 224, 224)
    labels = torch.LongTensor(args.batch_size).random_() % 1000
    if args.cuda:
        batch, labels = batch.cuda(), labels.cuda()
    self.data, self.target = batch, labels
def data_creator(config):
    """Build a training DataLoader over a synthetic linear dataset.

    Args:
        config: Configuration dict; optional "batch_size" (default 32).

    Returns:
        Tuple of (train_loader, None) — no validation loader is supplied.
    """
    class LinearDataset(torch.utils.data.Dataset):
        """Random (x, y) points with targets z = a*(x + y) + 2*b."""

        def __init__(self, a, b, size=1000):
            x = np.random.randn(size)
            y = np.random.randn(size)
            self.x = torch.tensor(x, dtype=torch.float32)
            self.y = torch.tensor(y, dtype=torch.float32)
            self.z = torch.tensor(a * (x + y) + 2 * b, dtype=torch.float32)

        def __getitem__(self, index):
            # Trailing None adds a feature axis: each item is three (1,)
            # tensors.
            return (self.x[index, None], self.y[index, None],
                    self.z[index, None])

        def __len__(self):
            return len(self.x)

    loader = torch.utils.data.DataLoader(
        LinearDataset(3, 4),
        batch_size=config.get("batch_size", 32),
    )
    return loader, None
def single_loader(config):
    """Return one DataLoader over a large (1M sample) linear dataset.

    Args:
        config: Configuration dict; optional "batch_size" (default 32).
    """
    bs = config.get("batch_size", 32)
    return DataLoader(LinearDataset(2, 5, size=1000000), batch_size=bs)
if __name__ == "__main__":
    # Start Ray: fresh local instance for --local, otherwise attach to the
    # running cluster.
    ray.init(address=None if args.local else "auto")
    # One worker per available device resource on the cluster; fixed at 2
    # when running locally.
    num_workers = 2 if args.local else int(ray.cluster_resources().get(device))
    # NOTE(review): imported after ray.init — presumably so workers resolve
    # the dataset class from the installed package; confirm before moving.
    from ray.util.sgd.torch.examples.train_example import LinearDataset
    print("Model: %s" % args.model)
    print("Batch size: %d" % args.batch_size)
    print("Number of %ss: %d" % (device, num_workers))
    trainer = TorchTrainer(
        model_creator=lambda cfg: getattr(models, args.model)(),
        # Learning rate is scaled by the worker count (cfg["lr_scaler"]).
        optimizer_creator=lambda model, cfg: optim.SGD(
            model.parameters(), lr=0.01 * cfg.get("lr_scaler")),
        # Dummy dataset: the training operator supplies its own batch.
        data_creator=lambda cfg: LinearDataset(4, 2),
        initialization_hook=init_hook,
        config=dict(lr_scaler=num_workers),
        training_operator_cls=Training,
        num_workers=num_workers,
        use_gpu=args.cuda,
        use_fp16=args.fp16,
    )
    # Benchmark loop: collect per-iteration throughput (images/sec).
    img_secs = []
    for x in range(args.num_iters):
        result = trainer.train()
        # print(result)
        img_sec = result["img_sec"]
        print("Iter #%d: %.1f img/sec per %s" % (x, img_sec, device))
        img_secs.append(img_sec)
def single_loader(config):
    """Return a single large (1M sample) linear Dataset.

    Args:
        config: Configuration dict (unused).
    """
    dataset = LinearDataset(2, 5, size=1000000)
    return dataset