def main():
    num_points = 32 * 100 * 2
    data = [i * (1 / num_points) for i in range(num_points)]
    it = parallel_it.from_items(data, 2, False).for_each(lambda x: [x, x])
    # this will create an MLDataset whose columns are RangeIndex(range(2))
    ds = ml_data.from_parallel_iter(it, True, batch_size=32, repeated=False)
    torch_ds = ds.to_torch(feature_columns=[0], label_column=1)

    trainer = TorchTrainer(
        num_workers=2,
        training_operator_cls=make_train_operator(torch_ds),
        add_dist_sampler=False,
        config={"batch_size": 32})
    for i in range(10):
        trainer.train(num_steps=100)
        model = trainer.get_model()
        print("f(0.5)=", float(model(torch.tensor([[0.5]]).float())[0][0]))
Example #2
def main():
    dataset = dataset_creator()
    trainer = TorchTrainer(
        model_creator=model_creator,
        data_creator=None,
        optimizer_creator=optimizer_creator,
        loss_creator=torch.nn.MSELoss,
        num_workers=2,
    )

    for i in range(10):
        # With a data_creator configured, trainer.train() would run a full
        # epoch; it is left out here because data_creator=None.

        # Train for num_steps batches using the MLDataset instead
        trainer.train(dataset=dataset, num_steps=100)

        model = trainer.get_model()
        print("f(0.5)=", float(model(to_mat(0.5))[0][0]))
Example #3
    def _create_trainer(self, train_ds: TorchMLDataset, evaluate_ds: Optional[TorchMLDataset]):
        outer = self

        class TorchEstimatorOperator(TrainingOperator):

            def setup(self, config):
                # create model
                if isinstance(outer._model, torch.nn.Module):
                    model = outer._model
                elif callable(outer._model):
                    model = outer._model(config)
                else:
                    raise Exception(
                        "Unsupported parameter, we only support a torch.nn.Module instance "
                        "or a function(dict -> model)")

                # create optimizer
                if isinstance(outer._optimizer, torch.optim.Optimizer):
                    # it is an instance of a torch.optim.Optimizer subclass;
                    # rebuild the optimizer against this model's parameters,
                    # then restore its original state
                    optimizer_cls = outer._optimizer.__class__
                    state = outer._optimizer.state_dict()
                    optimizer = optimizer_cls(model.parameters(), lr=0.1)  # lr must be passed for SGD
                    optimizer.load_state_dict(state)
                elif callable(outer._optimizer):
                    optimizer = outer._optimizer(model, config)
                else:
                    raise Exception(
                        "Unsupported parameter, we only support torch.optim.Optimizer subclass "
                        "instance or a function((models, dict) -> optimizer)")

                # create loss
                if inspect.isclass(outer._loss) and issubclass(outer._loss, TLoss):
                    # it is the loss class
                    loss = outer._loss
                elif isinstance(outer._loss, TLoss):
                    # it is the loss instance
                    loss = outer._loss
                elif callable(outer._loss):
                    # it is the loss creator function
                    loss = outer._loss(config)
                else:
                    raise Exception(
                        "Unsupported parameter, we only support a torch.nn.modules.loss._Loss "
                        "subclass, a subclass instance, or a function(dict -> loss)")

                # create lr scheduler
                if outer._lr_scheduler_creator:
                    lr_scheduler = outer._lr_scheduler_creator(optimizer, config)
                else:
                    lr_scheduler = None

                registered = self.register(
                    models=model, optimizers=optimizer, criterion=loss, schedulers=lr_scheduler)
                if lr_scheduler is not None:
                    self.model, self.optimizer, self.criterion, self.scheduler = registered
                else:
                    self.model, self.optimizer, self.criterion = registered

                # create dataset
                batch_size = config["batch_size"]
                get_shard_config = config.get("get_shard", {})
                if "shuffle" in config:
                    get_shard_config["shuffle"] = config["shuffle"]
                if not self._is_distributed:
                    world_rank = -1
                else:
                    world_rank = self.world_rank
                train_data = train_ds.get_shard(world_rank, **get_shard_config)
                train_loader = DataLoader(train_data, batch_size=batch_size)

                if evaluate_ds is not None:
                    evaluate_data = evaluate_ds.get_shard(world_rank, **get_shard_config)
                    evaluate_loader = DataLoader(evaluate_data, batch_size=batch_size)
                else:
                    evaluate_loader = None

                self.register_data(train_loader=train_loader, validation_loader=evaluate_loader)

        self._trainer = TorchTrainer(num_workers=self._num_workers,
                                     training_operator_cls=TorchEstimatorOperator,
                                     add_dist_sampler=False,
                                     scheduler_step_freq=self._scheduler_step_freq,
                                     **self._extra_config)
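TorchEstimatorOperator.setup() pulls all of its runtime options out of the trainer config. Below is a hypothetical config illustrating the keys it reads; how the dict reaches the operator (for example through a "config" entry in self._extra_config) is an assumption about the surrounding estimator class:

# hypothetical trainer config for the operator above
trainer_config = {
    "batch_size": 32,     # required: DataLoader batch size
    "shuffle": True,      # optional: copied into the get_shard() arguments
    # "get_shard": {...}  # optional: extra keyword arguments for get_shard()
}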
Example #4
    def _create_trainer(self, data_creator: Callable):
        def model_creator(config):
            if isinstance(self._model, torch.nn.Module):
                # it is the instance of torch.nn.Module
                return self._model
            elif callable(self._model):
                return self._model(config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support a torch.nn.Module instance "
                    "or a function(dict -> model)")

        def optimizer_creator(models, config):
            if isinstance(self._optimizer, torch.optim.Optimizer):
                # it is an instance of a torch.optim.Optimizer subclass
                if not isinstance(models, torch.nn.Module):
                    raise Exception(
                        "You should pass optimizers with a function((models, dict) -> optimizers) "
                        "when training with multiple models.")

                # rebuild the optimizer against this model's parameters,
                # then restore its original state
                optimizer_cls = self._optimizer.__class__
                state = self._optimizer.state_dict()
                optimizer = optimizer_cls(models.parameters(),
                                          lr=0.1)  # lr must be passed for SGD
                optimizer.load_state_dict(state)
                return optimizer
            elif callable(self._optimizer):
                return self._optimizer(models, config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support torch.optim.Optimizer subclass "
                    "instance or a function((models, dict) -> optimizer)")

        def loss_creator(config):
            if inspect.isclass(self._loss) and issubclass(self._loss, TLoss):
                # it is the loss class
                return self._loss
            elif isinstance(self._loss, TLoss):
                # it is the loss instance
                return self._loss
            elif callable(self._loss):
                # it is the loss creator function
                return self._loss(config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support a torch.nn.modules.loss._Loss "
                    "subclass, a subclass instance, or a function(dict -> loss)")

        def scheduler_creator(optimizers, config):
            return self._lr_scheduler_creator(optimizers, config)

        lr_scheduler_creator = (scheduler_creator if self._lr_scheduler_creator
                                is not None else None)

        self._trainer = TorchTrainer(
            model_creator=model_creator,
            data_creator=data_creator,
            optimizer_creator=optimizer_creator,
            loss_creator=loss_creator,
            scheduler_creator=lr_scheduler_creator,
            scheduler_step_freq=self._scheduler_step_freq,
            num_workers=self._num_workers,
            add_dist_sampler=False,
            training_operator_cls=TrainingOperatorWithWarmUp,
            **self._extra_config)
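The trainer above expects the caller to supply a data_creator. A minimal sketch of a compatible one, following the (train_loader, validation_loader) return convention used in Example #5; the tensors here are illustrative only:

import torch
from torch.utils.data import DataLoader, TensorDataset


def data_creator(config):
    # illustrative identity-regression data; any map-style dataset works
    x = torch.rand(1024, 1)
    train_loader = DataLoader(TensorDataset(x, x),
                              batch_size=config.get("batch_size", 32))
    return train_loader, None  # no validation loader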
Example #5
    def _create_trainer(self):
        def model_creator(config):
            if isinstance(self._model, torch.nn.Module):
                # it is the instance of torch.nn.Module
                return self._model
            elif callable(self._model):
                return self._model(config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support a torch.nn.Module instance "
                    "or a function(dict -> model)")

        def optimizer_creator(models, config):
            if isinstance(self._optimizer, torch.optim.Optimizer):
                # it is an instance of a torch.optim.Optimizer subclass
                if not isinstance(models, torch.nn.Module):
                    raise Exception(
                        "You should pass optimizers with a function((models, dict) -> optimizers) "
                        "when training with multiple models.")

                # rebuild the optimizer against this model's parameters,
                # then restore its original state
                optimizer_cls = self._optimizer.__class__
                state = self._optimizer.state_dict()
                optimizer = optimizer_cls(models.parameters(),
                                          lr=0.1)  # lr must be passed for SGD
                optimizer.load_state_dict(state)
                return optimizer
            elif callable(self._optimizer):
                return self._optimizer(models, config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support torch.optim.Optimizer subclass "
                    "instance or a function((models, dict) -> optimizer)")

        def loss_creator(config):
            if inspect.isclass(self._loss) and issubclass(self._loss, TLoss):
                # it is the loss class
                return self._loss
            elif isinstance(self._loss, TLoss):
                # it is the loss instance
                return self._loss
            elif callable(self._loss):
                # it is the loss creator function
                return self._loss(config)
            else:
                raise Exception(
                    "Unsupported parameter, we only support a torch.nn.modules.loss._Loss "
                    "subclass, a subclass instance, or a function(dict -> loss)")

        def data_creator(config):
            batch_size = config["batch_size"]
            shuffle = config["shuffle"]
            sampler = BlockSetSampler(self._data_set, shuffle=shuffle)
            context = None
            init_fn = None
            if self._num_processes_for_data_loader > 0:
                # spawn fresh processes for the DataLoader workers and let
                # worker_init_fn handle their per-process setup
                context = torch.multiprocessing.get_context("spawn")
                init_fn = worker_init_fn

            dataloader = torch.utils.data.DataLoader(
                self._data_set,
                batch_size=batch_size,
                sampler=sampler,
                num_workers=self._num_processes_for_data_loader,
                multiprocessing_context=context,
                worker_init_fn=init_fn)
            return dataloader, None

        def scheduler_creator(optimizers, config):
            return self._lr_scheduler_creator(optimizers, config)

        lr_scheduler_creator = (scheduler_creator if self._lr_scheduler_creator
                                is not None else None)

        self._trainer = TorchTrainer(
            model_creator=model_creator,
            data_creator=data_creator,
            optimizer_creator=optimizer_creator,
            loss_creator=loss_creator,
            scheduler_creator=lr_scheduler_creator,
            scheduler_step_freq=self._scheduler_step_freq,
            num_workers=self._num_workers,
            add_dist_sampler=False,
            training_operator_cls=TrainingOperatorWithWarmUp,
            **self._extra_config)
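The data_creator in Example #5 requires both "batch_size" and "shuffle" to be present in the trainer config. A hypothetical way the surrounding class might supply them; the exact plumbing through self._extra_config is an assumption:

# hypothetical: keys read by data_creator, forwarded to the TorchTrainer via
# the "config" entry of self._extra_config in the class above
extra_config = {
    "config": {
        "batch_size": 64,   # read by data_creator
        "shuffle": True,    # read by data_creator
    }
}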