Example #1
    def run_step(self):
        for i in range(self.inner_iterations):
            # Sample from the model
            with tracker.namespace('sample'):
                self.sample()
            # Train with the mode set to training
            with self.mode.update(is_train=True):
                with tracker.namespace('train'):
                    self.trainer()
            # Validate if a validator is configured
            if self.validator:
                with tracker.namespace('valid'):
                    self.validator()
            tracker.save()
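Each step above is split into sampling, training, and validation phases, each under its own tracker.namespace. Throughout these examples the namespace appears to prefix every indicator recorded inside it, so the same metric name can be reused per phase. A minimal sketch, assuming tracker.namespace('train') records a saved loss as train.loss:

    from labml import tracker

    with tracker.namespace('train'):
        tracker.save({'loss': 0.25})  # recorded as 'train.loss' under this assumption
    with tracker.namespace('valid'):
        tracker.save({'loss': 0.31})  # likewise 'valid.loss'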
Example #2
    def run(self):
        if self.is_log_parameters:
            pytorch_utils.add_model_indicators(self.model)

        for _ in self.training_loop:
            with tracker.namespace('train'):
                self.trainer()
            with tracker.namespace('valid'):
                self.validator()
            if self.is_log_parameters:
                pytorch_utils.store_model_indicators(self.model)
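A note on Example #2: add_model_indicators(self.model) runs once before the loop while store_model_indicators(self.model) runs every iteration, which suggests the former registers the model's parameters as tracked indicators and the latter records their current values each pass. Gating both behind the same is_log_parameters flag keeps parameter logging cheap to switch off.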
Example #3
    def run(self):
        for _ in self.training_loop:
            prompt = 'def train('
            log = [(prompt, Text.subtle)]
            for i in monit.iterate('Sample', 25):
                data = self.text.text_to_i(prompt).unsqueeze(-1)
                data = data.to(self.device)
                output, *_ = self.model(data)
                output = output.argmax(dim=-1).squeeze()
                prompt += self.text.itos[output[-1]]
                log += [(self.text.itos[output[-1]], Text.value)]

            logger.log(log)

            with Mode(is_train=True,
                      is_log_parameters=self.is_log_parameters,
                      is_log_activations=self.is_log_activations):
                with tracker.namespace('train'):
                    self.trainer()
            with tracker.namespace('valid'):
                self.validator()
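Example #3 interleaves sampling with training: each iteration greedily generates 25 tokens from the fixed prompt 'def train(' (taking the argmax of the model output and appending it to the prompt), prints the sample with logger.log, and only then runs a training pass. The Mode(is_train=True, ...) context presumably switches the shared trainer into training mode and enables the optional parameter/activation logging for that pass only; validation then runs outside of it.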
Example #4
    def start_training(self, model):
        """
        Initializes the Training step with the model initialized

        :param model: Instance of the NewsClassifier class
        """
        best_loss = float('inf')

        for epoch in monit.loop(self.epochs):
            with tracker.namespace('train'):
                self.train_epoch(model, self.train_data_loader, 'train')

            with tracker.namespace('valid'):
                _, val_loss = self.train_epoch(model, self.val_data_loader,
                                               'valid')

            if val_loss < best_loss:
                best_loss = val_loss

                if self.is_save_model:
                    self.save_model(model)

            tracker.new_line()
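Example #4 is the standard best-model pattern: a checkpoint is written only when validation loss improves. tracker.new_line() closes the console line that labml keeps updating in place, so each epoch of monit.loop gets its own line of output.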
Example #5
def main():
    experiment.create()
    conf = Configs()
    conf.activation = 'relu'
    conf.dropout = 0.1
    experiment.configs(conf,
                       {'conv_sizes': [(128, 2), (256, 4)],
                        'optimizer.learning_rate': 1e-4,
                        'optimizer.optimizer': 'Adam'})

    with experiment.start():
        with monit.section('Initialize'):
            conf.initialize()
        with tracker.namespace('valid'):
            conf.valid_dataset.save_artifacts()
        conf.run()
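Example #5 separates setup from execution: configurations are declared before experiment.start(), and everything inside the with block runs as part of the tracked experiment. monit.section('Initialize') wraps initialization so it appears as a named, timed section in the console, and saving the validation dataset's artifacts under the 'valid' namespace groups them with the validation metrics.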
Example #6
def main():
    experiment.create()
    conf = Configs()
    conf.learning_rate = 1e-4
    conf.epochs = 500
    conf.conv_sizes = [(128, 2), (256, 4)]
    # conf.conv_sizes = [(128, 1), (256, 2)]
    conf.activation = 'relu'
    conf.dropout = 0.1
    conf.train_batch_size = 32
    experiment.calculate_configs(conf, 'run')

    experiment.start()
    with tracker.namespace('valid'):
        conf.valid_dataset.save_artifacts()
    conf.run()
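Example #6 looks like an older variant of the same flow: configs are set as plain attributes, experiment.calculate_configs(conf, 'run') appears to resolve the part of the config graph that conf.run needs, and experiment.start() is called directly rather than as a context manager, so nothing marks the experiment as finished when main() returns.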
Example #7
def main():
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    # Seed before constructing the model so weight initialization is reproducible
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            for mode, batch in monit.mix(10, ('train', train_loader),
                                         ('valid', valid_loader)):
                with tracker.namespace(mode):
                    with torch.set_grad_enabled(mode == 'train'):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        pred = output.argmax(dim=1, keepdim=True)

                        if mode == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                            tracker.add_global_step(data.shape[0])

                        tracker.save({
                            'loss.': loss,
                            'accuracy.': pred.eq(target.view_as(pred)).sum() / pred.shape[0],
                        })

            tracker.new_line()

    # ✨ Save the model
    experiment.save_checkpoint()
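Example #7 drives both phases from one loop: monit.mix yields (mode, batch) pairs from the two loaders, torch.set_grad_enabled(mode == 'train') enables autograd only for training batches, and tracker.add_global_step advances the step counter by the number of training samples seen. The leading 10 presumably controls how finely the two loaders are interleaved. A hypothetical stand-in for such a mixer (illustrative only, not labml's implementation; the name mix and the chunking scheme here are assumptions):

    import itertools

    def mix(chunks, *sources):
        # Interleave (name, iterable) sources over `chunks` rounds,
        # yielding (name, batch) pairs roughly in proportion to their sizes.
        iters = [(name, iter(src), max(1, len(src) // chunks))
                 for name, src in sources]
        for _ in range(chunks):
            for name, it, per_round in iters:
                for batch in itertools.islice(it, per_round):
                    yield name, batch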
Example #8
def main():
    parser = argparse.ArgumentParser(description="PyTorch BERT Example")

    parser.add_argument(
        "--max_epochs",
        type=int,
        default=5,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )

    parser.add_argument(
        "--batch_size",
        type=int,
        default=16,
        metavar="N",
        help="batch size (default: 16)",
    )

    parser.add_argument(
        "--max_len",
        type=int,
        default=160,
        metavar="N",
        help="number of tokens per sample (rest is truncated) (default: 140)",
    )

    parser.add_argument(
        "--num_samples",
        type=int,
        default=1_000,
        metavar="N",
        help="Number of samples to be used for training "
        "and evaluation steps (default: 15000) Maximum:100000",
    )

    parser.add_argument(
        "--save_model",
        # argparse's type=bool treats any non-empty string as True,
        # so parse the value explicitly
        type=lambda s: s.lower() in ("true", "1", "yes"),
        default=True,
        help="For saving the current model",
    )

    parser.add_argument(
        "--vocab_file",
        default="https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
        help="Custom vocab file",
    )

    parser.add_argument("--model_save_path",
                        type=str,
                        default="models",
                        help="Path to save mlflow model")

    experiment.create(name='bert_news')
    args = parser.parse_args()
    experiment.configs(args.__dict__)

    # This is set as an environment variable, check the Makefile
    # mlflow.set_tracking_uri("http://localhost:5005")
    mlflow.start_run()
    mlflow.log_param("epochs", args.max_epochs)
    mlflow.log_param("samples", args.num_samples)

    with experiment.start():
        trainer = NewsClassifierTrainer(epochs=args.max_epochs,
                                        n_samples=args.num_samples,
                                        vocab_file_url=args.vocab_file,
                                        is_save_model=args.save_model,
                                        model_path=args.model_save_path,
                                        batch_size=args.batch_size,
                                        max_len=args.max_len)
        model = Model()
        model = model.to(trainer.device)
        trainer.prepare_data()
        trainer.set_optimizer(model)
        trainer.start_training(model)

        with tracker.namespace('test'):
            test_acc, test_loss = trainer.train_epoch(model,
                                                      trainer.test_data_loader,
                                                      'test')

        y_review_texts, y_pred, y_pred_probs, y_test = trainer.get_predictions(
            model, trainer.test_data_loader)

        inspect(y_review_texts)
        inspect(torch.stack((y_pred, y_test), dim=1))

        mlflow.log_metric("test_acc",
                          float(test_acc),
                          step=tracker.get_global_step())
        mlflow.log_metric("test_loss",
                          float(test_loss),
                          step=tracker.get_global_step())

        mlflow.end_run()
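Example #8 logs to labml and mlflow side by side: the argparse settings go to labml through experiment.configs(args.__dict__) and to mlflow through log_param, and the final test metrics are mirrored to mlflow with step=tracker.get_global_step() so both trackers line up on the same step axis.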