def run_step(self):
    for i in range(self.inner_iterations):
        with tracker.namespace('sample'):
            self.sample()
        with self.mode.update(is_train=True):
            with tracker.namespace('train'):
                self.trainer()
        if self.validator:
            with tracker.namespace('valid'):
                self.validator()
        tracker.save()
def run(self):
    if self.is_log_parameters:
        # register indicators for the model parameters
        pytorch_utils.add_model_indicators(self.model)
    for _ in self.training_loop:
        with tracker.namespace('train'):
            self.trainer()
        with tracker.namespace('valid'):
            self.validator()
        if self.is_log_parameters:
            # log the current parameter statistics
            pytorch_utils.store_model_indicators(self.model)
def run(self):
    for _ in self.training_loop:
        # Generate a sample starting from the prompt `def train(`
        prompt = 'def train('
        log = [(prompt, Text.subtle)]
        for i in monit.iterate('Sample', 25):
            data = self.text.text_to_i(prompt).unsqueeze(-1)
            data = data.to(self.device)
            output, *_ = self.model(data)
            output = output.argmax(dim=-1).squeeze()
            prompt += self.text.itos[output[-1]]
            log += [(self.text.itos[output[-1]], Text.value)]
        logger.log(log)

        # Train, then validate
        with Mode(is_train=True,
                  is_log_parameters=self.is_log_parameters,
                  is_log_activations=self.is_log_activations):
            with tracker.namespace('train'):
                self.trainer()
        with tracker.namespace('valid'):
            self.validator()
def start_training(self, model):
    """
    Run the training loop for the given model.

    :param model: Instance of the NewsClassifier class
    """
    best_loss = float('inf')
    for epoch in monit.loop(self.epochs):
        with tracker.namespace('train'):
            self.train_epoch(model, self.train_data_loader, 'train')
        with tracker.namespace('valid'):
            _, val_loss = self.train_epoch(model, self.val_data_loader, 'valid')
        # keep a checkpoint of the best model seen so far
        if val_loss < best_loss:
            best_loss = val_loss
            if self.is_save_model:
                self.save_model(model)
        tracker.new_line()
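# Hedged sketch, not from the source: one plausible shape for the
# train_epoch() helper that start_training() above relies on. Assumed: the
# loader yields (input, target) batches, `self.device` and `self.optimizer`
# exist (set_optimizer() suggests the latter), and the return value is an
# (accuracy, loss) pair to match the `_, val_loss` unpacking.
import torch
import torch.nn.functional as F
from labml import monit, tracker

def train_epoch(self, model, data_loader, name):
    is_train = name == 'train'
    total_loss, correct, count = 0.0, 0, 0
    with torch.set_grad_enabled(is_train):
        for data, target in monit.iterate(name, data_loader):
            data, target = data.to(self.device), target.to(self.device)
            output = model(data)
            loss = F.cross_entropy(output, target)
            if is_train:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                tracker.add_global_step(len(data))
            total_loss += loss.item() * len(data)
            correct += (output.argmax(dim=-1) == target).sum().item()
            count += len(data)
            tracker.save({'loss.': loss})
    return correct / count, total_loss / count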
def main():
    experiment.create()
    conf = Configs()
    conf.activation = 'relu'
    conf.dropout = 0.1
    experiment.configs(conf, {
        'conv_sizes': [(128, 2), (256, 4)],
        'optimizer.learning_rate': 1e-4,
        'optimizer.optimizer': 'Adam',
    })
    with experiment.start():
        with monit.section('Initialize'):
            conf.initialize()
        with tracker.namespace('valid'):
            conf.valid_dataset.save_artifacts()
        conf.run()
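# Hedged sketch, not from the source: a minimal Configs class that would
# accept the overrides main() passes to experiment.configs(). The nested
# 'optimizer.learning_rate' / 'optimizer.optimizer' keys imply an optimizer
# sub-config; attribute names and defaults here are illustrative only.
from typing import List, Tuple
from labml.configs import BaseConfigs

class Configs(BaseConfigs):
    activation: str = 'relu'
    dropout: float = 0.1
    conv_sizes: List[Tuple[int, int]] = [(128, 2), (256, 4)]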
def main():
    experiment.create()
    conf = Configs()
    conf.learning_rate = 1e-4
    conf.epochs = 500
    conf.conv_sizes = [(128, 2), (256, 4)]
    # conf.conv_sizes = [(128, 1), (256, 2)]
    conf.activation = 'relu'
    conf.dropout = 0.1
    conf.train_batch_size = 32
    experiment.calculate_configs(conf, 'run')
    experiment.start()
    with tracker.namespace('valid'):
        conf.valid_dataset.save_artifacts()
    conf.run()
def main():
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    torch.manual_seed(configs['seed'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            # mix training and validation batches in a single loop
            for mode, batch in monit.mix(10, ('train', train_loader), ('valid', valid_loader)):
                with tracker.namespace(mode):
                    with torch.set_grad_enabled(mode == 'train'):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        pred = output.argmax(dim=1, keepdim=True)
                        if mode == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            tracker.add_global_step(data.shape[0])
                        tracker.save({
                            'loss.': loss,
                            'accuracy.': pred.eq(target.view_as(pred)).sum() / pred.shape[0]
                        })
            tracker.new_line()
            # save the model
            experiment.save_checkpoint()
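# Standard entry point so the MNIST example above runs as a script.
if __name__ == '__main__':
    main()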
def main():
    parser = argparse.ArgumentParser(description="PyTorch BERT Example")
    parser.add_argument(
        "--max_epochs",
        type=int,
        default=5,
        metavar="N",
        help="number of epochs to train (default: 5)",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=16,
        metavar="N",
        help="batch size (default: 16)",
    )
    parser.add_argument(
        "--max_len",
        type=int,
        default=160,
        metavar="N",
        help="number of tokens per sample (rest is truncated) (default: 160)",
    )
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1_000,
        metavar="N",
        help="number of samples to be used for training "
             "and evaluation steps (default: 1000, maximum: 100000)",
    )
    # note: argparse's type=bool treats any non-empty string as True
    parser.add_argument(
        "--save_model",
        type=bool,
        default=True,
        help="for saving the current model",
    )
    parser.add_argument(
        "--vocab_file",
        default="https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
        help="custom vocab file",
    )
    parser.add_argument(
        "--model_save_path",
        type=str,
        default="models",
        help="path to save the mlflow model",
    )

    experiment.create(name='bert_news')
    args = parser.parse_args()
    experiment.configs(args.__dict__)

    # This is set as an environment variable, check the Makefile
    # mlflow.set_tracking_uri("http://localhost:5005")
    mlflow.start_run()
    mlflow.log_param("epochs", args.max_epochs)
    mlflow.log_param("samples", args.num_samples)

    with experiment.start():
        trainer = NewsClassifierTrainer(epochs=args.max_epochs,
                                        n_samples=args.num_samples,
                                        vocab_file_url=args.vocab_file,
                                        is_save_model=args.save_model,
                                        model_path=args.model_save_path,
                                        batch_size=args.batch_size,
                                        max_len=args.max_len)
        model = Model()
        model = model.to(trainer.device)
        trainer.prepare_data()
        trainer.set_optimizer(model)
        trainer.start_training(model)

        with tracker.namespace('test'):
            test_acc, test_loss = trainer.train_epoch(model, trainer.test_data_loader, 'test')

        y_review_texts, y_pred, y_pred_probs, y_test = trainer.get_predictions(
            model, trainer.test_data_loader)

        inspect(y_review_texts)
        inspect(torch.stack((y_pred, y_test), dim=1))

        mlflow.log_metric("test_acc", float(test_acc), step=tracker.get_global_step())
        mlflow.log_metric("test_loss", float(test_loss), step=tracker.get_global_step())
        mlflow.end_run()
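# Example invocation (flag names from the argparse definitions above; the
# script name is illustrative):
#
#   python news_classifier.py --max_epochs 5 --num_samples 1000 --batch_size 16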