def run(self):
    tracker.set_queue('train.loss', is_print=True)
    tracker.set_scalar('valid.loss', is_print=True)

    for s in self.training_loop:
        self.train()
        if self.training_loop.is_interval(self.valid_interval):
            self.validate()
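# A minimal, self-contained sketch of the queue/scalar indicator pattern used
# above, assuming the labml tracker API (`set_queue`, `set_scalar`, `add`,
# `save`, `set_global_step`); the loss values are synthetic, for illustration:
import math

from labml import tracker

def demo_indicators():
    # `train.loss` is averaged over a sliding window of 20 values
    tracker.set_queue('train.loss', queue_size=20, is_print=True)
    # `valid.loss` is logged as a plain scalar
    tracker.set_scalar('valid.loss', is_print=True)

    for step in range(100):
        tracker.set_global_step(step)
        tracker.add('train.loss', math.exp(-step / 50))
        if step % 10 == 9:
            tracker.add('valid.loss', math.exp(-step / 50) + 0.05)
        tracker.save()  # flush the tracked values for this step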
def run_training_loop(self):
    """
    ### Run training loop
    """
    # Last 100 episode information
    tracker.set_queue('reward', 100, True)
    tracker.set_queue('length', 100, True)

    for update in monit.loop(self.updates):
        progress = update / self.updates

        # Decreasing `learning_rate` and `clip_range` $\epsilon$
        learning_rate = 2.5e-4 * (1 - progress)
        clip_range = 0.1 * (1 - progress)

        # Sample with current policy
        samples = self.sample()

        # Train the model
        self.train(samples, learning_rate, clip_range)

        # Write summary info to the writer, and log to the screen
        tracker.save()
        if (update + 1) % 1_000 == 0:
            logger.log()
def __init__(self, *,
             model: nn.Module,
             optimizer: Optional[torch.optim.Adam],
             loss_func: Callable,
             accuracy_func: Callable):
    self.accuracy_func = accuracy_func
    self.loss_func = loss_func
    self.optimizer = optimizer
    self.model = model

    tracker.set_queue("*.loss", 20, True)
    tracker.set_scalar("*.accuracy", True)
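# The wildcard patterns above configure every indicator whose name matches,
# so each metric does not have to be registered individually. A standalone
# sketch of how they resolve, assuming labml's wildcard indicator matching;
# the metric values are synthetic:
from labml import tracker

tracker.set_queue("*.loss", 20, True)   # 20-value sliding window, printed
tracker.set_scalar("*.accuracy", True)  # plain scalar, printed

tracker.add('train.loss', 0.42)          # matched by "*.loss"
tracker.add('valid.loss', 0.39)          # also matched by "*.loss"
tracker.add('valid.accuracy', 0.91)      # matched by "*.accuracy"
tracker.save()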
def run(self):
    pytorch_utils.add_model_indicators(self.model)

    tracker.set_queue("train.loss", 20, True)
    tracker.set_histogram("valid.loss", True)
    tracker.set_scalar("valid.accuracy", True)

    for _ in self.training_loop:
        self.train()
        self.test()
        if self.is_log_parameters:
            pytorch_utils.store_model_indicators(self.model)
def run(self):
    # Training and validation
    pytorch_utils.add_model_indicators(self.model)

    tracker.set_queue("train.loss", 20, True)
    tracker.set_histogram("valid.loss", True)
    tracker.set_scalar("valid.accuracy", True)
    tracker.set_indexed_scalar('valid.sample_loss')
    tracker.set_indexed_scalar('valid.sample_pred')

    test_data = np.array([d[0].numpy() for d in self.valid_dataset])
    experiment.save_numpy("valid.data", test_data)

    for _ in self.training_loop:
        self.train()
        self.valid()
        if self.is_log_parameters:
            pytorch_utils.store_model_indicators(self.model)
def run_training_loop(self):
    """
    ### Run training loop
    """
    # Last 100 episode information
    tracker.set_queue('reward', 100, True)
    tracker.set_queue('length', 100, True)

    for update in monit.loop(self.updates):
        # Sample with current policy
        samples = self.sample()

        # Train the model
        self.train(samples)

        # Save tracked indicators
        tracker.save()

        # Add a new line to the screen periodically
        if (update + 1) % 1_000 == 0:
            logger.log()
def __init__(self, *,
             name: str,
             model: nn.Module,
             optimizer: Optional[torch.optim.Adam],
             loss_func: Callable,
             accuracy_func: Callable,
             data_loader: torch.utils.data.DataLoader,
             is_increment_global_step: bool,
             log_interval: Optional[int]):
    r"""
    Arguments:
        loss_func(Callable): A module with a call signature
            ``(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor``
        accuracy_func(Callable): A module with a call signature
            ``(output: torch.Tensor, target: torch.Tensor) -> int``
    """
    self.accuracy_func = accuracy_func
    self.loss_func = loss_func
    self.log_interval = log_interval
    self.is_increment_global_step = is_increment_global_step
    self.optimizer = optimizer
    self.data_loader = data_loader
    self.name = name
    self.model = model

    # Indicators are prefixed with the trainer name, e.g. `train.loss`
    tracker.set_queue(f"{self.name}.loss", 20, True)
    tracker.set_scalar(f"{self.name}.accuracy", True)
def run_training_loop(self):
    """
    ### Run training loop
    """
    # Last 100 episode information
    tracker.set_queue('reward', 100, True)
    tracker.set_queue('length', 100, True)

    # Copy to target network initially
    self.target_model.load_state_dict(self.model.state_dict())

    for update in monit.loop(self.updates):
        # $\epsilon$, exploration fraction
        exploration = self.exploration_coefficient(update)
        tracker.add('exploration', exploration)

        # $\beta$ for prioritized replay
        beta = self.prioritized_replay_beta(update)
        tracker.add('beta', beta)

        # Sample with current policy
        self.sample(exploration)

        # Start training after the buffer is full
        if self.replay_buffer.is_full():
            # Train the model
            self.train(beta)

            # Periodically update the target network
            if update % self.update_target_model == 0:
                self.target_model.load_state_dict(self.model.state_dict())

        # Save tracked indicators
        tracker.save()

        # Add a new line to the screen periodically
        if (update + 1) % 1_000 == 0:
            logger.log()
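# `exploration_coefficient` and `prioritized_replay_beta` are referenced above
# but not shown; a plausible shape for both is a linear anneal. The end-points
# here (1.0 -> 0.1 for epsilon, 0.4 -> 1.0 for beta) are conventional DQN /
# prioritized-replay defaults, not values taken from this snippet:
def exploration_coefficient(update: int, updates: int) -> float:
    # Anneal epsilon from 1.0 down to 0.1 over the first 10% of updates
    t = min(1.0, update / (0.1 * updates))
    return 1.0 + t * (0.1 - 1.0)

def prioritized_replay_beta(update: int, updates: int) -> float:
    # Anneal beta from 0.4 up to 1.0 over the full run
    t = update / updates
    return 0.4 + t * (1.0 - 0.4)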
def init(self):
    tracker.set_queue("loss.*", 20, True)
    tracker.set_scalar("accuracy.*", True)
    hook_model_outputs(self.mode, self.model, 'model')
    self.state_modules = [self.accuracy_func]
def run(self):
    tracker.set_queue('train.loss', is_print=True)

    for s in self.training_loop:
        self.train()
def main():
    # Set indicator types
    tracker.set_queue("train_loss", 20, True)
    tracker.set_histogram("valid_loss", True)
    tracker.set_scalar("valid_accuracy", True)

    epochs = 10
    train_batch_size = 64
    test_batch_size = 1000
    use_cuda = True
    cuda_device = 0
    seed = 5
    train_log_interval = 10
    learning_rate = 0.01

    # Get the device
    is_cuda = use_cuda and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        if cuda_device < torch.cuda.device_count():
            device = torch.device(f"cuda:{cuda_device}")
        else:
            print(f"Cuda device index {cuda_device} higher than "
                  f"device count {torch.cuda.device_count()}")
            device = torch.device(f"cuda:{torch.cuda.device_count() - 1}")

    # Data transform
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Train loader
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()), train=True, download=True,
                       transform=data_transform),
        batch_size=train_batch_size, shuffle=True)

    # Test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()), train=False, download=True,
                       transform=data_transform),
        batch_size=test_batch_size, shuffle=False)

    # Model
    model = Net().to(device)
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Set seeds
    torch.manual_seed(seed)

    # Only for logging purposes
    configs = {
        'epochs': epochs,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size,
        'use_cuda': use_cuda,
        'cuda_device': cuda_device,
        'seed': seed,
        'train_log_interval': train_log_interval,
        'learning_rate': learning_rate,
        'device': device,
        'train_loader': train_loader,
        'test_loader': test_loader,
        'model': model,
        'optimizer': optimizer,
    }

    # Create the experiment
    experiment.create(name='tracker')
    # Experiment configs
    experiment.calculate_configs(configs)
    # PyTorch model
    experiment.add_pytorch_models(dict(model=model))

    experiment.start()

    # Training loop
    for epoch in range(1, epochs + 1):
        train(model, optimizer, train_loader, device, train_log_interval)
        test(model, test_loader, device)
        logger.log()

    # Save the model
    experiment.save_checkpoint()
def init(self):
    tracker.set_queue("loss.*", 20, True)
    tracker.set_scalar("accuracy.*", True)
    self.state_modules = [self.accuracy_func]
def startup(self):
    pytorch_utils.add_model_indicators(self.model)

    tracker.set_queue("train.loss", 20, True)
    tracker.set_histogram("valid.loss", True)
    tracker.set_scalar("valid.accuracy", True)
def main():
    # ✨ Set the types of the stats/indicators.
    # They default to scalars if not specified
    tracker.set_queue('loss.train', 20, True)
    tracker.set_histogram('loss.valid', True)
    tracker.set_scalar('accuracy.valid', True)

    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()), train=True, download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'], shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()), train=False, download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'], shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    torch.manual_seed(configs['seed'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_tracker')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for epoch in range(1, configs['epochs'] + 1):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

        # ✨ Save the models
        experiment.save_checkpoint()
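# The `train` and `validate` helpers called above are outside this snippet;
# a minimal sketch of the training half, assuming the labml tracker API
# (`add_global_step`, `add`, `save`) and a cross-entropy objective:
import torch.nn.functional as F
from labml import tracker

def train(model, optimizer, train_loader, device, train_log_interval):
    model.train()
    for i, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
        # Count processed samples as the global step
        tracker.add_global_step(len(data))
        # `loss.train` picks up the queue settings registered in `main`
        tracker.add({'loss.train': loss})
        if (i + 1) % train_log_interval == 0:
            tracker.save()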
def main_train():
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all Python files
        files = parser.load.load_files()
        # Split training and validation data
        train_files, valid_files = parser.load.split_train_valid(
            files, is_shuffle=False)

    with monit.section("Create model"):
        # Create the model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move the model to `device`
        model.to(device)

    # Create loss function and optimizer
    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0
    h0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)
    c0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)

    # Set up logger indicators
    tracker.set_queue("train.loss", queue_size=500, is_print=True)
    tracker.set_queue("valid.loss", queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training from scratch (step 0)
    experiment.start()

    # Number of batches per epoch
    batches = math.ceil(
        sum([len(f[1]) + 1 for f in train_files]) / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # Create trainer
        trainer = Trainer(files=train_files,
                          model=model,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          batch_size=batch_size,
                          seq_len=seq_len,
                          is_train=True,
                          h0=h0, c0=c0, eof=0)
        # Create validator
        validator = Trainer(files=valid_files,
                            model=model,
                            loss_func=loss_func,
                            optimizer=optimizer,
                            batch_size=batch_size,
                            seq_len=seq_len,
                            is_train=False,
                            h0=h0, c0=c0, eof=0)

        # Next batch to train and validate
        train_batch = 0
        valid_batch = 0

        # Loop through steps
        for i in range(1, steps_per_epoch):
            try:
                with DelayedKeyboardInterrupt():
                    # Set global step
                    global_step = epoch * batches + min(
                        batches, (batches * i) // steps_per_epoch)
                    tracker.set_global_step(global_step)

                    # Last batch to train and validate
                    train_batch_limit = trainer.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)
                    valid_batch_limit = validator.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)

                    with monit.section("train",
                                       total_steps=trainer.x.shape[0],
                                       is_partial=True):
                        model.train()
                        # Train
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid",
                                       total_steps=validator.x.shape[0],
                                       is_partial=True):
                        model.eval()
                        # Validate
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                experiment.save_checkpoint()
                return

    experiment.save_checkpoint()
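# `DelayedKeyboardInterrupt` above defers Ctrl-C until the end of the `with`
# block so a training step and checkpoint are never cut in half. A minimal
# sketch, assuming the standard signal-handler-swap implementation:
import signal

class DelayedKeyboardInterrupt:
    def __enter__(self):
        self.interrupted = False
        # Replace the SIGINT handler; remember the old one to restore later
        self.old_handler = signal.signal(signal.SIGINT, self._handler)
        return self

    def _handler(self, sig, frame):
        # Record the interrupt instead of raising immediately
        self.interrupted = True

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore the original handler, then re-raise the deferred interrupt
        signal.signal(signal.SIGINT, self.old_handler)
        if self.interrupted:
            raise KeyboardInterrupt()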