def __next__(self):
    """
    Advance the loop by one step and run whatever periodic bookkeeping is due.

    Returns the global step produced by the wrapped iterator.

    Raises `StopIteration` either when an interrupt signal has been recorded
    or when the wrapped iterator is exhausted; `__finish` is called first in
    both cases.
    """
    # An interrupt was recorded earlier: wind the loop down and stop iterating.
    if self.__signal_received is not None:
        logger.log('\nKilling Loop.', Text.danger)
        monit.finish_loop()
        self.__finish()
        raise StopIteration("SIGINT")

    try:
        step = next(self.__loop)
    except StopIteration:
        # Normal exhaustion: finalize, then let the same exception propagate.
        self.__finish()
        raise

    tracker.set_global_step(step)

    # Each action below fires once at least one full interval has elapsed
    # since the step at which it last ran.
    since_write = step - self.__last_write_step
    if since_write >= self.__log_write_interval:
        tracker.save()
        self.__last_write_step = step

    since_new_line = step - self.__last_new_line_step
    if since_new_line >= self.__log_new_line_interval:
        tracker.new_line()
        self.__last_new_line_step = step

    if self.__is_save_models:
        since_save = step - self.__last_save_step
        if since_save >= self.__save_models_interval:
            experiment.save_checkpoint()
            self.__last_save_step = step

    return step
def solve(self):
    """
    Run `self.epochs` solver iterations and track every information set.

    On each iteration `cfr` is called once per player from a fresh history.
    When `is_online_update` is false the information sets are cleared before
    those calls and `update` is invoked after them. Per-action statistics are
    added to the tracker on every iteration; the information sets are
    inspected once the loop has ended.
    """
    for t in monit.loop(self.epochs):
        if not self.is_online_update:
            for info_set in self.info_sets.values():
                info_set.clear()

        for player in range(self.n_players):
            history = self.create_new_history()
            # One entry per player, all initialised to 1
            ones = [1] * self.n_players
            self.cfr(history, cast(Player, player), ones)

        if not self.is_online_update:
            self.update()

        with monit.section("Track"):
            for info_set in self.info_sets.values():
                for action in info_set.actions():
                    stats = {
                        f'strategy.{info_set.key}.{action}': info_set.strategy[action],
                        f'average_strategy.{info_set.key}.{action}': info_set.average_strategy[action],
                        f'regret.{info_set.key}.{action}': info_set.regret[action],
                        f'current_regret.{info_set.key}.{action}': info_set.current_regret[action],
                    }
                    tracker.add(stats)

        # Flush tracked values and start a new console line
        if t % self.track_frequency == 0:
            tracker.save()
            logger.log()

        # Periodic checkpoint
        if (t + 1) % self.save_frequency == 0:
            experiment.save_checkpoint()

    logger.inspect(self.info_sets)
def run_training_loop(self):
    """
    ### Run training loop

    Continues from the tracker's current global step up to `self.c.updates`.
    Every update samples with the current policy, trains on the samples and
    saves the tracker. Hyper-parameters are refreshed from `self.c` every 2
    updates, a new console line is written every 25 updates and a checkpoint
    is saved every 200 updates.
    """
    offset = tracker.get_global_step()
    if offset > 100:
        # If resumed, sample several iterations first to reduce sampling bias
        for _ in range(16):
            self.sample(False)

    for _ in monit.loop(self.c.updates - offset):
        update = tracker.get_global_step()

        # sample with current policy
        samples = self.sample()

        # train the model
        self.train(samples)

        # write summary info to the writer, and log to the screen
        tracker.save()

        # refresh the hyper-parameters from the configs
        if (update + 1) % 2 == 0:
            self.set_optim(self.c.lr(), self.c.reg_l2())
            self.set_game_param(self.c.right_gain(), self.c.fix_prob(),
                                self.c.neg_mul(), self.c.step_reward())
            self.set_weight_param(self.c.entropy_weight(),
                                  self.c.prob_reg_weight(),
                                  self.c.target_prob_weight(),
                                  self.c.gamma(), self.c.lamda())

        if (update + 1) % 25 == 0:
            logger.log()

        if (update + 1) % 200 == 0:
            experiment.save_checkpoint()
def __finish(self):
    """
    Finalize the loop: put back the previous `SIGINT` handler, flush the
    tracker, end the console line and, when model saving is enabled, save a
    checkpoint.
    """
    try:
        signal.signal(signal.SIGINT, self.old_handler)
    except ValueError:
        # Best effort: `signal.signal` raises `ValueError` when the handler
        # cannot be installed (e.g. when not called from the main thread).
        pass

    tracker.save()
    tracker.new_line()

    if not self.__is_save_models:
        return

    logger.log("Saving model...")
    experiment.save_checkpoint()
def main():
    """Create the `sklearn` experiment, run the configured job and save a checkpoint."""
    configs = Configs()

    # Experiment that writes to the `sqlite` writer only
    experiment.create(name='sklearn', writers={'sqlite'})
    experiment.calculate_configs(configs)

    # Register the model so that it is included in checkpoints
    models = dict(model=configs.model)
    experiment.add_sklearn_models(models)

    experiment.start()
    configs.run()

    experiment.save_checkpoint()
def main():
    """Create the `configs` experiment, run it and save the model."""
    configs = Configs()
    experiment.create(name='configs')

    # Config overrides, followed by the list passed as the third argument
    overrides = {'optimizer': 'sgd_optimizer'}
    run_order = ['set_seed', 'run']
    experiment.calculate_configs(configs, overrides, run_order)

    experiment.start()
    configs.run()

    # save the model
    experiment.save_checkpoint()
def main():
    """
    Train and validate an MNIST classifier on the remote datasets, tracked as
    experiment `mnist_labml_monit`.

    The random seed is set before the model is constructed so that the
    initial weights, and not only the data shuffling, are reproducible.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Fall back to the CPU when CUDA is disabled or unavailable
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    train_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_train'),
        batch_size=configs['train_batch_size'],
        shuffle=True,
        num_workers=4)
    valid_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_valid'),
        batch_size=configs['valid_batch_size'],
        shuffle=False,
        num_workers=4)

    # Seed before creating the model; parameter initialization draws from the
    # global random number generator.
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()
def main():
    """Create the `configs` experiment, seed PyTorch, run it and save the model."""
    configs = Configs()
    experiment.create(name='configs')

    overrides = {'optimizer': 'sgd_optimizer'}
    experiment.configs(configs, overrides)

    # Seed the global PyTorch random number generator from the configs
    torch.manual_seed(configs.seed)

    with experiment.start():
        configs.run()

    # save the model
    experiment.save_checkpoint()
def train(self):
    """
    ### Train the model

    Runs `self.epochs` passes over `self.dataloader`. Model parameters are
    tracked and a sample is generated every 100 batches, tracked metrics are
    saved every 10 batches, and a checkpoint is saved after each epoch.
    """
    for _ in monit.loop(self.epochs):
        for i, batch in monit.enum('Train', self.dataloader):
            batch_no = i + 1
            is_hundredth = batch_no % 100 == 0

            # Inputs and targets on the training device
            data = batch[0].to(self.device)
            target = batch[1].to(self.device)

            # Advance the tracker step by the number of elements in the
            # first two dimensions of `data`
            tracker.add_global_step(data.shape[0] * data.shape[1])

            # Forward pass in training mode
            self.model.train()
            output = self.model(data)

            # Loss over the flattened outputs and targets
            flat_output = output.view(-1, output.shape[-1])
            loss = self.loss_func(flat_output, target.view(-1))
            tracker.add("loss.train", loss)

            # Backward pass with gradient clipping
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            self.optimizer.step()

            # Track the model before the gradients are cleared
            if is_hundredth:
                tracker.add('model', self.model)

            self.optimizer.zero_grad()

            # Generate a sample in evaluation mode, without gradients
            if is_hundredth:
                self.model.eval()
                with torch.no_grad():
                    self.sample()

            # Save the tracked metrics
            if batch_no % 10 == 0:
                tracker.save()

        # Save the model
        experiment.save_checkpoint()
def run(self):
    """
    ### Training loop

    For each of `self.epochs` epochs: train, sample, end the console line
    and save a checkpoint.
    """
    for _ in monit.loop(self.epochs):
        # One epoch of training followed by sampling
        self.train()
        self.sample()

        # Move the console output to a new line
        tracker.new_line()

        # Checkpoint the model
        experiment.save_checkpoint()
def main():
    """Create and run the `Battleship_DQN` experiment, saving a checkpoint when enabled."""
    configs = Configs()
    experiment.create(name='Battleship_DQN')

    # No overrides; the list is passed through as the third argument
    run_order = ['set_seed', 'policy', 'target', 'run']
    experiment.calculate_configs(configs, {}, run_order)

    # Only the policy is registered for checkpointing
    experiment.add_pytorch_models(dict(model=configs.policy))

    experiment.start()
    configs.run()

    if not configs.is_save_models:
        return
    experiment.save_checkpoint()
def run_training_loop(self):
    """
    ### Run training loop

    Continues from the tracker's current global step up to `self.c.updates`.
    Every update samples with the current policy, trains on the samples,
    saves the tracker and writes a new console line. A checkpoint is saved
    every 500 updates.
    """
    offset = tracker.get_global_step()

    for _ in monit.loop(self.c.updates - offset):
        update = tracker.get_global_step()

        # sample with current policy
        samples = self.sample()

        # train the model
        self.train(samples)

        # write summary info to the writer, and log to the screen
        tracker.save()
        logger.log()

        if (update + 1) % 500 == 0:
            experiment.save_checkpoint()
def loop(self):
    """
    Run the train/test cycle for the configured number of epochs.

    Each epoch trains, tests, logs the model parameters and saves the
    tracker. A new console line is started every `log_new_line_interval`
    epochs. A checkpoint is saved at the end when model saving is enabled.
    """
    # Loop through the monitored iterator
    for epoch in monit.loop(range(self.__epochs)):
        self._train()
        self._test()
        self.__log_model_params()

        # Clear line and output to console
        tracker.save()

        # Go to the next line of output once every
        # `log_new_line_interval` epochs
        epochs_done = epoch + 1
        if epochs_done % self.__log_new_line_interval == 0:
            logger.log()

    if not self.__is_save_models:
        return
    experiment.save_checkpoint()
def iterate(self):
    r"""
    ### Iteratively update $\textcolor{lightgreen}{\sigma^t(I)(a)}$

    This updates the strategies for $T$ iterations.

    Every iteration walks the tree once per player, advances the global
    step, tracks the information sets and saves the tracker.
    """
    # Loop for `epochs` times
    for t in monit.iterate('Train', self.epochs):
        # Walk tree and update regrets for each player
        for i in range(self.n_players):
            self.walk_tree(self.create_new_history(), cast(Player, i), 1, 1)

        # Track data for analytics
        tracker.add_global_step()
        self.tracker(self.info_sets)
        tracker.save()

        # Save checkpoints every $1,000$ iterations
        if (t + 1) % 1_000 == 0:
            experiment.save_checkpoint()
def train(self):
    """
    Train the model for `self.epochs` epochs.

    Every 100 batches the model is added to the tracker and a sample is
    generated; every 10 batches the tracked metrics are saved; a checkpoint
    is saved at the end of each epoch.
    """
    for _ in monit.loop(self.epochs):
        for i, batch in monit.enum('Train', self.dataloader):
            # Move data to the device
            inputs, targets = batch[0], batch[1]
            data = inputs.to(self.device)
            target = targets.to(self.device)

            n_elements = data.shape[0] * data.shape[1]
            tracker.add_global_step(n_elements)

            self.model.train()
            output = self.model(data)

            # Calculate and log loss
            n_classes = output.shape[-1]
            loss = self.loss_func(output.view(-1, n_classes), target.view(-1))
            tracker.add("loss.train", loss)

            # Calculate gradients, clip them and take an optimizer step
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            self.optimizer.step()

            every_100 = (i + 1) % 100 == 0
            every_10 = (i + 1) % 10 == 0

            # Log the model parameters and gradients every 100 batches,
            # while the gradients are still populated
            if every_100:
                tracker.add('model', self.model)

            # Clear the gradients
            self.optimizer.zero_grad()

            if every_100:
                self.model.eval()
                with torch.no_grad():
                    self.sample()

            # Save the tracked metrics
            if every_10:
                tracker.save()

        experiment.save_checkpoint()
def main():
    """
    Train and validate an MNIST classifier, tracked as experiment
    `mnist_labml_monit`.

    The random seed is set before the model is constructed so that the
    initial weights, and not only the data shuffling, are reproducible.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Fall back to the CPU when CUDA is disabled or unavailable
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    # Seed before creating the model; parameter initialization draws from the
    # global random number generator.
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()
def main():
    """
    Train and validate an MNIST classifier with interleaved training and
    validation batches, tracked as experiment `mnist_labml_monit`.

    The random seed is set before the model is constructed so that the
    initial weights, and not only the data shuffling, are reproducible.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Fall back to the CPU when CUDA is disabled or unavailable
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    # Seed before creating the model; parameter initialization draws from the
    # global random number generator.
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            for mode, batch in monit.mix(10, ('train', train_loader),
                                         ('valid', valid_loader)):
                # Metrics are saved under the namespace of the current mode,
                # and gradients are only enabled for training batches
                with tracker.namespace(mode):
                    with torch.set_grad_enabled(mode == 'train'):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        pred = output.argmax(dim=1, keepdim=True)

                        if mode == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            # The global step counts training samples
                            tracker.add_global_step(data.shape[0])

                        tracker.save({
                            'loss.': loss,
                            'accuracy.': pred.eq(target.view_as(pred)).sum() / pred.shape[0]
                        })
            tracker.new_line()

    # save the model
    experiment.save_checkpoint()
def train():
    """
    ## Create and train a small model

    Builds the dataset, model, optimizer, trainer and sampler, then trains
    for 32 epochs. After every epoch a sample continuing `prompt` is logged
    and a checkpoint is saved.
    """
    # Create an experiment
    experiment.create(name='retro_small')

    # GPU device
    device = torch.device('cuda:0')

    # Load Tiny Shakespeare dataset
    tds = TextFileDataset(
        lab.get_data_path() / 'tiny_shakespeare.txt',
        list,
        url='https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt')

    # Load [Retro dataset](dataset.html)
    train_dataset = Dataset(lab.get_data_path() / 'retro_train_dataset.json', tds)

    # Create dataloader
    train_dl = DataLoader(train_dataset,
                          batch_size=4,
                          sampler=RandomSampler(train_dataset, replacement=True))

    # Hyper-parameters
    chunk_len = 16
    d_model = 128
    d_ff = 512
    n_heads = 16
    d_k = 16

    # Create the nearest neighbor encoder
    nearest_neighbor_encoder = NearestNeighborEncoder(chunk_len, 6, {3}, d_model, n_heads, d_k, d_ff)
    # Create the model
    model = RetroModel(tds.n_tokens, d_model, 6, {3, 5}, chunk_len, n_heads, d_k, d_ff,
                       encoder=nearest_neighbor_encoder)
    # Move the model to the device
    model = model.to(device)
    # Create the optimizer
    optimizer = Noam(model.parameters(), lr=1., d_model=d_model, warmup=2_000)
    # Create the `Trainer`
    trainer = Trainer(device, model, train_dl, optimizer)
    # Create the `Sampler`
    sampler = Sampler(device, model, tds, chunk_len)

    # Prompt that the sampler continues after every epoch. It has to be a
    # real assignment because `prompt` is read inside the training loop.
    prompt = '''Second Citizen:\nOne word, good citizens.\n\nFirst Citizen:'''

    # Set models for saving and loading
    experiment.add_pytorch_models(model=model)

    # Start the experiment
    with experiment.start():
        # Train for `32` epochs
        for _ in monit.loop(32):
            # Train
            trainer()
            # Print a new line
            tracker.new_line()
            # Sample from the `prompt`
            logger.log([(prompt.replace('\n', '\\n\n'), Text.subtle),
                        (sampler.sample(prompt, 128).replace('\n', '\\n\n'), Text.none)])
            # Save models
            experiment.save_checkpoint()
def main_train():
    """
    Train the LSTM model for 100 epochs, validating alongside training.

    Each epoch is divided into steps; on every step training and validation
    both advance up to the same fraction of their batches. A checkpoint is
    saved at the end of each epoch, and also when a keyboard interrupt is
    received, after which the function returns.
    """
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all python files
        files = parser.load.load_files()
        # Split training and validation data
        train_files, valid_files = parser.load.split_train_valid(files, is_shuffle=False)

    with monit.section("Create model"):
        # Create model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0
    state_shape = (lstm_layers, batch_size, lstm_size)
    h0 = torch.zeros(state_shape, device=device)
    c0 = torch.zeros(state_shape, device=device)

    # Setup logger indicators
    tracker.set_queue("train.loss", queue_size=500, is_print=True)
    tracker.set_queue("valid.loss", queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training scratch (step '0')
    experiment.start()

    # Number of batches per epoch
    n_tokens = sum(len(f[1]) + 1 for f in train_files)
    batches = math.ceil(n_tokens / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # Create trainer
        trainer = Trainer(files=train_files,
                          model=model,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          batch_size=batch_size,
                          seq_len=seq_len,
                          is_train=True,
                          h0=h0,
                          c0=c0,
                          eof=0)
        # Create validator
        validator = Trainer(files=valid_files,
                            model=model,
                            loss_func=loss_func,
                            optimizer=optimizer,
                            is_train=False,
                            seq_len=seq_len,
                            batch_size=batch_size,
                            h0=h0,
                            c0=c0,
                            eof=0)

        # Next batch to train and validation
        train_batch = 0
        valid_batch = 0

        # Loop through steps
        for i in range(1, steps_per_epoch):
            try:
                with DelayedKeyboardInterrupt():
                    # Set global step
                    batches_in_epoch = min(batches, (batches * i) // steps_per_epoch)
                    tracker.set_global_step(epoch * batches + batches_in_epoch)

                    # Last batch to train and validate
                    fraction = min(1., (i + 1) / steps_per_epoch)
                    train_batch_limit = trainer.x.shape[0] * fraction
                    valid_batch_limit = validator.x.shape[0] * fraction

                    with monit.section("train", total_steps=trainer.x.shape[0], is_partial=True):
                        model.train()
                        # Train
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid", total_steps=validator.x.shape[0], is_partial=True):
                        model.eval()
                        # Validate
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                # Keep the progress made so far, then stop training
                experiment.save_checkpoint()
                return

        experiment.save_checkpoint()
def step(self, idx: int):
    """
    ### Training Step

    Runs one discriminator update followed by one update of the generator
    and mapping network, each accumulating gradients over
    `gradient_accumulate_steps` batches. `idx` is the index of the step; it
    decides whether the lazily-applied penalties, the logging and the
    checkpointing happen on this step.
    """
    # Interval flags for this step
    is_log_step = (idx + 1) % self.log_generated_interval == 0
    is_gradient_penalty_step = (idx + 1) % self.lazy_gradient_penalty_interval == 0

    # Train the discriminator
    with monit.section('Discriminator'):
        # Reset gradients
        self.discriminator_optimizer.zero_grad()

        # Accumulate gradients for `gradient_accumulate_steps`
        for _ in range(self.gradient_accumulate_steps):
            # Update `mode`. Set whether to log activation
            with self.mode.update(is_log_activations=is_log_step):
                # Sample images from generator; they are detached so that
                # the discriminator loss does not back-propagate into the
                # generator
                generated_images, _ = self.generate_images(self.batch_size)
                fake_output = self.discriminator(generated_images.detach())

                # Get real images from the data loader
                real_images = next(self.loader).to(self.device)
                # We need to calculate gradients w.r.t. real images for gradient penalty
                if is_gradient_penalty_step:
                    real_images.requires_grad_()
                # Discriminator classification for real images
                real_output = self.discriminator(real_images)

                # Get discriminator loss
                real_loss, fake_loss = self.discriminator_loss(real_output, fake_output)
                disc_loss = real_loss + fake_loss

                # Add gradient penalty
                if is_gradient_penalty_step:
                    # Calculate and log gradient penalty
                    gp = self.gradient_penalty(real_images, real_output)
                    tracker.add('loss.gp', gp)
                    # Multiply by coefficient and add gradient penalty
                    disc_loss = disc_loss + 0.5 * self.gradient_penalty_coefficient * gp * self.lazy_gradient_penalty_interval

                # Compute gradients
                disc_loss.backward()

                # Log discriminator loss
                tracker.add('loss.discriminator', disc_loss)

        if is_log_step:
            # Log discriminator model parameters occasionally
            tracker.add('discriminator', self.discriminator)

        # Clip gradients for stabilization
        torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(), max_norm=1.0)
        # Take optimizer step
        self.discriminator_optimizer.step()

    # Train the generator
    with monit.section('Generator'):
        # Reset gradients
        self.generator_optimizer.zero_grad()
        self.mapping_network_optimizer.zero_grad()

        # Accumulate gradients for `gradient_accumulate_steps`
        for _ in range(self.gradient_accumulate_steps):
            # Sample images from generator
            generated_images, w = self.generate_images(self.batch_size)
            # Discriminator classification for generated images
            fake_output = self.discriminator(generated_images)

            # Get generator loss
            gen_loss = self.generator_loss(fake_output)

            # Add path length penalty
            if idx > self.lazy_path_penalty_after and (idx + 1) % self.lazy_path_penalty_interval == 0:
                # Calculate path length penalty
                plp = self.path_length_penalty(w, generated_images)
                # Ignore if `nan`
                if not torch.isnan(plp):
                    tracker.add('loss.plp', plp)
                    gen_loss = gen_loss + plp

            # Calculate gradients
            gen_loss.backward()

            # Log generator loss
            tracker.add('loss.generator', gen_loss)

        if is_log_step:
            # Log generator and mapping network parameters occasionally
            tracker.add('generator', self.generator)
            tracker.add('mapping_network', self.mapping_network)

        # Clip gradients for stabilization
        torch.nn.utils.clip_grad_norm_(self.generator.parameters(), max_norm=1.0)
        torch.nn.utils.clip_grad_norm_(self.mapping_network.parameters(), max_norm=1.0)

        # Take optimizer step
        self.generator_optimizer.step()
        self.mapping_network_optimizer.step()

    # Log generated images, followed by a few real images
    if is_log_step:
        images = torch.cat([generated_images[:6], real_images[:3]], dim=0)
        tracker.add('generated', images)

    # Save model checkpoints
    if (idx + 1) % self.save_checkpoint_interval == 0:
        experiment.save_checkpoint()

    # Flush tracker
    tracker.save()
def run(self):
    r"""
    ## Training

    We aim to solve:
    $$G^{*}, F^{*} = \arg \min_{G,F} \max_{D_X, D_Y} \mathcal{L}(G, F, D_X, D_Y)$$

    where,
    $G$ translates images from $X \rightarrow Y$,
    $F$ translates images from $Y \rightarrow X$,
    $D_X$ tests if images are from $X$ space,
    $D_Y$ tests if images are from $Y$ space, and

    \begin{align}
    \mathcal{L}(G, F, D_X, D_Y)
        &= \mathcal{L}_{GAN}(G, D_Y, X, Y) \\
        &+ \mathcal{L}_{GAN}(F, D_X, Y, X) \\
        &+ \lambda_1 \mathcal{L}_{cyc}(G, F) \\
        &+ \lambda_2 \mathcal{L}_{identity}(G, F) \\
    \\
    \mathcal{L}_{GAN}(G, F, D_Y, X, Y)
        &= \mathbb{E}_{y \sim p_{data}(y)} \Big[log D_Y(y)\Big] \\
        &+ \mathbb{E}_{x \sim p_{data}(x)} \bigg[log\Big(1 - D_Y(G(x))\Big)\bigg] \\
        &+ \mathbb{E}_{x \sim p_{data}(x)} \Big[log D_X(x)\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \bigg[log\Big(1 - D_X(F(y))\Big)\bigg] \\
    \\
    \mathcal{L}_{cyc}(G, F)
        &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(G(x)) - x \rVert_1\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(F(y)) - y \rVert_1\Big] \\
    \\
    \mathcal{L}_{identity}(G, F)
        &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(x) - x \rVert_1\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(y) - y \rVert_1\Big] \\
    \end{align}

    $\mathcal{L}_{GAN}$ is the generative adversarial loss from the original
    GAN paper.

    $\mathcal{L}_{cyc}$ is the cyclic loss, where we try to get $F(G(x))$ to be similar to $x$,
    and $G(F(y))$ to be similar to $y$.
    Basically if the two generators (transformations) are applied in series it should give back the
    original image.
    This is the main contribution of this paper.
    It trains the generators to generate an image of the other distribution that is similar to
    the original image.
    Without this loss $G(x)$ could generate anything that's from the distribution of $Y$.
    Now it needs to generate something from the distribution of $Y$ but still has properties of $x$,
    so that $F(G(x))$ can re-generate something like $x$.

    $\mathcal{L}_{identity}$ is the identity loss.
    This was used to encourage the mapping to preserve color composition between
    the input and the output.

    To solve $G^{\*}, F^{\*}$,
    discriminators $D_X$ and $D_Y$ should **ascend** on the gradient,

    \begin{align}
    \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
    \log D_Y\Big(y^{(i)}\Big) \\
    &+ \log \Big(1 - D_Y\Big(G\Big(x^{(i)}\Big)\Big)\Big) \\
    &+ \log D_X\Big(x^{(i)}\Big) \\
    & +\log\Big(1 - D_X\Big(F\Big(y^{(i)}\Big)\Big)\Big)
    \Bigg]
    \end{align}

    That is descend on *negative* log-likelihood loss.

    In order to stabilize the training the negative log-likelihood objective
    was replaced by a least-squared loss -
    the least-squared error of discriminator, labelling real images with 1,
    and generated images with 0.
    So we want to descend on the gradient,

    \begin{align}
    \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
        \bigg(D_Y\Big(y^{(i)}\Big) - 1\bigg)^2 \\
        &+ D_Y\Big(G\Big(x^{(i)}\Big)\Big)^2 \\
        &+ \bigg(D_X\Big(x^{(i)}\Big) - 1\bigg)^2 \\
        &+ D_X\Big(F\Big(y^{(i)}\Big)\Big)^2
    \Bigg]
    \end{align}

    We use least-squares for generators also.
    The generators should *descend* on the gradient,

    \begin{align}
    \nabla_{\theta_{F, G}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
        \bigg(D_Y\Big(G\Big(x^{(i)}\Big)\Big) - 1\bigg)^2 \\
        &+ \bigg(D_X\Big(F\Big(y^{(i)}\Big)\Big) - 1\bigg)^2 \\
        &+ \mathcal{L}_{cyc}(G, F) + \mathcal{L}_{identity}(G, F)
    \Bigg]
    \end{align}

    We use `generator_xy` for $G$ and `generator_yx` for $F$.
    We use `discriminator_x` for $D_X$ and `discriminator_y` for $D_Y$.
    """

    # Replay buffers to keep generated samples
    gen_x_buffer = ReplayBuffer()
    gen_y_buffer = ReplayBuffer()

    # Loop through epochs
    for epoch in monit.loop(self.epochs):
        # Loop through the dataset
        for i, batch in monit.enum('Train', self.dataloader):
            # Move images to the device
            data_x, data_y = batch['x'].to(self.device), batch['y'].to(self.device)

            # true labels equal to $1$
            true_labels = torch.ones(data_x.size(0), *self.discriminator_x.output_shape,
                                     device=self.device, requires_grad=False)
            # false labels equal to $0$
            false_labels = torch.zeros(data_x.size(0), *self.discriminator_x.output_shape,
                                       device=self.device, requires_grad=False)

            # Train the generators.
            # This returns the generated images.
            gen_x, gen_y = self.optimize_generators(data_x, data_y, true_labels)

            # Train discriminators on samples drawn through the replay buffers
            self.optimize_discriminator(data_x, data_y,
                                        gen_x_buffer.push_and_pop(gen_x),
                                        gen_y_buffer.push_and_pop(gen_y),
                                        true_labels, false_labels)

            # Save training statistics and increment the global step counter
            tracker.save()
            tracker.add_global_step(max(len(data_x), len(data_y)))

            # Save images at intervals
            batches_done = epoch * len(self.dataloader) + i
            if batches_done % self.sample_interval == 0:
                # Save models when sampling images
                experiment.save_checkpoint()
                # Sample images
                self.sample_images(batches_done)

        # Update learning rates
        self.generator_lr_scheduler.step()
        self.discriminator_lr_scheduler.step()
        # New line
        tracker.new_line()
def main():
    """
    Train and test an MNIST classifier for 10 epochs under the `tracker`
    experiment, then save the model.

    The random seed is set before the model is constructed so that the
    initial weights, and not only the data shuffling, are reproducible.
    """
    # set indicator types
    tracker.set_queue("train_loss", 20, True)
    tracker.set_histogram("valid_loss", True)
    tracker.set_scalar("valid_accuracy", True)

    epochs = 10
    train_batch_size = 64
    test_batch_size = 1000
    use_cuda = True
    cuda_device = 0
    seed = 5
    train_log_interval = 10
    learning_rate = 0.01

    # get device
    is_cuda = use_cuda and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    elif cuda_device < torch.cuda.device_count():
        device = torch.device(f"cuda:{cuda_device}")
    else:
        # Requested index is out of range; use the last available device
        print(f"Cuda device index {cuda_device} higher than "
              f"device count {torch.cuda.device_count()}")
        device = torch.device(f"cuda:{torch.cuda.device_count() - 1}")

    # data transform
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])

    # train loader
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=train_batch_size,
        shuffle=True)

    # test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=test_batch_size,
        shuffle=False)

    # set seeds before creating the model; parameter initialization draws
    # from the global random number generator
    torch.manual_seed(seed)

    # model
    model = Net().to(device)

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # only for logging purposes
    configs = {
        'epochs': epochs,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size,
        'use_cuda': use_cuda,
        'cuda_device': cuda_device,
        'seed': seed,
        'train_log_interval': train_log_interval,
        'learning_rate': learning_rate,
        'device': device,
        'train_loader': train_loader,
        'test_loader': test_loader,
        'model': model,
        'optimizer': optimizer,
    }

    # create the experiment
    experiment.create(name='tracker')

    # experiment configs
    experiment.calculate_configs(configs)

    # pyTorch model
    experiment.add_pytorch_models(dict(model=model))

    experiment.start()

    # training loop
    for epoch in range(1, epochs + 1):
        train(model, optimizer, train_loader, device, train_log_interval)
        test(model, test_loader, device)
        logger.log()

    # save the model
    experiment.save_checkpoint()