Python save_checkpoint Examples, labml.experiment.save_checkpoint Python Examples

Example #1

0

Show file

File: training_loop.py Project: vishalbelsare/labml

    def __next__(self):
        """Advance the wrapped loop by one step and run the periodic actions.

        The step obtained from the underlying iterator is published to the
        tracker and returned. A tracker save, a console new line and (when
        model saving is enabled) a checkpoint are each triggered once the
        number of steps since their last occurrence reaches the respective
        interval.

        Raises:
            StopIteration: with the argument ``"SIGINT"`` when a signal has
                been recorded, or re-raised from the underlying iterator
                once it is exhausted. ``__finish`` runs first in both cases.
        """
        # A recorded signal ends the loop before another step is taken
        if self.__signal_received is not None:
            logger.log('\nKilling Loop.', Text.danger)
            monit.finish_loop()
            self.__finish()
            raise StopIteration("SIGINT")

        try:
            step = next(self.__loop)
        except StopIteration:
            # Run the end-of-loop work, then let the same exception propagate
            self.__finish()
            raise

        tracker.set_global_step(step)

        # Flush the tracked values
        steps_since_write = step - self.__last_write_step
        if steps_since_write >= self.__log_write_interval:
            tracker.save()
            self.__last_write_step = step

        # Start a new console line
        steps_since_new_line = step - self.__last_new_line_step
        if steps_since_new_line >= self.__log_new_line_interval:
            tracker.new_line()
            self.__last_new_line_step = step

        # Checkpoint, but only when model saving is switched on
        if self.__is_save_models:
            if step - self.__last_save_step >= self.__save_models_interval:
                experiment.save_checkpoint()
                self.__last_save_step = step

        return step

Example #2

0

Show file

    def solve(self):
        """Run the solver loop for `self.epochs` iterations.

        Each iteration calls `self.cfr` once per player on a freshly created
        history, records the per-action statistics of every information set
        and periodically saves the tracker and a checkpoint. When
        `self.is_online_update` is false, the information sets are cleared
        before the per-player passes and `self.update()` is called after
        them. The information sets are inspected once the loop has finished.
        """
        for t in monit.loop(self.epochs):
            if not self.is_online_update:
                for info_set in self.info_sets.values():
                    info_set.clear()

            for player in range(self.n_players):
                history = self.create_new_history()
                # One entry of `1` per player
                ones = [1 for _ in range(self.n_players)]
                self.cfr(history, cast(Player, player), ones)

            if not self.is_online_update:
                self.update()

            with monit.section("Track"):
                for info_set in self.info_sets.values():
                    for action in info_set.actions():
                        # Shared `<key>.<action>` tail of the four metric names
                        suffix = f'{info_set.key}.{action}'
                        stats = {
                            f'strategy.{suffix}':
                                info_set.strategy[action],
                            f'average_strategy.{suffix}':
                                info_set.average_strategy[action],
                            f'regret.{suffix}':
                                info_set.regret[action],
                            f'current_regret.{suffix}':
                                info_set.current_regret[action],
                        }
                        tracker.add(stats)

            # Tracking is keyed on `t`, checkpointing on `t + 1`
            if t % self.track_frequency == 0:
                tracker.save()
                logger.log()

            if (t + 1) % self.save_frequency == 0:
                experiment.save_checkpoint()

        logger.inspect(self.info_sets)

Example #3

0

Show file

File: train.py Project: adrien1018/beta-tetris

 def run_training_loop(self):
     """### Run training loop

     Resumes from the tracker's current global step and runs the remaining
     `self.c.updates - offset` updates. Each update samples with the current
     policy, trains on the samples and saves the tracker. Optimizer, game
     and weight parameters are re-applied every 2nd update, a log line is
     emitted every 25th update and a checkpoint is saved every 200th update
     (all counted as `update + 1`).
     """
     offset = tracker.get_global_step()
     if offset > 100:
         # If resumed, sample several iterations first to reduce sampling bias
         for _ in range(16):
             self.sample(False)
     for _ in monit.loop(self.c.updates - offset):
         update = tracker.get_global_step()
         # sample with current policy
         samples = self.sample()
         # train the model
         self.train(samples)
         # write summary info to the writer, and log to the screen
         tracker.save()
         if (update + 1) % 2 == 0:
             # re-read the configured values and push them to the trainer
             self.set_optim(self.c.lr(), self.c.reg_l2())
             self.set_game_param(self.c.right_gain(), self.c.fix_prob(),
                                 self.c.neg_mul(), self.c.step_reward())
             self.set_weight_param(self.c.entropy_weight(),
                                   self.c.prob_reg_weight(),
                                   self.c.target_prob_weight(),
                                   self.c.gamma(), self.c.lamda())
         if (update + 1) % 25 == 0:
             logger.log()
         if (update + 1) % 200 == 0:
             experiment.save_checkpoint()

Example #4

0

Show file

File: training_loop.py Project: vishalbelsare/labml

 def __finish(self):
     """Run the end-of-loop work.

     Re-installs `self.old_handler` as the SIGINT handler, saves the
     tracker, starts a new console line and, when model saving is enabled,
     logs a message and saves a checkpoint.
     """
     try:
         signal.signal(signal.SIGINT, self.old_handler)
     except ValueError:
         # Best effort: a handler that cannot be restored is ignored
         # (presumably when not running on the main thread - confirm)
         pass

     tracker.save()
     tracker.new_line()

     if not self.__is_save_models:
         return
     logger.log("Saving model...")
     experiment.save_checkpoint()

Example #5

0

Show file

def main():
    """Create and run the `sklearn` experiment, then save a checkpoint."""
    configs = Configs()

    experiment.create(name='sklearn', writers={'sqlite'})
    experiment.calculate_configs(configs)

    # Register the configured model with the experiment under the key `model`
    models = {'model': configs.model}
    experiment.add_sklearn_models(models)

    experiment.start()
    configs.run()

    experiment.save_checkpoint()

Example #6

0

Show file

def main():
    """Create and run the `configs` experiment, then save a checkpoint."""
    configs = Configs()
    experiment.create(name='configs')

    # Second and third arguments of `calculate_configs`
    # (presumably value overrides and the items to compute - confirm)
    overrides = {'optimizer': 'sgd_optimizer'}
    selected = ['set_seed', 'run']
    experiment.calculate_configs(configs, overrides, selected)

    experiment.start()
    configs.run()

    # Save the model
    experiment.save_checkpoint()

Example #7

0

Show file

def main():
    """Train `Net` on the remote MNIST datasets and save a checkpoint.

    Builds the data loaders, model and optimizer from the inline
    configuration, runs `train` and `validate` once per epoch inside the
    experiment context and saves a checkpoint after the loop.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed the global RNG before the model is constructed; seeding after
    # `Net()` would leave the weight initialisation dependent on the
    # unseeded RNG state and make runs non-reproducible.
    torch.manual_seed(configs['seed'])

    # Use the first GPU only when requested and available
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    train_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_train'),
        batch_size=configs['train_batch_size'],
        shuffle=True,
        num_workers=4)

    valid_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_valid'),
        batch_size=configs['valid_batch_size'],
        shuffle=False,
        num_workers=4)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()

Example #8

0

Show file

def main():
    """Configure, seed and run the `configs` experiment, then checkpoint."""
    configs = Configs()

    experiment.create(name='configs')
    overrides = {'optimizer': 'sgd_optimizer'}
    experiment.configs(configs, overrides)

    # Seed PyTorch from the configured value
    torch.manual_seed(configs.seed)

    # Run inside the experiment context
    with experiment.start():
        configs.run()

    # Save the model
    experiment.save_checkpoint()

Example #9

0

Show file

    def train(self):
        """
        ### Train the model

        Runs `self.epochs` epochs over `self.dataloader`. Every batch is one
        optimizer step with gradient-norm clipping; the model is tracked and
        sampled from every 100th batch, the tracker is saved every 10th
        batch, and a checkpoint is written at the end of each epoch.
        """

        for _ in monit.loop(self.epochs):
            for i, batch in monit.enum('Train', self.dataloader):
                # 1-based batch number used by the periodic actions below
                batch_no = i + 1

                # Move the inputs and the targets to the device
                data = batch[0].to(self.device)
                target = batch[1].to(self.device)

                # Advance the global step by the product of the first two
                # batch dimensions (the number of characters, assuming a
                # `[d0, d1]` batch of characters - confirm)
                tracker.add_global_step(data.shape[0] * data.shape[1])

                # Forward pass in training mode
                self.model.train()
                output = self.model(data)

                # Flatten everything except the last dimension for the loss
                flat_output = output.view(-1, output.shape[-1])
                loss = self.loss_func(flat_output, target.view(-1))
                tracker.add("loss.train", loss)

                # Backward pass, clip the gradient norm, update
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                self.optimizer.step()

                # Track the model before its gradients are cleared
                if batch_no % 100 == 0:
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

                # Generate a sample in evaluation mode, without gradients
                if batch_no % 100 == 0:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Save the tracked metrics
                if batch_no % 10 == 0:
                    tracker.save()

            # Save the model at the end of the epoch
            experiment.save_checkpoint()

Example #10

0

Show file

 def run(self):
     """
     ### Training loop

     For each of `self.epochs` epochs: train, sample, start a new console
     line and save a checkpoint, in that order.
     """
     for _epoch in monit.loop(self.epochs):
         # One epoch of training followed by sampling
         self.train()
         self.sample()
         # Move the console output to a new line
         tracker.new_line()
         # Checkpoint at the end of every epoch
         experiment.save_checkpoint()

Example #11

0

Show file

File: dqn.py Project: labmlai/battleship

def main():
    """Run the `Battleship_DQN` experiment and optionally checkpoint it."""
    configs = Configs()

    experiment.create(name='Battleship_DQN')

    # No overrides; the list is the third argument of `calculate_configs`
    # (presumably the items to compute - confirm)
    selected = ['set_seed', 'policy', 'target', 'run']
    experiment.calculate_configs(configs, {}, selected)

    # Register the policy network under the key `model`
    experiment.add_pytorch_models({'model': configs.policy})
    experiment.start()

    configs.run()

    # A final checkpoint is written only when model saving is enabled
    if not configs.is_save_models:
        return
    experiment.save_checkpoint()

Example #12

0

Show file

File: train.py Project: adrien1018/noro-tetris-ai

 def run_training_loop(self):
     """### Run training loop

     Resumes from the tracker's current global step and runs the remaining
     `self.c.updates - offset` updates. Each update samples with the current
     policy, trains on the samples, saves the tracker and logs a line; a
     checkpoint is saved every 500th update (counted as `update + 1`).
     """
     offset = tracker.get_global_step()
     for _ in monit.loop(self.c.updates - offset):
         update = tracker.get_global_step()
         # sample with current policy
         samples = self.sample()
         # train the model
         self.train(samples)
         # write summary info to the writer, and log to the screen
         tracker.save()
         logger.log()
         if (update + 1) % 500 == 0:
             experiment.save_checkpoint()

Example #13

0

Show file

    def loop(self):
        """Run the train/test cycle for the configured number of epochs.

        After each epoch the model parameters are logged and the tracker is
        saved. A console new line is emitted every
        `__log_new_line_interval` epochs, and a checkpoint is saved every
        epoch when model saving is enabled.
        """
        for epoch in monit.loop(range(self.__epochs)):
            self._train()
            self._test()

            self.__log_model_params()

            # Write out the tracked values
            tracker.save()

            # Start a new console line at the end of every
            # `__log_new_line_interval`-th epoch
            epochs_done = epoch + 1
            if epochs_done % self.__log_new_line_interval == 0:
                logger.log()

            if self.__is_save_models:
                experiment.save_checkpoint()

Example #14

0

Show file

    def iterate(self):
        r"""
        ### Iteratively update $\textcolor{lightgreen}{\sigma^t(I)(a)}$

        This updates the strategies for $T$ iterations.

        Each iteration walks the tree once per player, advances the global
        step, tracks the information sets and saves the tracker. A
        checkpoint is saved every $1,000$ iterations.
        """
        # NOTE: the docstring is a raw string so that the LaTeX backslashes
        # (`\textcolor`, `\sigma`) are kept verbatim and not read as escapes.

        # Loop for `epochs` times
        for t in monit.iterate('Train', self.epochs):
            # Walk tree and update regrets for each player
            for i in range(self.n_players):
                self.walk_tree(self.create_new_history(), cast(Player, i), 1, 1)

            # Track data for analytics
            tracker.add_global_step()
            self.tracker(self.info_sets)
            tracker.save()

            # Save checkpoints every $1,000$ iterations
            if (t + 1) % 1_000 == 0:
                experiment.save_checkpoint()

Example #15

0

Show file

    def train(self):
        """Train for `self.epochs` epochs and checkpoint after each epoch.

        Every batch from `self.dataloader` is one optimizer step with
        gradient-norm clipping. On every 100th batch the model is tracked
        and a sample is generated; on every 10th batch the tracker is saved.
        """
        for _ in monit.loop(self.epochs):
            for i, batch in monit.enum('Train', self.dataloader):
                # Periodic actions are keyed on the 1-based batch count
                on_100th = (i + 1) % 100 == 0
                on_10th = (i + 1) % 10 == 0

                # Move data to the device
                inputs = batch[0].to(self.device)
                labels = batch[1].to(self.device)

                tracker.add_global_step(inputs.shape[0] * inputs.shape[1])

                self.model.train()
                predictions = self.model(inputs)

                # Calculate and log loss
                loss = self.loss_func(
                    predictions.view(-1, predictions.shape[-1]),
                    labels.view(-1),
                )
                tracker.add("loss.train", loss)

                # Gradients -> clip -> optimizer step
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                self.optimizer.step()

                # Log the model parameters and gradients every 100th batch,
                # while the gradients are still populated
                if on_100th:
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

                # Sample in evaluation mode with gradients disabled
                if on_100th:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Save the tracked metrics
                if on_10th:
                    tracker.save()

            experiment.save_checkpoint()

Example #16

0

Show file

File: c1_labml_monit.py Project: raitraidma/samples

def main():
    """Train `Net` on MNIST and save a checkpoint.

    Builds the MNIST data loaders (downloading into the lab data path),
    the model and the optimizer from the inline configuration, runs `train`
    and `validate` once per epoch inside the experiment context and saves a
    checkpoint after the loop.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed the global RNG before the model is constructed; seeding after
    # `Net()` would leave the weight initialisation dependent on the
    # unseeded RNG state and make runs non-reproducible.
    torch.manual_seed(configs['seed'])

    # Use the first GPU only when requested and available
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()

Example #17

0

Show file

File: c2_labml_monit_mix.py Project: raitraidma/samples

def main():
    """Train `Net` on MNIST with interleaved training and validation.

    Builds the MNIST data loaders, the model and the optimizer from the
    inline configuration. In every epoch `monit.mix` interleaves batches
    from the training and validation loaders; gradients are enabled and
    the optimizer is stepped only for `train` batches. Loss and accuracy
    are saved to the tracker under the current mode's namespace, and a
    checkpoint is saved after the loop.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed the global RNG before the model is constructed; seeding after
    # `Net()` would leave the weight initialisation dependent on the
    # unseeded RNG state and make runs non-reproducible.
    torch.manual_seed(configs['seed'])

    # Use the first GPU only when requested and available
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            for mode, batch in monit.mix(10, ('train', train_loader),
                                         ('valid', valid_loader)):
                with tracker.namespace(mode):
                    # Gradients are only needed for training batches
                    with torch.set_grad_enabled(mode == 'train'):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        pred = output.argmax(dim=1, keepdim=True)

                        if mode == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                            # The global step counts training samples only
                            tracker.add_global_step(data.shape[0])

                        tracker.save({
                            'loss.':
                            loss,
                            'accuracy.':
                            pred.eq(target.view_as(pred)).sum() / pred.shape[0]
                        })

            tracker.new_line()

    # save the model
    experiment.save_checkpoint()

Example #18

0

Show file

File: train.py Project: weihaoxie/nn

def train():
    """
    ## Create and train a small model

    Builds the Tiny Shakespeare text dataset, the Retro training dataset,
    the nearest-neighbor encoder and the model, then trains for 32 epochs.
    After every epoch the prompt and a sample generated from it are logged
    and a checkpoint is saved.
    """

    # Create an experiment
    experiment.create(name='retro_small')

    # GPU device
    device = torch.device('cuda:0')

    # Load Tiny Shakespeare dataset
    text_path = lab.get_data_path() / 'tiny_shakespeare.txt'
    tds = TextFileDataset(
        text_path,
        list,
        url=
        'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
    )

    # Load [Retro dataset](dataset.html)
    retro_path = lab.get_data_path() / 'retro_train_dataset.json'
    train_dataset = Dataset(retro_path, tds)

    # Create dataloader; samples are drawn with replacement
    random_sampler = RandomSampler(train_dataset, replacement=True)
    train_dl = DataLoader(train_dataset, batch_size=4, sampler=random_sampler)

    # Hyper-parameters
    chunk_len = 16
    d_model = 128
    d_ff = 512
    n_heads = 16
    d_k = 16

    # Create the nearest neighbor encoder
    nearest_neighbor_encoder = NearestNeighborEncoder(
        chunk_len, 6, {3}, d_model, n_heads, d_k, d_ff)
    # Create the model and move it to the device
    model = RetroModel(
        tds.n_tokens,
        d_model,
        6, {3, 5},
        chunk_len,
        n_heads,
        d_k,
        d_ff,
        encoder=nearest_neighbor_encoder,
    )
    model = model.to(device)
    # Create the optimizer
    optimizer = Noam(model.parameters(), lr=1., d_model=d_model, warmup=2_000)
    # Create the `Trainer`
    trainer = Trainer(device, model, train_dl, optimizer)
    # Create the `Sampler`
    sampler = Sampler(device, model, tds, chunk_len)
    # Prompt that the sampler continues after every epoch
    prompt = '''Second Citizen:\nOne word, good citizens.\n\nFirst Citizen:'''

    # Set models for saving and loading
    experiment.add_pytorch_models(model=model)

    # Start the experiment
    with experiment.start():
        # Train for `32` epochs
        for _ in monit.loop(32):
            # Train
            trainer()
            # Print a new line
            tracker.new_line()
            # Log the prompt and a 128-long sample from it; every newline is
            # shown as a literal `\n` followed by a real line break
            shown_prompt = prompt.replace('\n', '\\n\n')
            shown_sample = sampler.sample(prompt, 128).replace('\n', '\\n\n')
            logger.log([(shown_prompt, Text.subtle),
                        (shown_sample, Text.none)])
            # Save models
            experiment.save_checkpoint()

Example #19

0

Show file

def main_train():
    """Train the LSTM model, validating alongside, and checkpoint per epoch.

    Loads and splits the source files, builds the model, loss and
    optimizer, and then runs 100 epochs. Every epoch creates a fresh
    trainer and validator and advances both in lock-step through the
    steps of the epoch. A `KeyboardInterrupt` saves a checkpoint and
    returns immediately.
    """
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all python files and split them into training/validation sets
        files = parser.load.load_files()
        train_files, valid_files = parser.load.split_train_valid(
            files, is_shuffle=False)

    with monit.section("Create model"):
        model = SimpleLstmModel(
            encoding_size=tokenizer.VOCAB_SIZE,
            embedding_size=tokenizer.VOCAB_SIZE,
            lstm_size=lstm_size,
            lstm_layers=lstm_layers,
        )
        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # Initial hidden and cell states are all zeros
    state_shape = (lstm_layers, batch_size, lstm_size)
    h0 = torch.zeros(state_shape, device=device)
    c0 = torch.zeros(state_shape, device=device)

    # Setup logger indicators
    tracker.set_queue("train.loss", queue_size=500, is_print=True)
    tracker.set_queue("valid.loss", queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training scratch (step '0')
    experiment.start()

    # Number of batches per epoch
    total_length = sum(len(f[1]) + 1 for f in train_files)
    batches = math.ceil(total_length / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Arguments shared by the trainer and the validator
    shared_args = dict(model=model,
                       loss_func=loss_func,
                       optimizer=optimizer,
                       batch_size=batch_size,
                       seq_len=seq_len,
                       h0=h0,
                       c0=c0,
                       eof=0)

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # A new trainer and validator are created for every epoch
        trainer = Trainer(files=train_files, is_train=True, **shared_args)
        validator = Trainer(files=valid_files, is_train=False, **shared_args)

        # Next batch to train and validate
        train_batch = 0
        valid_batch = 0

        # Loop through steps
        for step in range(1, steps_per_epoch):
            try:
                with DelayedKeyboardInterrupt():
                    # Set global step
                    batches_done = min(batches,
                                       (batches * step) // steps_per_epoch)
                    tracker.set_global_step(epoch * batches + batches_done)

                    # Fraction of the epoch covered once this step is done;
                    # it bounds the batches to train and validate on
                    fraction = min(1., (step + 1) / steps_per_epoch)
                    train_batch_limit = trainer.x.shape[0] * fraction
                    valid_batch_limit = validator.x.shape[0] * fraction

                    with monit.section("train",
                                       total_steps=trainer.x.shape[0],
                                       is_partial=True):
                        model.train()
                        # Train up to the limit for this step
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid",
                                       total_steps=validator.x.shape[0],
                                       is_partial=True):
                        model.eval()
                        # Validate up to the limit for this step
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (step + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                # Keep the progress made so far, then stop training
                experiment.save_checkpoint()
                return

        experiment.save_checkpoint()

Example #20

0

Show file

File: experiment.py Project: weihaoxie/nn

    def step(self, idx: int):
        """
        ### Training Step

        Runs one discriminator update followed by one generator (and mapping
        network) update, each accumulating gradients over
        `gradient_accumulate_steps` batches and clipping gradient norms to
        `1.0` before the optimizer step. Depending on `idx + 1`, it also
        applies the lazy gradient penalty and path length penalty, logs
        models and generated images, and saves a checkpoint. The tracker is
        saved at the end.

        * `idx` is the index of the training step
        """

        # Train the discriminator
        with monit.section('Discriminator'):
            # Reset gradients
            self.discriminator_optimizer.zero_grad()

            # Whether this step logs activations, models and images
            is_log_step = (idx + 1) % self.log_generated_interval == 0

            # Accumulate gradients for `gradient_accumulate_steps`
            for _ in range(self.gradient_accumulate_steps):
                # Update `mode`. Set whether to log activation
                with self.mode.update(is_log_activations=is_log_step):
                    # Sample images from generator
                    generated_images, _ = self.generate_images(self.batch_size)
                    # Classify the generated images; they are detached so
                    # that no gradients flow back into the generator
                    fake_output = self.discriminator(generated_images.detach())

                    # Get real images from the data loader
                    real_images = next(self.loader).to(self.device)
                    # The penalty is only applied on every
                    # `lazy_gradient_penalty_interval`-th step
                    is_penalty_step = (
                        (idx + 1) % self.lazy_gradient_penalty_interval == 0)
                    # We need gradients w.r.t. real images for the penalty
                    if is_penalty_step:
                        real_images.requires_grad_()
                    # Discriminator classification for real images
                    real_output = self.discriminator(real_images)

                    # Get discriminator loss
                    real_loss, fake_loss = self.discriminator_loss(
                        real_output, fake_output)
                    disc_loss = real_loss + fake_loss

                    # Add gradient penalty
                    if is_penalty_step:
                        # Calculate and log gradient penalty
                        gp = self.gradient_penalty(real_images, real_output)
                        tracker.add('loss.gp', gp)
                        # Multiply by the coefficient and by the interval,
                        # then add the penalty to the loss
                        disc_loss = disc_loss + (
                            0.5 * self.gradient_penalty_coefficient * gp *
                            self.lazy_gradient_penalty_interval)

                    # Compute gradients
                    disc_loss.backward()

                    # Log discriminator loss
                    tracker.add('loss.discriminator', disc_loss)

            if is_log_step:
                # Log discriminator model parameters occasionally
                tracker.add('discriminator', self.discriminator)

            # Clip gradients for stabilization
            torch.nn.utils.clip_grad_norm_(self.discriminator.parameters(),
                                           max_norm=1.0)
            # Take optimizer step
            self.discriminator_optimizer.step()

        # Train the generator
        with monit.section('Generator'):
            # Reset gradients
            self.generator_optimizer.zero_grad()
            self.mapping_network_optimizer.zero_grad()

            # Accumulate gradients for `gradient_accumulate_steps`
            for _ in range(self.gradient_accumulate_steps):
                # Sample images from generator
                generated_images, w = self.generate_images(self.batch_size)
                # Discriminator classification for generated images
                fake_output = self.discriminator(generated_images)

                # Get generator loss
                gen_loss = self.generator_loss(fake_output)

                # Add path length penalty, only after the warm-up steps and
                # on every `lazy_path_penalty_interval`-th step
                if (idx > self.lazy_path_penalty_after
                        and (idx + 1) % self.lazy_path_penalty_interval == 0):
                    # Calculate path length penalty
                    plp = self.path_length_penalty(w, generated_images)
                    # Ignore if `nan`
                    if not torch.isnan(plp):
                        tracker.add('loss.plp', plp)
                        gen_loss = gen_loss + plp

                # Calculate gradients
                gen_loss.backward()

                # Log generator loss
                tracker.add('loss.generator', gen_loss)

            if (idx + 1) % self.log_generated_interval == 0:
                # Log generator and mapping network parameters occasionally
                tracker.add('generator', self.generator)
                tracker.add('mapping_network', self.mapping_network)

            # Clip gradients for stabilization
            torch.nn.utils.clip_grad_norm_(self.generator.parameters(),
                                           max_norm=1.0)
            torch.nn.utils.clip_grad_norm_(self.mapping_network.parameters(),
                                           max_norm=1.0)

            # Take optimizer step
            self.generator_optimizer.step()
            self.mapping_network_optimizer.step()

        # Log generated images: six generated followed by three real ones,
        # taken from the last accumulation batches
        if (idx + 1) % self.log_generated_interval == 0:
            images = torch.cat([generated_images[:6], real_images[:3]], dim=0)
            tracker.add('generated', images)
        # Save model checkpoints
        if (idx + 1) % self.save_checkpoint_interval == 0:
            experiment.save_checkpoint()

        # Flush tracker
        tracker.save()

Example #21

0

Show file

File: cycle_gan.py Project: Sandy4321/nn-1

    def run(self):
        r"""
        ## Training

        We aim to solve:
        $$G^{*}, F^{*} = \arg \min_{G,F} \max_{D_X, D_Y} \mathcal{L}(G, F, D_X, D_Y)$$

        where,
        $G$ translates images from $X \rightarrow Y$,
        $F$ translates images from $Y \rightarrow X$,
        $D_X$ tests if images are from $X$ space,
        $D_Y$ tests if images are from $Y$ space, and
        \begin{align}
        \mathcal{L}(G, F, D_X, D_Y)
            &= \mathcal{L}_{GAN}(G, D_Y, X, Y) \\
            &+ \mathcal{L}_{GAN}(F, D_X, Y, X) \\
            &+ \lambda_1 \mathcal{L}_{cyc}(G, F) \\
            &+ \lambda_2 \mathcal{L}_{identity}(G, F) \\
        \\
        \mathcal{L}_{GAN}(G, D_Y, X, Y)
            &= \mathbb{E}_{y \sim p_{data}(y)} \Big[\log D_Y(y)\Big] \\
            &+ \mathbb{E}_{x \sim p_{data}(x)} \bigg[\log\Big(1 - D_Y(G(x))\Big)\bigg] \\
        \\
        \mathcal{L}_{GAN}(F, D_X, Y, X)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\log D_X(x)\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \bigg[\log\Big(1 - D_X(F(y))\Big)\bigg] \\
        \\
        \mathcal{L}_{cyc}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(G(x)) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(F(y)) - y \rVert_1\Big] \\
        \\
        \mathcal{L}_{identity}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(x) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(y) - y \rVert_1\Big] \\
        \end{align}

        $\mathcal{L}_{GAN}$ is the generative adversarial loss from the original
        GAN paper.

        $\mathcal{L}_{cyc}$ is the cyclic loss, where we try to get $F(G(x))$ to be similar to $x$,
        and $G(F(y))$ to be similar to $y$.
        Basically if the two generators (transformations) are applied in series it should give back the
        original image.
        This is the main contribution of this paper.
        It trains the generators to generate an image of the other distribution that is similar to
        the original image.
        Without this loss $G(x)$ could generate anything that's from the distribution of $Y$.
        Now it needs to generate something from the distribution of $Y$ but still has properties of $x$,
        so that $F(G(x))$ can re-generate something like $x$.

        $\mathcal{L}_{identity}$ is the identity loss.
        This was used to encourage the mapping to preserve color composition between
        the input and the output.

        To solve $G^{\*}, F^{\*}$,
        discriminators $D_X$ and $D_Y$ should **ascend** on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
        \log D_Y\Big(y^{(i)}\Big) \\
        &+ \log \Big(1 - D_Y\Big(G\Big(x^{(i)}\Big)\Big)\Big) \\
        &+ \log D_X\Big(x^{(i)}\Big) \\
        & +\log\Big(1 - D_X\Big(F\Big(y^{(i)}\Big)\Big)\Big)
        \Bigg]
        \end{align}
        That is, descend on the *negative* log-likelihood loss.

        In order to stabilize the training the negative log-likelihood objective
        was replaced by a least-squared loss -
        the least-squared error of discriminator, labelling real images with 1,
        and generated images with 0.
        So we want to descend on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(y^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_Y\Big(G\Big(x^{(i)}\Big)\Big)^2 \\
            &+ \bigg(D_X\Big(x^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_X\Big(F\Big(y^{(i)}\Big)\Big)^2
        \Bigg]
        \end{align}

        We use least-squares for generators also.
        The generators should *descend* on the gradient,
        \begin{align}
        \nabla_{\theta_{F, G}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(G\Big(x^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \bigg(D_X\Big(F\Big(y^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \mathcal{L}_{cyc}(G, F)
            + \mathcal{L}_{identity}(G, F)
        \Bigg]
        \end{align}

        We use `generator_xy` for $G$ and `generator_yx` for $F$.
        We use `discriminator_x` for $D_X$ and `discriminator_y` for $D_Y$.
        """
        # NOTE: the docstring above is a raw string on purpose; it is full of
        # LaTeX commands such as `\nabla`, `\frac` and `\begin` whose leading
        # backslash sequences would otherwise be read as Python escapes.

        # Replay buffers to keep generated samples
        gen_x_buffer = ReplayBuffer()
        gen_y_buffer = ReplayBuffer()

        # Loop through epochs
        for epoch in monit.loop(self.epochs):
            # Loop through the dataset
            for i, batch in monit.enum('Train', self.dataloader):
                # Move images to the device
                data_x, data_y = batch['x'].to(self.device), batch['y'].to(
                    self.device)

                # true labels equal to $1$
                true_labels = torch.ones(data_x.size(0),
                                         *self.discriminator_x.output_shape,
                                         device=self.device,
                                         requires_grad=False)
                # false labels equal to $0$
                false_labels = torch.zeros(data_x.size(0),
                                           *self.discriminator_x.output_shape,
                                           device=self.device,
                                           requires_grad=False)

                # Train the generators.
                # This returns the generated images.
                gen_x, gen_y = self.optimize_generators(
                    data_x, data_y, true_labels)

                #  Train discriminators
                self.optimize_discriminator(data_x, data_y,
                                            gen_x_buffer.push_and_pop(gen_x),
                                            gen_y_buffer.push_and_pop(gen_y),
                                            true_labels, false_labels)

                # Save training statistics and increment the global step counter
                tracker.save()
                tracker.add_global_step(max(len(data_x), len(data_y)))

                # Save images at intervals
                batches_done = epoch * len(self.dataloader) + i
                if batches_done % self.sample_interval == 0:
                    # Save models when sampling images
                    experiment.save_checkpoint()
                    # Sample images
                    self.sample_images(batches_done)

            # Update learning rates
            self.generator_lr_scheduler.step()
            self.discriminator_lr_scheduler.step()
            # New line
            tracker.new_line()

Example #22

0

Show file

def main():
    """Train and evaluate the MNIST model, logging through `tracker`.

    Registers the tracker indicators, builds the data loaders, model and
    optimizer from the hyper-parameters hard-coded below, records them as
    the experiment configs, runs `train` and `test` once per epoch and
    finally saves a checkpoint.
    """
    # set indicator types
    tracker.set_queue("train_loss", 20, True)
    tracker.set_histogram("valid_loss", True)
    tracker.set_scalar("valid_accuracy", True)

    epochs = 10

    train_batch_size = 64
    test_batch_size = 1000

    use_cuda = True
    cuda_device = 0
    seed = 5
    train_log_interval = 10

    learning_rate = 0.01

    # set seeds
    # Seed before the model and the loaders are created: parameter
    # initialisation draws from the global RNG, so seeding afterwards would
    # leave the initial weights different on every run.
    torch.manual_seed(seed)

    # get device
    is_cuda = use_cuda and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        if cuda_device < torch.cuda.device_count():
            device = torch.device(f"cuda:{cuda_device}")
        else:
            # Requested index does not exist; fall back to the last device.
            print(f"Cuda device index {cuda_device} higher than "
                  f"device count {torch.cuda.device_count()}")

            device = torch.device(f"cuda:{torch.cuda.device_count() - 1}")

    # data transform
    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # train loader
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=train_batch_size,
        shuffle=True)

    # test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=test_batch_size,
        shuffle=False)

    # model
    model = Net().to(device)

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # only for logging purposes
    configs = {
        'epochs': epochs,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size,
        'use_cuda': use_cuda,
        'cuda_device': cuda_device,
        'seed': seed,
        'train_log_interval': train_log_interval,
        'learning_rate': learning_rate,
        'device': device,
        'train_loader': train_loader,
        'test_loader': test_loader,
        'model': model,
        'optimizer': optimizer,
    }

    # create the experiment
    experiment.create(name='tracker')

    # experiment configs
    experiment.calculate_configs(configs)

    # pyTorch model
    experiment.add_pytorch_models(dict(model=model))

    experiment.start()

    # training loop
    for _ in range(1, epochs + 1):
        train(model, optimizer, train_loader, device, train_log_interval)
        test(model, test_loader, device)
        logger.log()

    # save the model
    experiment.save_checkpoint()