Ejemplo n.º 1
0
    def solve(self):
        """
        ### Run the solver

        For each of `self.epochs` iterations this calls `self.cfr` once per
        player, tracks per-action values of every information set, and
        periodically saves the tracked values and a checkpoint.
        When `self.is_online_update` is false, the information sets are
        cleared before the iteration and `self.update()` is called after it.
        """
        for epoch in monit.loop(self.epochs):
            # Reset all information sets when not updating online
            if not self.is_online_update:
                for info_set in self.info_sets.values():
                    info_set.clear()

            # One `cfr` call per player, each from a new history
            # and with a list of ones (one entry per player)
            for player in range(self.n_players):
                self.cfr(self.create_new_history(),
                         cast(Player, player),
                         [1] * self.n_players)

            # Apply the deferred update when not updating online
            if not self.is_online_update:
                self.update()

            # Track the values of each action of each information set
            with monit.section("Track"):
                for info_set in self.info_sets.values():
                    for action in info_set.actions():
                        suffix = f'{info_set.key}.{action}'
                        tracker.add({
                            f'strategy.{suffix}': info_set.strategy[action],
                            f'average_strategy.{suffix}': info_set.average_strategy[action],
                            f'regret.{suffix}': info_set.regret[action],
                            f'current_regret.{suffix}': info_set.current_regret[action],
                        })

            # Save tracked values and log every `track_frequency` iterations
            if epoch % self.track_frequency == 0:
                tracker.save()
                logger.log()

            # Save a checkpoint every `save_frequency` iterations
            if (epoch + 1) % self.save_frequency == 0:
                experiment.save_checkpoint()

        # Print the information sets once solving is done
        logger.inspect(self.info_sets)
Ejemplo n.º 2
0
 def run_training_loop(self):
     """
     ### Run training loop

     Resumes from the tracker's current global step and runs the remaining
     `self.c.updates - offset` updates. Each update samples with the current
     policy, trains on the samples and saves the tracked values.
     Hyper-parameters are re-applied from `self.c` every 2 updates,
     a log line is written every 25 updates and a checkpoint is saved
     every 200 updates.
     """
     offset = tracker.get_global_step()
     if offset > 100:
         # If resumed, sample several iterations first to reduce sampling bias
         for _ in range(16):
             self.sample(False)

     for _ in monit.loop(self.c.updates - offset):
         # Global step at the start of this update
         update = tracker.get_global_step()

         # Sample with current policy
         samples = self.sample()
         # Train the model
         self.train(samples)
         # Write summary info to the writer
         tracker.save()

         # Re-read the hyper-parameters from the configs
         if (update + 1) % 2 == 0:
             self.set_optim(self.c.lr(), self.c.reg_l2())
             self.set_game_param(self.c.right_gain(), self.c.fix_prob(),
                                 self.c.neg_mul(), self.c.step_reward())
             self.set_weight_param(self.c.entropy_weight(),
                                   self.c.prob_reg_weight(),
                                   self.c.target_prob_weight(),
                                   self.c.gamma(), self.c.lamda())

         # Log to the screen periodically
         if (update + 1) % 25 == 0:
             logger.log()

         # Save a checkpoint periodically
         if (update + 1) % 200 == 0:
             experiment.save_checkpoint()
Ejemplo n.º 3
0
    def run_training_loop(self):
        """
        ### Run training loop

        Every update computes a linearly decreasing learning rate and
        clip range, samples with the current policy, trains on the samples
        and saves the tracked values.
        """

        # Queues of size 100 for `reward` and `length`
        for queue_name in ('reward', 'length'):
            tracker.set_queue(queue_name, 100, True)

        for update in monit.loop(self.updates):
            # Fraction of the training that is still to be done
            remaining = 1 - update / self.updates

            # `learning_rate` and `clip_range` $\epsilon$ decrease with progress
            learning_rate, clip_range = 2.5e-4 * remaining, 0.1 * remaining

            # Sample with the current policy and train on those samples
            self.train(self.sample(), learning_rate, clip_range)

            # Write the tracked values
            tracker.save()

            # Start a new log line on the screen every 1,000 updates
            if (update + 1) % 1_000 == 0:
                logger.log()
Ejemplo n.º 4
0
    def run(self):
        """
        ### Training loop

        Full-batch training: the whole dataset goes through the model at
        every epoch, since the dataset is small.
        Sampling instead would require picking a set of nodes for each
        training step together with the edges spanning those nodes.
        """
        device = self.device

        # Feature vectors, labels and adjacency matrix on the device
        node_features = self.dataset.features.to(device)
        node_labels = self.dataset.labels.to(device)
        # The adjacency matrix gets an empty last dimension for the heads
        adjacency = self.dataset.adj_mat.to(device).unsqueeze(-1)

        # Random split: the first `training_samples` nodes of a permutation
        # are used for training, the rest for validation
        permutation = torch.randperm(len(node_labels))
        train_nodes = permutation[:self.training_samples]
        valid_nodes = permutation[self.training_samples:]

        for _ in monit.loop(self.epochs):
            # --- Training step ---
            self.model.train()
            self.optimizer.zero_grad()
            train_output = self.model(node_features, adjacency)
            # Loss over the training nodes only
            train_loss = self.loss_func(train_output[train_nodes], node_labels[train_nodes])
            train_loss.backward()
            self.optimizer.step()
            # Track training loss and accuracy
            tracker.add('loss.train', train_loss)
            tracker.add('accuracy.train',
                        accuracy(train_output[train_nodes], node_labels[train_nodes]))

            # --- Validation ---
            self.model.eval()
            # Gradients are not needed for validation
            with torch.no_grad():
                valid_output = self.model(node_features, adjacency)
                # Loss over the validation nodes only
                valid_loss = self.loss_func(valid_output[valid_nodes], node_labels[valid_nodes])
                # Track validation loss and accuracy
                tracker.add('loss.valid', valid_loss)
                tracker.add('accuracy.valid',
                            accuracy(valid_output[valid_nodes], node_labels[valid_nodes]))

            # Save the tracked values
            tracker.save()
Ejemplo n.º 5
0
def main():
    """Create the `test_schedule` experiment and save the schedule's value once a second."""
    experiment.create(name='test_schedule', writers={'screen', 'web_api'})

    # Schedule that is registered as the `lr` configuration
    schedule = DynamicSchedule(0.01, (0, 1))
    experiment.configs({'lr': schedule})

    with experiment.start():
        for _ in monit.loop(100):
            # Save the current value of the schedule
            current = schedule()
            tracker.save('hp.lr', current)
            time.sleep(1)
Ejemplo n.º 6
0
def main():
    """Run 10 monitored iterations, each mixing a `train` and a `valid` range."""
    import time

    # The two named iterators that `monit.mix` interleaves
    train_part = ('train', range(50))
    valid_part = ('valid', range(10))

    for _ in monit.loop(10):
        for name, value in monit.mix(5, train_part, valid_part):
            time.sleep(0.05)
            # Save the value under the name of the iterator it came from
            tracker.save({name: value})
        # Finish the log line of this iteration
        tracker.new_line()
Ejemplo n.º 7
0
def setup_and_add():
    """Register ten scalar indicators up front, then add and save each one for 1000 steps."""
    indicator_names = [f"loss1.{t}" for t in range(10)]

    # Only the first indicator is printed
    for position, name in enumerate(indicator_names):
        tracker.set_scalar(name, is_print=position == 0)

    experiment.start()

    for step in monit.loop(1000):
        # A save follows every single add
        for name in indicator_names:
            tracker.add({name: step})
            tracker.save()
Ejemplo n.º 8
0
 def __iter__(self):
     """
     Start iterating: create the monitored loop from the current global step
     up to the loop count, and try to install the `SIGINT` handler.
     """
     # Steps from the current global step, spaced by the loop step
     steps = range(tracker.get_global_step(),
                   self.__loop_count,
                   self.__loop_step)
     self.__loop = monit.loop(steps)
     iter(self.__loop)

     try:
         # Keep the previous handler in `old_handler`
         previous_handler = signal.signal(signal.SIGINT, self.__handler)
         self.old_handler = previous_handler
     except ValueError:
         # `signal.signal` raises `ValueError` when the handler cannot be
         # installed (e.g. outside the main thread); continue without it
         pass

     return self
Ejemplo n.º 9
0
def main():
    """
    Train and validate a `Net` model on the `mnist_train` / `mnist_valid`
    remote datasets, monitored as the `mnist_labml_monit` experiment.

    The random seed is set before the model is created so that the weight
    initialization is covered by the seed.
    A checkpoint is saved after all the epochs have run.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Use the first GPU when requested and available, otherwise the CPU
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")

    train_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_train'),
        batch_size=configs['train_batch_size'],
        shuffle=True,
        num_workers=4)

    valid_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_valid'),
        batch_size=configs['valid_batch_size'],
        shuffle=False,
        num_workers=4)

    # Seed the random number generator *before* creating the model;
    # seeding afterwards leaves the parameter initialization unseeded
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()
Ejemplo n.º 10
0
def main():
    """Create the `test_dynamic_hp` experiment and save the `lr` of its configs once a second."""
    experiment.create(name='test_dynamic_hp', writers={'screen', 'web_api'})

    # A stand-alone hyper-parameter; it is replaced below by the one from the configs
    hyper_param = FloatDynamicHyperParam(0.01, (0, 1))

    # Register the configurations and take the `lr` hyper-parameter from them
    configs = Configs()
    experiment.configs(configs)
    hyper_param = configs.lr

    with experiment.start():
        for _ in monit.loop(100):
            # Save the current value of the hyper-parameter
            current = hyper_param()
            tracker.save('hp.lr', current)
            time.sleep(1)
Ejemplo n.º 11
0
    def train(self):
        """
        ## Train model

        Runs `self.step` for each of the `training_steps` steps and starts
        a new line every `log_generated_interval` steps.
        """
        for step_idx in monit.loop(self.training_steps):
            # Take one training step
            self.step(step_idx)

            # Start a new line after every `log_generated_interval`-th step
            steps_done = step_idx + 1
            if steps_done % self.log_generated_interval == 0:
                tracker.new_line()
Ejemplo n.º 12
0
    def __iter__(self):
        """
        Start iterating: build the step iterator from the current global step,
        wrap it in a monitored loop, and try to install the `SIGINT` handler.
        """
        # Iterator over the training steps, starting at the current global step
        start_step = tracker.get_global_step()
        self._iter = TrainingLoopIterator(start_step, self.__loop_count, self.__loop_step)

        # Monitor the iterator; the cast only affects static type checking
        monitored = typing.cast(Collection, self._iter)
        self.__loop = monit.loop(monitored)
        iter(self.__loop)

        try:
            # Keep the previous handler in `old_handler`
            previous_handler = signal.signal(signal.SIGINT, self.__handler)
            self.old_handler = previous_handler
        except ValueError:
            # `signal.signal` raises `ValueError` when the handler cannot be
            # installed (e.g. outside the main thread); continue without it
            pass

        return self
Ejemplo n.º 13
0
    def run(self):
        """Track one text artifact and two indexed-text artifacts for `self.epochs` iterations."""
        # Register the artifacts; all of them are printed
        tracker.set_text('text_artifact', is_print=True)
        for indexed_name in ('ti', 'other'):
            tracker.set_indexed_text(indexed_name, is_print=True)

        for epoch in monit.loop(self.epochs):
            tracker.add('text_artifact', f'sample {epoch}')

            # Five indexed entries per artifact, keyed by the entry number
            for entry in range(5):
                index = f'{entry}'
                tracker.add('ti', (index, 'text' * 5 + f'text {epoch} {entry}'))
                tracker.add('other', (index, f'other {entry}'))

            # Save the tracked values and write the log line
            tracker.save()
            logger.log()
Ejemplo n.º 14
0
    def train(self):
        """
        ### Train the model

        Runs `self.epochs` passes over `self.dataloader`.
        Every 100th batch the model is tracked and a sample is generated,
        every 10th batch the tracked metrics are saved, and a checkpoint is
        saved at the end of each epoch.
        """
        for _ in monit.loop(self.epochs):
            for batch_idx, batch in monit.enum('Train', self.dataloader):
                batches_done = batch_idx + 1
                is_hundredth = batches_done % 100 == 0

                # Inputs and targets on the device
                data = batch[0].to(self.device)
                target = batch[1].to(self.device)

                # Advance the tracker step by the product of the
                # first two dimensions of the input
                tracker.add_global_step(data.shape[0] * data.shape[1])

                # Forward pass in training mode
                self.model.train()
                output = self.model(data)

                # Loss over the flattened outputs and targets
                flat_output = output.view(-1, output.shape[-1])
                loss = self.loss_func(flat_output, target.view(-1))
                tracker.add("loss.train", loss)

                # Backward pass, gradient clipping and optimizer step
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                self.optimizer.step()

                # Track the model before the gradients are cleared
                if is_hundredth:
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

                # Generate a sample in evaluation mode, without gradients
                if is_hundredth:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Save the tracked metrics every 10th batch
                if batches_done % 10 == 0:
                    tracker.save()

            # Save the model at the end of the epoch
            experiment.save_checkpoint()
Ejemplo n.º 15
0
def add_save():
    """Add and save ten scalar indicators for `N` steps, registering them on the first step."""
    arr = torch.zeros((1000, 1000))
    experiment.start()

    for step in monit.loop(N):
        # Ten in-place increments of the array
        for _ in range(10):
            arr += 1

        # Register the indicators on the first step only; just the first one is printed
        if step == 0:
            for t in range(10):
                tracker.set_scalar(f"loss1.{t}", is_print=t == 0)

        # A save follows every single add
        for t in range(10):
            name = f'loss1.{t}'
            tracker.add({name: step})
            tracker.save()
Ejemplo n.º 16
0
 def run(self):
     """
     ### Training loop

     For each of `self.epochs` epochs: train, sample, start a new console
     line and save a checkpoint, in that order.
     """
     for _epoch in monit.loop(self.epochs):
         self.train()  # train the model
         self.sample()  # sample some images
         tracker.new_line()  # new line in the console
         experiment.save_checkpoint()  # save the model
Ejemplo n.º 17
0
 def run_training_loop(self):
     """
     ### Run training loop

     Resumes from the tracker's current global step and runs the remaining
     `self.c.updates - offset` updates. Each update samples with the current
     policy, trains on the samples, saves the tracked values and logs to
     the screen. A checkpoint is saved every 500 updates.
     """
     offset = tracker.get_global_step()

     for _ in monit.loop(self.c.updates - offset):
         # Global step at the start of this update
         update = tracker.get_global_step()

         # Sample with current policy
         samples = self.sample()
         # Train the model
         self.train(samples)

         # Write summary info to the writer, and log to the screen
         tracker.save()
         logger.log()

         # Save a checkpoint periodically
         if (update + 1) % 500 == 0:
             experiment.save_checkpoint()
Ejemplo n.º 18
0
    def loop(self):
        """
        Run the epochs: train, test, log the model parameters and save the
        tracked values; optionally log a new line and save a checkpoint.
        """
        epochs = range(self.__epochs)

        for epoch in monit.loop(epochs):
            # One round of training followed by testing
            self._train()
            self._test()

            self.__log_model_params()

            # Write the tracked values
            tracker.save()

            # Move to the next output line every `__log_new_line_interval` epochs
            epochs_done = epoch + 1
            if epochs_done % self.__log_new_line_interval == 0:
                logger.log()

            # Checkpoint only when model saving is enabled
            if self.__is_save_models:
                experiment.save_checkpoint()
Ejemplo n.º 19
0
    def run_training_loop(self):
        """
        ### Run training loop

        Every update samples with the current policy, trains on the samples
        and saves the tracked indicators.
        """

        # Queues of size 100 for `reward` and `length`
        for queue_name in ('reward', 'length'):
            tracker.set_queue(queue_name, 100, True)

        for update in monit.loop(self.updates):
            # Sample with the current policy and train on those samples
            self.train(self.sample())

            # Save tracked indicators
            tracker.save()

            # Start a new line on the screen every 1,000 updates
            updates_done = update + 1
            if updates_done % 1_000 == 0:
                logger.log()
Ejemplo n.º 20
0
    def train(self):
        """
        Train for `self.epochs` epochs over `self.dataloader`.

        Every 100th batch the model is tracked and a sample is generated,
        every 10th batch the tracked metrics are saved, and a checkpoint is
        saved after each epoch.
        """
        device = self.device

        for _ in monit.loop(self.epochs):
            for step, batch in monit.enum('Train', self.dataloader):
                count = step + 1

                # Move inputs and targets to the device
                data = batch[0].to(device)
                target = batch[1].to(device)

                # Advance the global step by the product of the
                # first two dimensions of the input
                n_rows, n_cols = data.shape[0], data.shape[1]
                tracker.add_global_step(n_rows * n_cols)

                self.model.train()
                output = self.model(data)

                # Calculate and log the loss on flattened outputs and targets
                loss = self.loss_func(output.view(-1, output.shape[-1]), target.view(-1))
                tracker.add("loss.train", loss)

                # Gradients, clipping, optimizer step
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
                self.optimizer.step()

                # Track the model on every 100th batch, while the gradients are still set
                if count % 100 == 0:
                    tracker.add('model', self.model)

                # Clear the gradients
                self.optimizer.zero_grad()

                # Generate a sample on every 100th batch, in evaluation mode
                if count % 100 == 0:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Save the tracked metrics on every 10th batch
                if count % 10 == 0:
                    tracker.save()

            # Checkpoint at the end of the epoch
            experiment.save_checkpoint()
Ejemplo n.º 21
0
    def start_training(self, model):
        """
        Run the training and validation epochs for the given model,
        saving the model whenever the validation loss improves
        (if `self.is_save_model` is set).

        :param model: Instance of the NewsClassifier class
        """
        # Lowest validation loss seen so far
        lowest_val_loss = float('inf')

        for _ in monit.loop(self.epochs):
            # Training pass, tracked under the `train` namespace
            with tracker.namespace('train'):
                self.train_epoch(model, self.train_data_loader, 'train')

            # Validation pass, tracked under the `valid` namespace;
            # only the second returned value (the loss) is used
            with tracker.namespace('valid'):
                _ignored, val_loss = self.train_epoch(model,
                                                      self.val_data_loader,
                                                      'valid')

            # Remember the new best loss and save the model if enabled
            if val_loss < lowest_val_loss:
                lowest_val_loss = val_loss

                if self.is_save_model:
                    self.save_model(model)

            tracker.new_line()
    def run_training_loop(self):
        """
        ### Run training loop

        Every update tracks the exploration coefficient and the prioritized
        replay $\\beta$, samples with the current policy and, once the replay
        buffer is full, trains the model and periodically copies it to the
        target network.
        """

        # Queues of size 100 for `reward` and `length`
        for queue_name in ('reward', 'length'):
            tracker.set_queue(queue_name, 100, True)

        # The target network starts as a copy of the model
        self.target_model.load_state_dict(self.model.state_dict())

        for update in monit.loop(self.updates):
            # $\\epsilon$, exploration fraction
            exploration = self.exploration_coefficient(update)
            tracker.add('exploration', exploration)

            # $\\beta$ for prioritized replay
            beta = self.prioritized_replay_beta(update)
            tracker.add('beta', beta)

            # Sample with the current policy
            self.sample(exploration)

            # Training only starts once the buffer is full
            if self.replay_buffer.is_full():
                self.train(beta)

                # Copy the model to the target network every `update_target_model` updates
                is_sync_update = update % self.update_target_model == 0
                if is_sync_update:
                    self.target_model.load_state_dict(self.model.state_dict())

            # Save tracked indicators
            tracker.save()

            # Start a new line on the screen every 1,000 updates
            updates_done = update + 1
            if updates_done % 1_000 == 0:
                logger.log()
Ejemplo n.º 23
0
    def run(self):
        r"""
        ## Training

        We aim to solve:
        $$G^{*}, F^{*} = \arg \min_{G,F} \max_{D_X, D_Y} \mathcal{L}(G, F, D_X, D_Y)$$

        where,
        $G$ translates images from $X \rightarrow Y$,
        $F$ translates images from $Y \rightarrow X$,
        $D_X$ tests if images are from $X$ space,
        $D_Y$ tests if images are from $Y$ space, and
        \begin{align}
        \mathcal{L}(G, F, D_X, D_Y)
            &= \mathcal{L}_{GAN}(G, D_Y, X, Y) \\
            &+ \mathcal{L}_{GAN}(F, D_X, Y, X) \\
            &+ \lambda_1 \mathcal{L}_{cyc}(G, F) \\
            &+ \lambda_2 \mathcal{L}_{identity}(G, F) \\
        \\
        \mathcal{L}_{GAN}(G, F, D_Y, X, Y)
            &= \mathbb{E}_{y \sim p_{data}(y)} \Big[log D_Y(y)\Big] \\
            &+ \mathbb{E}_{x \sim p_{data}(x)} \bigg[log\Big(1 - D_Y(G(x))\Big)\bigg] \\
            &+ \mathbb{E}_{x \sim p_{data}(x)} \Big[log D_X(x)\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \bigg[log\Big(1 - D_X(F(y))\Big)\bigg] \\
        \\
        \mathcal{L}_{cyc}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(G(x)) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(F(y)) - y \rVert_1\Big] \\
        \\
        \mathcal{L}_{identity}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(x) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(y) - y \rVert_1\Big] \\
        \end{align}

        $\mathcal{L}_{GAN}$ is the generative adversarial loss from the original
        GAN paper.

        $\mathcal{L}_{cyc}$ is the cyclic loss, where we try to get $F(G(x))$ to be similar to $x$,
        and $G(F(y))$ to be similar to $y$.
        Basically if the two generators (transformations) are applied in series it should give back the
        original image.
        This is the main contribution of this paper.
        It trains the generators to generate an image of the other distribution that is similar to
        the original image.
        Without this loss $G(x)$ could generate anything that's from the distribution of $Y$.
        Now it needs to generate something from the distribution of $Y$ but still has properties of $x$,
        so that $F(G(x))$ can re-generate something like $x$.

        $\mathcal{L}_{identity}$ is the identity loss.
        This was used to encourage the mapping to preserve color composition between
        the input and the output.

        To solve $G^{\*}, F^{\*}$,
        discriminators $D_X$ and $D_Y$ should **ascend** on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
        \log D_Y\Big(y^{(i)}\Big) \\
        &+ \log \Big(1 - D_Y\Big(G\Big(x^{(i)}\Big)\Big)\Big) \\
        &+ \log D_X\Big(x^{(i)}\Big) \\
        & +\log\Big(1 - D_X\Big(F\Big(y^{(i)}\Big)\Big)\Big)
        \Bigg]
        \end{align}
        That is descend on *negative* log-likelihood loss.

        In order to stabilize the training the negative log-likelihood objective
        was replaced by a least-squared loss -
        the least-squared error of discriminator, labelling real images with 1,
        and generated images with 0.
        So we want to descend on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(y^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_Y\Big(G\Big(x^{(i)}\Big)\Big)^2 \\
            &+ \bigg(D_X\Big(x^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_X\Big(F\Big(y^{(i)}\Big)\Big)^2
        \Bigg]
        \end{align}

        We use least-squares for generators also.
        The generators should *descend* on the gradient,
        \begin{align}
        \nabla_{\theta_{F, G}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(G\Big(x^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \bigg(D_X\Big(F\Big(y^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \mathcal{L}_{cyc}(G, F)
            + \mathcal{L}_{identity}(G, F)
        \Bigg]
        \end{align}

        We use `generator_xy` for $G$ and `generator_yx` for $F$.
        We use `discriminator_x` for $D_X$ and `discriminator_y` for $D_Y$.
        """

        # Replay buffers to keep generated samples
        gen_x_buffer = ReplayBuffer()
        gen_y_buffer = ReplayBuffer()

        # Loop through epochs
        for epoch in monit.loop(self.epochs):
            # Loop through the dataset
            for i, batch in monit.enum('Train', self.dataloader):
                # Move images to the device
                data_x, data_y = batch['x'].to(self.device), batch['y'].to(
                    self.device)

                # true labels equal to $1$;
                # sized by the batch and by the output shape of `discriminator_x`
                true_labels = torch.ones(data_x.size(0),
                                         *self.discriminator_x.output_shape,
                                         device=self.device,
                                         requires_grad=False)
                # false labels equal to $0$, with the same shape as the true labels
                false_labels = torch.zeros(data_x.size(0),
                                           *self.discriminator_x.output_shape,
                                           device=self.device,
                                           requires_grad=False)

                # Train the generators.
                # This returns the generated images.
                gen_x, gen_y = self.optimize_generators(
                    data_x, data_y, true_labels)

                #  Train discriminators.
                #  The generated images are passed through the replay buffers first.
                self.optimize_discriminator(data_x, data_y,
                                            gen_x_buffer.push_and_pop(gen_x),
                                            gen_y_buffer.push_and_pop(gen_y),
                                            true_labels, false_labels)

                # Save training statistics and increment the global step counter
                # by the larger of the two batch sizes
                tracker.save()
                tracker.add_global_step(max(len(data_x), len(data_y)))

                # Save images at intervals;
                # `batches_done` counts batches across all epochs so far
                batches_done = epoch * len(self.dataloader) + i
                if batches_done % self.sample_interval == 0:
                    # Save models when sampling images
                    experiment.save_checkpoint()
                    # Sample images
                    self.sample_images(batches_done)

            # Update learning rates once per epoch
            self.generator_lr_scheduler.step()
            self.discriminator_lr_scheduler.step()
            # New line
            tracker.new_line()
Ejemplo n.º 24
0
def main_train():
    """
    Train a `SimpleLstmModel` on the loaded files for 100 epochs.

    Every epoch is split into steps; each step trains and validates up to a
    proportional batch limit, so training and validation progress together.
    A checkpoint is saved at the end of each epoch, and also when a
    `KeyboardInterrupt` is caught, in which case the function returns.
    """
    # Model and batching hyper-parameters
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all python files
        files = parser.load.load_files()
        # Split training and validation data
        train_files, valid_files = parser.load.split_train_valid(
            files, is_shuffle=False)

    with monit.section("Create model"):
        # Create model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0; both tensors have shape `(lstm_layers, batch_size, lstm_size)`
    h0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)
    c0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)

    # Setup logger indicators
    tracker.set_queue("train.loss", queue_size=500, is_print=True)
    tracker.set_queue("valid.loss", queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training scratch (step '0')
    experiment.start()

    # Number of batches per epoch; only used to compute the global step below
    batches = math.ceil(
        sum([len(f[1]) + 1 for f in train_files]) / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # Create trainer; a new one is created for every epoch
        trainer = Trainer(files=train_files,
                          model=model,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          batch_size=batch_size,
                          seq_len=seq_len,
                          is_train=True,
                          h0=h0,
                          c0=c0,
                          eof=0)
        # Create validator; same class as the trainer, with `is_train=False`
        validator = Trainer(files=valid_files,
                            model=model,
                            loss_func=loss_func,
                            optimizer=optimizer,
                            is_train=False,
                            seq_len=seq_len,
                            batch_size=batch_size,
                            h0=h0,
                            c0=c0,
                            eof=0)

        # Next batch to train and validation
        train_batch = 0
        valid_batch = 0

        # Loop through steps; `i` runs from 1 to `steps_per_epoch - 1`
        for i in range(1, steps_per_epoch):
            try:
                # NOTE(review): presumably defers Ctrl-C until the step completes —
                # confirm against `DelayedKeyboardInterrupt`
                with DelayedKeyboardInterrupt():
                    # Set global step
                    global_step = epoch * batches + min(
                        batches, (batches * i) // steps_per_epoch)
                    tracker.set_global_step(global_step)

                    # Last batch to train and validate;
                    # the fraction `(i + 1) / steps_per_epoch` reaches 1 on the final step
                    train_batch_limit = trainer.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)
                    valid_batch_limit = validator.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)

                    with monit.section("train",
                                       total_steps=trainer.x.shape[0],
                                       is_partial=True):
                        model.train()
                        # Train the batches up to this step's limit
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid",
                                       total_steps=validator.x.shape[0],
                                       is_partial=True):
                        model.eval()
                        # Validate the batches up to this step's limit
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                # Save the progress and stop training when interrupted
                experiment.save_checkpoint()
                return

        # Save a checkpoint at the end of every epoch
        experiment.save_checkpoint()
Ejemplo n.º 25
0
def main():
    """
    Train and validate a `Net` model on MNIST, monitored as the
    `mnist_labml_monit` experiment.

    The random seed is set before the model is created so that the weight
    initialization is covered by the seed.
    A checkpoint is saved after all the epochs have run.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Use the first GPU when requested and available, otherwise the CPU
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:0")

    # Convert images to tensors and normalize them
    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    # Seed the random number generator *before* creating the model;
    # seeding afterwards leaves the parameter initialization unseeded
    torch.manual_seed(configs['seed'])

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device,
                  configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # save the model
    experiment.save_checkpoint()
Ejemplo n.º 26
0
def train():
    """
    ## Create and train a small model

    Loads the Tiny Shakespeare text and the Retro training dataset, builds the
    nearest neighbor encoder and the `RetroModel`, and trains for `32` epochs.
    After every epoch it logs a sample generated from a fixed prompt and saves
    a checkpoint.
    """

    # Create an experiment
    experiment.create(name='retro_small')

    # Use the first GPU when one is available; otherwise fall back to the CPU
    # so that `model.to(device)` does not fail on machines without CUDA
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Load Tiny Shakespeare dataset
    tds = TextFileDataset(
        lab.get_data_path() / 'tiny_shakespeare.txt',
        list,
        url=
        'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
    )

    # Load [Retro dataset](dataset.html)
    train_dataset = Dataset(lab.get_data_path() / 'retro_train_dataset.json',
                            tds)

    # Create dataloader; batches are drawn at random with replacement
    train_dl = DataLoader(train_dataset,
                          batch_size=4,
                          sampler=RandomSampler(train_dataset,
                                                replacement=True))

    # Hyper-parameters
    chunk_len = 16
    d_model = 128
    d_ff = 512
    n_heads = 16
    d_k = 16

    # Create the nearest neighbor encoder
    nearest_neighbor_encoder = NearestNeighborEncoder(chunk_len, 6, {3},
                                                      d_model, n_heads, d_k,
                                                      d_ff)
    # Create the model
    model = RetroModel(tds.n_tokens,
                       d_model,
                       6, {3, 5},
                       chunk_len,
                       n_heads,
                       d_k,
                       d_ff,
                       encoder=nearest_neighbor_encoder)
    # Move the model to the device
    model = model.to(device)
    # Create the optimizer
    optimizer = Noam(model.parameters(), lr=1., d_model=d_model, warmup=2_000)
    # Create the `Trainer`
    trainer = Trainer(device, model, train_dl, optimizer)
    # Create the `Sampler`
    sampler = Sampler(device, model, tds, chunk_len)
    # Prompt that the sampler continues after every epoch
    prompt = '''Second Citizen:\nOne word, good citizens.\n\nFirst Citizen:'''

    # Set models for saving and loading
    experiment.add_pytorch_models(model=model)

    # Start the experiment
    with experiment.start():
        # Train for `32` epochs (the loop value itself is not needed)
        for _ in monit.loop(32):
            # Train
            trainer()
            # Print a new line
            tracker.new_line()
            # Sample from the `prompt`; each newline is shown as a literal
            # `\n` followed by a real line break
            logger.log([(prompt.replace('\n', '\\n\n'), Text.subtle),
                        (sampler.sample(prompt,
                                        128).replace('\n',
                                                     '\\n\n'), Text.none)])
            # Save models
            experiment.save_checkpoint()
Ejemplo n.º 27
0
def main():
    """Train and validate a model on MNIST with interleaved batches.

    Builds the MNIST train/validation data loaders, the model and an Adam
    optimizer from the local ``configs`` dictionary and registers them with
    the labml experiment. In every epoch ``monit.mix`` yields ``(mode, batch)``
    pairs drawn from both loaders; training batches update the model and
    advance the global step, and every batch reports its loss and accuracy
    under a tracker namespace named after ``mode``. A checkpoint is saved
    after the loop finishes.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed the RNG *before* the model is constructed; otherwise the weight
    # initialisation in `Net()` is not covered by the seed and runs differ.
    torch.manual_seed(configs['seed'])

    # Use the first GPU only when it is both requested and available
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    # Convert images to tensors and normalize them
    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # Training split, reshuffled on every pass
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)

    # Validation split, kept in a fixed order
    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            # NOTE(review): the leading `10` is handed to `monit.mix` as-is;
            # presumably it controls how often the loaders are interleaved —
            # confirm against the labml documentation.
            for mode, batch in monit.mix(10, ('train', train_loader),
                                         ('valid', valid_loader)):
                is_train = mode == 'train'
                # Put the module in train/eval mode to match the batch so
                # that mode-dependent layers (dropout, batch norm), if the
                # model has any, do not run in training mode on validation
                # data. This is a no-op for models without such layers.
                model.train(is_train)

                with tracker.namespace(mode):
                    # Gradients are only needed for training batches
                    with torch.set_grad_enabled(is_train):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        pred = output.argmax(dim=1, keepdim=True)

                        if is_train:
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                            # Advance the global step by the batch size
                            tracker.add_global_step(data.shape[0])

                        # Report the loss and the fraction of correct
                        # predictions for this batch
                        tracker.save({
                            'loss.':
                            loss,
                            'accuracy.':
                            pred.eq(target.view_as(pred)).sum() / pred.shape[0]
                        })

            tracker.new_line()

    # save the model
    experiment.save_checkpoint()