def _optimize_checkpoints(self, encoded_data: RestrictedDatasetType,
                              arg_values: Namespace,
                              tactic_vocab_size: int, term_vocab_size: int) \
        -> Iterable[NeuralPredictorState]:
        dataloader = data.DataLoader(data.TensorDataset(
            *(self._data_tensors(encoded_data, arg_values))),
                                     batch_size=arg_values.batch_size, num_workers=0,
                                     shuffle=True, pin_memory=True, drop_last=True)
        # Count only full batches, since drop_last=True discards the final
        # partial batch of each epoch.
        num_batches = int(len(encoded_data) / arg_values.batch_size)
        dataset_size = num_batches * arg_values.batch_size

        print("Initializing model...")
        if arg_values.start_from:
            print("Starting from file")
            with open(arg_values.start_from, 'rb') as f:
                state = torch.load(f)
                self.load_saved_state(*state) # type: ignore
            model = self._model
            epoch_start = state[2].epoch
        else:
            epoch_start = 1
            model = maybe_cuda(self._get_model(arg_values, tactic_vocab_size,
                                               term_vocab_size))
        optimizer = optimizers[arg_values.optimizer](model.parameters(),
                                                     lr=arg_values.learning_rate)
        adjuster = scheduler.StepLR(optimizer, arg_values.epoch_step,
                                    gamma=arg_values.gamma)

        training_start = time.time()

        print("Training...")
        for epoch in range(epoch_start, arg_values.num_epochs + 1):
            # Older PyTorch idiom: step the LR scheduler once at the start of
            # each epoch, then report the current learning rate.
            adjuster.step()
            print("Epoch {} (learning rate {:.6f})".format(
                epoch, optimizer.param_groups[0]['lr']))

            epoch_loss = 0.

            for batch_num, data_batch in enumerate(dataloader, start=1):
                optimizer.zero_grad()
                loss = self._getBatchPredictionLoss(data_batch, model)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()

                if batch_num % arg_values.print_every == 0:
                    items_processed = batch_num * arg_values.batch_size + \
                        (epoch - 1) * dataset_size
                    progress = items_processed / (dataset_size *
                                                  arg_values.num_epochs)
                    print("{} ({:7} {:5.2f}%) {:.4f}"
                          .format(timeSince(training_start, progress),
                                  items_processed, progress * 100,
                                  epoch_loss / batch_num))
            yield NeuralPredictorState(epoch,
                                       epoch_loss / num_batches,
                                       model.state_dict())
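
The generator above yields one NeuralPredictorState per epoch but does not write anything to disk itself; the caller is expected to persist the states it wants to keep. A minimal sketch of such a caller follows, assuming the yielded state exposes an epoch field (as the resume branch above suggests) and that a save-path prefix is supplied by the caller; the helper name and file naming are illustrative, not the project's actual checkpointing code.

# Hypothetical consumer of the per-epoch states yielded above (not project code).
def save_epoch_checkpoints(predictor, encoded_data, arg_values,
                           tactic_vocab_size, term_vocab_size,
                           save_prefix):
    # Drain the generator and write one checkpoint file per completed epoch.
    for state in predictor._optimize_checkpoints(encoded_data, arg_values,
                                                 tactic_vocab_size,
                                                 term_vocab_size):
        path = "{}-epoch{}.dat".format(save_prefix, state.epoch)
        with open(path, 'wb') as f:
            torch.save(state, f)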
Example #2
def optimize_checkpoints(data_tensors: List[torch.Tensor],
                         arg_values: Namespace,
                         model: ModelType,
                         batchLoss: Callable[[Sequence[torch.Tensor], ModelType],
                                             torch.FloatTensor]) \
    -> Iterable[NeuralPredictorState]:
    dataloader = data.DataLoader(data.TensorDataset(*data_tensors),
                                 batch_size=arg_values.batch_size,
                                 num_workers=0,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=True)
    # Count only full batches, since drop_last=True discards the final
    # partial batch of each epoch.
    dataset_size = data_tensors[0].size()[0]
    num_batches = int(dataset_size / arg_values.batch_size)
    dataset_size = num_batches * arg_values.batch_size
    print("Initializing model...")
    if arg_values.start_from:
        # Resume from a previously saved checkpoint file.
        print("Starting from file")
        with open(arg_values.start_from, 'rb') as f:
            state = torch.load(f)
            model.load_state_dict(state[1][2].weights)  # type: ignore
        epoch_start = state[1][2].epoch
    else:
        epoch_start = 1
    model = maybe_cuda(model)
    optimizer = optimizers[arg_values.optimizer](model.parameters(),
                                                 lr=arg_values.learning_rate)
    adjuster = scheduler.StepLR(optimizer,
                                arg_values.epoch_step,
                                gamma=arg_values.gamma)
    training_start = time.time()
    print("Training...")
    for epoch in range(epoch_start, arg_values.num_epochs + 1):
        adjuster.step()
        print("Epoch {} (learning rate {:.6f})".format(
            epoch, optimizer.param_groups[0]['lr']))
        epoch_loss = 0.
        for batch_num, data_batch in enumerate(dataloader, start=1):
            optimizer.zero_grad()
            # detect_anomaly() makes the backward pass raise on NaN gradients
            # and report the forward op that produced them, at some runtime cost.
            with autograd.detect_anomaly():
                loss = batchLoss(data_batch, model)
                loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            if batch_num % arg_values.print_every == 0:
                items_processed = batch_num * arg_values.batch_size + \
                    (epoch - 1) * dataset_size
                progress = items_processed / (dataset_size *
                                              arg_values.num_epochs)
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(training_start, progress), items_processed,
                    progress * 100, epoch_loss / batch_num))
        yield NeuralPredictorState(epoch, epoch_loss / num_batches,
                                   model.state_dict())
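
Because optimize_checkpoints only needs a list of tensors, an argparse Namespace of hyperparameters, a model, and a batch-loss callable, it can be exercised on a toy problem. The sketch below is illustrative only: it assumes the project's optimizers table accepts the key "SGD", that maybe_cuda, scheduler, timeSince, and NeuralPredictorState are already in scope from the module above, and the hyperparameter values are arbitrary.

from argparse import Namespace
import torch
import torch.nn as nn
import torch.nn.functional as F

toy_model = nn.Linear(4, 3)              # stands in for ModelType
inputs = torch.randn(64, 4)              # 64 examples, 4 features
targets = torch.randint(0, 3, (64,))     # 3 classes

def batch_loss(batch, model):
    # Batches arrive in the same order as the tensors in data_tensors.
    xs, ys = batch
    return F.cross_entropy(model(xs), ys)

args = Namespace(batch_size=8, start_from=None, optimizer="SGD",  # assumed key
                 learning_rate=0.1, epoch_step=5, gamma=0.5,
                 num_epochs=10, print_every=4)

for checkpoint in optimize_checkpoints([inputs, targets], args,
                                       toy_model, batch_loss):
    # Persist each per-epoch state; file naming here is illustrative.
    torch.save(checkpoint, "toy-epoch{}.dat".format(checkpoint.epoch))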