Example #1
    def load_saved_state(self, filename: str) -> None:
        checkpoint = torch.load(filename)
        assert checkpoint['tokenizer']
        assert checkpoint['embedding']
        assert checkpoint['network-state']
        assert checkpoint['training-args']

        args = checkpoint['training-args']
        self.options = [
            ("tokenizer", args.tokenizer),
            ("# network layers", args.num_decoder_layers),
            ("hidden size", args.hidden_size),
            ("# keywords", args.num_keywords),
            ("learning rate", args.learning_rate),
            ("# epochs", args.num_epochs),
            ("optimizer", args.optimizer),
            ("gamma", args.gamma),
            ("epoch step", args.epoch_step),
            ("context filter", args.context_filter),
        ]

        self.tokenizer = checkpoint['tokenizer']
        self.embedding = checkpoint['embedding']

        self.network = maybe_cuda(
            DNNClassifier(self.tokenizer.numTokens(), args.hidden_size,
                          self.embedding.num_tokens(),
                          args.num_decoder_layers))
        self.network.load_state_dict(checkpoint['network-state'])
        self.criterion = maybe_cuda(nn.NLLLoss())
        self.lock = threading.Lock()
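
For context, a checkpoint that satisfies the assertions in load_saved_state above could be written out roughly as follows. This is a minimal sketch, not the project's actual save routine; the save_state name and its argument list are assumptions, only the dictionary keys are taken from the example.

import torch

def save_state(filename, tokenizer, embedding, network, training_args):
    # Keys must match the ones load_saved_state asserts on and reads back.
    torch.save({'tokenizer': tokenizer,
                'embedding': embedding,
                'network-state': network.state_dict(),
                'training-args': training_args},
               filename)
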
Example #2
def train(dataset : ClassifyBagDataset,
          input_vocab_size : int, hidden_size : int, output_vocab_size : int,
          num_layers : int, batch_size : int, learning_rate : float, gamma : float,
          epoch_step : int, num_epochs : int,
          print_every : int, optimizer_f : Callable[..., Optimizer]) \
          -> Iterable[Checkpoint]:
    print("Initializing PyTorch...")
    inputs, outputs = zip(*dataset)
    dataloader = data.DataLoader(data.TensorDataset(torch.FloatTensor(inputs),
                                                    torch.LongTensor(outputs)),
                                 batch_size=batch_size,
                                 num_workers=0,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=True)
    network = maybe_cuda(
        DNNClassifier(input_vocab_size, hidden_size, output_vocab_size,
                      num_layers))

    optimizer = optimizer_f(network.parameters(), lr=learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, epoch_step, gamma=gamma)

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()

        for batch_num, (input_batch, output_batch) in enumerate(dataloader):

            optimizer.zero_grad()
            input_var = maybe_cuda(Variable(input_batch))
            output_var = maybe_cuda(Variable(output_batch))

            prediction_distribution = network(input_var)

            loss = cast(torch.FloatTensor, 0)
            # print("prediction_distribution.size(): {}"
            #       .format(prediction_distribution.size()))
            loss += criterion(prediction_distribution.squeeze(), output_var)
            loss.backward()

            optimizer.step()
            total_loss += loss.item() * batch_size

            items_processed = ((batch_num + 1) * batch_size +
                               epoch * len(dataset))
            if (batch_num + 1) % print_every == 0:
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))
        yield (network.state_dict(), total_loss / items_processed)
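
Since train is a generator that yields one (state_dict, average_loss) checkpoint per epoch, a caller iterates over it and persists each state as it arrives. The smoke test below is a hypothetical usage sketch: the synthetic dataset, all hyperparameter values, and the output filenames are placeholders, not taken from the examples.

import random
import torch
import torch.optim as optim

# Tiny synthetic bag-of-features dataset: (feature vector, class label) pairs.
fake_dataset = [([random.random() for _ in range(128)], random.randrange(10))
                for _ in range(1024)]

for epoch_num, (state_dict, avg_loss) in enumerate(
        train(fake_dataset, input_vocab_size=128, hidden_size=64,
              output_vocab_size=10, num_layers=2, batch_size=32,
              learning_rate=0.1, gamma=0.9, epoch_step=5, num_epochs=3,
              print_every=8, optimizer_f=optim.SGD)):
    torch.save(state_dict, "classifier-epoch{}.pt".format(epoch_num))
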
Example #3
 def __init__(self,
              wordf_sizes : List[int],
              vecf_size : int,
              hidden_size : int,
              num_layers : int,
              stem_vocab_size : int)\
     -> None:
     super().__init__()
     self._word_features_encoder = maybe_cuda(
         WordFeaturesEncoder(wordf_sizes, hidden_size, 1, hidden_size))
     self._features_classifier = maybe_cuda(
         DNNClassifier(hidden_size + vecf_size, hidden_size,
                       stem_vocab_size, num_layers))
     self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))
Example #4
 def _get_model(self, arg_values : Namespace,
                tactic_vocab_size : int,
                goal_vocab_size : int) \
     -> CopyArgModel:
     assert self._word_feature_functions
     assert self._vec_feature_functions
     feature_vec_size = sum([feature.feature_size()
                             for feature in self._vec_feature_functions])
     word_feature_vocab_sizes = [feature.vocab_size()
                                 for feature in self._word_feature_functions]
     return CopyArgModel(FindArgModel(tactic_vocab_size,
                                      goal_vocab_size, arg_values.max_length,
                                      arg_values.hidden_size),
                         WordFeaturesEncoder(word_feature_vocab_sizes,
                                             arg_values.hidden_size, 1,
                                             arg_values.hidden_size),
                         DNNClassifier(arg_values.hidden_size + feature_vec_size,
                                       arg_values.hidden_size, tactic_vocab_size,
                                       3))
Example #5
 def _get_model(self, arg_values : Namespace, num_tokens : int) \
     -> DNNClassifier:
     return DNNClassifier(2 * (num_tokens**arg_values.num_grams),
                          arg_values.hidden_size, 2, arg_values.num_layers)
Example #6
def train(dataset: StructDataset, args: Namespace, num_stems: int,
          encoded_term_size: int) -> Iterable[Checkpoint]:
    curtime = time.time()
    print("Building data loader...", end="")
    sys.stdout.flush()
    hyp_lists, goals, tactics = zip(*dataset)
    for hyp_list in hyp_lists:
        assert len(hyp_list) == len(hyp_lists[0])
        assert len(hyp_list) == args.max_hyps
        for hyp in hyp_list:
            assert len(hyp) == len(hyp_list[0]), \
                "len(hyp): {}, len(hyp_list[0]): {}".format(len(hyp), len(hyp_list[0]))
    dataloader = DataLoader(TensorDataset(
        torch.FloatTensor(hyp_lists), torch.FloatTensor(goals),
        torch.LongTensor(
            [flatten_tactic_structure(tactic) for tactic in tactics])),
                            batch_size=args.batch_size,
                            num_workers=0,
                            shuffle=True,
                            pin_memory=True,
                            drop_last=True)
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Initializing modules...", end="")
    sys.stdout.flush()
    initial_encoder = maybe_cuda(
        EncoderDNN(encoded_term_size, args.hidden_size, args.hidden_size,
                   args.num_encoder_layers, args.batch_size))
    stem_decoder = maybe_cuda(
        DNNClassifier(encoded_term_size, args.hidden_size, num_stems,
                      args.num_decoder_layers))
    arg_decoder = maybe_cuda(
        DecoderGRU(encoded_term_size * 2, args.hidden_size,
                   args.num_decoder_layers, args.batch_size))
    optimizer = optimizers[args.optimizer](itertools.chain(
        initial_encoder.parameters(), stem_decoder.parameters(),
        arg_decoder.parameters()),
                                           lr=args.learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, args.epoch_step, gamma=args.gamma)
    print(" {:.2f}s".format(time.time() - curtime))

    start = time.time()
    num_items = len(dataset) * args.num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(args.num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()
        for batch_num, (hyps_batch, goal_batch, tacstruct_batch) \
            in enumerate(cast(Iterable[Tuple[torch.FloatTensor,
                                             torch.FloatTensor,
                                             torch.LongTensor]],
                              dataloader)):

            optimizer.zero_grad()

            predicted_stem_distribution_batch, predicted_arg_distributions_batches = \
                predictTacticTeach(initial_encoder, stem_decoder, arg_decoder,
                                   args.batch_size, args.max_args,
                                   maybe_cuda(hyps_batch),
                                   maybe_cuda(goal_batch),
                                   maybe_cuda(tacstruct_batch))

            loss = maybe_cuda(Variable(torch.FloatTensor([0.0])))

            loss += criterion(predicted_stem_distribution_batch,
                              maybe_cuda(tacstruct_batch[:, 0]))
            for idx in range(args.max_args):
                loss += criterion(predicted_arg_distributions_batches[idx],
                                  maybe_cuda(tacstruct_batch[:, idx + 1]))
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * args.batch_size

            if (batch_num + 1) % args.print_every == 0:

                items_processed = (batch_num +
                                   1) * args.batch_size + epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))

        yield Checkpoint(initial_encoder.state_dict(),
                         stem_decoder.state_dict(), arg_decoder.state_dict(),
                         total_loss / ((batch_num + 1) * args.batch_size))
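
Every example above constructs DNNClassifier(input_size, hidden_size, output_size, num_layers) and wraps modules in maybe_cuda, neither of which is defined in these snippets. Below is a minimal sketch of both, assuming DNNClassifier is a plain feed-forward stack ending in a log-softmax (which is what nn.NLLLoss in the training loops expects); the project's actual definitions may differ.

import torch
import torch.nn as nn

def maybe_cuda(component):
    # Move a module or tensor to the GPU when one is available,
    # otherwise return it unchanged.
    return component.cuda() if torch.cuda.is_available() else component

class DNNClassifier(nn.Module):
    # Sketch only: num_layers linear layers with ReLU activations,
    # followed by a projection to output_vocab_size classes and a log-softmax.
    def __init__(self, input_vocab_size: int, hidden_size: int,
                 output_vocab_size: int, num_layers: int) -> None:
        super().__init__()
        layers = [nn.Linear(input_vocab_size, hidden_size), nn.ReLU()]
        for _ in range(num_layers - 1):
            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
        layers.append(nn.Linear(hidden_size, output_vocab_size))
        self._layers = nn.Sequential(*layers)
        self._softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_batch: torch.FloatTensor) -> torch.FloatTensor:
        return self._softmax(self._layers(input_batch))
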