Example no. 1
0
def __init__(self, goal_data_size: int,
             stem_vocab_size: int,
             token_vocab_size: int,
             hyp_features_size: int,
             hidden_size: int) -> None:
    super().__init__()
    self.hidden_size = hidden_size
    # Embedding tables for tactic stems and hypothesis tokens.
    self._stem_embedding = maybe_cuda(nn.Embedding(stem_vocab_size, hidden_size))
    self._token_embedding = maybe_cuda(nn.Embedding(token_vocab_size, hidden_size))
    # DNN taking a token embedding concatenated with the goal data, followed
    # by a GRU over the hypothesis tokens.
    self._in_hidden = maybe_cuda(EncoderDNN(hidden_size + goal_data_size, hidden_size, hidden_size, 1))
    self._hyp_gru = maybe_cuda(nn.GRU(hidden_size, hidden_size))
    # DNN producing a single likelihood score from a hidden state plus the
    # hypothesis features.
    self._likelyhood_decoder = maybe_cuda(EncoderDNN(hidden_size + hyp_features_size, hidden_size, 1, 2))
Example no. 2
0
def __init__(self, stem_vocab_size: int,
             input_vocab_size: int,
             hidden_size: int) -> None:
    super().__init__()
    self.encoder_model = GoalTokenEncoderModel(
        stem_vocab_size,
        input_vocab_size,
        hidden_size)
    self._likelyhood_layer = maybe_cuda(
        EncoderDNN(hidden_size, hidden_size, 1, 2))
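A minimal usage sketch (not taken from the snippet above): the vocabulary and
hidden sizes below are placeholder assumptions; only the argument order of
GoalTokenEncoderModel comes from the constructor call in Example no. 2.

stem_vocab_size = 128    # number of distinct tactic stems (assumed)
input_vocab_size = 4096  # number of distinct goal tokens (assumed)
hidden_size = 512        # assumed

encoder = GoalTokenEncoderModel(stem_vocab_size, input_vocab_size, hidden_size)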
Example no. 3
0
def __init__(self, stem_vocab_size: int,
             input_vocab_size: int, input_length: int,
             hidden_size: int) -> None:
    super().__init__()
    self.hidden_size = hidden_size
    self._stem_embedding = maybe_cuda(nn.Embedding(stem_vocab_size, hidden_size))
    self._token_embedding = maybe_cuda(nn.Embedding(input_vocab_size, hidden_size))
    self._gru = maybe_cuda(nn.GRU(hidden_size, hidden_size))
    self._likelyhood_layer = maybe_cuda(EncoderDNN(hidden_size, hidden_size, 1, 2))
    self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))
Example no. 4
0
def __init__(self, goal_data_size: int,
             stem_vocab_size: int,
             token_vocab_size: int,
             hyp_features_size: int,
             hidden_size: int) -> None:
    super().__init__()
    self.arg_encoder = HypArgEncoder(stem_vocab_size,
                                     token_vocab_size,
                                     hyp_features_size,
                                     goal_data_size,
                                     hidden_size)
    self.hidden_size = hidden_size
    self._likelyhood_decoder = maybe_cuda(EncoderDNN(
        hidden_size + hyp_features_size, hidden_size, 1, 2))
Example no. 5
0
def train(dataset: StructDataset, args: Namespace, num_stems: int,
          encoded_term_size: int):
    curtime = time.time()
    print("Building data loader...", end="")
    sys.stdout.flush()
    hyp_lists, goals, tactics = zip(*dataset)
    for hyp_list in hyp_lists:
        assert len(hyp_list) == len(hyp_lists[0])
        assert len(hyp_list) == args.max_hyps
        for hyp in hyp_list:
            assert len(hyp) == len(hyp_list[0]), \
                "len(hyp): {}, len(hyp_list[0]): {}".format(len(hyp), len(hyp_list[0]))
    dataloader = DataLoader(TensorDataset(
        torch.FloatTensor(hyp_lists), torch.FloatTensor(goals),
        torch.LongTensor(
            [flatten_tactic_structure(tactic) for tactic in tactics])),
                            batch_size=args.batch_size,
                            num_workers=0,
                            shuffle=True,
                            pin_memory=True,
                            drop_last=True)
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Initializing modules...", end="")
    sys.stdout.flush()
    initial_encoder = maybe_cuda(
        EncoderDNN(encoded_term_size, args.hidden_size, args.hidden_size,
                   args.num_encoder_layers, args.batch_size))
    stem_decoder = maybe_cuda(
        DNNClassifier(encoded_term_size, args.hidden_size, num_stems,
                      args.num_decoder_layers))
    arg_decoder = maybe_cuda(
        DecoderGRU(encoded_term_size * 2, args.hidden_size,
                   args.num_decoder_layers, args.batch_size))
    optimizer = optimizers[args.optimizer](itertools.chain(
        initial_encoder.parameters(), stem_decoder.parameters(),
        arg_decoder.parameters()),
                                           lr=args.learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, args.epoch_step, gamma=args.gamma)
    print(" {:.2f}s".format(time.time() - curtime))

    start = time.time()
    # items_processed below counts items across all epochs, so scale the
    # total accordingly when computing progress.
    num_items = len(dataset) * args.num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(args.num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()
        for batch_num, (hyps_batch, goal_batch, tacstruct_batch) \
            in enumerate(cast(Iterable[Tuple[torch.FloatTensor,
                                             torch.FloatTensor,
                                             torch.LongTensor]],
                              dataloader)):

            optimizer.zero_grad()

            predicted_stem_distribution_batch, predicted_arg_distributions_batches = \
                predictTacticTeach(initial_encoder, stem_decoder, arg_decoder,
                                   args.batch_size, args.max_args,
                                   maybe_cuda(hyps_batch),
                                   maybe_cuda(goal_batch),
                                   maybe_cuda(tacstruct_batch))

            # Accumulate the loss in a float tensor; a LongTensor cannot hold
            # the floating-point NLL loss values.
            loss = maybe_cuda(Variable(torch.FloatTensor([0.])))

            loss += criterion(predicted_stem_distribution_batch,
                              maybe_cuda(tacstruct_batch[:, 0]))
            for idx in range(args.max_args):
                loss += criterion(predicted_arg_distributions_batches[idx],
                                  maybe_cuda(tacstruct_batch[:, idx + 1]))
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * args.batch_size

            if (batch_num + 1) % args.print_every == 0:

                items_processed = (batch_num +
                                   1) * args.batch_size + epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))

        yield Checkpoint(initial_encoder.state_dict(),
                         stem_decoder.state_dict(), arg_decoder.state_dict(),
                         total_loss / ((batch_num + 1) * args.batch_size))
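Since train is a generator that yields one Checkpoint per epoch, a caller is
expected to consume it in a loop and serialize each checkpoint. The driver
below is only a sketch: the file name pattern and the direct torch.save of the
Checkpoint object are assumptions, and dataset, args, num_stems, and
encoded_term_size are assumed to be prepared elsewhere.

for epoch_idx, checkpoint in enumerate(train(dataset, args,
                                             num_stems, encoded_term_size)):
    # Each Checkpoint bundles the three module state dicts and a running
    # average loss, so saving the whole object is enough to resume later.
    torch.save(checkpoint, "checkpoint-{}.pt".format(epoch_idx))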