def __init__(self, goal_data_size: int, stem_vocab_size: int,
             token_vocab_size: int, hyp_features_size: int,
             hidden_size: int) -> None:
    super().__init__()
    self.hidden_size = hidden_size
    # Embedding tables for tactic stems and for hypothesis tokens
    self._stem_embedding = maybe_cuda(
        nn.Embedding(stem_vocab_size, hidden_size))
    self._token_embedding = maybe_cuda(
        nn.Embedding(token_vocab_size, hidden_size))
    # One-layer DNN from (hidden_size + goal_data_size) inputs to a
    # hidden_size output; judging by its inputs, it seeds the hypothesis
    # GRU from a stem embedding concatenated with the goal encoding.
    self._in_hidden = maybe_cuda(
        EncoderDNN(hidden_size + goal_data_size, hidden_size,
                   hidden_size, 1))
    # GRU run over the tokens of each hypothesis
    self._hyp_gru = maybe_cuda(nn.GRU(hidden_size, hidden_size))
    # Two-layer DNN scoring each hypothesis from its GRU encoding
    # concatenated with its hand-built features
    self._likelyhood_decoder = maybe_cuda(
        EncoderDNN(hidden_size + hyp_features_size, hidden_size, 1, 2))
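# Illustrative sketch (not part of the original module): how the layers
# declared above compose shape-wise. The standalone layer construction, the
# helper name `_hyp_arg_shape_sketch`, and all dimension values are
# assumptions for the demo; the real model wraps everything in maybe_cuda
# and runs this logic inside its own forward pass.
def _hyp_arg_shape_sketch() -> None:
    import torch
    import torch.nn as nn
    hidden_size, token_vocab_size, hyp_features_size = 64, 500, 2
    seq_len, batch = 10, 4
    token_embedding = nn.Embedding(token_vocab_size, hidden_size)
    hyp_gru = nn.GRU(hidden_size, hidden_size)
    tokens = torch.randint(0, token_vocab_size, (seq_len, batch))
    embedded = token_embedding(tokens)  # (seq_len, batch, hidden_size)
    _, hidden = hyp_gru(embedded)       # hidden: (1, batch, hidden_size)
    # The likelyhood decoder consumes the final GRU state concatenated with
    # the hand-built hypothesis features: hidden_size + hyp_features_size
    # inputs per hypothesis.
    decoder_input = torch.cat(
        (hidden.squeeze(0), torch.randn(batch, hyp_features_size)), dim=1)
    assert decoder_input.shape == (batch, hidden_size + hyp_features_size)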
def __init__(self, stem_vocab_size: int, input_vocab_size: int,
             hidden_size: int) -> None:
    super().__init__()
    # Delegate goal-token encoding to the shared encoder model
    self.encoder_model = GoalTokenEncoderModel(
        stem_vocab_size, input_vocab_size, hidden_size)
    # Two-layer DNN mapping the encoding to a single score
    self._likelyhood_layer = maybe_cuda(
        EncoderDNN(hidden_size, hidden_size, 1, 2))
def __init__(self, stem_vocab_size: int, input_vocab_size: int,
             input_length: int, hidden_size: int) -> None:
    super().__init__()
    # Note: input_length is accepted for interface compatibility but is
    # never stored; the GRU below handles sequences of any length.
    self.hidden_size = hidden_size
    self._stem_embedding = maybe_cuda(
        nn.Embedding(stem_vocab_size, hidden_size))
    self._token_embedding = maybe_cuda(
        nn.Embedding(input_vocab_size, hidden_size))
    self._gru = maybe_cuda(nn.GRU(hidden_size, hidden_size))
    self._likelyhood_layer = maybe_cuda(
        EncoderDNN(hidden_size, hidden_size, 1, 2))
    self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))
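# Instantiation sketch for the two goal-token models above (not from the
# original source). The class names `GoalTokenEncoderArgModel` and
# `GoalTokenArgModel` are hypothetical stand-ins for whatever classes these
# constructors belong to; only the signatures are taken from the code:
#
#     wrapper_model = GoalTokenEncoderArgModel(stem_vocab_size=100,
#                                              input_vocab_size=1000,
#                                              hidden_size=128)
#     gru_model = GoalTokenArgModel(stem_vocab_size=100,
#                                   input_vocab_size=1000,
#                                   input_length=30,
#                                   hidden_size=128)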
def __init__(self, goal_data_size: int, stem_vocab_size: int,
             token_vocab_size: int, hyp_features_size: int,
             hidden_size: int) -> None:
    super().__init__()
    # Encoder that scores hypothesis arguments against the goal
    self.arg_encoder = HypArgEncoder(stem_vocab_size, token_vocab_size,
                                     hyp_features_size, goal_data_size,
                                     hidden_size)
    self.hidden_size = hidden_size
    # Two-layer DNN from the encoding plus hypothesis features to a score
    self._likelyhood_decoder = maybe_cuda(EncoderDNN(
        hidden_size + hyp_features_size, hidden_size, 1, 2))
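# Instantiation sketch for the combined model above (not from the original
# source; the class name `PolyArgModel` and the size values are hypothetical,
# only the constructor signature is taken from the code):
#
#     model = PolyArgModel(goal_data_size=128, stem_vocab_size=100,
#                          token_vocab_size=1000, hyp_features_size=2,
#                          hidden_size=128)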
def train(dataset: StructDataset, args: Namespace, num_stems: int,
          encoded_term_size: int):
    curtime = time.time()
    print("Building data loader...", end="")
    sys.stdout.flush()
    hyp_lists, goals, tactics = zip(*dataset)
    # Every sample must have the same (padded) number of hypotheses, and
    # every hypothesis the same encoded length, so they stack into tensors.
    for hyp_list in hyp_lists:
        assert len(hyp_list) == len(hyp_lists[0])
        assert len(hyp_list) == args.max_hyps
        for hyp in hyp_list:
            assert len(hyp) == len(hyp_list[0]), \
                "len(hyp): {}, len(hyp_list[0]): {}".format(
                    len(hyp), len(hyp_list[0]))
    dataloader = DataLoader(
        TensorDataset(torch.FloatTensor(hyp_lists),
                      torch.FloatTensor(goals),
                      torch.LongTensor([flatten_tactic_structure(tactic)
                                        for tactic in tactics])),
        batch_size=args.batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Initializing modules...", end="")
    sys.stdout.flush()
    initial_encoder = maybe_cuda(
        EncoderDNN(encoded_term_size, args.hidden_size, args.hidden_size,
                   args.num_encoder_layers, args.batch_size))
    stem_decoder = maybe_cuda(
        DNNClassifier(encoded_term_size, args.hidden_size, num_stems,
                      args.num_decoder_layers))
    arg_decoder = maybe_cuda(
        DecoderGRU(encoded_term_size * 2, args.hidden_size,
                   args.num_decoder_layers, args.batch_size))
    optimizer = optimizers[args.optimizer](
        itertools.chain(initial_encoder.parameters(),
                        stem_decoder.parameters(),
                        arg_decoder.parameters()),
        lr=args.learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, args.epoch_step, gamma=args.gamma)
    print(" {:.2f}s".format(time.time() - curtime))

    start = time.time()
    # Count items across all epochs so the progress fraction stays in [0, 1].
    num_items = len(dataset) * args.num_epochs
    total_loss = 0.
    print("Training...")
    for epoch in range(args.num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()
        for batch_num, (hyps_batch, goal_batch, tacstruct_batch) in \
                enumerate(cast(Iterable[Tuple[torch.FloatTensor,
                                              torch.FloatTensor,
                                              torch.LongTensor]],
                               dataloader)):
            optimizer.zero_grad()
            predicted_stem_distribution_batch, \
                predicted_arg_distributions_batches = \
                predictTacticTeach(initial_encoder, stem_decoder, arg_decoder,
                                   args.batch_size, args.max_args,
                                   maybe_cuda(hyps_batch),
                                   maybe_cuda(goal_batch),
                                   maybe_cuda(tacstruct_batch))
            # Accumulate a float loss (a long tensor cannot hold NLL values):
            # one term for the stem prediction, one per argument position.
            loss = maybe_cuda(Variable(torch.FloatTensor([0.])))
            loss += criterion(predicted_stem_distribution_batch,
                              maybe_cuda(tacstruct_batch[:, 0]))
            for idx in range(args.max_args):
                loss += criterion(predicted_arg_distributions_batches[idx],
                                  maybe_cuda(tacstruct_batch[:, idx + 1]))
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * args.batch_size

            if (batch_num + 1) % args.print_every == 0:
                items_processed = (batch_num + 1) * args.batch_size + \
                    epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))
        yield Checkpoint(initial_encoder.state_dict(),
                         stem_decoder.state_dict(),
                         arg_decoder.state_dict(),
                         total_loss / ((batch_num + 1) * args.batch_size))
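# Driver sketch for the generator above (not part of the original module).
# The helper name, the hyperparameter values, and the "SGD" optimizer key
# are assumptions; the Namespace fields are exactly the ones `train` reads.
def _train_driver_sketch(dataset: StructDataset, num_stems: int,
                         encoded_term_size: int):
    args = Namespace(max_hyps=5, batch_size=64, hidden_size=128,
                     num_encoder_layers=2, num_decoder_layers=2,
                     optimizer="SGD", learning_rate=0.4,
                     epoch_step=5, gamma=0.8, num_epochs=20,
                     max_args=2, print_every=10)
    # train() yields one Checkpoint per epoch; keep the most recent one.
    last_checkpoint = None
    for checkpoint in train(dataset, args, num_stems, encoded_term_size):
        last_checkpoint = checkpoint
    return last_checkpoint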