def load_saved_state(self, filename: str) -> None:
    checkpoint = torch.load(filename)
    assert checkpoint['tokenizer']
    assert checkpoint['embedding']
    assert checkpoint['network-state']
    assert checkpoint['training-args']
    args = checkpoint['training-args']
    self.options = [
        ("tokenizer", args.tokenizer),
        ("# network layers", args.num_decoder_layers),
        ("hidden size", args.hidden_size),
        ("# keywords", args.num_keywords),
        ("learning rate", args.learning_rate),
        ("# epochs", args.num_epochs),
        ("optimizer", args.optimizer),
        ("gamma", args.gamma),
        ("epoch step", args.epoch_step),
        ("context filter", args.context_filter),
    ]
    self.tokenizer = checkpoint['tokenizer']
    self.embedding = checkpoint['embedding']
    self.network = maybe_cuda(
        DNNClassifier(self.tokenizer.numTokens(), args.hidden_size,
                      self.embedding.num_tokens(), args.num_decoder_layers))
    self.network.load_state_dict(checkpoint['network-state'])
    self.criterion = maybe_cuda(nn.NLLLoss())
    self.lock = threading.Lock()

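
# Hedged sketch, not part of the original source: load_saved_state above
# expects a checkpoint dict containing 'tokenizer', 'embedding',
# 'network-state', and 'training-args' entries, so a matching save step
# might look like the function below. The function name and parameter
# names are assumptions for illustration only.
def save_checkpoint_sketch(filename: str, tokenizer, embedding,
                           network_state, training_args) -> None:
    torch.save({'tokenizer': tokenizer,
                'embedding': embedding,
                'network-state': network_state,
                'training-args': training_args},
               filename)
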
def train(dataset : ClassifyBagDataset,
          input_vocab_size : int, hidden_size : int, output_vocab_size : int,
          num_layers : int, batch_size : int, learning_rate : float,
          gamma : float, epoch_step : int, num_epochs : int,
          print_every : int, optimizer_f : Callable[..., Optimizer]) \
          -> Iterable[Checkpoint]:
    print("Initializing PyTorch...")
    inputs, outputs = zip(*dataset)
    dataloader = data.DataLoader(
        data.TensorDataset(torch.FloatTensor(inputs),
                           torch.LongTensor(outputs)),
        batch_size=batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)

    network = maybe_cuda(
        DNNClassifier(input_vocab_size, hidden_size, output_vocab_size,
                      num_layers))
    optimizer = optimizer_f(network.parameters(), lr=learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, epoch_step, gamma=gamma)

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()
        for batch_num, (input_batch, output_batch) in enumerate(dataloader):
            optimizer.zero_grad()
            input_var = maybe_cuda(Variable(input_batch))
            output_var = maybe_cuda(Variable(output_batch))

            prediction_distribution = network(input_var)
            loss = cast(torch.FloatTensor, 0)
            loss += criterion(prediction_distribution.squeeze(), output_var)
            loss.backward()
            optimizer.step()

            # loss.data[0] was removed in PyTorch 0.4; use .item() instead.
            total_loss += loss.item() * batch_size

            if (batch_num + 1) % print_every == 0:
                items_processed = (batch_num + 1) * batch_size + \
                    epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress),
                    items_processed, progress * 100,
                    total_loss / items_processed))
        yield (network.state_dict(), total_loss / items_processed)

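
# Hedged usage sketch, not from the original source: train() above yields a
# (state_dict, average_loss) checkpoint once per epoch, so a caller could keep
# the state with the lowest loss as sketched below. The function and variable
# names here are illustrative assumptions.
def keep_best_checkpoint_sketch(checkpoints: Iterable[Checkpoint]):
    best_state, best_loss = None, float('inf')
    for state, loss in checkpoints:
        if loss < best_loss:
            best_state, best_loss = state, loss
    return best_state, best_loss
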
def __init__(self, wordf_sizes : List[int], vecf_size : int,
             hidden_size : int, num_layers : int, stem_vocab_size : int) \
             -> None:
    super().__init__()
    self._word_features_encoder = maybe_cuda(
        WordFeaturesEncoder(wordf_sizes, hidden_size, 1, hidden_size))
    self._features_classifier = maybe_cuda(
        DNNClassifier(hidden_size + vecf_size,
                      hidden_size, stem_vocab_size, num_layers))
    self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))

def _get_model(self, arg_values : Namespace,
               tactic_vocab_size : int, goal_vocab_size : int) \
               -> CopyArgModel:
    assert self._word_feature_functions
    assert self._vec_feature_functions
    feature_vec_size = sum([feature.feature_size()
                            for feature in self._vec_feature_functions])
    word_feature_vocab_sizes = [feature.vocab_size()
                                for feature in self._word_feature_functions]
    return CopyArgModel(FindArgModel(tactic_vocab_size, goal_vocab_size,
                                     arg_values.max_length,
                                     arg_values.hidden_size),
                        WordFeaturesEncoder(word_feature_vocab_sizes,
                                            arg_values.hidden_size, 1,
                                            arg_values.hidden_size),
                        DNNClassifier(arg_values.hidden_size + feature_vec_size,
                                      arg_values.hidden_size,
                                      tactic_vocab_size, 3))

def _get_model(self, arg_values : Namespace, num_tokens : int) \
               -> DNNClassifier:
    return DNNClassifier(2 * (num_tokens ** arg_values.num_grams),
                         arg_values.hidden_size, 2,
                         arg_values.num_layers)

def train(dataset: StructDataset, args: Namespace,
          num_stems: int, encoded_term_size: int):
    curtime = time.time()
    print("Building data loader...", end="")
    sys.stdout.flush()
    hyp_lists, goals, tactics = zip(*dataset)
    # Every sample must have the same (padded) number of hypotheses, and every
    # hypothesis the same encoded length, so the tensors below are rectangular.
    for hyp_list in hyp_lists:
        assert len(hyp_list) == len(hyp_lists[0])
        assert len(hyp_list) == args.max_hyps
        for hyp in hyp_list:
            assert len(hyp) == len(hyp_list[0]), \
                "len(hyp): {}, len(hyp_list[0]): {}".format(
                    len(hyp), len(hyp_list[0]))
    dataloader = DataLoader(
        TensorDataset(torch.FloatTensor(hyp_lists),
                      torch.FloatTensor(goals),
                      torch.LongTensor([flatten_tactic_structure(tactic)
                                        for tactic in tactics])),
        batch_size=args.batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Initializing modules...", end="")
    sys.stdout.flush()
    initial_encoder = maybe_cuda(
        EncoderDNN(encoded_term_size, args.hidden_size, args.hidden_size,
                   args.num_encoder_layers, args.batch_size))
    stem_decoder = maybe_cuda(
        DNNClassifier(encoded_term_size, args.hidden_size, num_stems,
                      args.num_decoder_layers))
    arg_decoder = maybe_cuda(
        DecoderGRU(encoded_term_size * 2, args.hidden_size,
                   args.num_decoder_layers, args.batch_size))
    optimizer = optimizers[args.optimizer](
        itertools.chain(initial_encoder.parameters(),
                        stem_decoder.parameters(),
                        arg_decoder.parameters()),
        lr=args.learning_rate)
    criterion = maybe_cuda(nn.NLLLoss())
    adjuster = scheduler.StepLR(optimizer, args.epoch_step, gamma=args.gamma)
    print(" {:.2f}s".format(time.time() - curtime))

    start = time.time()
    # Total items over all epochs, so the reported progress stays within 100%.
    num_items = len(dataset) * args.num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(args.num_epochs):
        print("Epoch {}".format(epoch))
        adjuster.step()
        for batch_num, (hyps_batch, goal_batch, tacstruct_batch) \
                in enumerate(cast(Iterable[Tuple[torch.FloatTensor,
                                                 torch.FloatTensor,
                                                 torch.LongTensor]],
                                  dataloader)):
            optimizer.zero_grad()
            predicted_stem_distribution_batch, \
                predicted_arg_distributions_batches = \
                predictTacticTeach(initial_encoder, stem_decoder, arg_decoder,
                                   args.batch_size, args.max_args,
                                   maybe_cuda(hyps_batch),
                                   maybe_cuda(goal_batch),
                                   maybe_cuda(tacstruct_batch))
            # Sum the stem loss and the loss for each predicted argument.
            loss = cast(torch.FloatTensor, 0)
            loss += criterion(predicted_stem_distribution_batch,
                              maybe_cuda(tacstruct_batch[:, 0]))
            for idx in range(args.max_args):
                loss += criterion(predicted_arg_distributions_batches[idx],
                                  maybe_cuda(tacstruct_batch[:, idx + 1]))
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * args.batch_size

            if (batch_num + 1) % args.print_every == 0:
                items_processed = (batch_num + 1) * args.batch_size + \
                    epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(start, progress),
                    items_processed, progress * 100,
                    total_loss / items_processed))
        yield Checkpoint(initial_encoder.state_dict(),
                         stem_decoder.state_dict(),
                         arg_decoder.state_dict(),
                         total_loss / ((batch_num + 1) * args.batch_size))

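
# Hedged sketch, not from the original source: each Checkpoint yielded by the
# train() generator above carries the state dicts of the three modules plus
# the running average loss, so restoring them might look like this. The
# function name, positional unpacking, and parameters are illustrative
# assumptions.
def restore_modules_sketch(checkpoint, initial_encoder, stem_decoder,
                           arg_decoder):
    encoder_state, stem_state, arg_state, avg_loss = checkpoint
    initial_encoder.load_state_dict(encoder_state)
    stem_decoder.load_state_dict(stem_state)
    arg_decoder.load_state_dict(arg_state)
    return avg_loss
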