def predictKTacticsWithLoss_batch(self, in_data : List[TacticContext], k : int,
                                  corrects : List[str]) \
    -> Tuple[List[List[Prediction]], float]:
    assert self.training_args
    if len(in_data) == 0:
        return [], 0
    with self._lock:
        # Tokenize each goal and pad/truncate it to the fixed input length.
        tokenized_goals = [self._tokenizer.toTokenList(goal)
                           for relevant_lemmas, prev_tactics, hypotheses, goal
                           in in_data]
        input_tensor = LongTensor([inputFromSentence(tokenized_goal,
                                                     self.training_args.max_length)
                                   for tokenized_goal in tokenized_goals])
        prediction_distributions = self._model.run(input_tensor,
                                                   batch_size=len(in_data))
        # Encode the correct tactic stems (unknown stems map to index 0) and
        # score the batch's distributions against them.
        correct_stems = [get_stem(correct) for correct in corrects]
        output_var = maybe_cuda(Variable(
            torch.LongTensor([self._embedding.encode_token(correct_stem)
                              if self._embedding.has_token(correct_stem)
                              else 0
                              for correct_stem in correct_stems])))
        loss = self._criterion(prediction_distributions, output_var).item()
        # Take the top-k stems from each distribution, capping k at the
        # stem vocabulary size.
        if k > self._embedding.num_tokens():
            k = self._embedding.num_tokens()
        certainties_and_idxs_list = [single_distribution.view(-1).topk(k)
                                     for single_distribution
                                     in list(prediction_distributions)]
        results = [[Prediction(self._embedding.decode_token(stem_idx.item()) + ".",
                               math.exp(certainty.item()))
                    for certainty, stem_idx in zip(*certainties_and_idxs)]
                   for certainties_and_idxs in certainties_and_idxs_list]
    return results, loss
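
# A minimal usage sketch, not part of the original module: it assumes a
# `predictor` instance exposing predictKTacticsWithLoss_batch above, and that
# TacticContext carries the (relevant_lemmas, prev_tactics, hypotheses, goal)
# fields unpacked there.  The helper name and k=5 are illustrative only.
def example_batch_prediction(predictor, contexts : List[TacticContext],
                             correct_tactics : List[str]) -> float:
    # Request the top 5 tactic stems per context; the second return value is
    # the batch loss against the provided correct tactics.
    predictions, loss = predictor.predictKTacticsWithLoss_batch(
        contexts, 5, correct_tactics)
    for context, preds in zip(contexts, predictions):
        print(context.goal, preds)
    return loss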

def _data_tensors(self, encoded_data : ECDataset, arg_values : Namespace) \
    -> List[torch.Tensor]:
    in_stream = torch.LongTensor([inputFromSentence(datum.goal,
                                                    arg_values.max_length)
                                  for datum in encoded_data])
    out_stream = torch.LongTensor([datum.tactic for datum in encoded_data])
    return [in_stream, out_stream]
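
# Sketch (an assumption, not taken from this repository): tensors shaped like
# the ones _data_tensors returns are commonly wrapped in a TensorDataset and
# DataLoader so a training loop can iterate over shuffled mini-batches.
from torch.utils.data import DataLoader, TensorDataset

def example_batches(in_stream : torch.Tensor, out_stream : torch.Tensor,
                    batch_size : int) -> DataLoader:
    # Pair each padded goal row with its tactic label and reshuffle every epoch.
    return DataLoader(TensorDataset(in_stream, out_stream),
                      batch_size=batch_size, shuffle=True)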

def _predictDistributions(self, in_datas : List[TacticContext]) \
    -> torch.FloatTensor:
    assert self.training_args
    tokenized_goals = [self._tokenizer.toTokenList(in_data.goal)
                       for in_data in in_datas]
    input_list = [inputFromSentence(tokenized_goal, self.training_args.max_length)
                  for tokenized_goal in tokenized_goals]
    input_tensor = LongTensor(input_list).view(len(in_datas), -1)
    return self._model.run(input_tensor)

def train(dataset : StructDataset, input_vocab_size : int, output_vocab_size : int,
          hidden_size : int, learning_rate : float, num_encoder_layers : int,
          num_decoder_layers : int, max_length : int, num_epochs : int,
          batch_size : int, print_every : int,
          optimizer_f : Callable[..., Optimizer]) \
    -> Iterable[Checkpoint]:
    print("Initializing PyTorch...")
    # Pad every hypothesis of every datum to the fixed input length.
    hyps_stream = [[inputFromSentence(hyp, max_length) for hyp in hyps]
                   for hyps, goal, struct in dataset]
    pass  # The remainder of the training loop is not shown in this excerpt.
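
# Since the body of train above is elided, the sketch below shows, under
# stated assumptions, what one epoch of a standard PyTorch loop over
# (input, output) tensor batches could look like.  `model`, `criterion`, and
# `loader` are hypothetical names, and optimizer_f is assumed to accept the
# model parameters plus a learning rate, which this excerpt does not confirm.
from torch.utils.data import DataLoader

def example_epoch(model : torch.nn.Module, loader : DataLoader,
                  criterion : torch.nn.Module, learning_rate : float,
                  optimizer_f : Callable[..., Optimizer]) -> float:
    optimizer = optimizer_f(model.parameters(), lr=learning_rate)
    total_loss = 0.0
    for inputs, outputs in loader:
        # Standard step: clear gradients, score the batch, backpropagate, update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), outputs)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss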

def predictKTactics(self, in_data : Dict[str, Union[List[str], str]], k : int) \
    -> List[Prediction]:
    # Hold the lock while encoding the goal and decoding candidate structures,
    # so an exception cannot leave it acquired.
    with self.lock:
        in_sentence = LongTensor(inputFromSentence(
            self.tokenizer.toTokenList(in_data["goal"]),
            self.max_length)).view(1, -1)
        encoded_vector = self.encoder.run(in_sentence)
        prediction_structures, certainties = \
            self.decodeKTactics(encoded_vector, k,
                                cast(List[str], in_data["hyps"]),
                                k * k, 3)
    return [Prediction(decode_tactic_structure(self.tokenizer, self.embedding,
                                                structure,
                                                cast(List[str], in_data["hyps"])),
                       certainty)
            for structure, certainty in
            zip(prediction_structures, certainties)]
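
# Usage sketch (assumed surrounding code): `predictor` is an instance of the
# class defining predictKTactics above; the dictionary keys "goal" and "hyps"
# match the accesses in that method.  The helper name is illustrative.
def example_top_k(predictor, goal : str, hyps : List[str],
                  k : int = 3) -> List[Prediction]:
    return predictor.predictKTactics({"goal": goal, "hyps": hyps}, k)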