def _optimize_model(self, arg_values : argparse.Namespace) -> \
        Iterable[GoalEncState]:
        """Train the goal-encoder model, yielding a (metadata, state) pair
        for each checkpoint produced by optimize_checkpoints.

        With --start-from, training resumes from a saved checkpoint: the
        pickled arg_values, metadata, and training state are loaded from
        that file, and the scrape data is re-tokenized against the loaded
        metadata.
        """
        with print_time("Loading data", guard=arg_values.verbose):
            if arg_values.start_from:
                # NOTE: arg_values is deliberately rebound to the values
                # pickled in the checkpoint; the caller's arg_values is only
                # used to locate the checkpoint file.
                _, (arg_values, unparsed_args, (metadata, state)) = \
                    torch.load(arg_values.start_from)
                _, tokenized_goals, outputs = \
                    goals_to_total_distances_tensors_with_meta(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file), metadata)
            else:
                metadata, tokenized_goals, outputs = \
                    goals_to_total_distances_tensors(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file))

        with print_time("Converting data to tensors", guard=arg_values.verbose):
            # Goals are variable-length token sequences; pad them into a
            # single rectangular LongTensor for batching.
            tensors = [pad_sequence([torch.LongTensor(tok_goal)
                                     for tok_goal in tokenized_goals],
                                     batch_first=True),
                       torch.FloatTensor(outputs)]

        with print_time("Building the model", guard=arg_values.verbose):
            model = self._get_model(arg_values, goal_enc_get_num_tokens(metadata))

            if arg_values.start_from:
                # presumably restores model/training state from the loaded
                # checkpoint — see load_saved_state.
                self.load_saved_state(arg_values, unparsed_args, state)

        # Lazily pair the (fixed) metadata with each successive training
        # state as checkpoints are produced.
        return ((metadata, state) for state in
                optimize_checkpoints(tensors, arg_values, model,
                                     lambda batch_tensors, model:
                                     self._get_batch_prediction_loss(arg_values,
                                                                     batch_tensors,
                                                                     model)))
# --- Example 2 (code-search result separator) ---
 def _optimize_checkpoints(self, encoded_data : FeaturesDataset, arg_values : Namespace,
                           metadata : Tuple[Embedding, List[VecFeature], List[WordFeature]]) \
     -> Iterable[NeuralPredictorState]:
     """Train on the features dataset, yielding one predictor state per
     checkpoint.

     Only the embedding component of the metadata is used here (for the
     model's token-vocabulary size); the feature lists are unused.
     """
     # Underscore-prefix the unused components of the metadata tuple.
     embedding, _vec_features, _word_features = metadata
     return optimize_checkpoints(
         self._data_tensors(encoded_data, arg_values), arg_values,
         self._get_model(arg_values, embedding.num_tokens()),
         lambda batch_tensors, model: self._getBatchPredictionLoss(
             batch_tensors, model))
# --- Example 3 (code-search result separator) ---
 def _optimize_checkpoints(self,
                           encoded_data : ApplyDataset,
                           arg_values : Namespace,
                           tokenizer : Tokenizer) \
                           -> Iterable[NeuralPredictorState]:
     """Build the data tensors and model, then train, yielding one
     predictor state per checkpoint."""
     # Arguments are evaluated left-to-right: tensors first, then the
     # model sized to the tokenizer's vocabulary.
     return optimize_checkpoints(
         self._data_tensors(encoded_data, arg_values),
         arg_values,
         self._get_model(arg_values, tokenizer.numTokens()),
         lambda batch, net: self._getBatchPredictionLoss(batch, net))
# --- Example 4 (code-search result separator) ---
 def _optimize_checkpoints(self, encoded_data : EncFeaturesDataset,
                           arg_values : Namespace,
                           tokenizer : Tokenizer,
                           embedding : Embedding) \
     -> Iterable[NeuralPredictorState]:
     """Train on the encoded-features dataset, yielding one predictor
     state per checkpoint."""
     # Tensors first, then a model sized to both the embedding and the
     # tokenizer vocabularies (same evaluation order as the direct call).
     data_tensors = self._data_tensors(encoded_data, arg_values)
     model = self._get_model(arg_values, embedding.num_tokens(),
                             tokenizer.numTokens())

     def batch_loss(batch_tensors, net):
         # One training-loss evaluation per batch.
         return self._getBatchPredictionLoss(batch_tensors, net)

     return optimize_checkpoints(data_tensors, arg_values, model, batch_loss)
# --- Example 5 (code-search result separator) ---
    def _optimize_model(
            self, arg_values: argparse.Namespace
    ) -> Iterable[FeaturesDNNEvaluatorState]:
        """Train the features-DNN evaluator, yielding a (picklable token
        map, state) pair for each checkpoint.

        With --start-from, the pickled arg_values from the checkpoint
        replaces the caller's, and the scrape data is re-featurized using
        the loaded token map.
        """
        with print_time("Loading data", guard=arg_values.verbose):
            if arg_values.start_from:
                # NOTE: arg_values is deliberately rebound to the values
                # pickled in the checkpoint; the caller's arg_values is only
                # used to locate the checkpoint file.
                _, (arg_values, unparsed_args,
                    (picklable_token_map,
                     state)) = torch.load(arg_values.start_from)
                token_map = tmap_from_picklable(picklable_token_map)
                _, word_features_data, vec_features_data, outputs,\
                    word_features_vocab_sizes, vec_features_size = features_to_total_distances_tensors_with_map(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file), token_map)
            else:
                token_map, word_features_data, vec_features_data, outputs, \
                    word_features_vocab_sizes, vec_features_size = features_to_total_distances_tensors(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file))

        # eprint(f"word data: {word_features_data[:10]}")
        # eprint(f"vec data: {vec_features_data[:10]}")
        # eprint(f"outputs: {outputs[:100]}")

        with print_time("Converting data to tensors",
                        guard=arg_values.verbose):
            # Word features are categorical indices (LongTensor); vec
            # features and the regression targets are floats.
            tensors = [
                torch.LongTensor(word_features_data),
                torch.FloatTensor(vec_features_data),
                torch.FloatTensor(outputs)
            ]

        with print_time("Building the model", guard=arg_values.verbose):
            model = self._get_model(arg_values, word_features_vocab_sizes,
                                    vec_features_size)
            if arg_values.start_from:
                # presumably restores model/training state from the loaded
                # checkpoint — see load_saved_state.
                self.load_saved_state(arg_values, unparsed_args, state)

        # Convert the token map back to its picklable form so each yielded
        # checkpoint can be serialized.
        return (
            (tmap_to_picklable(token_map), state)
            for state in optimize_checkpoints(
                tensors, arg_values, model, lambda batch_tensors, model: self.
                _get_batch_prediction_loss(arg_values, batch_tensors, model)))
# --- Example 6 (code-search result separator) ---
    def _optimize_model(
            self, arg_values: Namespace) -> Iterable[FeaturesPolyargState]:
        """Train the features-polyarg model, yielding a (metadata, state)
        pair for each checkpoint.

        With --start-from, training resumes from the checkpoint's saved
        metadata, model, and epoch count.
        """
        with print_time("Loading data", guard=arg_values.verbose):
            if arg_values.start_from:
                # Unlike sibling _optimize_model implementations, the
                # pickled arg_values is kept as old_arg_values and NOT
                # used for the rest of this run.
                _, (old_arg_values, unparsed_args, metadata,
                    state) = torch.load(arg_values.start_from)
                _, data_lists, \
                    (word_features_size, vec_features_size) = \
                    features_polyarg_tensors_with_meta(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file),
                        metadata)
            else:
                metadata, data_lists, \
                    (word_features_size, vec_features_size) = \
                    features_polyarg_tensors(
                        extract_dataloader_args(arg_values),
                        str(arg_values.scrape_file))
        with print_time("Converting data to tensors",
                        guard=arg_values.verbose):
            # Unpack the nine parallel data lists produced by the loader.
            unpadded_tokenized_hyp_types, \
                unpadded_hyp_features, \
                num_hyps, \
                tokenized_goals, \
                goal_masks, \
                word_features, \
                vec_features, \
                tactic_stem_indices, \
                arg_indices = data_lists

            # Hypothesis token lists and per-hypothesis feature vectors are
            # ragged; pad both to rectangular tensors for batching.
            tensors = [
                pad_sequence([
                    torch.LongTensor(tokenized_hyps_list)
                    for tokenized_hyps_list in unpadded_tokenized_hyp_types
                ],
                             batch_first=True),
                pad_sequence([
                    torch.FloatTensor(hyp_features_vec)
                    for hyp_features_vec in unpadded_hyp_features
                ],
                             batch_first=True),
                torch.LongTensor(num_hyps),
                torch.LongTensor(tokenized_goals),
                torch.ByteTensor(goal_masks),
                torch.LongTensor(word_features),
                torch.FloatTensor(vec_features),
                torch.LongTensor(tactic_stem_indices),
                torch.LongTensor(arg_indices)
            ]
            # NOTE(review): this dump runs unconditionally on every training
            # run and overwrites "tensors.pickle" in the current directory —
            # looks like leftover debugging; confirm before removing.
            with open("tensors.pickle", 'wb') as f:
                torch.save(tensors, f)
            eprint(tensors, guard=arg_values.print_tensors)

        with print_time("Building the model", guard=arg_values.verbose):

            if arg_values.start_from:
                # Resume: load_saved_state populates self._model and
                # self.num_epochs, which are read back here.
                self.load_saved_state(arg_values, unparsed_args, metadata,
                                      state)
                model = self._model
                epoch_start = self.num_epochs
            else:
                model = self._get_model(arg_values, word_features_size,
                                        vec_features_size,
                                        get_num_indices(metadata),
                                        get_num_tokens(metadata))
                epoch_start = 1

        assert model
        assert epoch_start
        # Lazily pair the (fixed) metadata with each successive training
        # state as checkpoints are produced, resuming at epoch_start.
        return ((metadata, state) for state in optimize_checkpoints(
            tensors, arg_values, model,
            lambda batch_tensors, model: self._getBatchPredictionLoss(
                arg_values, batch_tensors, model), epoch_start))