def _optimize_model(self, arg_values : argparse.Namespace) -> \
        Iterable[GoalEncState]:
    """Train the goal-encoder distance model, yielding per-checkpoint states.

    Loads goal/distance training data from the scrape file (optionally
    resuming from a checkpoint given by --start-from), pads the tokenized
    goals into tensors, builds the model, and returns a generator that
    pairs the dataset metadata with each training state produced by
    optimize_checkpoints.
    """
    with print_time("Loading data", guard=arg_values.verbose):
        if arg_values.start_from:
            # Resuming: recover the original arg_values, the leftover CLI
            # args, and the (metadata, state) pair from the checkpoint.
            # NOTE: arg_values is deliberately rebound to the saved one so
            # training continues with the original hyperparameters.
            _, (arg_values, unparsed_args, (metadata, state)) = \
                torch.load(arg_values.start_from)
            # Re-tokenize using the saved metadata so token ids stay
            # consistent with the checkpointed model.
            _, tokenized_goals, outputs = \
                goals_to_total_distances_tensors_with_meta(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file), metadata)
        else:
            metadata, tokenized_goals, outputs = \
                goals_to_total_distances_tensors(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file))
    with print_time("Converting data to tensors", guard=arg_values.verbose):
        # Pad variable-length goal token sequences into one (batch, seq)
        # LongTensor; outputs are the float distance targets.
        tensors = [pad_sequence([torch.LongTensor(tok_goal)
                                 for tok_goal in tokenized_goals],
                                batch_first=True),
                   torch.FloatTensor(outputs)]
    with print_time("Building the model", guard=arg_values.verbose):
        model = self._get_model(arg_values,
                                goal_enc_get_num_tokens(metadata))
    if arg_values.start_from:
        # Restore the saved weights/optimizer state before training resumes.
        self.load_saved_state(arg_values, unparsed_args, state)
    # The comprehension's `state` is its own binding; it does not clobber
    # the checkpointed `state` used above.
    return ((metadata, state) for state in
            optimize_checkpoints(tensors, arg_values, model,
                                 lambda batch_tensors, model:
                                 self._get_batch_prediction_loss(
                                     arg_values, batch_tensors, model)))
def _optimize_checkpoints(self, encoded_data : FeaturesDataset,
                          arg_values : Namespace,
                          metadata : Tuple[Embedding, List[VecFeature],
                                           List[WordFeature]]) \
        -> Iterable[NeuralPredictorState]:
    """Train the features predictor, yielding one state per checkpoint.

    Converts the encoded dataset into tensors, builds a model sized to the
    embedding's vocabulary, and delegates the training loop to
    optimize_checkpoints with this predictor's batch-loss function.
    """
    # Only the embedding is needed here; the feature lists travel in
    # `metadata` for callers that save/restore it.
    embedding, _vec_features, _word_features = metadata
    return optimize_checkpoints(
        self._data_tensors(encoded_data, arg_values),
        arg_values,
        self._get_model(arg_values, embedding.num_tokens()),
        lambda batch_tensors, model: self._getBatchPredictionLoss(
            batch_tensors, model))
    # Fixed: removed an unreachable `pass` that followed the return.
def _optimize_checkpoints(self, encoded_data : ApplyDataset,
                          arg_values : Namespace,
                          tokenizer : Tokenizer) \
        -> Iterable[NeuralPredictorState]:
    """Train the apply predictor, yielding one state per checkpoint.

    Builds data tensors from the encoded dataset and a model sized to the
    tokenizer's vocabulary, then hands both to optimize_checkpoints along
    with this predictor's batch-loss function.
    """
    def batch_loss(batch_tensors, model):
        # Per-batch loss hook consumed by the training loop.
        return self._getBatchPredictionLoss(batch_tensors, model)

    return optimize_checkpoints(
        self._data_tensors(encoded_data, arg_values),
        arg_values,
        self._get_model(arg_values, tokenizer.numTokens()),
        batch_loss)
def _optimize_checkpoints(self, encoded_data : EncFeaturesDataset,
                          arg_values : Namespace,
                          tokenizer : Tokenizer,
                          embedding : Embedding) \
        -> Iterable[NeuralPredictorState]:
    """Train the encoded-features predictor, one state per checkpoint.

    The model is sized from both the tactic embedding and the goal
    tokenizer vocabularies; training is delegated to optimize_checkpoints
    with this predictor's batch-loss function.
    """
    training_tensors = self._data_tensors(encoded_data, arg_values)
    model = self._get_model(arg_values,
                            embedding.num_tokens(),
                            tokenizer.numTokens())

    def batch_loss(batch_tensors, model):
        # Per-batch loss hook consumed by the training loop.
        return self._getBatchPredictionLoss(batch_tensors, model)

    return optimize_checkpoints(training_tensors, arg_values, model,
                                batch_loss)
def _optimize_model(
        self, arg_values: argparse.Namespace
) -> Iterable[FeaturesDNNEvaluatorState]:
    """Train the features-DNN distance evaluator, yielding per-checkpoint
    states paired with a picklable token map.

    Loads feature/distance training data from the scrape file (optionally
    resuming from a --start-from checkpoint, in which case the saved token
    map is reused so feature indices stay consistent), converts the data
    to tensors, builds the model, and returns a generator of
    (picklable_token_map, training_state) pairs.
    """
    with print_time("Loading data", guard=arg_values.verbose):
        if arg_values.start_from:
            # Resuming: rebind arg_values to the checkpointed one and
            # rebuild the token map the saved model was trained with.
            _, (arg_values, unparsed_args,
                (picklable_token_map, state)) = \
                torch.load(arg_values.start_from)
            token_map = tmap_from_picklable(picklable_token_map)
            _, word_features_data, vec_features_data, outputs,\
                word_features_vocab_sizes, vec_features_size = \
                features_to_total_distances_tensors_with_map(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file), token_map)
        else:
            token_map, word_features_data, vec_features_data, outputs, \
                word_features_vocab_sizes, vec_features_size = \
                features_to_total_distances_tensors(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file))
    # eprint(f"word data: {word_features_data[:10]}")
    # eprint(f"vec data: {vec_features_data[:10]}")
    # eprint(f"outputs: {outputs[:100]}")
    with print_time("Converting data to tensors", guard=arg_values.verbose):
        # Word features are categorical indices (Long); vec features and
        # the distance targets are floats.
        tensors = [
            torch.LongTensor(word_features_data),
            torch.FloatTensor(vec_features_data),
            torch.FloatTensor(outputs)
        ]
    with print_time("Building the model", guard=arg_values.verbose):
        model = self._get_model(arg_values, word_features_vocab_sizes,
                                vec_features_size)
    if arg_values.start_from:
        # Restore saved weights/optimizer state before continuing training.
        self.load_saved_state(arg_values, unparsed_args, state)
    # Each yielded state carries a picklable copy of the token map so the
    # checkpoint can be reloaded without the live map object.
    return (
        (tmap_to_picklable(token_map), state)
        for state in optimize_checkpoints(
            tensors, arg_values, model,
            lambda batch_tensors, model: self.
            _get_batch_prediction_loss(arg_values, batch_tensors, model)))
def _optimize_model(
        self, arg_values: Namespace) -> Iterable[FeaturesPolyargState]:
    """Train the features-polyarg predictor, yielding per-checkpoint states
    paired with the dataset metadata.

    Loads polyarg training data from the scrape file (optionally resuming
    from a --start-from checkpoint), converts the nine component lists to
    padded tensors, builds or restores the model, and returns a generator
    of (metadata, training_state) pairs.
    """
    with print_time("Loading data", guard=arg_values.verbose):
        if arg_values.start_from:
            # Resuming: the checkpointed arg_values are recovered but NOT
            # rebound here (unlike the sibling _optimize_model methods) —
            # the current CLI arguments stay in effect.
            _, (old_arg_values, unparsed_args,
                metadata, state) = torch.load(arg_values.start_from)
            # Re-tokenize with the saved metadata so indices line up with
            # the checkpointed model.
            _, data_lists, \
                (word_features_size, vec_features_size) = \
                features_polyarg_tensors_with_meta(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file), metadata)
        else:
            metadata, data_lists, \
                (word_features_size, vec_features_size) = \
                features_polyarg_tensors(
                    extract_dataloader_args(arg_values),
                    str(arg_values.scrape_file))
    with print_time("Converting data to tensors", guard=arg_values.verbose):
        # The loader returns nine parallel per-sample lists.
        unpadded_tokenized_hyp_types, \
            unpadded_hyp_features, \
            num_hyps, \
            tokenized_goals, \
            goal_masks, \
            word_features, \
            vec_features, \
            tactic_stem_indices, \
            arg_indices = data_lists
        # Hypothesis lists vary in length per sample, so both the token
        # and feature tensors are padded batch-first.
        tensors = [
            pad_sequence([
                torch.LongTensor(tokenized_hyps_list)
                for tokenized_hyps_list in unpadded_tokenized_hyp_types
            ],
                         batch_first=True),
            pad_sequence([
                torch.FloatTensor(hyp_features_vec)
                for hyp_features_vec in unpadded_hyp_features
            ],
                         batch_first=True),
            torch.LongTensor(num_hyps),
            torch.LongTensor(tokenized_goals),
            torch.ByteTensor(goal_masks),
            torch.LongTensor(word_features),
            torch.FloatTensor(vec_features),
            torch.LongTensor(tactic_stem_indices),
            torch.LongTensor(arg_indices)
        ]
        # NOTE(review): this dumps the full tensor set to ./tensors.pickle
        # on every run, not just when a debug flag is set — looks like a
        # debugging artifact; confirm whether it should be gated.
        with open("tensors.pickle", 'wb') as f:
            torch.save(tensors, f)
        eprint(tensors, guard=arg_values.print_tensors)
    with print_time("Building the model", guard=arg_values.verbose):
        if arg_values.start_from:
            # Restore model and optimizer, then continue the epoch count
            # from where the checkpoint left off.
            self.load_saved_state(arg_values, unparsed_args, metadata,
                                  state)
            model = self._model
            epoch_start = self.num_epochs
        else:
            model = self._get_model(arg_values, word_features_size,
                                    vec_features_size,
                                    get_num_indices(metadata),
                                    get_num_tokens(metadata))
            epoch_start = 1
    # NOTE(review): asserts are stripped under `python -O`; these sanity
    # checks (both branches above always set model/epoch_start) would then
    # silently disappear.
    assert model
    assert epoch_start
    return ((metadata, state) for state in optimize_checkpoints(
        tensors, arg_values, model,
        lambda batch_tensors, model: self._getBatchPredictionLoss(
            arg_values, batch_tensors, model), epoch_start))