def merge_state_value_pairs_by_domain(
    problem_to_state_value_pairs: Dict[STRIPSProblem, List[StateValuePair]],
    remove_duplicates: bool = False,
) -> Dict[str, List[TrainingPair]]:
    """
    Generates a mapping of domain to corresponding TrainingPairs. The
    state-value pairs are merged by domain, and corresponding TrainingPair
    objects are created. The TrainingPair objects contain the problem, which
    we use to generate the hypergraph later on.

    Parameters
    ----------
    problem_to_state_value_pairs: mapping of STRIPSProblem to a list of
        state-value pairs
    remove_duplicates: whether to remove duplicate TrainingPairs, not
        implemented at the moment

    Returns
    -------
    Mapping of domain name to List[TrainingPair]
    """
    # Domain to training pairs. We determine a unique domain by its name
    domain_to_training_pairs = defaultdict(list)

    for problem, state_value_pairs in problem_to_state_value_pairs.items():
        # Create TrainingPair objects which hold the problem context
        training_pairs = [
            TrainingPair(problem, state_value_pair)
            for state_value_pair in state_value_pairs
        ]
        domain_to_training_pairs[problem.domain_name].extend(training_pairs)

    if remove_duplicates:
        # TODO: figure out the best way to implement this
        # Options: (option 2 is strongly preferred)
        #   1. Remove duplicates based on state and value only
        #   2. Remove duplicates based on hypergraph structure, state and value
        raise NotImplementedError

    # Metrics
    total_num_pairs = 0
    for domain, training_pairs in domain_to_training_pairs.items():
        metrics_logger.add_metric(
            CountMetric(
                "NumberOfMergedTrainingPairs",
                len(training_pairs),
                context={"domain": domain},
            ))
        _log.debug(
            f"Merged {len(training_pairs)} training pairs for '{domain}'")
        total_num_pairs += len(training_pairs)

    _log.info(f"Merged {total_num_pairs} training pairs in total")
    metrics_logger.add_metric(
        CountMetric("TotalNumberOfMergedTrainingPairs", total_num_pairs))

    return domain_to_training_pairs
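
# Illustrative sketch (not part of the module): the expected shape of the
# input and output of `merge_state_value_pairs_by_domain`. The problem and
# domain names below are made up.
#
#   problem_to_pairs = {
#       gripper_p01: [StateValuePair(s0, 5), StateValuePair(s1, 4), ...],
#       gripper_p02: [StateValuePair(t0, 7), ...],
#   }
#   merged = merge_state_value_pairs_by_domain(problem_to_pairs)
#   # merged == {"gripper": [TrainingPair(gripper_p01, ...), ...,
#   #                        TrainingPair(gripper_p02, ...), ...]}
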
def generate_optimal_state_value_pairs(
    problems: List[STRIPSProblem]
) -> Dict[STRIPSProblem, List[StateValuePair]]:
    """
    Generate the state-value pairs from the optimal plan of each task by
    using Fast Downward.

    Parameters
    ----------
    problems: List[STRIPSProblem], the problems to generate optimal
        state-value pairs for

    Returns
    -------
    Dict[STRIPSProblem, List[StateValuePair]], a mapping of each problem to
    the state-value pairs encountered on the optimal plan.
    """
    training_data: Dict[STRIPSProblem, List[StateValuePair]] = {}
    total_num_state_value_pairs = 0

    for problem in problems:
        if problem in training_data:
            raise RuntimeError(
                f"Already generated optimal state-value pairs for"
                f" {problem.name}"
            )

        # Generate state-value pairs for each problem
        state_value_pairs = _generate_optimal_state_value_pairs_for_problem(
            problem
        )
        _log.debug(
            f"Generated {len(state_value_pairs)} state-value pairs for "
            f"{problem.name}"
        )

        training_data[problem] = state_value_pairs
        total_num_state_value_pairs += len(state_value_pairs)

    _log.info(
        f"Generated {total_num_state_value_pairs} state-value pairs in "
        f"total for {len(problems)} tasks"
    )
    metrics_logger.add_metric(
        CountMetric(
            "TotalNumberOfStateValuePairs", total_num_state_value_pairs
        )
    )
    return training_data
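
# Usage sketch (illustrative only; this helper is not part of the original
# module): chain the two functions above to go from raw STRIPS problems to
# training pairs grouped by domain, ready for the k-fold split below.
def _example_build_training_pairs(
    problems: List[STRIPSProblem],
) -> Dict[str, List[TrainingPair]]:
    """Generate optimal state-value pairs and merge them by domain name."""
    problem_to_pairs = generate_optimal_state_value_pairs(problems)
    return merge_state_value_pairs_by_domain(problem_to_pairs)
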
def stop(self):
    """Stop the timer and add it to the metrics logger."""
    super().stop()

    log_str = (f"Timer {self.name} stopped. Accumulated time: "
               f"{round(self.total_time, 5)}s.")
    # Add the context if required
    if self.context:
        log_str += f" Context: {self.context}"

    # Log at the required level, only INFO and DEBUG supported for now
    if self._log_level == logging.INFO:
        _log.info(log_str)
    elif self._log_level == logging.DEBUG:
        _log.debug(log_str)
    else:
        raise ValueError(f"Unsupported log level {self._log_level}")

    metrics_logger.add_metric(
        TimeMetric(self.name, self.total_time, context=self.context))
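
# Illustrative usage (based on how `TimedOperation` is used elsewhere in this
# codebase; the metric name and context below are made up): start the timer,
# run the operation, then call `stop()` so the accumulated time is logged and
# recorded as a TimeMetric.
#
#   timer = TimedOperation(
#       "GenerateTrainingDataTime",
#       context={"domain": "gripper"},
#       log_level=logging.DEBUG,
#   ).start()
#   ...  # do the timed work
#   timer.stop()
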
def get_kfold_training_data(
    domain_to_training_pairs: Dict[str, List[TrainingPair]],
    num_folds: int,
    num_bins: int = -1,
    domain_to_num_bins: Dict[str, int] = None,
    domain_to_min_samples: Dict[str, int] = None,
    shuffle: bool = True,
) -> List[Tuple[List[TrainingPair], List[TrainingPair]]]:
    """
    Applies K-Fold to get training and validation sets. Training data from
    multiple domains are merged such that each fold contains data from each
    considered domain.

    Also performs stratified sampling with replacement if the number of
    samples for a domain is less than the minimum number of samples.

    Parameters
    ----------
    domain_to_training_pairs: mapping of a domain name to a list of training
        pairs
    num_folds: number of folds, i.e. k in the paper
    num_bins: number of bins to split data into by target heuristic value
    domain_to_num_bins: mapping of a domain name to the number of bins to use
        for that domain. This overrides 'num_bins' for any domains specified
        in the dict.
    domain_to_min_samples: mapping of a domain name to the minimum number of
        training samples for the domain. If a domain is not specified, the
        minimum number of samples is assumed to be 0.
    shuffle: whether to shuffle the training data before splitting into folds
        and bins

    Returns
    -------
    List[Tuple[List[TrainingPair], List[TrainingPair]]]
        a list of length 'num_folds', with each element representing a fold
        containing a tuple of (training pairs, validation pairs)
    """
    if num_folds < 2:
        raise ValueError(
            "There must be at least 2 folds in order to apply stratified "
            "k-fold")
    if not (num_bins >= 1 or domain_to_num_bins):
        raise ValueError(
            "There must be at least one bin, or 'domain_to_num_bins' must be "
            "specified")

    total_num_pairs = 0
    # total_num_pairs_processed includes the pairs processed for each fold
    total_num_pairs_processed = 0

    # Stratified k-fold and mapping of fold to training pairs
    skf = StratifiedKFold(n_splits=num_folds, shuffle=shuffle)
    fold_idx_to_training_pairs = defaultdict(list)
    fold_idx_to_validation_pairs = defaultdict(list)

    # Apply quantile binning for training data in each domain and split into
    # the k-folds using the bins
    for domain, training_pairs in domain_to_training_pairs.items():
        _log.info(f"Processing training data for '{domain}'")
        heuristic_values = [pair.value for pair in training_pairs]

        # Number of bins for this domain
        if domain_to_num_bins and domain in domain_to_num_bins:
            num_bins_for_domain = domain_to_num_bins[domain]
            _log.warning(f"Number of bins for '{domain}' overridden to "
                         f"{num_bins_for_domain}")
        else:
            num_bins_for_domain = num_bins

        # Bin the target values
        bin_idx = _get_bins(num_bins_for_domain, heuristic_values)
        assert len(training_pairs) == len(heuristic_values) == len(bin_idx)

        min_samples = (domain_to_min_samples.get(domain, 0)
                       if domain_to_min_samples else 0)

        # Resample using stratified sampling with replacement if required, by
        # using the binned heuristic values
        resampled = False
        if len(training_pairs) < min_samples:
            _log.warning(
                f"Number of samples {len(training_pairs)} found for "
                f"'{domain}' is less than the minimum required samples = "
                f"{min_samples}. Resampling using initial heuristic bins.")
            training_pairs, heuristic_values = resample(
                training_pairs,
                heuristic_values,
                n_samples=min_samples,
                stratify=bin_idx,
            )
            assert len(training_pairs) == len(heuristic_values) == min_samples

            # Refit the bins
            _log.debug("Refitting heuristic value bins since we resampled "
                       f"training pairs to {min_samples} samples")
            bin_idx = _get_bins(num_bins_for_domain, heuristic_values)
            resampled = True

        # Final number of training pairs after resampling
        metrics_logger.add_metric(
            CountMetric(
                "FinalNumberOfTrainingPairs",
                len(training_pairs),
                context={"domain": domain},
            ))
        total_num_pairs += len(training_pairs)

        # Perform the stratified k-fold split
        num_pairs_processed = 0
        for fold_idx, (train_idx, val_idx) in enumerate(
                skf.split(training_pairs, bin_idx)):
            # Add training and validation training pairs for this fold
            fold_idx_to_training_pairs[fold_idx].extend(
                training_pairs[idx] for idx in train_idx)
            fold_idx_to_validation_pairs[fold_idx].extend(
                training_pairs[idx] for idx in val_idx)
            num_pairs_processed += len(train_idx) + len(val_idx)

        # Check we have processed the expected number of training pairs
        if resampled:
            assert np.isclose(num_pairs_processed, min_samples * num_folds)
        else:
            assert np.isclose(num_pairs_processed,
                              len(training_pairs) * num_folds)

        metrics_logger.add_metric(
            CountMetric(
                "NumberOfTrainingPairsProcessed",
                num_pairs_processed,
                context={
                    "operation": "StratifiedKFold",
                    "domain": domain
                },
            ))
        total_num_pairs_processed += num_pairs_processed

    # Merge training and validation pairs for each fold
    kfold_training_data = []
    for fold_idx in range(num_folds):
        kfold_training_data.append((
            fold_idx_to_training_pairs[fold_idx],
            fold_idx_to_validation_pairs[fold_idx],
        ))
    assert len(kfold_training_data) == num_folds

    _log.info("Finished generating k-fold training data")
    metrics_logger.add_metric(
        CountMetric("TotalFinalNumberOfTrainingPairs", total_num_pairs))
    metrics_logger.add_metric(
        CountMetric(
            "TotalNumberOfTrainingPairsProcessed",
            total_num_pairs_processed,
            context={"operation": "StratifiedKFold"},
        ))

    return kfold_training_data
def generate_strips_problems(
    domain_pddl: str, domain_pddls: List[str], problem_pddls: List[str]
) -> List[STRIPSProblem]:
    """
    Generate STRIPS problems given paths to domain and problem PDDLs.

    Only one of `domain_pddl` and `domain_pddls` may be specified. If
    `domain_pddl` is specified, then it is assumed to be the domain PDDL file
    for all `problem_pddls`. If `domain_pddls` is specified, then each
    element of the list is assumed to be the domain PDDL file for the
    corresponding element in `problem_pddls`.

    Parameters
    ----------
    domain_pddl: str
    domain_pddls: List[str]
    problem_pddls: List[str]

    Returns
    -------
    List[STRIPSProblem]
    """
    if domain_pddl and domain_pddls:
        raise ValueError(
            "Only one of domain_pddl or domain_pddls may be specified"
        )
    elif not (domain_pddl or domain_pddls):
        # Sanity check
        raise ValueError("At least one domain must be specified")

    # Generate STRIPSProblem objects
    if domain_pddl:
        problems = [
            get_strips_problem(domain_pddl, problem_pddl)
            for problem_pddl in problem_pddls
        ]
    else:
        if len(domain_pddls) != len(problem_pddls):
            raise ValueError(
                "Length of domain PDDLs must be equal to length of problem "
                "PDDLs"
            )
        problems = [
            get_strips_problem(domain_pddl, problem_pddl)
            for domain_pddl, problem_pddl in zip(domain_pddls, problem_pddls)
        ]

    # Warn if there are any non-unique problems (determined by name)
    prob_names_counter = Counter([problem.name for problem in problems])
    for prob_name, count in prob_names_counter.items():
        if count > 1:
            _log.warning(
                f"There are {count} problems with the identical problem name "
                f"'{prob_name}'. This may lead to unexpected behaviour."
            )

    # Metrics
    num_problems = len(problems)
    _log.info(f"Generated {num_problems} STRIPS Problems")
    metrics_logger.add_metric(
        CountMetric("NumberOfSTRIPSProblems", num_problems)
    )
    return problems
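
# Usage sketch (illustrative only; the file paths are made up): either a
# single shared domain PDDL or one domain PDDL per problem PDDL may be
# passed, but not both.
#
#   # One domain file shared by all problems
#   problems = generate_strips_problems(
#       domain_pddl="benchmarks/gripper/domain.pddl",
#       domain_pddls=None,
#       problem_pddls=["benchmarks/gripper/p01.pddl",
#                      "benchmarks/gripper/p02.pddl"],
#   )
#
#   # One domain file per problem file
#   problems = generate_strips_problems(
#       domain_pddl=None,
#       domain_pddls=["a/domain.pddl", "b/domain.pddl"],
#       problem_pddls=["a/p01.pddl", "b/p01.pddl"],
#   )
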
def train_main(args: TrainingArgs, experiments_dir: str):
    """
    Main runner method.

    Note, whichever one of `max_training_time` and `max_epochs` is reached
    first will be used to terminate training.

    Parameters
    ----------
    args: TrainingArgs
    experiments_dir: directory where experiment results will be stored
    """
    problems = args.get_strips_problems()

    # Generate and process the training data
    kfold_training_data_wf = KFoldTrainingDataWorkflow(
        problems=problems,
        batch_size=args.batch_size,
        num_folds=args.num_folds,
        num_bins=args.num_bins,
        remove_duplicates=args.remove_duplicates,
        shuffle=args.shuffle,
        global_feature_mapper_cls=args.global_feature_mapper_cls,
        node_feature_mapper_cls=args.node_feature_mapper_cls,
        hyperedge_feature_mapper_cls=args.hyperedge_feature_mapper_cls,
        experiment_dir=experiments_dir,
    )
    kfold_dataloaders: List[Tuple[DataLoader, DataLoader]] = \
        kfold_training_data_wf.run()

    # Hyperparameters for STRIPS-HGN
    strips_hgn_hparams = Namespace(
        receiver_k=kfold_training_data_wf.max_receivers,
        sender_k=kfold_training_data_wf.max_senders,
        hidden_size=args.hidden_size,
        learning_rate=args.learning_rate,
        weight_decay=args.weight_decay,
        global_feature_mapper_cls=args.global_feature_mapper_cls,
        node_feature_mapper_cls=args.node_feature_mapper_cls,
        hyperedge_feature_mapper_cls=args.hyperedge_feature_mapper_cls,
    )

    # Run training for each fold and keep track of the best results
    best_train_wf: Optional[TrainSTRIPSHGNWorkflow] = None

    for fold_idx, (train_dataloader,
                   val_dataloader) in enumerate(kfold_dataloaders):
        _log.info(f"Running training workflow for fold {fold_idx + 1} out "
                  f"of {args.num_folds}")

        # Time the workflow for good measure
        fold_timer = TimedOperation("RunFoldTrainingTime",
                                    context={
                                        "fold_idx": fold_idx
                                    }).start()

        # Create the training workflow and run it
        current_train_wf = TrainSTRIPSHGNWorkflow(
            strips_hgn=STRIPSHGN(hparams=strips_hgn_hparams),
            max_training_time=args.max_training_time,
            max_num_epochs=args.max_epochs,
            train_dataloader=train_dataloader,
            val_dataloader=val_dataloader,
            experiments_dir=experiments_dir,
            prefix=f"fold_{fold_idx}",
            early_stopping_patience=args.patience,
        )
        current_train_wf.run()

        # Stop the timer so it is saved as a metric
        fold_timer.stop()

        # Run the post-training procedure
        _copy_best_model(current_train_wf)

        # Add metric for the number of epochs trained for
        metrics_logger.add_metric(
            CountMetric(
                "NumberOfEpochsTrained",
                current_train_wf.current_epoch + 1,
                context={"fold_idx": fold_idx},
            ))

        # Check if this is the best fold we have encountered so far
        if (best_train_wf is None or
                current_train_wf.best_val_loss < best_train_wf.best_val_loss):
            _log.info(f"New best val loss found at fold {fold_idx + 1} = "
                      f"{current_train_wf.best_val_loss}")
            if best_train_wf:
                _log.info(
                    f"Previous best val loss = {best_train_wf.best_val_loss}")
            best_train_wf = current_train_wf

    _log.info(
        f"Best STRIPS-HGN found at {best_train_wf.prefix} with val loss of "
        f"{best_train_wf.best_val_loss}. Checkpoint directory = "
        f"{best_train_wf.checkpoint_dir}")

    # Make a copy of the best fold model in the main experiment results dir
    best_model_fname = os.path.join(experiments_dir, _BEST_MODEL_FNAME)
    copyfile(
        os.path.join(best_train_wf.checkpoint_dir, _BEST_MODEL_FNAME),
        best_model_fname,
    )
    _log.info(f"Copied best STRIPS-HGN to {best_model_fname}")
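
# Invocation sketch (illustrative only; the directory path is made up):
# `train_main` is expected to be called from a CLI entry point with a
# populated `TrainingArgs` instance and an experiments directory, e.g.
#
#   train_main(args, experiments_dir="results/experiment-01/")
#
# Training for each fold stops at whichever of `args.max_training_time` and
# `args.max_epochs` is reached first.
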
def _generate_optimal_state_value_pairs_for_problem(
    problem: STRIPSProblem
) -> List[StateValuePair]:
    """
    Generates the optimal state-value pairs for a planning problem.

    Parameters
    ----------
    problem: STRIPSProblem, the problem we are generating state-value pairs
        for

    Returns
    -------
    List[StateValuePair] with the trajectory of states and their optimal
    heuristic values
    """
    # Start a timer
    metric_context = {"domain": problem.domain_name, "problem": problem.name}
    timer = TimedOperation(
        "GenerateOptimalStateValuePairsTime",
        context=metric_context,
        log_level=TRAINING_DATA_TIMER_LOG_LEVEL,
    ).start()

    # Run Fast Downward to get the optimal plan
    optimal_plan: Optional[List[str]] = get_optimal_actions_using_fd(problem)

    # Check some edge cases. The None check must come first, otherwise
    # calling len() on None would raise a TypeError.
    if optimal_plan is None:
        _log.error(f"Unable to find optimal solution for {problem}")
        return []
    elif len(optimal_plan) == 0:
        _log.warning(f"Initial state for {problem} is already a goal state!")
        return []

    name_to_action: Dict[str, STRIPSAction] = {
        action.name: action for action in problem.actions
    }

    # Form the state-value pairs for the trajectory, which initially contains
    # only the initial state
    current_state = problem.initial_state
    trajectory: List[StateValuePair] = [
        StateValuePair(current_state, len(optimal_plan))
    ]

    for idx, action_name in enumerate(optimal_plan):
        # Apply the action in the current state
        action = name_to_action[action_name]
        current_state = action.apply(current_state)

        # Create a new state-value pair
        remaining_plan_length = len(optimal_plan) - (idx + 1)
        trajectory.append(
            StateValuePair(current_state, remaining_plan_length))

    # Check the final state is a goal state and the number of pairs is correct
    assert problem.is_goal_state(current_state)
    assert len(trajectory) == len(optimal_plan) + 1

    # Stop the timer and add a metric for the number of state-value pairs
    timer.stop()
    metrics_logger.add_metric(
        CountMetric(
            "NumberOfOptimalStateValuePairs",
            len(trajectory),
            context=metric_context,
        )
    )
    return trajectory
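
# Illustrative sketch (not part of the module): for a problem whose optimal
# plan is [a1, a2, a3], the returned trajectory contains one state-value pair
# per state along the plan, with the value equal to the remaining plan
# length:
#
#   [StateValuePair(s0, 3),   # initial state
#    StateValuePair(s1, 2),   # after applying a1
#    StateValuePair(s2, 1),   # after applying a2
#    StateValuePair(s3, 0)]   # goal state, after applying a3
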