Example #1
def merge_state_value_pairs_by_domain(
    problem_to_state_value_pairs: Dict[STRIPSProblem, List[StateValuePair]],
    remove_duplicates: bool = False,
) -> Dict[str, List[TrainingPair]]:
    """
    Generates a mapping of domain to corresponding TrainingPairs.
    The state-value pairs are merged by domain, and corresponding TrainingPair
    objects are created.

    The TrainingPair objects contain the problem, which we use to generate the
    hypergraph later on.

    Parameters
    ----------
    problem_to_state_value_pairs: mapping of STRIPSProblem to a list of
        state-value pairs
    remove_duplicates: whether to remove duplicate TrainingPairs, not
        implemented at the moment

    Returns
    -------
    Mapping of domain name to List[TrainingPair]
    """
    # Domain to training pairs. We determine a unique domain by its name
    domain_to_training_pairs = defaultdict(list)

    for problem, state_value_pairs in problem_to_state_value_pairs.items():
        # Create TrainingPair objects which hold the problem context
        training_pairs = [
            TrainingPair(problem, state_value_pair)
            for state_value_pair in state_value_pairs
        ]
        domain_to_training_pairs[problem.domain_name].extend(training_pairs)

    if remove_duplicates:
        # TODO: figure out best way to implement this
        # Options: (option 2 is strongly preferred)
        #  1. Remove duplicates based on state and value only
        #  2. Remove duplicates based on hypergraph structure, state and value
        raise NotImplementedError(
            "Removing duplicate TrainingPairs is not implemented yet")

    # Metrics
    total_num_pairs = 0
    for domain, training_pairs in domain_to_training_pairs.items():
        metrics_logger.add_metric(
            CountMetric(
                "NumberofMergedTrainingPairs",
                len(training_pairs),
                context={"domain": domain},
            ))
        _log.debug(
            f"Merged {len(training_pairs)} training pairs for '{domain}'")
        total_num_pairs += len(training_pairs)

    _log.info(f"Merged {total_num_pairs} training pairs in total")
    metrics_logger.add_metric(
        CountMetric("TotalNumberOfMergedTrainingPairs", total_num_pairs))

    return domain_to_training_pairs
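
The `TrainingPair` type itself does not appear in these examples. As a point of reference only, the following minimal sketch mirrors how it is used here (constructed as `TrainingPair(problem, state_value_pair)`) and in Example #4 (read via `pair.value`); the field and property names are assumptions, not the real implementation.

from dataclasses import dataclass

@dataclass(frozen=True)
class TrainingPairSketch:
    """Illustrative stand-in for TrainingPair; not the real class."""
    problem: "STRIPSProblem"
    state_value_pair: "StateValuePair"

    @property
    def value(self) -> int:
        # Target heuristic value, assumed to live on the state-value pair
        # (see Example #7, where StateValuePair(state, value) is constructed)
        return self.state_value_pair.value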
Example #2
def generate_optimal_state_value_pairs(
    problems: List[STRIPSProblem]
) -> Dict[STRIPSProblem, List[StateValuePair]]:
    """
    Generate the state-value pairs from the optimal plans of each task by using
    Fast Downward.

    Parameters
    ----------
    problems: List[STRIPSProblem], the problems to generate optimal
        state-value pairs for

    Returns
    -------
    Dict[STRIPSProblem, List[StateValuePair]], a mapping of each problem to the
    state-value pairs encountered on the optimal plan.
    """
    training_data: Dict[STRIPSProblem, List[StateValuePair]] = {}
    total_num_state_value_pairs = 0

    for problem in problems:
        if problem in training_data:
            raise RuntimeError(
                f"Already generated optimal state-value pairs for"
                f" {problem.name}"
            )

        # Generate state-value pairs for each problem
        state_value_pairs = _generate_optimal_state_value_pairs_for_problem(
            problem
        )

        _log.debug(
            f"Generated {len(state_value_pairs)} state-value pairs for "
            f"{problem.name}"
        )
        training_data[problem] = state_value_pairs
        total_num_state_value_pairs += len(state_value_pairs)

    _log.info(
        f"Generated {total_num_state_value_pairs} state-value pairs in "
        f"total for {len(problems)} tasks"
    )
    metrics_logger.add_metric(
        CountMetric(
            "TotalNumberOfStateValuePairs", total_num_state_value_pairs
        )
    )
    return training_data
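
A minimal end-to-end usage sketch tying this function to Examples #1 and #5; the PDDL paths below are placeholders, not files from the original code.

# Placeholder paths, for illustration only
problems = generate_strips_problems(
    domain_pddl="blocksworld/domain.pddl",
    domain_pddls=None,
    problem_pddls=["blocksworld/p01.pddl", "blocksworld/p02.pddl"],
)
problem_to_pairs = generate_optimal_state_value_pairs(problems)
domain_to_training_pairs = merge_state_value_pairs_by_domain(problem_to_pairs)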
Example #3
    def stop(self):
        """ Stop the timer and add it to the metrics logger"""
        super().stop()

        log_str = (f"Timer {self.name} stopped. Accumulated time: "
                   f"{round(self.total_time, 5)}s.")
        # Add the context if required
        if self.context:
            log_str += f" Context: {self.context}"

        # Log at required level, only INFO and DEBUG supported for now
        if self._log_level == logging.INFO:
            _log.info(log_str)
        elif self._log_level == logging.DEBUG:
            _log.debug(log_str)
        else:
            raise ValueError(f"Unsupported log level {self._log_level}")

        metrics_logger.add_metric(
            TimeMetric(self.name, self.total_time, context=self.context))
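
Based on how `TimedOperation` is used in Examples #6 and #7, a typical call site looks roughly like the sketch below. The metric name, context, and `run_expensive_step` are placeholders, and the try/finally guard is a suggested pattern rather than something the codebase necessarily does.

import logging

timer = TimedOperation(
    "GenerateTrainingDataTime",          # illustrative metric name
    context={"domain": "blocksworld"},   # illustrative context
    log_level=logging.DEBUG,
).start()
try:
    run_expensive_step()  # placeholder for the timed work
finally:
    # stop() logs the accumulated time and records a TimeMetric (see above)
    timer.stop()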
Example #4
def get_kfold_training_data(
    domain_to_training_pairs: Dict[str, List[TrainingPair]],
    num_folds: int,
    num_bins: int = -1,
    domain_to_num_bins: Dict[str, int] = None,
    domain_to_min_samples: Dict[str, int] = None,
    shuffle: bool = True,
) -> List[Tuple[List[TrainingPair], List[TrainingPair]]]:
    """
    Applies stratified k-fold cross-validation to obtain training and
    validation sets. Training data from multiple domains is merged so that
    each fold contains data from every considered domain.

    Also performs stratified sampling with replacement if the number of
    samples for a domain is less than its specified minimum.

    Parameters
    ----------
    domain_to_training_pairs: mapping of a domain name to a list of training
        pairs
    num_folds: number of folds, i.e. k in the paper
    num_bins: number of bins to split data into by target heuristic value
    domain_to_num_bins: mapping of a domain name to the number of bins to use
        for that domain. This overrides 'num_bins' for any domain specified
        in the dict.
    domain_to_min_samples: mapping of a domain name to the minimum number of
        training samples for that domain. If a domain is not specified, its
        minimum number of samples is assumed to be 0.
    shuffle: whether to shuffle the training data before splitting into
        folds and bins

    Returns
    -------
    List[Tuple[List[TrainingPair], List[TrainingPair]]]
        a list of length 'num_folds', with each element representing a fold
        containing tuples with (training pairs, validation pairs)
    """
    if num_folds < 2:
        raise ValueError("k >= 2 folds in order to apply stratified k-fold")
    if not (num_bins >= 1 or domain_to_num_bins):
        raise ValueError(
            "There must be at least one bin, or 'domain_to_num_bins' must be "
            "specified")

    total_num_pairs = 0
    # total_num_pairs_processed includes the pairs processed for each fold
    total_num_pairs_processed = 0

    # Stratified k-fold and mapping of fold to training pairs
    skf = StratifiedKFold(n_splits=num_folds, shuffle=shuffle)
    fold_idx_to_training_pairs = defaultdict(list)
    fold_idx_to_validation_pairs = defaultdict(list)

    # Apply quantile binning for training data in each domain and split into
    # the k-folds using the bins
    for domain, training_pairs in domain_to_training_pairs.items():
        _log.info(f"Processing training data for '{domain}'")
        heuristic_values = [pair.value for pair in training_pairs]

        # Number of bins for this domain
        if domain_to_num_bins and domain in domain_to_num_bins:
            num_bins_for_domain = domain_to_num_bins[domain]
            _log.warning(f"Number of bins for '{domain}' overriden to "
                         f"{num_bins_for_domain}")
        else:
            num_bins_for_domain = num_bins

        # Bin the target values
        bin_idx = _get_bins(num_bins_for_domain, heuristic_values)
        assert len(training_pairs) == len(heuristic_values) == len(bin_idx)

        min_samples = (domain_to_min_samples.get(domain, 0)
                       if domain_to_min_samples else 0)

        # Resample using stratified sampling with replacement if required, by
        # using the binned heuristic value.
        resampled = False
        if len(training_pairs) < min_samples:
            _log.warning(
                f"Found {len(training_pairs)} samples for '{domain}', fewer "
                f"than the required minimum of {min_samples}. Resampling "
                "using the initial heuristic value bins.")
            training_pairs, heuristic_values = resample(
                training_pairs,
                heuristic_values,
                n_samples=min_samples,
                stratify=bin_idx,
            )
            assert len(training_pairs) == len(heuristic_values) == min_samples

            # Refit the bins
            _log.debug("Refitting heuristic value bins since we resampled "
                       f"training pairs to {min_samples} samples")
            bin_idx = _get_bins(num_bins_for_domain, heuristic_values)
            resampled = True

        # Final number of training pairs after resampling
        metrics_logger.add_metric(
            CountMetric(
                "FinalNumberOfTrainingPairs",
                len(training_pairs),
                context={"domain": domain},
            ))
        total_num_pairs += len(training_pairs)

        # Perform the stratified k-fold split
        num_pairs_processed = 0
        for fold_idx, (train_idx,
                       val_idx) in enumerate(skf.split(training_pairs,
                                                       bin_idx)):
            # Add training and validation training pairs for this fold
            fold_idx_to_training_pairs[fold_idx].extend(training_pairs[idx]
                                                        for idx in train_idx)
            fold_idx_to_validation_pairs[fold_idx].extend(training_pairs[idx]
                                                          for idx in val_idx)
            num_pairs_processed += len(train_idx) + len(val_idx)

        # Check we have processed the expected number of training pairs
        if resampled:
            assert np.isclose(num_pairs_processed, min_samples * num_folds)
        else:
            assert np.isclose(num_pairs_processed,
                              len(training_pairs) * num_folds)
        metrics_logger.add_metric(
            CountMetric(
                "NumberOfTrainingPairsProcessed",
                num_pairs_processed,
                context={
                    "operation": "StratifiedKFold",
                    "domain": domain
                },
            ))
        total_num_pairs_processed += num_pairs_processed

    # Merge training and validation pairs
    kfold_training_data = []
    for fold_idx in range(num_folds):
        kfold_training_data.append((
            fold_idx_to_training_pairs[fold_idx],
            fold_idx_to_validation_pairs[fold_idx],
        ))
    assert len(kfold_training_data) == num_folds
    _log.info("Finished generating k-fold training data")

    metrics_logger.add_metric(
        CountMetric("TotalFinalNumberOfTrainingPairs", total_num_pairs))
    metrics_logger.add_metric(
        CountMetric(
            "TotalNumberOfTrainingPairsProcessed",
            total_num_pairs_processed,
            context={"operation": "StratifiedKFold"},
        ))

    return kfold_training_data
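
The `_get_bins` helper used above is not included in these examples. Assuming it performs the quantile binning described in the comments, one plausible sketch (not the original implementation) is:

from typing import List

import numpy as np

def _get_bins_sketch(num_bins: int, values: List[float]) -> np.ndarray:
    """Hypothetical quantile binning of target heuristic values."""
    values = np.asarray(values, dtype=float)
    # Quantile edges; np.unique collapses duplicates for skewed targets
    edges = np.unique(np.quantile(values, np.linspace(0.0, 1.0, num_bins + 1)))
    # Map each value to a bin index usable as a stratification label
    return np.digitize(values, edges[1:-1], right=True)

The real binning strategy may differ (e.g. fixed-width bins); this sketch only shows one way the returned indices could serve as labels for StratifiedKFold.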
Example #5
def generate_strips_problems(
    domain_pddl: str, domain_pddls: List[str], problem_pddls: List[str]
) -> List[STRIPSProblem]:
    """
    Generate STRIPS problems given paths to domain and problem PDDLs.
    Only one of `domain_pddl` and `domain_pddls` may be specified.

    If `domain_pddl` is specified, then it will be assumed as the
    domain PDDL file for all `problem_pddls`.

    If `domain_pddls` is specified, then each element of the list is assumed
    to be the domain PDDL file for the corresponding element in
    `problem_pddls`.

    Parameters
    ----------
    domain_pddl: str
    domain_pddls: List[str]
    problem_pddls: List[str]

    Returns
    -------
    List[STRIPSProblem]
    """
    if domain_pddl and domain_pddls:
        raise ValueError(
            "Only one of domain_pddl or domain_pddls may be specified"
        )
    if not (domain_pddl or domain_pddls):
        raise ValueError(
            "At least one of domain_pddl or domain_pddls must be specified"
        )

    # Generate STRIPSProblem objects
    if domain_pddl:
        problems = [
            get_strips_problem(domain_pddl, problem_pddl)
            for problem_pddl in problem_pddls
        ]
    else:
        if len(domain_pddls) != len(problem_pddls):
            raise ValueError(
                "Length of domain PDDLs must be equal to length of problem "
                "PDDLs"
            )

        problems = [
            get_strips_problem(domain_pddl, problem_pddl)
            for domain_pddl, problem_pddl in zip(domain_pddls, problem_pddls)
        ]

    # Warn if there are any non-unique problems (determined by name)
    prob_names_counter = Counter([problem.name for problem in problems])
    for prob_name, count in prob_names_counter.items():
        if count > 1:
            _log.warning(
                f"There are {count} problems with the same name "
                f"'{prob_name}'. This may lead to unexpected behaviour."
            )

    # Metrics
    num_problems = len(problems)
    _log.info(f"Generated {num_problems} STRIPS Problems")
    metrics_logger.add_metric(
        CountMetric("NumberOfSTRIPSProblems", num_problems)
    )

    return problems
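
Illustrative call using the paired `domain_pddls` variant, where each domain PDDL corresponds to the problem PDDL at the same index; the file paths are placeholders.

problems = generate_strips_problems(
    domain_pddl=None,
    domain_pddls=["gripper/domain.pddl", "blocks/domain.pddl"],
    problem_pddls=["gripper/p01.pddl", "blocks/p01.pddl"],
)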
Example #6
def train_main(args: TrainingArgs, experiments_dir: str):
    """
    Main runner method.

    Note: training terminates as soon as either `max_training_time` or
    `max_epochs` is reached, whichever comes first.

    Parameters
    ----------
    args: TrainingArgs
    experiments_dir: directory where experiment results will be stored
    """
    problems = args.get_strips_problems()

    # Generate and process training data
    kfold_training_data_wf = KFoldTrainingDataWorkflow(
        problems=problems,
        batch_size=args.batch_size,
        num_folds=args.num_folds,
        num_bins=args.num_bins,
        remove_duplicates=args.remove_duplicates,
        shuffle=args.shuffle,
        global_feature_mapper_cls=args.global_feature_mapper_cls,
        node_feature_mapper_cls=args.node_feature_mapper_cls,
        hyperedge_feature_mapper_cls=args.hyperedge_feature_mapper_cls,
        experiment_dir=experiments_dir,
    )
    kfold_dataloaders: List[Tuple[DataLoader,
                                  DataLoader]] = kfold_training_data_wf.run()

    # Hyperparameter for STRIPS-HGN
    strips_hgn_hparams = Namespace(
        receiver_k=kfold_training_data_wf.max_receivers,
        sender_k=kfold_training_data_wf.max_senders,
        hidden_size=args.hidden_size,
        learning_rate=args.learning_rate,
        weight_decay=args.weight_decay,
        global_feature_mapper_cls=args.global_feature_mapper_cls,
        node_feature_mapper_cls=args.node_feature_mapper_cls,
        hyperedge_feature_mapper_cls=args.hyperedge_feature_mapper_cls,
    )

    # Run training for each fold, keep track of best results
    best_train_wf: Optional[TrainSTRIPSHGNWorkflow] = None

    for fold_idx, (train_dataloader,
                   val_dataloader) in enumerate(kfold_dataloaders):
        _log.info(f"Running training workflow for fold {fold_idx + 1} out "
                  f"of {args.num_folds}")
        # Time the workflow for good measure
        fold_timer = TimedOperation("RunFoldTrainingTime",
                                    context={
                                        "fold_idx": fold_idx
                                    }).start()

        # Create training workflow and run
        current_train_wf = TrainSTRIPSHGNWorkflow(
            strips_hgn=STRIPSHGN(hparams=strips_hgn_hparams),
            max_training_time=args.max_training_time,
            max_num_epochs=args.max_epochs,
            train_dataloader=train_dataloader,
            val_dataloader=val_dataloader,
            experiments_dir=experiments_dir,
            prefix=f"fold_{fold_idx}",
            early_stopping_patience=args.patience,
        )
        current_train_wf.run()

        # Stop the timer so it saves as a metric
        fold_timer.stop()

        # Run post-training procedure
        _copy_best_model(current_train_wf)

        # Add metric for number of epochs trained for
        metrics_logger.add_metric(
            CountMetric(
                "NumberOfEpochsTrained",
                current_train_wf.current_epoch + 1,
                context={"fold_idx": fold_idx},
            ))

        # Check if this is the best fold we have encountered
        if (best_train_wf is None or
                current_train_wf.best_val_loss < best_train_wf.best_val_loss):
            _log.info(f"New best val loss found at fold {fold_idx + 1} = "
                      f"{current_train_wf.best_val_loss}")
            if best_train_wf:
                _log.info(
                    f"Previous best val loss = {best_train_wf.best_val_loss}")
            best_train_wf = current_train_wf

    _log.info(
        f"Best STRIPS-HGN found at {best_train_wf.prefix} with val loss of "
        f"{best_train_wf.best_val_loss}. Checkpoint directory = "
        f"{best_train_wf.checkpoint_dir}")

    # Make a copy of the best fold model to the main experiments results dir
    best_model_fname = os.path.join(experiments_dir, _BEST_MODEL_FNAME)
    copyfile(
        os.path.join(best_train_wf.checkpoint_dir, _BEST_MODEL_FNAME),
        best_model_fname,
    )
    _log.info(f"Copied best STRIPS-HGN to {best_model_fname}")
Example #7
def _generate_optimal_state_value_pairs_for_problem(
    problem: STRIPSProblem
) -> List[StateValuePair]:
    """
    Generates the optimal state-value pairs for a planning problem.

    Parameters
    ----------
    problem: STRIPSProblem, the problem we are generating state-value pairs for

    Returns
    -------
    List[StateValuePair], the trajectory of states along the optimal plan
    together with their optimal heuristic values
    """
    # Start a timer
    metric_context = {"domain": problem.domain_name, "problem": problem.name}
    timer = TimedOperation(
        "GenerateOptimalStateValuePairsTime",
        context=metric_context,
        log_level=TRAINING_DATA_TIMER_LOG_LEVEL,
    ).start()

    # Run Fast-Downward to get the optimal plan
    optimal_plan: Optional[List[str]] = get_optimal_actions_using_fd(problem)

    # Check some edge cases. The None check must come first, otherwise
    # len(None) would raise a TypeError when no plan was found.
    if optimal_plan is None:
        _log.error(f"Unable to find optimal solution for {problem}")
        return []
    elif len(optimal_plan) == 0:
        _log.warning(f"Initial state for {problem} is already a goal state!")
        return []

    name_to_action: Dict[str, STRIPSAction] = {
        action.name: action for action in problem.actions
    }
    # Build the state-value trajectory, starting from the initial state
    current_state = problem.initial_state
    trajectory: List[StateValuePair] = [
        StateValuePair(current_state, len(optimal_plan))
    ]

    for idx, action_name in enumerate(optimal_plan):
        # Apply action in the current state
        action = name_to_action[action_name]
        current_state = action.apply(current_state)

        # Create new state-value pair
        remaining_plan_length = len(optimal_plan) - (idx + 1)
        trajectory.append(StateValuePair(current_state, remaining_plan_length))

    # Check current state is a goal state and the number of pairs
    assert problem.is_goal_state(current_state)
    assert len(trajectory) == len(optimal_plan) + 1

    # Stop timer and add metric for number of state-value pairs
    timer.stop()
    metrics_logger.add_metric(
        CountMetric(
            "NumberOfOptimalStateValuePairs",
            len(trajectory),
            context=metric_context,
        )
    )
    return trajectory
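
For intuition, the values assigned along the trajectory simply count the remaining steps of the optimal plan; e.g. a hypothetical plan of length 3 yields the sequence below.

# The initial state gets the full plan length, each successor gets the
# remaining plan length, and the goal state gets 0.
plan_length = 3
values = [plan_length - i for i in range(plan_length + 1)]
assert values == [3, 2, 1, 0]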