Example #1
    def _create_temp_file(self,
                          domain: Any,
                          problem: Any,
                          verbose=False) -> Tuple[str, str]:

        name = next(tempfile._get_candidate_names())
        show("Generating temp PDDL file {}".format(name), verbose)
        if isinstance(domain, str):

            # it's a path to a file!
            if not exists(domain):
                raise ValueError(
                    "Could not find PDDL file at {}".format(domain))
            if not exists(problem):
                raise ValueError(
                    "Could not find PDDL file at {}".format(problem))

            with open(name, 'w') as temp_file, \
                    open(domain, 'r') as domain_file, \
                    open(problem, 'r') as problem_file:
                temp_file.write('{}\n\n{}'.format(domain_file.read(),
                                                  problem_file.read()))
            problem_name = self._extract_problem_name(problem)
            return name, problem_name

        else:

            # it's the objects!
            with open(name, 'w') as temp_file:
                temp_file.write('{}\n\n{}'.format(domain, problem))

            return name, problem.name
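The snippet above leans on the private helper tempfile._get_candidate_names(); a minimal standalone sketch of the same concatenation using only the public tempfile API (the file names here are hypothetical) could look like this:

# Minimal sketch: concatenate a PDDL domain and problem file into one temporary
# file using the public tempfile API instead of the private helper above.
import tempfile

def concat_pddl(domain_path: str, problem_path: str) -> str:
    """Write domain and problem contents into a single temporary PDDL file."""
    with open(domain_path) as domain_file, open(problem_path) as problem_file:
        contents = '{}\n\n{}'.format(domain_file.read(), problem_file.read())
    with tempfile.NamedTemporaryFile('w', suffix='.pddl', delete=False) as temp_file:
        temp_file.write(contents)
        return temp_file.name  # caller is responsible for deleting the file

# usage (assuming these files exist):
# combined = concat_pddl('domain.pddl', 'problem.pddl')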
Example #2
def _generate_vocabulary_parallel(operators: List[LearnedOperator], factors: List[List[int]],
                                  verbose=False, **kwargs) -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}

    for operator in operators:
        for i, (_, effect, _) in enumerate(operator.outcomes()):
            predicates = list()
            mask = effect.mask
            factor_list = _extract_factors(mask, factors)
            if len(factor_list) == 1:
                # Independent. Go with it as-is.
                predicate = vocabulary.append(effect)
                predicates.append(predicate)
            else:
                show('{} factors: {}'.format(len(factor_list), factor_list), verbose)

                # we have a distribution over multiple factors. So extract each factor individually
                for subset in itertools.combinations(factor_list, len(factor_list) - 1):
                    # each subset contains all factors but one; integrating it out leaves a single factor
                    new_dist = effect.integrate_out(np.concatenate(subset))
                    predicate = vocabulary.append(new_dist)
                    predicates.append(predicate)
            show('{} propositions generated'.format(len(predicates)), verbose)
            operator_predicates[(operator, i)] = predicates
    return operator_predicates
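The inner loop enumerates every combination that omits exactly one factor, so integrating the combination out leaves a distribution over the single remaining factor; a tiny self-contained illustration of that combinatorial step (toy factors, no density estimation):

# Toy illustration of the factor-subset enumeration used above: for each factor,
# the complementary factors are the ones that get integrated out.
import itertools
import numpy as np

factor_list = [[0, 1], [2], [3, 4, 5]]  # hypothetical factors (lists of state variables)
for subset in itertools.combinations(factor_list, len(factor_list) - 1):
    integrate_out = np.concatenate(subset)               # variables to marginalise away
    kept = [f for f in factor_list if f not in subset]   # the single factor that remains
    print("integrate out {} -> keep factor {}".format(sorted(integrate_out), kept))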
Example #3
def _generate_vocabulary(vocabulary: UniquePredicateList, operators: List[LearnedOperator], factors: List[List[int]],
                         verbose=False, **kwargs) -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    """
    Generate a vocabulary for the PDDL. This includes every possible proposition that could ever be required.
    :param vocabulary: the existing vocabulary of predicates
    :param operators: the learned operators
    :param factors: the factors
    :param verbose: the verbosity level
    :return: a mapping from learned operator and probabilistic effect to the predicates in the vocabulary
    """
    # Process each option's effect sets.
    # map from (operator, probabilistic effect) -> predicates
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}

    n_jobs = kwargs.get('n_jobs', 1)
    splits = np.array_split(operators, n_jobs)
    functions = [partial(_generate_vocabulary_parallel, splits[i], factors, **kwargs) for i in range(n_jobs)]
    local_operator_predicates = run_parallel(functions)
    local_operator_predicates = dict(ChainMap(*local_operator_predicates))  # reduce to single dict

    show("Merging propositions from {} processes".format(n_jobs), verbose)

    # take all the results generated in parallel, and collapse into one result.
    for (operator, effect_idx), local_propositions in local_operator_predicates.items():
        predicates = [vocabulary.append(x.estimator) for x in local_propositions]
        operator_predicates[(operator, effect_idx)] = predicates
    return operator_predicates
Example #4
def _factorise(operators: List[LearnedOperator], n_variables: int, verbose=True) -> List[List[int]]:
    """
    Factorise the state space based on what variables are changed by the options. For more, see the JAIR 2018 paper
    :param operators: the learned operators
    :param n_variables: the number of state-space variables
    :param verbose: the verbosity level
    :return: factors: for each factor, the list of state variables
    """
    modifies = _modifies(operators, n_variables)  # check which variables are modified by each operator
    factors = list()
    options = list()

    for i in range(n_variables):
        found = False
        for x in range(len(factors)):
            f = factors[x]
            if options[x] == modifies[i]:
                f.append(i)
                found = True

        if not found:
            factors.append([i])
            options.append(modifies[i])

    show("Factors\tVariables\t\tOptions\n" + '\n'.join(
        ["F_{}\t\t{}\t{}".format(i, factors[i], options[i]) for i in range(len(factors))]), verbose)

    return factors
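The grouping rule is that two state variables share a factor exactly when the same set of options modifies them; a self-contained toy version of the loop above (hypothetical modifies table):

# Toy factorisation: variables modified by the same set of options share a factor.
# 'modifies' maps each state variable index to the set of options that change it (made-up data).
modifies = {0: {0, 1}, 1: {0, 1}, 2: {2}, 3: {2}, 4: set()}

factors, options = [], []
for i, opts in modifies.items():
    for factor, factor_opts in zip(factors, options):
        if factor_opts == opts:
            factor.append(i)
            break
    else:
        factors.append([i])
        options.append(opts)

print(factors)  # [[0, 1], [2, 3], [4]]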
Example #5
def partition_options(env: gym.Env, transition_data: pd.DataFrame,
                      verbose=False, **kwargs) -> Dict[int, List[PartitionedOption]]:
    """
    Partition options so that the subgoal property is approximately preserved
    :param env: the environment
    :param transition_data: all the transition data from the environment
    :param verbose: the verbosity level
    :return: a dictionary mapping each option to its partitions
    """
    if not isinstance(env.action_space, Discrete):
        raise ValueError("Action space must be discrete")

    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    action_splits = np.array_split(range(env.action_space.n), n_jobs)

    functions = [partial(_partition_options, action_splits[i], transition_data, verbose=verbose, **kwargs)
                 for i in range(n_jobs)]
    # run in parallel
    partitioned_options = run_parallel(functions)
    partitioned_options = dict(ChainMap(*partitioned_options))
    count = sum(len(partitions) for _, partitions in partitioned_options.items())

    show("{} total partitions discovered".format(count), verbose)
    return partitioned_options
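This function, like most of the parallel helpers in these examples, follows a split-then-merge pattern: np.array_split over the work items, one dictionary per worker, and a ChainMap reduction; a minimal sketch with a stand-in worker in place of run_parallel:

# Minimal sketch of the split/merge pattern: divide work across n_jobs, have each
# worker return a dict, then collapse the dicts with ChainMap.
# 'square_worker' is only a stand-in for functions such as _partition_options.
from collections import ChainMap
from functools import partial
import numpy as np

def square_worker(items):
    return {int(i): int(i) ** 2 for i in items}

n_jobs = 2
splits = np.array_split(range(10), n_jobs)
functions = [partial(square_worker, splits[i]) for i in range(n_jobs)]
results = [f() for f in functions]    # run_parallel would execute these concurrently
merged = dict(ChainMap(*results))     # reduce to a single dict
print(sorted(merged.items()))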
Example #6
def visualise_symbols(directory: str,
                      env: S2SEnv,
                      symbols: Iterable[Proposition],
                      verbose=False,
                      **kwargs) -> None:
    """
    Visualise a set of symbols
    :param directory: the directory to save them to
    :param env: the domain
    :param symbols: the list of propositions
    :param verbose: the verbosity level
    """

    n_samples = 100
    make_dir(directory)  # make directory if not exists
    for symbol in symbols:
        show("Visualising {}".format(symbol), verbose)
        samples = np.full((n_samples, env.observation_space.shape[0]), np.nan)
        samples[:, symbol.mask] = symbol.sample(n_samples)
        if kwargs.get('render', None) is not None:
            im = kwargs.get('render')(samples)
        else:
            im = Image.merge([
                env.render_state(state, agent_alpha=0.5) for state in samples
            ])
        filename = '{}_{}.bmp'.format(symbol, symbol.mask)
        Image.save(im, make_path(directory, filename), mode='RGB')
Example #7
def _cluster_effects(samples: pd.DataFrame, mask: List[int], verbose=False, **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects
    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)
    data = pd2np(samples['next_state'])  # convert to numpy
    masked_data = data[:, mask]  # cluster only on state variables that changed

    db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data)
    labels = db.labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero based
    clusters = [cluster.reset_index(drop=True) for cluster in clusters]  # not in place
    return clusters
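DBSCAN marks noise points with the label -1, which is why the loop above skips that label; a self-contained toy run on synthetic one-dimensional effect values (made-up numbers):

# Toy DBSCAN clustering of 1-D "effect" values: two dense groups plus one outlier.
import numpy as np
from sklearn.cluster import DBSCAN

masked_data = np.array([[0.01], [0.02], [0.03], [1.00], [1.01], [1.02], [5.0]])
labels = DBSCAN(eps=0.05, min_samples=2).fit(masked_data).labels_
print(labels)                       # [ 0  0  0  1  1  1 -1]; -1 marks noise
clusters = [np.where(labels == lab)[0] for lab in set(labels) if lab != -1]
print(clusters)                     # row indices belonging to each cluster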
Example #8
def learn_preconditions(
        env: gym.Env,
        init_data: pd.DataFrame,
        partitioned_options: Dict[int, List[PartitionedOption]],
        verbose=False,
        **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:
    """
    Learn all the preconditions for the partitioned options
    :param env: the domain
    :param init_data: the initiation data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the classifiers
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_preconditions, init_data, partition_splits[i],
                partitioned_options, verbose, **kwargs) for i in range(n_jobs)
    ]
    # run in parallel
    preconditions: List[Dict[Tuple[int, int],
                             PreconditionClassifier]] = run_parallel(functions)
    return dict(ChainMap(*preconditions))  # reduce to single dict
Example #9
    def fit(self, X: np.ndarray, verbose=False, **kwargs) -> None:
        """
        Fit the data to the effect estimator using a grid search for the hyperparameters with cross-validation
        :param X: the data
        :param verbose: the verbosity level
        """
        if kwargs.get('masked', False):
            data = X  # already been masked
        else:
            data = X[:, self.mask]

        if data.shape[1] == 0:
            # NO EFFECT!
            self._no_effect = True
            return

        bandwidth_range = kwargs.get('effect_bandwidth_range',
                                     np.arange(0.001, 0.1, 0.001))
        params = {'bandwidth': bandwidth_range}
        grid = GridSearchCV(KernelDensity(kernel='gaussian'), params, cv=3)
        grid.fit(data)
        show(
            "Best bandwidth hyperparameter: {}".format(
                grid.best_params_['bandwidth']), verbose)
        self._kde = grid.best_estimator_
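A standalone sketch of the same bandwidth selection on synthetic data (the parameter range is illustrative only):

# Toy bandwidth selection for a Gaussian KDE via cross-validated grid search,
# mirroring the fit() above on synthetic 1-D data.
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
data = rng.normal(loc=0.0, scale=0.05, size=(200, 1))

params = {'bandwidth': np.arange(0.001, 0.1, 0.01)}
grid = GridSearchCV(KernelDensity(kernel='gaussian'), params, cv=3)
grid.fit(data)
print("best bandwidth:", grid.best_params_['bandwidth'])
kde = grid.best_estimator_
print("log-density at 0:", kde.score_samples(np.array([[0.0]])))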
Example #10
def _generate_start_symbols(transition_data: pd.DataFrame, factors: List[List[int]],
                            verbose=False, **kwargs) -> List[StateDensityEstimator]:
    show("Generating start state symbols...", verbose)

    # group by episode and get the first state from each
    initial_states = pd2np(transition_data.groupby('episode').nth(0)['state'])

    return _generate_symbols(initial_states, factors, verbose=verbose, **kwargs)
Example #11
def _merge(existing_cluster: pd.DataFrame,
           new_cluster: pd.DataFrame,
           verbose=False,
           **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    """
    Given an existing and new cluster, determine whether there is any overlap in their initiation sets. Overlapping data
    should be extracted and put into its own cluster
    :param existing_cluster: the existing cluster
    :param new_cluster: the new cluster
    :param verbose: the verbosity level
    :return: two boolean arrays specifying, for the existing and new cluster, which data should be extracted out into
    its own cluster
    """
    # TODO: this code could be improved/optimised, but will do that another time
    epsilon = kwargs.get('init_epsilon', 0.05)
    min_samples = kwargs.get('init_min_samples', 5)

    column = 'agent_state' if kwargs.get(
        'view', View.PROBLEM) == View.AGENT else 'state'
    column = 'state'  # we check the problem-space information regardless, because if we did not (and the
    # option was not in fact stochastic), we'd have to correct it later on. So just do it here

    X = pd2np(existing_cluster[column])
    Y = pd2np(new_cluster[column])
    data = np.concatenate((X, Y))
    labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit_predict(data)

    existing_labels = labels[0:len(X)]  # labels of the existing partition data
    new_labels = labels[len(X):]  # labels of the new partition data
    existing_labels_set = set(existing_labels)
    new_labels_set = set(new_labels)

    shared_labels = existing_labels_set.intersection(new_labels_set)
    shared_labels.discard(-1)  # remove noise if present
    existing_shared = np.isin(
        existing_labels, list(shared_labels)
    )  # cast set to list because numpy is stupid https://docs.scipy.org/doc/numpy/reference/generated/numpy.isin.html
    new_shared = np.isin(
        new_labels, list(shared_labels)
    )  # cast set to list because numpy is stupid https://docs.scipy.org/doc/numpy/reference/generated/numpy.isin.html

    # Handle "noise" - count as intersected if the whole group has been subsumed.
    # TODO is this actually necessary?
    if -1 in existing_labels_set and existing_labels_set.issubset(
            new_labels_set):
        idx = np.where(
            existing_labels == -1)  # find all points classified as noise
        existing_shared[idx] = True

    if -1 in new_labels_set and new_labels_set.issubset(existing_labels_set):
        idx = np.where(new_labels == -1)  # find all points classified as noise
        new_shared[idx] = True

    show("Splitting data from old cluster", verbose
         and len(np.unique(existing_shared)) > 1)
    show("Splitting data from new cluster", verbose
         and len(np.unique(new_shared)) > 1)
    return existing_shared, new_shared
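The heart of _merge is clustering the concatenation of both data sets and flagging every row whose DBSCAN label appears in both halves; a toy version of that label bookkeeping on synthetic points:

# Toy version of the label bookkeeping in _merge: cluster the concatenation of two
# data sets, then mark the rows whose cluster label occurs in both halves.
import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[0.0], [0.1], [5.0], [5.1]])     # existing cluster (synthetic)
Y = np.array([[5.05], [9.0], [9.1]])           # new cluster (synthetic)
labels = DBSCAN(eps=0.2, min_samples=2).fit_predict(np.concatenate((X, Y)))

existing_labels, new_labels = labels[:len(X)], labels[len(X):]
shared = set(existing_labels) & set(new_labels)
shared.discard(-1)                              # noise never counts as shared

existing_shared = np.isin(existing_labels, list(shared))
new_shared = np.isin(new_labels, list(shared))
print(existing_shared, new_shared)              # rows that would be split into their own partition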
Example #12
def _partition_options(options: Iterable[int], transition_data: pd.DataFrame,
                       verbose=False, **kwargs) -> Dict[int, List[PartitionedOption]]:
    partitioned_options = dict()
    for option in options:
        show('Partitioning option {}'.format(option), verbose)
        # partition based on data from the current option
        partitioned_options[option] = _partition_option(option,
                                                        transition_data.loc[transition_data['option'] == option],
                                                        verbose=verbose, **kwargs)
    return partitioned_options
Example #13
def _generate_state_distribution(states: np.ndarray, verbose=False, **kwargs) -> KernelDensityEstimator:
    """
    Generate a distribution over a set of states
    :param states: the states
    :param verbose: the verbosity level
    :return: a density estimation of the states
    """
    show("Fitting estimator to states", verbose)
    full_mask = range_without(0, states.shape[1])  # all the state variables
    effect = KernelDensityEstimator(full_mask)
    effect.fit(states, verbose=verbose, **kwargs)
    return effect
Example #14
def find_goal_symbols(factors: List[List[int]], vocabulary: Iterable[Proposition], transition_data: pd.DataFrame,
                      verbose=False, **kwargs) -> Tuple[float, List[Proposition]]:
    """
    Find the set of symbols that best describes the goal condition. In the data, the goal being achieved is specified
    by the done flag
    :param factors: the domain factorisation
    :param vocabulary: the list of symbols
    :param transition_data: the transition data
    :param verbose: the verbosity level
    :return: the probability of the symbols modelling the goal, and the list of symbols themselves
    """
    show("Searching for goal symbols", verbose)
    # the goal states
    column = get_column_by_view('next_state', kwargs)
    positive_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == True][column])
    negative_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == False][column])

    # fit a classifier to the data
    svm = _learn_precondition(positive_samples, negative_samples, verbose=verbose, **kwargs)

    # Find the existing symbols that best match the goal precondition
    show("Finding matching symbols", verbose)
    precondition_factors = _mask_to_factors(svm.mask, factors)
    candidates = list()
    for factor in precondition_factors:
        candidates.append([proposition for proposition in vocabulary if set(proposition.mask) == set(factor)])

    combinations = list(itertools.product(*candidates))
    show("Searching through {} candidates...".format(len(combinations)), verbose)

    best_score = 0
    best_candidates = None

    for count, candidates in enumerate(combinations):
        show("Checking candidate {}".format(count), verbose)
        if _masks_overlap(candidates):
            # This should never happen, but putting a check to make sure
            warn("Overlapping candidates in PDDL building!")
            continue

        # probability of propositions matching classifier
        precondition_prob = _probability_in_precondition(candidates, svm)
        if precondition_prob > best_score:
            best_score = precondition_prob
            best_candidates = candidates
    show("Best candidates with probability {}: {}".format(best_score, ' '.join([str(x) for x in best_candidates])),
         verbose)
    return best_score, list(best_candidates)
Example #15
def _cluster_data(samples: pd.DataFrame, column_name: str, epsilon: float, min_samples: int,
                  verbose=False) -> List[pd.DataFrame]:
    data = samples[column_name]
    # TODO how to get a non object dtype out of pandas???
    db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(pd2np(data))
    labels = db.labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero based
    clusters = [cluster.reset_index(drop=True) for cluster in clusters]  # not in place
    return clusters
Example #16
def _learn_preconditions(
        init_data: pd.DataFrame,
        partitioned_options: List[PartitionedOption],
        all_partitions: Dict[int, List[PartitionedOption]],
        verbose=False,
        **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:

    state_column = 'state' if kwargs.get(
        'view', View.PROBLEM) == View.PROBLEM else 'agent_state'

    preconditions = dict()
    prev_option = None
    negative_data = None
    for partition in partitioned_options:
        option = partition.option
        if option != prev_option:
            # no need to reload if no change
            # (must do equals False because Pandas!)
            negative_data = pd2np(init_data.loc[
                (init_data['option'] == option)
                & (init_data['can_execute'] == False)][state_column])

        show(
            'Learning precondition for option {}, partition {}'.format(
                option, partition.partition), verbose)
        if kwargs.get('augment_negative', True):
            # augment negative samples from the initiation sets of the other partitions
            negative_samples = _augment_negative(negative_data,
                                                 partition.partition,
                                                 all_partitions[option])
        else:
            negative_samples = negative_data

        # this property gets either agent or problem-space states, whichever was used to partition in the first place
        positive_samples = partition.states

        show(
            "Calculating mask for option {}, partition {} ...".format(
                partition.option, partition.partition), verbose)
        precondition = _learn_precondition(positive_samples,
                                           negative_samples,
                                           verbose=verbose,
                                           **kwargs)
        preconditions[(option, partition.partition)] = precondition
        prev_option = option
    return preconditions
Example #17
    def fit(self, X: np.ndarray, y: np.ndarray, verbose=False, **kwargs):
        """
        Fit the regressor to the reward data
        :param X: the initiation state
        :param y: the rewards received
        :param verbose: the verbosity level
        """
        c_range = kwargs.get('reward_c_range', np.arange(2, 50, 4))
        gamma_range = kwargs.get('reward_gamma_range', np.arange(2, 50, 4))
        param_grid = dict(gamma=gamma_range, C=c_range)
        grid = GridSearchCV(SVR(kernel='rbf'), param_grid=param_grid,
                            cv=3)  # 3 fold CV
        grid.fit(X, y)
        show(
            "Found best SVR hyperparams: C = {}, gamma = {}".format(
                grid.best_params_['C'], grid.best_params_['gamma']), verbose)
        self._svr = grid.best_estimator_
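A standalone sketch of the same SVR hyperparameter search on synthetic (state, reward) pairs (the ranges are illustrative, not the library defaults):

# Toy reward regression: grid-search an RBF SVR's C and gamma with 3-fold CV,
# mirroring the fit() above on synthetic data.
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = rng.uniform(0, 1, size=(60, 2))                           # synthetic initiation states
y = 3.0 * X[:, 0] - X[:, 1] + rng.normal(0, 0.05, size=60)    # synthetic rewards

param_grid = dict(C=np.arange(2, 50, 12), gamma=np.arange(2, 50, 12))
grid = GridSearchCV(SVR(kernel='rbf'), param_grid=param_grid, cv=3)
grid.fit(X, y)
print("best C/gamma:", grid.best_params_['C'], grid.best_params_['gamma'])
print("predicted reward:", grid.best_estimator_.predict(X[:1]))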
Example #18
def _generate_symbols(states: np.ndarray, total_factors: List[List[int]], verbose=False, **kwargs) \
        -> List[StateDensityEstimator]:
    symbols = list()
    show("Fitting estimator to states", verbose)

    full_mask = range_without(0, states.shape[1])  # all the state variables
    distribution = KernelDensityEstimator(full_mask)
    distribution.fit(states, verbose=verbose, **kwargs)

    # integrate all possible combinations of factors out of the state distribution
    factors = _extract_factors(distribution.mask, total_factors)
    # we have a distribution over multiple factors. So extract each factor individually
    for subset in itertools.combinations(factors, len(factors) - 1):
        new_dist = distribution.integrate_out(np.concatenate(subset))
        symbols.append(new_dist)

    return symbols
Example #19
def visualise_partitions(directory: str,
                         env: S2SEnv,
                         option_partitions: Dict[int, List[PartitionedOption]],
                         verbose=False,
                         **kwargs) -> None:
    """
    Visualise a set of partitions and write them to file
    :param directory: the directory to save images to
    :param env: the domain
    :param option_partitions: a dictionary listing, for each option, a list of partitions
    :param verbose: the verbosity level
    :return: None; images of the start and end states (with associated probabilities) for each option and partition
    are written to the directory
    """
    option_descriptor = kwargs.get('option_descriptor',
                                   lambda option: 'Option-{}'.format(option)
                                   )  # a function that describes the operator
    make_dir(directory)
    for option, partitions in option_partitions.items():

        show(
            "Visualising option {} with {} partition(s)".format(
                option, len(partitions)), verbose)

        for partition in partitions:

            effects = list()
            for probability, states, _, next_states, mask in partition.effects():
                start = env.render_states(states,
                                          alpha_object=1,
                                          alpha_player=1)
                end = env.render_states(next_states)
                effects.append((probability, start, mask, end))
            show(
                "Visualising option {}, partition {}".format(
                    option, partition.partition), verbose)
            for i, (probability, start, masks, effect) in enumerate(effects):
                filename = '{}-{}-init.bmp'.format(option_descriptor(option),
                                                   partition.partition)
                Image.save(start, make_path(directory, filename), mode='RGB')
                filename = '{}-{}-eff-{}-{}-{}.bmp'.format(
                    option_descriptor(option), partition.partition, i,
                    round(probability * 100), list(np.unique(masks)))
                Image.save(effect, make_path(directory, filename), mode='RGB')
Example #20
def _probability_in_precondition(estimators: Iterable[Proposition], precondition: PreconditionClassifier,
                                 allow_fill_in=False, verbose=False, **kwargs) -> float:
    """
    Draw samples from the estimators and feed to the precondition. Take the average result
    :param estimators: the list of estimators
    :param precondition: the precondition
    :param verbose: the verbosity level
    :return: the probability of samples drawn from the estimators being in the precondition
    """

    mask = list()
    for predicate in estimators:
        mask.extend(predicate.mask)

    # if we are not allowed to randomly sample, and we are missing state variables, then return 0
    if not allow_fill_in and not set(mask).issuperset(set(precondition.mask)):
        return 0

    keep_indices = [i for i in range(len(mask)) if mask[i] in precondition.mask]

    # Bail if no overlap.
    if len(keep_indices) == 0:
        return 0

    # TODO:
    n_samples = kwargs.get('estimator_samples', 100)
    samples = np.hstack([predicate.sample(n_samples) for predicate in estimators])
    samples = samples[:, keep_indices]

    # if the estimators are a subset of the precondition, randomly add data to fill in
    add_list = [m for m in precondition.mask if m not in mask]
    if len(add_list) > 0:
        if not allow_fill_in:
            return 0
        show("Must randomly fill in data from {} to intersect with precondition".format(add_list), verbose)
        raise NotImplementedError

    total_mask = np.array(mask)[keep_indices]
    s_prob = 0
    for pos in range(n_samples):
        point = samples[pos, :]
        t_point = np.zeros([np.max(total_mask) + 1])
        t_point[total_mask] = point
        s_prob += precondition.probability(t_point)
    return s_prob / n_samples
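The final loop scatters each masked sample back into a full-length state vector before querying the precondition classifier; a tiny numpy illustration of that index bookkeeping (made-up mask and sample):

# Toy illustration of scattering a masked sample back into a full state vector,
# as done when evaluating precondition.probability above.
import numpy as np

mask = [2, 5, 7]                       # hypothetical state variables covered by the propositions
point = np.array([0.3, -1.2, 0.8])     # one sample drawn over those variables
total_mask = np.array(mask)

t_point = np.zeros(np.max(total_mask) + 1)
t_point[total_mask] = point
print(t_point)                         # [ 0.   0.   0.3  0.   0.  -1.2  0.   0.8]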
Example #21
def _compute_precondition_mask(positive_samples: np.ndarray, negative_samples: np.ndarray, labels: List[int],
                               verbose=False, **kwargs):
    """
    Compute the precondition mask using a feature selection procedure. These are the variables that matter when
    determining whether an option can be executed
    :param positive_samples: an array of positive states
    :param negative_samples: an array of negative states
    :param labels: labels corresponding to positive and negative states
    :param verbose: the verbosity level
    :return: the mask
    """
    samples = np.vstack((positive_samples, negative_samples))

    # compute the precondition mask through feature selection
    mask = []
    n_vars = samples.shape[1]

    # compute the score with ALL state variables
    total_score, params = _get_orig_score_params(samples, labels, **kwargs)
    show("Score with all variables: {}".format(total_score), verbose)

    threshold = kwargs.get('mask_removal_threshold', 0.02)


    # try removing each state variable in turn and see what the score is
    for m in range(n_vars):
        used_vars = range_without(0, n_vars, m)
        subset_score = _get_subset_score(samples, labels, used_vars, params)

        if total_score - subset_score > threshold:
            # removing the variable damaged the score. So keep it!
            show("Variable {} causes damage when removed. Keeping...".format(m), verbose)
            mask.append(m)

    # if no mask, just find the best one so far
    if len(mask) == 0:
        mask.append(np.argmax([_get_subset_score(samples, labels, [i], params) for i in range(n_vars)]))

    threshold = kwargs.get('mask_addition_threshold', 0.001)

    latest_score = _get_subset_score(samples, labels, mask, params)
    # now try adding variables back!
    for m in range_without(0, n_vars, *mask):
        n_score = _get_subset_score(samples, labels, mask + [m], params)
        if n_score - latest_score > threshold:
            latest_score = n_score
            mask = mask + [m]
            show("Variable {} improves the score when added. Keeping...".format(m), verbose)
            if n_score == 1:
                break  # cannot improve
    mask.sort()  # ensure mask is always sorted to avoid bugs down the line
    show("Final precondition mask: {} with score {}".format(mask, latest_score), verbose)
    return mask
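The mask search above is wrapper-style feature selection: score with all variables, then keep a variable only if dropping it hurts the score; a compact standalone sketch using a cross-validated SVC as a stand-in scorer (synthetic data, no hyperparameter tuning):

# Toy precondition-mask search: keep a state variable only if removing it hurts
# a cross-validated classifier's score by more than a threshold.
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

rng = np.random.RandomState(0)
samples = rng.uniform(-1, 1, size=(200, 4))
labels = (samples[:, 1] > 0).astype(int)        # only variable 1 matters

def subset_score(vars_):
    return cross_val_score(SVC(), samples[:, vars_], labels, cv=3).mean()

n_vars = samples.shape[1]
total_score = subset_score(list(range(n_vars)))
threshold = 0.02

mask = [m for m in range(n_vars)
        if total_score - subset_score([v for v in range(n_vars) if v != m]) > threshold]
print(mask)  # typically [1]: only the informative variable survives removal testing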
Example #22
    def link(self, quick_cluster: QuickCluster, verbose=False):

        used = set()
        for _, states, _, next_states, _ in self._partitioned_option.effects(View.PROBLEM):
            for s, s_prime in zip(states, next_states):
                self.add_link(quick_cluster, s, s_prime)
        for start, end, prob in self.links:
            if prob != 1:
                warnings.warn("Untested for case where linking prob != 1")
            used.add(start)
            show("Adding p_symbol{}".format(start), verbose)
            if end is None or start == end:
                end = -1
            else:
                used.add(end)
                show("Adding p_symbol{}".format(end), verbose)
            for operator in self._schemata:
                operator.add_link(start, end, prob)
        return used
Example #23
def learn_effects(partitioned_options: Dict[int, List[PartitionedOption]],
                  verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    """
    Estimate the effects from data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the probability, next-state estimators and reward estimators
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_effects, partition_splits[i], verbose, **kwargs)
        for i in range(n_jobs)
    ]
    # run in parallel
    effects: List[Dict[Tuple[int, int],
                       List[Tuple[float, StateDensityEstimator,
                                  RewardRegressor]]]] = run_parallel(functions)
    return dict(ChainMap(*effects))  # reduce to single dict
Example #24
def _cluster_effects(samples: pd.DataFrame,
                     mask: List[int],
                     verbose=False,
                     **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects
    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)

    column = 'next_agent_state' if kwargs.get(
        'view', View.PROBLEM) == View.AGENT else 'next_state'

    data = pd2np(samples[column])  # convert to numpy
    masked_data = data[:, mask]  # cluster only on state variables that changed

    if len(mask) == 0:
        # we're just going to assume that everything is one class!
        labels = np.zeros(shape=(len(masked_data), ))
        if len(masked_data) < min_samples:
            labels += -1
    else:
        db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data)
        labels = db.labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)),
         verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero based
    clusters = [cluster.reset_index(drop=True)
                for cluster in clusters]  # not in place
    return clusters
Example #25
    def fit(self, X, y, verbose=False, **kwargs):
        """
        Fit the data to the classifier using a grid search for the hyperparameters with cross-validation
        :param X: the data
        :param y: the labels
        :param verbose: the verbosity level
        """
        c_range = kwargs.get('precondition_c_range', np.arange(1, 16, 2))
        gamma_range = kwargs.get('precondition_gamma_range',
                                 np.arange(4, 22, 2))

        param_grid = dict(gamma=gamma_range, C=c_range)
        grid = GridSearchCV(SVC(class_weight='balanced'),
                            param_grid=param_grid,
                            cv=3,
                            n_jobs=-1)  # 3 fold CV
        data = X[:, self.mask]
        grid.fit(data, y)

        if not self._probabilistic:
            self._classifier = grid.best_estimator_  # we're done
        else:
            # we've found the best hyperparams. Now do it again with Platt scaling turned on
            params = grid.best_params_
            show(
                "Found best SVM hyperparams: C = {}, gamma = {}".format(
                    params['C'], params['gamma']), verbose)
            # Now do Platt scaling with the optimal parameters
            self._classifier = SVC(probability=True,
                                   class_weight='balanced',
                                   C=params['C'],
                                   gamma=params['gamma'])
            self._classifier.fit(data, y)
            show(
                "Classifier score: {}".format(self._classifier.score(data, y)),
                verbose)
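The two-stage pattern above (tune a plain SVC, then refit the best hyperparameters with probability=True so Platt scaling yields calibrated probabilities) can be sketched standalone on synthetic data:

# Toy two-stage precondition fit: grid-search a plain SVC, then refit the best
# hyperparameters with probability=True so Platt scaling gives calibrated probabilities.
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

rng = np.random.RandomState(0)
data = rng.uniform(-1, 1, size=(150, 2))
y = (data[:, 0] + data[:, 1] > 0).astype(int)

grid = GridSearchCV(SVC(class_weight='balanced'),
                    param_grid=dict(C=np.arange(1, 16, 4), gamma=np.arange(4, 22, 6)),
                    cv=3)
grid.fit(data, y)
params = grid.best_params_

classifier = SVC(probability=True, class_weight='balanced',
                 C=params['C'], gamma=params['gamma'])
classifier.fit(data, y)
print(classifier.predict_proba(np.array([[0.5, 0.5]])))  # calibrated class probabilities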
Example #26
def _build_pddl_operators(env: gym.Env, factors: List[List[int]], operators: List[LearnedOperator],
                          vocabulary: UniquePredicateList,
                          operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]],
                          verbose=False, **kwargs):
    """
    Generate the high-level PDDL operators, given the vocabulary and learned operators
    :param env: the domain
    :param factors: the factorisation of the state space
    :param operators: the learned operators
    :param vocabulary: the vocabulary
    :param operator_predicates: a mapping from learned operator and probabilistic effect to the predicates in the vocab
    :param verbose: the verbosity level
    :return: a list of PDDL operators
    """
    schemata = list()
    for i, operator in enumerate(operators):
        show("Processing {}/{} operators".format(i + 1, len(operators)), verbose)
        precondition = operator.precondition
        precondition_factors = _mask_to_factors(precondition.mask, factors)
        pddl_operators = _build_pddl_operator(env, precondition_factors, operator, vocabulary,
                                              operator_predicates, verbose=verbose,
                                              **kwargs)
        schemata.extend(pddl_operators)
    return schemata
Example #27
def _learn_effects(partitioned_options: List[PartitionedOption], verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    effects = dict()
    for partition in partitioned_options:

        option = partition.option
        show(
            "Calculating effects for option {}, partition {}:".format(
                option, partition.partition), verbose)

        probabilistic_outcomes = list()  # a list of tuples (prob, effect estimator, reward estimator)

        for j, (prob, states, rewards, next_states,
                masks) in enumerate(partition.effects()):
            show("Processing probabilistic effect {}".format(j), verbose)

            # make sure no issues with masks. They should all be the same,  else there's a problem with partitioning
            if not (masks == masks[0]).all():
                raise ValueError(
                    "Masks in effect for option {}, partition {} are different!"
                    .format(option, partition.partition))
            mask = sorted(masks[0])  # sorting to prevent any bugs ever!

            show("Fitting effect estimator", verbose)
            effect = KernelDensityEstimator(mask)
            effect.fit(next_states, verbose=verbose,
                       **kwargs)  # compute the effect

            if kwargs.get('specify_rewards', True):
                show("Fitting reward estimator", verbose)
                reward_estimator = SimpleRegressor()
                reward_estimator.fit(states,
                                     rewards,
                                     verbose=verbose,
                                     **kwargs)  # estimate the reward
            else:
                reward_estimator = None
            probabilistic_outcomes.append((prob, effect, reward_estimator))
        effects[(option, partition.partition)] = probabilistic_outcomes

    return effects
Example #28
def _partition_option(option: int, data: pd.DataFrame, verbose=False, **kwargs) -> List[PartitionedOption]:
    """
    Partition an option into ones that approximately possess the subgoal property
    :param option: the option
    :param data: option execution data
    :param verbose: the verbosity level
    :return: a list of partitioned options
    """

    data = data.reset_index(drop=True)  # reset the indices since the data is a subset of the full transition data
    partition_effects = list()
    # extract the masks
    masks = data['mask'].apply(tuple).unique()
    for mask in masks:
        samples = data.loc[_select_where(data['mask'], mask)].reset_index(drop=True)  # get samples with that mask
        clusters = _cluster_effects(samples, mask, verbose=verbose, **kwargs)  # cluster based on effects

        # TODO: this code could be improved/optimised, but will do that another time
        # now check if part of the data for each cluster should be extracted and placed in existing partition (because
        # initiation sets overlap)
        for cluster in clusters:
            new_clusters = list()
            for i, existing_cluster in enumerate(partition_effects):

                existing_shared, new_shared = _merge(existing_cluster, cluster, verbose=verbose, **kwargs)

                if len(np.unique(existing_shared)) > 1:
                    # split out old data
                    # the existing cluster loses some data
                    reduced_cluster = select_rows(existing_cluster, np.where(np.logical_not(existing_shared)))
                    partition_effects[i] = reduced_cluster
                    # that data gets added to a new cluster
                    new_clusters.append(select_rows(existing_cluster, np.where(existing_shared)))

                if len(np.unique(new_shared)) > 1:
                    # split out new data
                    # that data gets added to a new cluster
                    new_clusters.append(select_rows(cluster, np.where(new_shared)))
                    # the current cluster loses some data
                    cluster = select_rows(cluster, np.where(np.logical_not(new_shared)))

            new_clusters.append(cluster)
            partition_effects.extend(new_clusters)

    show('{} cluster(s) found'.format(len(partition_effects)), verbose)

    # we now have a set of distinct clusters (maximally split), but they may be over-partitioned.
    # Check overlap in initiation sets and merge into probabilistic option if so

    union_find = UnionFind(range(len(partition_effects)))
    for i in range(len(partition_effects) - 1):
        for j in range(i + 1, len(partition_effects)):
            show("Checking clusters {} and {}".format(i, j), verbose)
            if _is_overlap_init(partition_effects[i], partition_effects[j], verbose=verbose, **kwargs):
                # add to union find
                show("\tMerging clusters {} and {}".format(i, j), verbose)
                union_find.merge(i, j)  # these will be merged

    merged_clusters = defaultdict(list)  # groups of merged partitions
    for cluster_idx in union_find:
        group = union_find[cluster_idx]
        merged_clusters[group].append(partition_effects[cluster_idx])

    # now going to store in a data structure
    partitioned_options = list()
    for i, (_, partitions) in enumerate(merged_clusters.items()):
        combined_data = pd.concat(partitions, ignore_index=True)
        partitioned_options.append(PartitionedOption(option, i, combined_data, partitions))

    show('Total partitioned options: {}'.format(len(partitioned_options)), verbose)

    return partitioned_options
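The merge step depends on a union-find structure to group transitively overlapping clusters before they are concatenated into a single PartitionedOption; a minimal sketch of that grouping with a hand-rolled union-find standing in for the library's UnionFind class:

# Toy grouping of overlapping clusters with a hand-rolled union-find,
# standing in for the UnionFind class used above.
from collections import defaultdict

parent = list(range(5))                 # five hypothetical effect clusters

def find(i):
    while parent[i] != i:
        parent[i] = parent[parent[i]]   # path halving
        i = parent[i]
    return i

def merge(i, j):
    parent[find(i)] = find(j)

overlaps = [(0, 2), (3, 4)]             # pairs whose initiation sets overlap (made up)
for i, j in overlaps:
    merge(i, j)

groups = defaultdict(list)
for idx in range(5):
    groups[find(idx)].append(idx)
print(list(groups.values()))            # [[0, 2], [1], [3, 4]]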
Example #29
def _build_pddl_operator(env: gym.Env, precondition_factors: List[List[int]], operator: LearnedOperator,
                         vocabulary: UniquePredicateList,
                         operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]],
                         verbose=False, **kwargs) -> List[Operator]:
    """
    Generate the PDDL representation for the given operator. There may be more than one due to disjunctive preconditions
    :param env: the domain
    :param precondition_factors: the factors making up the precondition
    :param operator: the learned operator
    :param vocabulary: the vocabulary
    :param operator_predicates: a mapping from learned operator and probabilistic effect to the predicates in the vocab
    :param verbose: the verbosity level
    :return: a list of PDDL operators
    """

    pddl_operators = list()

    candidates = list()  # candidates are all possible propositions that we need to consider

    # Get all symbols whose mask matches the correct factors
    for factor in precondition_factors:
        candidates.append([proposition for proposition in vocabulary if set(proposition.mask) == set(factor)])

    high_threshold = kwargs.get('high_threshold', 0.95)
    low_threshold = kwargs.get('low_threshold', 0.1)

    # when intersecting propositions with preconditions allow for the effects to be a subspace of the precondition
    # (and have the missing variables randomly sampled)
    allow_fill_in = kwargs.get('allow_fill_in', False)

    # try out all possible combinations!
    combinations = list(itertools.product(*candidates))
    show("Searching through {} candidates...".format(len(combinations)), verbose)
    found = False
    for count, candidates in enumerate(combinations):
        show("Checking candidate {}".format(count), verbose)
        if _masks_overlap(candidates):
            # This should never happen, but putting a check to make sure
            warn("Overlapping candidates in PDDL building!")
            continue
        # get the precondition masks from the candidates. Make sure sorted to avoid bugs!
        precondition_masks = sorted(
            list(itertools.chain.from_iterable([proposition.mask for proposition in candidates])))

        # probability of propositions matching classifier
        precondition_prob = _probability_in_precondition(candidates, operator.precondition, allow_fill_in)
        if precondition_prob > low_threshold:
            # we found a match!
            found = True
            show("\tFound a match!", verbose)
            precondition_prob = round(precondition_prob, 3)  # make look nice
            pddl_operator = Operator(operator)
            pddl_operator.add_preconditions(candidates)

            remaining_probability = 1
            if precondition_prob < high_threshold:
                remaining_probability = precondition_prob
                pddl_operator.add_effect([Proposition.not_failed().negate()],
                                         1 - precondition_prob)  # add failure condition

            for i, (outcome_prob, effect, reward_estimator) in enumerate(operator.outcomes()):
                prob = outcome_prob * remaining_probability
                prob = round(prob, 3)  # make look nice
                reward = None if reward_estimator is None else reward_estimator.expected_reward(env, effect, **kwargs)
                positive_effects = operator_predicates[(operator, i)]

                # Negative effects.
                # Filter: only symbols with factors that overlap the effects mask.
                negative_effects = [x for x in vocabulary if set(x.mask).issubset(set(effect.mask))]
                # Filter: remove positive effects.
                negative_effects = [x for x in negative_effects if x not in positive_effects]
                # Filter: in the precondition - only if explicitly mentioned.
                negative_effects = [x for x in negative_effects if
                                    not (set(x.mask).issubset(precondition_masks) and (x not in candidates))]
                negative_effects = [x.negate() for x in negative_effects]
                pddl_operator.add_effect(positive_effects + negative_effects, prob, reward)
            pddl_operators.append(pddl_operator)
    if not found:
        warn("No PDDL operators found for Option {}, Partition {}".format(operator.option, operator.partition))
    return pddl_operators
Example #30
def build_pddl(env: gym.Env, transition_data: pd.DataFrame, operators: List[LearnedOperator], verbose=False,
               **kwargs) -> Tuple[List[List[int]], UniquePredicateList, List[Operator]]:
    """
    Given the learned preconditions and effects, generate a valid PDDL representation
    :param env: the domain
    :param transition_data: the transition data
    :param operators: the learned operators
    :param verbose: the verbosity level
    :return: the factors, predicates and PDDL operators
    """
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)
    # Factorise the state space: see JAIR paper for more
    show("Factorising state space...", verbose)
    n_dims = env.observation_space.shape[-1]
    factors = _factorise(operators, n_dims, verbose=verbose)

    show("Final factors:\n\n{}".format(factors), verbose)
    #
    # generate a distribution over start states
    start_symbols = _generate_start_symbols(transition_data, factors, verbose=verbose, **kwargs)
    for new_dist in start_symbols:
        vocabulary.append(new_dist, start_predicate=True)

    n_start_propositions = len(vocabulary)
    show("Start position generated {} propositions".format(n_start_propositions), verbose)

    # TODO: leaving this out for now
    # # generate a distribution over goal states
    # goal_symbols = _generate_goal_symbols(transition_data, factors, verbose=verbose, **kwargs)
    # for new_dist in goal_symbols:
    #     vocabulary.append(new_dist, goal_predicate=True)
    # show("Goal condition generated {} propositions".format(len(vocabulary) - n_start_propositions), verbose)

    n_jobs = kwargs.get('n_jobs', 1)
    # do it in parallel!
    show("Running on {} CPUs".format(n_jobs), verbose)

    show("Generating propositions...", verbose)
    # get propositions directly from effects
    operator_predicates = _generate_vocabulary(vocabulary, operators, factors, verbose=verbose, n_jobs=n_jobs)
    show("Total propositions: {}".format(len(vocabulary)), verbose)

    show("Generating full PDDL...", verbose)

    splits = np.array_split(operators, n_jobs)
    functions = [
        partial(_build_pddl_operators, env, factors, splits[i], vocabulary, operator_predicates, verbose, **kwargs)
        for i in range(n_jobs)]
    schemata = sum(run_parallel(functions), [])

    show("Found {} PDDL operators".format(len(schemata)), verbose)
    return factors, vocabulary, schemata