def _create_temp_file(self, domain: Any, problem: Any, verbose=False) -> Tuple[str, str]:
    name = next(tempfile._get_candidate_names())
    show("Generating temp PDDL file {}".format(name), verbose)
    if isinstance(domain, str):
        # it's a path to a file!
        if not exists(domain):
            raise ValueError("Could not find PDDL file at {}".format(domain))
        if not exists(problem):
            raise ValueError("Could not find PDDL file at {}".format(problem))
        with open(name, 'w') as temp_file, open(domain, 'r') as domain_file, open(problem, 'r') as problem_file:
            temp_file.write('{}\n\n{}'.format(domain_file.read(), problem_file.read()))
        problem_name = self._extract_problem_name(problem)
        return name, problem_name
    else:
        # it's the objects!
        with open(name, 'w') as temp_file:
            temp_file.write('{}\n\n{}'.format(domain, problem))
        return name, problem.name
def _generate_vocabulary_parallel(operators: List[LearnedOperator], factors: List[List[int]], verbose=False,
                                  **kwargs) -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}
    for operator in operators:
        for i, (_, effect, _) in enumerate(operator.outcomes()):
            predicates = list()
            mask = effect.mask
            factor_list = _extract_factors(mask, factors)
            if len(factor_list) == 1:
                # Independent. Go with it as-is.
                predicate = vocabulary.append(effect)
                predicates.append(predicate)
            else:
                show('{} factors: {}'.format(len(factor_list), factor_list), verbose)
                # we have a distribution over multiple factors. So extract each factor individually
                for subset in itertools.combinations(factor_list, len(factor_list) - 1):
                    # subset is every subset of factors (all but one)
                    new_dist = effect.integrate_out(np.concatenate(subset))
                    predicate = vocabulary.append(new_dist)
                    predicates.append(predicate)
            show('{} propositions generated'.format(len(predicates)), verbose)
            operator_predicates[(operator, i)] = predicates
    return operator_predicates
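# Illustrative sketch (not part of the pipeline above): the "integrate out all but one factor" pattern
# used when an effect distribution spans multiple factors. For each combination of len(factors) - 1
# factors, those variables are marginalised away, leaving one proposition per remaining factor.
# The factor_list below is hypothetical toy data.
def _example_all_but_one_subsets():
    import itertools
    import numpy as np

    factor_list = [[0, 1], [2], [3, 4]]
    to_integrate_out = [np.concatenate(subset) for subset in
                        itertools.combinations(factor_list, len(factor_list) - 1)]
    # [[0 1 2], [0 1 3 4], [2 3 4]] - each choice leaves exactly one factor behind
    return to_integrate_out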
def _generate_vocabulary(vocabulary: UniquePredicateList, operators: List[LearnedOperator],
                         factors: List[List[int]], verbose=False,
                         **kwargs) -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    """
    Generate a vocabulary for the PDDL. This includes every possible proposition that could ever be required.
    :param vocabulary: the existing vocabulary of predicates
    :param operators: the learned operators
    :param factors: the factors
    :param verbose: the verbosity level
    :return: a mapping from learned operator and probabilistic effect to the predicates in the vocabulary
    """
    # Process each option's effect sets.
    # map from (operator, probabilistic effect) -> predicates
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}

    n_jobs = kwargs.get('n_jobs', 1)
    splits = np.array_split(operators, n_jobs)
    functions = [partial(_generate_vocabulary_parallel, splits[i], factors, **kwargs) for i in range(n_jobs)]
    local_operator_predicates = run_parallel(functions)
    local_operator_predicates = dict(ChainMap(*local_operator_predicates))  # reduce to a single dict

    show("Merging propositions from {} processes".format(n_jobs), verbose)

    # take all the results generated in parallel, and collapse them into one result
    for (operator, effect_idx), local_propositions in local_operator_predicates.items():
        predicates = [vocabulary.append(x.estimator) for x in local_propositions]
        operator_predicates[(operator, effect_idx)] = predicates
    return operator_predicates
def _factorise(operators: List[LearnedOperator], n_variables: int, verbose=True) -> List[List[int]]:
    """
    Factorise the state space based on which variables are changed by the options. For more, see the JAIR 2018 paper
    :param operators: the learned operators
    :param n_variables: the number of state-space variables
    :param verbose: the verbosity level
    :return: factors: for each factor, the list of state variables
    """
    modifies = _modifies(operators, n_variables)  # check which variables are modified by each operator
    factors = list()
    options = list()

    for i in range(n_variables):
        found = False
        for x in range(len(factors)):
            f = factors[x]
            if options[x] == modifies[i]:
                f.append(i)
                found = True

        if not found:
            factors.append([i])
            options.append(modifies[i])

    show("Factors\tVariables\t\tOptions\n" + '\n'.join(
        ["F_{}\t\t{}\t{}".format(i, factors[i], options[i]) for i in range(len(factors))]), verbose)

    return factors
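# Illustrative sketch (not part of the pipeline above): how the factorisation groups state variables.
# Variables modified by exactly the same set of options end up in the same factor. The toy `modifies`
# list below is hypothetical.
def _example_factorise_by_hand():
    # modifies[i] is the set of options that change state variable i
    modifies = [{0, 1}, {0, 1}, {2}, set(), {2}]
    factors, options = [], []
    for i, mod in enumerate(modifies):
        if mod in options:
            factors[options.index(mod)].append(i)
        else:
            factors.append([i])
            options.append(mod)
    # factors == [[0, 1], [2, 4], [3]]: variables 0 and 1 share a factor, as do 2 and 4
    return factors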
def partition_options(env: gym.Env, transition_data: pd.DataFrame, verbose=False,
                      **kwargs) -> Dict[int, List[PartitionedOption]]:
    """
    Partition options so that the subgoal property is approximately preserved
    :param env: the environment
    :param transition_data: all the transition data from the environment
    :param verbose: the verbosity level
    :return: a dictionary mapping each option to its partitions
    """
    if not isinstance(env.action_space, Discrete):
        raise ValueError("Action space must be discrete")

    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    action_splits = np.array_split(range(env.action_space.n), n_jobs)
    functions = [partial(_partition_options, action_splits[i], transition_data, verbose=verbose, **kwargs)
                 for i in range(n_jobs)]
    # run in parallel
    partitioned_options = run_parallel(functions)
    partitioned_options = dict(ChainMap(*partitioned_options))
    count = sum(len(partitions) for _, partitions in partitioned_options.items())
    show("{} total partitions discovered".format(count), verbose)

    return partitioned_options
def visualise_symbols(directory: str, env: S2SEnv, symbols: Iterable[Proposition], verbose=False, **kwargs) -> None:
    """
    Visualise a set of symbols
    :param directory: the directory to save them to
    :param env: the domain
    :param symbols: the list of propositions
    :param verbose: the verbosity level
    """
    n_samples = 100
    make_dir(directory)  # make the directory if it does not exist
    for symbol in symbols:
        show("Visualising {}".format(symbol), verbose)

        samples = np.full((n_samples, env.observation_space.shape[0]), np.nan)
        samples[:, symbol.mask] = symbol.sample(n_samples)

        if kwargs.get('render', None) is not None:
            im = kwargs.get('render')(samples)
        else:
            im = Image.merge([env.render_state(state, agent_alpha=0.5) for state in samples])
        filename = '{}_{}.bmp'.format(symbol, symbol.mask)
        Image.save(im, make_path(directory, filename), mode='RGB')
def _cluster_effects(samples: pd.DataFrame, mask: List[int], verbose=False, **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects
    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)
    data = pd2np(samples['next_state'])  # convert to numpy
    masked_data = data[:, mask]  # cluster only on state variables that changed
    db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data)
    labels = db.labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero-based
    clusters = [cluster.reset_index(drop=True) for cluster in clusters]  # not in place
    return clusters
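# Minimal, self-contained sketch of the DBSCAN step used by _cluster_effects above: points that fall
# in no dense region receive the label -1 ("noise") and are dropped. The toy data and parameters are
# hypothetical, not the pipeline defaults.
def _example_dbscan_effect_clustering():
    import numpy as np
    from sklearn.cluster import DBSCAN

    rng = np.random.RandomState(0)
    effects = np.vstack([
        rng.normal(0.0, 0.01, size=(20, 2)),   # one tight effect cluster
        rng.normal(1.0, 0.01, size=(20, 2)),   # a second effect cluster
        np.array([[0.5, 5.0]]),                # an outlier that should be labelled -1
    ])
    labels = DBSCAN(eps=0.05, min_samples=5).fit(effects).labels_
    clusters = [effects[labels == label] for label in set(labels) if label != -1]
    return clusters  # two clusters; the outlier is discarded as noise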
def learn_preconditions(env: gym.Env, init_data: pd.DataFrame,
                        partitioned_options: Dict[int, List[PartitionedOption]], verbose=False,
                        **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:
    """
    Learn all the preconditions for the partitioned options
    :param env: the domain
    :param init_data: the initiation data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the classifiers
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_preconditions, init_data, partition_splits[i], partitioned_options, verbose, **kwargs)
        for i in range(n_jobs)
    ]
    # run in parallel
    preconditions: List[Dict[Tuple[int, int], PreconditionClassifier]] = run_parallel(functions)
    return dict(ChainMap(*preconditions))  # reduce to a single dict
def fit(self, X: np.ndarray, verbose=False, **kwargs) -> None:
    """
    Fit the data to the effect estimator using a grid search for the hyperparameters with cross-validation
    :param X: the data
    :param verbose: the verbosity level
    """
    if kwargs.get('masked', False):
        data = X  # already been masked
    else:
        data = X[:, self.mask]
    if data.shape[1] == 0:
        # NO EFFECT!
        self._no_effect = True
        return
    bandwidth_range = kwargs.get('effect_bandwidth_range', np.arange(0.001, 0.1, 0.001))
    params = {'bandwidth': bandwidth_range}
    grid = GridSearchCV(KernelDensity(kernel='gaussian'), params, cv=3)
    grid.fit(data)
    show("Best bandwidth hyperparameter: {}".format(grid.best_params_['bandwidth']), verbose)
    self._kde = grid.best_estimator_
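# A minimal sketch of the bandwidth selection performed in fit() above: grid-search a Gaussian
# KernelDensity over candidate bandwidths with 3-fold cross-validation. The data here is synthetic.
def _example_kde_bandwidth_search():
    import numpy as np
    from sklearn.model_selection import GridSearchCV
    from sklearn.neighbors import KernelDensity

    data = np.random.RandomState(0).normal(0, 0.02, size=(60, 1))  # toy one-dimensional effect data
    grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                        {'bandwidth': np.arange(0.001, 0.1, 0.001)}, cv=3)
    grid.fit(data)
    return grid.best_estimator_  # a KernelDensity fit with the selected bandwidth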
def _generate_start_symbols(transition_data: pd.DataFrame, factors: List[List[int]], verbose=False,
                            **kwargs) -> List[StateDensityEstimator]:
    show("Generating start state symbols...", verbose)
    # group by episode and get the first state from each
    initial_states = pd2np(transition_data.groupby('episode').nth(0)['state'])
    return _generate_symbols(initial_states, factors, verbose=verbose, **kwargs)
def _merge(existing_cluster: pd.DataFrame, new_cluster: pd.DataFrame, verbose=False,
           **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    """
    Given an existing and a new cluster, determine whether there is any overlap in their initiation sets.
    Overlapping data should be extracted and put into its own cluster
    :param existing_cluster: the existing cluster
    :param new_cluster: the new cluster
    :param verbose: the verbosity level
    :return: two boolean arrays specifying, for the existing and new cluster, which data should be extracted out
     into its own cluster
    """
    # TODO: this code could be improved/optimised, but will do that another time
    epsilon = kwargs.get('init_epsilon', 0.05)
    min_samples = kwargs.get('init_min_samples', 5)
    column = 'agent_state' if kwargs.get('view', View.PROBLEM) == View.AGENT else 'state'
    # we check the problem-space information regardless, because if we did not (and the option was not in fact
    # stochastic), we would have to correct for it later on. So just do it here
    column = 'state'
    X = pd2np(existing_cluster[column])
    Y = pd2np(new_cluster[column])
    data = np.concatenate((X, Y))
    labels = DBSCAN(eps=epsilon, min_samples=min_samples).fit_predict(data)
    existing_labels = labels[0:len(X)]  # labels of the existing partition data
    new_labels = labels[len(X):]  # labels of the new partition data

    existing_labels_set = set(existing_labels)
    new_labels_set = set(new_labels)
    shared_labels = existing_labels_set.intersection(new_labels_set)
    shared_labels.discard(-1)  # remove noise if present

    # cast the set to a list because np.isin does not accept sets
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.isin.html
    existing_shared = np.isin(existing_labels, list(shared_labels))
    new_shared = np.isin(new_labels, list(shared_labels))

    # Handle "noise" - count it as intersected if the whole group has been subsumed.
    # TODO is this actually necessary?
    if -1 in existing_labels_set and existing_labels_set.issubset(new_labels_set):
        idx = np.where(existing_labels == -1)  # find all points classified as noise
        existing_shared[idx] = True

    if -1 in new_labels_set and new_labels_set.issubset(existing_labels_set):
        idx = np.where(new_labels == -1)  # find all points classified as noise
        new_shared[idx] = True

    show("Splitting data from old cluster", verbose and len(np.unique(existing_shared)) > 1)
    show("Splitting data from new cluster", verbose and len(np.unique(new_shared)) > 1)

    return existing_shared, new_shared
def _partition_options(options: Iterable[int], transition_data: pd.DataFrame, verbose=False,
                       **kwargs) -> Dict[int, List[PartitionedOption]]:
    partitioned_options = dict()
    for option in options:
        show('Partitioning option {}'.format(option), verbose)
        # partition based on data from the current option
        partitioned_options[option] = _partition_option(option,
                                                        transition_data.loc[transition_data['option'] == option],
                                                        verbose=verbose, **kwargs)
    return partitioned_options
def _generate_state_distribution(states: np.ndarray, verbose=False, **kwargs) -> KernelDensityEstimator:
    """
    Generate a distribution over a set of states
    :param states: the states
    :param verbose: the verbosity level
    :return: a density estimate of the states
    """
    show("Fitting estimator to states", verbose)
    full_mask = range_without(0, states.shape[1])  # all the state variables
    effect = KernelDensityEstimator(full_mask)
    effect.fit(states, verbose=verbose, **kwargs)
    return effect
def find_goal_symbols(factors: List[List[int]], vocabulary: Iterable[Proposition], transition_data: pd.DataFrame,
                      verbose=False, **kwargs) -> Tuple[float, List[Proposition]]:
    """
    Find the set of symbols that best describes the goal condition. In the data, the goal being achieved is
    specified by the done flag
    :param factors: the domain factorisation
    :param vocabulary: the list of symbols
    :param transition_data: the transition data
    :param verbose: the verbosity level
    :return: the probability of the symbols modelling the goal, and the list of symbols themselves
    """
    show("Searching for goal symbols", verbose)
    # the goal states
    column = get_column_by_view('next_state', kwargs)
    positive_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == True][column])
    negative_samples = pd2np(transition_data.loc[transition_data['goal_achieved'] == False][column])

    # fit a classifier to the data
    svm = _learn_precondition(positive_samples, negative_samples, verbose=verbose, **kwargs)

    # find the existing symbols that best match the goal precondition
    show("Finding matching symbols", verbose)
    precondition_factors = _mask_to_factors(svm.mask, factors)
    candidates = list()
    for factor in precondition_factors:
        candidates.append([proposition for proposition in vocabulary if set(proposition.mask) == set(factor)])

    combinations = list(itertools.product(*candidates))
    show("Searching through {} candidates...".format(len(combinations)), verbose)
    best_score = 0
    best_candidates = None
    for count, candidates in enumerate(combinations):
        show("Checking candidate {}".format(count), verbose)
        if _masks_overlap(candidates):
            # this should never happen, but putting a check in to make sure
            warn("Overlapping candidates in PDDL building!")
            continue
        # probability of the propositions matching the classifier
        precondition_prob = _probability_in_precondition(candidates, svm)
        if precondition_prob > best_score:
            best_score = precondition_prob
            best_candidates = candidates
    show("Best candidates with probability {}: {}".format(
        best_score, ' '.join([str(x) for x in best_candidates])), verbose)
    return best_score, list(best_candidates)
def _cluster_data(samples: pd.DataFrame, column_name: str, epsilon: float, min_samples: int,
                  verbose=False) -> List[pd.DataFrame]:
    data = samples[column_name]
    # TODO how to get a non-object dtype out of pandas?
    db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(pd2np(data))
    labels = db.labels_
    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero-based
    clusters = [cluster.reset_index(drop=True) for cluster in clusters]  # not in place
    return clusters
def _learn_preconditions(init_data: pd.DataFrame, partitioned_options: List[PartitionedOption],
                         all_partitions: Dict[int, List[PartitionedOption]], verbose=False,
                         **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:
    state_column = 'state' if kwargs.get('view', View.PROBLEM) == View.PROBLEM else 'agent_state'
    preconditions = dict()
    prev_option = None
    negative_data = None
    for partition in partitioned_options:
        option = partition.option
        if option != prev_option:
            # no need to reload if there is no change
            negative_data = pd2np(init_data.loc[(init_data['option'] == option) &
                                                (init_data['can_execute'] == False)][state_column])
            # must compare to False explicitly because of how Pandas handles boolean columns
        show('Learning precondition for option {}, partition {}'.format(option, partition.partition), verbose)

        if kwargs.get('augment_negative', True):
            # augment negative samples from the initiation sets of the other partitions
            negative_samples = _augment_negative(negative_data, partition.partition, all_partitions[option])
        else:
            negative_samples = negative_data

        # this property returns either agent- or problem-space states, whichever was used to partition in the
        # first place
        positive_samples = partition.states
        show("Calculating mask for option {}, partition {} ...".format(partition.option, partition.partition),
             verbose)
        precondition = _learn_precondition(positive_samples, negative_samples, verbose=verbose, **kwargs)
        preconditions[(option, partition.partition)] = precondition
        prev_option = option
    return preconditions
def fit(self, X: np.ndarray, y: np.ndarray, verbose=False, **kwargs):
    """
    Fit the regressor to the reward data
    :param X: the initiation states
    :param y: the rewards received
    :param verbose: the verbosity level
    """
    c_range = kwargs.get('reward_c_range', np.arange(2, 50, 4))
    gamma_range = kwargs.get('reward_gamma_range', np.arange(2, 50, 4))
    param_grid = dict(gamma=gamma_range, C=c_range)
    grid = GridSearchCV(SVR(kernel='rbf'), param_grid=param_grid, cv=3)  # 3-fold CV
    grid.fit(X, y)
    show("Found best SVR hyperparams: C = {}, gamma = {}".format(
        grid.best_params_['C'], grid.best_params_['gamma']), verbose)
    self._svr = grid.best_estimator_
def _generate_symbols(states: np.ndarray, total_factors: List[List[int]], verbose=False,
                      **kwargs) -> List[StateDensityEstimator]:
    symbols = list()
    show("Fitting estimator to states", verbose)
    full_mask = range_without(0, states.shape[1])  # all the state variables
    distribution = KernelDensityEstimator(full_mask)
    distribution.fit(states, verbose=verbose, **kwargs)

    # integrate all possible combinations of factors out of the state distribution
    factors = _extract_factors(distribution.mask, total_factors)
    # we have a distribution over multiple factors. So extract each factor individually
    for subset in itertools.combinations(factors, len(factors) - 1):
        new_dist = distribution.integrate_out(np.concatenate(subset))
        symbols.append(new_dist)
    return symbols
def visualise_partitions(directory: str, env: S2SEnv, option_partitions: Dict[int, List[PartitionedOption]],
                         verbose=False, **kwargs) -> None:
    """
    Visualise a set of partitions and write them to file. For each option and partition, images of the start and
    end states (with associated probabilities) are saved to the given directory
    :param directory: the directory to save images to
    :param env: the domain
    :param option_partitions: a dictionary listing, for each option, a list of partitions
    :param verbose: the verbosity level
    """
    option_descriptor = kwargs.get('option_descriptor',
                                   lambda option: 'Option-{}'.format(option))  # a function that describes the option
    make_dir(directory)
    for option, partitions in option_partitions.items():
        show("Visualising option {} with {} partition(s)".format(option, len(partitions)), verbose)
        for partition in partitions:
            effects = list()
            for probability, states, _, next_states, mask in partition.effects():
                start = env.render_states(states, alpha_object=1, alpha_player=1)
                end = env.render_states(next_states)
                effects.append((probability, start, mask, end))
            show("Visualising option {}, partition {}".format(option, partition.partition), verbose)
            for i, (probability, start, masks, effect) in enumerate(effects):
                filename = '{}-{}-init.bmp'.format(option_descriptor(option), partition.partition)
                Image.save(start, make_path(directory, filename), mode='RGB')
                filename = '{}-{}-eff-{}-{}-{}.bmp'.format(option_descriptor(option), partition.partition, i,
                                                           round(probability * 100), list(np.unique(masks)))
                Image.save(effect, make_path(directory, filename), mode='RGB')
def _probability_in_precondition(estimators: Iterable[Proposition], precondition: PreconditionClassifier,
                                 allow_fill_in=False, verbose=False, **kwargs) -> float:
    """
    Draw samples from the estimators and feed them to the precondition. Take the average result
    :param estimators: the list of estimators
    :param precondition: the precondition
    :param verbose: the verbosity level
    :return: the probability of samples drawn from the estimators being in the precondition
    """
    mask = list()
    for predicate in estimators:
        mask.extend(predicate.mask)

    # if we are not allowed to randomly sample, and we are missing state variables, then return 0
    if not allow_fill_in and not set(mask).issuperset(set(precondition.mask)):
        return 0

    keep_indices = [i for i in range(len(mask)) if mask[i] in precondition.mask]

    # bail if there is no overlap
    if len(keep_indices) == 0:
        return 0

    # TODO:
    n_samples = kwargs.get('estimator_samples', 100)
    samples = np.hstack([predicate.sample(n_samples) for predicate in estimators])
    samples = samples[:, keep_indices]

    # if the estimators are a subset of the precondition, randomly add data to fill in
    add_list = [m for m in precondition.mask if m not in mask]
    if len(add_list) > 0:
        if not allow_fill_in:
            return 0
        show("Must randomly fill in data from {} to intersect with precondition".format(add_list), verbose)
        raise NotImplementedError

    total_mask = np.array(mask)[keep_indices]
    s_prob = 0
    for pos in range(n_samples):
        point = samples[pos, :]
        t_point = np.zeros([np.max(total_mask) + 1])
        t_point[total_mask] = point
        s_prob += precondition.probability(t_point)
    return s_prob / n_samples
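# Sketch of the Monte Carlo estimate computed above: draw samples from a (toy) state density and average
# the precondition classifier's probability over them. Both the density and the classifier here are
# stand-ins for the estimators used by the real pipeline.
def _example_probability_in_precondition():
    import numpy as np
    from sklearn.neighbors import KernelDensity
    from sklearn.svm import SVC

    rng = np.random.RandomState(0)
    density = KernelDensity(bandwidth=0.05).fit(rng.normal(1.0, 0.05, size=(50, 1)))
    classifier = SVC(probability=True).fit(
        np.vstack([rng.normal(1.0, 0.05, (50, 1)), rng.normal(0.0, 0.05, (50, 1))]),
        [1] * 50 + [0] * 50)

    samples = density.sample(100, random_state=rng)
    probs = classifier.predict_proba(samples)[:, 1]  # P(precondition holds | sampled state)
    return probs.mean()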
def _compute_precondition_mask(positive_samples: np.ndarray, negative_samples: np.ndarray, labels: List[int],
                               verbose=False, **kwargs):
    """
    Compute the precondition mask using a feature-selection procedure. These are the variables that matter
    when determining whether an option can be executed
    :param positive_samples: an array of positive states
    :param negative_samples: an array of negative states
    :param labels: labels corresponding to the positive and negative states
    :param verbose: the verbosity level
    :return: the mask
    """
    samples = np.vstack((positive_samples, negative_samples))

    # compute the precondition mask through feature selection
    mask = []
    n_vars = samples.shape[1]

    # compute the score with ALL state variables
    total_score, params = _get_orig_score_params(samples, labels, **kwargs)
    show("Score with all variables: {}".format(total_score), verbose)

    threshold = kwargs.get('mask_removal_threshold', 0.02)

    # try removing each state variable in turn and see what the score is
    for m in range(n_vars):
        used_vars = range_without(0, n_vars, m)
        subset_score = _get_subset_score(samples, labels, used_vars, params)
        if total_score - subset_score > threshold:
            # removing the variable damaged the score. So keep it!
            show("Variable {} causes damage when removed. Keeping...".format(m), verbose)
            mask.append(m)

    # if no mask, just find the best single variable so far
    if len(mask) == 0:
        mask.append(np.argmax([_get_subset_score(samples, labels, [i], params) for i in range(n_vars)]))

    threshold = kwargs.get('mask_addition_threshold', 0.001)
    latest_score = _get_subset_score(samples, labels, mask, params)

    # now try adding variables back!
    for m in range_without(0, n_vars, *mask):
        n_score = _get_subset_score(samples, labels, mask + [m], params)
        if n_score - latest_score > threshold:
            latest_score = n_score
            mask = mask + [m]
            show("Variable {} improves the score when added. Keeping...".format(m), verbose)
        if n_score == 1:
            break  # cannot improve any further

    mask.sort()  # ensure the mask is always sorted to avoid bugs down the line
    show("Final precondition mask: {} with score {}".format(mask, latest_score), verbose)
    return mask
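# Sketch of the feature-selection idea behind _compute_precondition_mask above: score a classifier on all
# variables, then keep only those variables whose removal hurts the score by more than a threshold.
# The data, classifier and threshold here are illustrative only.
def _example_precondition_mask():
    import numpy as np
    from sklearn.model_selection import cross_val_score
    from sklearn.svm import SVC

    rng = np.random.RandomState(0)
    X = rng.uniform(size=(200, 3))
    y = (X[:, 0] > 0.5).astype(int)  # only variable 0 is actually relevant

    def score(columns):
        return cross_val_score(SVC(), X[:, columns], y, cv=3).mean()

    total = score([0, 1, 2])
    threshold = 0.02
    mask = [m for m in range(3) if total - score([c for c in range(3) if c != m]) > threshold]
    return mask  # expected to contain only variable 0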
def link(self, quick_cluster: QuickCluster, verbose=False):
    used = set()
    for _, states, _, next_states, _ in self._partitioned_option.effects(View.PROBLEM):
        for s, s_prime in zip(states, next_states):
            self.add_link(quick_cluster, s, s_prime)
    for start, end, prob in self.links:
        if prob != 1:
            warnings.warn("Untested for case where linking prob != 1")
        used.add(start)
        show("Adding p_symbol{}".format(start), verbose)
        if end is None or start == end:
            end = -1
        else:
            used.add(end)
            show("Adding p_symbol{}".format(end), verbose)
        for operator in self._schemata:
            operator.add_link(start, end, prob)
    return used
def learn_effects(partitioned_options: Dict[int, List[PartitionedOption]], verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    """
    Estimate the effects from data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the probability, next-state estimators and reward estimators
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_effects, partition_splits[i], verbose, **kwargs) for i in range(n_jobs)
    ]
    # run in parallel
    effects: List[Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]] = \
        run_parallel(functions)
    return dict(ChainMap(*effects))  # reduce to a single dict
def _cluster_effects(samples: pd.DataFrame, mask: List[int], verbose=False, **kwargs) -> List[pd.DataFrame]:
    """
    Cluster samples based on their effects
    :param samples: the samples
    :param mask: the state variables modified by the option
    :param verbose: the verbosity level
    :return: a list of data frames, with each element in the list representing a single cluster
    """
    epsilon = kwargs.get('effect_epsilon', 0.05)
    min_samples = kwargs.get('effect_min_samples', 5)
    column = 'next_agent_state' if kwargs.get('view', View.PROBLEM) == View.AGENT else 'next_state'
    data = pd2np(samples[column])  # convert to numpy
    masked_data = data[:, mask]  # cluster only on state variables that changed

    if len(mask) == 0:
        # the option changes nothing, so we just assume that everything belongs to one class
        labels = np.zeros(shape=(len(masked_data),))
        if len(masked_data) < min_samples:
            labels += -1  # too few samples: mark them all as noise
    else:
        db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(masked_data)
        labels = db.labels_

    show("Found {}/{} noisy samples".format((labels == -1).sum(), len(labels)), verbose)
    clusters = list()
    for label in set(labels):
        if label == -1:
            # noise
            continue
        clusters.append(samples.loc[np.where(labels == label)])
    # reset the index back to zero-based
    clusters = [cluster.reset_index(drop=True) for cluster in clusters]  # not in place
    return clusters
def fit(self, X, y, verbose=False, **kwargs):
    """
    Fit the data to the classifier using a grid search for the hyperparameters with cross-validation
    :param X: the data
    :param y: the labels
    :param verbose: the verbosity level
    """
    c_range = kwargs.get('precondition_c_range', np.arange(1, 16, 2))
    gamma_range = kwargs.get('precondition_gamma_range', np.arange(4, 22, 2))

    param_grid = dict(gamma=gamma_range, C=c_range)
    grid = GridSearchCV(SVC(class_weight='balanced'), param_grid=param_grid, cv=3, n_jobs=-1)  # 3-fold CV
    data = X[:, self.mask]
    grid.fit(data, y)

    if not self._probabilistic:
        self._classifier = grid.best_estimator_  # we're done
    else:
        # we've found the best hyperparameters. Now do it again with Platt scaling turned on
        params = grid.best_params_
        show("Found best SVM hyperparams: C = {}, gamma = {}".format(params['C'], params['gamma']), verbose)
        # now do Platt scaling with the optimal parameters
        self._classifier = SVC(probability=True, class_weight='balanced', C=params['C'], gamma=params['gamma'])
        self._classifier.fit(data, y)
        show("Classifier score: {}".format(self._classifier.score(data, y)), verbose)
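# Sketch of the two-stage procedure in fit() above: grid-search C and gamma with a plain SVC, then refit
# with probability=True (Platt scaling) using the best hyperparameters. The data here is synthetic.
def _example_svc_platt_refit():
    import numpy as np
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    rng = np.random.RandomState(0)
    X = np.vstack([rng.normal(0, 1, (40, 2)), rng.normal(3, 1, (40, 2))])
    y = np.array([0] * 40 + [1] * 40)

    grid = GridSearchCV(SVC(class_weight='balanced'),
                        {'C': np.arange(1, 16, 2), 'gamma': np.arange(4, 22, 2)}, cv=3, n_jobs=-1)
    grid.fit(X, y)
    params = grid.best_params_
    classifier = SVC(probability=True, class_weight='balanced', C=params['C'], gamma=params['gamma'])
    classifier.fit(X, y)
    return classifier  # supports predict_proba thanks to Platt scaling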
def _build_pddl_operators(env: gym.Env, factors: List[List[int]], operators: List[LearnedOperator],
                          vocabulary: UniquePredicateList,
                          operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]],
                          verbose=False, **kwargs):
    """
    Generate the high-level PDDL operators, given the vocabulary and learned operators
    :param env: the domain
    :param factors: the factorisation of the state space
    :param operators: the learned operators
    :param vocabulary: the vocabulary
    :param operator_predicates: a mapping from learned operator and probabilistic effect to the predicates in the vocab
    :param verbose: the verbosity level
    :return: a list of PDDL operators
    """
    schemata = list()
    for i, operator in enumerate(operators):
        show("Processing {}/{} operators".format(i + 1, len(operators)), verbose)
        precondition = operator.precondition
        precondition_factors = _mask_to_factors(precondition.mask, factors)
        pddl_operators = _build_pddl_operator(env, precondition_factors, operator, vocabulary,
                                              operator_predicates, verbose=verbose, **kwargs)
        schemata.extend(pddl_operators)
    return schemata
def _learn_effects(partitioned_options: List[PartitionedOption], verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    effects = dict()
    for partition in partitioned_options:
        option = partition.option
        show("Calculating effects for option {}, partition {}:".format(option, partition.partition), verbose)
        probabilistic_outcomes = list()  # a list of tuples (prob, effect estimator, reward estimator)
        for j, (prob, states, rewards, next_states, masks) in enumerate(partition.effects()):
            show("Processing probabilistic effect {}".format(j), verbose)
            # make sure there are no issues with the masks. They should all be the same,
            # else there's a problem with partitioning
            if not (masks == masks[0]).all():
                raise ValueError("Masks in effect for option {}, partition {} are different!"
                                 .format(option, partition.partition))
            mask = sorted(masks[0])  # sorting to prevent any bugs ever!
            show("Fitting effect estimator", verbose)
            effect = KernelDensityEstimator(mask)
            effect.fit(next_states, verbose=verbose, **kwargs)  # compute the effect

            if kwargs.get('specify_rewards', True):
                show("Fitting reward estimator", verbose)
                reward_estimator = SimpleRegressor()
                reward_estimator.fit(states, rewards, verbose=verbose, **kwargs)  # estimate the reward
            else:
                reward_estimator = None
            probabilistic_outcomes.append((prob, effect, reward_estimator))
        effects[(option, partition.partition)] = probabilistic_outcomes
    return effects
def _partition_option(option: int, data: pd.DataFrame, verbose=False, **kwargs) -> List[PartitionedOption]:
    """
    Partition an option into ones that approximately possess the subgoal property
    :param option: the option
    :param data: option execution data
    :param verbose: the verbosity level
    :return: a list of partitioned options
    """
    data = data.reset_index(drop=True)  # reset the indices since the data is a subset of the full transition data
    partition_effects = list()
    # extract the masks
    masks = data['mask'].apply(tuple).unique()
    for mask in masks:
        samples = data.loc[_select_where(data['mask'], mask)].reset_index(drop=True)  # get samples with that mask
        clusters = _cluster_effects(samples, mask, verbose=verbose, **kwargs)  # cluster based on effects

        # TODO: this code could be improved/optimised, but will do that another time
        # now check if part of the data for each cluster should be extracted and placed in an existing partition
        # (because the initiation sets overlap)
        for cluster in clusters:
            new_clusters = list()
            for i, existing_cluster in enumerate(partition_effects):
                existing_shared, new_shared = _merge(existing_cluster, cluster, verbose=verbose, **kwargs)

                if len(np.unique(existing_shared)) > 1:
                    # split out old data
                    # the existing cluster loses some data
                    reduced_cluster = select_rows(existing_cluster, np.where(np.logical_not(existing_shared)))
                    partition_effects[i] = reduced_cluster
                    # that data gets added to a new cluster
                    new_clusters.append(select_rows(existing_cluster, np.where(existing_shared)))

                if len(np.unique(new_shared)) > 1:
                    # split out new data
                    # that data gets added to a new cluster
                    new_clusters.append(select_rows(cluster, np.where(new_shared)))
                    # the current cluster loses some data
                    cluster = select_rows(cluster, np.where(np.logical_not(new_shared)))

            new_clusters.append(cluster)
            partition_effects.extend(new_clusters)

    show('{} cluster(s) found'.format(len(partition_effects)), verbose)

    # we now have a set of distinct clusters (maximally split), but they may be over-partitioned.
    # Check overlap in initiation sets and merge into a probabilistic option if so
    union_find = UnionFind(range(len(partition_effects)))
    for i in range(len(partition_effects) - 1):
        for j in range(i + 1, len(partition_effects)):
            show("Checking clusters {} and {}".format(i, j), verbose)
            if _is_overlap_init(partition_effects[i], partition_effects[j], verbose=verbose, **kwargs):
                # add to union find
                show("\tMerging clusters {} and {}".format(i, j), verbose)
                union_find.merge(i, j)  # these will be merged

    merged_clusters = defaultdict(list)  # groups of merged partitions
    for cluster_idx in union_find:
        group = union_find[cluster_idx]
        merged_clusters[group].append(partition_effects[cluster_idx])

    # now store everything in a data structure
    partitioned_options = list()
    for i, (_, partitions) in enumerate(merged_clusters.items()):
        combined_data = pd.concat(partitions, ignore_index=True)
        partitioned_options.append(PartitionedOption(option, i, combined_data, partitions))

    show('Total partitioned options: {}'.format(len(partitioned_options)), verbose)
    return partitioned_options
def _build_pddl_operator(env: gym.Env, precondition_factors: List[List[int]], operator: LearnedOperator,
                         vocabulary: UniquePredicateList,
                         operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]],
                         verbose=False, **kwargs) -> List[Operator]:
    """
    Generate the PDDL representation for the given operator. There may be more than one due to
    disjunctive preconditions
    :param env: the domain
    :param precondition_factors: the factors making up the precondition
    :param operator: the learned operator
    :param vocabulary: the vocabulary
    :param operator_predicates: a mapping from learned operator and probabilistic effect to the predicates in the vocab
    :param verbose: the verbosity level
    :return: a list of PDDL operators
    """
    pddl_operators = list()
    candidates = list()  # candidates are all possible propositions that we need to consider

    # get all symbols whose mask matches the correct factors
    for factor in precondition_factors:
        candidates.append([proposition for proposition in vocabulary if set(proposition.mask) == set(factor)])

    high_threshold = kwargs.get('high_threshold', 0.95)
    low_threshold = kwargs.get('low_threshold', 0.1)
    # when intersecting propositions with preconditions, allow the effects to be a subspace of the precondition
    # (and have the missing variables randomly sampled)
    allow_fill_in = kwargs.get('allow_fill_in', False)

    # try out all possible combinations!
    combinations = list(itertools.product(*candidates))
    show("Searching through {} candidates...".format(len(combinations)), verbose)

    found = False
    for count, candidates in enumerate(combinations):
        show("Checking candidate {}".format(count), verbose)
        if _masks_overlap(candidates):
            # this should never happen, but putting a check in to make sure
            warn("Overlapping candidates in PDDL building!")
            continue

        # get the precondition masks from the candidates. Make sure they are sorted to avoid bugs!
        precondition_masks = sorted(
            list(itertools.chain.from_iterable([proposition.mask for proposition in candidates])))

        # probability of the propositions matching the classifier
        precondition_prob = _probability_in_precondition(candidates, operator.precondition, allow_fill_in)
        if precondition_prob > low_threshold:
            # we found a match!
            found = True
            show("\tFound a match!", verbose)
            precondition_prob = round(precondition_prob, 3)  # make it look nice
            pddl_operator = Operator(operator)
            pddl_operator.add_preconditions(candidates)

            remaining_probability = 1
            if precondition_prob < high_threshold:
                remaining_probability = precondition_prob
                pddl_operator.add_effect([Proposition.not_failed().negate()],
                                         1 - precondition_prob)  # add the failure condition

            for i, (outcome_prob, effect, reward_estimator) in enumerate(operator.outcomes()):
                prob = outcome_prob * remaining_probability
                prob = round(prob, 3)  # make it look nice
                reward = None if reward_estimator is None else reward_estimator.expected_reward(env, effect, **kwargs)
                positive_effects = operator_predicates[(operator, i)]

                # negative effects:
                # Filter: only symbols with factors that overlap the effect's mask.
                negative_effects = [x for x in vocabulary if set(x.mask).issubset(set(effect.mask))]
                # Filter: remove the positive effects.
                negative_effects = [x for x in negative_effects if x not in positive_effects]
                # Filter: symbols over the precondition factors are negated only if they were explicitly
                # part of the precondition.
                negative_effects = [x for x in negative_effects
                                    if not (set(x.mask).issubset(precondition_masks) and (x not in candidates))]
                negative_effects = [x.negate() for x in negative_effects]
                pddl_operator.add_effect(positive_effects + negative_effects, prob, reward)
            pddl_operators.append(pddl_operator)
    if not found:
        warn("No PDDL operators found for Option {}, Partition {}".format(operator.option, operator.partition))
    return pddl_operators
def build_pddl(env: gym.Env, transition_data: pd.DataFrame, operators: List[LearnedOperator], verbose=False,
               **kwargs) -> Tuple[List[List[int]], UniquePredicateList, List[Operator]]:
    """
    Given the learned preconditions and effects, generate a valid PDDL representation
    :param env: the domain
    :param transition_data: the transition data
    :param operators: the learned operators
    :param verbose: the verbosity level
    :return: the factors, predicates and PDDL operators
    """
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)
    # factorise the state space: see the JAIR paper for more
    show("Factorising state space...", verbose)
    n_dims = env.observation_space.shape[-1]
    factors = _factorise(operators, n_dims, verbose=verbose)

    show("Final factors:\n\n{}".format(factors), verbose)

    # generate a distribution over start states
    start_symbols = _generate_start_symbols(transition_data, factors, verbose=verbose, **kwargs)
    for new_dist in start_symbols:
        vocabulary.append(new_dist, start_predicate=True)
    n_start_propositions = len(vocabulary)
    show("Start position generated {} propositions".format(n_start_propositions), verbose)

    # TODO: leaving this out for now
    # # generate a distribution over goal states
    # goal_symbols = _generate_goal_symbols(transition_data, factors, verbose=verbose, **kwargs)
    # for new_dist in goal_symbols:
    #     vocabulary.append(new_dist, goal_predicate=True)
    # show("Goal condition generated {} propositions".format(len(vocabulary) - n_start_propositions), verbose)

    n_jobs = kwargs.get('n_jobs', 1)  # do it in parallel!
    show("Running on {} CPUs".format(n_jobs), verbose)

    show("Generating propositions...", verbose)
    # get propositions directly from the effects
    operator_predicates = _generate_vocabulary(vocabulary, operators, factors, verbose=verbose, n_jobs=n_jobs)
    show("Total propositions: {}".format(len(vocabulary)), verbose)

    show("Generating full PDDL...", verbose)
    splits = np.array_split(operators, n_jobs)
    functions = [
        partial(_build_pddl_operators, env, factors, splits[i], vocabulary, operator_predicates, verbose, **kwargs)
        for i in range(n_jobs)]
    schemata = sum(run_parallel(functions), [])

    show("Found {} PDDL operators".format(len(schemata)), verbose)
    return factors, vocabulary, schemata
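# A hedged end-to-end sketch of how the functions above might be chained. `env`, `transition_data` and
# `init_data` are assumed to come from data collection elsewhere in the repository, and
# `combine_into_operators` is a hypothetical helper that packages partitions, preconditions and effects
# into LearnedOperator objects; it is not defined in this file.
def _example_pipeline(env, transition_data, init_data):
    partitions = partition_options(env, transition_data, verbose=True, n_jobs=4)
    preconditions = learn_preconditions(env, init_data, partitions, verbose=True, n_jobs=4)
    effects = learn_effects(partitions, verbose=True, n_jobs=4)
    operators = combine_into_operators(partitions, preconditions, effects)  # hypothetical helper
    factors, vocabulary, schemata = build_pddl(env, transition_data, operators, verbose=True, n_jobs=4)
    return factors, vocabulary, schemata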