def partition_options(env: gym.Env, transition_data: pd.DataFrame, verbose=False, **kwargs) \
        -> Dict[int, List[PartitionedOption]]:
    """
    Partition options so that the subgoal property is approximately preserved
    :param env: the environment
    :param transition_data: all the transition data from the environment
    :param verbose: the verbosity level
    :return: a dictionary mapping each option to its partitions
    """
    if not isinstance(env.action_space, Discrete):
        raise ValueError("Action space must be discrete")
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    action_splits = np.array_split(range(env.action_space.n), n_jobs)
    functions = [partial(_partition_options, action_splits[i], transition_data, verbose=verbose, **kwargs)
                 for i in range(n_jobs)]
    # partition each split of options in parallel
    partitioned_options = run_parallel(functions)
    partitioned_options = dict(ChainMap(*partitioned_options))  # reduce to a single dict
    count = sum(len(partitions) for _, partitions in partitioned_options.items())
    show("{} total partitions discovered".format(count), verbose)
    return partitioned_options
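
# Example usage (a minimal sketch; the variable names and argument values are illustrative,
# not prescriptive): `env` is assumed to be a gym.Env with a Discrete action space and
# `transition_data` a frame produced by `collect_data` below. `n_jobs` controls how many
# worker processes are spawned.
#
#   partitioned_options = partition_options(env, transition_data, verbose=True, n_jobs=4)
#   for option, partitions in partitioned_options.items():
#       print("Option {}: {} partitions".format(option, len(partitions)))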
def _generate_vocabulary(vocabulary: UniquePredicateList, operators: List[LearnedOperator],
                         factors: List[List[int]], verbose=False, **kwargs) \
        -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    """
    Generate a vocabulary for the PDDL. This includes every possible proposition that could ever be required.
    :param vocabulary: the existing vocabulary of predicates
    :param operators: the learned operators
    :param factors: the factors
    :param verbose: the verbosity level
    :return: a mapping from learned operator and probabilistic effect to the predicates in the vocabulary
    """
    # Process each option's effect sets.
    # map from (operator, probabilistic effect) -> predicates
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}
    n_jobs = kwargs.get('n_jobs', 1)
    splits = np.array_split(operators, n_jobs)
    functions = [partial(_generate_vocabulary_parallel, splits[i], factors, **kwargs) for i in range(n_jobs)]
    local_operator_predicates = run_parallel(functions)
    local_operator_predicates = dict(ChainMap(*local_operator_predicates))  # reduce to a single dict
    show("Merging propositions from {} processes".format(n_jobs), verbose)
    # take all the results generated in parallel, and collapse into one result.
    for (operator, effect_idx), local_propositions in local_operator_predicates.items():
        predicates = [vocabulary.append(x.estimator) for x in local_propositions]
        operator_predicates[(operator, effect_idx)] = predicates
    return operator_predicates
def learn_preconditions(env: gym.Env, init_data: pd.DataFrame,
                        partitioned_options: Dict[int, List[PartitionedOption]], verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], PreconditionClassifier]:
    """
    Learn all the preconditions for the partitioned options
    :param env: the domain
    :param init_data: the initiation data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the precondition classifiers, keyed by (option, partition)
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_preconditions, init_data, partition_splits[i], partitioned_options, verbose, **kwargs)
        for i in range(n_jobs)
    ]
    # learn each split of partitions in parallel
    preconditions: List[Dict[Tuple[int, int], PreconditionClassifier]] = run_parallel(functions)
    return dict(ChainMap(*preconditions))  # reduce to a single dict
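
# A minimal sketch of how the returned mapping is typically consumed (argument values are
# illustrative): the keys are (option, partition) pairs and each value is the precondition
# classifier learned for that partition. `initiation_data` is assumed to come from
# `collect_data` below.
#
#   preconditions = learn_preconditions(env, initiation_data, partitioned_options, verbose=True, n_jobs=4)
#   for (option, partition), classifier in preconditions.items():
#       print("Learned precondition for option {}, partition {}".format(option, partition))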
def build_pddl(env: MultiViewEnv, transition_data: pd.DataFrame, operators: List[LearnedOperator],
               verbose=False, **kwargs) -> Tuple[List[List[int]], UniquePredicateList, List[PDDLOperator]]:
    """
    Given the learned preconditions and effects, generate a valid PDDL representation
    :param env: the domain
    :param transition_data: the transition data
    :param operators: the learned operators
    :param verbose: the verbosity level
    :return: the factors, predicates and PDDL operators
    """
    n_jobs = kwargs.get('n_jobs', 1)
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)

    # Factorise the state space: see the JAIR paper for more
    show("Factorising state space...", verbose)
    factors = _factorise(operators, env.n_dims(kwargs.get('view', View.PROBLEM)), verbose=verbose)
    show("Final factors:\n\n{}".format(factors), verbose)

    # generate a distribution over start states
    start_symbols = _generate_start_symbols(transition_data, factors, verbose=verbose, **kwargs)
    for new_dist in start_symbols:
        vocabulary.append(new_dist, start_predicate=True)
    n_start_propositions = len(vocabulary)
    show("Start position generated {} propositions".format(n_start_propositions), verbose)

    # TODO: leaving this out for now
    # # generate a distribution over goal states
    # goal_symbols = _generate_goal_symbols(transition_data, factors, verbose=verbose, **kwargs)
    # for new_dist in goal_symbols:
    #     vocabulary.append(new_dist, goal_predicate=True)
    # show("Goal condition generated {} propositions".format(len(vocabulary) - n_start_propositions), verbose)

    show("Running on {} CPUs".format(n_jobs), verbose)
    show("Generating propositions...", verbose)
    # get propositions directly from effects
    operator_predicates = _generate_vocabulary(vocabulary, operators, factors, verbose=verbose, n_jobs=n_jobs)
    show("Total propositions: {}".format(len(vocabulary)), verbose)

    # checkpoint the intermediate results to disk, then reload them
    save((factors, vocabulary, operator_predicates))
    (factors, vocabulary, operator_predicates) = load()

    show("Generating full PDDL...", verbose)
    splits = np.array_split(operators, n_jobs)
    functions = [
        partial(_build_pddl_operators, env, factors, splits[i], vocabulary, operator_predicates, verbose, **kwargs)
        for i in range(n_jobs)]
    schemata = sum(run_parallel(functions), [])
    show("Found {} PDDL operators".format(len(schemata)), verbose)
    return factors, vocabulary, schemata
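
# A minimal end-to-end sketch (names and argument values are illustrative, not prescriptive):
# given learned operators built from the preconditions and effects above, `build_pddl` returns
# the factors, the proposition vocabulary and the PDDL operator schemata.
#
#   factors, vocabulary, schemata = build_pddl(env, transition_data, operators, verbose=True, n_jobs=8)
#   print("{} factors, {} propositions, {} PDDL operators".format(len(factors), len(vocabulary), len(schemata)))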
def collect_data(env: S2SWrapper, max_timestep=np.inf, max_episode=np.inf, verbose=False, seed=None,
                 n_jobs=1, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Collect data from the environment through uniform random exploration in parallel
    :param env: the environment
    :param max_timestep: the maximum number of timesteps in total (not to be confused with the maximum number of
    timesteps per episode). Default is infinity
    :param max_episode: the maximum number of episodes. Default is infinity
    :param verbose: whether to print additional information
    :param seed: the random seed. Use for reproducibility
    :param n_jobs: the number of processes to spawn to collect data in parallel. If -1, use all CPUs
    :return: data frames holding transition and initiation data
    """
    if max_timestep == np.inf and max_episode == np.inf:
        raise ValueError('Must specify at least a maximum timestep or episode limit')
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
    if n_jobs == -1:
        n_jobs = multiprocessing.cpu_count()

    # divide the collection budget evenly among the worker processes
    max_timestep /= n_jobs
    max_episode /= n_jobs
    functions = [
        partial(_collect_data, env, np.random.randint(0, 1000000), max_timestep, max_episode, verbose,
                # each worker gets a distinct episode-number offset (0 when the episode budget is unbounded)
                0 if max_episode == np.inf else int(max_episode * i), **kwargs)
        for i in range(n_jobs)
    ]
    results = run_parallel(functions)
    transition_data = pd.concat([x[0] for x in results], ignore_index=True)
    initiation_data = pd.concat([x[1] for x in results], ignore_index=True)
    return transition_data, initiation_data
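
# Example usage (a sketch; argument values are illustrative): collect roughly 40 episodes of
# uniform random exploration across 4 worker processes with a fixed seed. `env` is assumed to
# already be wrapped in an S2SWrapper.
#
#   transition_data, initiation_data = collect_data(env, max_episode=40, seed=0, n_jobs=4, verbose=True)
#   print("{} transitions, {} initiation samples".format(len(transition_data), len(initiation_data)))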
def learn_effects(partitioned_options: Dict[int, List[PartitionedOption]], verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    """
    Estimate the effects from data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: for each (option, partition) pair, a list of (probability, next-state estimator, reward estimator) tuples
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_effects, partition_splits[i], verbose, **kwargs) for i in range(n_jobs)
    ]
    # estimate each split of partitions in parallel
    effects: List[Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]] = \
        run_parallel(functions)
    return dict(ChainMap(*effects))  # reduce to a single dict
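
# A minimal sketch of consuming the learned effects (argument values are illustrative): each
# (option, partition) key maps to a list of probabilistic outcomes, where each outcome is a
# (probability, next-state density estimator, reward regressor) tuple.
#
#   effects = learn_effects(partitioned_options, verbose=True, n_jobs=4)
#   for (option, partition), outcomes in effects.items():
#       for probability, density, reward in outcomes:
#           print("Option {}, partition {}: outcome with probability {:.2f}".format(option, partition, probability))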