Example 1
def partition_options(env: gym.Env, transition_data: pd.DataFrame,
                      verbose=False, **kwargs) -> Dict[int, List[PartitionedOption]]:
    """
    Partition options so that the subgoal property is approximately preserved
    :param env: the environment
    :param transition_data: all the transition data from the environment
    :param verbose: the verbosity level
    :return: a dictionary mapping each option to its partitions
    """
    if not isinstance(env.action_space, Discrete):
        raise ValueError("Action space must be discrete")

    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    action_splits = np.array_split(range(env.action_space.n), n_jobs)

    functions = [partial(_partition_options, action_splits[i], transition_data, verbose=verbose, **kwargs)
                 for i in range(n_jobs)]
    # run in parallel
    partitioned_options = run_parallel(functions)
    partitioned_options = dict(ChainMap(*partitioned_options))
    count = sum(len(partitions) for partitions in partitioned_options.values())

    show("{} total partitions discovered".format(count), verbose)
    return partitioned_options
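All of these examples depend on two helpers, run_parallel and show, whose definitions are not included here. Below is a minimal sketch of plausible implementations; this is an assumption, and the library's real versions may differ:

import multiprocessing


def show(message, verbose):
    # print only when verbose output was requested
    if verbose:
        print(message)


def run_parallel(functions):
    # execute a list of zero-argument callables in separate processes,
    # returning their results in order
    with multiprocessing.Pool(processes=len(functions)) as pool:
        async_results = [pool.apply_async(f) for f in functions]
        return [r.get() for r in async_results]

Note also that np.array_split yields n_jobs near-equal chunks (np.array_split(range(10), 3) gives chunks of sizes 4, 3 and 3), and some chunks are empty when n_jobs exceeds the number of items.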
Example 2
def _generate_vocabulary(vocabulary: UniquePredicateList, operators: List[LearnedOperator], factors: List[List[int]],
                         verbose=False, **kwargs) -> Dict[Tuple[LearnedOperator, int], List[Proposition]]:
    """
    Generate a vocabulary for the PDDL. This includes every possible proposition that could ever be required.
    :param vocabulary: the existing vocabulary of predicates
    :param operators: the learned operators
    :param factors: the factors
    :param verbose: the verbosity level
    :return: a mapping from learned operator and probabilistic effect to the predicates in the vocabulary
    """
    # Process each option's effect sets.
    # map from (operator, probabilistic effect) -> predicates
    operator_predicates: Dict[Tuple[LearnedOperator, int], List[Proposition]] = {}

    n_jobs = kwargs.get('n_jobs', 1)
    splits = np.array_split(operators, n_jobs)
    functions = [partial(_generate_vocabulary_parallel, splits[i], factors, **kwargs) for i in range(n_jobs)]
    local_operator_predicates = run_parallel(functions)
    local_operator_predicates = dict(ChainMap(*local_operator_predicates))  # reduce to single dict

    show("Merging propositions from {} processes".format(n_jobs), verbose)

    # take all the results generated in parallel, and collapse into one result.
    for (operator, effect_idx), local_propositions in local_operator_predicates.items():
        predicates = [vocabulary.append(x.estimator) for x in local_propositions]
        operator_predicates[(operator, effect_idx)] = predicates
    return operator_predicates
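The split / parallel-map / ChainMap-reduce idiom used above can be seen in isolation with toy data. The names below are illustrative, not from the source:

from collections import ChainMap

import numpy as np


def _square_chunk(chunk):
    # each worker returns a local dict; keys are disjoint across workers
    return {int(x): int(x) ** 2 for x in chunk}


items = np.arange(10)
splits = np.array_split(items, 3)                   # three near-equal chunks
local_results = [_square_chunk(s) for s in splits]  # stand-in for run_parallel
merged = dict(ChainMap(*local_results))             # reduce to a single dict
assert len(merged) == len(items)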
Example 3
def learn_preconditions(
        env: gym.Env,
        init_data: pd.DataFrame,
        partitioned_options: Dict[int, List[PartitionedOption]],
        verbose=False,
        **kwargs) -> Dict[Tuple[int, int], PreconditionClassifier]:
    """
    Learn all the preconditions for the partitioned options
    :param env: the domain
    :param init_data: the initiation data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: the classifiers
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_preconditions, init_data, partition_splits[i],
                partitioned_options, verbose, **kwargs) for i in range(n_jobs)
    ]
    # run in parallel
    preconditions: List[Dict[Tuple[int, int],
                             PreconditionClassifier]] = run_parallel(functions)
    return dict(ChainMap(*preconditions))  # reduce to single dict
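A hypothetical usage sketch wiring Examples 1 and 3 together; env, transition_data and init_data are assumed to come from the data-collection step shown in Example 5:

# partition every option, then learn a precondition classifier
# for each (option, partition) pair
partitioned = partition_options(env, transition_data, verbose=True, n_jobs=4)
preconditions = learn_preconditions(env, init_data, partitioned,
                                    verbose=True, n_jobs=4)
for (option, partition), classifier in preconditions.items():
    print("option {}, partition {}: {}".format(option, partition, classifier))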
Example 4
def build_pddl(env: MultiViewEnv, transition_data: pd.DataFrame, operators: List[LearnedOperator], verbose=False,
               **kwargs) -> Tuple[List[List[int]], UniquePredicateList, List[PDDLOperator]]:
    """
    Given the learned preconditions and effects, generate a valid PDDL representation
    :param env: the domain
    :param transition_data: the transition data
    :param operators: the learned operators
    :param verbose: the verbosity level
    :return: the factors, predicates and PDDL operators
    """
    n_jobs = kwargs.get('n_jobs', 1)
    dist_comparator = kwargs.get('dist_comparator', _overlapping_dists)
    vocabulary = UniquePredicateList(dist_comparator)
    # Factorise the state space: see JAIR paper for more
    show("Factorising state space...", verbose)
    factors = _factorise(operators, env.n_dims(kwargs.get('view', View.PROBLEM)), verbose=verbose)

    show("Final factors:\n\n{}".format(factors), verbose)
    # generate a distribution over start states
    start_symbols = _generate_start_symbols(transition_data, factors, verbose=verbose, **kwargs)
    for new_dist in start_symbols:
        vocabulary.append(new_dist, start_predicate=True)

    n_start_propositions = len(vocabulary)
    show("Start position generated {} propositions".format(n_start_propositions), verbose)

    # TODO: leaving this out for now
    # # generate a distribution over goal states
    # goal_symbols = _generate_goal_symbols(transition_data, factors, verbose=verbose, **kwargs)
    # for new_dist in goal_symbols:
    #     vocabulary.append(new_dist, goal_predicate=True)
    # show("Goal condition generated {} propositions".format(len(vocabulary) - n_start_propositions), verbose)

    show("Running on {} CPUs".format(n_jobs), verbose)

    show("Generating propositions...", verbose)
    # get propositions directly from effects
    operator_predicates = _generate_vocabulary(vocabulary, operators, factors, verbose=verbose, n_jobs=n_jobs)
    show("Total propositions: {}".format(len(vocabulary)), verbose)

    # checkpoint: persist the intermediate results, then reload them
    # (useful when re-running only the later PDDL-generation stage)
    save((factors, vocabulary, operator_predicates))
    (factors, vocabulary, operator_predicates) = load()

    show("Generating full PDDL...", verbose)

    splits = np.array_split(operators, n_jobs)
    functions = [
        partial(_build_pddl_operators, env, factors, splits[i], vocabulary, operator_predicates, verbose, **kwargs)
        for i in range(n_jobs)]
    schemata = sum(run_parallel(functions), [])

    show("Found {} PDDL operators".format(len(schemata)), verbose)
    return factors, vocabulary, schemata
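A hypothetical end-to-end call; env is assumed to be a MultiViewEnv and operators the learned operators produced by the earlier steps:

factors, vocabulary, schemata = build_pddl(env, transition_data, operators,
                                           verbose=True, n_jobs=8)
print("{} factors, {} propositions, {} PDDL operators".format(
    len(factors), len(vocabulary), len(schemata)))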
Example 5
def collect_data(env: S2SWrapper,
                 max_timestep=np.inf,
                 max_episode=np.inf,
                 verbose=False,
                 seed=None,
                 n_jobs=1,
                 **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Collect data from the environment through uniform random exploration in parallel

    :param env: the environment
    :param max_timestep: the maximum total number of timesteps across all episodes (not the per-episode limit). Default is infinity
    :param max_episode: the maximum number of episodes. Default is infinity
    :param verbose: whether to print additional information
    :param seed: the random seed, for reproducibility
    :param n_jobs: the number of processes to spawn to collect data in parallel. If -1, use all CPUs
    :return: data frames holding the transition and initiation data
    """
    if max_timestep == np.inf and max_episode == np.inf:
        raise ValueError(
            'Must specify at least a maximum timestep or episode limit')

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    if n_jobs == -1:
        n_jobs = multiprocessing.cpu_count()

    # divide the limits across the workers and run collection in parallel
    max_timestep /= n_jobs
    max_episode /= n_jobs

    functions = [
        partial(_collect_data, env,
                np.random.randint(0, 1000000),  # per-worker seed
                max_timestep, max_episode, verbose,
                # episode-number offset per worker; zero when no episode limit was given
                int(max_episode * i) if np.isfinite(max_episode) else 0,
                **kwargs) for i in range(n_jobs)
    ]

    results = run_parallel(functions)
    transition_data = pd.concat([x[0] for x in results], ignore_index=True)
    initiation_data = pd.concat([x[1] for x in results], ignore_index=True)
    return transition_data, initiation_data
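Hypothetical usage with a fixed seed for reproducibility; the S2SWrapper constructor call is an assumption:

import gym

env = S2SWrapper(gym.make('CartPole-v1'))  # wrapper signature is an assumption
transition_data, initiation_data = collect_data(env,
                                                max_episode=40,
                                                seed=0,
                                                verbose=True,
                                                n_jobs=4)
print("{} transitions collected".format(len(transition_data)))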
Example 6
def learn_effects(partitioned_options: Dict[int, List[PartitionedOption]],
                  verbose=False, **kwargs) \
        -> Dict[Tuple[int, int], List[Tuple[float, StateDensityEstimator, RewardRegressor]]]:
    """
    Estimate the effects from data
    :param partitioned_options: the partitioned options (a dictionary containing a list of partitions for each option)
    :param verbose: the verbosity level
    :return: a mapping from (option, partition) to a list of (probability, next-state estimator, reward estimator) tuples
    """
    n_jobs = kwargs.get('n_jobs', 1)
    show("Running on {} CPUs".format(n_jobs), verbose)
    partition_splits = np.array_split(_flatten(partitioned_options), n_jobs)
    functions = [
        partial(_learn_effects, partition_splits[i], verbose, **kwargs)
        for i in range(n_jobs)
    ]
    # run in parallel
    effects: List[Dict[Tuple[int, int],
                       List[Tuple[float, StateDensityEstimator,
                                  RewardRegressor]]]] = run_parallel(functions)
    return dict(ChainMap(*effects))  # reduce to single dict
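A hypothetical way to inspect the returned mapping; each entry holds one or more probabilistic outcomes for an (option, partition) pair:

effects = learn_effects(partitioned, verbose=True, n_jobs=4)
for (option, partition), outcomes in effects.items():
    for probability, next_state_estimator, reward_estimator in outcomes:
        # probability of this outcome, with its next-state and reward models
        print(option, partition, probability)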