Esempio n. 1
0
    def objective_two(
        individual: cgp.IndividualSingleGenome,
        cum_reward_threshold: int,
        gamma: float,
        seed: int,
        rng: np.random.Generator,
    ):
        """Evaluate *individual* on CartPole using its already-trained network.

        Fitness is delegated to ``inner_objective`` in ``'episode_min'`` mode
        with the network stored on the individual (assigned by
        ``objective_one``). Individuals that already carry a fitness are
        returned unchanged.

        Parameters
        ----------
        individual : cgp.IndividualSingleGenome
            Candidate solution; must expose a ``network`` attribute.
        cum_reward_threshold : int
            Cumulative-reward target forwarded to ``inner_objective``.
        gamma : float
            Discount factor forwarded to ``inner_objective``.
        seed : int
            Seed forwarded to ``inner_objective``.
        rng : np.random.Generator
            Random generator forwarded to ``inner_objective``.

        Returns
        -------
        cgp.IndividualSingleGenome
            The same individual with its ``fitness`` attribute set.
        """
        # Skip re-evaluation: fitness already assigned in a previous call.
        if not individual.fitness_is_None():
            return individual

        # environment initialization
        env = gym.make('CartPole-v0')

        try:
            individual.fitness = inner_objective(
                individual,
                network=individual.network,
                env=env,
                cum_reward_threshold=cum_reward_threshold,
                gamma=gamma,
                seed=seed,
                rng=rng,
                mode='episode_min')
        except ZeroDivisionError:
            # Degenerate evaluation: assign the worst possible fitness.
            individual.fitness = -np.inf
        finally:
            # Fix: release the environment's resources; it was previously
            # leaked on every evaluation.
            env.close()

        return individual
Esempio n. 2
0
def _create_individual(genome, fitness=None, individual_type="SingleGenome"):
    if individual_type == "SingleGenome":
        ind = IndividualSingleGenome(genome)
        if fitness is not None:
            ind.fitness = fitness
        return ind
    elif individual_type == "MultiGenome":
        ind = IndividualMultiGenome([genome])
        if fitness is not None:
            ind.fitness = fitness
        return ind
    else:
        raise NotImplementedError("Unknown individual type.")
Esempio n. 3
0
def objective(
        individual: cgp.IndividualSingleGenome,
        network_params: dict,
        curriculum_params: dict,
        seeds
):
    """Assign a fitness to *individual* via ``inner_objective``.

    Individuals that already carry a fitness are returned untouched. A
    ``ZeroDivisionError`` raised during evaluation maps the fitness to
    ``-inf`` instead of propagating.

    Returns the (possibly updated) individual.
    """
    # Already evaluated -- nothing to do.
    if not individual.fitness_is_None():
        return individual

    try:
        fitness = inner_objective(
            individual, network_params, curriculum_params, seeds)
    except ZeroDivisionError:
        # Degenerate evaluation: worst possible fitness.
        fitness = -np.inf
    individual.fitness = fitness
    return individual
Esempio n. 4
0
    def objective_one(
        individual: cgp.IndividualSingleGenome,
        n_episodes: int,
        gamma: float,
        seed: int,
        rng: np.random.Generator,
    ):
        """Train a fresh policy network on CartPole and score *individual*.

        A new ``Network`` is built and trained/evaluated by
        ``inner_objective`` in ``'reward_max'`` mode; the trained network is
        stored on the individual so ``objective_two`` can reuse it.
        Individuals that already carry a fitness are returned unchanged.

        Parameters
        ----------
        individual : cgp.IndividualSingleGenome
            Candidate solution to evaluate.
        n_episodes : int
            Number of episodes forwarded to ``inner_objective``.
        gamma : float
            Discount factor forwarded to ``inner_objective``.
        seed : int
            Seed for the environment, its action space, and torch.
        rng : np.random.Generator
            Random generator forwarded to ``inner_objective``.

        Returns
        -------
        cgp.IndividualSingleGenome
            The same individual with ``fitness`` set and the trained
            ``network`` attached.
        """
        # Skip re-evaluation: fitness already assigned in a previous call.
        if not individual.fitness_is_None():
            return individual

        # environment initialization, seeded for reproducibility
        env = gym.make('CartPole-v0')
        env.seed(seed=seed)
        env.action_space.seed(seed)

        # network initialization (torch seeded so weights are reproducible)
        torch.manual_seed(seed=seed)
        network = Network(n_inputs=env.observation_space.shape[0],
                          n_hidden=100,
                          n_outputs=env.action_space.n,
                          learning_rate=2e-4,
                          weight_update_mode='evolved_rule')

        try:
            individual.fitness = inner_objective(individual,
                                                 network=network,
                                                 env=env,
                                                 n_episodes=n_episodes,
                                                 gamma=gamma,
                                                 seed=seed,
                                                 rng=rng,
                                                 mode='reward_max')
        except ZeroDivisionError:
            # Degenerate evaluation: assign the worst possible fitness.
            individual.fitness = -np.inf
        finally:
            # Fix: release the environment's resources; it was previously
            # leaked on every evaluation.
            env.close()

        # Todo write network.state_dict() to ind (and possibly pickle dumps)
        individual.network = network  # assign the trained network to the individual for objective 2
        return individual