def objective_two(
    individual: cgp.IndividualSingleGenome,
    cum_reward_threshold: int,
    gamma: float,
    seed: int,
    rng: np.random.Generator,
):
    """Assign a fitness to ``individual`` using the network stored on it.

    Individuals that already carry a fitness are returned untouched.
    Otherwise a fresh ``CartPole-v0`` environment is created and
    ``inner_objective`` is run in ``'episode_min'`` mode with
    ``individual.network``.

    Args:
        individual: Individual to evaluate. It must expose a ``network``
            attribute (``objective_one`` attaches one after its evaluation).
        cum_reward_threshold: Forwarded unchanged to ``inner_objective``.
        gamma: Forwarded unchanged to ``inner_objective``
            (presumably a discount factor -- confirm in ``inner_objective``).
        seed: Forwarded unchanged to ``inner_objective``.
        rng: Random generator forwarded unchanged to ``inner_objective``.

    Returns:
        The same ``individual`` object, with ``fitness`` set.
    """
    if not individual.fitness_is_None():
        return individual

    # environment initialization
    env = gym.make('CartPole-v0')
    try:
        individual.fitness = inner_objective(
            individual,
            network=individual.network,
            env=env,
            cum_reward_threshold=cum_reward_threshold,
            gamma=gamma,
            seed=seed,
            rng=rng,
            mode='episode_min',
        )
    except ZeroDivisionError:
        # A rule that divides by zero gets the worst possible fitness
        # instead of aborting the evaluation.
        individual.fitness = -np.inf
    finally:
        # Release the environment explicitly rather than waiting for
        # garbage collection; this function is called once per individual.
        env.close()

    return individual
def _create_individual(genome, fitness=None, individual_type="SingleGenome"): if individual_type == "SingleGenome": ind = IndividualSingleGenome(genome) if fitness is not None: ind.fitness = fitness return ind elif individual_type == "MultiGenome": ind = IndividualMultiGenome([genome]) if fitness is not None: ind.fitness = fitness return ind else: raise NotImplementedError("Unknown individual type.")
def objective(
    individual: cgp.IndividualSingleGenome,
    network_params: dict,
    curriculum_params: dict,
    seeds,
):
    """Compute and store the fitness of ``individual`` if it has none yet.

    Args:
        individual: Individual to evaluate.
        network_params: Forwarded unchanged to ``inner_objective``.
        curriculum_params: Forwarded unchanged to ``inner_objective``.
        seeds: Forwarded unchanged to ``inner_objective``.

    Returns:
        The same ``individual`` object. Its ``fitness`` is the value returned
        by ``inner_objective``, or ``-np.inf`` when that call raises
        ``ZeroDivisionError``. An individual that already has a fitness is
        returned unchanged.
    """
    if individual.fitness_is_None():
        try:
            score = inner_objective(
                individual, network_params, curriculum_params, seeds
            )
        except ZeroDivisionError:
            # Division by zero during evaluation maps to the worst fitness.
            score = -np.inf
        individual.fitness = score
    return individual
def objective_one(
    individual: cgp.IndividualSingleGenome,
    n_episodes: int,
    gamma: float,
    seed: int,
    rng: np.random.Generator,
):
    """Assign a fitness to ``individual`` using a freshly built network.

    Individuals that already carry a fitness are returned untouched.
    Otherwise a seeded ``CartPole-v0`` environment and a seeded ``Network``
    are created, ``inner_objective`` is run in ``'reward_max'`` mode, and
    the network is attached to the individual as ``individual.network``.

    Args:
        individual: Individual to evaluate.
        n_episodes: Forwarded unchanged to ``inner_objective``.
        gamma: Forwarded unchanged to ``inner_objective``
            (presumably a discount factor -- confirm in ``inner_objective``).
        seed: Seeds the environment, its action space and torch; it is also
            forwarded to ``inner_objective``.
        rng: Random generator forwarded unchanged to ``inner_objective``.

    Returns:
        The same ``individual`` object, with ``fitness`` and ``network`` set.
    """
    if not individual.fitness_is_None():
        return individual

    # environment initialization
    env = gym.make('CartPole-v0')
    try:
        env.seed(seed=seed)
        env.action_space.seed(seed)

        # network initialization
        torch.manual_seed(seed=seed)
        network = Network(
            n_inputs=env.observation_space.shape[0],
            n_hidden=100,
            n_outputs=env.action_space.n,
            learning_rate=2e-4,
            weight_update_mode='evolved_rule',
        )

        try:
            individual.fitness = inner_objective(
                individual,
                network=network,
                env=env,
                n_episodes=n_episodes,
                gamma=gamma,
                seed=seed,
                rng=rng,
                mode='reward_max',
            )
        except ZeroDivisionError:
            # A rule that divides by zero gets the worst possible fitness
            # instead of aborting the evaluation.
            individual.fitness = -np.inf
    finally:
        # Release the environment explicitly rather than waiting for
        # garbage collection; this function is called once per individual.
        env.close()

    # TODO: write network.state_dict() to ind (and possibly pickle dumps)
    # assign the trained network to the individual for objective 2
    individual.network = network

    return individual