Example 1
def main():
    # create an environment
    env = gym.make("Pendulum-v0")

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.2, decay_factor=1e-2)

    # create a crossover probability schedule
    cross_sch = sch.ConstantSchedule(0.7)

    # random process for introducing noise
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)

    # evolution operator configuration
    ev_conf = EvolutionConfig(sel_args={"k": 30, "tournsize": 3},
                              sel_func=tools.selTournament,
                              cross_args={"indpb": 0.2},
                              cross_func=tools.cxUniform,
                              mut_args={"mu": 0, "sigma": 0.1, "indpb": 0.1},
                              mut_func=tools.mutGaussian)

    # GA configuration
    ga_conf = GeneticAlgConfiguration(evol_config=ev_conf,
                                      pop_size=30,
                                      num_gens=10,
                                      mf_tuning_range=[-0.1, 0.1],
                                      lin_vars_file="res/pendulum_linvars.xml",
                                      gft_file="res/pendulum.xml",
                                      load_init_pop_file=None,
                                      apply_evolution=True,
                                      defuzz_method=dfz.centroid,
                                      mutation_prob_schdl=mut_sch,
                                      cross_prob_schdl=cross_sch,
                                      learn_rb_ops=False)

    # Sim execution configuration
    sim_conf = SimExecutionConfiguration(env=env,
                                         agents=[Agent(0, PendulumObs)],
                                         max_time_steps=600,
                                         episodes_per_ind=1,
                                         noise_process=rand_proc,
                                         action_space=Const.CONTINUOUS,
                                         persist_cache_per_ind=False,
                                         visualize_env=True)
    runner = Runner(ga_config=ga_conf,
                    sim_config=sim_conf,
                    seed=5,
                    episode_finished_callback=episode_finished,
                    epoch_finished_callback=epoch_finished,
                    sim_finished_callback=sim_finished,
                    evolution_finished_callback=lambda pop, m_prob, c_prob, epoch:
                        mut_prob_series.addrecord(epoch, mut_sch.prob))

    runner.run()
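
The callbacks passed to Runner above (episode_finished, epoch_finished, sim_finished) are defined elsewhere in the script. Below is a minimal sketch of what they might look like; only the evolution_finished_callback signature (pop, m_prob, c_prob, epoch) is visible in the example, so the other parameter lists are assumptions.

# Hypothetical user-side callbacks for Runner; the parameter lists
# below are assumptions, not the library's documented signatures.

def episode_finished(episode, total_reward):
    # assumed hook: log the return of the episode that just ended
    print("episode {} | return = {}".format(episode, total_reward))


def epoch_finished(epoch):
    # assumed hook: report progress at the end of each generation
    print("epoch {} completed".format(epoch))


def sim_finished():
    # assumed hook: final reporting/cleanup after the whole run
    print("simulation finished")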
Example 2
def main():
    # create an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # random process for introducing noise
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)

    # create GFT algorithm object with the registry and the random process
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        # reverse the order of the loaded individuals
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:

        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT

            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict, output_dict=actions_dict,
                                                   next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end the current rollout
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")
            print("Episode: {t}/{T} | score: {r}".format(
                t=ind_count, T=(NUM_OF_GENS * POP_SIZE), r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])
        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)
        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
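
Example 2 relies on three helpers defined elsewhere in the script: reward_shaping, applyEvolution, and plot_charts. The sketches below are illustrative stand-ins rather than the original implementations: the shaping term, the DEAP-style toolbox access on the GA object, and the Series .x/.y attributes are all assumptions.

import random

import matplotlib.pyplot as plt


def reward_shaping(pos, r):
    # hypothetical shaping for MountainCarContinuous: reward horizontal
    # displacement from the valley floor (around x = -0.5) so progress is
    # visible before the sparse goal reward arrives
    return r + abs(pos + 0.5)


def applyEvolution(population, ga_alg, mut_sch, epoch):
    # hypothetical DEAP-style generation step; assumes ga_alg exposes a
    # toolbox configured with the select/mate/mutate operators from
    # EvolutionConfig. epoch is unused here; the mutation schedule is
    # assumed to be advanced elsewhere.
    offspring = list(map(ga_alg.toolbox.clone,
                         ga_alg.toolbox.select(population, len(population))))
    for c1, c2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < 0.7:  # constant crossover prob, as in Example 1
            ga_alg.toolbox.mate(c1, c2)
            del c1.fitness.values, c2.fitness.values
    for ind in offspring:
        if random.random() < mut_sch.prob:
            ga_alg.toolbox.mutate(ind)
            del ind.fitness.values
    return offspring


def plot_charts(*series_list):
    # hypothetical rendering of the collected series; assumes each
    # ana.Series exposes its recorded points as .x and .y
    for s in series_list:
        plt.figure()
        plt.plot(s.x, s.y)
        plt.title(s.name)
    plt.show()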
Example 3
style.use("seaborn-paper")

# for reproducibility
np.random.seed(1)
random.seed(1)
tf.set_random_seed(1)

LIN_VARS_FILE = "cartpole_linvars.xml"
GFT_FILE = "cartpole_gft.xml"
model_path = "cartpole_model.h5"

MAX_NUM_EPISODES = 100
TIME_STEPS_BEFORE_TRAIN = 1000
epsilon = 1e-4

tau_sch = sch.ExponentialDecaySchedule(initial_prob=0.1, decay_factor=1e-1)


def main():
    # create an environment
    env = gym.make("CartPole-v1")

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                   str(env.observation_space.low)))
    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))

    # create linguistic variables in a registry
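
The "window" in the average series name follows the usual reading of an exponentially weighted average: with beta = 0.9 it averages over roughly 1 / (1 - beta) = 10 recent scores. A minimal sketch of the update rule ana.WeightedAvg presumably implements (the rule itself is an assumption based on that label):

class WeightedAvgSketch:
    # exponentially weighted moving average: v <- beta * v + (1 - beta) * x,
    # which is what makes 1 / (1 - beta) behave like an averaging window
    def __init__(self, beta=0.9):
        self.beta = beta
        self.value = 0.0

    def update(self, x):
        self.value = self.beta * self.value + (1 - self.beta) * x
        return self.value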
Example 4
def test_ExponentialDecaySchedule(self):
    s = sch.ExponentialDecaySchedule(init_prob, decay_factor)
    runschedule(s)
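
runschedule is a shared helper of the test module and is not shown here. Independent of it, a schedule of this kind typically evaluates p(t) = initial_prob * exp(-decay_factor * t); the function below sketches that rule under this assumption and is not the library's implementation.

import math


def exponential_decay(initial_prob, decay_factor, t):
    # assumed decay rule: p(t) = initial_prob * e^(-decay_factor * t)
    return initial_prob * math.exp(-decay_factor * t)

# with initial_prob=0.2 and decay_factor=1e-2 (Example 1's values), the
# mutation probability halves roughly every ln(2) / 1e-2 ≈ 69 epochs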