def main():
    # create an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create a GFT with the linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create a GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # create the GFT algorithm object with the registry
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get the initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize the epoch (generation) counter
    epoch = 0

    # initialize the individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize the reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):
                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe the feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict, input_vec_dict=input_vec_dict,
                                                   output_dict=actions_dict, next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # end the current episode if it is over
                if done:
                    break

            # save the contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")

            print("Episode: {t}/{T} | score: {r}".format(t=ind_count, T=(NUM_OF_GENS * POP_SIZE),
                                                         r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save a qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store the max return of the generation
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set the offspring as the current population
            pop = offspring

            # update the mutation probability series
            mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment the epoch
        epoch += 1

    # print the logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
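# The reward_shaping() helper called in the time-step loop above is not shown in this listing.
# The function below is only a minimal sketch of a typical shaping term for
# MountainCarContinuous-v0: it adds a bonus proportional to the car's x-position so that progress
# toward the goal on the right hill is rewarded before the sparse terminal reward arrives.
# The name, signature, and coefficient are illustrative assumptions, not the project's actual helper.
def reward_shaping_sketch(pos, r, position_weight=10.0):
    # pos is next_state[0], the car's x-position (roughly -1.2 to 0.6, goal at about 0.45)
    # r is the raw environment reward for this time step
    return r + position_weight * pos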
import fuzzrl.core.plot.analysis as ana
import gym
import matplotlib.pyplot as plt
import seaborn as sb

from fuzzrl.core.fuzzy.runner import *
from fuzzrl.core.io.simdata import Document, Text, Line
from fuzzrl.core.conf import Defuzz as dfz

sb.set()

SAVE_BEST = True
SCORE_THRESHOLD = 450
QLFD_IND_FILE = "data/qualified.txt"

# chart series
weighted_avg = ana.WeightedAvg(beta=0.9)
all_ind_series = ana.Series(name="Individuals Performance")
avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
gen_series = ana.Series(name="Generation Performance")
mut_prob_series = ana.Series(name="Mutation probability")


def episode_finished(ind, ind_i, total_eps, total_r):
    print("Episode: {}/{} | score: {}".format(ind_i, total_eps, total_r))

    # save a qualified individual
    if SAVE_BEST and total_r > SCORE_THRESHOLD:
        document = Document(name=QLFD_IND_FILE)
        document.addline(line=Line().add(text=Text(str(ind))))
        document.save(append=True)
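# ana.WeightedAvg(beta=0.9) is labelled with an effective window of round(1 / (1 - beta)) = 10
# episodes. The class below is only a sketch of the exponentially weighted moving average such a
# tracker typically maintains (v <- beta * v + (1 - beta) * x); it is an assumption for
# illustration, not the fuzzrl implementation.
class EwmaSketch(object):
    def __init__(self, beta=0.9):
        self.beta = beta
        self.value = 0.0

    def update(self, x):
        # blend the new sample into the running average and return the updated value
        self.value = self.beta * self.value + (1 - self.beta) * x
        return self.value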
def main():
    # create an environment
    env = gym.make("CartPole-v1")

    # print the observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                   str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create a GFT with the linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # load pretrained NN model weights
    params = [10, 50, 30, 2]
    model = neural_net(num_inputs=4, params=params, lr=0.1, load=model_path, loss=neg_log_likelihood)
    reg.nn_models_dict["CartPoleMovement"] = model

    # create the GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.nn_models_dict.keys())

    # create an object for retrieving input values
    obs_cartpole = CartPoleObs()

    # replay buffer
    cart_move_exp_rep = ReplayBuffer(max_size=1000)

    ts_elapsed = 0

    for i_episode in range(MAX_NUM_EPISODES):
        # get the initial state
        state = env.reset()

        # initialize the reward accumulator for the episode
        total_reward = 0

        # set the current state for retrieving specific inputs
        obs_cartpole.current_observation = state

        while True:
            # show the environment
            env.render()

            # since only one agent applies to this case study, set a dummy agent ID
            agent_id = 0

            # get an action
            code, action, input_vec_dict, probs_dict = alg.executenntree(obs_cartpole, agent_id,
                                                                         action_selection_func=greedy_strategy,
                                                                         func_args=None)

            # apply the selected action to the environment and observe the feedback
            next_state, reward, done, _ = env.step(code)

            # set the received observation as the current array for retrieving input values
            obs_cartpole.current_observation = next_state

            # mark the models that executed for the agent in this time step
            cache.mark(output_dict_keys=probs_dict.keys())

            # decompose the received reward
            reward_dict = cache.decomposeReward(reward)

            # create experiences for the agent with respect to each model that executed for the agent
            state_dict = {"CartPoleMovement": np.array([obs_cartpole.getCartPosition(agent_id),
                                                        obs_cartpole.getCartVelocity(agent_id),
                                                        obs_cartpole.getPoleAngle(agent_id),
                                                        obs_cartpole.getPoleVelocity(agent_id)])}
            exp_dict = cache.createExperiences(agent_id=agent_id, action=code, dec_reward_dict=reward_dict,
                                               input_vec_dict=input_vec_dict, output_dict=probs_dict,
                                               next_state_dict=state_dict)

            # accumulate the rewards of all time steps
            total_reward += reward

            # add the experiences of the agent to their corresponding replay buffers
            for key, exp in exp_dict.items():
                if key == "CartPoleMovement":
                    cart_move_exp_rep.add(exp)

            # increment the number of time steps played
            ts_elapsed += 1
            if ts_elapsed >= TIME_STEPS_BEFORE_TRAIN:
                # print("train the model")
                pass

            # end the current episode if it is over
            if done:
                break

        print("Episode: {}/{} | score: {}".format(i_episode + 1, MAX_NUM_EPISODES, total_reward))
        avg_series.addrecord(i_episode, weighted_avg.update(total_reward))

    plt.figure(0)
    plt.title("Cartpole with simple NN")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.show()
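# greedy_strategy above comes from the star import and is passed to alg.executenntree() as the
# action-selection function; its exact signature is not shown in this listing. The function below
# is only a sketch of greedy selection over a probability vector (argmax), with an assumed
# signature, to illustrate what such a selection function does.
def greedy_strategy_sketch(probs, func_args=None):
    # return the index (action code) of the highest-probability action
    return int(np.argmax(probs))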
def main():
    # create an environment
    env = gym.make(rlmarsenvs.carmunk_id)

    # print the observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                   str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create a GFT with the linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # create a GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=123)

    # create a mutation probability schedule
    # mut_sch = sch.TimeBasedSchedule(decay_factor=1e-4)
    mut_sch = sch.LinearDecaySchedule(initial_prob=1.025, decay_factor=1e-2)

    # create the GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get the initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize the epoch (generation) counter
    epoch = 0

    # initialize the individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_carmunk = CarmunkObs()

    # tau for the Boltzmann exploration strategy
    tau_sch = sch.LinearDecaySchedule(initial_prob=20, decay_factor=0.02)

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize the reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # for i_episode in range(NUM_EPISODES_PER_IND):
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_carmunk.current_observation = observation

            # run through the time steps of the simulation
            t = 0
            while True:
                t += 1

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                code, action, input_vec_dict, probs_dict = alg.executegft(obs_carmunk, agent_id)

                # apply the selected action to the environment and observe the feedback
                next_state, reward, done, _ = env.step(code)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=probs_dict.keys())

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=code, dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict, output_dict=probs_dict)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_carmunk.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # end the current episode if it is over
                if done:
                    break

            # save the contents of the cache and clear it for the next episode
            cache.save_csv()

            # if total_reward < 50:
            #     total_reward = -50

            print("Episode finished after {} time steps".format(t + 1))
            print("Episode: {}/{} | score: {}".format(ind_count, (NUM_OF_GENS * POP_SIZE), total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save a qualified individual
            if SAVE_BEST and total_reward > SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store the max return of the generation
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set the offspring as the current population
            pop = offspring

            # update the mutation probability series
            mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment the epoch
        epoch += 1

    # print the logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
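# plot_charts() is called at the end of the GA-driven scripts above but is not shown in this
# listing. The function below is a sketch of what such a helper could look like, assuming only the
# Series.data() interface used in the CartPole script (a dict with 'x' and 'y' sequences) and an
# accessible name attribute on each Series; it is an illustrative assumption, not the project's helper.
def plot_charts_sketch(*series_list):
    for i, series in enumerate(series_list):
        plt.figure(i)
        plt.title(series.name)
        plt.plot(series.data()['x'], series.data()['y'])
        plt.xlabel("index")
        plt.ylabel("value")
    plt.show()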