def main():
    # creates an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):
                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id,
                                                   action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict,
                                                   output_dict=actions_dict,
                                                   next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end the current episode
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")

            print("Episode: {t}/{T} | score: {r}".format(t=ind_count,
                                                         T=(NUM_OF_GENS * POP_SIZE),
                                                         r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
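
# `reward_shaping(pos, r)` is referenced above but defined elsewhere in this script.
# The following is a minimal sketch of one possible implementation, assuming a simple
# position-based bonus toward the MountainCarContinuous-v0 goal position (roughly 0.45);
# the shaping actually used by the author may differ.
def reward_shaping(pos, r):
    # add a small bonus proportional to how far the car has climbed the right hill,
    # plus a terminal bonus once the assumed goal position is reached
    shaped = r + 10.0 * max(0.0, pos)
    if pos >= 0.45:
        shaped += 100.0
    return shaped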
def main():
    # creates an environment
    env = gym.make("CartPole-v1")

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                    str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # Load pretrained NN model weights
    params = [10, 50, 30, 2]
    model = neural_net(num_inputs=4, params=params, lr=0.1, load=model_path, loss=neg_log_likelihood)
    reg.nn_models_dict["CartPoleMovement"] = model

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.nn_models_dict.keys())

    # create an object for retrieving input values
    obs_cartpole = CartPoleObs()

    # replay buffer
    cart_move_exp_rep = ReplayBuffer(max_size=1000)

    ts_elapsed = 0
    for i_episode in range(MAX_NUM_EPISODES):
        # get initial state
        state = env.reset()

        # initialize reward accumulator for the episode
        total_reward = 0

        # set the current state for retrieving specific inputs
        obs_cartpole.current_observation = state

        while True:
            # show the environment
            env.render()

            # since only one agent applies to this case study, set a dummy agent ID
            agent_id = 0

            # get an action
            code, action, input_vec_dict, probs_dict = alg.executenntree(obs_cartpole, agent_id,
                                                                         action_selection_func=greedy_strategy,
                                                                         func_args=None)

            # apply the selected action to the environment and observe feedback
            next_state, reward, done, _ = env.step(code)

            # set the received observation as the current array for retrieving input values
            obs_cartpole.current_observation = next_state

            # mark the models that executed for the agent in this time step
            cache.mark(output_dict_keys=probs_dict.keys())

            # decompose the received reward
            reward_dict = cache.decomposeReward(reward)

            # create experiences for the agent with respect to each model that executed for the agent
            state_dict = {"CartPoleMovement": np.array([obs_cartpole.getCartPosition(agent_id),
                                                        obs_cartpole.getCartVelocity(agent_id),
                                                        obs_cartpole.getPoleAngle(agent_id),
                                                        obs_cartpole.getPoleVelocity(agent_id)])}
            exp_dict = cache.createExperiences(agent_id=agent_id, action=code,
                                               dec_reward_dict=reward_dict,
                                               input_vec_dict=input_vec_dict,
                                               output_dict=probs_dict,
                                               next_state_dict=state_dict)

            # accumulate the rewards of all time steps
            total_reward += reward

            # add the experiences of the agent to their corresponding replay buffers
            for key, exp in exp_dict.items():
                if key == "CartPoleMovement":
                    cart_move_exp_rep.add(exp)

            # increment time steps played
            ts_elapsed += 1
            if ts_elapsed >= TIME_STEPS_BEFORE_TRAIN:
                # print("train the model")
                pass

            # if the episode is over, end the current episode
            if done:
                break

        print("Episode: {}/{} | score: {}".format(i_episode + 1, MAX_NUM_EPISODES, total_reward))
        avg_series.addrecord(i_episode, weighted_avg.update(total_reward))

    plt.figure(0)
    plt.title("Cartpole with simple NN")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.show()
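
# `greedy_strategy` is passed to Algorithm.executenntree as the action-selection function.
# Its exact signature lives elsewhere in the repo; below is a minimal sketch under the
# assumption that the selection function receives the model's output probabilities (plus
# optional args) and returns the index of the chosen action.
import numpy as np


def greedy_strategy(probs, args=None):
    # pick the action with the highest predicted probability
    return int(np.argmax(probs))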
def run(self):
    # initialize GA ops
    self.ga_config.init_ops(self.seed)
    reg = self.ga_config.registry
    ga = self.ga_config.ga

    # initialize sim exec. config
    self.sim_config.init_ops(reg)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg, random_process=self.sim_config.random_process)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if self.ga_config.load_init_pop_file is not None:
        pop = ga.load_initial_population(self.ga_config.load_init_pop_file, self.ga_config.pop_size)
        # pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(self.ga_config.pop_size)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    env = self.sim_config.env
    agents = self.sim_config.agents

    while epoch < self.ga_config.num_gens:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # reset the environment
            obs = env.reset()

            # set the received observation as the current array for retrieving input values
            for i, agent in enumerate(agents):
                agent.obs_accessor.current_observation = obs if len(agents) == 1 else obs[i]

            for i_ep in range(self.sim_config.episodes_per_ind):
                # run through the time steps of the simulation
                for t in range(self.sim_config.max_time_steps):
                    if self.sim_config.render:
                        # show the environment
                        env.render()

                    a = None
                    for agent in agents:
                        if self.sim_config.action_space_type == Const.DISCRETE:
                            # get an action
                            code, action, input_vec_dict, probs_dict = alg.executegft(agent.obs_accessor,
                                                                                      agent_id=agent.id)

                            # mark the GFSs that executed for the agent in this time step
                            cache.mark(output_dict_keys=probs_dict.keys())

                            # store intermediate values
                            agent.temp_values = (code, action, input_vec_dict, probs_dict)
                        else:
                            # get an action
                            actions_dict, input_vec_dict = alg.executebfc(agent.obs_accessor,
                                                                          agent_id=agent.id,
                                                                          add_noise=True)
                            agent.temp_values = (actions_dict, input_vec_dict)
                            cache.mark(output_dict_keys=actions_dict.keys())

                    # apply the selected action to the environment and observe feedback
                    a = agents[0].temp_values[0] if len(agents) == 1 else [a.temp_values[0] for a in agents]
                    if self.sim_config.action_space_type == Const.DISCRETE:
                        a = np.array(a).astype('int').tolist() if hasattr(a, "__iter__") else int(a)
                    else:
                        a = list(a.values())
                    next_state, reward, done, _ = env.step(a)

                    if self.r_shaping_callback is not None and callable(self.r_shaping_callback):
                        reward = self.r_shaping_callback(next_state, reward, done)

                    if self.time_step_finished_callback is not None and callable(self.time_step_finished_callback):
                        self.time_step_finished_callback(next_state, reward)

                    if hasattr(reward, "__iter__"):
                        acc_reward = 0
                        for r in reward:
                            acc_reward += r
                        reward = acc_reward

                    # decompose the received reward
                    reward_dict = cache.decomposeReward(reward)

                    for i, agent in enumerate(agents):
                        if self.sim_config.action_space_type == Const.DISCRETE:
                            code, action, input_vec_dict, probs_dict = agent.temp_values

                            # create experiences for the agent with respect to each GFS that executed for the agent
                            exp_dict = cache.createExperiences(agent_id=agent.id, action=code,
                                                               dec_reward_dict=reward_dict,
                                                               input_vec_dict=input_vec_dict,
                                                               output_dict=probs_dict,
                                                               next_state_dict=None)
                        else:
                            actions_dict, input_vec_dict = agent.temp_values
                            exp_dict = cache.createExperiences(agent_id=agent.id,
                                                               action=list(actions_dict.values()),
                                                               dec_reward_dict=reward_dict,
                                                               input_vec_dict=input_vec_dict,
                                                               output_dict=actions_dict,
                                                               next_state_dict=None)

                        # add the experiences of the agent to the cache
                        cache.addExperiences(time_step=t, exp_dict=exp_dict)

                        # set the received observation as the current array for retrieving input values
                        agent.obs_accessor.current_observation = next_state if len(agents) == 1 else next_state[i]

                    # accumulate the rewards of all time steps
                    total_reward += reward

                    # if the episode is over, end the current episode
                    if done:
                        break

            # set the return from the environment as the fitness value of the current individual
            total_reward /= float(self.sim_config.episodes_per_ind)
            ind.fitness.values = (total_reward,)

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            if self.sim_config.persist_cached_data:
                cache.save_csv(path="data/")

            if self.episode_finished_callback is not None and callable(self.episode_finished_callback):
                self.episode_finished_callback(ind, ind_count,
                                               self.ga_config.num_gens * self.ga_config.pop_size,
                                               total_reward)

        # GA stats by DEAP
        record = ga.stats.compile(pop)
        ga.logbook.record(epoch=epoch, **record)
        if self.epoch_finished_callback is not None and callable(self.epoch_finished_callback):
            self.epoch_finished_callback(epoch, pop, record)

        # perform evolution
        if self.ga_config.apply_evolution:
            m_prob = self.ga_config.mutation_prob_schdl.get_prob(epoch)
            c_prob = self.ga_config.cross_prob_schdl.get_prob(epoch)
            ev = self.ga_config.evol_config
            pop = ga.evolve(pop, selop=ev.selection_op, crossop=ev.crossover_op, mutop=ev.mutation_op,
                            mut_prob=m_prob, cross_prob=c_prob)
            if self.evolution_finished_callback is not None and callable(self.evolution_finished_callback):
                self.evolution_finished_callback(pop, m_prob, c_prob, epoch)

        epoch += 1

    if self.sim_finished_callback is not None and callable(self.sim_finished_callback):
        self.sim_finished_callback(ga, pop)

    env.close()
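
# The callbacks invoked by run() are supplied by the caller. From the call sites above,
# episode_finished_callback receives (individual, individual_count, total_individuals,
# total_reward). A minimal logging sketch of such a callback (the function name is
# illustrative and not part of the library):
def log_episode_finished(ind, ind_count, total_inds, total_reward):
    # report progress and the fitness assigned to the individual just evaluated
    print("Episode: {}/{} | score: {}".format(ind_count, total_inds, total_reward))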
def startsim():
    # sets up the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=xmlToLinvars(open(LIN_VARS_FILE).read()),
                   defuzz_method=dfz.max_of_maximum)

    # create the GA object for accessing GA operations
    ga = GeneticAlgorithm(registry=reg)

    # get the algorithm for execution
    alg = Algorithm(registry=reg)

    # get the initial population
    population = ga.generate_initial_population(POP_SIZE)

    # print initial population
    for child in population:
        print(child)

    # get one GFS for debugging
    gfs = reg.gft_dict[list(reg.gft_dict.keys())[2]]

    # # the total number of GFSs in the GFT
    # num_gfs = len(reg.gft_dict)
    #
    # # extract the RB and MF segments of a chromosome/individual
    # rb_chrom = population[0, gfs.descriptor.position]
    # mf_chrom = population[0, gfs.descriptor.position + num_gfs]
    #
    # # construct the control system of the selected GFS
    # gfs.contructControlSystemSim(rb_chrom, mf_chrom)
    # print(str(population[0].fitness.values))

    # configure the KB
    alg.configuregft(np.array(population[0]))

    # execute the GFS
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("code = {0}, action = {1}\nprobs = {2}".format(code, action, str(probs_dict)))

    # print generated rules
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # RB and MF redefinition test
    tic = time.time()
    alg.configuregft(np.array(population[1]))
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("\ncode = {0}, action = {1}\nprobs = {2}".format(code, action, str(probs_dict)))
    print("time for redefinition =", (time.time() - tic))
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # dummy fitness values
    for i in range(len(population)):
        population[i].fitness.values = (random.randint(0, 10), 0)

    # create selection operator
    selargs = {"k": len(population), "tournsize": 3}
    selop = Operator(tools.selTournament, **selargs)

    # create crossover operator
    crossargs = {"indpb": 0.2}
    crossop = Operator(tools.cxUniform, **crossargs)

    # create mutation operator
    mutargs = {"mu": 0, "sigma": 1, "indpb": 0.2}
    mutop = Operator(tools.mutGaussian, **mutargs)

    # Perform one step of evolution
    offspring = ga.evolve(population, selop=selop, crossop=crossop, mutop=mutop,
                          mut_prob=0.2, cross_prob=0.7)
    assert offspring is not None

    # print out the offspring of the evolution step
    print("Num of offspring =", len(offspring))
    for child in offspring:
        print(child)
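
# This debugging script presumably calls startsim() when executed directly; a standard
# entry-point guard is sketched below (an assumption, since the original entry point is
# not shown in this excerpt).
if __name__ == "__main__":
    startsim()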
def main():
    # creates an environment
    env = gym.make(rlmarsenvs.carmunk_id)

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                    str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=123)

    # create a mutation probability schedule
    # mut_sch = sch.TimeBasedSchedule(decay_factor=1e-4)
    mut_sch = sch.LinearDecaySchedule(initial_prob=1.025, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_carmunk = CarmunkObs()

    # Tau for Boltzmann exploration strategy
    tau_sch = sch.LinearDecaySchedule(initial_prob=20, decay_factor=0.02)

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # for i_episode in range(NUM_EPISODES_PER_IND):
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_carmunk.current_observation = observation

            # run through the time steps of the simulation
            t = 0
            while True:
                t += 1

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                code, action, input_vec_dict, probs_dict = alg.executegft(obs_carmunk, agent_id)

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(code)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=probs_dict.keys())

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=code,
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict,
                                                   output_dict=probs_dict)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_carmunk.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end the current episode
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            cache.save_csv()

            # if total_reward < 50:
            #     total_reward = -50

            print("Episode finished after {} time steps".format(t + 1))
            print("Episode: {}/{} | score: {}".format(ind_count, (NUM_OF_GENS * POP_SIZE), total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward > SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
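
# `plot_charts(avg_series, mut_prob_series)` is a plotting helper defined elsewhere in
# these scripts. A minimal matplotlib sketch is given below, assuming each ana.Series
# exposes data() with 'x' and 'y' keys as used in the CartPole script above; the figure
# titles and axis labels are illustrative assumptions.
import matplotlib.pyplot as plt


def plot_charts(avg_series, mut_prob_series):
    # running-average performance across evaluated individuals
    plt.figure(0)
    plt.title("Average performance")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("individual")
    plt.ylabel("score")

    # mutation probability per generation
    plt.figure(1)
    plt.title("Mutation probability")
    plt.plot(mut_prob_series.data()['x'], mut_prob_series.data()['y'])
    plt.xlabel("epoch")
    plt.ylabel("probability")

    plt.show()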