Example #1
def main():
    # create an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:

        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT

            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict, output_dict=actions_dict,
                                                   next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end it
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")
            print(
                "Episode: {t}/{T} | score: {r}".format(t=ind_count, T=(NUM_OF_GENS * POP_SIZE),
                                                       r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])
        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)
        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
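
The reward_shaping helper called in this example is not defined here. A minimal sketch of what such a function could look like for MountainCarContinuous-v0, where pos is the cart position (goal flag near 0.45) and r is the raw environment reward; the bonus term and its scaling are assumptions, not the original implementation:

def reward_shaping(pos, r):
    # hypothetical shaping: reward progress toward the goal on the right hill
    # (the cart starts near -0.5, the goal flag sits near 0.45)
    bonus = 10.0 * (pos + 0.5)  # assumed scaling factor
    return r + bonus
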
Example #2
def main():
    # create an environment
    env = gym.make("CartPole-v1")

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                   str(env.observation_space.low)))
    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # Load pretrained NN model weights
    params = [10, 50, 30, 2]
    model = neural_net(num_inputs=4, params=params, lr=0.1, load=model_path, loss=neg_log_likelihood)
    reg.nn_models_dict["CartPoleMovement"] = model

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.nn_models_dict.keys())

    # create an object for retrieving input values
    obs_cartpole = CartPoleObs()

    # replay buffer
    cart_move_exp_rep = ReplayBuffer(max_size=1000)

    ts_elapsed = 0

    for i_episode in range(MAX_NUM_EPISODES):
        # get initial state
        state = env.reset()

        # initialize reward accumulator for the episode
        total_reward = 0

        # set the current state for retrieving specific inputs
        obs_cartpole.current_observation = state

        while True:
            # show the environment
            env.render()

            # since only one agent applies to this case study, set a dummy agent ID
            agent_id = 0

            # get an action
            code, action, input_vec_dict, probs_dict = alg.executenntree(obs_cartpole, agent_id,
                                                                         action_selection_func=greedy_strategy,
                                                                         func_args=None)

            # apply the selected action to the environment and observe feedback
            next_state, reward, done, _ = env.step(code)

            # set the received observation as the current array for retrieving input values
            obs_cartpole.current_observation = next_state

            # mark the models that executed for the agent in this time step
            cache.mark(output_dict_keys=probs_dict.keys())

            # decompose the received reward
            reward_dict = cache.decomposeReward(reward)

            # create experiences for the agent with respect to each model that executed for the agent
            state_dict = {"CartPoleMovement": np.array([obs_cartpole.getCartPosition(agent_id),
                                                        obs_cartpole.getCartVelocity(agent_id),
                                                        obs_cartpole.getPoleAngle(agent_id),
                                                        obs_cartpole.getPoleVelocity(agent_id)])}
            exp_dict = cache.createExperiences(agent_id=agent_id, action=code, dec_reward_dict=reward_dict,
                                               input_vec_dict=input_vec_dict, output_dict=probs_dict,
                                               next_state_dict=state_dict)

            # accumulate the rewards of all time steps
            total_reward += reward

            # add the agent's experiences to the corresponding replay buffer
            for key, exp in exp_dict.items():
                if key == "CartPoleMovement":
                    cart_move_exp_rep.add(exp)

            # increment time steps played
            ts_elapsed += 1

            if ts_elapsed >= TIME_STEPS_BEFORE_TRAIN:
                # print("train the model")
                pass

            # if the episode is over, end it
            if done:
                break

        print("Episode: {}/{} | score: {}".format(i_episode + 1, MAX_NUM_EPISODES, total_reward))

        avg_series.addrecord(i_episode, weighted_avg.update(total_reward))

    plt.figure(0)
    plt.title("Cartpole with simple NN")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.show()
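
The greedy_strategy passed to alg.executenntree above is not shown in this example. A minimal sketch of a greedy action-selection function, assuming the library hands it a vector of action scores plus the optional func_args and expects the index of the chosen action back (the exact signature expected by executenntree is an assumption):

import numpy as np

def greedy_strategy(action_scores, func_args=None):
    # hypothetical greedy selection: always pick the highest-scoring action
    return int(np.argmax(action_scores))
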
Example #3
    def run(self):
        # initialize GA ops
        self.ga_config.init_ops(self.seed)
        reg = self.ga_config.registry
        ga = self.ga_config.ga

        # initialize sim exec. config
        self.sim_config.init_ops(reg)

        # create GFT algorithm object with the registry
        alg = Algorithm(registry=reg, random_process=self.sim_config.random_process)

        # create a cache for managing simulation data
        cache = Cache(reg.gft_dict.keys())

        # get initial population
        if self.ga_config.load_init_pop_file is not None:
            pop = ga.load_initial_population(self.ga_config.load_init_pop_file, self.ga_config.pop_size)
            # pop = pop[::-1]
            print("Num. of loaded individuals =", len(pop))
        else:
            pop = ga.generate_initial_population(self.ga_config.pop_size)

        # initialize epoch or generation counter
        epoch = 0

        # initialize individual counter
        ind_count = 0

        env = self.sim_config.env

        agents = self.sim_config.agents

        while epoch < self.ga_config.num_gens:

            # Run the simulation with the current population
            for ind in pop:
                ind_count += 1

                # initialize reward accumulator for the individual
                total_reward = 0

                # configure the GFT with the current individual
                alg.configuregft(chromosome=ind)

                # reset the environment
                obs = env.reset()

                # set the received observation as the current array for retrieving input values
                for i, agent in enumerate(agents):
                    agent.obs_accessor.current_observation = obs if len(agents) == 1 else obs[i]

                for i_ep in range(self.sim_config.episodes_per_ind):
                    # run through the time steps of the simulation
                    for t in range(self.sim_config.max_time_steps):

                        if self.sim_config.render:
                            # show the environment
                            env.render()

                        a = None
                        for agent in agents:
                            if self.sim_config.action_space_type == Const.DISCRETE:
                                # get an action
                                code, action, input_vec_dict, probs_dict = alg.executegft(agent.obs_accessor,
                                                                                          agent_id=agent.id)

                                # mark the GFSs that executed for the agent in this time step
                                cache.mark(output_dict_keys=probs_dict.keys())

                                # store intermediate values
                                agent.temp_values = (code, action, input_vec_dict, probs_dict)
                            else:
                                # get an action
                                actions_dict, input_vec_dict = alg.executebfc(agent.obs_accessor, agent_id=agent.id,
                                                                              add_noise=True)
                                agent.temp_values = (actions_dict, input_vec_dict)
                                cache.mark(output_dict_keys=actions_dict.keys())

                        # apply the selected action to the environment and observe feedback
                        a = agents[0].temp_values[0] if len(agents) == 1 else [a.temp_values[0] for a in agents]
                        if self.sim_config.action_space_type == Const.DISCRETE:
                            a = np.array(a).astype('int').tolist() if hasattr(a, "__iter__") else int(a)
                        else:
                            a = list(a.values())
                        next_state, reward, done, _ = env.step(a)

                        if self.r_shaping_callback is not None and callable(self.r_shaping_callback):
                            reward = self.r_shaping_callback(next_state, reward, done)

                        if self.time_step_finished_callback is not None and callable(self.time_step_finished_callback):
                            self.time_step_finished_callback(next_state, reward)

                        if hasattr(reward, "__iter__"):
                            reward = sum(reward)

                        # decompose the received reward
                        reward_dict = cache.decomposeReward(reward)

                        for i, agent in enumerate(agents):
                            if self.sim_config.action_space_type == Const.DISCRETE:
                                code, action, input_vec_dict, probs_dict = agent.temp_values
                                # create experiences for the agent with respect to each GFS that executed for the agent
                                exp_dict = cache.createExperiences(agent_id=agent.id, action=code,
                                                                   dec_reward_dict=reward_dict,
                                                                   input_vec_dict=input_vec_dict,
                                                                   output_dict=probs_dict,
                                                                   next_state_dict=None)
                            else:
                                actions_dict, input_vec_dict = agent.temp_values
                                exp_dict = cache.createExperiences(agent_id=agent.id,
                                                                   action=list(actions_dict.values()),
                                                                   dec_reward_dict=reward_dict,
                                                                   input_vec_dict=input_vec_dict,
                                                                   output_dict=actions_dict,
                                                                   next_state_dict=None)

                            # add the experiences of the agent to the cache
                            cache.addExperiences(time_step=t, exp_dict=exp_dict)

                            # set the received observation as the current array for retrieving input values
                            agent.obs_accessor.current_observation = next_state if len(agents) == 1 else next_state[i]

                        # accumulate the rewards of all time steps
                        total_reward += reward

                        # if the episode is over, end it
                        if done:
                            break

                # set the return from the environment as the fitness value of the current individual
                total_reward /= float(self.sim_config.episodes_per_ind)
                ind.fitness.values = (total_reward,)

                # save contents of the cache and clear it for the next episode
                # cache.compute_states_value(gamma=.9)
                if self.sim_config.persist_cached_data:
                    cache.save_csv(path="data/")
                if self.episode_finished_callback is not None and callable(self.episode_finished_callback):
                    self.episode_finished_callback(ind, ind_count, self.ga_config.num_gens * self.ga_config.pop_size,
                                                   total_reward)
            # GA stats by DEAP
            record = ga.stats.compile(pop)
            ga.logbook.record(epoch=epoch, **record)

            if self.epoch_finished_callback is not None and callable(self.epoch_finished_callback):
                self.epoch_finished_callback(epoch, pop, record)
            # perform evolution
            if self.ga_config.apply_evolution:
                m_prob = self.ga_config.mutation_prob_schdl.get_prob(epoch)
                c_prob = self.ga_config.cross_prob_schdl.get_prob(epoch)
                ev = self.ga_config.evol_config
                pop = ga.evolve(pop, selop=ev.selection_op, crossop=ev.crossover_op, mutop=ev.mutation_op,
                                mut_prob=m_prob, cross_prob=c_prob)
                if self.evolution_finished_callback is not None and callable(self.evolution_finished_callback):
                    self.evolution_finished_callback(pop, m_prob, c_prob, epoch)

            epoch += 1

        if self.sim_finished_callback is not None and callable(self.sim_finished_callback):
            self.sim_finished_callback(ga, pop)

        env.close()
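
None of the callbacks consulted by run() are defined in this example. A minimal sketch of callbacks matching the signatures actually invoked above (r_shaping_callback, episode_finished_callback, epoch_finished_callback); the function bodies and the way they are attached to the runner are assumptions:

def shape_reward(next_state, reward, done):
    # hypothetical shaping callback: return the reward unchanged
    return reward

def on_episode_finished(ind, ind_count, total_episodes, total_reward):
    print("Episode: {}/{} | score: {}".format(ind_count, total_episodes, total_reward))

def on_epoch_finished(epoch, pop, record):
    print("Statistics for epoch {} = {}".format(epoch, record))

# assuming the runner exposes the callback attributes used in run():
# runner.r_shaping_callback = shape_reward
# runner.episode_finished_callback = on_episode_finished
# runner.epoch_finished_callback = on_epoch_finished
# runner.run()
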
Example #4
def startsim():
    # set up the registry
    reg = xmlToGFT(open(GFT_FILE).read(),
                   registry=xmlToLinvars(open(LIN_VARS_FILE).read()),
                   defuzz_method=dfz.max_of_maximum)

    # create the GA object for accessing GA operations
    ga = GeneticAlgorithm(registry=reg)

    # get the algorithm for execution
    alg = Algorithm(registry=reg)

    # get the initial population
    population = ga.generate_initial_population(POP_SIZE)

    # print initial population
    for child in population:
        print(child)

    # get one GFS for debugging
    gfs = reg.gft_dict[list(reg.gft_dict.keys())[2]]

    # # the total number of GFSs in the GFT
    # num_gfs = len(reg.gft_dict)

    # # extracts the RB and MF segments of a chromosome/individual
    # rb_chrom = population[0, gfs.descriptor.position]
    # mf_chrom = population[0, gfs.descriptor.position + num_gfs]
    #
    # # construct the control system of the selected GFS
    # gfs.contructControlSystemSim(rb_chrom, mf_chrom)

    # print(str(population[0].fitness.values))

    # configure the KB
    alg.configuregft(np.array(population[0]))

    # execute gfs
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("code = {0}, action = {1}\nprobs = {2}".format(
        code, action, str(probs_dict)))

    # print generated rules
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # RB and MF redefinition test
    tic = time.time()
    alg.configuregft(np.array(population[1]))
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("\ncode = {0}, action = {1}\nprobs = {2}".format(
        code, action, str(probs_dict)))
    print("time for redefintion =", (time.time() - tic))
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # dummy fitness values
    for i in range(len(population)):
        population[i].fitness.values = (random.randint(0, 10), 0)

    # create selection operator
    selargs = {"k": len(population), "tournsize": 3}
    selop = Operator(tools.selTournament, **selargs)

    # create crossover operator
    crossargs = {"indpb": 0.2}
    crossop = Operator(tools.cxUniform, **crossargs)

    # create mutation operator
    mutargs = {"mu": 0, "sigma": 1, "indpb": 0.2}
    mutop = Operator(tools.mutGaussian, **mutargs)

    # Perform one step of evolution
    offspring = ga.evolve(population,
                          selop=selop,
                          crossop=crossop,
                          mutop=mutop,
                          mut_prob=0.2,
                          cross_prob=0.7)

    assert offspring is not None

    # print out the offspring of the evolution step
    print("Num of offspring =", len(offspring))
    for child in offspring:
        print(child)
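
ga.evolve above is driven by standard DEAP operators wrapped in Operator objects. A rough sketch of the select/crossover/mutate step it presumably performs, written directly against deap.tools; the internals of ga.evolve and the cloning step are assumptions:

import random
from copy import deepcopy
from deap import tools

def evolve_once(population, mut_prob=0.2, cross_prob=0.7):
    # tournament selection with the same arguments as the Operator above
    selected = tools.selTournament(population, k=len(population), tournsize=3)
    offspring = [deepcopy(ind) for ind in selected]

    # uniform crossover on consecutive pairs
    for c1, c2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < cross_prob:
            tools.cxUniform(c1, c2, indpb=0.2)
            del c1.fitness.values, c2.fitness.values

    # Gaussian mutation
    for mutant in offspring:
        if random.random() < mut_prob:
            tools.mutGaussian(mutant, mu=0, sigma=1, indpb=0.2)
            del mutant.fitness.values
    return offspring
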
Example #5
def main():
    # create an environment
    env = gym.make(rlmarsenvs.carmunk_id)

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(
        str(env.observation_space.high), str(env.observation_space.low)))
    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(
        round((1 / (1 - weighted_avg.beta)))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(),
                   registry=reg,
                   defuzz_method=dfz.max_of_maximum)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=123)

    # create a mutation probability schedule
    # mut_sch = sch.TimeBasedSchedule(decay_factor=1e-4)
    mut_sch = sch.LinearDecaySchedule(initial_prob=1.025, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_carmunk = CarmunkObs()

    # Tau for Boltzmann exploration strategy
    tau_sch = sch.LinearDecaySchedule(initial_prob=20, decay_factor=0.02)

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:

        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # for i_episode in range(NUM_EPISODES_PER_IND):

            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_carmunk.current_observation = observation

            # run through the time steps of the simulation
            t = 0
            while True:
                t += 1

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                code, action, input_vec_dict, probs_dict = alg.executegft(
                    obs_carmunk, agent_id)

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(code)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=probs_dict.keys())

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(
                    agent_id=agent_id,
                    action=code,
                    dec_reward_dict=reward_dict,
                    input_vec_dict=input_vec_dict,
                    output_dict=probs_dict)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_carmunk.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end it
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            cache.save_csv()

            # if total_reward < 50:
            #     total_reward = - 50
            print("Episode finished after {} time steps".format(t + 1))
            print("Episode: {}/{} | score: {}".format(ind_count,
                                                      (NUM_OF_GENS * POP_SIZE),
                                                      total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward, )

            # save qualified individual
            if SAVE_BEST and total_reward > SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])
        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop,
                                       ga_alg=ga,
                                       mut_sch=mut_sch,
                                       epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)
        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
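
plot_charts is not defined in these examples. A minimal sketch built on the Series.data() interface used in Example #2 (a dict with 'x' and 'y' keys); the figure layout and the assumption that ana.Series exposes a name attribute are mine:

import matplotlib.pyplot as plt

def plot_charts(*series_list):
    # hypothetical helper: one sub-plot per series, stacked vertically
    fig, axes = plt.subplots(len(series_list), 1, squeeze=False)
    for ax, series in zip(axes[:, 0], series_list):
        ax.plot(series.data()['x'], series.data()['y'])
        ax.set_title(series.name)  # assumed attribute on ana.Series
    plt.show()
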