def main():
    # creates an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):
                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id,
                                                   action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict,
                                                   output_dict=actions_dict,
                                                   next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end the current episode
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")

            print("Episode: {t}/{T} | score: {r}".format(t=ind_count,
                                                         T=(NUM_OF_GENS * POP_SIZE),
                                                         r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
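
# `reward_shaping(pos, r)` is referenced above but defined elsewhere in this script.
# The following is a minimal sketch of one possible implementation, assuming a simple
# position-based bonus toward the MountainCarContinuous-v0 goal position (roughly 0.45);
# the shaping actually used by the author may differ.
def reward_shaping(pos, r):
    # add a small bonus proportional to how far the car has climbed the right hill,
    # plus a terminal bonus once the assumed goal position is reached
    shaped = r + 10.0 * max(0.0, pos)
    if pos >= 0.45:
        shaped += 100.0
    return shaped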
def main():
    # creates an environment
    env = gym.make("CartPole-v1")

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                    str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # Load pretrained NN model weights
    params = [10, 50, 30, 2]
    model = neural_net(num_inputs=4, params=params, lr=0.1, load=model_path, loss=neg_log_likelihood)
    reg.nn_models_dict["CartPoleMovement"] = model

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.nn_models_dict.keys())

    # create an object for retrieving input values
    obs_cartpole = CartPoleObs()

    # replay buffer
    cart_move_exp_rep = ReplayBuffer(max_size=1000)

    ts_elapsed = 0
    for i_episode in range(MAX_NUM_EPISODES):
        # get initial state
        state = env.reset()

        # initialize reward accumulator for the episode
        total_reward = 0

        # set the current state for retrieving specific inputs
        obs_cartpole.current_observation = state

        while True:
            # show the environment
            env.render()

            # since only one agent applies to this case study, set a dummy agent ID
            agent_id = 0

            # get an action
            code, action, input_vec_dict, probs_dict = alg.executenntree(obs_cartpole, agent_id,
                                                                         action_selection_func=greedy_strategy,
                                                                         func_args=None)

            # apply the selected action to the environment and observe feedback
            next_state, reward, done, _ = env.step(code)

            # set the received observation as the current array for retrieving input values
            obs_cartpole.current_observation = next_state

            # mark the models that executed for the agent in this time step
            cache.mark(output_dict_keys=probs_dict.keys())

            # decompose the received reward
            reward_dict = cache.decomposeReward(reward)

            # create experiences for the agent with respect to each model that executed for the agent
            state_dict = {"CartPoleMovement": np.array([obs_cartpole.getCartPosition(agent_id),
                                                        obs_cartpole.getCartVelocity(agent_id),
                                                        obs_cartpole.getPoleAngle(agent_id),
                                                        obs_cartpole.getPoleVelocity(agent_id)])}
            exp_dict = cache.createExperiences(agent_id=agent_id, action=code,
                                               dec_reward_dict=reward_dict,
                                               input_vec_dict=input_vec_dict,
                                               output_dict=probs_dict,
                                               next_state_dict=state_dict)

            # accumulate the rewards of all time steps
            total_reward += reward

            # add the experiences of the agent to their corresponding replay buffers
            for key, exp in exp_dict.items():
                if key == "CartPoleMovement":
                    cart_move_exp_rep.add(exp)

            # increment time steps played
            ts_elapsed += 1
            if ts_elapsed >= TIME_STEPS_BEFORE_TRAIN:
                # print("train the model")
                pass

            # if the episode is over, end the current episode
            if done:
                break

        print("Episode: {}/{} | score: {}".format(i_episode + 1, MAX_NUM_EPISODES, total_reward))
        avg_series.addrecord(i_episode, weighted_avg.update(total_reward))

    plt.figure(0)
    plt.title("Cartpole with simple NN")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.show()
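
# `greedy_strategy` is passed to Algorithm.executenntree as the action-selection function.
# Its exact signature lives elsewhere in the repo; below is a minimal sketch under the
# assumption that the selection function receives the model's output probabilities (plus
# optional args) and returns the index of the chosen action.
import numpy as np


def greedy_strategy(probs, args=None):
    # pick the action with the highest predicted probability
    return int(np.argmax(probs))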
def run(self):
    # initialize GA ops
    self.ga_config.init_ops(self.seed)
    reg = self.ga_config.registry
    ga = self.ga_config.ga

    # initialize sim exec. config
    self.sim_config.init_ops(reg)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg, random_process=self.sim_config.random_process)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if self.ga_config.load_init_pop_file is not None:
        pop = ga.load_initial_population(self.ga_config.load_init_pop_file, self.ga_config.pop_size)
        # pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(self.ga_config.pop_size)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    env = self.sim_config.env
    agents = self.sim_config.agents

    while epoch < self.ga_config.num_gens:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # reset the environment
            obs = env.reset()

            # set the received observation as the current array for retrieving input values
            for i, agent in enumerate(agents):
                agent.obs_accessor.current_observation = obs if len(agents) == 1 else obs[i]

            for i_ep in range(self.sim_config.episodes_per_ind):
                # run through the time steps of the simulation
                for t in range(self.sim_config.max_time_steps):
                    if self.sim_config.render:
                        # show the environment
                        env.render()

                    a = None
                    for agent in agents:
                        if self.sim_config.action_space_type == Const.DISCRETE:
                            # get an action
                            code, action, input_vec_dict, probs_dict = alg.executegft(agent.obs_accessor,
                                                                                      agent_id=agent.id)

                            # mark the GFSs that executed for the agent in this time step
                            cache.mark(output_dict_keys=probs_dict.keys())

                            # store intermediate values
                            agent.temp_values = (code, action, input_vec_dict, probs_dict)
                        else:
                            # get an action
                            actions_dict, input_vec_dict = alg.executebfc(agent.obs_accessor,
                                                                          agent_id=agent.id,
                                                                          add_noise=True)
                            agent.temp_values = (actions_dict, input_vec_dict)
                            cache.mark(output_dict_keys=actions_dict.keys())

                    # apply the selected action to the environment and observe feedback
                    a = agents[0].temp_values[0] if len(agents) == 1 else [a.temp_values[0] for a in agents]
                    if self.sim_config.action_space_type == Const.DISCRETE:
                        a = np.array(a).astype('int').tolist() if hasattr(a, "__iter__") else int(a)
                    else:
                        a = list(a.values())
                    next_state, reward, done, _ = env.step(a)

                    if self.r_shaping_callback is not None and callable(self.r_shaping_callback):
                        reward = self.r_shaping_callback(next_state, reward, done)

                    if self.time_step_finished_callback is not None and callable(self.time_step_finished_callback):
                        self.time_step_finished_callback(next_state, reward)

                    if hasattr(reward, "__iter__"):
                        acc_reward = 0
                        for r in reward:
                            acc_reward += r
                        reward = acc_reward

                    # decompose the received reward
                    reward_dict = cache.decomposeReward(reward)

                    for i, agent in enumerate(agents):
                        if self.sim_config.action_space_type == Const.DISCRETE:
                            code, action, input_vec_dict, probs_dict = agent.temp_values

                            # create experiences for the agent with respect to each GFS that executed for the agent
                            exp_dict = cache.createExperiences(agent_id=agent.id, action=code,
                                                               dec_reward_dict=reward_dict,
                                                               input_vec_dict=input_vec_dict,
                                                               output_dict=probs_dict,
                                                               next_state_dict=None)
                        else:
                            actions_dict, input_vec_dict = agent.temp_values
                            exp_dict = cache.createExperiences(agent_id=agent.id,
                                                               action=list(actions_dict.values()),
                                                               dec_reward_dict=reward_dict,
                                                               input_vec_dict=input_vec_dict,
                                                               output_dict=actions_dict,
                                                               next_state_dict=None)

                        # add the experiences of the agent to the cache
                        cache.addExperiences(time_step=t, exp_dict=exp_dict)

                        # set the received observation as the current array for retrieving input values
                        agent.obs_accessor.current_observation = next_state if len(agents) == 1 else next_state[i]

                    # accumulate the rewards of all time steps
                    total_reward += reward

                    # if the episode is over, end the current episode
                    if done:
                        break

            # set the return from the environment as the fitness value of the current individual
            total_reward /= float(self.sim_config.episodes_per_ind)
            ind.fitness.values = (total_reward,)

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            if self.sim_config.persist_cached_data:
                cache.save_csv(path="data/")

            if self.episode_finished_callback is not None and callable(self.episode_finished_callback):
                self.episode_finished_callback(ind, ind_count,
                                               self.ga_config.num_gens * self.ga_config.pop_size,
                                               total_reward)

        # GA stats by DEAP
        record = ga.stats.compile(pop)
        ga.logbook.record(epoch=epoch, **record)
        if self.epoch_finished_callback is not None and callable(self.epoch_finished_callback):
            self.epoch_finished_callback(epoch, pop, record)

        # perform evolution
        if self.ga_config.apply_evolution:
            m_prob = self.ga_config.mutation_prob_schdl.get_prob(epoch)
            c_prob = self.ga_config.cross_prob_schdl.get_prob(epoch)
            ev = self.ga_config.evol_config
            pop = ga.evolve(pop, selop=ev.selection_op, crossop=ev.crossover_op, mutop=ev.mutation_op,
                            mut_prob=m_prob, cross_prob=c_prob)
            if self.evolution_finished_callback is not None and callable(self.evolution_finished_callback):
                self.evolution_finished_callback(pop, m_prob, c_prob, epoch)

        epoch += 1

    if self.sim_finished_callback is not None and callable(self.sim_finished_callback):
        self.sim_finished_callback(ga, pop)

    env.close()
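
# The callbacks invoked by run() are supplied by the caller. From the call sites above,
# episode_finished_callback receives (individual, individual_count, total_individuals,
# total_reward). A minimal logging sketch of such a callback (the function name is
# illustrative and not part of the library):
def log_episode_finished(ind, ind_count, total_inds, total_reward):
    # report progress and the fitness assigned to the individual just evaluated
    print("Episode: {}/{} | score: {}".format(ind_count, total_inds, total_reward))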
def startsim():
    # sets up the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=xmlToLinvars(open(LIN_VARS_FILE).read()),
                   defuzz_method=dfz.max_of_maximum)

    # create the GA object for accessing GA operations
    ga = GeneticAlgorithm(registry=reg)

    # get the algorithm for execution
    alg = Algorithm(registry=reg)

    # get the initial population
    population = ga.generate_initial_population(POP_SIZE)

    # print initial population
    for child in population:
        print(child)

    # get one GFS for debugging
    gfs = reg.gft_dict[list(reg.gft_dict.keys())[2]]

    # # the total number of GFSs in the GFT
    # num_gfs = len(reg.gft_dict)
    #
    # # extract the RB and MF segments of a chromosome/individual
    # rb_chrom = population[0, gfs.descriptor.position]
    # mf_chrom = population[0, gfs.descriptor.position + num_gfs]
    #
    # # construct the control system of the selected GFS
    # gfs.contructControlSystemSim(rb_chrom, mf_chrom)
    # print(str(population[0].fitness.values))

    # configure the KB
    alg.configuregft(np.array(population[0]))

    # execute the GFS
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("code = {0}, action = {1}\nprobs = {2}".format(code, action, str(probs_dict)))

    # print generated rules
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # RB and MF redefinition test
    tic = time.time()
    alg.configuregft(np.array(population[1]))
    code, action, input_vec_dict, probs_dict = alg.executegft(TestObserve(), 0)
    print("\ncode = {0}, action = {1}\nprobs = {2}".format(code, action, str(probs_dict)))
    print("time for redefinition =", (time.time() - tic))
    rules = gfs.rules
    for i in range(len(rules)):
        print("[{0}] {1}".format(i + 1, str(rules[i])))

    # dummy fitness values
    for i in range(len(population)):
        population[i].fitness.values = (random.randint(0, 10), 0)

    # create selection operator
    selargs = {"k": len(population), "tournsize": 3}
    selop = Operator(tools.selTournament, **selargs)

    # create crossover operator
    crossargs = {"indpb": 0.2}
    crossop = Operator(tools.cxUniform, **crossargs)

    # create mutation operator
    mutargs = {"mu": 0, "sigma": 1, "indpb": 0.2}
    mutop = Operator(tools.mutGaussian, **mutargs)

    # Perform one step of evolution
    offspring = ga.evolve(population, selop=selop, crossop=crossop, mutop=mutop,
                          mut_prob=0.2, cross_prob=0.7)
    assert offspring is not None

    # print out the offspring of the evolution step
    print("Num of offspring =", len(offspring))
    for child in offspring:
        print(child)
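
# This debugging script presumably calls startsim() when executed directly; a standard
# entry-point guard is sketched below (an assumption, since the original entry point is
# not shown in this excerpt).
if __name__ == "__main__":
    startsim()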
def main():
    # creates an environment
    env = gym.make(rlmarsenvs.carmunk_id)

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                    str(env.observation_space.low)))

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round(1 / (1 - weighted_avg.beta))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=123)

    # create a mutation probability schedule
    # mut_sch = sch.TimeBasedSchedule(decay_factor=1e-4)
    mut_sch = sch.LinearDecaySchedule(initial_prob=1.025, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_carmunk = CarmunkObs()

    # Tau for Boltzmann exploration strategy
    tau_sch = sch.LinearDecaySchedule(initial_prob=20, decay_factor=0.02)

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:
        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # for i_episode in range(NUM_EPISODES_PER_IND):
            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_carmunk.current_observation = observation

            # run through the time steps of the simulation
            t = 0
            while True:
                t += 1

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                code, action, input_vec_dict, probs_dict = alg.executegft(obs_carmunk, agent_id)

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(code)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=probs_dict.keys())

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=code,
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict,
                                                   output_dict=probs_dict)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_carmunk.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end the current episode
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            cache.save_csv()

            # if total_reward < 50:
            #     total_reward = -50

            print("Episode finished after {} time steps".format(t + 1))
            print("Episode: {}/{} | score: {}".format(ind_count, (NUM_OF_GENS * POP_SIZE), total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward > SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])

        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)

        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
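
# `plot_charts(avg_series, mut_prob_series)` is a plotting helper defined elsewhere in
# these scripts. A minimal matplotlib sketch is given below, assuming each ana.Series
# exposes data() with 'x' and 'y' keys as used in the CartPole script above; the figure
# titles and axis labels are illustrative assumptions.
import matplotlib.pyplot as plt


def plot_charts(avg_series, mut_prob_series):
    # running-average performance across evaluated individuals
    plt.figure(0)
    plt.title("Average performance")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("individual")
    plt.ylabel("score")

    # mutation probability per generation
    plt.figure(1)
    plt.title("Mutation probability")
    plt.plot(mut_prob_series.data()['x'], mut_prob_series.data()['y'])
    plt.xlabel("epoch")
    plt.ylabel("probability")

    plt.show()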