Example #1
0
    def test_should_traverse(self, env):
        """Explore ``env`` for 300 trials with GA disabled and check results.

        Asserts the macroclassifier count range, that the knowledge metric
        equals 100, that the reliable and micro-classifier counts both match
        the macroclassifier count, and that more than 5000 steps were made.
        """
        # given
        options = dict(epsilon=1.0,
                       biased_exploration=0.5,
                       do_ga=False,
                       metrics_trial_frequency=1,
                       user_metrics_collector_fcn=self._maze_metrics)
        agent = ACS2(Configuration(8, 8, **options))

        # when
        classifiers, collected = agent.explore(env, 300)

        # then
        assert 90 < count_macroclassifiers(classifiers) < 200

        knowledge = self._get_knowledge(collected)
        assert knowledge == 100

        macro = count_macroclassifiers(classifiers)
        assert macro == count_reliable(classifiers)

        macro = count_macroclassifiers(classifiers)
        assert macro == count_microclassifiers(classifiers)

        total_steps = self._get_total_steps(collected)
        assert total_steps > 5000
Example #2
0
    def test_should_traverse_with_ga(self, env):
        """Explore ``env`` for 300 trials with GA enabled and check results.

        Asserts that the macroclassifier count is within 55 of 380, the
        knowledge metric within 5 of 100, that there are more
        macroclassifiers than reliable ones, fewer macroclassifiers than
        microclassifiers, and that more than 2500 steps were made.
        """
        # given
        options = dict(epsilon=0.8,
                       biased_exploration=0.5,
                       mu=0.3,
                       chi=0.0,
                       do_ga=True,
                       metrics_trial_frequency=1,
                       user_metrics_collector_fcn=self._maze_metrics)
        agent = ACS2(Configuration(8, 8, **options))

        # when
        classifiers, collected = agent.explore(env, 300)

        # then
        size_deviation = abs(380 - count_macroclassifiers(classifiers))
        assert size_deviation < 55
        knowledge_gap = abs(100 - self._get_knowledge(collected))
        assert knowledge_gap < 5

        macro = count_macroclassifiers(classifiers)
        reliable = count_reliable(classifiers)
        assert macro > reliable

        macro = count_macroclassifiers(classifiers)
        micro = count_microclassifiers(classifiers)
        assert macro < micro

        total_steps = self._get_total_steps(collected)
        assert total_steps > 2500
Example #3
0
def get_actors():
    """Create the 6-bit multiplexer environment and a matching ACS2 agent.

    The agent is configured with the environment's observation size, two
    actions, a ``MultiplexerAdapter`` instance and GA enabled.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-6bit-v0')
    perception_size = environment.env.observation_space.n
    agent = ACS2(Configuration(perception_size,
                               2,
                               environment_adapter=MultiplexerAdapter(),
                               do_ga=True))
    return agent, environment
Example #4
0
def get_actors():
    """Create the 6-bit multiplexer environment and a matching ACS2 agent.

    The agent is configured with the environment's observation size, two
    actions, ``_map_perception`` as the perception mapper and GA enabled.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-6bit-v0')
    perception_size = environment.env.observation_space.n
    agent = ACS2(Configuration(perception_size,
                               2,
                               perception_mapper_fcn=_map_perception,
                               do_ga=True))
    return agent, environment
Example #5
0
def get_actors():
    """Create the 37-bit multiplexer environment and a matching ACS2 agent.

    The agent is configured with the environment's observation size, two
    actions, ``evaluate_performance`` (called with ``ctrl_bits=5``) as the
    performance function and GA enabled.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-37bit-v0')
    perception_size = environment.env.observation_space.n
    performance_params = {'ctrl_bits': 5}
    agent = ACS2(Configuration(perception_size,
                               2,
                               performance_fcn=evaluate_performance,
                               performance_fcn_params=performance_params,
                               do_ga=True))
    return agent, environment
Example #6
0
def start_single_experiment(env, trials, **kwargs):
    """Run one explore/exploit experiment on a freshly reset environment.

    Args:
        env: environment to run in; ``env.reset()`` is called first.
        trials: number of trials passed to ``explore_exploit``.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        tuple: the resulting population and the metrics after being
        processed by ``parse_metrics``.
    """
    env.reset()

    agent = ACS2(Configuration(**kwargs))
    population, raw_metrics = agent.explore_exploit(env, trials)

    return population, parse_metrics(raw_metrics)
Example #7
0
    def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
        """After 10 explore trials without GA, the macroclassifier count
        must equal the number of distinct classifiers in the population."""
        # given
        perception_size = mp.env.observation_space.n
        agent = ACS2(Configuration(perception_size,
                                   2,
                                   environment_adapter=MultiplexerAdapter(),
                                   do_ga=False))

        # when
        classifiers, _ = agent.explore(mp, 10)

        # then
        macro = count_macroclassifiers(classifiers)
        assert macro == len(set(classifiers))
Example #8
0
    def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
        """After 10 explore trials without GA, the macroclassifier count
        must equal the number of distinct classifiers in the population."""
        # given
        perception_size = mp.env.observation_space.n
        agent = ACS2(Configuration(perception_size,
                                   2,
                                   perception_mapper_fcn=_map_perception,
                                   do_ga=False))

        # when
        classifiers, _ = agent.explore(mp, 10)

        # then
        macro = count_macroclassifiers(classifiers)
        assert macro == len(set(classifiers))
Example #9
0
    def test_should_evaluate_knowledge(self, mp):
        """Every metric collected over 10 explore trials must carry a
        ``reward`` of either 0 or 1000."""
        # given
        perception_size = mp.env.observation_space.n
        agent = ACS2(Configuration(perception_size,
                                   2,
                                   do_ga=False,
                                   environment_adapter=MultiplexerAdapter()))

        # when
        _, collected = agent.explore(mp, 10)

        # then
        allowed_rewards = {0, 1000}
        for entry in collected:
            assert entry['reward'] in allowed_rewards
Example #10
0
    def test_should_evaluate_knowledge(self, mp):
        """Every metric collected over 10 explore trials must report
        ``performance['was_correct']`` as either 0 or 1."""
        # given
        perception_size = mp.env.observation_space.n
        agent = ACS2(Configuration(perception_size,
                                   2,
                                   do_ga=False,
                                   perception_mapper_fcn=_map_perception,
                                   performance_fcn=calculate_performance,
                                   performance_fcn_params={'ctrl_bits': 2}))

        # when
        _, collected = agent.explore(mp, 10)

        # then
        allowed_flags = {0, 1}
        for entry in collected:
            assert entry['performance']['was_correct'] in allowed_flags
def start_single_experiment(env, explore_trials, exploit_trials, **kwargs):
    """Run one explore-then-exploit experiment and return its results.

    Args:
        env: environment to run in; ``env.reset()`` is called first.
        explore_trials: number of trials for the exploration phase.
        exploit_trials: number of trials for the exploitation phase.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        tuple: the population returned by the exploit phase and the output
        of ``parse_experiments_results`` for both metric lists.
    """
    # Prepare the environment
    env.reset()

    cfg = Configuration(**kwargs)

    explorer = ACS2(cfg)
    population_explore, metrics_explore = explorer.explore(env, explore_trials)

    # Exploit with the dedicated agent seeded with the explored population.
    # The exploiter used to be created and then ignored in favour of the
    # explorer, which left it as dead code.
    exploiter = ACS2(cfg, population_explore)
    population_exploit, metrics_exploit = exploiter.exploit(
        env, exploit_trials)

    # Parse results into DataFrame
    df = parse_experiments_results(metrics_explore, metrics_exploit,
                                   cfg.metrics_trial_frequency)

    return population_exploit, df
Example #12
0
    def test_should_gain_knowledge(self, env):
        """After 20 explore trials with action planning, the last metric
        must report positive ``knowledge``, ``with_block`` and ``no_block``."""
        # given
        options = dict(epsilon=1.0,
                       do_ga=False,
                       do_action_planning=True,
                       action_planning_frequency=50,
                       metrics_trial_frequency=1,
                       user_metrics_collector_fcn=handeye_metrics)
        agent = ACS2(Configuration(env.observation_space.n,
                                   env.action_space.n,
                                   **options))

        # when
        _, collected = agent.explore(env, 20)

        # then
        for key in ('knowledge', 'with_block', 'no_block'):
            assert collected[-1][key] > 0.0
Example #13
0
    def test_should_evaluate_knowledge(self, env):
        """Every metric collected over 10 explore trials must keep
        ``knowledge``, ``with_block`` and ``no_block`` within [0, 100]."""
        # given
        options = dict(epsilon=1.0,
                       do_ga=False,
                       do_action_planning=True,
                       action_planning_frequency=50,
                       user_metrics_collector_fcn=handeye_metrics)
        agent = ACS2(Configuration(env.observation_space.n,
                                   env.action_space.n,
                                   **options))

        # when
        _, collected = agent.explore(env, 10)

        # then
        for entry in collected:
            for key in ('knowledge', 'with_block', 'no_block'):
                assert 0.0 <= entry[key] <= 100.0
Example #14
0
    def test_should_traverse(self, env):
        """Explore ``env`` for 300 trials with GA disabled and check results.

        Asserts the macroclassifier count range, that the knowledge value
        equals 100, that the reliable and micro-classifier counts both match
        the macroclassifier count, and that more than 5000 steps were made.
        """
        # given
        agent = ACS2(Configuration(8,
                                   8,
                                   epsilon=1.0,
                                   do_ga=False,
                                   performance_fcn=calculate_performance))

        # when
        classifiers, collected = agent.explore(env, 300)

        # then
        assert 90 < count_macroclassifiers(classifiers) < 200

        knowledge = self._get_knowledge(collected)
        assert 100 == knowledge

        macro = count_macroclassifiers(classifiers)
        assert macro == count_reliable(classifiers)

        macro = count_macroclassifiers(classifiers)
        assert macro == count_microclassifiers(classifiers)

        total_steps = self._get_total_steps(collected)
        assert total_steps > 5000
Example #15
0

class MultiplexerAdapter(EnvironmentAdapter):
    """Environment adapter that stringifies multiplexer observations."""

    @staticmethod
    def to_genotype(env_state):
        """Return a list holding ``str(x)`` for every item of ``env_state``."""
        return list(map(str, env_state))


if __name__ == '__main__':
    # Build the 6-bit boolean multiplexer environment
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Agent settings: two actions, GA switched off, custom adapter and
    # user metrics collector
    cfg = Configuration(
        mp.env.observation_space.n,
        2,
        do_ga=False,
        environment_adapter=MultiplexerAdapter(),
        metrics_trial_frequency=50,
        user_metrics_collector_fcn=mpx_metrics)

    # Phase 1: 1500 exploration trials
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(mp, 1500)

    # Phase 2: 50 exploitation trials with a new agent that reuses the
    # explored population
    agent = ACS2(cfg, population)
    population, exploit_metrics = agent.exploit(mp, 50)

    # Print the metrics gathered during exploration
    for metric in explore_metrics:
        print(metric)
Example #16
0

if __name__ == '__main__':
    # Script entry point: explore and then exploit the TaxiGoal environment
    # with an ACS2 agent.

    # Load desired environment
    environment = gym.make('TaxiGoal-v0')

    environment.reset()

    environment.render()

    # Configure and create the agent
    # (GA disabled; action planning enabled with a frequency setting of 50;
    # the adapter is passed as a class, not as an instance)
    cfg = Configuration(1,
                        6,
                        epsilon=1.0,
                        do_ga=False,
                        environment_adapter=TaxiAdapter,
                        metrics_trial_frequency=1,
                        user_metrics_collector_fcn=taxi_metrics,
                        do_action_planning=True,
                        action_planning_frequency=50)
    logging.info(cfg)

    # Explore the environment
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(environment, 1000)

    # Exploit the environment
    # (a new agent is created on top of the explored population)
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(environment, 10)

    # NOTE(review): the body of this loop is not visible in this excerpt
    for metric in exploit_metric:
Example #17
0
# collect more metrics
def cp_metrics(pop, env):
    """Collect the average fitness together with the population metrics.

    Args:
        pop: population passed to ``avg_fitness`` and ``population_metrics``.
        env: environment passed to ``population_metrics``.

    Returns:
        dict: ``'avg_fitness'`` plus every entry of ``population_metrics``;
        the latter wins if it also defines ``'avg_fitness'``.
    """
    return {
        'avg_fitness': avg_fitness(pop),
        **population_metrics(pop, env),
    }


# Module-level agent configuration consumed by the script entry point below:
# classifiers of length 4, two possible actions, GA enabled, ``cp_metrics``
# as the user metrics collector and ``CartPoleAdapter`` (passed as a class)
# as the environment adapter.
cfg = Configuration(classifier_length=4,
                    number_of_possible_actions=2,
                    epsilon=0.9,
                    beta=0.05,
                    gamma=0.95,
                    theta_exp=50,
                    theta_ga=50,
                    do_ga=True,
                    mu=0.03,
                    u_max=4,
                    metrics_trial_frequency=5,
                    user_metrics_collector_fcn=cp_metrics,
                    environment_adapter=CartPoleAdapter)

if __name__ == '__main__':
    # Explore with decay switched on, using the module-level configuration
    agent = ACS2(cfg)
    population_explore, metrics_explore = agent.explore(
        env, trials, decay=True)

    # Report how many classifiers the population holds
    print(len(population_explore))
Example #18
0
# import logging
from examples.acs2.go_self_play.environment import GoBoard
from lcs.agents.acs2 import Configuration, ClassifiersList

# Configure logger
# logging.basicConfig(level=logging.INFO)

GAMES = 5000  # How many games to play
# NOTE(review): presumably a running total of moves across games - confirm
# against the code that updates it
ALL_MOVES = 0

# Commons
board = GoBoard()  # Initialize board of size 9x9
# Shared configuration: 81 is passed for both positional arguments
# (presumably one per field of the 9x9 board - confirm), GA enabled
cfg = Configuration(81, 81, epsilon=0.6, do_ga=True)
# Classifier list created from the shared configuration
population = ClassifiersList(cfg=cfg)


def determine_player(moves):
    """Return the current player's mark for the given move number.

    Even move numbers map to ``'W'`` and odd ones to ``'B'``.
    """
    parity = moves % 2
    return 'WB'[parity]


def switch_perception(perception):
    """Swap the ``'O'`` and ``'X'`` marks in a perception string.

    Any ``'t'`` already present in the input is turned into ``'X'`` as
    well, exactly as a swap through a temporary ``'t'`` marker would do.
    """
    swap_table = str.maketrans({'O': 'X', 'X': 'O', 't': 'X'})
    return perception.translate(swap_table)


def print_metrics(game, moves, population):
    """Print the number of the finished game and its total move count."""
    print(f"Game [{game}] finished")
    print(f"Total moves: [{moves}]")
Example #19
0
if __name__ == '__main__':
    # Script entry point: run an ACS2 agent on the 9x9 Go environment and
    # pickle the resulting population and metrics.
    env = gym.make('Go9x9-v0')  # TODO: removed from GYM after 0.9 :(
    state = env.reset()

    # Create a mapping dictionary of moves.
    # A key is a number 0, 1, ..., num_moves - 1 and the value is the
    # corresponding action in the Pachi environment.
    moves = {idx: map_moves(env, move) for idx, move in enumerate(moves_9x9())}

    CLASSIFIER_LENGTH = env._state.board.size**2
    NUMBER_OF_POSSIBLE_ACTIONS = len(moves)

    cfg = Configuration(classifier_length=CLASSIFIER_LENGTH,
                        number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
                        perception_mapper_fcn=process_state,
                        environment_metrics_fcn=calculate_environment_metrics,
                        action_mapping_dict=moves,
                        epsilon=0.4,
                        do_ga=True)

    logging.info(cfg)

    # Create the agent
    agent = ACS2(cfg)
    population, metrics = agent.explore_exploit(env, 50)

    # Store metrics in file.
    # Context managers make sure both files are flushed and closed even if
    # pickling fails part-way through.
    logging.info("Dumping data to files ...")
    with open("go_population.pkl", "wb") as population_file:
        pickle.dump(population, population_file)
    with open("go_metrics.pkl", "wb") as metrics_file:
        pickle.dump(metrics, metrics_file)
Example #20
0
if __name__ == '__main__':
    # Command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    # Environment selected on the command line
    maze = gym.make(args.environment)

    # The same frequency value is shared by the plotting helper and the
    # agent's metrics collection
    freq = 10
    plot = pl.Plots('conf.txt', freq)
    cfg = Configuration(
        8,
        8,
        epsilon=args.epsilon,
        do_ga=args.ga,
        user_metrics_collector_fcn=plot.get_logger(),
        metrics_trial_frequency=freq)

    # Three rounds of 200 explore trials followed by 200 exploit trials,
    # all with the same agent; each phase's metrics go into the "mix" set
    agent = ACS2(cfg)
    for i in range(3):
        population, m = agent.explore(maze, 200, decay=False)
        plot.add_data("mix", f'explore {i+1}', m)

        population, m = agent.exploit(maze, 200)
        plot.add_data("mix", f'exploit {i+1}', m)

    # Finish up: draw the plots and save the datasets
    plot.add_data("mix_no_div", '')
    plot.draw()
    plot.save_datasets('test.csv')
Example #21
0
    def to_genotype(phenotype):
        """Wrap ``phenotype`` in a one-element tuple."""
        return (phenotype,)


if __name__ == '__main__':
    # Build the grid environment for the configured size
    grid = gym.make(f'grid-{grid_size}-v0')

    # Agent settings: classifiers of length 2, four actions, GA disabled,
    # ``grid_metrics`` as the user metrics collector
    cfg = Configuration(
        classifier_length=2,
        number_of_possible_actions=4,
        epsilon=0.9,
        beta=0.2,
        gamma=0.95,
        theta_i=0.1,
        theta_as=50,
        theta_exp=50,
        theta_ga=50,
        do_ga=False,
        mu=0.04,
        u_max=2,
        metrics_trial_frequency=5,
        user_metrics_collector_fcn=grid_metrics)

    # 500 exploration trials without decay
    agent1 = ACS2(cfg)
    population, explore_metrics = agent1.explore(grid, 500, decay=False)

    # Print classifiers that anticipate a change, highest fitness first
    for cl in sorted(population, key=lambda c: -c.fitness):
        if not cl.does_anticipate_change():
            continue
        print_cl(cl)
Example #22
0
    print(f"{cl.condition} - {action} - {cl.effect} "
          f"[fit: {cl.fitness:.3f}, r: {cl.r:.2f}, ir: {cl.ir:.2f}]")


if __name__ == '__main__':
    # Build the grid environment
    grid = gym.make('grid-10-v0')

    # Agent settings: classifiers of length 2, four actions, GA enabled
    cfg = Configuration(classifier_length=2,
                        number_of_possible_actions=4,
                        epsilon=0.9,
                        beta=0.03,
                        gamma=0.97,
                        theta_i=0.1,
                        theta_as=10,
                        theta_exp=50,
                        theta_ga=50,
                        do_ga=True,
                        mu=0.04,
                        u_max=2,
                        metrics_trial_frequency=10)

    # 1000 exploration trials without decay
    agent1 = ACS2(cfg)
    population, explore_metrics = agent1.explore(grid, 1000, decay=False)

    # Print classifiers that anticipate a change, highest fitness first
    for cl in sorted(population, key=lambda c: -c.fitness):
        if not cl.does_anticipate_change():
            continue
        print_cl(cl)
Example #23
0
from examples.acs2.boolean_multiplexer.utils import calculate_performance
from lcs.agents.acs2 import ACS2, Configuration


def _map_perception(perception):
    return [str(x) for x in perception]


if __name__ == '__main__':
    # Build the 6-bit boolean multiplexer environment
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Agent settings: two actions, GA switched off, perception mapped by
    # ``_map_perception`` and performance computed with ``ctrl_bits=2``
    cfg = Configuration(
        mp.env.observation_space.n,
        2,
        do_ga=False,
        perception_mapper_fcn=_map_perception,
        performance_fcn=calculate_performance,
        performance_fcn_params={'ctrl_bits': 2})
    agent = ACS2(cfg)

    # Phase 1: 1500 exploration trials (their metrics are discarded)
    population, _ = agent.explore(mp, 1500)

    # Phase 2: 50 exploitation trials with a new agent that reuses the
    # explored population
    agent = ACS2(cfg, population)
    population, metrics = agent.exploit(mp, 50)

    # Print the exploitation metrics
    for metric in metrics:
        print(metric)
Example #24
0
def run(environment, explore_trials, exploit_trials, position_bins,
        velocity_bins, biased_exploration_prob, decay, gamma):
    """Train and evaluate an ACS2 agent inside a single MLflow run.

    The exploration budget is split into two equal phases: the first always
    runs with ``decay=False``, the second with the caller-supplied
    ``decay``. Afterwards a separate agent, seeded with a deep copy of the
    learned population, runs the exploit trials. Metrics, the population and
    several plots are handed to the logging/plotting helpers.

    Args:
        environment: environment id passed to ``gym.make``.
        explore_trials: total exploration trials; each of the two phases
            runs ``int(explore_trials / 2)`` of them.
        exploit_trials: number of exploitation trials.
        position_bins: bucket count for the first observation dimension.
        velocity_bins: bucket count for the second observation dimension.
        biased_exploration_prob: forwarded as ``biased_exploration``.
        decay: ``decay`` flag used in the second exploration phase.
        gamma: forwarded as ``gamma``.
    """
    bins = [position_bins, velocity_bins]

    with mlflow.start_run():
        logging.info("Initializing environment...")
        env = gym.make(environment)
        env._max_episode_steps = 1000

        logging.info("Creating real-value discretizer...")
        _range, _low = (env.observation_space.high - env.observation_space.low,
                        env.observation_space.low)

        class MountainCarAdapter(EnvironmentAdapter):
            @classmethod
            def to_genotype(cls, obs):
                """Discretize ``obs`` into per-dimension bucket labels."""
                # Shift by the lower bound so the value is relative to the
                # observation range. Subtracting ``_low`` equals the former
                # ``obs + np.abs(_low)`` whenever the bounds are
                # non-positive and stays correct when they are positive.
                r = (obs - _low) / _range
                b = (r * bins).astype(int)
                return b.astype(str).tolist()

        logging.info("Creating custom metrics")

        def mc_metrics(pop, env):
            """Average fitness of reliable classifiers plus population
            metrics."""
            metrics = {
                'avg_fitness':
                np.mean([cl.fitness for cl in pop if cl.is_reliable()])
            }
            metrics.update(population_metrics(pop, env))

            return metrics

        logging.info("Building agent configuration...")
        cfg = Configuration(classifier_length=2,
                            number_of_possible_actions=3,
                            epsilon=1.0,
                            biased_exploration=biased_exploration_prob,
                            beta=0.2,
                            gamma=gamma,
                            theta_as=50,
                            theta_exp=100,
                            theta_ga=50,
                            do_ga=True,
                            mu=0.03,
                            chi=0.0,
                            metrics_trial_frequency=5,
                            user_metrics_collector_fcn=mc_metrics,
                            environment_adapter=MountainCarAdapter)

        # Each exploration phase gets half of the budget
        trials = int(explore_trials / 2)

        logging.info(
            f"Running {trials} experiments with pure exploration (decay = False)"
        )
        agent = ACS2(cfg)
        population, metrics_1 = agent.explore(env, trials, decay=False)

        logging.info(
            f"Running {trials} experiments with optional decay (decay = {decay})"
        )
        agent = ACS2(cfg, population)
        population, metrics_2 = agent.explore(env, trials, decay=decay)

        logging.info("Generating metrics...")
        explore_metrics_df = merge_metrics(metrics_1, metrics_2, cfg)
        log_metrics("explore", explore_metrics_df)

        logging.info("Logging population artifact...")
        log_population_artifact("explore-population", population)
        log_metrics_artifact("explore-metrics", explore_metrics_df)

        logging.info("Generating plots...")
        avg_window = int(trials / 100)
        plot_steps_in_trial("explore-steps.png",
                            explore_metrics_df,
                            window=avg_window)
        plot_avg_fitness("explore-fitness.png",
                         explore_metrics_df,
                         window=avg_window)
        plot_reward("explore-reward.png",
                    explore_metrics_df,
                    window=avg_window)
        plot_classifiers("explore-classifiers.png",
                         explore_metrics_df,
                         window=avg_window)

        logging.info(f"Running {exploit_trials} exploit trials")
        # Exploit with the dedicated agent working on a copy of the
        # population. It used to be created and then ignored in favour of
        # the exploring agent.
        exploiter = ACS2(cfg, deepcopy(population))
        population_exploit, metrics_3 = exploiter.exploit(env, exploit_trials)

        logging.info("Generating metrics")
        exploit_metrics_df = metrics_to_df(metrics_3)
        log_metrics_artifact("exploit-metrics", exploit_metrics_df)
        log_metrics("exploit", exploit_metrics_df)
Example #25
0
# noinspection PyUnresolvedReferences
import gym_handeye
from lcs.agents.acs2 import ACS2, Configuration
from examples.acs2.handeye.utils import handeye_metrics

# Configure logger
logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    # Build the HandEye environment
    hand_eye = gym.make('HandEye3-v0')

    # Agent settings taken from the environment's spaces; GA and action
    # planning are both switched off
    cfg = Configuration(
        hand_eye.observation_space.n,
        hand_eye.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=False,
        user_metrics_collector_fcn=handeye_metrics)

    # Phase 1: 50 exploration trials, logging every collected metric
    logging.info("Exploring HandEye")
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(hand_eye, 50)

    for metric in explore_metrics:
        logging.info(metric)

    # Phase 2: 10 exploitation trials with a new agent that reuses the
    # explored population
    logging.info("Exploiting HandEye")
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(hand_eye, 10)
Example #26
0
    def to_genotype(cls, phenotype):
        """Wrap ``phenotype`` in a one-element tuple."""
        return (phenotype,)


if __name__ == '__main__':
    # Build the corridor environment
    corridor = gym.make('corridor-40-v0')

    # Agent settings: classifiers of length 1, two actions, epsilon-greedy
    # action selection, GA enabled, custom corridor adapter
    cfg = Configuration(
        classifier_length=1,
        number_of_possible_actions=2,
        action_selector=EpsilonGreedy,
        epsilon=0.8,
        beta=0.03,
        gamma=0.97,
        theta_exp=50,
        theta_ga=50,
        do_ga=True,
        mu=0.02,
        u_max=1,
        metrics_trial_frequency=20,
        environment_adapter=CorridorAdapter)

    # 1000 exploration trials
    logging.info("Exploring environment")
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(corridor, 1000)

    # Order the classifiers from highest to lowest fitness
    population = sorted(population, key=lambda c: -c.fitness)

    print("ok")
Example #27
0
from examples.acs2.maze.utils import calculate_performance
from lcs.agents.acs2 import ACS2, Configuration

# Configure logger
logging.basicConfig(level=logging.INFO)


if __name__ == '__main__':
    # Build the maze environment
    maze = gym.make('BMaze4-v0')

    # Agent settings: GA switched off, ``calculate_performance`` as the
    # performance function; the configuration is logged for reference
    cfg = Configuration(
        8,
        8,
        epsilon=1.0,
        do_ga=False,
        performance_fcn=calculate_performance)
    logging.info(cfg)

    # Phase 1: 50 exploration trials
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(maze, 50)

    # Phase 2: 10 exploitation trials with a new agent that reuses the
    # explored population
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(maze, 10)

    # Log every exploitation metric
    for metric in exploit_metric:
        logging.info(metric)
Example #28
0
 def cfg(self):
     """Return a ``Configuration`` built from the arguments ``(8, 8)``."""
     configuration = Configuration(8, 8)
     return configuration
Example #29
0
 def cfg(self):
     """Return a ``Configuration`` built from ``(8, 8)`` with
     ``theta_r=0.9``."""
     configuration = Configuration(8, 8, theta_r=0.9)
     return configuration
Example #30
0
if __name__ == '__main__':
    # Command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    # Environment selected on the command line
    maze = gym.make(args.environment)

    # Agent settings: epsilon and the GA switch come from the command line,
    # ``maze_metrics`` is the user metrics collector
    cfg = Configuration(
        8,
        8,
        epsilon=args.epsilon,
        do_ga=args.ga,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=maze_metrics)

    # Phase 1: exploration, logging every collected metric
    logging.info("Exploring maze")
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(maze, args.explore_trials)

    for metric in explore_metrics:
        logger.info(metric)

    # Phase 2: exploitation with a new agent that reuses the explored
    # population
    logging.info("Exploiting maze")
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(maze, args.exploit_trials)