Code example #1
File: test_Maze.py  Project: tigra/pyalcs-fixes
    def test_should_traverse(self, env):
        # given
        cfg = Configuration(8,
                            8,
                            epsilon=1.0,
                            biased_exploration=0.5,
                            do_ga=False,
                            metrics_trial_frequency=1,
                            user_metrics_collector_fcn=self._maze_metrics)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(env, 300)

        # then
        assert 90 < count_macroclassifiers(population) < 200

        assert self._get_knowledge(metrics) == 100

        assert count_macroclassifiers(population) == count_reliable(population)

        assert count_macroclassifiers(population) \
            == count_microclassifiers(population)

        assert self._get_total_steps(metrics) > 5000
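The assertions above rely on helper counters that are not shown in these snippets. Below is a minimal sketch of what they might compute; it assumes classifiers expose a num (numerosity) attribute and an is_reliable() method, which may differ from the project's actual helpers.

def count_macroclassifiers(population):
    # Each distinct classifier in the population is one macroclassifier.
    return len(population)

def count_microclassifiers(population):
    # Microclassifiers weight each macroclassifier by its numerosity.
    return sum(cl.num for cl in population)

def count_reliable(population):
    # Reliable classifiers are those the agent marks as trustworthy.
    return len([cl for cl in population if cl.is_reliable()])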
Code example #2
File: test_Maze.py  Project: tigra/pyalcs-fixes
    def test_should_traverse_with_ga(self, env):
        # given
        cfg = Configuration(8,
                            8,
                            epsilon=0.8,
                            biased_exploration=0.5,
                            mu=0.3,
                            chi=0.0,
                            do_ga=True,
                            metrics_trial_frequency=1,
                            user_metrics_collector_fcn=self._maze_metrics)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(env, 300)

        # then
        assert abs(380 - count_macroclassifiers(population)) < 55
        assert abs(100 - self._get_knowledge(metrics)) < 5

        assert count_macroclassifiers(population) \
            > count_reliable(population)

        assert count_macroclassifiers(population) \
            < count_microclassifiers(population)

        assert self._get_total_steps(metrics) > 2500
Code example #3
def start_single_experiment(env, explore_trials, exploit_trials, **kwargs):
    # Prepare the environment
    env.reset()

    cfg = Configuration(**kwargs)

    explorer = ACS2(cfg)
    population_explore, metrics_explore = explorer.explore(env, explore_trials)

    exploiter = ACS2(cfg, population_explore)
    population_exploit, metrics_exploit = exploiter.exploit(env, exploit_trials)

    # Parse results into DataFrame
    df = parse_experiments_results(metrics_explore, metrics_exploit,
                                   cfg.metrics_trial_frequency)

    return population_exploit, df
Code example #4
def get_actors():
    mp = gym.make('boolean-multiplexer-6bit-v0')
    cfg = Configuration(mp.env.observation_space.n,
                        2,
                        perception_mapper_fcn=_map_perception,
                        do_ga=True)

    return ACS2(cfg), mp
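The snippet above passes a _map_perception helper that is not defined in this excerpt. A hypothetical sketch, assuming it plays the same role as MultiplexerAdapter.to_genotype in the later examples (turning the raw observation into the string genotype ACS2 works with):

def _map_perception(perception):
    # Convert each raw observation element into a string symbol.
    return [str(x) for x in perception]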
Code example #5
def get_actors():
    mp = gym.make('boolean-multiplexer-6bit-v0')
    cfg = Configuration(mp.env.observation_space.n,
                        2,
                        environment_adapter=MultiplexerAdapter(),
                        do_ga=True)

    return ACS2(cfg), mp
Code example #6
File: perform_experiment.py  Project: smiszym/pyalcs
def get_actors():
    mp = gym.make('boolean-multiplexer-37bit-v0')
    cfg = Configuration(mp.env.observation_space.n,
                        2,
                        performance_fcn=evaluate_performance,
                        performance_fcn_params={'ctrl_bits': 5},
                        do_ga=True)

    return ACS2(cfg), mp
Code example #7
def start_single_experiment(env, trials, **kwargs):
    env.reset()
    cfg = Configuration(**kwargs)

    agent = ACS2(cfg)
    population, metrics = agent.explore_exploit(env, trials)

    metrics_df = parse_metrics(metrics)

    return population, metrics_df
Code example #8
    def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
        # given
        cfg = Configuration(mp.env.observation_space.n, 2,
                            environment_adapter=MultiplexerAdapter(),
                            do_ga=False)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(mp, 10)

        # then
        assert count_macroclassifiers(population) == len(set(population))
Code example #9
    def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
        # given
        cfg = Configuration(mp.env.observation_space.n, 2,
                            perception_mapper_fcn=_map_perception,
                            do_ga=False)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(mp, 10)

        # then
        assert count_macroclassifiers(population) == len(set(population))
Code example #10
    def test_should_evaluate_knowledge(self, mp):
        # given
        cfg = Configuration(mp.env.observation_space.n, 2,
                            do_ga=False,
                            environment_adapter=MultiplexerAdapter())
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(mp, 10)

        # then
        for metric in metrics:
            assert metric['reward'] in {0, 1000}
Code example #11
    def test_should_evaluate_knowledge(self, mp):
        # given
        cfg = Configuration(mp.env.observation_space.n, 2,
                            do_ga=False,
                            perception_mapper_fcn=_map_perception,
                            performance_fcn=calculate_performance,
                            performance_fcn_params={'ctrl_bits': 2})
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(mp, 10)

        # then
        for metric in metrics:
            assert metric['performance']['was_correct'] in {0, 1}
Code example #12
    def test_should_gain_knowledge(self, env):
        # given
        cfg = Configuration(env.observation_space.n,
                            env.action_space.n,
                            epsilon=1.0,
                            do_ga=False,
                            do_action_planning=True,
                            action_planning_frequency=50,
                            metrics_trial_frequency=1,
                            user_metrics_collector_fcn=handeye_metrics)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(env, 20)

        # then
        assert metrics[-1]['knowledge'] > 0.0
        assert metrics[-1]['with_block'] > 0.0
        assert metrics[-1]['no_block'] > 0.0
Code example #13
    def test_should_evaluate_knowledge(self, env):
        # given
        cfg = Configuration(env.observation_space.n,
                            env.action_space.n,
                            epsilon=1.0,
                            do_ga=False,
                            do_action_planning=True,
                            action_planning_frequency=50,
                            user_metrics_collector_fcn=handeye_metrics)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(env, 10)

        # then
        for metric in metrics:
            assert 0.0 <= metric['knowledge'] <= 100.0
            assert 0.0 <= metric['with_block'] <= 100.0
            assert 0.0 <= metric['no_block'] <= 100.0
Code example #14
    def test_should_traverse(self, env):
        # given
        cfg = Configuration(8,
                            8,
                            epsilon=1.0,
                            do_ga=False,
                            performance_fcn=calculate_performance)
        agent = ACS2(cfg)

        # when
        population, metrics = agent.explore(env, 300)

        # then
        assert 90 < count_macroclassifiers(population) < 200

        assert 100 == self._get_knowledge(metrics)

        assert count_macroclassifiers(population) == count_reliable(population)

        assert count_macroclassifiers(population) \
            == count_microclassifiers(population)

        assert self._get_total_steps(metrics) > 5000
Code example #15
class MultiplexerAdapter(EnvironmentAdapter):
    @staticmethod
    def to_genotype(env_state):
        return [str(x) for x in env_state]


if __name__ == '__main__':
    # Load desired environment
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Create agent
    cfg = Configuration(mp.env.observation_space.n,
                        2,
                        do_ga=False,
                        environment_adapter=MultiplexerAdapter(),
                        metrics_trial_frequency=50,
                        user_metrics_collector_fcn=mpx_metrics)
    agent = ACS2(cfg)

    # Explore the environment
    population, explore_metrics = agent.explore(mp, 1500)

    # Exploit the environment
    agent = ACS2(cfg, population)
    population, exploit_metrics = agent.exploit(mp, 50)

    # See how it went
    for metric in explore_metrics:
        print(metric)
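The mpx_metrics collector referenced above is not shown here. A minimal sketch, under the assumption that, like mc_metrics in the last example of this collection, it takes (population, environment) and returns a dict of values recorded at each metrics trial; the exact keys are illustrative only.

def mpx_metrics(population, environment):
    # Hypothetical collector: record population size and reliable classifiers.
    return {
        'population': len(population),
        'reliable': len([cl for cl in population if cl.is_reliable()]),
    }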
Code example #16
                        number_of_possible_actions=4,
                        epsilon=0.9,
                        beta=0.2,
                        gamma=0.95,
                        theta_i=0.1,
                        theta_as=50,
                        theta_exp=50,
                        theta_ga=50,
                        do_ga=False,
                        mu=0.04,
                        u_max=2,
                        metrics_trial_frequency=5,
                        user_metrics_collector_fcn=grid_metrics)

    # Explore the environment
    agent1 = ACS2(cfg)
    population, explore_metrics = agent1.explore(grid, 500, decay=False)

    for cl in sorted(population, key=lambda c: -c.fitness):
        if cl.does_anticipate_change():
            print_cl(cl)

    # Exploit
    agent2 = ACS2(cfg, population)
    pop_exploit, metric_exploit = agent2.exploit(grid, 30)

    # Print classifiers
    for cl in sorted(pop_exploit, key=lambda c: -c.fitness):
        if cl.does_anticipate_change():
            print_cl(cl)
Code example #17
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    maze = gym.make(args.environment)

    freq = 10
    plot = pl.Plots('conf.txt', freq)
    cfg = Configuration(8,
                        8,
                        epsilon=args.epsilon,
                        do_ga=args.ga,
                        user_metrics_collector_fcn=plot.get_logger(),
                        metrics_trial_frequency=freq)

    agent = ACS2(cfg)
    for i in range(3):
        population, m = agent.explore(maze, 200, decay=False)
        plot.add_data("mix", f'explore {i+1}', m)
        population, m = agent.exploit(maze, 200)
        plot.add_data("mix", f'exploit {i+1}', m)
    plot.add_data("mix_no_div", '')
    plot.draw()
    plot.save_datasets('test.csv')
Code example #18
def run(environment, explore_trials, exploit_trials, position_bins,
        velocity_bins, biased_exploration_prob, decay, gamma):
    bins = [position_bins, velocity_bins]

    with mlflow.start_run():
        logging.info("Initializing environment...")
        env = gym.make(environment)
        env._max_episode_steps = 1000

        logging.info("Creating real-value discretizer...")
        _range, _low = (env.observation_space.high - env.observation_space.low,
                        env.observation_space.low)

        class MountainCarAdapter(EnvironmentAdapter):
            @classmethod
            def to_genotype(cls, obs):
                r = (obs + np.abs(_low)) / _range
                b = (r * bins).astype(int)
                return b.astype(str).tolist()

        logging.info("Creating custom metrics")

        def mc_metrics(pop, env):
            metrics = {
                'avg_fitness':
                np.mean([cl.fitness for cl in pop if cl.is_reliable()])
            }
            metrics.update(population_metrics(pop, env))

            return metrics

        logging.info("Building agent configuration...")
        cfg = Configuration(classifier_length=2,
                            number_of_possible_actions=3,
                            epsilon=1.0,
                            biased_exploration=biased_exploration_prob,
                            beta=0.2,
                            gamma=gamma,
                            theta_as=50,
                            theta_exp=100,
                            theta_ga=50,
                            do_ga=True,
                            mu=0.03,
                            chi=0.0,
                            metrics_trial_frequency=5,
                            user_metrics_collector_fcn=mc_metrics,
                            environment_adapter=MountainCarAdapter)

        trials = int(explore_trials / 2)

        logging.info(
            f"Running {trials} experiments with pure exploration (decay = False)"
        )
        agent = ACS2(cfg)
        population, metrics_1 = agent.explore(env, trials, decay=False)

        logging.info(
            f"Running {trials} experiments with optional decay (decay = {decay})"
        )
        agent = ACS2(cfg, population)
        population, metrics_2 = agent.explore(env, trials, decay=decay)

        logging.info("Generating metrics...")
        explore_metrics_df = merge_metrics(metrics_1, metrics_2, cfg)
        log_metrics("explore", explore_metrics_df)

        logging.info("Logging population artifact...")
        log_population_artifact("explore-population", population)
        log_metrics_artifact("explore-metrics", explore_metrics_df)

        logging.info("Generating plots...")
        avg_window = int(trials / 100)
        plot_steps_in_trial("explore-steps.png",
                            explore_metrics_df,
                            window=avg_window)
        plot_avg_fitness("explore-fitness.png",
                         explore_metrics_df,
                         window=avg_window)
        plot_reward("explore-reward.png",
                    explore_metrics_df,
                    window=avg_window)
        plot_classifiers("explore-classifiers.png",
                         explore_metrics_df,
                         window=avg_window)

        logging.info(f"Running {exploit_trials} exploit trials")
        exploiter = ACS2(cfg, deepcopy(population))
        population_exploit, metrics_3 = exploiter.exploit(env, exploit_trials)

        logging.info("Generating metrics")
        exploit_metrics_df = metrics_to_df(metrics_3)
        log_metrics_artifact("exploit-metrics", exploit_metrics_df)
        log_metrics("exploit", exploit_metrics_df)