def test_should_traverse(self, env):
    """Explore ``env`` for 300 trials without GA and check the outcome.

    The macro-classifier count must lie strictly between 90 and 200 and
    must equal both the reliable count and the micro-classifier count.
    The reported knowledge must be 100 and more than 5000 steps must have
    been executed in total.
    """
    # given
    agent = ACS2(Configuration(
        8, 8,
        epsilon=1.0,
        biased_exploration=0.5,
        do_ga=False,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=self._maze_metrics))

    # when
    population, metrics = agent.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert 90 < macro < 200
    assert self._get_knowledge(metrics) == 100
    assert macro == count_reliable(population)
    assert macro == count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 5000
def test_should_traverse_with_ga(self, env):
    """Explore ``env`` for 300 trials with GA enabled and check the outcome.

    The macro-classifier count must be within 55 of 380, the knowledge
    within 5 of 100, and the macro count must sit strictly between the
    reliable count and the micro-classifier count. More than 2500 steps
    must have been executed in total.
    """
    # given
    agent = ACS2(Configuration(
        8, 8,
        epsilon=0.8,
        biased_exploration=0.5,
        mu=0.3,
        chi=0.0,
        do_ga=True,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=self._maze_metrics))

    # when
    population, metrics = agent.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert abs(380 - macro) < 55
    assert abs(100 - self._get_knowledge(metrics)) < 5
    assert macro > count_reliable(population)
    assert macro < count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 2500
def get_actors():
    """Build an ACS2 agent and the 6-bit boolean multiplexer environment.

    The agent is configured with GA enabled and a ``MultiplexerAdapter``
    instance as its environment adapter.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-6bit-v0')
    settings = Configuration(
        environment.env.observation_space.n, 2,
        environment_adapter=MultiplexerAdapter(),
        do_ga=True)
    agent = ACS2(settings)
    return agent, environment
def get_actors():
    """Build an ACS2 agent and the 6-bit boolean multiplexer environment.

    The agent is configured with GA enabled and ``_map_perception`` as the
    perception mapper function.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-6bit-v0')
    settings = Configuration(
        environment.env.observation_space.n, 2,
        perception_mapper_fcn=_map_perception,
        do_ga=True)
    agent = ACS2(settings)
    return agent, environment
def get_actors():
    """Build an ACS2 agent and the 37-bit boolean multiplexer environment.

    The agent is configured with GA enabled and ``evaluate_performance``
    as the performance function, called with ``ctrl_bits=5``.

    Returns:
        tuple: ``(agent, environment)``.
    """
    environment = gym.make('boolean-multiplexer-37bit-v0')
    settings = Configuration(
        environment.env.observation_space.n, 2,
        performance_fcn=evaluate_performance,
        performance_fcn_params={'ctrl_bits': 5},
        do_ga=True)
    agent = ACS2(settings)
    return agent, environment
def start_single_experiment(env, trials, **kwargs):
    """Run one explore-exploit experiment with a fresh ACS2 agent.

    Args:
        env: environment to run in; it is reset before the run.
        trials: trial count handed to ``ACS2.explore_exploit``.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        tuple: ``(population, metrics_df)`` where ``metrics_df`` is the
        result of ``parse_metrics`` applied to the collected metrics.
    """
    env.reset()

    agent = ACS2(Configuration(**kwargs))
    population, raw_metrics = agent.explore_exploit(env, trials)

    return population, parse_metrics(raw_metrics)
def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
    """After 10 explore trials without GA the population holds no duplicates.

    The macro-classifier count is compared with the size of the set built
    from the population.
    """
    # given
    agent = ACS2(Configuration(
        mp.env.observation_space.n, 2,
        environment_adapter=MultiplexerAdapter(),
        do_ga=False))

    # when
    population, metrics = agent.explore(mp, 10)

    # then
    unique_classifiers = set(population)
    assert count_macroclassifiers(population) == len(unique_classifiers)
def test_should_be_no_duplicated_classifiers_without_ga(self, mp):
    """After 10 explore trials without GA the population holds no duplicates.

    The macro-classifier count is compared with the size of the set built
    from the population.
    """
    # given
    agent = ACS2(Configuration(
        mp.env.observation_space.n, 2,
        perception_mapper_fcn=_map_perception,
        do_ga=False))

    # when
    population, metrics = agent.explore(mp, 10)

    # then
    unique_classifiers = set(population)
    assert count_macroclassifiers(population) == len(unique_classifiers)
def test_should_evaluate_knowledge(self, mp):
    """Every metric collected over 10 explore trials has reward 0 or 1000."""
    # given
    agent = ACS2(Configuration(
        mp.env.observation_space.n, 2,
        do_ga=False,
        environment_adapter=MultiplexerAdapter()))

    # when
    population, metrics = agent.explore(mp, 10)

    # then
    assert all(entry['reward'] in {0, 1000} for entry in metrics)
def test_should_evaluate_knowledge(self, mp):
    """Every metric's ``performance['was_correct']`` flag is either 0 or 1."""
    # given
    agent = ACS2(Configuration(
        mp.env.observation_space.n, 2,
        do_ga=False,
        perception_mapper_fcn=_map_perception,
        performance_fcn=calculate_performance,
        performance_fcn_params={'ctrl_bits': 2}))

    # when
    population, metrics = agent.explore(mp, 10)

    # then
    assert all(entry['performance']['was_correct'] in {0, 1}
               for entry in metrics)
def start_single_experiment(env, explore_trials, exploit_trials, **kwargs):
    """Run an explore phase followed by an exploit phase and collect metrics.

    Args:
        env: environment to run in; it is reset before the run.
        explore_trials: number of trials passed to ``ACS2.explore``.
        exploit_trials: number of trials passed to ``ACS2.exploit``.
        **kwargs: forwarded unchanged to ``Configuration``.

    Returns:
        tuple: ``(population_exploit, df)`` where ``df`` is produced by
        ``parse_experiments_results`` from both phases' metrics and the
        configuration's ``metrics_trial_frequency``.
    """
    # Prepare the environment
    env.reset()

    cfg = Configuration(**kwargs)

    explorer = ACS2(cfg)
    population_explore, metrics_explore = explorer.explore(env, explore_trials)

    # The exploit phase must run on the dedicated agent seeded with the
    # explored population; previously `explorer` was called here and the
    # `exploiter` instance was created but never used.
    exploiter = ACS2(cfg, population_explore)
    population_exploit, metrics_exploit = exploiter.exploit(env,
                                                            exploit_trials)

    # Parse results into DataFrame
    df = parse_experiments_results(metrics_explore, metrics_exploit,
                                   cfg.metrics_trial_frequency)

    return population_exploit, df
def test_should_gain_knowledge(self, env):
    """After 20 explore trials the last metric reports positive knowledge.

    The ``knowledge``, ``with_block`` and ``no_block`` entries of the final
    metric must all be greater than zero.
    """
    # given
    agent = ACS2(Configuration(
        env.observation_space.n, env.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=True,
        action_planning_frequency=50,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=handeye_metrics))

    # when
    population, metrics = agent.explore(env, 20)

    # then
    for key in ('knowledge', 'with_block', 'no_block'):
        assert metrics[-1][key] > 0.0
def test_should_evaluate_knowledge(self, env):
    """All knowledge figures collected over 10 explore trials are in [0, 100].

    Checked per metric for ``knowledge``, ``with_block`` and ``no_block``.
    """
    # given
    agent = ACS2(Configuration(
        env.observation_space.n, env.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=True,
        action_planning_frequency=50,
        user_metrics_collector_fcn=handeye_metrics))

    # when
    population, metrics = agent.explore(env, 10)

    # then
    for entry in metrics:
        for key in ('knowledge', 'with_block', 'no_block'):
            assert 0.0 <= entry[key] <= 100.0
def test_should_traverse(self, env):
    """Explore ``env`` for 300 trials without GA and check the outcome.

    The macro-classifier count must lie strictly between 90 and 200 and
    must equal both the reliable count and the micro-classifier count.
    The reported knowledge must be 100 and more than 5000 steps must have
    been executed in total.
    """
    # given
    agent = ACS2(Configuration(
        8, 8,
        epsilon=1.0,
        do_ga=False,
        performance_fcn=calculate_performance))

    # when
    population, metrics = agent.explore(env, 300)

    # then
    macro = count_macroclassifiers(population)
    assert 90 < macro < 200
    assert 100 == self._get_knowledge(metrics)
    assert macro == count_reliable(population)
    assert macro == count_microclassifiers(population)
    assert self._get_total_steps(metrics) > 5000
class MultiplexerAdapter(EnvironmentAdapter):
    """Environment adapter that stringifies multiplexer observations."""

    @staticmethod
    def to_genotype(env_state):
        """Return a list with ``str`` applied to each item of ``env_state``."""
        return list(map(str, env_state))


if __name__ == '__main__':
    # Environment under study
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Agent settings: no GA, custom adapter, metrics every 50 trials
    cfg = Configuration(
        mp.env.observation_space.n, 2,
        do_ga=False,
        environment_adapter=MultiplexerAdapter(),
        metrics_trial_frequency=50,
        user_metrics_collector_fcn=mpx_metrics)

    # Exploration phase
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(mp, 1500)

    # Exploitation phase on a new agent seeded with the explored population
    agent = ACS2(cfg, population)
    population, exploit_metrics = agent.exploit(mp, 50)

    # Report the exploration metrics
    for metric in explore_metrics:
        print(metric)
# Script entry point: creates the 'TaxiGoal-v0' environment (reset + render), configures ACS2 with action planning enabled, explores for 1000 trials, then exploits for 10 trials with a new agent seeded with the explored population.
# NOTE(review): this snippet stops at a dangling `for metric in exploit_metric:` header; the loop body is not visible here.
if __name__ == '__main__': # Load desired environment environment = gym.make('TaxiGoal-v0') environment.reset() environment.render() # Configure and create the agent cfg = Configuration(1, 6, epsilon=1.0, do_ga=False, environment_adapter=TaxiAdapter, metrics_trial_frequency=1, user_metrics_collector_fcn=taxi_metrics, do_action_planning=True, action_planning_frequency=50) logging.info(cfg) # Explore the environment agent = ACS2(cfg) population, explore_metrics = agent.explore(environment, 1000) # Exploit the environment agent = ACS2(cfg, population) population, exploit_metric = agent.exploit(environment, 10) for metric in exploit_metric:
# collect more metrics
def cp_metrics(pop, env):
    """Return the custom metrics dictionary for one collection point.

    Holds ``avg_fitness`` (from ``avg_fitness(pop)``) merged with whatever
    ``population_metrics(pop, env)`` returns; on a key clash the latter
    wins.
    """
    return {
        'avg_fitness': avg_fitness(pop),
        **population_metrics(pop, env),
    }


cfg = Configuration(
    classifier_length=4,
    number_of_possible_actions=2,
    epsilon=0.9,
    beta=0.05,
    gamma=0.95,
    theta_exp=50,
    theta_ga=50,
    do_ga=True,
    mu=0.03,
    u_max=4,
    metrics_trial_frequency=5,
    user_metrics_collector_fcn=cp_metrics,
    environment_adapter=CartPoleAdapter)


if __name__ == '__main__':
    # Explore with decay enabled and report the resulting population size
    agent = ACS2(cfg)
    population_explore, metrics_explore = agent.explore(env, trials,
                                                        decay=True)

    print(len(population_explore))
# import logging
from examples.acs2.go_self_play.environment import GoBoard
from lcs.agents.acs2 import Configuration, ClassifiersList

# Configure logger
# logging.basicConfig(level=logging.INFO)

GAMES = 5000  # How many games to play
ALL_MOVES = 0

# Commons
board = GoBoard()  # Initialize board of size 9x9
cfg = Configuration(81, 81, epsilon=0.6, do_ga=True)
population = ClassifiersList(cfg=cfg)


def determine_player(moves):
    """Return the current player's mark ('W' or 'B') for a move number."""
    marks = ('W', 'B')
    return marks[moves % 2]


def switch_perception(perception):
    """Swap the 'O' and 'X' marks in ``perception``.

    Any 't' characters in the input also end up as 'X', because 't'
    serves as the temporary placeholder in the equivalent chain of
    replacements.
    """
    swap_table = str.maketrans({'O': 'X', 'X': 'O', 't': 'X'})
    return perception.translate(swap_table)


def print_metrics(game, moves, population):
    """Print the number of the finished game and its total move count."""
    print("Game [{}] finished".format(game))
    print("Total moves: [{}]".format(moves))
if __name__ == '__main__':
    env = gym.make('Go9x9-v0')  # TODO: removed from GYM after 0.9 :(
    state = env.reset()

    # Create a mapping dictionary of moves.
    # A key is a number 0, 1, ..., num_moves and the value is the
    # corresponding action in the Pachi environment.
    moves = {idx: map_moves(env, move)
             for idx, move in enumerate(moves_9x9())}

    CLASSIFIER_LENGTH = env._state.board.size**2
    NUMBER_OF_POSSIBLE_ACTIONS = len(moves)

    cfg = Configuration(
        classifier_length=CLASSIFIER_LENGTH,
        number_of_possible_actions=NUMBER_OF_POSSIBLE_ACTIONS,
        perception_mapper_fcn=process_state,
        environment_metrics_fcn=calculate_environment_metrics,
        action_mapping_dict=moves,
        epsilon=0.4,
        do_ga=True)

    logging.info(cfg)

    # Create the agent
    agent = ACS2(cfg)
    population, metrics = agent.explore_exploit(env, 50)

    # Store population and metrics in files. Context managers guarantee
    # that both files are flushed and closed even if pickling fails;
    # previously the handles returned by open() were never closed.
    logging.info("Dumping data to files ...")
    with open("go_population.pkl", "wb") as population_file:
        pickle.dump(population, population_file)
    with open("go_metrics.pkl", "wb") as metrics_file:
        pickle.dump(metrics, metrics_file)
if __name__ == '__main__':
    # Command line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    # NOTE(review): the two trial-count options below are parsed but the
    # loop further down uses a fixed 200 trials per phase.
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    maze = gym.make(args.environment)

    # Metrics are both collected and plotted at the same frequency
    freq = 10
    plot = pl.Plots('conf.txt', freq)

    cfg = Configuration(
        8, 8,
        epsilon=args.epsilon,
        do_ga=args.ga,
        user_metrics_collector_fcn=plot.get_logger(),
        metrics_trial_frequency=freq)

    # One agent alternates between exploring and exploiting three times
    agent = ACS2(cfg)

    for i in range(3):
        population, m = agent.explore(maze, 200, decay=False)
        plot.add_data("mix", f'explore {i+1}', m)

        population, m = agent.exploit(maze, 200)
        plot.add_data("mix", f'exploit {i+1}', m)

    plot.add_data("mix_no_div", '')

    plot.draw()
    plot.save_datasets('test.csv')
# `to_genotype` returns its argument wrapped in a one-element tuple (note the trailing comma). NOTE(review): it looks like a method of an adapter class whose header lies outside this snippet — confirm.
# The script part builds a `grid-{grid_size}-v0` environment, explores it for 500 trials without decay and GA, then passes every classifier that anticipates a change to `print_cl`, ordered by descending fitness.
def to_genotype(phenotype): return phenotype, if __name__ == '__main__': # Load desired environment grid = gym.make(f'grid-{grid_size}-v0') # Configure and create the agent cfg = Configuration(classifier_length=2, number_of_possible_actions=4, epsilon=0.9, beta=0.2, gamma=0.95, theta_i=0.1, theta_as=50, theta_exp=50, theta_ga=50, do_ga=False, mu=0.04, u_max=2, metrics_trial_frequency=5, user_metrics_collector_fcn=grid_metrics) # Explore the environment agent1 = ACS2(cfg) population, explore_metrics = agent1.explore(grid, 500, decay=False) for cl in sorted(population, key=lambda c: -c.fitness): if cl.does_anticipate_change(): print_cl(cl)
# NOTE(review): the leading `print(...)` is the tail of a function whose header is outside this snippet (presumably `print_cl`, which the script below calls — confirm); `action` is defined in that unseen part.
# The script part builds the 'grid-10-v0' environment, explores it for 1000 trials with GA enabled and no decay, then passes every classifier that anticipates a change to `print_cl`, ordered by descending fitness.
print(f"{cl.condition} - {action} - {cl.effect} " f"[fit: {cl.fitness:.3f}, r: {cl.r:.2f}, ir: {cl.ir:.2f}]") if __name__ == '__main__': # Load desired environment grid = gym.make('grid-10-v0') # Configure and create the agent cfg = Configuration( classifier_length=2, number_of_possible_actions=4, epsilon=0.9, beta=0.03, gamma=0.97, theta_i=0.1, theta_as=10, theta_exp=50, theta_ga=50, do_ga=True, mu=0.04, u_max=2, metrics_trial_frequency=10) # Explore the environment agent1 = ACS2(cfg) population, explore_metrics = agent1.explore(grid, 1000, decay=False) for cl in sorted(population, key=lambda c: -c.fitness): if cl.does_anticipate_change(): print_cl(cl)
from examples.acs2.boolean_multiplexer.utils import calculate_performance
from lcs.agents.acs2 import ACS2, Configuration


def _map_perception(perception):
    """Return a list with ``str`` applied to each item of ``perception``."""
    return list(map(str, perception))


if __name__ == '__main__':
    # Environment under study
    mp = gym.make('boolean-multiplexer-6bit-v0')

    # Agent settings: no GA, stringified perception, performance tracking
    cfg = Configuration(
        mp.env.observation_space.n, 2,
        do_ga=False,
        perception_mapper_fcn=_map_perception,
        performance_fcn=calculate_performance,
        performance_fcn_params={'ctrl_bits': 2})

    # Exploration phase (its metrics are discarded)
    agent = ACS2(cfg)
    population, _ = agent.explore(mp, 1500)

    # Exploitation phase on a new agent seeded with the explored population
    agent = ACS2(cfg, population)
    population, metrics = agent.exploit(mp, 50)

    # Report the exploitation metrics
    for metric in metrics:
        print(metric)
def run(environment, explore_trials, exploit_trials, position_bins,
        velocity_bins, biased_exploration_prob, decay, gamma):
    """Train and evaluate an ACS2 agent inside a single MLflow run.

    Exploration is split into two halves of ``explore_trials / 2`` trials:
    the first always runs with ``decay=False``, the second with the given
    ``decay``. Metrics, population artifacts and plots of the exploration
    are logged, after which a separate agent holding a deep copy of the
    explored population runs the exploit trials.

    Args:
        environment: environment id passed to ``gym.make``.
        explore_trials: total number of exploration trials (halved
            between the two exploration phases).
        exploit_trials: number of exploitation trials.
        position_bins: number of discretization bins for the first
            observation component.
        velocity_bins: number of discretization bins for the second
            observation component.
        biased_exploration_prob: value of ``biased_exploration`` in the
            agent configuration.
        decay: ``decay`` flag used for the second exploration phase.
        gamma: value of ``gamma`` in the agent configuration.
    """
    bins = [position_bins, velocity_bins]

    with mlflow.start_run():
        logging.info("Initializing environment...")
        env = gym.make(environment)
        env._max_episode_steps = 1000

        logging.info("Creating real-value discretizer...")
        _range, _low = (env.observation_space.high - env.observation_space.low,
                        env.observation_space.low)

        class MountainCarAdapter(EnvironmentAdapter):
            """Discretizes a real-valued observation into bin indices."""

            @classmethod
            def to_genotype(cls, obs):
                # Scale by the observation range, then map to a bin index
                # per component and return the indices as strings.
                r = (obs + np.abs(_low)) / _range
                b = (r * bins).astype(int)
                return b.astype(str).tolist()

        logging.info("Creating custom metrics")

        def mc_metrics(pop, env):
            """Mean fitness of reliable classifiers plus population metrics."""
            metrics = {
                'avg_fitness': np.mean([cl.fitness for cl in pop
                                        if cl.is_reliable()])
            }
            metrics.update(population_metrics(pop, env))
            return metrics

        logging.info("Building agent configuration...")
        cfg = Configuration(classifier_length=2,
                            number_of_possible_actions=3,
                            epsilon=1.0,
                            biased_exploration=biased_exploration_prob,
                            beta=0.2,
                            gamma=gamma,
                            theta_as=50,
                            theta_exp=100,
                            theta_ga=50,
                            do_ga=True,
                            mu=0.03,
                            chi=0.0,
                            metrics_trial_frequency=5,
                            user_metrics_collector_fcn=mc_metrics,
                            environment_adapter=MountainCarAdapter)

        trials = int(explore_trials / 2)

        logging.info(
            f"Running {trials} experiments with pure exploration (decay = False)"
        )
        agent = ACS2(cfg)
        population, metrics_1 = agent.explore(env, trials, decay=False)

        logging.info(
            f"Running {trials} experiments with optional decay (decay = {decay})"
        )
        agent = ACS2(cfg, population)
        population, metrics_2 = agent.explore(env, trials, decay=decay)

        logging.info("Generating metrics...")
        explore_metrics_df = merge_metrics(metrics_1, metrics_2, cfg)
        log_metrics("explore", explore_metrics_df)

        logging.info("Logging population artifact...")
        log_population_artifact("explore-population", population)
        log_metrics_artifact("explore-metrics", explore_metrics_df)

        logging.info("Generating plots...")
        avg_window = int(trials / 100)
        plot_steps_in_trial("explore-steps.png",
                            explore_metrics_df, window=avg_window)
        plot_avg_fitness("explore-fitness.png",
                         explore_metrics_df, window=avg_window)
        plot_reward("explore-reward.png",
                    explore_metrics_df, window=avg_window)
        plot_classifiers("explore-classifiers.png",
                         explore_metrics_df, window=avg_window)

        logging.info(f"Running {exploit_trials} exploit trials")
        # Exploit with the dedicated agent working on a deep copy, so the
        # explored population stays untouched. Previously `agent.exploit`
        # was called here and `exploiter` was never used.
        exploiter = ACS2(cfg, deepcopy(population))
        population_exploit, metrics_3 = exploiter.exploit(env, exploit_trials)

        logging.info("Generating metrics")
        exploit_metrics_df = metrics_to_df(metrics_3)
        log_metrics_artifact("exploit-metrics", exploit_metrics_df)
        log_metrics("exploit", exploit_metrics_df)
# noinspection PyUnresolvedReferences
import gym_handeye

from lcs.agents.acs2 import ACS2, Configuration
from examples.acs2.handeye.utils import handeye_metrics

# Configure logger
logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    # Environment under study
    hand_eye = gym.make('HandEye3-v0')

    # Agent settings: pure exploration, no GA, no action planning
    cfg = Configuration(
        hand_eye.observation_space.n,
        hand_eye.action_space.n,
        epsilon=1.0,
        do_ga=False,
        do_action_planning=False,
        user_metrics_collector_fcn=handeye_metrics)

    # Exploration phase, logging each collected metric
    logging.info("Exploring HandEye")
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(hand_eye, 50)

    for metric in explore_metrics:
        logging.info(metric)

    # Exploitation phase on a new agent seeded with the explored population
    logging.info("Exploiting HandEye")
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(hand_eye, 10)
# `to_genotype` returns `phenotype` wrapped in a one-element tuple (note the trailing comma). NOTE(review): the `cls` parameter suggests a classmethod of an adapter class (presumably `CorridorAdapter`, referenced below) whose header lies outside this snippet — confirm.
# The script part builds the 'corridor-40-v0' environment, explores it for 1000 trials with GA enabled, and sorts the resulting population by descending fitness.
def to_genotype(cls, phenotype): return phenotype, if __name__ == '__main__': # Load desired environment corridor = gym.make('corridor-40-v0') # Configure and create the agent cfg = Configuration(classifier_length=1, number_of_possible_actions=2, action_selector=EpsilonGreedy, epsilon=0.8, beta=0.03, gamma=0.97, theta_exp=50, theta_ga=50, do_ga=True, mu=0.02, u_max=1, metrics_trial_frequency=20, environment_adapter=CorridorAdapter) # Explore the environment logging.info("Exploring environment") agent = ACS2(cfg) population, explore_metrics = agent.explore(corridor, 1000) population = sorted(population, key=lambda cl: -cl.fitness) print("ok")
from examples.acs2.maze.utils import calculate_performance
from lcs.agents.acs2 import ACS2, Configuration

# Configure logger
logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    # Environment under study
    maze = gym.make('BMaze4-v0')

    # Agent settings: pure exploration, no GA, performance tracking
    cfg = Configuration(
        8, 8,
        epsilon=1.0,
        do_ga=False,
        performance_fcn=calculate_performance)

    logging.info(cfg)

    # Exploration phase
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(maze, 50)

    # Exploitation phase on a new agent seeded with the explored population
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(maze, 10)

    # Log every exploitation metric
    for metric in exploit_metric:
        logging.info(metric)
def cfg(self):
    """Return a ``Configuration`` built from the positional arguments 8, 8."""
    configuration = Configuration(8, 8)
    return configuration
def cfg(self):
    """Return a ``Configuration(8, 8)`` with ``theta_r`` set to 0.9."""
    configuration = Configuration(8, 8, theta_r=0.9)
    return configuration
if __name__ == '__main__':
    # Command line interface
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--environment", default="Maze4-v0")
    parser.add_argument("--epsilon", default=1.0, type=float)
    parser.add_argument("--ga", action="store_true")
    parser.add_argument("--explore-trials", default=50, type=int)
    parser.add_argument("--exploit-trials", default=10, type=int)
    args = parser.parse_args()

    # Environment selected on the command line
    maze = gym.make(args.environment)

    # Agent settings: metrics are collected on every trial
    cfg = Configuration(
        8, 8,
        epsilon=args.epsilon,
        do_ga=args.ga,
        metrics_trial_frequency=1,
        user_metrics_collector_fcn=maze_metrics)

    # Exploration phase, logging each collected metric
    logging.info("Exploring maze")
    agent = ACS2(cfg)
    population, explore_metrics = agent.explore(maze, args.explore_trials)

    for metric in explore_metrics:
        logger.info(metric)

    # Exploitation phase on a new agent seeded with the explored population
    logging.info("Exploiting maze")
    agent = ACS2(cfg, population)
    population, exploit_metric = agent.exploit(maze, args.exploit_trials)