def main(argv):
    args = parser.parse_args(argv[1:])
    if args.usage == 'help':
        return parser.print_help()
    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        all_registry = registry.all()
        registry_envs_name = [
            trim_env_spec_name(env.__repr__()) for env in all_registry
        ]
        for environment in registry_envs_name:
            print(environment)
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            if args.pre_defined_state_size == 'nesgym':
                pre_state_size = 172032
            elif args.pre_defined_state_size == 'gym':
                pre_state_size = env.observation_space.shape[0]
            elif args.pre_defined_state_size == 'gym-atari':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-atari-extend':
                pre_state_size = 120000
            elif args.pre_defined_state_size == 'gym-atari-small':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-gomoku':
                pre_state_size = 361
            # state_size = (1,) + env.observation_space.shape
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
            # try:
            #     agent.load('./weights/dqn_{}_{}_{}.h5'.format(
            #         args.environment_name.lower(), args.timesteps, args.i_episodes))
            # except Exception:
            #     pass
        done = False
        batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present':
                        env.render()
                    if args.render == 'presented':
                        env.render(args.render)
                    if args.action_type == 'alternate':
                        action_choice = i_episodes * 2
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action_choice = i_episodes
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        reward = reward if not done else -10
                        observation = np.reshape(observation,
                                                 [1, pre_state_size])
                        agent.remember(state, action, reward, observation,
                                       done)
                        state = observation
                    # collect_stat(observation, ['observation'], stats)
                    collect_stat(reward, ['rewards'], stats)
                    # collect_stat(done, ['output', 'done'], stats)
                    # collect_stat(info, ['output', 'info'], stats)
                    if done:
                        max_episodes_range = (i_episodes - 1)
                        episode_timesteps_iteration_limit = max_episodes_range - 1
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_timesteps_iteration_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'],
                                     stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'],
                                     stats)
                        is_latest_episode_to_save_state = (
                            lambda args_cached: is_latest_episode and
                            args_cached.output_stats_filename)
                        if is_latest_episode_to_save_state(args):
                            filename = args.output_stats_filename
                            pre_df = {
                                # 'observations': stats['observations'],
                                'rewards': stats['rewards'],
                                # 'done-output': stats['output']['done'],
                                # 'info-output': stats['output']['info'],
                                # 'iteration-timestep': stats['output']['timestep']['iteration'],
                                # 'increased-timestep': stats['output']['timestep']['increased'],
                                'actions-input': stats['input']['actions']
                            }
                            df = pd.DataFrame(pre_df)
                            stamp = lambda: '%s' % (int(
                                datetime.now().timestamp()))
                            with open(
                                    'data/{}-{}.csv'.format(stamp(), filename),
                                    'w') as f:
                                f.write(df.to_csv())
                                f.close()
                            print('Statistics file saved ({}.csv)!'.format(
                                filename))
                            del df
                            del filename
                        print(check_output_env_label())
                        del is_latest_episode_to_save_state
                        del increased_timestep
                        del is_latest_episode
                        del episode_timesteps_iteration_limit
                        del max_episodes_range
                        break
                except Exception as e:
                    print('Rendering execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn', args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
            # agent.save('./weights/dqn_{}_{}_{}.h5'.format(
            #     args.environment_name.lower(), args.timesteps, args.i_episodes))
        # env.close()
    else:
        parser.print_help()
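# --- Hedged sketch: module-level scaffolding assumed by main() above. ---
# main() references a module-level `parser` and a `stats` accumulator that are
# not shown in this snippet. The argument names below are reconstructed from
# the `args.*` attributes main() reads; the defaults, the `stats` layout, and
# the entry point are assumptions, not the original definitions.
import argparse
import sys

parser = argparse.ArgumentParser(description='Run Gym environments from the command line')
parser.add_argument('--usage', default='run')                  # 'help' prints usage and exits
parser.add_argument('--environment-name', default='CartPole-v1')
parser.add_argument('--i-episodes', type=int, default=10)
parser.add_argument('--timesteps', type=int, default=100)
parser.add_argument('--seed-factor', type=int, default=2)
parser.add_argument('--render', default='none')                # 'present' or 'presented'
parser.add_argument('--action-type', default='specific')
parser.add_argument('--pre-defined-state-size', default='gym')
parser.add_argument('--output-stats-filename', default=None)

# collect_stat(value, keys, stats) presumably appends `value` under the nested
# key path, so a nested dict of lists is assumed here.
stats = {
    'rewards': [],
    'input': {'actions': []},
    'output': {'done': [], 'info': [],
               'timestep': {'iteration': [], 'increased': []}},
}

if __name__ == '__main__':
    sys.exit(main(sys.argv))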
        # Use set_state_from_obs
        pass

    def render(self, mode="human", **kwargs):
        raise Exception("This runtime does not support rendering")

    def seed(self, seed=None):
        raise Exception("This runtime should not be seeded")


def theta_from_obs(observation: np.ndarray) -> float:
    cos_theta, sin_theta, _ = observation
    return np.math.atan2(sin_theta, cos_theta)


if "Pendulum-Ignition-PyTest-v0" not in [spec.id for spec in list(registry.all())]:
    register(
        id="Pendulum-Ignition-PyTest-v0",
        entry_point="gym_ignition.runtimes.gazebo_runtime:GazeboRuntime",
        max_episode_steps=1000,
        kwargs={
            "task_cls": PendulumSwingUp,
            "robot_cls": gazebo.pendulum.PendulumGazeboRobot,
            "model": "Pendulum/Pendulum.urdf",
            "world": "DefaultEmptyWorld.world",
            "rtf": 100,
            "agent_rate": 4000,
            "physics_rate": 4000,
            "hard_reset": False,
        },
    )
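# --- Hedged usage example for theta_from_obs() above. ---
# A Pendulum-style observation is [cos(theta), sin(theta), theta_dot]; the
# helper recovers theta with atan2. The sample values below are illustrative
# only, not taken from the original tests.
import numpy as np

obs = np.array([0.0, 1.0, 0.0])  # cos=0, sin=1  ->  theta = pi/2
assert abs(theta_from_obs(obs) - np.pi / 2) < 1e-9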
import gym
import numpy as np
import pytest
from gym.envs import registry
from gym.envs.registration import register
from gym_ignition.robots.sim import gazebo, pybullet
from gym_ignition.tasks.cartpole_discrete import CartPoleDiscrete
from gym_ignition.tasks.pendulum_swingup import PendulumSwingUp
from gym_ignition.utils import logger

# Set verbosity
logger.set_level(gym.logger.DEBUG)

if "Pendulum-Ignition-PyTest-v0" not in [
        spec.id for spec in list(registry.all())
]:
    register(
        id="Pendulum-Ignition-PyTest-v0",
        entry_point="gym_ignition.runtimes.gazebo_runtime:GazeboRuntime",
        max_episode_steps=1000,
        kwargs={
            "task_cls": PendulumSwingUp,
            "robot_cls": gazebo.pendulum.PendulumGazeboRobot,
            "model": "Pendulum/Pendulum.urdf",
            "world": "DefaultEmptyWorld.world",
            "rtf": 100,
            "agent_rate": 4000,
            "physics_rate": 4000,
            "hard_reset": False,
        },
    )
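# --- Hedged usage sketch for the registration above (not part of the original
# test). Once "Pendulum-Ignition-PyTest-v0" is registered, it can be driven
# with the standard (pre-0.26) Gym API; actually stepping it requires a
# working Ignition Gazebo installation. The helper name and step count are
# illustrative assumptions.
def run_random_episode(env_id: str = "Pendulum-Ignition-PyTest-v0",
                       steps: int = 10) -> None:
    env = gym.make(env_id)
    observation = env.reset()
    for _ in range(steps):
        action = env.action_space.sample()  # random policy
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()
    env.close()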
# coding=utf-8
"""
Open AI Gym plugin for ACN-Sim. Provides several customizable environments
for training reinforcement learning (RL) agents. See tutorial X for examples
of usage.
"""
from typing import List, Dict

from gym.envs import registry
from gym.envs.registration import register, EnvSpec

from .interfaces import GymTrainedInterface, GymTrainingInterface
from .envs import *

all_envs: List[EnvSpec] = list(registry.all())
env_ids = [env_spec.id for env_spec in all_envs]

gym_env_dict: Dict[str, str] = {
    "custom-acnsim-v0": "gym_acnportal.gym_acnsim.envs:CustomSimEnv",
    "default-acnsim-v0": "gym_acnportal.gym_acnsim.envs:make_default_sim_env",
    "rebuilding-acnsim-v0": "gym_acnportal.gym_acnsim.envs:RebuildingEnv",
    "default-rebuilding-acnsim-v0":
        "gym_acnportal.gym_acnsim.envs:make_rebuilding_default_sim_env",
}

for env_name, env_entry_point in gym_env_dict.items():
    if env_name not in env_ids:
        register(id=env_name, entry_point=env_entry_point)

del register, registry, all_envs, gym_env_dict, List, Dict
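# --- Hedged usage sketch for the registrations above (standalone, not part of
# this module). Importing the package presumably runs the register() loop, so
# the ACN-Sim ids become visible in Gym's registry. Whether gym.make() needs
# extra keyword arguments for these environments is not covered here; this
# only verifies the registration.
import gym
import gym_acnportal.gym_acnsim  # noqa: F401  (triggers the register() calls)

registered_ids = [spec.id for spec in gym.envs.registry.all()]
assert "default-acnsim-v0" in registered_ids
assert "custom-acnsim-v0" in registered_ids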