Example #1
# Third-party imports used below. The CLI `parser`, the `stats` dict, the
# DQNAgent class, and the helper functions (is_environments_*, is_action_type,
# trim_env_spec_name, collect_stat, ...) are defined elsewhere in this module.
import gym
import numpy as np
import pandas as pd
from datetime import datetime
from gym.envs import registry


def main(argv):
    args = parser.parse_args(argv[1:])

    if args.usage == 'help':
        return parser.print_help()

    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        all_registry = registry.all()
        registry_envs_name = [
            trim_env_spec_name(repr(env)) for env in all_registry
        ]
        for environment in registry_envs_name:
            print(environment)
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            # Flattened observation sizes for the supported platforms; an
            # unknown value now raises KeyError instead of silently leaving
            # pre_state_size undefined.
            pre_defined_state_sizes = {
                'nesgym': 172032,
                'gym': env.observation_space.shape[0],
                'gym-atari': 100800,
                'gym-atari-extend': 120000,
                'gym-atari-small': 100800,
                'gym-gomoku': 361,
            }
            pre_state_size = pre_defined_state_sizes[
                args.pre_defined_state_size]
            # state_size = (1,) + env.observation_space.shape
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
            # try:
            #     agent.load('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
            #                                           args.i_episodes))
            # except Exception:
            #     pass
            done = False
            batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present':
                        env.render()
                    if args.render == 'presented':
                        env.render(args.render)
                    if args.action_type == 'alternate':
                        action_choice = i_episodes * 2
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action_choice = i_episodes
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        reward = reward if not done else -10
                        observation = np.reshape(observation,
                                                 [1, pre_state_size])
                        agent.remember(state, action, reward, observation,
                                       done)
                        state = observation
                    # collect_stat(observation,['observation'],stats)
                    collect_stat(reward, ['rewards'], stats)
                    # collect_stat(done,['output','done'],stats)
                    # collect_stat(info,['output','info'],stats)
                    if done:
                        max_episodes_range = (i_episodes - 1)
                        episode_timesteps_iteration_limit = max_episodes_range - 1
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_timesteps_iteration_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'],
                                     stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'],
                                     stats)
                        is_latest_episode_to_save_state = bool(
                            is_latest_episode and args.output_stats_filename)
                        if is_latest_episode_to_save_state:
                            filename = args.output_stats_filename
                            pre_df = {
                                # 'observations': stats['observations'],
                                'rewards': stats['rewards'],
                                # 'done-output': stats['output']['done'],
                                # 'info-output': stats['output']['info'],
                                # 'iteration-timestep': stats['output']['timestep']['iteration'],
                                # 'increased-timestep': stats['output']['timestep']['increased'],
                                'actions-input': stats['input']['actions']
                            }
                            df = pd.DataFrame(pre_df)
                            stamp = str(int(datetime.now().timestamp()))
                            df.to_csv('data/{}-{}.csv'.format(stamp, filename))
                            print('Statistics file saved ({}-{}.csv)!'.format(
                                stamp, filename))
                            del df
                            del filename
                        print(check_output_env_label())
                        del is_latest_episode_to_save_state
                        del increased_timestep
                        del is_latest_episode
                        del episode_timesteps_iteration_limit
                        del max_episodes_range
                        break
                except Exception as e:
                    print('Error during timestep execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn',
                              args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
        # agent.save('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
        #                                       args.i_episodes))
        # env.close()
    else:
        parser.print_help()
        # Use set_state_from_obs
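
# --- Hypothetical CLI definition --------------------------------------------
# main() reads many attributes off `args`, but the parser itself is defined
# elsewhere in the module. As a rough sketch (flag names and defaults are
# assumptions, not the original project's definitions), a compatible argparse
# parser could look like this:
import argparse

parser = argparse.ArgumentParser(
    description='Run and benchmark gym environments')
parser.add_argument('--usage', default='run')
parser.add_argument('--environment-name', dest='environment_name')
parser.add_argument('--action-type', dest='action_type', default='specific')
parser.add_argument('--pre-defined-state-size',
                    dest='pre_defined_state_size', default='gym')
parser.add_argument('--i-episodes', dest='i_episodes', type=int, default=10)
parser.add_argument('--timesteps', type=int, default=100)
parser.add_argument('--seed-factor', dest='seed_factor', type=int, default=2)
parser.add_argument('--render', default='none')
parser.add_argument('--output-stats-filename', dest='output_stats_filename')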

    # Methods of a runtime class (the enclosing class definition is not part
    # of this snippet): this runtime supports neither rendering nor seeding.
    def render(self, mode="human", **kwargs):
        raise Exception("This runtime does not support rendering")

    def seed(self, seed=None):
        raise Exception("This runtime should not be seeded")


def theta_from_obs(observation: np.ndarray) -> float:
    cos_theta, sin_theta, _ = observation
    return np.math.atan2(sin_theta, cos_theta)


if "Pendulum-Ignition-PyTest-v0" not in [spec.id for spec in list(registry.all())]:
    register(
        id="Pendulum-Ignition-PyTest-v0",
        entry_point="gym_ignition.runtimes.gazebo_runtime:GazeboRuntime",
        max_episode_steps=1000,
        kwargs={
            "task_cls": PendulumSwingUp,
            "robot_cls": gazebo.pendulum.PendulumGazeboRobot,
            "model": "Pendulum/Pendulum.urdf",
            "world": "DefaultEmptyWorld.world",
            "rtf": 100,
            "agent_rate": 4000,
            "physics_rate": 4000,
            "hard_reset": False,
        },
    )
import gym
import numpy as np
import pytest
from gym.envs import registry
from gym.envs.registration import register
from gym_ignition.robots.sim import gazebo, pybullet
from gym_ignition.tasks.cartpole_discrete import CartPoleDiscrete
from gym_ignition.tasks.pendulum_swingup import PendulumSwingUp
from gym_ignition.utils import logger

# Set verbosity
logger.set_level(gym.logger.DEBUG)

if "Pendulum-Ignition-PyTest-v0" not in [
        spec.id for spec in list(registry.all())
]:
    register(
        id="Pendulum-Ignition-PyTest-v0",
        entry_point="gym_ignition.runtimes.gazebo_runtime:GazeboRuntime",
        max_episode_steps=1000,
        kwargs={
            "task_cls": PendulumSwingUp,
            "robot_cls": gazebo.pendulum.PendulumGazeboRobot,
            "model": "Pendulum/Pendulum.urdf",
            "world": "DefaultEmptyWorld.world",
            "rtf": 100,
            "agent_rate": 4000,
            "physics_rate": 4000,
            "hard_reset": False,
        },
    )
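

# --- Hypothetical smoke test -------------------------------------------------
# A minimal pytest-style check (an assumption, not part of the original test
# suite) that the id registered above resolves, and that theta_from_obs maps
# the (cos, sin, theta_dot) observation back to an angle in [-pi, pi]. Running
# it requires a working Ignition Gazebo installation.
def test_pendulum_registration():
    env = gym.make("Pendulum-Ignition-PyTest-v0")
    observation = env.reset()
    theta = theta_from_obs(observation)
    assert -np.pi <= theta <= np.pi
    env.close()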
Example #4
# coding=utf-8
"""
OpenAI Gym plugin for ACN-Sim. Provides several customizable
environments for training reinforcement learning (RL) agents. See
tutorial X for examples of usage.
"""
from typing import List, Dict

from gym.envs import registry
from gym.envs.registration import register, EnvSpec
from .interfaces import GymTrainedInterface, GymTrainingInterface
from .envs import *

all_envs: List[EnvSpec] = list(registry.all())
env_ids = [env_spec.id for env_spec in all_envs]
gym_env_dict: Dict[str, str] = {
    "custom-acnsim-v0":
    "gym_acnportal.gym_acnsim.envs:CustomSimEnv",
    "default-acnsim-v0":
    "gym_acnportal.gym_acnsim.envs:make_default_sim_env",
    "rebuilding-acnsim-v0":
    "gym_acnportal.gym_acnsim.envs:RebuildingEnv",
    "default-rebuilding-acnsim-v0":
    "gym_acnportal.gym_acnsim.envs:make_rebuilding_default_sim_env",
}
for env_name, env_entry_point in gym_env_dict.items():
    if env_name not in env_ids:
        register(id=env_name, entry_point=env_entry_point)
del register, registry, all_envs, gym_env_dict, List, Dict
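

# --- Hypothetical usage -------------------------------------------------------
# The registered ids are then available through the usual gym lookup. This is
# an illustrative sketch only; the actual ACN-Sim environments may require
# simulator-specific constructor arguments or a prior interface setup.
import gym

env = gym.make("default-acnsim-v0")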