Example #1
    def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int) number of environments to create
        :param eval_env: (bool) whether the environment is used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([
                    make_env(env_id,
                             0,
                             args.seed,
                             wrapper_class=env_wrapper,
                             log_dir=log_dir)
                ])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most envs, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([
                    make_env(env_id,
                             i,
                             args.seed,
                             log_dir=log_dir,
                             wrapper_class=env_wrapper) for i in range(n_envs)
                ])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(
                            normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
            del hyperparams['frame_stack']
        return env
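The examples on this page omit their import statements. The block below is a minimal sketch of the stable-baselines (2.x) imports they appear to rely on; module paths vary between versions, so treat it as an assumption rather than a header copied from any example.

# Assumed imports for the stable-baselines 2.x snippets on this page (sketch)
import gym
import numpy as np
from stable_baselines import A2C, ACER, PPO2
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import (DummyVecEnv, SubprocVecEnv,
                                             VecFrameStack, VecNormalize,
                                             sync_envs_normalization,
                                             unwrap_vec_normalize)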
def test_sync_vec_normalize():
    env = DummyVecEnv([make_env])

    assert unwrap_vec_normalize(env) is None

    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    env = VecFrameStack(env, 1)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    eval_env = DummyVecEnv([make_env])
    eval_env = VecNormalize(eval_env, training=False, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    eval_env = VecFrameStack(eval_env, 1)

    env.reset()
    # Initialize running mean
    for _ in range(100):
        env.step([env.action_space.sample()])

    obs = env.reset()
    original_obs = env.get_original_obs()
    # Normalization must be different
    assert not np.allclose(obs, eval_env.normalize_obs(original_obs))

    sync_envs_normalization(env, eval_env)

    # Now they must be synced
    assert np.allclose(obs, eval_env.normalize_obs(original_obs))
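A hedged, self-contained sketch of the same pattern in a train/eval setting: keep a separate eval env with training=False and copy the running statistics before evaluating. The make_env factory and the Pendulum env are assumptions for illustration.

# Sketch: sync normalization statistics from a training env to an eval env
make_env = lambda: gym.make("Pendulum-v0")        # assumed env factory
train_env = VecNormalize(DummyVecEnv([make_env]))
eval_env = VecNormalize(DummyVecEnv([make_env]), training=False, norm_reward=False)

train_env.reset()
for _ in range(100):
    # Grow the running mean/std of the training env
    train_env.step([train_env.action_space.sample()])

sync_envs_normalization(train_env, eval_env)      # eval_env now normalizes like train_env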
Example #3
def train(env_id,
          num_timesteps,
          seed,
          policy,
          attack=False,
          n_envs=8,
          nminibatches=4,
          n_steps=128):

    model = PPO2.load("model.pkl")
    env = VecFrameStack(make_atari_env(env_id, n_envs, seed), 4)
    if attack:
        env = VecFrameStack(
            make_adversarial_atari_env(env_id, n_envs, seed, model), 4)
    # Attach the (possibly adversarial) env to the loaded model before further training
    model.set_env(env)

    policy = {
        'cnn': CnnPolicy,
        'lstm': CnnLstmPolicy,
        'lnlstm': CnnLnLstmPolicy,
        'mlp': MlpPolicy
    }[policy]
    #    model = PPO2(policy=policy, env=env, n_steps=n_steps, nminibatches=nminibatches,
    #                lam=0.95, gamma=0.99, noptepochs=4, ent_coef=.01,
    #                 learning_rate=lambda f: f * 2.5e-4, cliprange=lambda f: f * 0.1, verbose=1)
    model.learn(total_timesteps=num_timesteps)
    model.save("model")
    env.close()
    # Free memory
    del model
def create_test_env(env_id,
                    n_envs=1,
                    is_atari=False,
                    stats_path=None,
                    seed=0,
                    log_dir='',
                    should_render=True,
                    hyperparams=None,
                    env_kwargs=None):

    if hyperparams is None:
        hyperparams = {}

    if env_kwargs is None:
        env_kwargs = {}

    # Create the environment and wrap it if necessary
    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    else:
        # start_method = 'spawn' for thread safety
        env = DummyVecEnv([
            make_env(env_id,
                     i,
                     seed,
                     log_dir,
                     wrapper_class=None,
                     env_kwargs=env_kwargs) for i in range(n_envs)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams['normalize']:
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env,
                               training=False,
                               **hyperparams['normalize_kwargs'])

            if os.path.exists(os.path.join(stats_path, 'vecnormalize.pkl')):
                env = VecNormalize.load(
                    os.path.join(stats_path, 'vecnormalize.pkl'), env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                # Legacy:
                env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
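A hypothetical call to the helper above; the env id is illustrative and the expected observation shape assumes the standard Atari wrappers (84x84 grayscale frames, 4-frame stack).

# Sketch: build a frame-stacked Atari test env (env id and shape are assumptions)
env = create_test_env('BreakoutNoFrameskip-v4', n_envs=1, is_atari=True, seed=0)
obs = env.reset()   # expected shape: (1, 84, 84, 4)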
Example #5
    def __init__(self, trial_context: PyTorchTrialContext) -> None:
        self.context = trial_context
        self.download_directory = f"/tmp/data-rank{self.context.distributed.get_rank()}"
        # self.logger = TorchWriter()
        self.n_stack = self.context.get_hparam("n_stack")
        self.env_name = self.context.get_hparam("env_name")
        self.num_envs = self.context.get_hparam("num_envs")
        self.rollout_size = self.context.get_hparam("rollout_size")
        self.curiousity = self.context.get_hparam("curiousity")
        self.lr = self.context.get_hparam("lr")
        self.icm_beta = self.context.get_hparam("icm_beta")
        self.value_coeff = self.context.get_hparam("value_coeff")
        self.entropy_coeff = self.context.get_hparam("entropy_coeff")
        self.max_grad_norm = self.context.get_hparam("max_grad_norm")

        env = make_atari_env(self.env_name, num_env=self.num_envs, seed=42)
        self.env = VecFrameStack(env, n_stack=self.n_stack)
        eval_env = make_atari_env(self.env_name, num_env=1, seed=42)
        self.eval_env = VecFrameStack(eval_env, n_stack=self.n_stack)

        # constants
        self.in_size = self.context.get_hparam("in_size")  # in_size
        self.num_actions = env.action_space.n

        def init_(m):
            return init(m, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.feat_enc_net = self.context.Model(
            FeatureEncoderNet(self.n_stack, self.in_size))
        self.actor = self.context.Model(
            init_(nn.Linear(self.feat_enc_net.hidden_size, self.num_actions)))
        self.critic = self.context.Model(
            init_(nn.Linear(self.feat_enc_net.hidden_size, 1)))
        self.set_recurrent_buffers(self.num_envs)

        params = list(self.feat_enc_net.parameters()) + list(
            self.actor.parameters()) + list(self.critic.parameters())
        self.opt = self.context.Optimizer(torch.optim.Adam(params, self.lr))

        self.is_cuda = torch.cuda.is_available()
        self.storage = RolloutStorage(self.rollout_size,
                                      self.num_envs,
                                      self.env.observation_space.shape[0:-1],
                                      self.n_stack,
                                      is_cuda=self.is_cuda,
                                      value_coeff=self.value_coeff,
                                      entropy_coeff=self.entropy_coeff)

        obs = self.env.reset()
        self.storage.states[0].copy_(self.storage.obs2tensor(obs))

        self.writer = SummaryWriter(log_dir="/tmp/tensorboard")
        self.global_eval_count = 0
Example #6
        def create_env(n_envs):
            """
            Create the environment and wrap it if necessary
            :param n_envs: (int)
            :return: (gym.Env)
            """
            global hyperparams

            if is_atari:
                if args.verbose > 0:
                    print("Using Atari wrapper")
                env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
                # Frame-stacking with 4 frames
                env = VecFrameStack(env, n_stack=4)
            elif args.algo in ['dqn', 'ddpg']:
                if hyperparams.get('normalize', False):
                    print(
                        "WARNING: normalization not supported yet for DDPG/DQN"
                    )
                # No env_wrapper applied for now as not using make_env()
                env = gym.make(env_id)
                env.seed(args.seed)
            else:
                if n_envs == 1:
                    env = DummyVecEnv([
                        make_env(env_id,
                                 0,
                                 args.seed,
                                 wrapper_class=env_wrapper)
                    ])
                else:
                    # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                    # On most envs, SubprocVecEnv does not help and is quite memory hungry
                    env = DummyVecEnv([
                        make_env(env_id,
                                 i,
                                 args.seed,
                                 wrapper_class=env_wrapper)
                        for i in range(n_envs)
                    ])
                if normalize:
                    if args.verbose > 0:
                        print("Normalizing input and return")
                    env = VecNormalize(env, **normalize_kwargs)
            # Optional Frame-stacking
            if hyperparams.get('frame_stack', False):
                n_stack = hyperparams['frame_stack']
                env = VecFrameStack(env, n_stack)
                print("Stacking {} frames".format(n_stack))
                del hyperparams['frame_stack']
            return env
Example #7
def createEnvs(args,
               allow_early_resets=False,
               env_kwargs=None,
               load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path from which to load the rolling average, None if not available or not wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports

    envs = [
        makeEnv(args.env,
                args.seed,
                i,
                args.log_dir,
                allow_early_resets=allow_early_resets,
                env_kwargs=env_kwargs) for i in range(args.num_cpu)
    ]

    if len(envs) == 1:
        # No need for subprocesses when there is only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
    # envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #8
def train(env_id, num_timesteps, seed, policy, lr_schedule, num_env):
    """
    Train A2C model for atari environment, for testing purposes

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    :param policy: (A2CPolicy) The policy model to use (MLP, CNN, LSTM, ...)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                                 'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param num_env: (int) The number of environments
    """
    policy_fn = None
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = CnnLstmPolicy
    elif policy == 'lnlstm':
        policy_fn = CnnLnLstmPolicy
    if policy_fn is None:
        raise ValueError("Error: policy {} not implemented".format(policy))

    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)

    model = A2C(policy_fn, env, lr_schedule=lr_schedule, seed=seed)
    model.learn(total_timesteps=int(num_timesteps * 1.1))
    env.close()
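A hypothetical invocation of the A2C helper above; the env id and hyperparameters are illustrative only.

if __name__ == '__main__':
    # Sketch: train A2C on Breakout with 8 parallel envs and a constant learning rate schedule
    train('BreakoutNoFrameskip-v4', num_timesteps=int(1e6), seed=0,
          policy='cnn', lr_schedule='constant', num_env=8)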
Example #9
def train(env_id, num_timesteps, seed, policy,
          n_envs=8, nminibatches=4, n_steps=128):
    """
    Train PPO2 model for atari environment, for testing purposes

    :param env_id: (str) the environment id string
    :param num_timesteps: (int) the number of timesteps to run
    :param seed: (int) Used to seed the random generator.
    :param policy: (Object) The policy model to use (MLP, CNN, LSTM, ...)
    :param n_envs: (int) Number of parallel environments
    :param nminibatches: (int) Number of training minibatches per update. For recurrent policies,
        the number of environments run in parallel should be a multiple of nminibatches.
    :param n_steps: (int) The number of steps to run for each environment per update
        (i.e. batch size is n_steps * n_env where n_env is number of environment copies running in parallel)
    """
    env = make_atari_env(env_id, n_envs, seed)
    env = VecFrameStack(env, 4)
    policy = {'cnn': CnnPolicy, 'lstm': CnnLstmPolicy, 'lnlstm': CnnLnLstmPolicy, 'mlp': MlpPolicy}[policy]
    model = PPO2(policy=policy, env=env, n_steps=n_steps, nminibatches=nminibatches,
                 lam=0.95, gamma=0.99, noptepochs=4, ent_coef=.01,
                 learning_rate=lambda f: f * 2.5e-4, cliprange=lambda f: f * 0.1, verbose=1)
    model.learn(total_timesteps=num_timesteps)
    model.save('/serverdata/rohit/stablebaselines/atari/ppo/{}'.format(env_id), 'csv')

    env.close()
    # Free memory
    del model
Example #10
def train(env_id, num_timesteps, seed, policy, lr_schedule, num_cpu):
    """
    train an ACER model on atari

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    :param policy: (A2CPolicy) The policy model to use (MLP, CNN, LSTM, ...)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                                 'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param num_cpu: (int) The number of cpu to train on
    """
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = CnnLstmPolicy
    else:
        warnings.warn("Policy {} not implemented".format(policy))
        return

    model = ACER(policy_fn, env, lr_schedule=lr_schedule, buffer_size=5000)
    model.learn(total_timesteps=int(num_timesteps * 1.1), seed=seed)
    env.close()
    # Free memory
    del model
Example #11
def main(cfg, run_dir):
    run_name = make_run_name(cfg)
    output_dir = run_dir / run_name
    output_dir.mkdir(parents=True)

    with (output_dir / 'config.json').open('w') as fp:
        json.dump(cfg, fp, indent=2)

    # Setting log levels to cut out minor errors
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)

    log_dir = output_dir / cfg['log_dir']
    tensorboard_dir = output_dir / cfg['tb_dir']

    configure(log_dir=str(log_dir),
              format_strs=['log', 'csv', 'tensorboard'],
              tensorboard_dir=str(tensorboard_dir))

    # Create and wrap the environment
    logging.info('Starting {env_name}'.format(**cfg))
    env = make_atari_env(env_id=cfg['env_name'],
                         num_env=8,
                         seed=cfg['train_seed'])
    env = VecFrameStack(env, n_stack=4)
    if cfg['normalize']:
        env = VecNormalize(env)

    # Setting all known random seeds (Python, Numpy, TF, Gym if available)
    set_global_seeds(cfg['train_seed'])

    logging.info('Running {algo}'.format(**cfg))

    algo = get_algo(cfg['algo'])
    policy = cfg['policy_type']
    feature_extractor = get_network_builder(cfg['network'])
    attn_loss = get_loss(cfg['attn_loss'])()
    model = algo(
        policy=policy,
        env=env,
        verbose=1,
        learning_rate=lambda frac: 0.00025 * frac,
        attn_loss=attn_loss,
        attn_coef=cfg['attn_coef'],
        policy_kwargs={
            'cnn_extractor': feature_extractor,
        },
        tensorboard_log=str(tensorboard_dir),
    )

    logging.info('Training for {time_steps} steps'.format(**cfg))

    # Training
    model.learn(
        total_timesteps=cfg['time_steps'],
        log_interval=cfg['log_interval'],
        tb_log_name=None,
        callback=Callback(output_dir),
    )
def test_frame_stack():
    env = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
    obs = env.reset()
    print("Before FrameStack, observation.shape =", obs.shape)   # (1, 3)

    frame_stack_env = VecFrameStack(env, n_stack=4)      # stack 4 consecutive frames to form the state
    obs = frame_stack_env.reset()
    print("After FrameStack, observation.shape =", obs.shape)   # (1, 12)
Example #13
def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset,
                   debug, task_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([
        lambda k=k: Monitor(env_temp(
            "sim%d" % (k + 1), StateCollector("sim%s" %
                                              (k + 1), "train"), stack_offset,
            num_stacks, robot_radius, rew_fnc, debug, "train", task_mode),
                            '%s/%s/sim_%d' %
                            (path_to_models, agent_name, k + 1),
                            allow_early_resets=True) for k in range(num_envs)
    ])

    # Normalizing?
    if normalize:
        env = VecNormalize(env,
                           training=True,
                           norm_obs=True,
                           norm_reward=False,
                           clip_obs=100.0,
                           clip_reward=10.0,
                           gamma=0.99,
                           epsilon=1e-08)
    else:
        env = env

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #14
def load_stable_baselines_env(cfg_path, vector_length, mp, n_stack, number_maps, action_frame_repeat,
                              scaled_resolution):
    env_fn = lambda: MazeExplorer.load_vizdoom_env(cfg_path, number_maps, action_frame_repeat, scaled_resolution)

    if mp:
        env = SubprocVecEnv([env_fn for _ in range(vector_length)])
    else:
        env = DummyVecEnv([env_fn for _ in range(vector_length)])

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    return env
Example #15
def create_env(env_name, config=None, n_workers=8, image_based=True, **kwargs):
    """
    Parses the environment to correctly return the attributes based on the spec and type
    Creates a corresponding vectorized environment
    """
    def make_rl(**kwargs):
        """
        Decorator for custom RL environments
        """
        def _init():
            env_obj = getattr(rl.environments, env_name)
            env = env_obj(config)
            return env

        return _init

    def make_gym(rank, seed=0, **kwargs):
        """
        Decorator for gym environments
        """
        def _init():
            env = gym.make(env_name)
            env.seed(seed + rank)
            return env

        return _init

    if config is not None:
        n_workers = config['main']['n_workers']
    mapping = {'gym': make_gym, 'rl': make_rl}
    env_type = get_env_type(env_name)
    env_decorator = mapping[env_type]
    vectorized_decorator = [env_decorator(rank=x) for x in range(n_workers)]

    # Parallelize
    if n_workers > 1:
        method = 'spawn' if sys.platform == 'win32' else 'forkserver'
        vectorized = SubprocVecEnv(vectorized_decorator, start_method=method)
    else:  # Non multi-processing env
        vectorized = DummyVecEnv(vectorized_decorator)

    # Frame-stacking for CNN based environments
    if 'frame_stack' in config['main'].keys():
        if config['main']['frame_stack'] != 0:
            vectorized = VecFrameStack(vectorized,
                                       n_stack=config['main']['frame_stack'])
    if 'normalize' in config['main'].keys():
        vectorized = VecNormalize(vectorized, clip_obs=1, clip_reward=1)

    return vectorized
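A minimal sketch of the config structure the helper above expects, with keys inferred from the accesses in the function body; the values, the env id, and the assumption that get_env_type resolves standard Gym ids to 'gym' are illustrative.

# Sketch: minimal config for create_env (keys inferred, values illustrative)
config = {'main': {'n_workers': 4, 'frame_stack': 4, 'normalize': True}}
vec_env = create_env('PongNoFrameskip-v4', config=config)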
Example #16
    def evaluate(self, n_episodes=2):

        logging.basicConfig(level=logging.INFO)

        id = 'BreakoutNoFrameskip-v4'
        num_env = 1
        n_stack = 4
        left_lives = 5
        seed = 0
        episodes = 0
        score = 0
        frames = 0
        frames_per_episode = list()
        scores = [list() for i in range(n_episodes)]

        env = make_atari_env(id, num_env=num_env, seed=seed)
        env = VecFrameStack(env, n_stack=n_stack)
        obs = env.reset()

        while (n_episodes - episodes) > 0:
            frames += 1
            action, _states = self.predict(obs)
            obs, rewards, dones, info = env.step(action)
            env.render()
            score += rewards[0]
            if dones:
                logging.debug('You died')
                logging.debug(f'Score = {score}')
                scores[episodes].append(score)
                score = 0
                left_lives -= 1
            if not left_lives:
                logging.debug('Episode ended')
                logging.info(f'Scores per life: {scores[episodes]}')
                frames_per_episode.append(frames)
                frames = 0
                episodes += 1
                left_lives = 5

        s = list(map(sum, scores))
        avg_s = int(sum(s) / len(s))
        avg_f = int(sum(frames_per_episode) / len(frames_per_episode))

        logging.info(f'Played {n_episodes} episodes')
        logging.info(f'Scores per episode : {s}')
        logging.info(f'Average score per episode : {avg_s}')
        logging.info(f'Average number of frames per episode : {avg_f}')

        return avg_f, avg_s
def test_pretrain_images(tmp_path):
    env = make_atari_env("PongNoFrameskip-v4", num_env=1, seed=0)
    env = VecFrameStack(env, n_stack=4)
    model = PPO2('CnnPolicy', env)
    generate_expert_traj(model, str(tmp_path / 'expert_pong'), n_timesteps=0, n_episodes=1,
                         image_folder=str(tmp_path / 'pretrain_recorded_images'))

    expert_path = str(tmp_path / 'expert_pong.npz')
    dataset = ExpertDataset(expert_path=expert_path, traj_limitation=1, batch_size=32,
                            sequential_preprocessing=True)
    model.pretrain(dataset, n_epochs=2)

    shutil.rmtree(str(tmp_path / 'pretrain_recorded_images'))
    env.close()
    del dataset, model, env
Example #18
def create_test_env(level=0,
                    stats_path=None,
                    seed=0,
                    log_dir='',
                    hyperparams=None):
    """
    Create environment for testing a trained agent

    :param level: (int)
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :return: (gym.Env)
    """
    # HACK to save logs
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    vae_path = hyperparams['vae_path']
    if vae_path == '':
        vae_path = os.path.join(stats_path, 'vae.pkl')
    vae = None
    if stats_path is not None and os.path.isfile(vae_path):
        vae = load_vae(vae_path)

    env = DummyVecEnv(
        [make_env(level, seed, log_dir, vae=vae, frame_skip=TEST_FRAME_SKIP)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams['normalize']:
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env,
                               training=False,
                               **hyperparams['normalize_kwargs'])
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
Example #19
    def __init__(self, env_list=default_envs, algos_list=default_algos):
        self.env_list = env_list
        self.algos_list = algos_list
        self.n_algos = len(self.algos_list)
        self.envs = dict()
        self.rewards = defaultdict(dict)
        self.models = defaultdict(dict)  # the models must be kept for the ensemble

        for env_name in self.env_list:
            new_env = make_atari_env(env_name, num_env=1, seed=0)
            new_env = VecFrameStack(new_env, n_stack=4)
            self.envs[env_name] = new_env

        for algo in self.algos_list:
            for env_name, env in self.envs.items():
                self.models[env_name][algo] = loader(algo, env_name)
Example #20
    def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single-core only, we need to use the pipe system for it to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
        envs = VecFrameStack(envs, args.num_stack)

        if args.srl_model != "raw_pixels":
            printYellow("Using MLP policy because working on state representation")
            envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
            envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

        return envs
Example #21
def createEnvs(args,
               allow_early_resets=False,
               env_kwargs=None,
               load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path from which to load the rolling average, None if not available or not wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[
            args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe
    envs = [
        makeEnv(args.env,
                args.seed,
                i,
                args.log_dir,
                allow_early_resets=allow_early_resets,
                env_kwargs=env_kwargs) for i in range(args.num_cpu)
    ]

    if len(envs) == 1:
        # No need for subprocesses when there is only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs,
                                  load_path_normalise=load_path_normalise)

    return envs
def load_train_env(ns, state_collector, robot_radius, rew_fnc, num_stacks,
                   stack_offset, debug, task_mode, rl_mode, policy,
                   disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy in ["CNN1DPolicy", "CNN1DPolicy2", "CNN1DPolicy3"]:
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env_raw = DummyVecEnv([
        lambda: env_temp(ns, state_collector, stack_offset, num_stacks,
                         robot_radius, rew_fnc, debug, rl_mode, task_mode)
    ])

    if normalize:
        env = VecNormalize(env_raw,
                           training=True,
                           norm_obs=True,
                           norm_reward=False,
                           clip_obs=100.0,
                           clip_reward=10.0,
                           gamma=0.99,
                           epsilon=1e-08)
    else:
        env = env_raw

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #23
def test():
    model = PPO2.load("model.pkl")
    sess = model.sess
    
    env = VecFrameStack(make_atari_env("SpaceInvadersNoFrameskip-v0", 1, 123), 4)

    pi = model.act_model
    action_dist = pi.action
    action_one = pi.deterministic_action

    o = env.reset()

    while True:
        env.render()
        # a, _, _, _ = pi.step(obs=o, deterministic=True)
        a = sess.run(action_one, {pi.obs_ph: o})
        o, r, d, _ = env.step(a)
Example #24
def create_env(args, env_id, godot_instances, params, session_path, eval=False):
    n = 1 if eval else args.n_agents_per_env
    env = SubprocVecEnv([make_godot_env(env_id, f'{obs_port}_{i}', obs_port, action_port,
                                        args, session_path, eval, seed=obs_port * i)
                         for i in range(n) for obs_port, action_port in godot_instances])

    vecnorm_path = get_vec_normalize_filepath(params, args)
    if vecnorm_path.exists():
        print(f'found vecnormalize data file @ {vecnorm_path.absolute()}. loading existing file.')
        env = VecNormalize.load(vecnorm_path, env)
    else:
        print(f'unable to find existing vecnormalize data file @ {vecnorm_path.absolute()}. creating a new one.')
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=1.0, clip_reward=100.0)

    if args.n_stack > 1:
        env = VecFrameStack(env, n_stack=args.n_stack)

    return env
Example #25
def test_ppo(env_id, seed, path_to_policy_params, n_envs = 1):
    
    """
     env_id: type str, identifies each environment uniquely
     seed: initial random seed
     path_to_policy_params: type str, path to the saved PPO2 policy parameters
     n_envs: number of envs to run in parallel
    """
    
    # Run a trained PPO2 agent in the environment
    # stack 4 frames for the vectorized environment
    # Note: PPO2 works only with vectorized environments
    env = VecFrameStack(make_atari_env(env_id = env_id, num_env = n_envs, seed=seed), 4)
    # define the policy
    # create model object for class PPO2
    # The policy is CnnPolicy from stable baselines and has been trained for 2e7 time steps on Pong
    
    model = PPO2.load(path_to_policy_params)
    video_path = "./videos/Pong_test_without_attack"
    vr = video_recorder.VideoRecorder(env, base_path=video_path, enabled=video_path is not None)
    
    obs = env.reset()
    ep_rew = [0.0]
    ep = 0
    for i in range(50000):
      action, _states = model.predict(obs)
      obs, rewards, dones, info = env.step(action)
      ep_rew[-1] += rewards
      env.render()
      vr.capture_frame()
      if dones:
        obs = env.reset()
        print('Net reward for episode ',ep,': ',ep_rew[-1])
        if((ep+1)%10 == 0):
          print('Mean reward for last 10 episodes: ',np.mean(ep_rew[-10:]))
        ep_rew.append(0.0)
        ep += 1
        print('Number of timesteps completed: ', i+1)
    env.close()
    vr.close()
Example #26
def run_gail():
    parser = argparse.ArgumentParser()
    parser.add_argument('expert',
                        type=str,
                        default=None,
                        help='Expert path (*.npz)')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--note', type=str, default='test')
    parser.add_argument('--env', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--num-steps', type=int, default=1000000)
    parser.add_argument('--policy',
                        type=str,
                        default='CnnPolicy',
                        choices=[
                            'CnnPolicy', 'CnnLstmPolicy', 'CnnLnLstmPolicy',
                            'MlpPolicy', 'MlpLstmPolicy', 'MlpLnLstmPolicy'
                        ],
                        help='Policy architecture')
    args = parser.parse_args()

    logger.configure(os.path.join('logs', args.env, args.note))
    logger.info(args)

    if 'NoFrameskip' in args.env:
        env = VecFrameStack(make_atari_env(args.env, 1, args.seed), 4)
    else:
        import gym
        env = gym.make(args.env)

    dataset = ExpertDataset(expert_path=args.expert,
                            batch_size=128,
                            train_fraction=0.99,
                            verbose=1)
    model = GAIL(args.policy,
                 env,
                 dataset,
                 timesteps_per_batch=1280,
                 verbose=1)
    model.learn(len(dataset.train_loader) * 1280)
Example #27
def main():
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)

    set_seed(config['seed'])

    writer = None
    # Will ERROR if outdir already exists
    if not os.path.exists(config['outdir']):
        os.makedirs(config['outdir'])
        if config['use_tensorboard']:
            os.makedirs(os.path.join(config['outdir'], 'tensorboard'))
            writer = SummaryWriter(
                os.path.join(config['outdir'], 'tensorboard'))
        # save a copy of the config file
        shutil.copyfile(args.config,
                        os.path.join(config['outdir'], 'config.yaml'))
    else:
        print("ERROR: directory \'./{}\' already exists!".format(
            config['outdir']))
        raise EnvironmentError

    logger = get_logger(config)

    # create environment
    env = make_atari_env(config['task'],
                         num_env=config['parallel_envs'],
                         seed=config['seed'])
    env = VecFrameStack(env, n_stack=config['state_frames'])

    # default device for torch tensors
    device = torch.device('cuda') if config['use_gpu'] else torch.device('cpu')

    # start training
    a2c = A2C(config, env, device, logger, writer)
    a2c.train()
tensorboard_folder = './tensorboard/Pacman/base/'
model_folder = './models/Pacman/base/'
if not os.path.isdir(tensorboard_folder):
    os.makedirs(tensorboard_folder)
if not os.path.isdir(model_folder):
    os.makedirs(model_folder)

policy = ''
model_tag = ''
if len(sys.argv) > 1:
    policy = sys.argv[1]
    model_tag = '_' + sys.argv[1]

if __name__ == '__main__':
    env = SubprocVecEnv([lambda: BaseEnv() for i in range(4)])
    env = VecFrameStack(env, 3)

    model = PPO2(get_policy(policy),
                 env,
                 verbose=0,
                 nminibatches=1,
                 tensorboard_log=tensorboard_folder)
    model.learn(total_timesteps=100000000, tb_log_name='PPO2' + model_tag)

    model.save(model_folder + "PPO2" + model_tag)
    del model
    model = PPO2.load(model_folder + "PPO2" + model_tag)

    done = False
    states = None
    obs = env.reset()
Example #29
parser = argparse.ArgumentParser()
parser.add_argument("--angle", type=float, default=0.0) # Kamerawinkel: 0 15 30 45 60
parser.add_argument("--system", type=str, default="Windows")
parser.add_argument("--factor", type=int, default=0)
args = parser.parse_args()

scale_factor_arr = [0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6]      
scale_factor_ind = args.factor

game_system = args.system
env_name = f"jupong-3D-{game_system}-v0"
env = make_atari_env(env_name, num_env=1, seed=0)
env.envs[0].reset()
env.envs[0].scale_paddles(scale_factor_arr[scale_factor_ind])
env = VecFrameStack(env, n_stack=4)

save_path = f"ppo2_save/ppo2_save_cam_angle_{args.angle}_4"
model = PPO2.load(save_path, env=None)
model.set_env(env)

def process_environment(file_path, scale_factor_ind):
    reward_arr = []
    mean_reward = 0.0
    obs = env.reset()
    reward_sum = 0.0
    while True:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if reward != 0:
            reward_sum += reward[0]
Example #30
def create_test_env(env_id, n_envs=1, is_atari=False,
                    stats_path=None, seed=0,
                    log_dir='', should_render=True, hyperparams=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param is_atari: (bool)
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :return: (gym.Env)
    """
    # HACK to save logs
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    if hyperparams is None:
        hyperparams = {}

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    if 'env_wrapper' in hyperparams.keys():
        del hyperparams['env_wrapper']

    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        # start_method = 'spawn' for thread safety
        env = SubprocVecEnv([make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper) for i in range(n_envs)])
    # Pybullet envs do not follow the gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        try:
            class_ = load(spec.entry_point)
        except AttributeError:
            # Backward compatibility with gym
            class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        render_name = None
        use_subproc = 'renders' not in inspect.getfullargspec(class_.__init__).args
        if not use_subproc:
            render_name = 'renders'
        # Dev branch of pybullet
        # use_subproc = use_subproc and 'render' not in inspect.getfullargspec(class_.__init__).args
        # if not use_subproc and render_name is None:
        #     render_name = 'render'

        # Create the env, with the original kwargs, and the new ones overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**{**spec._kwargs}, **{render_name: should_render})
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env, os.path.join(log_dir, "0"), allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams['normalize']:
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env, training=False, **hyperparams['normalize_kwargs'])
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env