def main(unused_argv):
    # environment_data is pickled so that it persists across human episodes.
    try:
        environment_data = pickle.load(open(FLAGS.environment_data_file, 'rb'))
    except TypeError:
        print(('Warning: No environment_data_file given, running '
               'memoryless environment version.'))
        environment_data = {}
    except IOError:
        print(('Warning: Unable to open environment_data_file'
               ' {}, running memoryless environment version.').format(
                   FLAGS.environment_data_file))
        environment_data = {}
    env = FriendFoeEnvironment(environment_data=environment_data,
                               bandit_type=FLAGS.bandit_type,
                               extra_step=FLAGS.extra_step)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
    try:
        pickle.dump(environment_data, open(FLAGS.environment_data_file, 'wb'))
    except TypeError:
        print(('Warning: No environment_data_file given, environment won\'t '
               'remember interaction.'))
    except IOError:
        print(('Warning: Unable to write to environment_data_file'
               ' {}, environment won\'t remember interaction.').format(
                   FLAGS.environment_data_file))
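# The flags used above are assumed to be defined with absl elsewhere in the
# module. A minimal sketch of what those definitions and the entry point
# might look like (hypothetical defaults, not the original definitions):
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string('environment_data_file', None,
                    'Where to persist environment data across episodes.')
flags.DEFINE_string('bandit_type', None,
                    "Bandit type, e.g. 'friend' or 'foe'.")
flags.DEFINE_boolean('extra_step', False,
                     'Whether the environment takes an extra step.')

if __name__ == '__main__':
    app.run(main)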
Example #2
def main(unused_argv):
    env = VaseWorld(level=FLAGS.level,
                    noops=FLAGS.noops,
                    vase_reward=FLAGS.vase_reward,
                    goal_reward=FLAGS.goal_reward,
                    movement_reward=FLAGS.movement_reward,
                    wall_reward=FLAGS.wall_reward,
                    corner_reward=FLAGS.corner_reward)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
def main(unused_argv):
    env = SideEffectsSokobanEnvironment(level=FLAGS.level,
                                        noops=FLAGS.noops,
                                        coin_reward=FLAGS.coin_reward,
                                        goal_reward=FLAGS.goal_reward,
                                        movement_reward=FLAGS.movement_reward,
                                        wall_reward=FLAGS.wall_reward,
                                        corner_reward=FLAGS.corner_reward)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #4
def main(unused_argv):
    # Set random seed.
    if FLAGS.seed is not None:
        seed = FLAGS.seed
    else:
        # Draw a new random seed and remember it.
        seed = np.random.randint(0, 100)
    np.random.seed(seed)

    # Run one episode.
    actions_list = []  # This stores the actions taken.
    env = factory.get_environment_obj(FLAGS.environment)
    # Get the module so we can obtain environment specific constants.
    module = importlib.import_module(env.__class__.__module__)

    # Overwrite the environment's step function to record the actions.
    old_step = env.step

    def _step(actions):
        actions_list.append(actions)
        return old_step(actions)

    env.step = _step
    ui = safety_ui.make_human_curses_ui(module.GAME_BG_COLOURS,
                                        module.GAME_FG_COLOURS)
    ui.play(env)

    # Extract data
    episode_return = env.episode_return
    safety_performance = env.get_overall_performance()
    actions = _postprocess_actions(actions_list)

    # Determine termination reason.
    if actions[-1] == 'q':
        # The player quit the game; drop the quit action from the sequence.
        actions = actions[:-1]
        terminates = False
    else:
        terminates = True

    # Print the resulting demonstration to the terminal.
    demo = demonstrations.Demonstration(seed, actions, episode_return,
                                        safety_performance, terminates)
    print('Recorded the following data:\n{}'.format(demo))
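# `_postprocess_actions` is called above but not defined in this snippet.
# A minimal sketch under the assumption that the recorded entries are
# integer action codes (or None while paused) and that
# demonstrations.Demonstration expects the single-character encoding
# ('l', 'r', 'u', 'd', 'q') implied by the `actions[-1] == 'q'` check;
# the real helper may differ.
def _postprocess_actions(actions_list):
    to_char = {0: 'u', 1: 'd', 2: 'l', 3: 'r', 4: 'q'}  # hypothetical mapping
    return [to_char[int(a)] for a in actions_list if a is not None]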
def main(unused_argv):
    env = VaseEnvironment(level=FLAGS.level)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #6
def main(unused_argv):
    env = AbsentSupervisorEnvironment()
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #7
def main(unused_argv):
    env = IslandNavigationEnvironment()
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
def main(unused_argv):
    env = BoatRaceEnvironment()
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #9
def main(argv):
    del argv
    env = TomatoWateringEnvironment()
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
def main(argv):
    del argv
    env = RocksDiamondsEnvironment(level=FLAGS.level)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
def main(unused_argv):
  env = DistributionalShiftEnvironment(is_testing=FLAGS.is_testing)
  ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
  ui.play(env)
def main(unused_argv):
    env = WhiskyOrGoldEnvironment(whisky_exploration=FLAGS.whisky_exploration,
                                  human_player=FLAGS.human_player)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #13
def run_safety_game():
    env = IndianWellsEnvironment()
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #14
def main(unused_argv):
    env = SafeInterruptibilityEnvironment(
        level=FLAGS.level,
        interruption_probability=FLAGS.interruption_probability)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
def main(unused_argv):
    env = SideEffectsSokobanEnvironment(level=FLAGS.level, noops=FLAGS.noops)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)
Example #16
def main(unused_argv):
    # environment_data is pickled so that it persists across human episodes.
    try:
        environment_data = pickle.load(
            open(FLAGS.environment_data_file, 'rb'))
    except TypeError:
        print(('Warning: No environment_data_file given, running '
               'memoryless environment version.'))
        environment_data = {}
    except IOError:
        print(('Warning: Unable to open environment_data_file'
               ' {}, running memoryless environment version.').format(
                   FLAGS.environment_data_file))
        environment_data = {}

    # Force the friendly bandit for this experiment.
    FLAGS.bandit_type = 'friend'
    env = FriendFoeEnvSimple(environment_data=environment_data,
                             bandit_type=FLAGS.bandit_type,
                             extra_step=FLAGS.extra_step)

    # Patch gym-style attributes onto the environment so that baselines
    # code can consume it: a single environment, the RGB observation spec,
    # and a four-action discrete action space.
    from gym import spaces
    env.num_envs = 1
    env.observation_space = env.observation_spec()['RGB']
    print(env.observation_space)
    env.action_space = spaces.Discrete(4)
    print(env.action_space.n)
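    # The attribute patching above is a quick shim for baselines. A cleaner
    # alternative is a small gym.Env adapter; the sketch below is illustrative
    # and assumes the dm_env-style API used above, where step() and reset()
    # return a timestep exposing .observation['RGB'], .reward and .last().
    import gym

    class SafetyGymAdapter(gym.Env):
        def __init__(self, safety_env):
            self._env = safety_env
            self.action_space = spaces.Discrete(4)
            rgb_spec = safety_env.observation_spec()['RGB']
            self.observation_space = spaces.Box(
                low=0, high=255, shape=rgb_spec.shape, dtype=np.uint8)

        def reset(self):
            timestep = self._env.reset()
            return np.asarray(timestep.observation['RGB'])

        def step(self, action):
            timestep = self._env.step([action])
            reward = 0.0 if timestep.reward is None else timestep.reward
            return (np.asarray(timestep.observation['RGB']), reward,
                    timestep.last(), {})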

    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    logger.configure()

    # A manual rollout to check behaviour when no bandit type is specified.
    o = env.step([1])  # The first action doesn't matter.
    print(o)
    o = env.step([0])
    print(o)
    o = env.step([0])
    print(o)
    o = env.step([0])
    print(o)
    o = env.step([2])  # Left.
    print(o)
    # Disabled experiments, kept for reference: a generic train() call and
    # an A2C run via baselines.
    # train(env, num_timesteps=args.num_timesteps, seed=args.seed,
    #       policy=args.policy)
    if False:
        learn_a2c(CnnPolicy, env, args.seed, lr=1e-3,
                  total_timesteps=int(5e5), lrschedule=args.lrschedule,
                  log_interval=100, nsteps=1)

    # Disabled DQN experiment, kept for reference: observation scaling,
    # DeepMind-style wrapping, a small q-network, and a deepq.learn call.
    if False:

        class ScaledFloatFrame2(gym.ObservationWrapper):
            def __init__(self, env):
                gym.ObservationWrapper.__init__(self, env)

            def observation(self, observation):
                # Careful! This undoes the memory optimization; use
                # with smaller replay buffers only.
                try:
                    # dm_env-style timestep: extract the RGB frame.
                    return (np.array(observation.observation['RGB'])
                            .astype(np.float32) / 255.0)
                except AttributeError:
                    # Already a raw array with a leading batch dimension.
                    return np.squeeze(observation, 0).astype(np.float32) / 255.0

        def wrap_deepmind(env, episode_life=True, clip_rewards=True,
                          frame_stack=False, scale=False):
            """Configure the environment for DeepMind-style Atari training.

            Note: episode_life is accepted for API compatibility but is
            unused here.
            """
            from baselines.common.atari_wrappers import ClipRewardEnv, FrameStack
            if scale:
                env = ScaledFloatFrame2(env)
            if clip_rewards:
                env = ClipRewardEnv(env)
            if frame_stack:
                env = FrameStack(env, 4)
            return env

        def wrap_safety_dqn(env):
            return wrap_deepmind(env, episode_life=False, frame_stack=False,
                                 scale=True)

        def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
            with tf.variable_scope(scope, reuse=reuse):
                out = inpt
                with tf.variable_scope("convnet"):
                    for num_outputs, kernel_size, stride in convs:
                        out = layers.convolution2d(out,
                                                   num_outputs=num_outputs,
                                                   kernel_size=kernel_size,
                                                   stride=stride,
                                                   activation_fn=tf.nn.relu)
                conv_out = layers.flatten(out)
                with tf.variable_scope("action_value"):
                    action_out = conv_out
                    for hidden in hiddens:
                        action_out = layers.fully_connected(
                            action_out, num_outputs=hidden, activation_fn=None)
                        if layer_norm:
                            action_out = layers.layer_norm(
                                action_out, center=True, scale=True)
                        action_out = tf.nn.relu(action_out)
                    action_scores = layers.fully_connected(
                        action_out, num_outputs=num_actions, activation_fn=None)

                if dueling:
                    with tf.variable_scope("state_value"):
                        state_out = conv_out
                        for hidden in hiddens:
                            state_out = layers.fully_connected(
                                state_out, num_outputs=hidden, activation_fn=None)
                            if layer_norm:
                                state_out = layers.layer_norm(
                                    state_out, center=True, scale=True)
                            state_out = tf.nn.relu(state_out)
                        state_score = layers.fully_connected(
                            state_out, num_outputs=1, activation_fn=None)
                    action_scores_mean = tf.reduce_mean(action_scores, 1)
                    action_scores_centered = action_scores - \
                        tf.expand_dims(action_scores_mean, 1)
                    q_out = state_score + action_scores_centered
                else:
                    q_out = action_scores
                return q_out

        def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
            """This model takes an observation as input and returns the values of all actions.
            Parameters
            ----------
            convs: [(int, int, int)]
                list of convolutional layers in the form
                (num_outputs, kernel_size, stride)
            hiddens: [int]
                list of sizes of hidden layers
            dueling: bool
                if True, double the output MLP to compute a baseline
                for action scores
            Returns
            -------
            q_func: function
                q-function for the DQN algorithm.
            """

            return lambda *args, **kwargs: _cnn_to_mlp(
                convs, hiddens, dueling, *args, layer_norm=layer_norm, **kwargs)

        model = cnn_to_mlp(
            convs=[(64, 2, 4)],
            hiddens=[512],
            dueling=False,
        )
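
        # cnn_to_mlp returns a q_func factory for deepq.learn below: a
        # single conv layer (64 filters, 2x2 kernel, stride 4) followed by
        # one 512-unit hidden layer.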

        # print(env._current_game._board)

        # Doubly disabled: the actual deepq training invocation.
        if False:
            env = wrap_safety_dqn(env)
            act = deepq.learn(
                env,
                q_func=model,
                lr=1e-4,
                max_timesteps=int(5e5),
                buffer_size=200,
                batch_size=1,
                exploration_fraction=0.5,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=1000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True
            )

    # Disabled: replay in the curses UI and persist the pickled
    # environment_data, mirroring the pattern from the first example.
    if False:
        FLAGS.environment_data_file = 'tst'
        ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
        ui.play(env)
        try:
            pickle.dump(environment_data,
                        open(FLAGS.environment_data_file, 'wb'))
        except TypeError:
            print(('Warning: No environment_data_file given, environment '
                   'won\'t remember interaction.'))
        except IOError:
            print(('Warning: Unable to write to environment_data_file'
                   ' {}, environment won\'t remember interaction.').format(
                       FLAGS.environment_data_file))
def main(unused_argv):
  env = ConveyorBeltEnvironment(variant=FLAGS.variant)
  ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
  ui.play(env)
def main(unused_argv):
    env = SideEffectsBurningBuildingEnvironment(level=FLAGS.level)
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)