def main(unused_argv):
  """Run a human-playable friend/foe episode with persistent state.

  environment_data is pickled, to store it across human episodes: it is
  loaded from FLAGS.environment_data_file before play and written back
  afterwards, falling back to a memoryless run when the flag is absent
  or the file is unreadable.
  """
  try:
    # Use a context manager so the file handle is closed promptly;
    # the bare open() in the original leaked it.  open(None, 'rb')
    # still raises TypeError, so the no-flag fallback is unchanged.
    with open(FLAGS.environment_data_file, 'rb') as data_file:
      environment_data = pickle.load(data_file)
  except TypeError:
    print(('Warning: No environment_data_file given, running '
           'memoryless environment version.'))
    environment_data = {}
  except IOError:
    print(('Warning: Unable to open environment_data_file'
           ' {}, running memoryless environment version').format(
               FLAGS.environment_data_file))
    environment_data = {}

  env = FriendFoeEnvironment(environment_data=environment_data,
                             bandit_type=FLAGS.bandit_type,
                             extra_step=FLAGS.extra_step)
  ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
  ui.play(env)

  try:
    # Same context-manager fix on the write path: guarantees the data is
    # flushed and the handle closed even if pickling raises mid-way.
    with open(FLAGS.environment_data_file, 'wb') as data_file:
      pickle.dump(environment_data, data_file)
  except TypeError:
    print(('Warning: No environment_data_file given, environment won\'t '
           'remember interaction.'))
  except IOError:
    print(('Warning: Unable to write to environment_data_file'
           ' {}, environment won\'t remember interaction.').format(
               FLAGS.environment_data_file))
def main(unused_argv):
  """Start an interactive, human-controlled session of VaseWorld."""
  # Collect the per-event reward flags in one place before construction.
  reward_flags = {
      'vase_reward': FLAGS.vase_reward,
      'goal_reward': FLAGS.goal_reward,
      'movement_reward': FLAGS.movement_reward,
      'wall_reward': FLAGS.wall_reward,
      'corner_reward': FLAGS.corner_reward,
  }
  environment = VaseWorld(level=FLAGS.level, noops=FLAGS.noops,
                          **reward_flags)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Launch a human-playable curses session of the Sokoban environment."""
  environment = SideEffectsSokobanEnvironment(
      level=FLAGS.level,
      noops=FLAGS.noops,
      coin_reward=FLAGS.coin_reward,
      goal_reward=FLAGS.goal_reward,
      movement_reward=FLAGS.movement_reward,
      wall_reward=FLAGS.wall_reward,
      corner_reward=FLAGS.corner_reward)
  # Build the curses UI and hand control to the human player.
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Play one episode by hand and print it as a Demonstration record."""
  # Seed selection: honour an explicit flag, otherwise draw a fresh seed
  # and remember it so the episode can be replayed later.
  seed = FLAGS.seed if FLAGS.seed is not None else np.random.randint(0, 100)
  np.random.seed(seed)

  # Build the environment, then locate its defining module so we can read
  # the environment-specific colour constants from it.
  env = factory.get_environment_obj(FLAGS.environment)
  module = importlib.import_module(env.__class__.__module__)

  # Monkey-patch env.step so that every action the player takes is logged.
  actions_list = []  # This stores the actions taken.
  old_step = env.step

  def _step(actions):
    actions_list.append(actions)
    return old_step(actions)

  env.step = _step

  ui = safety_ui.make_human_curses_ui(module.GAME_BG_COLOURS,
                                      module.GAME_FG_COLOURS)
  ui.play(env)

  # Gather the episode statistics.
  episode_return = env.episode_return
  safety_performance = env.get_overall_performance()
  actions = _postprocess_actions(actions_list)

  # A trailing 'q' means the player quit rather than the episode ending;
  # strip it from the recorded sequence in that case.
  terminates = actions[-1] != 'q'
  if not terminates:
    actions = actions[:-1]

  demo = demonstrations.Demonstration(seed, actions, episode_return,
                                      safety_performance, terminates)
  print('Recorded the following data:\n{}'.format(demo))
def main(unused_argv):
  """Play the vase environment interactively at the flag-selected level."""
  environment = VaseEnvironment(level=FLAGS.level)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Run a human-playable episode of the absent supervisor environment."""
  environment = AbsentSupervisorEnvironment()
  human_ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                            GAME_FG_COLOURS)
  human_ui.play(environment)
def main(unused_argv):
  """Run a human-playable episode of the island navigation environment."""
  environment = IslandNavigationEnvironment()
  human_ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                            GAME_FG_COLOURS)
  human_ui.play(environment)
def main(unused_argv):
  """Run a human-playable episode of the boat race environment."""
  environment = BoatRaceEnvironment()
  human_ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                            GAME_FG_COLOURS)
  human_ui.play(environment)
def main(argv):
  """Play the tomato watering environment under the human curses UI."""
  del argv  # Unused.
  environment = TomatoWateringEnvironment()
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(argv):
  """Play rocks & diamonds interactively at the flag-selected level."""
  del argv  # Unused.
  environment = RocksDiamondsEnvironment(level=FLAGS.level)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Play distributional shift, forwarding the is_testing flag."""
  environment = DistributionalShiftEnvironment(is_testing=FLAGS.is_testing)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Run a human curses session of the whisky & gold environment."""
  environment = WhiskyOrGoldEnvironment(
      whisky_exploration=FLAGS.whisky_exploration,
      human_player=FLAGS.human_player)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def run_safety_game():
  """Launch the Indian Wells environment under the human curses UI."""
  environment = IndianWellsEnvironment()
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Play safe interruptibility with the flag-selected level/probability."""
  environment = SafeInterruptibilityEnvironment(
      level=FLAGS.level,
      interruption_probability=FLAGS.interruption_probability)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Run a human-playable Sokoban episode (level and noops from flags)."""
  environment = SideEffectsSokobanEnvironment(level=FLAGS.level,
                                              noops=FLAGS.noops)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Experimental driver for FriendFoeEnvSimple.

  Loads persisted environment_data, builds the simplified friend/foe
  environment, grafts gym-style attributes onto it, steps it manually for
  debugging, and finally re-pickles environment_data.  The RL-training
  branches below are all disabled with `if False:` guards.
  """
  # environment_data is pickled, to store it across human episodes.
  try:
    environment_data = pickle.load(
        open(FLAGS.environment_data_file, 'rb'))
  except TypeError:
    # open(None, ...) raises TypeError when no file flag was supplied.
    print(('Warning: No environment_data_file given, running '
           'memoryless environment version.'))
    environment_data = {}
  except IOError:
    print(('Warning: Unable to open environment_data_file'
           ' {}, running memoryless environment version').format(
               FLAGS.environment_data_file))
    environment_data = {}

  # Force the 'friend' bandit variant, overriding any command-line value.
  FLAGS.bandit_type = 'friend'
  env = FriendFoeEnvSimple(environment_data=environment_data,
                           bandit_type=FLAGS.bandit_type,
                           extra_step=FLAGS.extra_step)

  # Graft gym-style attributes onto the safety environment so baselines
  # code can treat it like a (single) vectorized gym env.
  env.num_envs = 1
  print('--------------')
  env.observation_space = env.observation_spec()['RGB']
  print(env.observation_spec()['RGB'])
  print('----------------')
  from gym import spaces
  env.action_space = env.action_spec()
  print(env.action_spec())
  # Replace the native action spec with a 4-way discrete gym space.
  env.action_space = spaces.Discrete(4)
  print(env.action_space.n)
  print()
  # env.observation_space = env.observation_spec
  # env.action_space = env.action_spec()

  parser = atari_arg_parser()
  parser.add_argument('--policy', help='Policy architecture',
                      choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
  parser.add_argument('--lrschedule', help='Learning rate schedule',
                      choices=['constant', 'linear'], default='constant')
  args = parser.parse_args()
  logger.configure()

  # A manual example to check behaviour when no type specified
  if True:
    for i in range(1):
      o = env.step([1])  # First action doesnt matter
      print(o)
      o = env.step([0])
      print(o)
      o = env.step([0])
      print(o)
      o = env.step([0])
      print(o)
      o = env.step([2])  # Left
      # print('.......')
      # print('Ov. perf', env.get_overall_performance())
      print(o)

  # from baselines.ppo2 import ppo2
  # train(env, num_timesteps=args.num_timesteps, seed=args.seed,
  #       policy=args.policy)

  # Disabled: A2C training run.
  if False:
    learn_a2c(CnnPolicy, env, args.seed, lr=1e-3, total_timesteps=int(5e5),
              lrschedule=args.lrschedule, log_interval=100, nsteps=1)

  # Disabled: DQN plumbing (observation wrappers + Q-network builder).
  if False:

    class ScaledFloatFrame2(gym.ObservationWrapper):
      # Scales observations to [0, 1] floats; handles both objects that
      # expose .observation['RGB'] and raw batched arrays.

      def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)

      def observation(self, observation):
        # careful! This undoes the memory optimization, use
        # with smaller replay buffers only.
        try:
          # print(observation.observation['RGB'])
          # print(observation.observation['RGB'].shape)
          return np.array(observation.observation['RGB']).astype(np.float32) / 255.0
        except AttributeError:
          # Raw array case: drop the leading batch axis before scaling.
          # print(observation.shape)
          # print(np.squeeze(observation, 0).shape)
          return np.squeeze(observation, 0).astype(np.float32) / 255.0

    def wrap_deepmind(env, episode_life=True, clip_rewards=True,
                      frame_stack=False, scale=False):
      """Configure environment for DeepMind-style Atari."""
      from baselines.common.atari_wrappers import ScaledFloatFrame, ClipRewardEnv, FrameStack
      # env = WarpFrame(env)
      if scale:
        env = ScaledFloatFrame2(env)
      if clip_rewards:
        env = ClipRewardEnv(env)
      if frame_stack:
        env = FrameStack(env, 4)
      return env

    def wrap_safety_dqn(env):
      # from baselines.common.atari_wrappers import wrap_deepmind
      return wrap_deepmind(env, episode_life=False, frame_stack=False,
                           scale=True)

    def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope,
                    reuse=False, layer_norm=False):
      # Builds the conv + MLP Q-network graph (optionally dueling).
      with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
          for num_outputs, kernel_size, stride in convs:
            out = layers.convolution2d(out,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
          action_out = conv_out
          for hidden in hiddens:
            action_out = layers.fully_connected(
                action_out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
              action_out = layers.layer_norm(
                  action_out, center=True, scale=True)
            action_out = tf.nn.relu(action_out)
          action_scores = layers.fully_connected(
              action_out, num_outputs=num_actions, activation_fn=None)
        if dueling:
          # Dueling head: centre the action advantages around their mean
          # and add the scalar state value.
          with tf.variable_scope("state_value"):
            state_out = conv_out
            for hidden in hiddens:
              state_out = layers.fully_connected(
                  state_out, num_outputs=hidden, activation_fn=None)
              if layer_norm:
                state_out = layers.layer_norm(
                    state_out, center=True, scale=True)
              state_out = tf.nn.relu(state_out)
            state_score = layers.fully_connected(
                state_out, num_outputs=1, activation_fn=None)
          action_scores_mean = tf.reduce_mean(action_scores, 1)
          action_scores_centered = action_scores - \
              tf.expand_dims(action_scores_mean, 1)
          q_out = state_score + action_scores_centered
        else:
          q_out = action_scores
        return q_out

    def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
      """This model takes as input an observation and returns values of all actions.

      Parameters
      ----------
      convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
      hiddens: [int]
        list of sizes of hidden layers
      dueling: bool
        if true double the output MLP to compute a baseline
        for action scores

      Returns
      -------
      q_func: function
        q_function for DQN algorithm.
      """
      return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling,
                                                 layer_norm=layer_norm,
                                                 *args, **kwargs)

    model = cnn_to_mlp(
        convs=[(64, 2, 4)],
        hiddens=[512],
        dueling=False,
    )
    # print(env._current_game._board)

  # Disabled: actual DQN training on the wrapped environment.
  if False:
    env = wrap_safety_dqn(env)
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=int(5e5),
        buffer_size=200,
        batch_size=1,
        exploration_fraction=0.5,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=1000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )

  # Disabled: interactive curses play against a test data file.
  if False:
    FLAGS.environment_data_file = 'tst'
    ui = safety_ui.make_human_curses_ui(GAME_BG_COLOURS, GAME_FG_COLOURS)
    ui.play(env)

  # NOTE(review): the mangled source makes it ambiguous whether this dump
  # sits inside the `if False:` block above; it mirrors the always-run tail
  # of the friend_foe main, so it is kept at function level — confirm.
  try:
    pickle.dump(environment_data, open(FLAGS.environment_data_file, 'wb'))
  except TypeError:
    print(('Warning: No environment_data_file given, environment won\'t '
           'remember interaction.'))
  except IOError:
    print(('Warning: Unable to write to environment_data_file'
           ' {}, environment won\'t remember interaction.').format(
               FLAGS.environment_data_file))
def main(unused_argv):
  """Play the conveyor belt environment variant chosen by FLAGS.variant."""
  environment = ConveyorBeltEnvironment(variant=FLAGS.variant)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)
def main(unused_argv):
  """Play the burning building environment at the flag-selected level."""
  environment = SideEffectsBurningBuildingEnvironment(level=FLAGS.level)
  interface = safety_ui.make_human_curses_ui(GAME_BG_COLOURS,
                                             GAME_FG_COLOURS)
  interface.play(environment)