Example 1
def init_policies(observation_space, action_space, base_kwargs, num_agents,
                  base):
    actor_critics = [
        Policy(observation_space.shape[1:],
               action_space
               if num_agents == 1 else Discrete(action_space.nvec[0]),
               base=get_base(base),
               base_kwargs=base_kwargs) for _ in range(num_agents)
    ]
    shared_cpu_actor_critics = [
        Policy(observation_space.shape[1:],
               action_space
               if num_agents == 1 else Discrete(action_space.nvec[0]),
               base=get_base(base),
               base_kwargs=base_kwargs).share_memory()
        for _ in range(num_agents)
    ]
    shared_cpu_actor_critics_env_actor = [
        Policy(observation_space.shape[1:],
               action_space
               if num_agents == 1 else Discrete(action_space.nvec[0]),
               base=get_base(base),
               base_kwargs=base_kwargs).share_memory()
        for _ in range(num_agents)
    ]
    pytorch_total_params = sum(p.numel()
                               for p in actor_critics[0].parameters()
                               if p.requires_grad)
    print('number of params ', pytorch_total_params)
    return actor_critics, shared_cpu_actor_critics, shared_cpu_actor_critics_env_actor
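Example 1 picks each agent's action space with Discrete(action_space.nvec[0]), which assumes that in the multi-agent case the environment exposes a MultiDiscrete joint space whose per-agent sizes live in nvec. A minimal sketch of that relationship, with illustrative sizes not taken from the project:

from gym.spaces import Discrete, MultiDiscrete

# Hypothetical joint action space for three agents with six actions each.
joint_space = MultiDiscrete([6, 6, 6])
# One Discrete space per agent, mirroring Discrete(action_space.nvec[0]) above.
per_agent_spaces = [Discrete(int(n)) for n in joint_space.nvec]
assert all(space.n == 6 for space in per_agent_spaces)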
Example 2
File: batch.py Project: srsohn/msgi
 def __init__(self, args):
     self.num_envs = args.num_processes
     self.num_graphs = 500
     root = os.getcwd()
     if args.env_name == 'playground':
         args.game_config = Playground()
         args.render, args.game, args.fixed_map_mode = args.render, args.env_name, True
         self.envs = [
             Mazebase_high(args) for i in range(args.num_processes)
         ]
         self.graph = Batch_SubtaskGraph(args)
         self.observation_space = Box(low=0,
                                      high=1,
                                      shape=self.envs[0].obs_shape,
                                      dtype=np.float32)
         self.feat_dim = args.game_config.feat_dim
         self.max_task = self.envs[0].n_actions
         self.state_space = Box(low=0,
                                high=1,
                                shape=self.envs[0].obs_shape,
                                dtype=np.float32)
         self.action_space = Discrete(self.envs[0].n_actions)
         self.feed_time = True
         self.feed_prev_ard = True
         self.load_graph = False
     elif args.env_name == 'mining':
         args.game_config = Mining()
         args.render, args.game, args.fixed_map_mode = args.render, args.env_name, True
         self.envs = [
             Mazebase_high(args) for i in range(args.num_processes)
         ]
         if args.mode == 'meta_train':
             self.graph = Batch_SubtaskGraph(args)
             self.load_graph = False
         else:
             seed = args.seed
             if seed < 1:
                 seed = 1
             args.graph_config = dict(folder=os.path.join(
                 root, 'environment', 'data', 'task_graph_mining', 'new'),
                                      gamename='eval1_mining_' + str(seed))
             self.graph = SubtaskGraph(args)
             self.load_graph = True
             self.num_graphs = self.graph.num_graph
         self.observation_space = Box(low=0,
                                      high=1,
                                      shape=self.envs[0].obs_shape,
                                      dtype=np.float32)
         self.feat_dim = args.game_config.feat_dim
         self.max_task = self.envs[0].n_actions
         self.state_space = Box(low=0,
                                high=1,
                                shape=self.envs[0].obs_shape,
                                dtype=np.float32)
         self.action_space = Discrete(self.envs[0].n_actions)
         self.feed_time = True
         self.feed_prev_ard = True
Example 3
 def action_space(self):
     """See class definition."""
     # Accelerate (Lane change to left (0), Lane change to right (1), No lane change (2)),
     # Decelerate (No lane change (3)),
     # Maintain Speed (Lane change to left (4), Lane change to right (5), No lane change (6)),
     # Emergency Brake (No lane change (7))
     speed = Discrete(4)
     # Lane change to left, Lane change to right, No lane change
     lane_change = Discrete(3)
     return Discrete(8)  # eight actions are enumerated above (0-7)
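The comments above pack a speed command and a lane-change choice into one flat index. A hedged sketch of how such a flat Discrete action could be decoded back into its two components; the table and helper below are illustrative, not the project's own code:

# Illustrative decode table for the eight actions enumerated above (assumption).
ACTION_TABLE = {
    0: ("accelerate", "left"),
    1: ("accelerate", "right"),
    2: ("accelerate", "none"),
    3: ("decelerate", "none"),
    4: ("maintain", "left"),
    5: ("maintain", "right"),
    6: ("maintain", "none"),
    7: ("emergency_brake", "none"),
}

def decode_action(action):
    """Map a flat Discrete index to a (speed command, lane change) pair."""
    return ACTION_TABLE[action]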
 def __init__(self):
     self.seed_num = None
     self.dealer = []
     self.player = []
     # ACE, 2, 3, 4, 5, 6, 7, 8, 9, 10, Jack, Queen, King
     self.deck = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10])
     self.action_space = Discrete(N_ACTIONS)
     self.observation_space = Tuple(
         (Discrete(11), Discrete(32), Discrete(2)))
     self.reward_range = (-1, 1)
     self.dealer_stop = DEALER_SICK_SUM
Example 5
 def __init__(self, env):
     super().__init__(env)
     conf = self.env.unwrapped.conf
     rows_num = conf["bricks_rows"]
     self.observation_space = Dict({
         "paddle_x":
         Discrete(81),
         "ball_x":
         Discrete(81),
         "ball_y":
         Discrete(106),
         "bricks_status_matrix":
         Box(low=0, high=1, shape=(rows_num, 18), dtype=np.uint8)
     })
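Example 5 nests Discrete and Box subspaces inside a Dict observation space. A minimal sketch of sampling from such a composite space; the sizes below are illustrative, not the environment's actual configuration:

import numpy as np
from gym.spaces import Box, Dict, Discrete

# Illustrative composite observation space in the spirit of Example 5.
obs_space = Dict({
    "paddle_x": Discrete(81),
    "ball_x": Discrete(81),
    "ball_y": Discrete(106),
    "bricks_status_matrix": Box(low=0, high=1, shape=(3, 18), dtype=np.uint8),
})
sample = obs_space.sample()        # a dict keyed like the space itself
assert obs_space.contains(sample)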
Example 6
    def __init__(self,
                 n,
                 goal_length,
                 num_distractor,
                 distractor_length,
                 max_steps=2**10):
        self.goal_length = goal_length
        self.num_distractor = num_distractor
        self.distractor_length = distractor_length
        self.n = n
        self.pairs = goal_length - 1 + distractor_length * num_distractor

        self.step_cost = 1e-1
        self.reward_gem = 10
        self.reward_key = 0

        self.max_steps = max_steps
        self.action_space = Discrete(len(action_space))
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(n, n, 3),
                                     dtype=np.uint8)

        self.owned_key = grid_color

        self.reset()
def make_atari(env_id, max_episode_steps=None):
    splt = env_id.split("|")
    env_id = splt[0]
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)

    if len(splt) > 1:
        splt_id = splt[1]
        funcType = type(env.step)
        # Overwrite env action space!
        if splt[1][:-1] == "extra_dangling":
            print("act: ", env.action_space)
            # quit()
            def new_step(self, action):
                #if action == 4:
                #    action = 3 # note: this is duplicate, not dangling!
                # print("action:", action)
                action = action % 6
                return self._step(action) # ignore dangling action
            env.action_space = Discrete(6+6*int(splt[1][-1]))
            env._step = env.step
            env.step = new_step.__get__(env, type(env)) #funcType(new_step,env,type(env))
        elif splt[1] == "extra_duplicate":
            def new_step(self, action):
                if action[-1] != 0.0:
                    action[-2] = 1.0 # duplicate action!
                return self._step(action[:-1]) # drop the duplicate slot
            env._step = env.step
            env.step = new_step.__get__(env, type(env)) # bind like the dangling branch

    return env
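The extra_dangling branch above enlarges the Discrete action space and then folds the padded indices back onto real actions with action % 6. The same idea can be expressed as a small gym wrapper instead of monkey-patching step; this is a hedged sketch, not part of the original code:

import gym
from gym.spaces import Discrete

class DanglingActionWrapper(gym.Wrapper):
    """Hypothetical wrapper that pads a Discrete action space with extra
    indices and maps them back onto the real actions, mirroring the
    action % 6 trick above."""

    def __init__(self, env, extra_actions):
        super().__init__(env)
        self.real_n = env.action_space.n
        self.action_space = Discrete(self.real_n + extra_actions)

    def step(self, action):
        # Dangling indices behave like one of the existing actions.
        return self.env.step(action % self.real_n)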
Example 8
    def __init__(self):
        self.game = DoomGame()
        self.game.load_config("O:\\Doom\\a2c\\scenarios\\dodge\\dodge.cfg")
        self.game.set_doom_scenario_path(
            "O:\\Doom\\a2c\\scenarios\\dodge\\dodge1.wad")
        #self.game.load_config('O:\\Doom\\scenarios\\cig_flat.cfg')
        #self.game.set_doom_scenario_path('O:\\Doom\\scenarios\\cig_flat_small.wad')
        #self.game.set_doom_map("map03")

        self.game.add_game_args(
            "-host 1 -deathmatch +timelimit 1.0 "
            "+sv_forcerespawn 1 +sv_noautoaim 1 +sv_respawnprotect 1 +sv_spawnfarthest 1 +sv_nocrouch 1 "
            "+viz_respawn_delay 0")

        self.game.set_mode(Mode.PLAYER)
        self.game.set_labels_buffer_enabled(True)
        self.game.set_depth_buffer_enabled(True)
        self.game.set_screen_resolution(ScreenResolution.RES_320X240)

        self.action_space = Discrete(3)
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(168, 168, 3),
                                     dtype=np.uint8)
        self.available_actions = [[1, 0], [0, 1], [0, 0]]
        #self.available_actions = [[0,0,0,1,0,0],[0,0,1,0,0,0],[0,0,0,0,0,0]]
        self.bots = 1
Example 9
    def __init__(self,
                 max_slack=float(1e9),
                 queue_size=int(1e6),
                 max_wrongs=3,
                 past_steps=10,
                 seed=None,
                 target_queue_type="LSTF"):
        # Fixed variables.
        self.max_slack = max_slack
        self.past_steps = past_steps
        self.max_wrongs = max_wrongs
        self.seed = seed
        self.queue_size = queue_size
        self.target_queue_type = target_queue_type
        self.init_queue = list()
        init_slack_rng, _ = seeding.np_random(self.seed)
        self.observation_space = Space([queue_size], np.dtype(int))
        self.action_space = Discrete(queue_size)
        for i in range(self.queue_size):
            self.init_queue.append(init_slack_rng.randint(self.max_slack))

        # Variables that can be reset.
        self.slack_rng, _ = seeding.np_random(self.seed)
        self.wrong_deques = [0] * self.past_steps
        self.wrong_deque_idx = 0
        self.queue = list(self.init_queue)
Example 10
    def __init__(self, env_config):
        game = Catcher(width=screen_wh, height=screen_wh)

        fps = 30  # fps we want to run at
        frame_skip = 2
        num_steps = 2
        force_fps = False  # False for slower speed
        display_screen = True
        # make a PLE instance.
        self.env = PLE(game,
                       fps=fps,
                       frame_skip=frame_skip,
                       num_steps=num_steps,
                       force_fps=force_fps,
                       display_screen=display_screen)
        self.env.init()
        self.action_dict = {0: None, 1: 97, 2: 100}
        #PLE env starts with black screen
        self.env.act(self.env.NOOP)

        self.action_space = Discrete(3)
        self.k = 4
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(screen_wh, screen_wh,
                                                   1 * self.k))
        self.frames = deque([], maxlen=self.k)
Example 11
 def __init__(self, env):
     super(OTWrapper, self).__init__(env)
     self.observation_space = Box(low=0,
                                  high=255,
                                  dtype=np.uint8,
                                  shape=(84, 84, 1))
     self.action_space = Discrete(len(HUMAN_ACTIONS))
Example 12
    def __init__(self, env, args):
        super().__init__(
            env,
            args,
            # default values for this algorithm
            default_learning_rate=7e-4,
            default_discount_factor=0.99,
            default_num_updates=10**7,
        )
        # default values which are currently unavailable from the cmdline. maybe add these as flags
        self.processes = 16
        self.frames_per_process = 128
        self.lam = 0.95
        self.entropy_coef = 0.01
        self.value_loss_coef = 0.5
        self.max_grad_norm = 0.5
        self.optim_eps = 1e-5
        self.clip_eps = 0.2
        self.epochs = 4
        self.batch_size = 256
        self.seed = 666 if not self.seed else self.seed

        self.frames_per_update = self.frames_per_process * self.processes
        self.update_shape = (self.frames_per_process, self.processes)
        seed_all(self.seed)

        obs_space = {"image": self.env.observation_space.spaces["image"].shape}
        self.model = {"acmodel": ACModel(obs_space, Discrete(3)).to(device)}
Example 13
    def __init__(self, n, goal_length, num_distractor, distractor_length,
                 viewport_size=5, max_steps=300, world=None, silence=False):
        self.goal_length = goal_length
        self.num_distractor = num_distractor
        self.distractor_length = distractor_length
        self.viewport_size = viewport_size
        self.n = n
        self.num_pairs = goal_length - 1 + distractor_length * num_distractor

        # Penalties and Rewards
        self.step_cost = 0.1
        self.reward_gem = 10
        self.reward_key = 0

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        self.observation_space = Box(low=0, high=255, shape=(n, n, 3), dtype=np.uint8)
        self.silence = silence

        # Game initialization
        self.owned_key = np.array(grid_color, dtype=np.float64)

        self.np_random_seed = None

        self.world = None
        self.reset(world)

        self.fig = plt.figure()
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.img = self.ax.imshow(self.world, vmin=0, vmax=255, interpolation='none')
        self.fig.canvas.draw()
        self.axbackground = self.fig.canvas.copy_from_bbox(self.ax.bbox)
        plt.show(block=False)
Example 14
    def __init__(self, sc_env, dim,
            id=np.random.randint(1000),
            verbose_freq=1,
            agg_n_episodes=100,
            reselect_army_freq=5
    ):
        """
        :param sc_env: SC2Env
        :param dim: screen dimension of sc2_env
        :param id: "name" for this environment
        :param verbose_freq: print results every n episode, 0 is no printing
        :param agg_n_episodes: print results of this many last episodes
        :param reselect_army_freq: reselect the army every n timesteps;
            needed when new units appear, as in DefeatRoaches, but selecting
            too frequently can hurt the score slightly because one timestep is wasted
        """
        self.sc2_env = sc_env
        self.dim = dim
        self.verbose_freq = verbose_freq

        # self.action_space = Discrete(dim ** 2)
        self.action_space = Discrete(2)
        self.observation_space = Box(
            low=0, high=SCREEN_FEATURES.player_relative.scale, shape=[dim, dim, 1]
        )
        self.rolling_episode_score = np.zeros(agg_n_episodes, dtype=np.float32)
        self.agg_n_episodes = agg_n_episodes
        self.id = id
        self.attack_move_action_id = [
            k for k in actions.FUNCTIONS
            if k.name == 'Attack_screen'
        ][0].id
        self.reselect_army_freq = reselect_army_freq
        self.step_counter = 0
        self.episode_counter = 0
Example 15
 def __init__(self, game):
     self.action_space = Discrete(3)
     self.observation_space = Box(low=0,
                                  high=255,
                                  shape=(84, 84, 3),
                                  dtype=np.uint8)
     self._game = game
Example 16
 def __init__(self, _iter):
     self.reset()
     self.observation_space = np.array([0, 0])
     self.iter = _iter
     self.action_space = Discrete(4)
     self.fig_num = plt.figure().number
     plt.close()
    def action_space(self):
        """Identify the dimensions and bounds of the action space.
          Actions characterized using a dict that map agent choices to traffic light actions:
            ie. for single light: 0, 1
                                 to not switch or to switch traffic light respectively
                                 {0: (0), 1:(1)}
                for multi light: 0, 1, 2, 3, 4 ...
                                 agents values corresponding to action for each traffic light
                                 example for 3 lights: {1: (1,0,0), 2:(0,1,0) ..

        Returns
        -------
        gym.spaces.Discrete object
            contains shape and bounds of action space characterized

        """

        # get all combinations of actions [(1,0,0), (0,1,0)...
        lst = list(itertools.product([0, 1], repeat=self.num_traffic_lights))

        # create a dict mapping agent action indices to tuples {1: (1,0,0), 2: (0,1,0), ...}
        for i in np.arange(len(lst)):
            self.action_dict.update({i: lst[i]})

        return Discrete(len(lst))
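The method above enumerates all per-light on/off combinations with itertools.product and exposes them as a single Discrete space. A standalone sketch of that mapping; num_traffic_lights and action_table below are illustrative names, not the class's own attributes:

import itertools

num_traffic_lights = 3
# Index i in the Discrete space corresponds to one tuple of per-light switch commands.
action_table = dict(enumerate(itertools.product([0, 1], repeat=num_traffic_lights)))

assert len(action_table) == 2 ** num_traffic_lights
assert action_table[5] == (1, 0, 1)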
Example 18
 def __init__(self, env, mode = "no_act"):
     super().__init__(env)
     self.env = env
     self.action_space = Discrete(NUM_ACTS)
     self.facing = 1 # 1 if facing right, 0 if facing left
     self.observation_space = Box(0, 1, shape=(NUM_OBS,))
     self.mode = mode
Example 19
    def __init__(self,
                 dim_room=(10, 10),
                 max_steps=120,
                 num_boxes=4,
                 num_gen_steps=None,
                 reset=True):

        # General Configuration
        self.dim_room = dim_room
        if num_gen_steps is None:
            self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
        else:
            self.num_gen_steps = num_gen_steps

        self.num_boxes = num_boxes
        self.boxes_on_target = 0

        # Penalties and Rewards
        self.penalty_for_step = -0.1
        self.penalty_box_off_target = -1
        self.reward_box_on_target = 1
        self.reward_finished = 10
        self.reward_last = 0

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        
        if reset:
            # Initialize Room
            _ = self.reset()
Example 20
 def __init__(self, size, sleep=0, dict_state=False, ma_rew=0):
     self.size = size
     self.sleep = sleep
     self.dict_state = dict_state
     self.ma_rew = ma_rew
     self.action_space = Discrete(2)
     self.reset()
Example 21
    def __init__(self,
                 n,
                 goal_length,
                 num_distractor,
                 distractor_length,
                 max_steps=300,
                 world=None):
        self.goal_length = goal_length
        self.num_distractor = num_distractor
        self.distractor_length = distractor_length
        self.n = n
        self.num_pairs = goal_length - 1 + distractor_length * num_distractor

        # Penalties and Rewards
        self.step_cost = 0.1
        self.reward_gem = 10
        self.reward_key = 0

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(n, n, 3),
                                     dtype=np.uint8)

        # Game initialization
        self.owned_key = [220, 220, 220]

        self.np_random_seed = None
        self.reset(world)
Example 22
    def test_model_action(self):
        np.random.seed(1)

        model = MagicMock()
        target = MagicMock()

        model.predict = MagicMock(
            side_effect=[np.array([[0.23, 0.75, 0.11, 0.007]])])
        normalizer = Mock()
        normalizer.normalize_state.return_value = np.ones(shape=(84, 84))

        agent = DeepQAgent(action_space=Discrete(4), normalizer=normalizer, experience_size=100, model_network=model,
                           target_network=target, epsilon=0)
        agent.episode_step = 3
        agent.step_counter = 3

        state = np.random.randint(256, size=(210, 16, 3))

        self.assertEqual(agent.act(state), 1, "Should Make Action according to the model")

        self.assertEqual(agent.episode_step, 4, "Step 4")
        self.assertEqual(agent.step_counter, 4, "Step 4")
        self.assertTrue(np.array_equal(agent.frame[:, :, 3], np.ones(shape=(84, 84))))

        normalizer.normalize_state.assert_called_once_with(state)

        frame = np.zeros((84, 84, 4))
        frame[:, :, 3] = np.ones(shape=(84, 84))

        self.assertTrue(np.array_equal(frame, model.predict.call_args_list[0][0][0][0]))

        self.assertTrue(np.array_equal(frame, agent.frame))
        self.assertEqual(agent.last_action, 1)
Example 23
    def __init__(self, n, goal_length, num_distractor, distractor_length, max_steps=10**6, collect_key=True, world=None):
        self.goal_length = goal_length
        self.num_distractor = num_distractor
        self.distractor_length = distractor_length
        self.n = n
        self.num_pairs = goal_length - 1 + distractor_length * num_distractor
        self.collect_key = collect_key  # if True, keys are collected immediately when available

        # Penalties and Rewards
        self.step_cost = 0
        self.reward_gem = 10
        self.reward_key = 1
        self.reward_distractor = -1

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        self.observation_space = Box(low=0, high=255, shape=(n+2, n+2, 3), dtype=np.uint8)

        # Game initialization
        self.owned_key = [220, 220, 220]

        self.np_random_seed = None
        self.reset(world)

        self.num_env_steps = 0
        self.episode_reward = 0

        self.last_frames = deque(maxlen=3)
Example 24
    def __init__(self):
        self.game = DoomGame()
        self.game.load_config("O:\\Doom\\scenarios\\cig_flat.cfg")
        self.game.set_doom_scenario_path(
            "O:\\Doom\\scenarios\\cig_flat_small.wad")
        self.game.set_doom_map("map01")
        self.game.add_game_args(
            "-host 1 -deathmatch +timelimit 1.0 "
            "+sv_forcerespawn 1 +sv_noautoaim 1 +sv_respawnprotect 1 +sv_spawnfarthest 1 +sv_nocrouch 1 "
            "+viz_respawn_delay 1")
        self.game.add_game_args("+name AI +colorset 0")
        self.game.set_doom_map("map02")
        self.game.add_available_game_variable(GameVariable.POSITION_X)
        self.game.add_available_game_variable(GameVariable.POSITION_Y)
        self.game.add_available_game_variable(
            GameVariable.SELECTED_WEAPON_AMMO)
        self.game.add_available_game_variable(GameVariable.HEALTH)
        self.game.add_available_game_variable(GameVariable.ARMOR)
        #self.game.set_labels_buffer_enabled(True)
        self.game.set_depth_buffer_enabled(True)
        self.game.set_mode(Mode.PLAYER)
        self.game.init()
        self.action_space = Discrete(3)
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(84, 84, 3),
                                     dtype=np.uint8)

        self._reset_path_history()
Example 25
 def __init__(self):
     self.observation_space = Box(0 * np.ones(1),
                                  1.0 * np.ones(1),
                                  dtype=np.float64)
     self.action_space = Discrete(2)
     self.num_envs = 1
     self.cnt = 0
     self.length = 50
Example 26
 def action_space(self):
     if self.discrete:
         return Discrete(2**self.num_traffic_lights)
     else:
         return Box(low=0,
                    high=1,
                    shape=(self.num_traffic_lights, ),
                    dtype=np.float32)
 def action_space(self):
     """See class definition."""
     if self.env_params.additional_params['communicate']:
         accel = Box(low=-3.0, high=3.0, shape=(1, ), dtype=np.float32)
         communicate = Discrete(2)
         return Tuple((accel, communicate))
     else:
         return Box(low=-3.0, high=3.0, shape=(1, ), dtype=np.float32)
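When communication is enabled, the method above mixes a continuous Box with a Discrete flag inside a Tuple space. A minimal hedged sketch of sampling from such a mixed space; the names below are illustrative, not the project's code:

import numpy as np
from gym.spaces import Box, Discrete, Tuple

mixed = Tuple((Box(low=-3.0, high=3.0, shape=(1,), dtype=np.float32), Discrete(2)))
accel, communicate = mixed.sample()   # a length-1 float array and an int flag
assert mixed.contains((accel, communicate))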
    def __init__(self,
                 dim_room=(10, 10),
                 max_steps=120,
                 num_boxes=4,
                 num_gen_steps=None,
                 seed=0,
                 reset=True,
                 fixed_env=False,
                 randomized_init_position=True):

        self.seed(seed)

        # General Configuration
        self.dim_room = dim_room
        if num_gen_steps is None:
            self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
        else:
            self.num_gen_steps = num_gen_steps

        self.num_boxes = num_boxes
        self.boxes_on_target = 0
        self.num_env_steps = 0

        # Penalties and Rewards
        self.penalty_for_step = -0.1
        self.penalty_box_off_target = -1
        self.reward_box_on_target = 1
        self.reward_finished = 10
        self.reward_last = 0

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        self.action_space = Discrete(len(ACTION_LOOKUP))
        screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(screen_height, screen_width, 3),
                                     dtype=np.uint8)

        self.epsilon = 0.8  # deterministic: 1

        self.fixed_env = fixed_env
        self.randomized_init_position = randomized_init_position

        if fixed_env:
            try:
                s = load_obj('state_sokoban')
                self.init_state(s)
            except:
                assert False
                print("no saved initial state, creating one...")
                s = self.get_state()
                save_obj(s, 'state_sokoban')

        if reset and not fixed_env:
            # Initialize Room
            _ = self.reset()
Example 29
 def action_space(self):
     """See class definition."""
     if self.discrete:
         return Discrete(2**self.num_traffic_lights)
     else:
         return Box(low=-1,
                    high=1,
                    shape=(self.num_traffic_lights, ),
                    dtype=np.float32)
 def __init__(self):
     # load pkl model
     # define the state, action space
     # define reward function
     self.state = None
     self.action_space = Discrete(n=100)
     self.observation_space = None
     self.step_count = 0
     pass