Example #1
import numpy as np  # the observation packing below relies on numpy


class GameManager:
    def __init__(self, id):
        # Config, Environment, StateMap and map_action are provided by the
        # surrounding project and are assumed to be in scope here.
        self.visualize = False

        if Config.VISUALIZE and int(id / len(Config.PATH_TO_WORLD)) == 0:
            self.visualize = True
        elif Config.PLAY_MODE:
            self.visualize = True

        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))

        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)

        self.reset()

    def reset(self):
        observation, _, _, _ = self.env.reset()
        input_laser, rotation = self.process_observation(observation)
        state_map = StateMap(input_laser)  # rasterise the laser scan into a state image
        obs = np.array([[state_map.S_image], [rotation]])
        return obs

    def step(self, action):
        self._update_display()
        if action is None:
            # No action given: step the simulation in place and ignore its reward/done flags.
            observation, reward, done, info = self.env.step(0, 0, 20)
            input_laser, rotation = self.process_observation(observation)
            state_map = StateMap(input_laser)
            # obs = np.array([[state_map.States_map, state_map.Reward_map], [rotation]])
            obs = np.array([[state_map.S_image], [rotation]])
            reward = 0
            done = False
        else:
            # Translate the discrete action into (linear, angular) velocities and step.
            linear, angular = map_action(action)
            observation, reward, done, info = self.env.step(linear, angular, 20)
            input_laser, rotation = self.process_observation(observation)
            state_map = StateMap(input_laser)
            obs = np.array([[state_map.S_image], [rotation]])

        return obs, reward, done, info

    def _update_display(self):
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        return self.env.observation_size()

    def process_observation(self, observation):
        # Split the raw observation vector into the laser scan and the orientation part.
        laser_scan = np.array(observation[:Config.OBSERVATION_SIZE])
        orientation = np.array(observation[Config.OBSERVATION_SIZE:])
        return laser_scan, orientation
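
A minimal driver sketch for the GameManager above, assuming the project-level Config, Environment, StateMap and map_action names are importable and that the discrete action space has five entries (borrowed from the convert_action mapping in Example #4; the real action count may differ):

import random

def run_random_episode(agent_id, max_steps=500, n_actions=5):
    # n_actions is an assumption for illustration; the real size comes from the
    # project's action mapper.
    game = GameManager(agent_id)
    obs = game.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        action = random.randrange(n_actions)  # placeholder policy: uniform random
        obs, reward, done, info = game.step(action)
        total_reward += reward
        if done:
            break
    return total_reward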
Example #2
class GameManager:
    def __init__(self, id):

        self.visualize = False

        if Config.VISUALIZE and int(id / len(Config.PATH_TO_WORLD)) == 0:
            self.visualize = True
        elif Config.PLAY_MODE:
            self.visualize = True

        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))

        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(
            Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)

        self.reset()

    def reset(self):
        observation, _, _, _ = self.env.reset()
        return observation

    def step(self, action):
        self._update_display()
        if action is None:
            observation, reward, done, info = self.env.step(0, 0, 20)
            reward = 0
            done = False
        else:
            linear, angular = map_action(action)
            observation, reward, done, info = self.env.step(
                linear, angular, 20)
        return observation, reward, done, info

    def _update_display(self):
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        return self.env.observation_size()
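
Both GameManager variants rely on a project-level map_action helper that is not shown in these excerpts. A hypothetical sketch of what it might look like, mirroring the five-way discretisation used by Worker.convert_action in Example #4 (the actual values in the project may differ):

def map_action(action):
    # Hypothetical mapping from a discrete action index to (linear, angular)
    # velocities; values borrowed from Example #4's convert_action.
    mapping = {
        0: (0.5, 1.0),    # sharp left
        1: (0.75, 0.5),   # soft left
        2: (1.0, 0.0),    # straight ahead
        3: (0.75, -0.5),  # soft right
        4: (0.5, -1.0),   # sharp right
    }
    return mapping.get(action, (0.0, 0.0))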
Example #3
# NOTE: the class statement is missing from this excerpt. The name DQNAgentProc
# comes from the instantiation below; subclassing keras-rl's Processor is an
# assumption based on the process_* hook methods (its import is not shown here).
class DQNAgentProc(Processor):
	def process_reward(self, reward):
		return reward

	def process_info(self, info):
		return info

	def process_action(self, action):
		return action

	def process_state_batch(self, batch):
		return batch[:, 0, :]

env = Environment("Simulation2d/svg/proto_4", 4)
env.use_observation_rotation_size(True)
env.set_observation_rotation_size(128)
env.set_mode(Mode.ALL_RANDOM)

processor = DQNAgentProc()
states = env.observation_size()
actions = action_mapper.ACTION_SIZE

if DEBUG:
	print('states: {0}'.format(states))
	print('actions: {0}'.format(actions))

def build_callbacks(env_name):
    weights_filename = 'new_results/' + env_name + '{step}.h5f'
    log_filename = 'new_log/{}.json'.format(env_name)
    callbacks = [ModelIntervalCheckpoint(weights_filename, interval=10000)]
    callbacks += [FileLogger(log_filename, interval=1000)]
    return callbacks
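
The processor's process_state_batch strips keras-rl's window axis: with a window length of 1, a sampled batch arrives with shape (batch_size, 1, observation_size), and batch[:, 0, :] yields the flat (batch_size, observation_size) array the network expects. A small shape check (the observation size of 256 is an arbitrary stand-in for env.observation_size()):

import numpy as np

batch = np.zeros((32, 1, 256))         # dummy sampled batch: (batch, window=1, observation)
flat = batch[:, 0, :]                  # what process_state_batch returns
print(batch.shape, '->', flat.shape)   # (32, 1, 256) -> (32, 256)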
Example #4
class Worker(object):
    def __init__(self, name, globalAC):
        if MULTIPLE_ROOMS:
            if name == "W_0" or name == "W_1" or name == "W_2":
                self.env = Environment(ENV_NAME)
            elif name == "W_3" or name == "W_4" or name == "W_5":
                self.env = Environment(ENV_NAME_2)
            else:
                self.env = Environment(ENV_NAME_3)
        else:
            self.env = Environment(ENV_NAME)

        self.env.set_cluster_size(CLUSTER_SIZE)
        self.env.set_observation_rotation_size(64)  # TODO
        self.env.use_observation_rotation_size(True)
        self.name = name
        self.AC = ACNet(name, globalAC)

    def convert_action(self, action):
        # Map a discrete action index (0-4) to (linear, angular) velocities.
        angular = 0
        linear = 0

        if action == 0:
            angular = 1.0
            linear = 0.5
        elif action == 1:
            angular = 0.5
            linear = 0.75
        elif action == 2:
            angular = 0.0
            linear = 1.0
        elif action == 3:
            angular = -0.5
            linear = 0.75
        else:
            angular = -1.0
            linear = 0.5

        return linear, angular

    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s, _, _, _ = self.env.reset()
            s = np.reshape(s, [1, N_S])
            ep_r = 0
            # rnn_state = SESS.run(self.AC.init_state)    # zero rnn state at beginning
            # keep_state = deepcopy(rnn_state)      # keep rnn state for updating global net
            for ep_t in range(MAX_EP_STEP):

                # a, rnn_state_ = self.AC.choose_action(s, rnn_state)  # get the action and next rnn state
                a = self.AC.choose_action(s)  # query the local net for its action output
                b = np.asarray(a)
                b = b[0][0]

                action = np.argmax(b)

                linear, angular = self.convert_action(action)

                s_, r, done, _ = self.env.step(linear, angular, SKIP_LRF)
                s_ = np.reshape(s_, [1, N_S])

                # if (self.name == 'W_0' or self.name == "W_3") and VISUALIZE:
                if (self.name == 'W_0') and VISUALIZE:
                    self.env.visualize()

                done = True if ep_t == MAX_EP_STEP - 1 else done

                ep_r += r
                buffer_s.append(s)
                buffer_a.append(b)
                buffer_r.append(r)
                # buffer_r.append((r+8)/8)    # normalize

                if total_step % UPDATE_GLOBAL_ITER == 0 or done:  # update global and assign to local net
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        # v_s_ = SESS.run(self.AC.v, {self.AC.s: s_, self.AC.init_state: rnn_state_})[0, 0]
                        v_s_ = SESS.run(self.AC.v, {self.AC.s: s_})[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s = np.vstack(buffer_s)
                    buffer_a = np.vstack(buffer_a)
                    buffer_v_target = np.vstack(buffer_v_target)
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                        # self.AC.init_state: keep_state,
                    }

                    self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                    # keep_state = deepcopy(rnn_state_)   # replace the keep_state as the new initial rnn state_

                s = s_
                # rnn_state = rnn_state_  # renew rnn state
                total_step += 1

                if done:
                    # record running episode reward
                    if len(GLOBAL_RUNNING_R) == 0:
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(0.9 * GLOBAL_RUNNING_R[-1] + 0.1 * ep_r)

                    if self.name == "W_0":
                        print(self.name, "Ep:", GLOBAL_EP, "Ep_r:", ep_r)
                        # print(
                        #     self.name,
                        #     "Ep:", GLOBAL_EP,
                        #     "| Ep_r: %i" % GLOBAL_RUNNING_R[-1],
                        #       )
                    GLOBAL_EP += 1
                    if GLOBAL_EP % SAVE_INTERVAL == 0:
                        print("Versuche zu Speichern...")
                        self.AC.save_global()
                        print("...gespeichert!")
                    break
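
The value-target loop inside work() computes a bootstrapped discounted return over the reversed reward buffer. As a standalone sketch (not part of the original code), the same computation reads:

def discounted_targets(rewards, bootstrap_value, gamma):
    # Equivalent to the reversed-buffer loop in Worker.work(): walk the rewards
    # backwards, bootstrapping from the critic's value estimate of the last state.
    targets = []
    v = bootstrap_value
    for r in reversed(rewards):
        v = r + gamma * v
        targets.append(v)
    targets.reverse()
    return targets

# Example: discounted_targets([1.0, 0.0, 1.0], bootstrap_value=0.5, gamma=0.9)
# returns [2.1745, 1.305, 1.45].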