class GameManager:
    """Wraps an Environment for one agent and converts raw observations
    into StateMap-based network inputs ([[S_image], [rotation]] arrays).
    """

    def __init__(self, id):
        # Visualize only the first agent per world list, or always in play mode.
        self.visualize = False
        if Config.VISUALIZE and int(id / len(Config.PATH_TO_WORLD)) == 0:
            self.visualize = True
        elif Config.PLAY_MODE:
            self.visualize = True

        # Agents are distributed round-robin over the configured worlds.
        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))

        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)
        self.reset()

    def _build_obs(self, observation):
        """Convert a raw env observation into the network input array.

        Splits the observation into laser scan + rotation, renders the
        scan into a StateMap image, and stacks both into a numpy array.
        (Extracted: this was duplicated in reset() and both step() branches.)
        """
        input_laser, rotation = self.process_observation(observation)
        state_map = StateMap(input_laser)  # renamed from `map` (shadowed builtin)
        return np.array([[state_map.S_image], [rotation]])

    def reset(self):
        """Reset the environment and return the initial processed observation."""
        observation, _, _, _ = self.env.reset()
        return self._build_obs(observation)

    def step(self, action):
        """Advance the environment by one step.

        A None action performs an idle step (zero velocities, same frame
        skip of 20) and reports a neutral transition (reward 0, not done).
        Returns (obs, reward, done, info).
        """
        self._update_display()
        if action is None:
            observation, reward, done, info = self.env.step(0, 0, 20)
            reward = 0
            done = False
        else:
            linear, angular = map_action(action)
            observation, reward, done, info = self.env.step(linear, angular, 20)
        return self._build_obs(observation), reward, done, info

    def _update_display(self):
        """Render the environment when visualization is enabled."""
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        """Size of the raw observation vector reported by the environment."""
        return self.env.observation_size()

    def process_observation(self, observation):
        """Split a raw observation into (laser_scan, orientation) arrays.

        The first Config.OBSERVATION_SIZE entries are the laser scan; the
        remainder is the rotation/orientation encoding.
        """
        laser_scan = np.array(observation[:Config.OBSERVATION_SIZE])
        orientation = np.array(observation[Config.OBSERVATION_SIZE:])
        return laser_scan, orientation
class GameManager:
    """Thin per-agent wrapper around an Environment.

    Handles world selection, optional visualization, and pass-through
    reset/step calls returning the environment's raw observations.
    """

    def __init__(self, id):
        # Only the first batch of agents visualizes; play mode always does.
        first_batch = int(id / len(Config.PATH_TO_WORLD)) == 0
        self.visualize = bool((Config.VISUALIZE and first_batch) or Config.PLAY_MODE)

        # Round-robin world assignment across the configured paths.
        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))

        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)
        self.reset()

    def reset(self):
        """Reset the environment and return its raw initial observation."""
        first_obs, _, _, _ = self.env.reset()
        return first_obs

    def step(self, action):
        """Advance the environment by one step.

        A None action performs an idle step (zero velocities, frame skip
        of 20) and reports a neutral transition (reward 0, not done).
        Returns (observation, reward, done, info).
        """
        self._update_display()
        if action is None:
            obs, _, _, info = self.env.step(0, 0, 20)
            return obs, 0, False, info
        linear, angular = map_action(action)
        return self.env.step(linear, angular, 20)

    def _update_display(self):
        """Render the environment when visualization is enabled."""
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        """Size of the raw observation vector."""
        return self.env.observation_size()
# NOTE(review): this chunk begins mid-file -- the four process_* methods
# below continue a class defined earlier, which is instantiated further down
# as DQNAgentProc().  The class header is reconstructed here; the original
# presumably subclasses keras-rl's Processor -- confirm against the full file.
class DQNAgentProc:
    """Pass-through observation/reward processor for the DQN agent."""

    def process_reward(self, reward):
        # Rewards are forwarded unmodified.
        return reward

    def process_info(self, info):
        # Env info dict is forwarded unmodified.
        return info

    def process_action(self, action):
        # Discrete action index is forwarded unmodified.
        return action

    def process_state_batch(self, batch):
        # Drop the window axis: (batch, 1, obs) -> (batch, obs).
        # assumes window_length == 1 -- TODO confirm against agent config.
        return batch[:, 0, :]


# --- environment / agent setup ------------------------------------------
env = Environment("Simulation2d/svg/proto_4", 4)
env.use_observation_rotation_size(True)
env.set_observation_rotation_size(128)
env.set_mode(Mode.ALL_RANDOM)

processor = DQNAgentProc()
states = env.observation_size()
actions = action_mapper.ACTION_SIZE

if DEBUG:
    print('states: {0}'.format(states))
    print('actions: {0}'.format(actions))


def build_callbacks(env_name):
    """Create checkpoint and JSON-logging callbacks for training.

    Args:
        env_name: prefix used for the weight and log file names.

    Returns:
        List of keras-rl callbacks (interval checkpoint + file logger).
    """
    weights_filename = 'new_results/' + env_name + '{step}.h5f'
    log_filename = 'new_log/{}.json'.format(env_name)
    callbacks = [ModelIntervalCheckpoint(weights_filename, interval=10000)]
    callbacks += [FileLogger(log_filename, interval=1000)]
    # BUG FIX: the visible chunk built the list but never returned it,
    # leaving callers with None.
    return callbacks
class Worker(object):
    """A3C worker: owns a local environment and a local ACNet, collects
    rollouts, and periodically pushes gradients to / pulls weights from
    the shared global network.
    """

    def __init__(self, name, globalAC):
        # Spread workers over up to three worlds when MULTIPLE_ROOMS is set.
        if MULTIPLE_ROOMS:
            if name in ("W_0", "W_1", "W_2"):
                self.env = Environment(ENV_NAME)
            elif name in ("W_3", "W_4", "W_5"):
                self.env = Environment(ENV_NAME_2)
            else:
                self.env = Environment(ENV_NAME_3)
        else:
            self.env = Environment(ENV_NAME)
        self.env.set_cluster_size(CLUSTER_SIZE)
        self.env.set_observation_rotation_size(64)  # TODO: take from config
        self.env.use_observation_rotation_size(True)
        self.name = name
        self.AC = ACNet(name, globalAC)

    # Discrete action id -> (linear, angular) velocity command.
    _ACTION_TABLE = {
        0: (0.5, 1.0),
        1: (0.75, 0.5),
        2: (1.0, 0.0),
        3: (0.75, -0.5),
    }

    def convert_action(self, action):
        """Map a discrete action id to a (linear, angular) pair.

        Any id outside 0..3 falls back to the sharp-right command
        (0.5, -1.0), matching the original if/else chain's final else.
        """
        return self._ACTION_TABLE.get(action, (0.5, -1.0))

    def work(self):
        """Run episodes until the coordinator stops or MAX_GLOBAL_EP is hit.

        Every UPDATE_GLOBAL_ITER steps (or at episode end) the buffered
        transitions are turned into discounted value targets, pushed to the
        global net, and the local net is re-synced.

        (Commented-out RNN-state handling from the original was removed,
        including an orphaned, un-commented continuation line that was a
        syntax error as written.)
        """
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s, _, _, _ = self.env.reset()
            s = np.reshape(s, [1, N_S])
            ep_r = 0
            for ep_t in range(MAX_EP_STEP):
                a = self.AC.choose_action(s)
                b = np.asarray(a)
                b = b[0][0]
                action = np.argmax(b)
                linear, angular = self.convert_action(action)
                s_, r, done, _ = self.env.step(linear, angular, SKIP_LRF)
                s_ = np.reshape(s_, [1, N_S])

                # Only the first worker renders, and only when enabled.
                if (self.name == 'W_0') and VISUALIZE:
                    self.env.visualize()

                # Force-terminate the episode at the step cap.
                done = True if ep_t == MAX_EP_STEP - 1 else done
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(b)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    # Update global net and assign the result to the local net.
                    if done:
                        v_s_ = 0  # terminal state: bootstrap value is zero
                    else:
                        v_s_ = SESS.run(self.AC.v, {self.AC.s: s_})[0, 0]
                    # Discounted n-step value targets, computed backwards.
                    buffer_v_target = []
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s = np.vstack(buffer_s)
                    buffer_a = np.vstack(buffer_a)
                    buffer_v_target = np.vstack(buffer_v_target)
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                s = s_
                total_step += 1
                if done:
                    # Exponentially-smoothed running episode reward.
                    if len(GLOBAL_RUNNING_R) == 0:
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(
                            0.9 * GLOBAL_RUNNING_R[-1] + 0.1 * ep_r)
                    if self.name == "W_0":
                        print(self.name, "Ep:", GLOBAL_EP, "Ep_r:", ep_r)
                    # NOTE(review): GLOBAL_EP is mutated from multiple worker
                    # threads without a lock -- races are possible; confirm
                    # whether the original relies on the GIL here.
                    GLOBAL_EP += 1
                    if GLOBAL_EP % SAVE_INTERVAL == 0:
                        print("Versuche zu Speichern...")
                        self.AC.save_global()
                        print("...gespeichert!")
                    break