def run(self):
    """Worker loop: receive actions over the parent pipe, step the Mario
    emulator, maintain a sliding 4-frame observation window, and stream
    rewards back to the trainer process."""
    super(MarioEnv, self).run()
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    # Prime the trainer with a zero-reward, not-done request so it sends
    # the first action.
    self.request_action(0, False)
    while True:
        act = self.child_conn.recv()
        frame, reward, done, info = self.env.step(act)
        self.steps += 1
        self.accum_reward += reward
        frame = rgb2dataset(frame)
        # Only worker 0 renders, and only when rendering is enabled.
        if self.is_render and self.idx == 0:
            self.env.render()
        # Slide the frame window: append newest, drop oldest past 4.
        self.transition.append(frame)
        if len(self.transition) > 4:
            self.transition.pop(0)
        if done:
            # Report final x position, then start a fresh episode.
            self.send_result(info['x_pos'])
            self.reset()
        # Equivalent to the original's per-branch calls with True/False.
        self.request_action(reward, done)
def run(self):
    """A3C worker loop: builds a local actor-critic, syncs it from the
    shared global net, interacts with its own Mario emulator, and trains
    every `update_iter` steps.

    Fix vs. original: buffered states are now *copies* of the frame
    stack.  The original appended `self.transition` itself, a mutable
    list that is subsequently mutated (append/pop each step, cleared on
    reset), so every entry in `buffer_state` aliased the same object.
    """
    super(MarioEnv, self).run()
    # Local model; weights pulled from the shared global net before play.
    self.model = A3C(
        self.s_dim,
        self.a_dim,
        gamma=0.95,
        epsilon_start=1.0,
        epsilon_end=0.1,
        epsilon_length=100000,
        use_cuda=self.use_cuda,
    )
    self.model.l_net.load_state_dict(self.g_net.state_dict())
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    while True:
        # Act randomly until a full 4-frame stack is available.
        if len(self.transition) != 4:
            action = self.model.get_action(self.transition, is_random=True)
        else:
            action = self.model.get_action(self.transition, is_random=False)
        next_state, reward, done, info = self.env.step(action)
        self.steps += 1
        self.accum_reward += reward
        next_state = rgb2dataset(next_state)
        if self.is_render and self.idx == 0:
            self.env.render()
        # Record (state, action, reward) for the n-step update.
        # Copy the stack so later mutations don't corrupt the buffer.
        self.buffer_state.append(list(self.transition))
        self.buffer_action.append(action)
        self.buffer_reward.append(reward)
        if len(self.buffer_state) > 0 and self.steps % self.update_iter == 0:
            # Bootstrap target: the stack shifted forward by one frame.
            next_transition = self.transition[1:]
            next_transition.append(next_state)
            self.train(next_transition, done)
            self.buffer_state.clear()
            self.buffer_action.clear()
            self.buffer_reward.clear()
        # make a transition
        self.transition.append(next_state)
        if len(self.transition) > 4:
            self.transition.pop(0)
        if done:
            # NOTE(review): transitions buffered since the last train()
            # survive into the next episode and are trained together with
            # it -- confirm this cross-episode mixing is intended.
            self.send_result(info['x_pos'])
            self.reset()
def reset(self):
    """Begin a new episode: reset the emulator, restart the frame window
    with the first observation, and zero the per-episode counters."""
    raw_obs = self.env.reset()
    processed = rgb2dataset(raw_obs)
    # The window restarts with a single frame; callers grow it to 4.
    self.transition.clear()
    self.transition.append(processed)
    self.steps = 0
    self.episodes += 1
    self.accum_reward = 0
def reset(self):
    """Begin a new episode: reset the emulator and rebuild the 4x84x84
    frame stack, zero-filled with the fresh observation in the newest
    (last) slot.  Also resets the per-episode counters."""
    first_frame = rgb2dataset(self.env.reset())
    stack = np.zeros([4, 84, 84])
    stack[-1, :] = first_frame
    self.transition = stack
    self.steps = 0
    self.episodes += 1
    self.accum_reward = 0
def run(self):
    """Worker loop (numpy frame-stack variant): receive actions over the
    pipe, step the emulator, and report rewards back to the trainer.

    The episode is additionally flagged as "force done" when the env
    returns reward == -15, the death penalty in gym-super-mario-bros,
    so the learner can treat a death as terminal before `done` flips.

    Fix vs. original: the `if done / else` branches both issued the
    identical `request_action(reward, force_done)` call; collapsed to a
    single call after the conditional (behavior unchanged).
    """
    super(MarioEnv, self).run()
    self.env = gym_super_mario_bros.make(self.env_id)
    self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
    self.reset()
    print('[ Worker %2d ] ' % (self.idx), end='')
    print('Playing <', self.env_id, '>')
    # Prime the trainer so it sends the first action.
    self.request_action(0, False)
    while True:
        action = self.child_conn.recv()
        # print(SIMPLE_MOVEMENT[action])
        next_state, reward, done, info = self.env.step(action)
        # Death penalty => terminal signal for the learner.
        force_done = reward == -15
        self.steps += 1
        self.accum_reward += reward
        next_state = rgb2dataset(next_state)
        if self.is_render and self.idx == 0:
            self.env.render()
        # Slide the 4-frame window in place on the numpy stack.
        self.transition[:3, :, :] = self.transition[1:, :, :]
        self.transition[3, :, :] = next_state
        if done:
            # Report the x position recorded on the *previous* step.
            # NOTE(review): self.prev_xpos is first assigned at the
            # bottom of this loop -- if it is not initialised in
            # __init__, a first-step termination raises AttributeError;
            # confirm against the constructor.
            self.send_result(self.prev_xpos)
            self.reset()
        self.request_action(reward, force_done)
        self.prev_xpos = info['x_pos']
epsilon_end=0.01, epsilon_length=300000, use_cuda=use_cuda, lr=lr, replay_buffer_size=replay_buffer_size, train_start_step=train_start_step, batch_size=batch_size, target_update_interval=target_update_interval, train_step_interval=train_step_interval) writer = SummaryWriter(log_dir='runs/ddqn') while model.episode < max_episode: state = env.reset() state = rgb2dataset(state) model.episode += 1 accum_reward = 0 # Transition transition = [] transition.append(state) while True: if len(transition) == 4: action = model.get_action(transition, is_random=False) else: action = model.get_action(transition, is_random=True) state_, reward, done, info = env.step(action)