import random

import numpy as np
import tensorflow as tf

# C (action ids and camera constants) and U (observation helpers) are
# project-local modules assumed to be imported alongside these methods.


def play_train(self, continues_attack=False, verbose=False):
    state_last = None
    action_last = None

    self.safe_action(C._NO_OP, 0, [])
    self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
    self._gases = U.find_initial_gases(self.obs)

    while True:
        self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
        if self.policy_flag and (not self.is_end):
            # Build the state: rule-mapped scalar features concatenated with
            # the encoded latent of the simple map data.
            non_image_feature = self.mapping_source_to_mini_by_rule(
                self.get_the_input())
            image_feature = U.get_simple_map_data(self.obs)
            latent_image_feature, mu, logvar = self.encode_obs(image_feature)
            feature = np.concatenate(
                [non_image_feature, latent_image_feature], axis=-1)

            reward_last = 0
            state_now, action, v_preds = self.get_action(feature, reward_last)
            self.mini_step(action)

            if state_last is not None:
                v_preds_next = self.net.policy.get_values(state_now)
                v_preds_next = self.get_values(v_preds_next)
                reward = 0
                self.local_buffer.append(state_last, action_last, state_now,
                                         reward, v_preds, v_preds_next)

            state_last = state_now
            action_last = action
            self.policy_flag = False

        if self.is_end:
            if self.rl_training:
                # self.result['reward'] is the terminal outcome
                # (+1 win, 0 draw, -1 loss); add it to the last reward.
                self.local_buffer.rewards[-1] += 1 * self.result['reward']
                self.global_buffer.add(self.local_buffer)
            break
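
# A minimal sketch (not the repo's real class) of the transition-buffer
# interface that play_train() assumes: local_buffer.append() takes
# (state, action, next_state, reward, v_pred, v_pred_next), exposes a
# mutable .rewards list, and global_buffer.add() merges whole episodes.
# All names below are illustrative.
class TransitionBuffer:

    def __init__(self):
        self.states, self.actions, self.next_states = [], [], []
        self.rewards, self.v_preds, self.v_preds_next = [], [], []

    def append(self, state, action, next_state, reward, v_pred, v_pred_next):
        self.states.append(state)
        self.actions.append(action)
        self.next_states.append(next_state)
        self.rewards.append(reward)
        self.v_preds.append(v_pred)
        self.v_preds_next.append(v_pred_next)

    def add(self, episode_buffer):
        # Merge a finished local (episode) buffer into this global buffer.
        self.states += episode_buffer.states
        self.actions += episode_buffer.actions
        self.next_states += episode_buffer.next_states
        self.rewards += episode_buffer.rewards
        self.v_preds += episode_buffer.v_preds
        self.v_preds_next += episode_buffer.v_preds_next
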
def sample(self, verbose=False, use_image=False):
    state_last = None
    action_last = None

    # Seed everything with the id used in the file name so each
    # recorded trajectory is reproducible.
    random_generated_int = random.randint(0, 2**31 - 1)
    filename = self.extract_save_dir + "/" + str(random_generated_int) + ".npz"
    recording_obs = []
    recording_img = []
    recording_action = []
    np.random.seed(random_generated_int)
    tf.set_random_seed(random_generated_int)

    self.safe_action(C._NO_OP, 0, [])
    self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
    self._gases = U.find_initial_gases(self.obs)

    while True:
        self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
        if self.policy_flag and (not self.is_end):
            state_now = self.mapping_source_to_mini_by_rule(
                self.get_the_input())
            recording_obs.append(state_now)
            if use_image:
                recording_img.append(U.get_simple_map_data(self.obs))

            action, v_preds = self.net.policy.get_action(state_now,
                                                         verbose=False)
            recording_action.append(action)
            self.mini_step(action)

            if state_last is not None:
                v_preds_next = self.net.policy.get_values(state_now)
                v_preds_next = self.get_values(v_preds_next)
                reward = 0
                self.local_buffer.append(state_last, action_last, state_now,
                                         reward, v_preds, v_preds_next)

            state_last = state_now
            action_last = action
            self.policy_flag = False

        if self.is_end:
            # Note: the saved dtypes cap the stored values (uint16 obs,
            # uint8 actions); counts beyond those ranges would overflow.
            recording_obs = np.array(recording_obs, dtype=np.uint16)
            recording_action = np.array(recording_action, dtype=np.uint8)
            if not use_image:
                np.savez_compressed(filename, obs=recording_obs,
                                    action=recording_action)
            else:
                recording_img = np.array(recording_img, dtype=np.float16)
                np.savez_compressed(filename, obs=recording_obs,
                                    img=recording_img,
                                    action=recording_action)
            break
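
# A minimal sketch of reading back a trajectory saved by sample() above.
# The path handling and function name are illustrative; the keys ('obs',
# 'action', and 'img' when use_image was set) match the
# np.savez_compressed call.
def load_sampled_trajectory(path, use_image=False):
    data = np.load(path)
    obs = data['obs'].astype(np.float32)      # saved as uint16
    action = data['action'].astype(np.int64)  # saved as uint8
    if use_image:
        img = data['img'].astype(np.float32)  # saved as float16
        return obs, img, action
    return obs, action
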
def sample(self, verbose=False, use_image=True):
    # Variant of sample() that also records the raw image features and a
    # per-step reward channel encoding the final game result.
    state_last = None
    action_last = None

    random_generated_int = random.randint(0, 2**31 - 1)
    filename = self.extract_save_dir + "/" + str(random_generated_int) + ".npz"
    recording_obs = []
    recording_img = []
    recording_action = []
    recording_reward = []
    np.random.seed(random_generated_int)
    tf.set_random_seed(random_generated_int)

    self.safe_action(C._NO_OP, 0, [])
    self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
    self._gases = U.find_initial_gases(self.obs)

    while True:
        self.safe_action(C._MOVE_CAMERA, 0, [C.base_camera_pos])
        if self.policy_flag and (not self.is_end):
            non_image_feature = self.mapping_source_to_mini_by_rule(
                self.get_the_input())
            image_feature = U.get_simple_map_data(self.obs)
            latent_image_feature, mu, logvar = self.encode_obs(image_feature)
            feature = np.concatenate(
                [non_image_feature, latent_image_feature], axis=-1)

            reward_last = 0
            state_now, action, v_preds = self.get_action(feature, reward_last)
            self.mini_step(action)

            reward = 0
            recording_obs.append(non_image_feature)
            recording_img.append(image_feature)
            recording_action.append(action)
            recording_reward.append(reward)
            # The PPO buffer append used in play_train() is disabled here;
            # sampling only writes trajectories to disk.

            state_last = state_now
            action_last = action
            self.policy_flag = False

        if self.is_end:
            # Map the win/loss result onto the reward channel:
            # 0 (not ended), 1 (loss), 2 (draw), 3 (win).
            recording_reward[-1] = 1 * self.result['reward'] + 2
            if recording_reward[-1] != 0:
                print("result is:", recording_reward[-1])
            recording_obs = np.array(recording_obs, dtype=np.uint16)
            recording_action = np.array(recording_action, dtype=np.uint8)
            recording_reward = np.array(recording_reward, dtype=np.uint8)
            recording_img = np.array(recording_img, dtype=np.float16)
            np.savez_compressed(filename, obs=recording_obs,
                                img=recording_img,
                                action=recording_action,
                                reward=recording_reward)
            break
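
# A minimal sketch of decoding the reward channel written by this variant:
# every step stores 0, and the final step stores result + 2, i.e.
# 1 = loss, 2 = draw, 3 = win. The function name is illustrative.
def decode_final_result(reward_array):
    mapping = {1: -1, 2: 0, 3: 1}
    # Returns the game outcome (-1/0/+1), or None if the episode never ended.
    return mapping.get(int(reward_array[-1]))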