def agent():
    """Interactive driver for a remote environment.

    Connects a RemoteEnv on localhost, runs up to 3 episodes (each started
    with a "restart" command), and inside each episode repeatedly reads an
    action from stdin, applies it for up to 8 environment steps, and prints
    the decoded 20x20 frame plus reward / touch / signal / done info.

    Fix: the inner frame loop previously reused the loop variable ``i``,
    clobbering the outer episode counter, so the trailing ``if i >= 2: break``
    tested the frame index instead of the episode number. Distinct loop
    variables restore the intended "stop after 3 episodes" behavior. Dead
    locals (`prev_energy`, an immediately-overwritten `frame`) and
    commented-out code were removed.
    """
    env = RemoteEnv(IN_PORT=8080, OUT_PORT=7070, host="127.0.0.1")
    env.open(0)
    for episode in range(10000000):
        state = env.step("restart")
        done = state['done']
        print("OK")
        while not done:
            action = int(input('action'))
            reward_sum = 0
            touched = np.zeros(8)
            # Repeat the chosen action for up to 8 environment steps
            # (frame-skip), accumulating reward and per-step touch sensors.
            for frame_idx in range(8):
                state = env.step("act", action)
                touched[frame_idx] = state['touched']
                reward_sum += state['reward']
                if state['done']:
                    break
            done = state['done']
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            print('Reward: ', reward_sum)
            print('Touched: ', touched)
            print('Signal: ', state['signal'])
            print('Done: ', state['done'])
        if episode >= 2:
            break
    env.close()
def get_frame_from_fields(fields, touchs):
    """Decode the frame in *fields* and stack a proprioception row under it.

    The extra row starts as 0.0001 everywhere; its first TOUCH_SIZE entries
    are overwritten with the touch-sensor values, cast to float32, then
    stacked beneath the decoded image.

    NOTE(review): assumes image_decode yields a 2-D array whose row width
    equals ARRAY_SIZE so that np.vstack succeeds — confirm.
    """
    frame = image_decode(fields['frame'], 20, 20)
    prop_row = np.zeros(ARRAY_SIZE) + 0.0001
    for idx in range(TOUCH_SIZE):
        prop_row[idx] = touchs[idx]
    prop_row = np.array(prop_row, dtype=np.float32)
    return np.vstack([frame, prop_row])
def get_frame_from_fields(frame, touchs):
    """Decode *frame* as uint8 and stack a uint8 proprioception row under it.

    The first TOUCH_SIZE entries of the row carry the touch-sensor values;
    the rest stay zero.

    NOTE(review): this redefines get_frame_from_fields from earlier in the
    file with a different signature (raw frame instead of a fields dict);
    at import time this later definition shadows the earlier one — confirm
    which one callers intend.
    """
    img = image_decode(frame, 20, 20, np.uint8)
    row = np.zeros(ARRAY_SIZE)
    for k in range(TOUCH_SIZE):
        row[k] = touchs[k]
    row = np.array(row, dtype=np.uint8)
    return np.vstack([img, row])
def reset(self, env):
    """Restart the remote episode and rebuild the frame history.

    Performs a random number (0-14) of random warm-up actions, then fills
    the history deque with H_SIZE copies of the final decoded frame, and
    returns the state built by Agent.__make_state__ from the history plus
    the environment's reported reward history.
    """
    self.h = deque(maxlen=H_SIZE)
    env_info = env.remoteenv.step("restart")
    time.sleep(1)  # give the remote side a moment to settle after restart
    warmup_steps = np.random.choice(15)  # random int in [0, 15)
    for _ in range(warmup_steps):
        env_info = env.one_step(np.random.choice([0, 1, 2]))
    last_frame = image_decode(env_info['frame'], 20, 20, dtype=np.float32)
    self.h.extend(last_frame for _ in range(H_SIZE))
    return Agent.__make_state__(self.h, np.array(env_info['reward_hist']))
def reset(self, env):
    """Restart the episode and seed the frame sequence.

    Sets the remote move granularity to 1, restarts the episode, takes 0-14
    random warm-up actions, pushes four copies of the final decoded frame
    onto self.seq, and returns the assembled state.
    """
    env.remoteenv.step("SetNMoves", 1)
    env_info = env.remoteenv.step("restart")
    n_warmup = np.random.choice(15)  # random int in [0, 15)
    for _ in range(n_warmup):
        env_info = env.one_step(np.random.choice([0, 1, 2]))
    img = image_decode(env_info['frame'], 20, 20)
    # Seed the sequence with four identical frames.
    for _ in range(4):
        self.seq.append(img)
    return Agent.__make_state__(self.seq)
def act(self, env, action, info=None):
    """Apply *action*, then (if the episode is not done) one step of action 3.

    Accumulates the reward over the one or two environment steps, decodes
    the resulting frame into the history deque, and returns a tuple of
    (state, summed_reward, done, env_info).
    """
    env_info = env.one_step(action)
    total_reward = env_info['reward']
    # The original wrote this as `for k in range(1)`, i.e. exactly one
    # optional follow-up step with action 3 when the episode is still live.
    if not env_info['done']:
        env_info = env.one_step(3)
        total_reward += env_info['reward']
    frame = image_decode(env_info['frame'], 20, 20, dtype=np.float32)
    self.h.append(frame)
    state = Agent.__make_state__(self.h, np.array(env_info['reward_hist']))
    return state, total_reward, env_info['done'], env_info
def get_frame_from_fields(fields):
    """Return the 20x20 decoded image stored under fields['frame']."""
    return image_decode(fields['frame'], 20, 20)
def transform(frame):
    """Decode *frame* to a 20x20 image via image_decode."""
    decoded = image_decode(frame, 20, 20)
    return decoded
def step(self, action):
    """Step the wrapped env and return (decoded_obs, reward, done, info)."""
    obs, reward, done, info = self.env.step(action)
    decoded = image_decode(obs)
    return decoded, reward, done, info
def reset(self):
    """Reset the wrapped env, cache the decoded observation, and return it.

    Fix: the original stored the decoded frame on self.state but returned
    None, unlike the sibling step() wrapper which returns the decoded
    observation. Returning the cached state makes `obs = wrapper.reset()`
    work and stays backward-compatible with callers that ignore the return
    value.
    """
    raw = self.env.reset()
    self.state = image_decode(raw)
    return self.state
def to_image(img):
    """Decode *img* via image_decode with width/height arguments of 0.

    NOTE(review): other call sites in this file pass (20, 20); the (0, 0)
    here may rely on image_decode inferring the size — confirm.
    """
    return image_decode(img, 0, 0)
def act(self, env, action, info=None):
    """Run one (possibly frame-skipped) step via self.__step and record it.

    Decodes the resulting 20x20 frame onto self.seq and returns a tuple of
    (state_sequence, reward_sum, done, env_info).
    """
    reward_sum, done, env_info = self.__step(env, action)
    decoded = image_decode(env_info['frame'], 20, 20)
    self.seq.append(decoded)
    state = Agent.__make_state__(self.seq)
    return state, reward_sum, done, env_info