# Scripted-policy demo on pybullet's KukaDiverseObjectEnv.
from pybullet_envs.bullet.kuka_diverse_object_gym_env import KukaDiverseObjectEnv


def main():
    env = KukaDiverseObjectEnv(renders=True, isDiscrete=False)
    policy = ContinuousDownwardBiasPolicy()
    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render(mode='human')
            act = policy.sample_action(obs, .1)
            print("Action")
            print(act)
            # Step with the sampled action (the original listing passed a
            # zero action here, which left `act` unused).
            obs, rew, done, _ = env.step(act)
            episode_rew += rew
        print("Episode reward", episode_rew)
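# `ContinuousDownwardBiasPolicy` is not defined in this listing; below is a
# minimal sketch of such a policy, modeled on the scripted policy in
# pybullet's enjoy_kuka_diverse_object_grasping example. It samples a random
# 5-D continuous action and, with high probability, overrides the vertical
# component so the gripper tends to descend toward the tray. The
# `height_hack_prob` default is an assumption, not taken from this document.
import numpy as np
from gym import spaces


class ContinuousDownwardBiasPolicy:
    """Samples random continuous actions, biased to move the arm down."""

    def __init__(self, height_hack_prob=0.9):
        # Probability of forcing a full downward step on the z-axis.
        self.height_hack_prob = height_hack_prob
        self._action_space = spaces.Box(low=-1, high=1, shape=(5,))

    def sample_action(self, obs, explore_prob):
        """Return [dx, dy, dz, da, close], with dz biased toward -1."""
        dx, dy, dz, da, close = self._action_space.sample()
        if np.random.random() < self.height_hack_prob:
            dz = -1
        return [dx, dy, dz, da, close]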
# Per-episode rollout loop (fragment of the training script). It assumes the
# surrounding code defines `episode`, `state`, `env`, `agent`, MAX_EPISODES,
# RAND_EPS, `train_summary_writer`, and `state_size` (e.g. (48, 48, 3)).
episodic_reward = 0
frames = []
steps = 0
while True:
    # record frames during the last few episodes
    if episode > MAX_EPISODES - 3:
        frames.append(env.render(mode='rgb_array'))

    # take an action as per the policy
    if episode < RAND_EPS:  # explore for some episodes
        action = env.action_space.sample()
    else:
        action = agent.policy(state)

    # obtain next state and rewards
    next_obsv, reward, done, info = env.step(action)
    next_state = np.asarray(next_obsv, dtype=np.float32) / 255.0  # convert into float array
    tb_img = np.reshape(next_state, (-1,) + state_size)  # for tensorboard

    with train_summary_writer.as_default():
        tf.summary.image("Training Image", tb_img, step=episode)
        tf.summary.histogram("action_vector", action, step=steps)

    episodic_reward += reward
    state = next_state  # roll the state forward (assumed; not shown in the fragment)
    steps += 1          # advance the per-step counter used by the histogram summary
    if done:            # end the episode (assumed; the fragment is truncated here)
        break
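# For completeness, a minimal sketch of two pieces the fragment assumes but
# does not show: `train_summary_writer`, created with TensorFlow 2's summary
# API, and a small helper that writes the collected `frames` out as a GIF via
# imageio. The log directory, file name, and fps below are illustrative
# choices, not taken from the original script.
import imageio
import tensorflow as tf

# TensorBoard writer used by the tf.summary.* calls in the loop above.
train_summary_writer = tf.summary.create_file_writer("logs/train")


def save_frames_as_gif(frames, path="kuka_last_episodes.gif", fps=30):
    """Write the rendered RGB frames of the final episodes to a GIF."""
    imageio.mimsave(path, frames, fps=fps)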