def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(torch.Tensor(o.reshape(1, -1)))[0]
        o, r, d, _ = env.step(a.data.numpy()[0])
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=10):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        a = get_action(o)
        # print(a)
        # Inject stochasticity into the action:
        # log_std = -0.5 * np.ones(3, dtype=np.float32)
        # log_std = torch.nn.Parameter(torch.as_tensor(log_std))
        # std = torch.exp(log_std)
        # pi = Normal(torch.as_tensor(a), std)
        # a = pi.sample()
        # a = a.numpy()
        # print(" ", a)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if ep_ret == 10:
            print("Success!")

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True,
               sleep=1e-3, log=True, verbose=True, reset_state=None,
               q_action=None, action_parameters=None, random=False):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    if log:
        logger = EpochLogger()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    o = env.reset() if reset_state is None else set_state(reset_state, env)
    action_parameters = {} if action_parameters is None else action_parameters
    while n < num_episodes:
        img = None
        if render:
            img = env.render(mode='rgb_array')
            time.sleep(sleep)

        if ep_len == 0 and q_action is not None:
            a = q_action
        elif random:
            a = env.action_space.sample()
        else:
            a = get_action(o, **action_parameters)
        o_prev = o
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        results = {'img': img, 'a': a, 'r': r, 'd': d, 'score': ep_ret,
                   't': ep_len, 'o': o_prev}
        yield results

        if d or (ep_len == max_ep_len):
            if log:
                logger.store(EpRet=ep_ret, EpLen=ep_len)
            if verbose:
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            # Reset the per-episode counters but not n, otherwise the loop never terminates.
            r, d, ep_ret, ep_len = 0, False, 0, 0
            o = env.reset() if reset_state is None else set_state(reset_state, env)
            n += 1

    if log:
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', average_only=True)
        logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    def unscale_action(action_space, scaled_action):
        """
        Rescale the action from [-1, 1] to [low, high]
        (no need for symmetric action space)

        :param action_space: (gym.spaces.box.Box)
        :param scaled_action: (np.ndarray)
        :return: (np.ndarray)
        """
        low, high = action_space.low, action_space.high
        return low + (0.5 * (scaled_action + 1.0) * (high - low))

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        unscaled_action = unscale_action(env.action_space, a)
        o, r, d, _ = env.step(unscaled_action)
        # time.sleep(0.1)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
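# Illustrative sketch (not part of the variants in this file): how a helper like
# the unscale_action defined inside the previous variant maps a tanh-squashed
# action in [-1, 1] back onto an environment's Box bounds. The Box bounds and
# sample action below are hypothetical, chosen only for demonstration.
import numpy as np
from gym.spaces import Box

def unscale_action(action_space, scaled_action):
    # Linearly rescale each component from [-1, 1] to [low, high].
    low, high = action_space.low, action_space.high
    return low + (0.5 * (scaled_action + 1.0) * (high - low))

action_space = Box(low=np.array([-2.0, 0.0]), high=np.array([2.0, 1.0]), dtype=np.float32)
scaled = np.array([0.0, -1.0])                 # policy output in [-1, 1]
print(unscale_action(action_space, scaled))    # -> [0. 0.]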
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    goal_env = hasattr(env, 'goal')

    def cat_obs(o):
        return np.concatenate([o['observation'], o['desired_goal']], axis=-1)

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    if goal_env:
        o = cat_obs(o)
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1
        if goal_env:
            o = cat_obs(o)

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if goal_env:
                o = cat_obs(o)
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    all_feats = []
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render(episode=n)
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        ep_ret += r
        ep_len += 1
        if "all_feats" in info.keys():
            all_feats.append(info["all_feats"])

        if d or (ep_len == max_ep_len):
            print(f"Coeff: {o[-env.coeff_dim:]}")
            print(f"All feats", np.array(all_feats).sum(axis=0))
            # import pdb; pdb.set_trace()
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            all_feats = []
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, policy, max_ep_len=None, num_episodes=100, render=True):
    logger = EpochLogger()
    obs, reward, done, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        action = policy(obs)
        obs, reward, done, _ = env.step(action)
        ep_ret += reward
        ep_len += 1

        if done or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            obs, reward, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, gamma=1, key='danger'):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n, ep_info = env.reset(), 0, False, 0, 0, 0, 0
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        ep_info = max(ep_info, info[key])
        ep_ret += r * gamma**ep_len
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len, perf=ep_ret, fail=ep_info)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len, ep_info = env.reset(), 0, False, 0, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.log_tabular('perf', average_only=True)
    logger.log_tabular('fail', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=1000, out_name="", render=False):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    num_episodes = 1000
    results = {}
    while n < num_episodes:
        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            results[n] = ep_len
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    with open("outputs/" + out_name + ".json", 'w') as f:
        json.dump(results, f)

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_adversarial_policy(env, ego_action, opp_action, env_init, ego_agent, opp_agent,
                           max_ep_len=None, num_episodes=100, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    init_positions = np.random.random_integers(0, 1)
    o = env.reset({
        'x': env_init['initial_x'][init_positions],
        'y': env_init['initial_y'],
        'theta': env_init['initial_theta']
    })
    while n < num_episodes:
        if render:
            env.render()
            # time.sleep(1e-3)

        # Convert o to RL obs
        RLobs = ego_agent.process_obs(o)
        Oppobs = opp_agent.process_obs(o)

        # Take deterministic actions at test time
        a = ego_action(RLobs, action_mask=ego_agent.aval_paths, deterministic=True)
        ego_speed, ego_steer, a = ego_agent.plan(o, a)

        # Opponent decision
        a_opp = opp_action(Oppobs, action_mask=opp_agent.aval_paths, deterministic=True)
        opp_speed, opp_steer, _ = opp_agent.plan(o, a_opp)

        action = {
            'ego_idx': 0,
            'speed': [ego_speed, opp_speed],
            'steer': [ego_steer, opp_steer]
        }
        o, r, d, _ = env.step(action)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            init_positions = np.random.random_integers(0, 1)
            o, r, d, ep_ret, ep_len = env.reset({
                'x': env_init['initial_x'][init_positions],
                'y': env_init['initial_y'],
                'theta': env_init['initial_theta']
            }), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, env_name=None):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    num_violations = 0
    num_target = 0
    violations = []
    target = []
    hit_by_opponent = 0
    score_hit = 0
    avoid_opponent = 0
    hit_feat_counts = []
    score_feat_counts = []
    avoid_feat_counts = []
    episode_feat_counts = []
    pellet_counts = 0
    power_pellet_counts = 0
    eat_ghost_counts = 0
    eat_cherry_counts = 0
    hit_ghost_counts = 0
    pellet_feat_counts = []
    power_feat_counts = []
    eat_ghost_feat_counts = []
    eat_cherry_feat_counts = []
    hit_ghost_feat_counts = []
    ep_scores = []
    demo_obs = []
    demo_acs = []
    prev_ale = 3
    curr_ale = 3
    while n < num_episodes:
        # hit_by_opponent = 0
        # score_hit = 0
        # avoid_opponent = 0
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        # print(r)
        if env_name == 'Boxing-ram-v0':
            if r == 0:
                avoid_opponent += 1
            elif r < 0:
                hit_by_opponent -= int(r)
            else:
                score_hit += int(r)
        if env_name == 'MsPacman-ram-v0':
            curr_ale = env.ale.lives()
            if r == 10:
                pellet_counts += 1
            if r == 50:
                power_pellet_counts += 1
            if r == 200 or r == 400 or r == 800 or r == 1600:
                eat_ghost_counts += 1
            if r == 100:
                eat_cherry_counts += 1
            else:
                if curr_ale == prev_ale - 1:
                    hit_ghost_counts += 1
            prev_ale = curr_ale
        if env_name == 'reacher':
            if info['constraint']:
                num_violations += 1
            if env.get_features()[0]:
                num_target += 1
        ep_ret += r
        ep_len += 1
        demo_obs.append(o)
        demo_acs.append(a)

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            if env_name == 'reacher':
                print('Violations %d, Target %d' % (num_violations, num_target))
            if env_name == 'Boxing-ram-v0':
                print("damage %d" % hit_by_opponent)
                print("scores %d" % score_hit)
                print("avoid %d" % avoid_opponent)
            if env_name == 'MsPacman-ram-v0':
                print("pellet %d" % pellet_counts)
                print("power pellet %d" % power_pellet_counts)
                print("ghosts eaten %d" % eat_ghost_counts)
                print("cherry %d" % eat_cherry_counts)
                print("hit ghost %d" % hit_ghost_counts)
            if env_name == 'Boxing-ram-v0':
                episode_feat_counts.append([hit_by_opponent, score_hit, avoid_opponent, ep_ret])
            if env_name == 'MsPacman-ram-v0':
                episode_feat_counts.append([pellet_counts, power_pellet_counts, eat_ghost_counts,
                                            eat_cherry_counts, hit_ghost_counts])
            ep_scores.append(ep_ret)
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            violations.append(num_violations)
            target.append(num_target)
            hit_feat_counts.append(hit_by_opponent)
            score_feat_counts.append(score_hit)
            avoid_feat_counts.append(avoid_opponent)
            num_violations = 0
            num_target = 0
            hit_by_opponent = 0
            avoid_opponent = 0
            score_hit = 0
            pellet_counts = 0
            power_pellet_counts = 0
            eat_ghost_counts = 0
            eat_cherry_counts = 0
            hit_ghost_counts = 0
            prev_ale = 3
            n += 1

    # Use the env_name argument here (rather than a global args.env_name) for consistency
    # with the checks above.
    if env_name == 'reacher':
        print(violations)
        print(target)
    if env_name == 'Boxing-ram-v0':
        features = {'Features': episode_feat_counts, "Obs": demo_obs, "Scores": ep_scores, "Acs": demo_acs}
        pickle.dump(features, open('boxing_demos.pkl', 'wb'))
    if env_name == 'MsPacman-ram-v0':
        features = {'Features': episode_feat_counts, "Scores": ep_scores, "Obs": demo_obs, "Acs": demo_acs}
        pickle.dump(features, open('pacman_demos.pkl', 'wb'))

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=False, params={}, verbose=False):
    from upn.visualize.render import forward_env
    from numpngw import write_apng
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    test_envs, test_env_names = [], params["test_env_names"][0]
    for name in test_env_names:
        test_envs.append(gym.make(name))

    logger = EpochLogger()
    for env_name, env in zip(test_env_names, test_envs):
        all_feats = []
        all_rews = []
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        coeff = o[-env.coeff_dim:]
        acs = []
        pbar = tqdm(total=num_episodes)
        while n < num_episodes:
            # import pdb; pdb.set_trace()
            if render:
                env.render()
                time.sleep(1e-3)

            # import pdb; pdb.set_trace()
            a = get_action(o)
            acs.append(a)
            o, r, d, info = env.step(a)
            ep_ret += r
            ep_len += 1
            if "all_feats" in info.keys():
                all_feats.append(info["all_feats"])

            if d or (ep_len == max_ep_len):
                if verbose:
                    print(f"Coeff: {coeff}")
                    print(f"All feats", np.array(all_feats).sum(axis=0))
                    # import pdb; pdb.set_trace()
                logger.store(**{f"{env_name}_EpRet": ep_ret})
                logger.store(**{f"{env_name}_EpLen": ep_len})
                # logger.store(EpRet=ep_ret, EpLen=ep_len)
                all_rews.append(ep_ret)
                if verbose:
                    print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
                print(f"{env_name}: reward {ep_ret:.03f}")
                if render:
                    frames = forward_env(env, np.array(acs), batch=False, subrender=False, resize=0.4)
                    fps = 10
                    fname = f"{env_name}_{n:02d}_rew_{ep_ret:.03f}.png"
                    # os.makedirs(osp.dirname(fname), exist_ok=True)
                    write_apng(os.path.join(args.folder, fname), frames, delay=1000 / fps)
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                o = env.reset()
                all_feats = []
                acs = []
                n += 1
                pbar.update(1)

        print(f"{env_name}: mean reward {np.mean(all_rews):.03f}")
        pbar.close()
        logger.log_tabular(f'{env_name}_EpRet', with_min_and_max=True)
        logger.log_tabular(f'{env_name}_EpLen', average_only=True)
        logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, seed=None):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    success_num = 0
    Handlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    CMAESlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    DRLlog = {
        'maxVelocity': np.array([]),
        'maxTorque': np.array([]),
        'successNum': 0
    }
    tmpMaxVelocity = np.array([])
    tmpMaxTorque = np.array([])
    env.__init__("GUI", seed=seed)
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    a = get_action(o)
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        for i in range(25):
            o, r, d, o_dict = env.step(a)
            time.sleep(1 / SIMULATIONFREQUENCY)
            tmpMaxTorque = np.append(tmpMaxTorque, np.abs(o_dict['torque']).max())
            tmpMaxVelocity = np.append(tmpMaxVelocity, np.abs(o_dict['velocity']).max())
        # time.sleep(1/SIMULATIONFREQUENCY)
        # if env.t > 2:
        #     input("hhh")
        ep_ret += r
        ep_len += 1

        # d = False
        # if d or (ep_len == max_ep_len):
        if ep_len == max_ep_len:
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            satisfy = d
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            # satisfy = input("Is it satisfying? y or n:\n")
            if satisfy:
                print("done!")
                success_num += 1
                if len(DRLlog['maxVelocity']) != 0:
                    DRLlog['maxVelocity'] += tmpMaxVelocity
                    DRLlog['maxVelocity'] /= success_num
                    DRLlog['maxTorque'] += tmpMaxTorque
                    DRLlog['maxTorque'] /= success_num
                else:
                    DRLlog['maxVelocity'] = tmpMaxVelocity
                    DRLlog['maxTorque'] = tmpMaxTorque

            # tmpMaxVelocity, tmpMaxTorque, success = run_Hand(env)
            # if len(Handlog['maxVelocity']) != 0:
            #     Handlog['maxVelocity'] += tmpMaxVelocity
            #     Handlog['maxVelocity'] /= success_num
            #     Handlog['maxTorque'] += tmpMaxTorque
            #     Handlog['maxTorque'] /= success_num
            # else:
            #     Handlog['maxVelocity'] = tmpMaxVelocity
            #     Handlog['maxTorque'] = tmpMaxTorque

            # tmpMaxVelocity, tmpMaxTorque = run_CMAES()
            # if len(Handlog['maxVelocity']) != 0:
            #     CMAESlog['maxVelocity'] += tmpMaxVelocity
            #     CMAESlog['maxVelocity'] /= success_num
            #     CMAESlog['maxTorque'] += tmpMaxTorque
            #     CMAESlog['maxTorque'] /= success_num
            # else:
            #     CMAESlog['maxVelocity'] = tmpMaxVelocity
            #     CMAESlog['maxTorque'] = tmpMaxTorque

            tmpMaxVelocity = np.array([])
            tmpMaxTorque = np.array([])
            n += 1

    DRLlog['successNum'] = success_num
    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
    return DRLlog, Handlog, CMAESlog
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, make_gif=True):
    # env = gym.make('flowers-Walker-continuous-v0')
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    env_babbling = "random"
    norm_obs = False

    def get_mu_sigma(v_min, v_max):
        # assumes sigma has same bounds as mu
        random_2dparams = np.random.uniform(v_min, v_max, 2)
        return random_2dparams.tolist()  # returning mu and sigma

    def set_test_env_params(**kwargs):
        # if kwargs['stump_height'] is not None:
        #     random_stump_h = get_mu_sigma(kwargs['stump_height'][0], kwargs['stump_height'][1])
        #     random_stump_h[1] = 0.1
        if 'poly_shape' not in kwargs.keys():
            kwargs['poly_shape'] = None
        random_stump_h = None
        random_tunnel_h = None
        random_stump_r = None
        random_stump_w = None
        random_ob_spacing = None
        random_stump_seq = None
        if kwargs['stump_height'] is not None:
            random_stump_h = [kwargs['stump_height'], 0.1]
        if 'stump_rot' in kwargs.keys() and kwargs['stump_rot'] is not None:
            random_stump_r = [kwargs['stump_rot'], 0.1]
        if kwargs['stump_width'] is not None:
            random_stump_w = [kwargs['stump_width'], 0.1]
        if kwargs['tunnel_height'] is not None:
            random_tunnel_h = [kwargs['tunnel_height'], 0.1]
        if kwargs['obstacle_spacing'] is not None:
            random_ob_spacing = kwargs['obstacle_spacing']
        if kwargs['stump_seq'] is not None:
            random_stump_seq = kwargs['stump_seq']
        env.env.set_environment(roughness=kwargs['roughness'],
                                stump_height=random_stump_h,
                                stump_width=random_stump_w,
                                stump_rot=random_stump_r,
                                tunnel_height=None,
                                obstacle_spacing=random_ob_spacing,
                                gap_width=kwargs['gap_width'],
                                step_height=kwargs['step_height'],
                                step_number=kwargs['step_number'],
                                poly_shape=kwargs['poly_shape'],
                                stump_seq=random_stump_seq)

    def poly_2_width_height(params):
        scaling = 14 / 30.0
        obstacle_polygon = [(-0.5, 0), (-0.5, 0.25), (-0.25, 0.5), (0.25, 0.5), (0.5, 0.25), (0.5, 0)]
        paired_params = [[params[i], params[i + 1]] for i in range(0, len(params), 2)]
        # first recover polygon coordinates
        poly_coord = []
        for i, (b, d) in enumerate(zip(obstacle_polygon, paired_params)):
            # print(paired_params)
            if i != 0 and i != (len(obstacle_polygon) - 1):
                poly_coord.append([(b[0] * scaling) + (d[0] * scaling), (b[1] * scaling) + (d[1] * scaling)])
            else:
                poly_coord.append([(b[0] * scaling) + (d[0] * scaling), (b[1] * scaling)])
        # then find maximal width and height
        poly_coord = np.array(poly_coord)
        min_x = np.min(poly_coord[:, 0])
        max_x = np.max(poly_coord[:, 0])
        min_y = np.min(poly_coord[:, 1])
        max_y = np.max(poly_coord[:, 1])
        height_width_params = [(max_x - min_x) / scaling, (max_y - min_y) / scaling]
        return np.round(height_width_params, 2)

    # simple exp: random short fails compared to gmm -> [0.84, 5.39] run 11
    env_kwargs = {
        'roughness': None,
        'stump_height': [0.50, 0.50],  # stump_levels = [[0., 0.66], [0.66, 1.33], [1.33, 2.]]
        'tunnel_height': None,
        'stump_rot': None,
        'stump_width': None,
        'obstacle_spacing': 4,
        'gap_width': None,
        'step_height': None,
        'step_number': None
    }
    # test_env_list = pickle.load(open("/home/remy/projects/spinningup/teachers/test_sets/poly_shape0_4.0.pkl", "rb"))
    test_env_list = pickle.load(
        open("/home/remy/projects/spinningup/teachers/test_sets/stump_height0_3.0obstacle_spacing0_6.0.pkl", "rb"))
    test_env_list = pickle.load(
        open("/home/remy/projects/spinningup/teachers/test_sets/stump_seq0_6.0.pkl", "rb"))
    # test_env_list = params_2_env_list([[0.4, 0.8]], ['stump_height', 'obstacle_spacing'])  # short agent seed 7 (or 11)
    # test_env_list = params_2_env_list([[0, 0], [0.7, 1.0], [1.6, 5.5], [1.9, 0.01]], ['stump_height', 'obstacle_spacing'])  # default agent seed 0
    # test_env_list = params_2_env_list([[0, 0], [3.0, 0.0], [3.0, 5], [1.5, 0.5]], ['stump_height', 'obstacle_spacing'])  # long agent seed 0
    # test_env_list = params_2_env_list([[5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0]], 'stump_seq')  # long agent seed 0

    # final_list = []
    # for i in [19]:
    #     final_list.append(test_env_list[i])
    # for i in range(5):
    #     prev_args = copy.copy(final_list[-1])
    #     last_poly = prev_args['poly_shape']
    #     prev_args['poly_shape'] = np.clip(np.random.normal(last_poly, 0.5), 0, 10)
    #     final_list.append(prev_args)
    # test_env_list = final_list
    # # print(test_env_list)

    if norm_obs:
        norm = MaxMinFilter(env_params_dict=env_kwargs)

    # increments = np.array([-0.4, 0, -0.4, 0.2, -0.2, 0.4, 0.2, 0.4, 0.4, 0.2, 0.4, 0.0])
    # init_poly = np.zeros(12)
    # init_poly += 5
    for i, args in enumerate(test_env_list):
        # args = params_2_env_list([init_poly], 'poly_shape')[0]
        # if i not in [0, 1, 3, 6, 4]:
        #     continue
        # if i not in [1, 5, 8, 10, 25, 35]:
        #     continue
        # print("{}: {}".format(i, args['poly_shape']))
        set_test_env_params(**args)
        # init_poly += increments
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        img = env.render(mode='rgb_array')
        o = norm(o) if norm_obs else o
        obss = [o]
        skip = 2
        cpt = 0
        # wh = poly_2_width_height(args['poly_shape'])
        save_img = False
        images = []
        while n < num_episodes:
            if render:
                cpt += 1
                if (cpt % skip) == 0:
                    if make_gif:
                        img = env.render(mode='rgb_array')
                        images.append(img)
                        if save_img:
                            plt.imsave("graphics/walker_images/a_quadru_complex_walker_gmm_{}_{}_{}.png".format(wh, i, cpt),
                                       np.array(img)[150:315, :-320, :])
                    else:
                        env.render()
                time.sleep(1e-3)

            a = get_action(o)
            o, r, d, _ = env.step(a)
            o = norm(o) if norm_obs else o
            obss.append(o)
            ep_ret += r
            ep_len += 1

            if d or (ep_len == max_ep_len):
                logger.store(EpRet=ep_ret, EpLen=ep_len)
                # print('Episode {}:{} \t EpRet {} \t EpLen {}'.format(i, wh, ep_ret, ep_len))
                print('Episode {}:{} \t EpRet {} \t EpLen {}'.format(i, args['stump_height'], ep_ret, ep_len))
                # set_test_env_params(**env_kwargs)
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                o = norm(o) if norm_obs else o
                n += 1

        # print("MAX:{}".format(np.max(obss, axis=0)))
        # print("MIN:{}".format(np.min(obss, axis=0)))

        # logger.log_tabular('EpRet', with_min_and_max=True)
        # logger.log_tabular('EpLen', average_only=True)
        # logger.dump_tabular()
        # print(len(images))
        # print(np.array(images[0]).shape)
        # crop [150:315, :-320, :] for long, [200:315, :-320, :] for default
        imageio.mimsave('graphics/demo_quadru_seq_env_{}.gif'.format(i),
                        [np.array(img)[110:315, :-320, :] for i, img in enumerate(images)],
                        fps=29)
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True,
               try_rollouts=0, steps_per_try_rollout=0):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    torch.manual_seed(3)
    np.random.seed(3)
    random.seed(3)

    logger = EpochLogger()
    o, r, done, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    rollout = []
    while n < num_episodes:
        if try_rollouts != 0:
            if not rollout:
                rollout = do_rollouts(get_action, env, o, steps_per_try_rollout, try_rollouts,
                                      is_eval=True, take_worst_rollout=False)
            a, v, logp, _o, _r, _done, _info = rollout.pop(0)
            o, r, done, info = env.step(a)
            assert np.array_equal(o, _o)
            assert r == _r
            assert done == _done
            step_output = o, r, done, info
        else:
            a = get_action(o)[0]
            step_output = env.step(a)

        if render:
            env.render()
            # time.sleep(1e-3)

        if hasattr(env, 'last_step_output'):
            step_output = env.last_step_output
        o, r, done, info = step_output
        ep_ret += r
        ep_len += 1

        if done or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, save_dir, max_ep_len=10000, num_episodes=10, render=True):
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    dir_name = 'trajectory{}st_{}episode'.format(sample_step_per_trj, num_episodes)  # directory name
    dir_path = osp.join(save_dir, dir_name)
    os.makedirs(dir_path)  # pass exist_ok=True if the directory may already exist

    if save_movie:
        env = gym.wrappers.Monitor(env, dir_path + '/movies', video_callable=(lambda n: n < 10))

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    observations = []
    actions = []
    results = []
    while n < num_episodes:
        for t in range(max_ep_len):
            if render:
                env.render()
                time.sleep(1e-5)  # 1e-2

            a = get_action(o)
            if t < sample_step_per_trj:
                observations.append(o)
                actions.append(a)
            o, r, d, _ = env.step(a)
            ep_ret += r
            ep_len += 1

            if d or (ep_len == max_ep_len):
                logger.store(EpRet=ep_ret, EpLen=ep_len)
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
                results.append([n, ep_ret, ep_len])
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                n += 1
                break

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    # save csv
    df_obs = pd.DataFrame(observations)
    df_act = pd.DataFrame(actions)
    df_results = pd.DataFrame(results, columns=['Episode', 'EpRet', 'Eplen'])
    # sample_r_mean = df_results['EpRet'].mean()
    df_obs.to_csv(osp.join(dir_path, "observations.csv"), sep=",", header=False, index=False)
    df_act.to_csv(osp.join(dir_path, "actions.csv"), sep=",", header=False, index=False)
    df_results.to_csv(osp.join(dir_path, "each_results.csv"), sep=",", index=False)
    df_results.describe().to_csv(osp.join(dir_path, "results_describe.csv"), sep=",")