Exemple #1
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    """Roll out a torch policy in ``env`` and log per-episode statistics.

    Each observation is reshaped to a single row and wrapped in a
    ``torch.Tensor`` before being handed to ``get_action``. The first item of
    the result is converted with ``.data.numpy()[0]`` and passed to
    ``env.step``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. Must not be ``None``.
        get_action: Callable taking the batched observation tensor and
            returning an indexable whose first item is a tensor.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    episode_return, episode_length, finished = 0, 0, 0

    while finished < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        obs_batch = torch.Tensor(obs.reshape(1, -1))
        action = get_action(obs_batch)[0]
        obs, reward, done, _ = env.step(action.data.numpy()[0])
        episode_return += reward
        episode_length += 1

        # Keep stepping until the episode terminates or hits the length cap.
        if not done and episode_length != max_ep_len:
            continue

        logger.store(EpRet=episode_return, EpLen=episode_length)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (finished, episode_return, episode_length))
        obs = env.reset()
        episode_return, episode_length = 0, 0
        finished += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #2
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=10):
    """Run ``get_action`` in ``env`` for ``num_episodes`` episodes without rendering.

    Prints ``Success!`` after any step where the running episode return
    equals exactly 10, and logs return/length statistics at the end.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``. Must not
            be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    total_reward, steps, episode_idx = 0, 0, 0

    while episode_idx < num_episodes:
        action = get_action(obs)
        # (Disabled experiment) add uncertainty: sample the action from
        # Normal(action, exp(-0.5)) instead of using it directly.

        obs, reward, done, _ = env.step(action)
        total_reward += reward
        steps += 1

        if total_reward == 10:
            print("Success!")

        if not done and steps != max_ep_len:
            continue

        logger.store(EpRet=total_reward, EpLen=steps)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode_idx, total_reward, steps))
        obs = env.reset()
        total_reward, steps = 0, 0
        episode_idx += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #3
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, sleep=1e-3,
               log=True, verbose=True, reset_state=None, q_action=None, action_parameters=None, random=False):
    """Generator that rolls out a policy and yields one record per environment step.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``render(mode='rgb_array')`` and ``action_space.sample()``.
        get_action: Callable ``get_action(obs, **action_parameters)``.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run before the generator finishes.
        render: When true, grab an RGB frame before every step and sleep for
            ``sleep`` seconds.
        sleep: Pause after each rendered frame, in seconds.
        log: When true, collect statistics in an ``EpochLogger`` and dump them
            once all episodes are done.
        verbose: When true, print a summary line after every episode.
        reset_state: If given, episodes start from ``set_state(reset_state, env)``
            instead of ``env.reset()``.
        q_action: If given, used as the first action of every episode.
        action_parameters: Extra keyword arguments forwarded to ``get_action``.
        random: When true, sample actions from ``env.action_space`` instead of
            calling ``get_action`` (``q_action`` still takes precedence on the
            first step).

    Yields:
        dict with keys ``img`` (frame rendered before the step, or ``None``),
        ``a`` (action), ``r`` (reward), ``d`` (done flag), ``score`` (running
        episode return), ``t`` (running episode length) and ``o`` (the
        observation the action was chosen from).
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger() if log else None

    def _reset_env():
        # Either a fresh reset or a restore of the caller-supplied state.
        return env.reset() if reset_state is None else set_state(reset_state, env)

    action_parameters = {} if action_parameters is None else action_parameters

    ep_ret, ep_len, n = 0, 0, 0
    o = _reset_env()

    while n < num_episodes:
        img = None
        if render:
            img = env.render(mode='rgb_array')
            time.sleep(sleep)

        if ep_len == 0 and q_action is not None:
            a = q_action
        elif random:
            a = env.action_space.sample()
        else:
            a = get_action(o, **action_parameters)

        o_prev = o
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        yield {'img': img, 'a': a, 'r': r, 'd': d, 'score': ep_ret, 't': ep_len, 'o': o_prev}

        if d or (ep_len == max_ep_len):
            if log:
                logger.store(EpRet=ep_ret, EpLen=ep_len)

            if verbose:
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))

            # The reset must happen for every finished episode regardless of
            # `verbose`, and must not touch the episode counter `n`;
            # otherwise the loop either never terminates or never resets.
            ep_ret, ep_len = 0, 0
            o = _reset_env()
            n += 1

    if log:
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', average_only=True)
        logger.dump_tabular()
Exemple #4
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True):
    """Evaluate a policy whose actions are rescaled from [-1, 1] to the env's bounds.

    Before each step the policy output is mapped to
    ``[action_space.low, action_space.high]`` (the action space does not need
    to be symmetric).

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``render()``
            and an ``action_space`` with ``low``/``high``. Must not be ``None``.
        get_action: Callable mapping an observation to a scaled action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    episode_return, episode_length, episodes_done = 0, 0, 0

    while episodes_done < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        scaled = get_action(obs)

        # Affine map from [-1, 1] onto [low, high].
        space = env.action_space
        low, high = space.low, space.high
        env_action = low + (0.5 * (scaled + 1.0) * (high - low))

        obs, reward, done, _ = env.step(env_action)
        episode_return += reward
        episode_length += 1

        if not done and episode_length != max_ep_len:
            continue

        logger.store(EpRet=episode_return, EpLen=episode_length)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episodes_done, episode_return, episode_length))
        obs = env.reset()
        episode_return, episode_length = 0, 0
        episodes_done += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #5
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    """Evaluate a policy, flattening dict observations when ``env`` has a ``goal`` attribute.

    If ``env`` has a ``goal`` attribute, every observation is replaced by the
    concatenation of its ``'observation'`` and ``'desired_goal'`` entries
    (along the last axis) before it reaches ``get_action``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. Must not be ``None``.
        get_action: Callable mapping a (possibly flattened) observation to an
            action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    is_goal_env = hasattr(env, 'goal')

    def _prepare(raw_obs):
        # Goal environments return a dict; the policy expects a flat vector.
        if not is_goal_env:
            return raw_obs
        return np.concatenate([raw_obs['observation'], raw_obs['desired_goal']], axis=-1)

    logger = EpochLogger()
    obs = _prepare(env.reset())
    episode_return, episode_length, episodes_done = 0, 0, 0

    while episodes_done < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        raw_obs, reward, done, _ = env.step(get_action(obs))
        episode_return += reward
        episode_length += 1
        obs = _prepare(raw_obs)

        if not done and episode_length != max_ep_len:
            continue

        logger.store(EpRet=episode_return, EpLen=episode_length)
        print('Episode %d \t EpRet %.3f \t EpLen %d'%(episodes_done, episode_return, episode_length))
        obs = _prepare(env.reset())
        episode_return, episode_length = 0, 0
        episodes_done += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #6
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True):
    """Evaluate a policy and print the per-episode sum of ``info['all_feats']``.

    At the end of each episode the last ``env.coeff_dim`` entries of the
    final observation are printed as "Coeff", followed by the element-wise
    sum of every ``all_feats`` entry reported in ``info`` during the episode.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``render(episode=...)`` and ``coeff_dim``. Must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step (passing the episode index) and
            sleep for 1 ms.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    feature_rows = []
    obs = env.reset()
    episode_return, episode_length, episode_idx = 0, 0, 0

    while episode_idx < num_episodes:
        if render:
            env.render(episode=episode_idx)
            time.sleep(1e-3)

        obs, reward, done, info = env.step(get_action(obs))
        episode_return += reward
        episode_length += 1
        if "all_feats" in info:
            feature_rows.append(info["all_feats"])

        if not done and episode_length != max_ep_len:
            continue

        print(f"Coeff: {obs[-env.coeff_dim:]}")
        print("All feats", np.array(feature_rows).sum(axis=0))
        logger.store(EpRet=episode_return, EpLen=episode_length)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode_idx, episode_return, episode_length))

        obs = env.reset()
        episode_return, episode_length = 0, 0
        feature_rows = []
        episode_idx += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #7
0
def run_policy(env, policy, max_ep_len=None, num_episodes=100, render=True):
    """Run ``policy`` in ``env`` for ``num_episodes`` episodes and log return/length.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and ``render()``.
        policy: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
    """
    logger = EpochLogger()
    state = env.reset()
    episode_return, episode_length, completed = 0, 0, 0

    while completed < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        state, step_reward, terminal, _ = env.step(policy(state))
        episode_return += step_reward
        episode_length += 1

        if not terminal and episode_length != max_ep_len:
            continue

        logger.store(EpRet=episode_return, EpLen=episode_length)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (completed, episode_return, episode_length))
        state = env.reset()
        episode_return, episode_length = 0, 0
        completed += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #8
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               gamma=1,
               key='danger'):
    """Evaluate a policy, logging discounted return and the per-episode max of ``info[key]``.

    The episode return accumulates ``r * gamma**t``, where ``t`` is the number
    of steps already taken in the episode. The largest value of ``info[key]``
    seen in the episode (floored at 0) is logged as ``fail``; the discounted
    return is logged both as ``EpRet`` and ``perf``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. Must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
        gamma: Discount factor applied to rewards.
        key: Entry of the step ``info`` dict to track.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    discounted_return, steps, episode_idx, worst_info = 0, 0, 0, 0

    while episode_idx < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        obs, reward, done, info = env.step(get_action(obs))
        worst_info = max(worst_info, info[key])
        # `steps` is still the pre-increment index here, so the first reward
        # of an episode is undiscounted.
        discounted_return += reward * gamma**steps
        steps += 1

        if not done and steps != max_ep_len:
            continue

        logger.store(EpRet=discounted_return, EpLen=steps, perf=discounted_return, fail=worst_info)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode_idx, discounted_return, steps))
        obs = env.reset()
        discounted_return, steps, worst_info = 0, 0, 0
        episode_idx += 1

    for column in ('EpLen', 'perf', 'fail'):
        if column == 'EpLen':
            # EpRet is reported first, with min/max; the rest are averages only.
            logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular(column, average_only=True)
    logger.dump_tabular()
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=1000,
               out_name="",
               render=False):
    """Evaluate a policy and write each episode's length to a JSON file.

    The episode lengths are written to ``outputs/<out_name>.json`` as a
    mapping from episode index to length (JSON turns the integer keys into
    strings). The ``outputs`` directory is not created here.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``. Must not
            be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        out_name: Base name (without extension) of the JSON output file.
        render: Accepted for signature compatibility; this variant never
            renders.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    # The caller's `num_episodes` is honoured; it used to be unconditionally
    # overwritten with 1000 here, which made the parameter a no-op.

    results = {}
    while n < num_episodes:
        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            results[n] = ep_len
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    with open("outputs/" + out_name + ".json", 'w') as f:
        json.dump(results, f)

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #10
0
def run_adversarial_policy(env,
                           ego_action,
                           opp_action,
                           env_init,
                           ego_agent,
                           opp_agent,
                           max_ep_len=None,
                           num_episodes=100,
                           render=True):
    """Evaluate an ego policy against an opponent policy in a two-agent environment.

    Every episode starts from ``env_init['initial_x'][k]`` with ``k`` drawn
    uniformly from {0, 1}, together with ``env_init['initial_y']`` and
    ``env_init['initial_theta']``. Both policies are queried with
    ``deterministic=True``.

    Args:
        env: Environment exposing ``reset(pose_dict)``, ``step(action_dict)``
            and ``render()``. Must not be ``None``.
        ego_action: Callable ``(obs, action_mask=..., deterministic=...)``
            for the ego agent.
        opp_action: Same as ``ego_action``, for the opponent.
        env_init: Mapping with ``initial_x`` (indexable by 0/1),
            ``initial_y`` and ``initial_theta``.
        ego_agent: Object providing ``process_obs(o)``, ``aval_paths`` and
            ``plan(o, a)`` returning ``(speed, steer, action)``.
        opp_agent: Same interface as ``ego_agent``, for the opponent.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When truthy, render every step.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()

    def _reset_env():
        # randint's upper bound is exclusive, so (0, 2) draws from {0, 1},
        # the same range the deprecated random_integers(0, 1) produced.
        start_idx = np.random.randint(0, 2)
        return env.reset({
            'x': env_init['initial_x'][start_idx],
            'y': env_init['initial_y'],
            'theta': env_init['initial_theta']
        })

    ep_ret, ep_len, n = 0, 0, 0
    o = _reset_env()
    while n < num_episodes:
        if render:
            env.render()

        # Convert the raw observation into each agent's own representation.
        ego_obs = ego_agent.process_obs(o)
        opp_obs = opp_agent.process_obs(o)

        # Take deterministic actions at test time.
        a = ego_action(ego_obs,
                       action_mask=ego_agent.aval_paths,
                       deterministic=True)
        ego_speed, ego_steer, a = ego_agent.plan(o, a)

        # Opponent decision.
        a_opp = opp_action(opp_obs,
                           action_mask=opp_agent.aval_paths,
                           deterministic=True)
        opp_speed, opp_steer, _ = opp_agent.plan(o, a_opp)

        action = {
            'ego_idx': 0,
            'speed': [ego_speed, opp_speed],
            'steer': [ego_steer, opp_steer]
        }

        o, r, d, _ = env.step(action)

        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o = _reset_env()
            ep_ret, ep_len = 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, env_name=None):
    """Evaluate a policy while counting environment-specific reward events.

    Depending on ``env_name`` the loop tracks:

    * ``'Boxing-ram-v0'``: steps with zero reward ("avoid"), summed negative
      reward ("damage") and summed positive reward ("scores").
    * ``'MsPacman-ram-v0'``: counts of rewards equal to 10 (pellet),
      50 (power pellet), 200/400/800/1600 (ghost eaten) and 100 (cherry),
      plus life losses detected through ``env.ale.lives()``.
    * ``'reacher'``: steps where ``info['constraint']`` is truthy and steps
      where ``env.get_features()[0]`` is truthy.

    After all episodes, the Boxing and MsPacman variants pickle the collected
    features, observations and actions to ``boxing_demos.pkl`` /
    ``pacman_demos.pkl`` in the working directory.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. Must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Number of episodes to run.
        render: When true, render every step and sleep for 1 ms.
        env_name: Selects the event counting and the final report. When
            ``None``, the final report falls back to the module-level
            ``args.env_name`` as before.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    # reacher counters (per episode) and their per-episode history.
    num_violations = 0
    num_target = 0
    violations = []
    target = []

    # Boxing counters (per episode).
    hit_by_opponent = 0
    score_hit = 0
    avoid_opponent = 0

    # MsPacman counters (per episode).
    pellet_counts = 0
    power_pellet_counts = 0
    eat_ghost_counts = 0
    eat_cherry_counts = 0
    hit_ghost_counts = 0

    episode_feat_counts = []
    ep_scores = []

    # Every observation/action across all episodes, saved with the demos.
    demo_obs = []
    demo_acs = []

    # Life count seen at the previous life loss; starts at 3 each episode.
    prev_ale = 3
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)
        if env_name == 'Boxing-ram-v0':
            if r == 0:
                avoid_opponent += 1
            elif r < 0:
                hit_by_opponent -= int(r)
            else:
                score_hit += int(r)
        if env_name == 'MsPacman-ram-v0':
            curr_ale = env.ale.lives()
            if r == 10:
                pellet_counts += 1
            if r == 50:
                power_pellet_counts += 1
            if r == 200 or r == 400 or r == 800 or r == 1600:
                eat_ghost_counts += 1
            if r == 100:
                eat_cherry_counts += 1
            elif curr_ale == prev_ale - 1:
                # NOTE(review): the life-loss check only runs on steps whose
                # reward is not 100, exactly as in the original branch layout.
                hit_ghost_counts += 1
                prev_ale = curr_ale
        if env_name == 'reacher':
            if info['constraint']:
                num_violations += 1
            if env.get_features()[0]:
                num_target += 1
        ep_ret += r
        ep_len += 1
        demo_obs.append(o)
        demo_acs.append(a)
        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d'%(n, ep_ret, ep_len))
            if env_name == 'reacher':
                print('Violations %d, Target %d'%(num_violations,num_target))
            if env_name == 'Boxing-ram-v0':
                print("damage %d"%hit_by_opponent)
                print("scores %d"%score_hit)
                print("avoid %d"%avoid_opponent)
                episode_feat_counts.append([hit_by_opponent, score_hit, avoid_opponent, ep_ret])
            if env_name == 'MsPacman-ram-v0':
                print("pellet %d"%pellet_counts)
                print("power pellet %d"%power_pellet_counts)
                print("ghosts eaten %d"%eat_ghost_counts)
                print("cherry %d"%eat_cherry_counts)
                print("hit ghost %d"%hit_ghost_counts)
                episode_feat_counts.append([pellet_counts, power_pellet_counts, eat_ghost_counts, eat_cherry_counts, hit_ghost_counts])
                ep_scores.append(ep_ret)
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            violations.append(num_violations)
            target.append(num_target)

            # Reset all per-episode counters.
            num_violations = 0
            num_target = 0
            hit_by_opponent = 0
            avoid_opponent = 0
            score_hit = 0
            pellet_counts = 0
            power_pellet_counts = 0
            eat_ghost_counts = 0
            eat_cherry_counts = 0
            hit_ghost_counts = 0
            prev_ale = 3
            n += 1

    # Prefer the explicit parameter so the report matches the counters that
    # were collected; only consult the global `args` when no name was passed.
    report_name = env_name if env_name is not None else args.env_name
    if report_name == 'reacher':
        print(violations)
        print(target)
    if report_name == 'Boxing-ram-v0':
        features = {'Features': episode_feat_counts, "Obs": demo_obs, "Scores":ep_scores, "Acs": demo_acs}
        with open('boxing_demos.pkl', 'wb') as f:
            pickle.dump(features, f)
    if report_name == 'MsPacman-ram-v0':
        features = {'Features': episode_feat_counts,"Scores": ep_scores, "Obs": demo_obs, "Acs":demo_acs}
        with open('pacman_demos.pkl', 'wb') as f:
            pickle.dump(features, f)
    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Exemple #12
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=False,
               params=None,
               verbose=False):
    """Evaluate a policy on every environment named in ``params['test_env_names'][0]``.

    Each name is instantiated with ``gym.make`` and run for ``num_episodes``
    episodes. Per-environment return and length are logged under
    ``<env_name>_EpRet`` / ``<env_name>_EpLen``. When ``render`` is true, the
    actions of every finished episode are replayed through ``forward_env``
    and saved as an animated PNG in the module-level ``args.folder``.

    Args:
        env: Only checked for not being ``None``; the rollouts use the
            environments built from ``params``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Episodes to run per test environment.
        render: When true, render every step and export an APNG per episode.
        params: Mapping whose ``'test_env_names'`` entry is a sequence with
            the list of environment ids as its first element.
        verbose: When true, print coefficients, feature sums and the
            per-episode summary line.

    Raises:
        KeyError: If ``params`` has no ``'test_env_names'`` entry.
    """
    from upn.visualize.render import forward_env
    from numpngw import write_apng

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    params = {} if params is None else params
    test_env_names = params["test_env_names"][0]
    test_envs = [gym.make(name) for name in test_env_names]

    logger = EpochLogger()
    for env_name, test_env in zip(test_env_names, test_envs):
        all_feats = []
        all_rews = []
        o, ep_ret, ep_len, n = test_env.reset(), 0, 0, 0
        # NOTE(review): taken from the first observation only and not
        # refreshed after later resets - confirm it is constant per env.
        coeff = o[-test_env.coeff_dim:]
        acs = []
        pbar = tqdm(total=num_episodes)
        while n < num_episodes:
            if render:
                test_env.render()
                time.sleep(1e-3)
            a = get_action(o)
            acs.append(a)
            o, r, d, info = test_env.step(a)
            ep_ret += r
            ep_len += 1
            if "all_feats" in info:
                all_feats.append(info["all_feats"])

            if d or (ep_len == max_ep_len):
                if verbose:
                    print(f"Coeff: {coeff}")
                    print("All feats", np.array(all_feats).sum(axis=0))
                logger.store(**{f"{env_name}_EpRet": ep_ret})
                logger.store(**{f"{env_name}_EpLen": ep_len})
                all_rews.append(ep_ret)
                if verbose:
                    print('Episode %d \t EpRet %.3f \t EpLen %d' %
                          (n, ep_ret, ep_len))
                print(f"{env_name}: reward {ep_ret:.03f}")
                if render:
                    # Replay the recorded actions to obtain frames for export.
                    frames = forward_env(test_env,
                                         np.array(acs),
                                         batch=False,
                                         subrender=False,
                                         resize=0.4)
                    fps = 10
                    fname = f"{env_name}_{n:02d}_rew_{ep_ret:.03f}.png"
                    write_apng(os.path.join(args.folder, fname),
                               frames,
                               delay=1000 / fps)

                # A single reset per episode; the environment used to be
                # reset twice in a row here, discarding the first start state.
                o, ep_ret, ep_len = test_env.reset(), 0, 0

                all_feats = []
                acs = []
                n += 1
                pbar.update(1)
        print(f"{env_name}: mean reward {np.mean(all_rews):.03f}")
        pbar.close()

        logger.log_tabular(f'{env_name}_EpRet', with_min_and_max=True)
        logger.log_tabular(f'{env_name}_EpLen', average_only=True)
    logger.dump_tabular()
Exemple #13
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               seed=None):
    """Evaluate a policy in the GUI simulation and average torque/velocity peaks.

    The environment is re-initialised in ``"GUI"`` mode with ``seed``. Each
    policy action is applied for 25 simulation steps. After each group of 25
    steps, the maximum absolute torque and velocity reported in the last
    step's info dict are recorded. Episodes end only when ``ep_len`` reaches
    ``max_ep_len``; the done flag of the final step marks the episode as
    successful. For successful episodes the recorded traces are averaged
    element-wise into ``DRLlog``.

    Note:
        Because the done flag never ends an episode, ``max_ep_len`` must be
        set; with ``None`` the loop does not terminate.

    Args:
        env: Environment exposing ``__init__(mode, seed=...)``, ``reset()``,
            ``step(action)`` and ``render()``. Must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Number of policy decisions per episode.
        num_episodes: Number of episodes to run.
        render: When true, call ``env.render()`` before each decision and
            sleep for 1 ms.
        seed: Seed forwarded to ``env.__init__``.

    Returns:
        Tuple ``(DRLlog, Handlog, CMAESlog)``. ``DRLlog`` holds the averaged
        ``maxVelocity`` / ``maxTorque`` traces and ``successNum``. The other
        two dicts are returned with empty arrays, since the hand-tuned and
        CMA-ES baselines are not run here.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    success_num = 0
    Handlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    CMAESlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    DRLlog = {
        'maxVelocity': np.array([]),
        'maxTorque': np.array([]),
        'successNum': 0
    }
    tmpMaxVelocity = np.array([])
    tmpMaxTorque = np.array([])
    env.__init__("GUI", seed=seed)
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)
        a = get_action(o)
        # Hold the same action for 25 simulation steps, paced to real time.
        for _ in range(25):
            o, r, d, o_dict = env.step(a)
            time.sleep(1 / SIMULATIONFREQUENCY)
        tmpMaxTorque = np.append(tmpMaxTorque, np.abs(o_dict['torque']).max())
        tmpMaxVelocity = np.append(tmpMaxVelocity,
                                   np.abs(o_dict['velocity']).max())
        ep_ret += r
        ep_len += 1

        # Episodes are deliberately fixed-length; `d` only marks success.
        if ep_len == max_ep_len:
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            satisfy = d
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if satisfy:
                print("done!")
                success_num += 1
                if len(DRLlog['maxVelocity']) != 0:
                    # Incremental mean over successful episodes:
                    # mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k.
                    # The previous `(mean + x) / k` was only right for k == 2.
                    DRLlog['maxVelocity'] = DRLlog['maxVelocity'] + (
                        tmpMaxVelocity - DRLlog['maxVelocity']) / success_num
                    DRLlog['maxTorque'] = DRLlog['maxTorque'] + (
                        tmpMaxTorque - DRLlog['maxTorque']) / success_num
                else:
                    DRLlog['maxVelocity'] = tmpMaxVelocity
                    DRLlog['maxTorque'] = tmpMaxTorque
            tmpMaxVelocity = np.array([])
            tmpMaxTorque = np.array([])
            n += 1
    DRLlog['successNum'] = success_num

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    return DRLlog, Handlog, CMAESlog
Exemple #14
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               make_gif=True):
    """Run a policy on a pickled list of test environments and save one GIF per environment.

    The test set is loaded from a hard-coded pickle file. For every entry the
    wrapped environment is reconfigured through ``env.env.set_environment``,
    the policy is run for ``num_episodes`` episodes, and every second
    rendered frame is collected and written (cropped) to
    ``graphics/demo_quadru_seq_env_<i>.gif``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``render(mode=...)`` and ``env.set_environment(...)``. Must not
            be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off. With ``None``
            only the environment's done flag ends an episode.
        num_episodes: Episodes to run per test environment.
        render: When true, render while stepping and sleep for 1 ms per step.
        make_gif: When true (and ``render`` is true), capture RGB frames for
            the GIF; otherwise render to screen.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    norm_obs = False

    def get_mu_sigma(v_min, v_max):  # assumes sigma has same bounds as mu
        random_2dparams = np.random.uniform(v_min, v_max, 2)
        return random_2dparams.tolist()  # returning mu and sigma

    def set_test_env_params(**kwargs):
        """Configure the wrapped environment from one test-set entry."""
        if 'poly_shape' not in kwargs:
            kwargs['poly_shape'] = None
        random_stump_h = None
        random_tunnel_h = None
        random_stump_r = None
        random_stump_w = None
        random_ob_spacing = None
        random_stump_seq = None
        # Scalar parameters are passed as [mean, std] pairs with a fixed std of 0.1.
        if kwargs['stump_height'] is not None:
            random_stump_h = [kwargs['stump_height'], 0.1]
        if 'stump_rot' in kwargs and kwargs['stump_rot'] is not None:
            random_stump_r = [kwargs['stump_rot'], 0.1]
        if kwargs['stump_width'] is not None:
            random_stump_w = [kwargs['stump_width'], 0.1]
        if kwargs['tunnel_height'] is not None:
            # NOTE(review): computed but not forwarded; `tunnel_height=None`
            # is passed below. Confirm whether that is intentional.
            random_tunnel_h = [kwargs['tunnel_height'], 0.1]
        if kwargs['obstacle_spacing'] is not None:
            random_ob_spacing = kwargs['obstacle_spacing']
        if kwargs['stump_seq'] is not None:
            random_stump_seq = kwargs['stump_seq']
        env.env.set_environment(roughness=kwargs['roughness'],
                                stump_height=random_stump_h,
                                stump_width=random_stump_w,
                                stump_rot=random_stump_r,
                                tunnel_height=None,
                                obstacle_spacing=random_ob_spacing,
                                gap_width=kwargs['gap_width'],
                                step_height=kwargs['step_height'],
                                step_number=kwargs['step_number'],
                                poly_shape=kwargs['poly_shape'],
                                stump_seq=random_stump_seq)

    def poly_2_width_height(params):
        """Return the (width, height) extent of a perturbed obstacle polygon, rounded to 2 decimals."""
        scaling = 14 / 30.0
        obstacle_polygon = [(-0.5, 0), (-0.5, 0.25), (-0.25, 0.5), (0.25, 0.5),
                            (0.5, 0.25), (0.5, 0)]
        paired_params = [[params[i], params[i + 1]]
                         for i in range(0, len(params), 2)]
        # First recover the polygon coordinates; the two end points only
        # receive the horizontal offset.
        poly_coord = []
        for i, (b, d) in enumerate(zip(obstacle_polygon, paired_params)):
            if i != 0 and i != (len(obstacle_polygon) - 1):
                poly_coord.append([(b[0] * scaling) + (d[0] * scaling),
                                   (b[1] * scaling) + (d[1] * scaling)])
            else:
                poly_coord.append([(b[0] * scaling) + (d[0] * scaling),
                                   (b[1] * scaling)])
        # Then find the maximal width and height.
        poly_coord = np.array(poly_coord)
        min_x = np.min(poly_coord[:, 0])
        max_x = np.max(poly_coord[:, 0])
        min_y = np.min(poly_coord[:, 1])
        max_y = np.max(poly_coord[:, 1])
        height_width_params = [(max_x - min_x) / scaling,
                               (max_y - min_y) / scaling]
        return np.round(height_width_params, 2)

    # Only consumed by MaxMinFilter when `norm_obs` is enabled.
    env_kwargs = {
        'roughness': None,
        'stump_height':
        [0.50, 0.50],  #stump_levels = [[0., 0.66], [0.66, 1.33], [1.33, 2.]]
        'tunnel_height': None,
        'stump_rot': None,
        'stump_width': None,
        'obstacle_spacing': 4,
        'gap_width': None,
        'step_height': None,
        'step_number': None
    }

    # The stump_height/obstacle_spacing test set used to be loaded here too,
    # but its result was immediately overwritten by the stump_seq set, so
    # only the latter is read. The file is closed once loaded.
    # NOTE: pickle.load executes arbitrary code from the file; only use
    # trusted test sets.
    with open(
            "/home/remy/projects/spinningup/teachers/test_sets/stump_seq0_6.0.pkl",
            "rb") as test_set_file:
        test_env_list = pickle.load(test_set_file)

    if norm_obs:
        norm = MaxMinFilter(env_params_dict=env_kwargs)

    for i, args in enumerate(test_env_list):
        set_test_env_params(**args)
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        img = env.render(mode='rgb_array')
        o = norm(o) if norm_obs else o
        skip = 2  # keep every `skip`-th rendered frame
        cpt = 0

        # NOTE(review): enabling `save_img` requires `wh`, which is only
        # defined by the disabled poly_2_width_height(args['poly_shape']) call.
        save_img = False
        images = []
        while n < num_episodes:
            if render:
                cpt += 1
                if (cpt % skip) == 0:
                    if make_gif:
                        img = env.render(mode='rgb_array')
                        images.append(img)

                        if save_img:
                            plt.imsave(
                                "graphics/walker_images/a_quadru_complex_walker_gmm_{}_{}_{}.png"
                                .format(wh, i, cpt),
                                np.array(img)[150:315, :-320, :])
                    else:
                        env.render()
                time.sleep(1e-3)

            a = get_action(o)
            o, r, d, _ = env.step(a)
            o = norm(o) if norm_obs else o
            ep_ret += r
            ep_len += 1

            if d or (ep_len == max_ep_len):
                logger.store(EpRet=ep_ret, EpLen=ep_len)
                print('Episode {}:{} \t EpRet {} \t EpLen {}'.format(
                    i, args['stump_height'], ep_ret, ep_len))
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                o = norm(o) if norm_obs else o
                n += 1

        # Crop notes: [150:315,:-320,:] for the long agent,
        # [200:315,:-320,:] for the default agent.
        # Skip the export when no frames were captured (render or make_gif off).
        if images:
            imageio.mimsave(
                'graphics/demo_quadru_seq_env_{}.gif'.format(i),
                [np.array(frame)[110:315, :-320, :] for frame in images],
                fps=29)
Exemple #15
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               try_rollouts=0,
               steps_per_try_rollout=0):
    """Run a policy in ``env`` for ``num_episodes`` episodes and log the results.

    Actions come from one of two sources:

    * ``try_rollouts == 0``: ``get_action(o)[0]`` is applied at every step.
    * ``try_rollouts != 0``: a list of planned steps is obtained from
      ``do_rollouts`` whenever the current plan is empty, and the planned
      actions are replayed one by one. Each replayed step is checked against
      the observation, reward and done flag stored in the plan.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. If it has a ``last_step_output`` attribute, that
            value replaces the tuple returned by ``step``.
        get_action: Policy callable. Called as ``get_action(o)`` in the
            non-rollout mode and handed to ``do_rollouts`` otherwise.
        max_ep_len: Episode is cut once its length equals this value.
            With ``None`` the length check never triggers.
        num_episodes: Number of episodes to run.
        render: Whether to call ``env.render()`` after every step.
        try_rollouts: Forwarded to ``do_rollouts``; ``0`` disables planning.
        steps_per_try_rollout: Forwarded to ``do_rollouts``.

    Raises:
        AssertionError: If ``env`` is ``None`` or a replayed step does not
            reproduce the planned observation, reward or done flag.
    """

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Fixed seeds so repeated evaluations are comparable.
    torch.manual_seed(3)
    np.random.seed(3)
    random.seed(3)

    logger = EpochLogger()
    o, r, done, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    rollout = []
    while n < num_episodes:
        if try_rollouts != 0:
            if not rollout:
                # Plan a fresh sequence of steps starting from the current
                # observation (semantics of do_rollouts are defined elsewhere).
                rollout = do_rollouts(get_action,
                                      env,
                                      o,
                                      steps_per_try_rollout,
                                      try_rollouts,
                                      is_eval=True,
                                      take_worst_rollout=False)
            # Only the action and the expected outcome are needed here; the
            # remaining entries of the planned step are ignored.
            a, _, _, _o, _r, _done, _ = rollout.pop(0)
            o, r, done, info = env.step(a)
            # Replaying the plan must reproduce exactly what was planned.
            assert np.array_equal(o, _o)
            assert r == _r
            assert done == _done
            step_output = o, r, done, info
        else:
            a = get_action(o)[0]
            step_output = env.step(a)

        if render:
            env.render()
            # time.sleep(1e-3)

        # Some environments expose the step result they consider
        # authoritative; prefer it over the raw return value of step().
        if hasattr(env, 'last_step_output'):
            step_output = env.last_step_output

        o, r, done, info = step_output

        ep_ret += r
        ep_len += 1

        if done or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, done, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            # Any steps still queued were planned from the pre-reset state and
            # no longer match the environment; drop them so the next
            # iteration plans again from the fresh observation.
            rollout = []
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env,
               get_action,
               save_dir,
               max_ep_len=10000,
               num_episodes=10,
               render=True):
    """Roll out a policy, log episode statistics and dump samples to CSV.

    A fresh output directory named after ``sample_step_per_trj`` and
    ``num_episodes`` is created under ``save_dir``. For every episode the
    first ``sample_step_per_trj`` (observation, action) pairs are recorded.
    After all episodes have run, four CSV files are written into the output
    directory: ``observations.csv``, ``actions.csv``, ``each_results.csv``
    and ``results_describe.csv``.

    ``sample_step_per_trj`` and ``save_movie`` are not parameters; they are
    read as free names and must be defined outside this function.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``render()``. Wrapped in ``gym.wrappers.Monitor`` when
            ``save_movie`` is truthy.
        get_action: Callable mapping an observation to an action.
        save_dir: Parent directory for the output directory.
        max_ep_len: Upper bound on the number of steps per episode.
        num_episodes: Number of episodes to run.
        render: Whether to render (and briefly sleep) before each step.

    Raises:
        AssertionError: If ``env`` is ``None``.
        FileExistsError: If the output directory already exists, because
            ``os.makedirs`` is called without ``exist_ok``.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation.")

    out_dir = osp.join(
        save_dir,
        'trajectory{}st_{}episode'.format(sample_step_per_trj, num_episodes))
    # exist_ok is not passed: an already existing directory is an error.
    os.makedirs(out_dir)
    if save_movie:
        # Record video only for the first ten episodes.
        env = gym.wrappers.Monitor(
            env,
            out_dir + '/movies',
            video_callable=(lambda episode_id: episode_id < 10))

    logger = EpochLogger()
    obs = env.reset()
    ep_ret = 0
    ep_len = 0
    episode = 0
    sampled_obs, sampled_act, summary = [], [], []

    while episode < num_episodes:
        for step in range(max_ep_len):
            if render:
                env.render()
                time.sleep(1e-5)

            act = get_action(obs)
            # Keep only the leading part of each trajectory.
            if step < sample_step_per_trj:
                sampled_obs.append(obs)
                sampled_act.append(act)

            obs, reward, terminal, _ = env.step(act)
            ep_ret += reward
            ep_len += 1

            if not (terminal or ep_len == max_ep_len):
                continue

            # Episode finished: record it and start the next one.
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (episode, ep_ret, ep_len))
            summary.append([episode, ep_ret, ep_len])
            obs = env.reset()
            ep_ret = 0
            ep_len = 0
            episode += 1
            break

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    # Build all tables first, then write them out.
    obs_frame = pd.DataFrame(sampled_obs)
    act_frame = pd.DataFrame(sampled_act)
    results_frame = pd.DataFrame(summary,
                                 columns=['Episode', 'EpRet', 'Eplen'])

    headerless = ((obs_frame, "observations.csv"),
                  (act_frame, "actions.csv"))
    for frame, filename in headerless:
        frame.to_csv(osp.join(out_dir, filename),
                     sep=",",
                     header=False,
                     index=False)

    results_frame.to_csv(osp.join(out_dir, "each_results.csv"),
                         sep=",",
                         index=False)
    results_frame.describe().to_csv(
        osp.join(out_dir, "results_describe.csv"), sep=",")