Ejemplo n.º 1
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=10):
    """Roll out ``get_action`` in ``env`` and log per-episode statistics.

    An episode finishes when ``env.step`` reports done or when the episode
    length equals ``max_ep_len`` (with ``max_ep_len=None`` only the done flag
    ends an episode). Every finished episode is stored in an ``EpochLogger``
    and printed; a summary table is dumped once all episodes have run.

    Args:
        env: Environment with ``reset()`` and ``step(action)``; ``step`` must
            return a 4-tuple ``(obs, reward, done, info)``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on the number of steps per episode.
        num_episodes: Number of episodes to run.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        action = get_action(obs)
        # NOTE: a disabled experiment used to sit here that added uncertainty
        # by resampling the action from a Normal centred on the policy output.

        obs, reward, done, _ = env.step(action)
        ep_ret += reward
        ep_len += 1

        # Announce whenever the running return is exactly 10.
        if ep_ret == 10:
            print("Success!")

        episode_over = done or (ep_len == max_ep_len)
        if not episode_over:
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 2
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    """Roll out a torch policy in ``env`` and log per-episode statistics.

    The observation is reshaped to a single-row ``torch.Tensor`` before being
    passed to ``get_action``; the first element of the policy output is
    converted back to NumPy and its first row is sent to ``env.step``.

    Args:
        env: Environment with ``reset()``, ``step(action)`` and ``render()``.
        get_action: Callable taking a ``torch.Tensor`` and returning an
            indexable whose first item exposes ``.data.numpy()``.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        obs_batch = torch.Tensor(obs.reshape(1, -1))
        policy_out = get_action(obs_batch)[0]
        env_action = policy_out.data.numpy()[0]
        obs, reward, done, _ = env.step(env_action)
        ep_ret += reward
        ep_len += 1

        if not (done or (ep_len == max_ep_len)):
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 3
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, sleep=1e-3,
               log=True, verbose=True, reset_state=None, q_action=None, action_parameters=None, random=False):
    """Generator that rolls out a policy and yields one record per step.

    An episode ends when ``env.step`` reports done or when its length equals
    ``max_ep_len``. After each episode the environment is reset and the
    episode counter advances, regardless of ``verbose`` (previously the reset
    only happened when ``verbose`` was true and it also zeroed the episode
    counter, so the loop could not terminate correctly).

    Args:
        env: Environment with ``reset()``, ``step(action)`` and, when
            rendering or sampling, ``render(mode=...)`` / ``action_space``.
        get_action: Callable ``get_action(obs, **action_parameters)``.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, grab an ``'rgb_array'`` frame before each step.
        sleep: Seconds to sleep after each rendered frame.
        log: When true, store episode stats in an ``EpochLogger`` and dump a
            table once the generator is exhausted.
        verbose: When true, print a line per finished episode.
        reset_state: If given, episodes start via ``set_state(reset_state,
            env)`` instead of ``env.reset()`` (presumably this returns the
            initial observation; confirm against ``set_state``).
        q_action: If given, used as the first action of every episode.
        action_parameters: Extra keyword arguments for ``get_action``.
        random: When true, sample actions from ``env.action_space`` instead
            of calling ``get_action``.

    Yields:
        dict with keys ``'img'`` (frame or None), ``'a'`` (action), ``'r'``
        (reward), ``'d'`` (done flag), ``'score'`` (return so far), ``'t'``
        (episode step count) and ``'o'`` (observation the action was based on).
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger() if log else None
    action_parameters = {} if action_parameters is None else action_parameters

    def _reset():
        # Start from the caller-supplied state when one is given.
        return env.reset() if reset_state is None else set_state(reset_state, env)

    ep_ret, ep_len, n = 0, 0, 0
    o = _reset()

    while n < num_episodes:
        img = None
        if render:
            img = env.render(mode='rgb_array')
            time.sleep(sleep)

        if ep_len == 0 and q_action is not None:
            # Forced first action of the episode.
            a = q_action
        elif random:
            a = env.action_space.sample()
        else:
            a = get_action(o, **action_parameters)

        o_prev = o
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        yield {'img': img, 'a': a, 'r': r, 'd': d, 'score': ep_ret, 't': ep_len, 'o': o_prev}

        if d or (ep_len == max_ep_len):
            if log:
                logger.store(EpRet=ep_ret, EpLen=ep_len)

            if verbose:
                print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))

            # Always start a fresh episode and advance the episode counter.
            ep_ret, ep_len = 0, 0
            o = _reset()
            n += 1

    if log:
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', average_only=True)
        logger.dump_tabular()
Ejemplo n.º 4
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True):
    """Roll out a policy whose actions are scaled to [-1, 1] and log stats.

    Each policy output is rescaled from [-1, 1] to the environment's
    ``[action_space.low, action_space.high]`` range before being passed to
    ``env.step`` (the action space does not have to be symmetric).

    Args:
        env: Environment with ``reset()``, ``step()``, ``render()`` and an
            ``action_space`` exposing ``low`` and ``high``.
        get_action: Callable mapping an observation to a scaled action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        scaled = get_action(obs)
        # Map [-1, 1] onto [low, high].
        space = env.action_space
        lo, hi = space.low, space.high
        env_action = lo + (0.5 * (scaled + 1.0) * (hi - lo))

        obs, reward, done, _ = env.step(env_action)
        ep_ret += reward
        ep_len += 1

        if not (done or (ep_len == max_ep_len)):
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 5
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True):
    """Roll out a policy and log stats, with support for goal-style envs.

    If ``env`` has a ``goal`` attribute, every observation is treated as a
    mapping and its ``'observation'`` and ``'desired_goal'`` entries are
    concatenated along the last axis before being handed to the policy.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``.
        get_action: Callable mapping a (possibly concatenated) observation to
            an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    is_goal_env = hasattr(env, 'goal')

    def to_policy_obs(raw):
        # Plain environments pass through untouched.
        if not is_goal_env:
            return raw
        return np.concatenate([raw['observation'], raw['desired_goal']], axis=-1)

    logger = EpochLogger()
    obs = to_policy_obs(env.reset())
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        action = get_action(obs)
        raw_obs, reward, done, _ = env.step(action)
        ep_ret += reward
        ep_len += 1
        obs = to_policy_obs(raw_obs)

        if not (done or (ep_len == max_ep_len)):
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = to_policy_obs(env.reset())
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 6
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True):
    """Roll out a policy, logging stats and per-episode feature sums.

    Whenever the step ``info`` contains ``"all_feats"`` it is collected; at
    the end of each episode the element-wise sum of the collected features
    and the last ``env.coeff_dim`` entries of the final observation are
    printed.

    Args:
        env: Environment with ``reset()``, ``step()``, ``render(episode=...)``
            and a ``coeff_dim`` attribute.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    episode_feats = []
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render(episode=episode)
            time.sleep(1e-3)

        action = get_action(obs)
        obs, reward, done, info = env.step(action)
        ep_ret += reward
        ep_len += 1
        if "all_feats" in info.keys():
            episode_feats.append(info["all_feats"])

        if not (done or (ep_len == max_ep_len)):
            continue

        coeff_tail = obs[-env.coeff_dim:]
        print(f"Coeff: {coeff_tail}")
        print("All feats", np.array(episode_feats).sum(axis=0))
        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))

        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode_feats = []
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 7
0
def run_policy(env, policy, max_ep_len=None, num_episodes=100, render=True):
    """Roll out ``policy`` in ``env`` and log per-episode return and length.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``;
            ``step`` must return ``(obs, reward, done, info)``.
        policy: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        obs, reward, done, _ = env.step(policy(obs))
        ep_ret += reward
        ep_len += 1

        if not (done or (ep_len == max_ep_len)):
            continue

        # Episode finished: record it and start over.
        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 8
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               gamma=1,
               key='danger'):
    """Roll out a policy, logging discounted return and a per-episode info max.

    The episode return accumulates ``reward * gamma**t`` (``t`` is the
    zero-based step index), and the maximum of ``info[key]`` seen during the
    episode (starting from 0) is logged as ``fail``. The return is logged
    both as ``EpRet`` and as ``perf``.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``; the
            step ``info`` must contain ``key``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
        gamma: Discount factor applied to rewards.
        key: Name of the ``info`` entry whose episode maximum is tracked.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode, worst_info = 0, 0, 0, 0

    while episode < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        action = get_action(obs)
        obs, reward, done, info = env.step(action)
        worst_info = max(worst_info, info[key])
        ep_ret += reward * gamma**ep_len
        ep_len += 1

        if not (done or (ep_len == max_ep_len)):
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len, perf=ep_ret, fail=worst_info)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len, worst_info = 0, 0, 0
        episode += 1

    for column, options in (
            ('EpRet', {'with_min_and_max': True}),
            ('EpLen', {'average_only': True}),
            ('perf', {'average_only': True}),
            ('fail', {'average_only': True}),
    ):
        logger.log_tabular(column, **options)
    logger.dump_tabular()
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=1000,
               out_name="",
               render=False):
    """Roll out a policy, log stats, and dump episode lengths to JSON.

    Episode lengths are written to ``outputs/<out_name>.json`` as a mapping
    from episode index to length (``json.dump`` turns the integer keys into
    strings).

    The ``num_episodes`` argument is now honoured; previously it was
    overwritten with 1000 inside the function. The ``outputs`` directory is
    created up front so a missing directory cannot cause the write to fail
    after all episodes have already run.

    Args:
        env: Environment with ``reset()`` and ``step(action)``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        out_name: Base name (without extension) of the JSON output file.
        render: Unused; kept for signature compatibility.
    """
    import os

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Fail early (or create the directory) rather than after the rollout.
    os.makedirs("outputs", exist_ok=True)

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    results = {}
    while n < num_episodes:
        a = get_action(o)
        o, r, d, _ = env.step(a)
        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            results[n] = ep_len
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            n += 1

    with open("outputs/" + out_name + ".json", 'w') as f:
        json.dump(results, f)

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 10
0
def run_adversarial_policy(env,
                           ego_action,
                           opp_action,
                           env_init,
                           ego_agent,
                           opp_agent,
                           max_ep_len=None,
                           num_episodes=100,
                           render=True):
    """Roll out an ego policy against an opponent policy and log stats.

    Each episode starts from one of the two ``env_init['initial_x']`` entries,
    chosen uniformly at random. At every step both agents process the shared
    observation, pick a deterministic (masked) action, and turn it into speed
    and steering commands via their ``plan`` method.

    Uses ``np.random.randint(0, 2)`` in place of the deprecated
    ``np.random.random_integers(0, 1)``; both draw from {0, 1}.

    Args:
        env: Environment whose ``reset`` accepts a dict with ``'x'``, ``'y'``
            and ``'theta'`` and whose ``step`` accepts a dict with
            ``'ego_idx'``, ``'speed'`` and ``'steer'``.
        ego_action: Callable ``(obs, action_mask=..., deterministic=...)`` for
            the ego agent.
        opp_action: Same as ``ego_action`` but for the opponent.
        env_init: Mapping with ``'initial_x'`` (indexable by 0/1),
            ``'initial_y'`` and ``'initial_theta'``.
        ego_agent: Object with ``process_obs``, ``plan`` and ``aval_paths``.
        opp_agent: Same interface as ``ego_agent``.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()

    def _reset():
        # Pick one of the two candidate start positions at random.
        init_positions = np.random.randint(0, 2)
        return env.reset({
            'x': env_init['initial_x'][init_positions],
            'y': env_init['initial_y'],
            'theta': env_init['initial_theta']
        })

    ep_ret, ep_len, n = 0, 0, 0
    o = _reset()
    while n < num_episodes:
        if render:
            env.render()

        # Convert the raw observation into each agent's own representation.
        RLobs = ego_agent.process_obs(o)
        Oppobs = opp_agent.process_obs(o)

        # Take deterministic actions at test time.
        a = ego_action(RLobs,
                       action_mask=ego_agent.aval_paths,
                       deterministic=True)
        ego_speed, ego_steer, a = ego_agent.plan(o, a)

        # Opponent decision.
        a_opp = opp_action(Oppobs,
                           action_mask=opp_agent.aval_paths,
                           deterministic=True)
        opp_speed, opp_steer, _ = opp_agent.plan(o, a_opp)

        action = {
            'ego_idx': 0,
            'speed': [ego_speed, opp_speed],
            'steer': [ego_steer, opp_steer]
        }

        o, r, d, _ = env.step(action)

        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o = _reset()
            ep_ret, ep_len = 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 11
0
def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True, env_name=None):
    """Roll out a policy, log stats, and collect env-specific feature counts.

    Depending on ``env_name`` extra per-episode counters are kept:

    * ``'Boxing-ram-v0'``: steps with zero reward, damage taken (negative
      reward) and points scored (positive reward).
    * ``'MsPacman-ram-v0'``: events counted by reward value (10, 50,
      200/400/800/1600, 100) and lost lives via ``env.ale.lives()``.
    * ``'reacher'``: steps with ``info['constraint']`` set and steps where
      ``env.get_features()[0]`` is truthy.

    For the two Atari names the collected features, observations, actions and
    scores are pickled to ``boxing_demos.pkl`` / ``pacman_demos.pkl`` in the
    current working directory.

    The post-rollout reporting now keys on the ``env_name`` argument (it used
    to read a global ``args.env_name``, inconsistent with the rest of the
    function), and the pickle files are closed after writing.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
        env_name: Selects which env-specific bookkeeping to perform.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    # reacher counters (per episode) and their per-episode history
    num_violations = 0
    num_target = 0
    violations = []
    target = []

    # Boxing counters (per episode)
    hit_by_opponent = 0
    score_hit = 0
    avoid_opponent = 0

    # MsPacman counters (per episode)
    pellet_counts = 0
    power_pellet_counts = 0
    eat_ghost_counts = 0
    eat_cherry_counts = 0
    hit_ghost_counts = 0

    episode_feat_counts = []
    ep_scores = []

    # Observations/actions accumulate across ALL episodes.
    demo_obs = []
    demo_acs = []

    # Lives seen at the previous check; hard-coded to 3 at episode start.
    prev_ale = 3
    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, info = env.step(a)

        if env_name == 'Boxing-ram-v0':
            if r == 0:
                avoid_opponent += 1
            elif r < 0:
                hit_by_opponent -= int(r)
            else:
                score_hit += int(r)
        if env_name == 'MsPacman-ram-v0':
            curr_ale = env.ale.lives()
            if r == 10:
                pellet_counts += 1
            if r == 50:
                power_pellet_counts += 1
            if r in (200, 400, 800, 1600):
                eat_ghost_counts += 1
            if r == 100:
                eat_cherry_counts += 1
            elif curr_ale == prev_ale - 1:
                # Life-loss check runs on every step whose reward is not 100
                # (same as the original if/else pairing).
                hit_ghost_counts += 1
                prev_ale = curr_ale
        if env_name == 'reacher':
            if info['constraint']:
                num_violations += 1
            if env.get_features()[0]:
                num_target += 1

        ep_ret += r
        ep_len += 1
        demo_obs.append(o)
        demo_acs.append(a)

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            if env_name == 'reacher':
                print('Violations %d, Target %d' % (num_violations, num_target))
            if env_name == 'Boxing-ram-v0':
                print("damage %d" % hit_by_opponent)
                print("scores %d" % score_hit)
                print("avoid %d" % avoid_opponent)
                episode_feat_counts.append([hit_by_opponent, score_hit, avoid_opponent, ep_ret])
            if env_name == 'MsPacman-ram-v0':
                print("pellet %d" % pellet_counts)
                print("power pellet %d" % power_pellet_counts)
                print("ghosts eaten %d" % eat_ghost_counts)
                print("cherry %d" % eat_cherry_counts)
                print("hit ghost %d" % hit_ghost_counts)
                episode_feat_counts.append([pellet_counts, power_pellet_counts, eat_ghost_counts,
                                            eat_cherry_counts, hit_ghost_counts])
                ep_scores.append(ep_ret)

            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            violations.append(num_violations)
            target.append(num_target)

            # Reset every per-episode counter.
            num_violations = 0
            num_target = 0
            hit_by_opponent = 0
            avoid_opponent = 0
            score_hit = 0
            pellet_counts = 0
            power_pellet_counts = 0
            eat_ghost_counts = 0
            eat_cherry_counts = 0
            hit_ghost_counts = 0
            prev_ale = 3
            n += 1

    if env_name == 'reacher':
        print(violations)
        print(target)
    if env_name == 'Boxing-ram-v0':
        # "Scores" stays empty here: ep_scores is only filled for MsPacman
        # (the Boxing return is the 4th entry of each feature row).
        features = {'Features': episode_feat_counts, "Obs": demo_obs, "Scores": ep_scores, "Acs": demo_acs}
        with open('boxing_demos.pkl', 'wb') as f:
            pickle.dump(features, f)
    if env_name == 'MsPacman-ram-v0':
        features = {'Features': episode_feat_counts, "Scores": ep_scores, "Obs": demo_obs, "Acs": demo_acs}
        with open('pacman_demos.pkl', 'wb') as f:
            pickle.dump(features, f)

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 12
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=False,
               params=None,
               verbose=False):
    """Evaluate a policy on every environment named in ``params``.

    The environments are created with ``gym.make`` from the names in
    ``params["test_env_names"][0]``; the ``env`` argument is only checked for
    being non-None and is then shadowed by the test environments. For each
    environment, ``num_episodes`` episodes are run; per-episode return and
    length are stored under ``"<env_name>_EpRet"`` / ``"<env_name>_EpLen"``.
    When ``render`` is true, each finished episode is additionally replayed
    through ``forward_env`` and written as an animated PNG into
    ``args.folder`` (``args`` is a module-level global).

    Changes from the previous version: the mutable default ``params={}`` is
    replaced by ``None``, and the environment is reset once per finished
    episode instead of twice in a row.

    Args:
        env: Must not be None; otherwise unused (see above).
        get_action: Callable mapping an observation to an action.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Episodes to run per test environment.
        render: When true, render each step and save an APNG per episode.
        params: Mapping that must contain ``"test_env_names"``, whose first
            element is an iterable of gym environment ids.
        verbose: When true, print extra per-episode details.

    Raises:
        KeyError: If ``params`` lacks ``"test_env_names"`` (including when
            ``params`` is omitted).
    """
    from upn.visualize.render import forward_env
    from numpngw import write_apng

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    params = {} if params is None else params
    test_env_names = params["test_env_names"][0]
    test_envs = [gym.make(name) for name in test_env_names]

    logger = EpochLogger()
    for env_name, env in zip(test_env_names, test_envs):
        all_feats = []
        all_rews = []
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        # Captured once from the first observation; not refreshed on reset.
        coeff = o[-env.coeff_dim:]
        acs = []
        pbar = tqdm(total=num_episodes)
        while n < num_episodes:
            if render:
                env.render()
                time.sleep(1e-3)

            a = get_action(o)
            acs.append(a)
            o, r, d, info = env.step(a)
            ep_ret += r
            ep_len += 1
            if "all_feats" in info:
                all_feats.append(info["all_feats"])

            if d or (ep_len == max_ep_len):
                if verbose:
                    print(f"Coeff: {coeff}")
                    print("All feats", np.array(all_feats).sum(axis=0))
                logger.store(**{f"{env_name}_EpRet": ep_ret})
                logger.store(**{f"{env_name}_EpLen": ep_len})
                all_rews.append(ep_ret)
                if verbose:
                    print('Episode %d \t EpRet %.3f \t EpLen %d' %
                          (n, ep_ret, ep_len))
                print(f"{env_name}: reward {ep_ret:.03f}")
                if render:
                    # Replay the episode's actions to obtain frames.
                    frames = forward_env(env,
                                         np.array(acs),
                                         batch=False,
                                         subrender=False,
                                         resize=0.4)
                    fps = 10
                    fname = f"{env_name}_{n:02d}_rew_{ep_ret:.03f}.png"
                    write_apng(os.path.join(args.folder, fname),
                               frames,
                               delay=1000 / fps)

                # Single reset per episode (was called twice back to back).
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0

                all_feats = []
                acs = []
                n += 1
                pbar.update(1)
        print(f"{env_name}: mean reward {np.mean(all_rews):.03f}")
        pbar.close()

        logger.log_tabular(f'{env_name}_EpRet', with_min_and_max=True)
        logger.log_tabular(f'{env_name}_EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 13
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               seed=None):
    """Roll out a policy for fixed-length episodes and average peak loads.

    The environment is re-initialised via ``env.__init__("GUI", seed=seed)``.
    Every policy action is applied for 25 consecutive ``env.step`` calls,
    sleeping ``1 / SIMULATIONFREQUENCY`` seconds after each (the constant is a
    module-level global). After each group of 25, the maximum absolute
    ``'torque'`` and ``'velocity'`` from the last step's info dict are
    recorded. Episodes always run for exactly ``max_ep_len`` policy steps;
    the done flag does not end an episode but marks it as a success.

    For successful episodes the per-step peak traces are averaged into
    ``DRLlog``. The average is now a true running mean over successes; the
    previous ``+= x; /= k`` update was only correct for the first two.

    Args:
        env: Environment with ``__init__(mode, seed=...)``, ``reset()``,
            ``step()`` and ``render()``; step info must contain ``'torque'``
            and ``'velocity'``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Number of policy steps per episode. Required, because it
            is the only termination condition.
        num_episodes: Number of episodes to run.
        render: When true, call ``env.render()`` before each policy step.
        seed: Forwarded to ``env.__init__``.

    Returns:
        Tuple ``(DRLlog, Handlog, CMAESlog)``. ``DRLlog`` holds the averaged
        ``'maxVelocity'`` / ``'maxTorque'`` arrays and ``'successNum'``;
        ``Handlog`` and ``CMAESlog`` are returned with empty arrays.

    Raises:
        ValueError: If ``max_ep_len`` is None or smaller than 1 (the loop
            could never terminate).
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    if max_ep_len is None or max_ep_len < 1:
        raise ValueError(
            "max_ep_len must be a positive integer: episodes only end when "
            "ep_len == max_ep_len.")

    logger = EpochLogger()
    success_num = 0
    Handlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    CMAESlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    DRLlog = {
        'maxVelocity': np.array([]),
        'maxTorque': np.array([]),
        'successNum': 0
    }
    tmpMaxVelocity = np.array([])
    tmpMaxTorque = np.array([])
    env.__init__("GUI", seed=seed)
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
    # Kept from the original: one policy call before the loop whose result is
    # discarded (a stateful policy may depend on it).
    a = get_action(o)

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)
        a = get_action(o)
        # Hold the same action for 25 simulation steps.
        for _ in range(25):
            o, r, d, o_dict = env.step(a)
            time.sleep(1 / SIMULATIONFREQUENCY)
        tmpMaxTorque = np.append(tmpMaxTorque, np.abs(o_dict['torque']).max())
        tmpMaxVelocity = np.append(tmpMaxVelocity,
                                   np.abs(o_dict['velocity']).max())
        ep_ret += r
        ep_len += 1

        # Only the length ends an episode; the done flag is read as success.
        if ep_len == max_ep_len:
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            satisfy = d
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if satisfy:
                print("done!")
                success_num += 1
                if len(DRLlog['maxVelocity']) != 0:
                    # Incremental mean: m_k = m_{k-1} + (x_k - m_{k-1}) / k
                    DRLlog['maxVelocity'] += (
                        tmpMaxVelocity - DRLlog['maxVelocity']) / success_num
                    DRLlog['maxTorque'] += (
                        tmpMaxTorque - DRLlog['maxTorque']) / success_num
                else:
                    DRLlog['maxVelocity'] = tmpMaxVelocity
                    DRLlog['maxTorque'] = tmpMaxTorque
            # Rebind (not clear) so arrays stored in DRLlog are not aliased.
            tmpMaxVelocity = np.array([])
            tmpMaxTorque = np.array([])
            n += 1
    DRLlog['successNum'] = success_num

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    return DRLlog, Handlog, CMAESlog
Ejemplo n.º 14
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               try_rollouts=0,
               steps_per_try_rollout=0):
    """Roll out a policy with fixed seeds, optionally replaying planned rollouts.

    The torch, NumPy and ``random`` generators are all seeded with 3. When
    ``try_rollouts`` is non-zero, actions come from a queue produced by
    ``do_rollouts`` (refilled whenever it is empty) and every real step is
    asserted to match the step recorded in the rollout. Otherwise the action
    is the first element of ``get_action(obs)``. If the environment exposes
    ``last_step_output``, that value replaces the step result.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``.
        get_action: Policy callable; its result is indexed with ``[0]`` in
            the non-rollout path and passed to ``do_rollouts`` otherwise.
        max_ep_len: Optional cap on steps per episode (None = no cap).
        num_episodes: Number of episodes to run.
        render: When true, render after every step.
        try_rollouts: Forwarded to ``do_rollouts``; 0 disables rollouts.
        steps_per_try_rollout: Forwarded to ``do_rollouts``.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    fixed_seed = 3
    torch.manual_seed(fixed_seed)
    np.random.seed(fixed_seed)
    random.seed(fixed_seed)

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0
    pending = []

    while episode < num_episodes:
        if try_rollouts == 0:
            action = get_action(obs)[0]
            step_output = env.step(action)
        else:
            if not pending:
                pending = do_rollouts(get_action,
                                      env,
                                      obs,
                                      steps_per_try_rollout,
                                      try_rollouts,
                                      is_eval=True,
                                      take_worst_rollout=False)
            action, _v, _logp, want_obs, want_rew, want_done, _info = pending.pop(0)
            obs, rew, done, info = env.step(action)
            # The real environment must reproduce the planned transition.
            assert np.array_equal(obs, want_obs)
            assert rew == want_rew
            assert done == want_done
            step_output = obs, rew, done, info

        if render:
            env.render()

        if hasattr(env, 'last_step_output'):
            step_output = env.last_step_output

        obs, rew, done, info = step_output

        ep_ret += rew
        ep_len += 1

        if not (done or (ep_len == max_ep_len)):
            continue

        logger.store(EpRet=ep_ret, EpLen=ep_len)
        print('Episode %d \t EpRet %.3f \t EpLen %d' % (episode, ep_ret, ep_len))
        obs = env.reset()
        ep_ret, ep_len = 0, 0
        episode += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env,
               get_action,
               save_dir,
               max_ep_len=10000,
               num_episodes=10,
               render=True):
    """Roll out a policy and save sampled trajectories and results as CSV.

    A new directory ``trajectory<sample_step_per_trj>st_<num_episodes>episode``
    is created under ``save_dir`` (creation fails if it already exists). The
    first ``sample_step_per_trj`` observation/action pairs of each episode are
    recorded. ``sample_step_per_trj`` and ``save_movie`` are module-level
    globals; when ``save_movie`` is truthy the environment is wrapped in
    ``gym.wrappers.Monitor`` recording the first 10 episodes.

    Files written: ``observations.csv``, ``actions.csv``,
    ``each_results.csv`` and ``results_describe.csv``.

    Args:
        env: Environment with ``reset()``, ``step()`` and ``render()``.
        get_action: Callable mapping an observation to an action.
        save_dir: Parent directory for the output folder.
        max_ep_len: Maximum number of steps per episode.
        num_episodes: Number of episodes to run.
        render: When true, render the environment before every step.
    """
    assert env is not None, (
        "Environment not found!\n\n It looks like the environment wasn't saved, "
        "and we can't run the agent in it. :( \n\n Check out the readthedocs "
        "page on Experiment Outputs for how to handle this situation."
    )

    folder = 'trajectory{}st_{}episode'.format(sample_step_per_trj, num_episodes)
    out_dir = osp.join(save_dir, folder)
    # No exist_ok: raises if the directory already exists.
    os.makedirs(out_dir)
    if save_movie:
        env = gym.wrappers.Monitor(env,
                                   out_dir + '/movies',
                                   video_callable=(lambda n: n < 10))

    logger = EpochLogger()
    obs = env.reset()
    ep_ret, ep_len, episode = 0, 0, 0
    observations, actions, results = [], [], []

    while episode < num_episodes:
        for step_idx in range(max_ep_len):
            if render:
                env.render()
                time.sleep(1e-5)

            action = get_action(obs)
            # Only the first few steps of each episode are sampled.
            if step_idx < sample_step_per_trj:
                observations.append(obs)
                actions.append(action)

            obs, reward, done, _ = env.step(action)
            ep_ret += reward
            ep_len += 1

            if not (done or (ep_len == max_ep_len)):
                continue

            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (episode, ep_ret, ep_len))
            results.append([episode, ep_ret, ep_len])
            obs = env.reset()
            ep_ret, ep_len = 0, 0
            episode += 1
            break

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    # Save everything as CSV.
    obs_frame = pd.DataFrame(observations)
    act_frame = pd.DataFrame(actions)
    results_frame = pd.DataFrame(results, columns=['Episode', 'EpRet', 'Eplen'])

    for frame, filename in ((obs_frame, "observations.csv"),
                            (act_frame, "actions.csv")):
        frame.to_csv(osp.join(out_dir, filename),
                     sep=",",
                     header=False,
                     index=False)
    results_frame.to_csv(osp.join(out_dir, "each_results.csv"),
                         sep=",",
                         index=False)
    summary = results_frame.describe()
    summary.to_csv(osp.join(out_dir, "results_describe.csv"), sep=",")