Ejemplo n.º 1
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=False,
               params=None,
               verbose=False):
    """Evaluate ``get_action`` on every test environment named in ``params``.

    One environment is created with ``gym.make`` for each name in
    ``params["test_env_names"][0]`` and ``num_episodes`` episodes are run in
    it.  Episode return and length are stored in an ``EpochLogger`` under
    ``<env_name>_EpRet`` / ``<env_name>_EpLen`` and dumped once at the end.

    Args:
        env: Only checked for being non-``None``; the rollouts themselves run
            in the environments created from ``params``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off.  With ``None``
            an episode ends only when the environment reports done.
        num_episodes: Number of episodes to run per test environment.
        render: When true, render every step and, at the end of each episode,
            replay the recorded actions through ``forward_env`` and write the
            resulting frames as an animated PNG.
        params: Mapping that must contain ``"test_env_names"``; its first
            element is the list of environment ids to evaluate.
        verbose: When true, print extra per-episode diagnostics.

    Raises:
        AssertionError: If ``env`` is ``None``.
        KeyError: If ``params`` has no ``"test_env_names"`` entry.
    """
    from upn.visualize.render import forward_env
    from numpngw import write_apng

    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Avoid a mutable default argument: a ``{}`` default is one dict object
    # shared by every call.
    if params is None:
        params = {}

    test_env_names = params["test_env_names"][0]
    test_envs = [gym.make(name) for name in test_env_names]

    logger = EpochLogger()
    # The loop variable gets its own name so it does not shadow the ``env``
    # argument.
    for env_name, test_env in zip(test_env_names, test_envs):
        all_feats = []
        all_rews = []
        o, r, d, ep_ret, ep_len, n = test_env.reset(), 0, False, 0, 0, 0
        # NOTE(review): coeff is read once from the first observation and is
        # not refreshed after later resets -- confirm it is constant per env.
        coeff = o[-test_env.coeff_dim:]
        acs = []
        pbar = tqdm(total=num_episodes)
        while n < num_episodes:
            if render:
                test_env.render()
                time.sleep(1e-3)
            a = get_action(o)
            acs.append(a)
            o, r, d, info = test_env.step(a)
            ep_ret += r
            ep_len += 1
            if "all_feats" in info:
                all_feats.append(info["all_feats"])

            if d or (ep_len == max_ep_len):
                if verbose:
                    print(f"Coeff: {coeff}")
                    print("All feats", np.array(all_feats).sum(axis=0))
                logger.store(**{f"{env_name}_EpRet": ep_ret})
                logger.store(**{f"{env_name}_EpLen": ep_len})
                all_rews.append(ep_ret)
                if verbose:
                    print('Episode %d \t EpRet %.3f \t EpLen %d' %
                          (n, ep_ret, ep_len))
                print(f"{env_name}: reward {ep_ret:.03f}")
                if render:
                    frames = forward_env(test_env,
                                         np.array(acs),
                                         batch=False,
                                         subrender=False,
                                         resize=0.4)
                    fps = 10
                    fname = f"{env_name}_{n:02d}_rew_{ep_ret:.03f}.png"
                    # NOTE(review): ``args`` is not defined in this function;
                    # presumably a module-level argparse namespace -- verify.
                    write_apng(os.path.join(args.folder, fname),
                               frames,
                               delay=1000 / fps)

                # Reset exactly once per episode; a second reset would throw
                # away the first initial state for no benefit.
                o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0

                all_feats = []
                acs = []
                n += 1
                pbar.update(1)
        print(f"{env_name}: mean reward {np.mean(all_rews):.03f}")
        pbar.close()

        logger.log_tabular(f'{env_name}_EpRet', with_min_and_max=True)
        logger.log_tabular(f'{env_name}_EpLen', average_only=True)
    logger.dump_tabular()
def run_adversarial_policy(env,
                           ego_action,
                           opp_action,
                           env_init,
                           ego_agent,
                           opp_agent,
                           max_ep_len=None,
                           num_episodes=100,
                           render=True):
    """Run an ego policy against an opponent policy and log episode stats.

    Each step, both agents convert the shared observation with
    ``process_obs``, their policies pick an action (deterministically, masked
    by ``aval_paths``), and each agent's ``plan`` turns that into a speed and
    a steering value.  The two commands are sent to ``env.step`` together.

    Args:
        env: Environment whose ``reset`` accepts a dict with ``'x'``, ``'y'``
            and ``'theta'`` and whose ``step`` accepts a dict with
            ``'ego_idx'``, ``'speed'`` and ``'steer'``.
        ego_action: Policy callable for the ego agent.
        opp_action: Policy callable for the opponent.
        env_init: Mapping with ``'initial_x'`` (indexable by 0 or 1),
            ``'initial_y'`` and ``'initial_theta'``.
        ego_agent: Provides ``process_obs``, ``aval_paths`` and ``plan``.
        opp_agent: Same interface as ``ego_agent``, for the opponent.
        max_ep_len: Step count at which an episode is cut off.  With ``None``
            an episode ends only when the environment reports done.
        num_episodes: Number of episodes to run.
        render: When true, render the environment every step.

    Raises:
        AssertionError: If ``env`` is ``None``.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    def reset_with_random_start():
        """Reset ``env`` using a randomly chosen ``initial_x`` entry."""
        # randint excludes the upper bound, so (0, 2) yields 0 or 1 -- the
        # same range as the deprecated random_integers(0, 1).
        start_idx = np.random.randint(0, 2)
        first_obs = env.reset({
            'x': env_init['initial_x'][start_idx],
            'y': env_init['initial_y'],
            'theta': env_init['initial_theta']
        })
        print(start_idx)
        return first_obs

    logger = EpochLogger()

    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    o = reset_with_random_start()
    while n < num_episodes:
        if render:
            env.render()

        # Convert the raw observation into each agent's policy input.
        RLobs = ego_agent.process_obs(o)
        Oppobs = opp_agent.process_obs(o)

        # Take deterministic actions at test time.
        a = ego_action(RLobs,
                       action_mask=ego_agent.aval_paths,
                       deterministic=True)
        ego_speed, ego_steer, a = ego_agent.plan(o, a)

        # Opponent decision.
        a_opp = opp_action(Oppobs,
                           action_mask=opp_agent.aval_paths,
                           deterministic=True)
        opp_speed, opp_steer, _ = opp_agent.plan(o, a_opp)

        action = {
            'ego_idx': 0,
            'speed': [ego_speed, opp_speed],
            'steer': [ego_steer, opp_steer]
        }

        o, r, d, _ = env.step(action)

        ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            o = reset_with_random_start()
            r, d, ep_ret, ep_len = 0, False, 0, 0
            n += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
Ejemplo n.º 3
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               seed=None):
    """Run a policy in the GUI simulation and average peak torque/velocity.

    The environment is re-initialised in ``"GUI"`` mode with ``seed``.  Every
    policy action is applied for 25 consecutive simulation steps; after those
    steps the largest absolute ``'torque'`` and ``'velocity'`` entries of the
    last step's info dict are recorded.  An episode ends only when
    ``ep_len == max_ep_len``; it counts as a success when the environment's
    done flag is set at that moment.  The per-step peak traces of successful
    episodes are averaged element-wise across successes.

    NOTE: with ``max_ep_len`` left at ``None`` (or set to 0) the end-of-episode
    condition can never hold and the loop does not terminate.

    Args:
        env: Environment to evaluate; must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Number of policy steps per episode.
        num_episodes: Number of episodes to run.
        render: When true, call ``env.render()`` before each policy step.
        seed: Forwarded to ``env.__init__``.

    Returns:
        Tuple ``(DRLlog, Handlog, CMAESlog)``.  ``DRLlog`` holds the averaged
        ``'maxVelocity'`` / ``'maxTorque'`` traces and ``'successNum'``;
        ``Handlog`` and ``CMAESlog`` are returned with empty arrays.

    Raises:
        AssertionError: If ``env`` is ``None``.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    success_num = 0
    Handlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    CMAESlog = {'maxVelocity': np.array([]), 'maxTorque': np.array([])}
    DRLlog = {
        'maxVelocity': np.array([]),
        'maxTorque': np.array([]),
        'successNum': 0
    }
    # Plain lists: appending is O(1), whereas np.append copies the whole
    # array on every step.
    step_max_velocity = []
    step_max_torque = []
    env.__init__("GUI", seed=seed)
    o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0

    while n < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)
        a = get_action(o)
        # Hold the same action for 25 simulation steps, paced by the
        # simulation frequency.
        for _ in range(25):
            o, r, d, o_dict = env.step(a)
            time.sleep(1 / SIMULATIONFREQUENCY)
        step_max_torque.append(np.abs(o_dict['torque']).max())
        step_max_velocity.append(np.abs(o_dict['velocity']).max())
        # NOTE(review): only the reward of the last of the 25 sub-steps is
        # accumulated -- confirm this is intended.
        ep_ret += r
        ep_len += 1

        if ep_len == max_ep_len:
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            print('Episode %d \t EpRet %.3f \t EpLen %d' % (n, ep_ret, ep_len))
            # Capture the done flag before it is cleared by the reset below.
            satisfy = d
            o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
            if satisfy:
                print("done!")
                success_num += 1
                ep_velocity = np.asarray(step_max_velocity, dtype=float)
                ep_torque = np.asarray(step_max_torque, dtype=float)
                if len(DRLlog['maxVelocity']) != 0:
                    # Incremental mean: avg_k = avg_{k-1} + (x_k - avg_{k-1}) / k.
                    # Computing (avg + x) / k instead is only right for k == 2.
                    DRLlog['maxVelocity'] += (
                        ep_velocity - DRLlog['maxVelocity']) / success_num
                    DRLlog['maxTorque'] += (
                        ep_torque - DRLlog['maxTorque']) / success_num
                else:
                    DRLlog['maxVelocity'] = ep_velocity
                    DRLlog['maxTorque'] = ep_torque
            step_max_velocity = []
            step_max_torque = []
            n += 1
    DRLlog['successNum'] = success_num

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    return DRLlog, Handlog, CMAESlog
Ejemplo n.º 4
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               make_gif=True):
    """Run a policy on a pickled list of test terrains and save a GIF per terrain.

    The test set is a list of keyword dictionaries loaded from a hard-coded
    pickle file.  For each entry the wrapped environment is reconfigured via
    ``env.env.set_environment``, ``num_episodes`` episodes are run, and (when
    both ``render`` and ``make_gif`` are true) every second rendered frame is
    collected and written to ``graphics/demo_quadru_seq_env_<i>.gif``.

    Args:
        env: Wrapped environment; ``env.env`` must provide
            ``set_environment``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off.  With ``None``
            an episode ends only when the environment reports done.
        num_episodes: Number of episodes to run per test terrain.
        render: When true, render every second step.
        make_gif: When true (and ``render`` is true), capture the rendered
            frames as RGB arrays for the GIF instead of drawing on screen.

    Raises:
        AssertionError: If ``env`` is ``None``.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    norm_obs = False

    def with_fixed_sigma(value):
        """Pair ``value`` with a 0.1 second element, or pass ``None`` through."""
        return None if value is None else [value, 0.1]

    def set_test_env_params(**kwargs):
        """Configure the wrapped environment from one test-set entry."""
        # NOTE(review): a 'tunnel_height' entry is not forwarded; the
        # environment always receives tunnel_height=None -- confirm intent.
        env.env.set_environment(
            roughness=kwargs['roughness'],
            stump_height=with_fixed_sigma(kwargs['stump_height']),
            stump_width=with_fixed_sigma(kwargs['stump_width']),
            # 'stump_rot' and 'poly_shape' are optional keys.
            stump_rot=with_fixed_sigma(kwargs.get('stump_rot')),
            tunnel_height=None,
            obstacle_spacing=kwargs['obstacle_spacing'],
            gap_width=kwargs['gap_width'],
            step_height=kwargs['step_height'],
            step_number=kwargs['step_number'],
            poly_shape=kwargs.get('poly_shape'),
            stump_seq=kwargs['stump_seq'])

    # Only consumed by the observation filter when norm_obs is enabled.
    env_kwargs = {
        'roughness': None,
        'stump_height': [0.50, 0.50],
        'tunnel_height': None,
        'stump_rot': None,
        'stump_width': None,
        'obstacle_spacing': 4,
        'gap_width': None,
        'step_height': None,
        'step_number': None
    }

    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at trusted, locally generated test sets.
    test_set_path = "/home/remy/projects/spinningup/teachers/test_sets/stump_seq0_6.0.pkl"
    with open(test_set_path, "rb") as test_set_file:
        test_env_list = pickle.load(test_set_file)

    if norm_obs:
        norm = MaxMinFilter(env_params_dict=env_kwargs)

    for i, args in enumerate(test_env_list):
        set_test_env_params(**args)
        o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0
        # Rendered once after the reset regardless of ``render``; the frame
        # itself is not used.
        env.render(mode='rgb_array')
        o = norm(o) if norm_obs else o
        skip = 2  # capture/draw every ``skip``-th step
        cpt = 0
        images = []
        while n < num_episodes:
            if render:
                cpt += 1
                if (cpt % skip) == 0:
                    if make_gif:
                        images.append(env.render(mode='rgb_array'))
                    else:
                        env.render()
                time.sleep(1e-3)

            a = get_action(o)
            o, r, d, _ = env.step(a)
            o = norm(o) if norm_obs else o
            ep_ret += r
            ep_len += 1

            if d or (ep_len == max_ep_len):
                logger.store(EpRet=ep_ret, EpLen=ep_len)
                print('Episode {}:{} \t EpRet {} \t EpLen {}'.format(
                    i, args['stump_height'], ep_ret, ep_len))
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0
                o = norm(o) if norm_obs else o
                n += 1

        # Nothing was captured when rendering or GIF capture is disabled, so
        # there is no animation to write.
        if images:
            # Crop window; the original notes list [150:315, :-320, :] for the
            # long walker and [200:315, :-320, :] for the default one.
            imageio.mimsave(
                'graphics/demo_quadru_seq_env_{}.gif'.format(i),
                [np.array(frame)[110:315, :-320, :] for frame in images],
                fps=29)
Ejemplo n.º 5
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               try_rollouts=0,
               steps_per_try_rollout=0):
    """Evaluate a policy, optionally following pre-planned rollouts.

    With ``try_rollouts == 0`` the first element of ``get_action(obs)`` is
    executed each step.  Otherwise a plan is requested from ``do_rollouts``
    whenever the current one is exhausted; each planned step is replayed in
    ``env`` and the observed outcome is asserted to equal the planned one.
    If the environment exposes ``last_step_output`` it replaces the step
    result before the episode statistics are updated.

    Args:
        env: Environment to evaluate; must not be ``None``.
        get_action: Policy callable.
        max_ep_len: Step count at which an episode is cut off.
        num_episodes: Number of episodes to run.
        render: When true, render after every step.
        try_rollouts: Forwarded to ``do_rollouts``; ``0`` disables planning.
        steps_per_try_rollout: Forwarded to ``do_rollouts``.

    Raises:
        AssertionError: If ``env`` is ``None`` or a replayed step differs
            from its planned outcome.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    # Fixed seeds for torch, NumPy and the stdlib RNG.
    torch.manual_seed(3)
    np.random.seed(3)
    random.seed(3)

    logger = EpochLogger()
    obs = env.reset()
    episode_return = 0
    episode_steps = 0
    episodes_done = 0
    plan = []
    while episodes_done < num_episodes:
        if try_rollouts == 0:
            outcome = env.step(get_action(obs)[0])
        else:
            if not plan:
                plan = do_rollouts(get_action,
                                   env,
                                   obs,
                                   steps_per_try_rollout,
                                   try_rollouts,
                                   is_eval=True,
                                   take_worst_rollout=False)
            # Each plan entry has seven fields; only the action and the
            # expected observation/reward/done are used here.
            action, _, _, want_obs, want_rew, want_done, _ = plan.pop(0)
            got_obs, got_rew, got_done, got_info = env.step(action)
            assert np.array_equal(got_obs, want_obs)
            assert got_rew == want_rew
            assert got_done == want_done
            outcome = got_obs, got_rew, got_done, got_info

        if render:
            env.render()

        if hasattr(env, 'last_step_output'):
            outcome = env.last_step_output

        obs, reward, done, info = outcome

        episode_return += reward
        episode_steps += 1

        if not done and episode_steps != max_ep_len:
            continue

        logger.store(EpRet=episode_return, EpLen=episode_steps)
        print('Episode %d \t EpRet %.3f \t EpLen %d' %
              (episodes_done, episode_return, episode_steps))
        obs = env.reset()
        episode_return = 0
        episode_steps = 0
        episodes_done += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()
def run_policy(env,
               get_action,
               save_dir,
               max_ep_len=10000,
               num_episodes=10,
               render=True):
    """Run a policy and dump sampled trajectories and episode results as CSV.

    A directory named ``trajectory<sample_step_per_trj>st_<num_episodes>episode``
    is created under ``save_dir``.  For the first ``sample_step_per_trj``
    steps of every episode the observation and chosen action are recorded.
    After all episodes, four CSV files are written into that directory:
    ``observations.csv``, ``actions.csv``, ``each_results.csv`` and
    ``results_describe.csv``.  When the module-level ``save_movie`` flag is
    set, the environment is wrapped in ``gym.wrappers.Monitor`` writing to a
    ``movies`` sub-directory.

    Args:
        env: Environment to evaluate; must not be ``None``.
        get_action: Callable mapping an observation to an action.
        save_dir: Parent directory for the output directory.
        max_ep_len: Step count at which an episode is cut off.
        num_episodes: Number of episodes to run.
        render: When true, render every step.

    Raises:
        AssertionError: If ``env`` is ``None``.
        FileExistsError: If the output directory already exists.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    out_dir = osp.join(
        save_dir,
        'trajectory{}st_{}episode'.format(sample_step_per_trj, num_episodes))
    # exist_ok=True is left disabled, so an already-existing directory raises.
    os.makedirs(out_dir)
    if save_movie:
        env = gym.wrappers.Monitor(env,
                                   out_dir + '/movies',
                                   video_callable=(lambda n: n < 10))

    logger = EpochLogger()
    obs = env.reset()
    ep_return, ep_steps, finished = 0, 0, 0
    observations, actions, results = [], [], []
    while finished < num_episodes:
        for step_idx in range(max_ep_len):
            if render:
                env.render()
                time.sleep(1e-5)

            action = get_action(obs)
            # Only the leading steps of each episode are sampled.
            if step_idx < sample_step_per_trj:
                observations.append(obs)
                actions.append(action)

            obs, reward, done, _ = env.step(action)
            ep_return += reward
            ep_steps += 1

            if not done and ep_steps != max_ep_len:
                continue

            logger.store(EpRet=ep_return, EpLen=ep_steps)
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (finished, ep_return, ep_steps))
            results.append([finished, ep_return, ep_steps])
            obs = env.reset()
            ep_return, ep_steps = 0, 0
            finished += 1
            break

    logger.log_tabular('EpRet', with_min_and_max=True)
    logger.log_tabular('EpLen', average_only=True)
    logger.dump_tabular()

    # Build every table first, then write the CSV files.
    headerless_tables = (
        (pd.DataFrame(observations), "observations.csv"),
        (pd.DataFrame(actions), "actions.csv"),
    )
    results_table = pd.DataFrame(
        results,
        columns=['Episode', 'EpRet', 'Eplen'],
    )

    for table, filename in headerless_tables:
        table.to_csv(osp.join(out_dir, filename),
                     sep=",",
                     header=False,
                     index=False)
    results_table.to_csv(osp.join(out_dir, "each_results.csv"),
                         sep=",",
                         index=False)
    results_table.describe().to_csv(osp.join(out_dir, "results_describe.csv"),
                                    sep=",")
Ejemplo n.º 7
0
def run_policy(env,
               get_action,
               max_ep_len=None,
               num_episodes=100,
               render=True,
               gamma=1,
               key='danger'):
    """Evaluate a policy, logging discounted return and a per-episode peak.

    Each reward is weighted by ``gamma`` raised to the step index within the
    episode before being summed.  Alongside it, the largest ``info[key]``
    value seen during the episode (never below 0, the starting value) is
    tracked.  Both are stored per episode: the return as ``EpRet`` and
    ``perf``, the peak as ``fail``.

    Args:
        env: Environment to evaluate; must not be ``None``.
        get_action: Callable mapping an observation to an action.
        max_ep_len: Step count at which an episode is cut off.  With ``None``
            an episode ends only when the environment reports done.
        num_episodes: Number of episodes to run.
        render: When true, render every step with a short sleep.
        gamma: Discount factor applied to rewards.
        key: Entry of the step ``info`` dict to track.

    Raises:
        AssertionError: If ``env`` is ``None``.
    """
    assert env is not None, \
        "Environment not found!\n\n It looks like the environment wasn't saved, " + \
        "and we can't run the agent in it. :( \n\n Check out the readthedocs " + \
        "page on Experiment Outputs for how to handle this situation."

    logger = EpochLogger()
    obs = env.reset()
    disc_return, steps, peak = 0, 0, 0
    episodes_done = 0
    while episodes_done < num_episodes:
        if render:
            env.render()
            time.sleep(1e-3)

        obs, reward, terminal, info = env.step(get_action(obs))
        peak = max(peak, info[key])
        # ``steps`` still holds the index of the step just taken.
        disc_return += reward * gamma**steps
        steps += 1

        if not terminal and steps != max_ep_len:
            continue

        logger.store(EpRet=disc_return,
                     EpLen=steps,
                     perf=disc_return,
                     fail=peak)
        print('Episode %d \t EpRet %.3f \t EpLen %d' %
              (episodes_done, disc_return, steps))
        obs = env.reset()
        disc_return, steps, peak = 0, 0, 0
        episodes_done += 1

    logger.log_tabular('EpRet', with_min_and_max=True)
    for column in ('EpLen', 'perf', 'fail'):
        logger.log_tabular(column, average_only=True)
    logger.dump_tabular()