Example no. 1
    def train(self, **kwargs):
        """
        Run the training algorithm to optimize model parameters for the
        environment provided.
        """
        # copy the default parameters for the chosen training algorithm,
        # then override them with user input (copying avoids mutating PRESETS)
        preset_kwargs = dict(PRESETS[self.training_alg])
        preset_kwargs.update(kwargs)
        render_saves = preset_kwargs.pop('render_saves', False)

        # dynamically import source code (e.g. import algos.vpg.vpg as mod)
        mod = import_module("algos.{}.{}".format(self.training_alg,
                                                 self.training_alg))
        method = getattr(
            mod, self.training_alg)  # e.g. from algos.vpg.vpg import vpg

        if self.actorCritic is None:
            # use the default actorCritic for the algo
            core = import_module("algos.{}.core".format(
                self.training_alg))  # e.g. import algos.vpg.core as core
            self.actorCritic = getattr(
                core, DEFAULT_ACTOR_CRITIC[self.training_alg]
            )  # e.g. from core import MLPActorCritic as actorCritic

        # prepare mpi if self.ncpu > 1 (and supported by chosen RL algorithm)
        mpi_fork(self.ncpu)  # run parallel code with mpi

        # update logger kwargs
        logger_kwargs = setup_logger_kwargs(self.exp_name,
                                            preset_kwargs['seed'])
        preset_kwargs['logger_kwargs'] = logger_kwargs

        # begin training
        method(self.env, actor_critic=self.actorCritic, **preset_kwargs)

        # render all checkpoints user specifies with 'render_saves'
        if render_saves:
            log_dir = os.path.join(logger_kwargs['output_dir'], 'pyt_save')
            # sort so the final checkpoint ('model.pt', with no itr in its
            # file name) comes first, then skip it
            fnames = sorted(glob.glob(os.path.join(log_dir, 'model*.pt')))[1:]
            for checkpoint in fnames:
                itr = re.search(r'model(\d+)\.pt',
                                checkpoint).group(1)  # epoch number from the file name
                render_kwargs = {
                    'filename': '/gym_animation_' + str(itr) + '.mp4',
                    'model_itr': itr
                }
                self.render(save=True,
                            show=False,
                            seed=self.seed,
                            **render_kwargs)
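
Every example on this page routes through the same helper. For reference, a minimal sketch of the contract that SpinningUp-style setup_logger_kwargs implementations follow (not the exact library code): derive an output directory from the experiment name and seed, and return kwargs for EpochLogger.

import os.path as osp

def setup_logger_kwargs(exp_name, seed=None, data_dir='./data'):
    # Sketch: nest a seed-specific folder under the experiment folder,
    # mirroring the SpinningUp convention (datestamping omitted).
    relpath = exp_name
    if seed is not None:
        relpath = osp.join(exp_name, f"{exp_name}_s{seed}")
    return dict(output_dir=osp.join(data_dir, relpath), exp_name=exp_name)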
Example no. 2
    env_name = 'HalfCheetah_hurdle-v2'
    meta_Skill = Meta_skill('./Skill/ica_skill/model_1211/IcaNet.115.pt')
    output_dir = './network_reserve/model_0322/'

    # gpu_init
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    env = NormalizedActions(gym.make(env_name))
    params.update({
        'env': env,
        'output_dir': output_dir,
        'device': device,
    })

    logger_kwargs = setup_logger_kwargs(exp_name=env_name,
                                        seed=0,
                                        data_dir=output_dir)
    meta_Control = Meta_control(**params)
    meta_Control.logger_setup(logger_kwargs, **params)

    MAX_EPISODE = 100
    MAX_EPOCH = 10
    MAX_STEP = 500

    for episode_idx in range(MAX_EPISODE):
        episode_reward = 0
        episode_success = 0
        for epoch in range(MAX_EPOCH):
            state = env.reset()
            for step in range(MAX_STEP):
                # env.render()
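
This snippet wraps the environment in NormalizedActions before training. That wrapper is not shown here; a minimal sketch of what such a gym.ActionWrapper typically does (rescaling agent actions from [-1, 1] to the environment's native range, an assumption about this project's version):

import gym
import numpy as np

class NormalizedActions(gym.ActionWrapper):
    # Rescale actions from [-1, 1] to [low, high] of the wrapped env.
    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        action = low + (np.asarray(action) + 1.0) * 0.5 * (high - low)
        return np.clip(action, low, high)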
Example no. 3
                            reward / episode, episode)

            self.logger.log_tabular("reward", with_min_and_max=True)
            self.logger.log_tabular("step", with_min_and_max=True)
            self.logger.log_tabular("reward_test", with_min_and_max=True)
            self.logger.log_tabular("step_test", with_min_and_max=True)
            self.logger.dump_tabular()


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Carla')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--port', type=int, default=2000)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--exp_name',
                        type=str,
                        default='dqn_carla_random_pos_replaybuffer10e5')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    from utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    dqn = Dqn(args.env, args.port, args.gpu, logger_kwargs=logger_kwargs)
    dqn.train_test()
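
The log_tabular/dump_tabular calls in this example follow the SpinningUp EpochLogger pattern: per-episode values are accumulated with store(), then summarized once per epoch. A minimal sketch of one epoch of that loop (episodes_per_epoch and run_one_episode are hypothetical names, not from the snippet):

logger = EpochLogger(**logger_kwargs)
for _ in range(episodes_per_epoch):
    ep_reward, ep_steps = run_one_episode()   # hypothetical rollout helper
    logger.store(reward=ep_reward, step=ep_steps)
logger.log_tabular("reward", with_min_and_max=True)  # mean/std/min/max of stored values
logger.log_tabular("step", with_min_and_max=True)
logger.dump_tabular()  # writes one row to progress.txt and clears the buffer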
Example no. 4
                # plt.legend(loc = 'lower right',  # default is 'upper left'; change via loc
                #     fancybox = True,             # rounded frame
                #     framealpha = 0.5,            # frame transparency
                #     shadow = True,               # drop shadow
                #     borderpad = 1)               # padding inside the frame

                if not os.path.exists(DEFAULT_IMG_DIR):
                    os.mkdir(DEFAULT_IMG_DIR)
                out_file = os.path.join(DEFAULT_IMG_DIR, output_name + ".png")
                plt.savefig(out_file)   
                plt.clf()

    else:             

        from utils.run_utils import setup_logger_kwargs
        logger_kwargs = setup_logger_kwargs(args.plot_name, args.seed)
        data_file = os.path.join(logger_kwargs["output_dir"], "progress.txt")

        pd_data = pd.read_table(data_file)  # progress.txt is tab-separated
        mean_name = "Averagetest_reward"
        std_name = "Stdtest_reward"
        mean = pd_data[mean_name]
        std = pd_data[std_name]
        x = pd_data["Epoch"]

        plt.plot(x, mean)
        plt.fill_between(x, mean+std, mean-std)

        output_name = args.output_name
        if args.output_name is None:
            output_name = args.plot_name + "_s" + str(args.seed)
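
A common refinement of the plotting calls above is to draw the ±1 std band with some transparency and label the axes; a variant using the same x, mean, and std from this snippet:

        plt.plot(x, mean, label="mean test reward")
        plt.fill_between(x, mean - std, mean + std, alpha=0.3, label="±1 std")
        plt.xlabel("Epoch")
        plt.legend(loc="lower right")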
Example no. 5
            dynamic_model.fit(use_data_buf=True, normalize=True)
            cost_model.fit()
    env.close()

if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--robot', type=str, default='point', help="robot model, selected from `point` or `car`")
    parser.add_argument('--level', type=int, default=1, help="environment difficulty, selected from `1` or `2`, where `2` is more difficult than `1`")
    parser.add_argument('--epoch', type=int, default=60, help="maximum epochs to train")
    parser.add_argument('--episode', type=int, default=10, help="how many episodes of data to collect for each epoch")
    parser.add_argument('--render', '-r', action='store_true', help="render the environment")
    parser.add_argument('--test', '-t', action='store_true', help="test the performance of pretrained models without training")

    parser.add_argument('--seed', '-s', type=int, default=1, help="seed for Gym, PyTorch and NumPy")
    parser.add_argument('--dir', '-d', type=str, default='./data/', help="directory to save the logging information")
    parser.add_argument('--name', '-n', type=str, default='test', help="name of the experiment, used to save data in a folder named by this parameter")
    parser.add_argument('--save', action='store_true', help="save the trained dynamic model, data buffer, and cost model")
    parser.add_argument('--load', type=str, default=None, help="load the trained dynamic model, data buffer, and cost model from a specified directory")
    parser.add_argument('--ensemble', type=int, default=0, help="number of model ensembles; if greater than 0, it replaces the default ensemble number in config.yml")
    parser.add_argument('--optimizer', '-o', type=str, default="rce", help="optimizer, selected from `rce`, `cem`, or `random`")
    parser.add_argument('--config', '-c', type=str, default='./config.yml', help="path to the configuration file of the models")

    args = parser.parse_args()
    logger_kwargs = setup_logger_kwargs(args.name, args.seed, args.dir)
    logger = EpochLogger(**logger_kwargs)
    config = load_config(args.config)

    run(logger, config, args)
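
load_config is not shown in this snippet; assuming the file passed via --config is plain YAML, a minimal implementation could look like:

import yaml

def load_config(path):
    # Parse the YAML config referenced by --config into a dict (sketch).
    with open(path) as f:
        return yaml.safe_load(f)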
    
Example no. 6
    parser.add_argument('--anneal_lr', action="store_true")
    parser.add_argument('--debug', action="store_false")  # note: defaults to True; passing --debug turns it off
    parser.add_argument('--log_every', default=10, type=int)
    parser.add_argument('--network', default="cnn")
    parser.add_argument('--feature_dim', default=50, type=int)
    parser.add_argument('--target_kl', default=0.03, type=float)
    parser.add_argument('--encoder_dir', default="vae_2")
    parser.add_argument('--encoder_check', default=300, type=int)
    parser.add_argument('--test_epoch', default=10, type=int)
    args = parser.parse_args()

    device = torch.device(
        "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")

    from utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)
    logger = EpochLogger(**logger_kwargs)
    with open(os.path.join(logger.output_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)
    writer = SummaryWriter(os.path.join(logger.output_dir, "logs"))

    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=args.seed,
                       visualize_reward=False,
                       from_pixels=(args.encoder_type == 'pixel'),
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.action_repeat)
    test_env = dmc2gym.make(domain_name=args.domain_name,
                            task_name=args.task_name,
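
Dumping vars(args) to args.json (as above) makes a run's settings recoverable later; a hypothetical helper (load_args is not part of the snippet) to restore them:

import argparse, json, os

def load_args(output_dir):
    # Rebuild an argparse.Namespace from a previously dumped args.json.
    with open(os.path.join(output_dir, 'args.json')) as f:
        return argparse.Namespace(**json.load(f))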
Example no. 7
    parser.add_argument('--log', type=str, default="logs")
    parser.add_argument('--steps', type=int, default=1000)
    # parser.add_argument('--env', type=str, default="CartPole-v0")
    parser.add_argument('--v_gae_clip', action='store_true')  # was `default=False` with no type, which parsed any passed value as a truthy string
    parser.add_argument('--env', type=str, default="HalfCheetah-v2")
    parser.add_argument('--exp_name', type=str, default="orthogonal")
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    from utils.run_utils import setup_logger_kwargs
    file_name = "ppo_" + args.env + "_" + args.exp_name
    logger_kwargs = setup_logger_kwargs(file_name, args.seed)
    logger = EpochLogger(**logger_kwargs)

    env = gym.make(args.env)
    tf.random.set_seed(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)

    action_space = env.action_space
    if isinstance(action_space, Discrete):
        ppo = core.PPO(action_space.n, 0.2, lr_a=args.lr_a, lr_c=args.lr_c)
    else:
        ppo = core.PPO(action_space.shape[0],
                       0.2,
                       False,
                       action_space.high[0],
Example no. 8
            d = json.load(outfile)
            minibatch_size = d.get('minibatch_size')
            A_learningRate = d.get('A_learningRate')
            C_learningRate = d.get('C_learningRate')
            discountFactor = d.get('discountFactor')
            explorationRate = d.get('explorationRate')
            learnStart = d.get('learnStart')
            memorySize = d.get('memorySize')
            current_epoch = d.get('current_epoch')
            stepCounter = d.get('stepCounter')
            loadsim_seconds = d.get('loadsim_seconds')

        clear_monitor_files(outdir)
        copy_tree(monitor_path, outdir)
        env = gym.wrappers.Monitor(env, outdir, resume=True)
    logger_kwargs = setup_logger_kwargs('PelicanAttControllerEnv', None)
    policy_net = policy_core.Policy_Net(S_DIM=S_DIM,
                                        A_DIM=A_DIM,
                                        EP_MAX=epochs,
                                        EP_LEN=episode_steps,
                                        GAMMA=discountFactor,
                                        LR=I_learningRate,
                                        BATCH=minibatch_size,
                                        logger_kwargs=logger_kwargs)
    last100Rewards = [0] * 100
    last100RewardsIndex = 0
    last100Filled = False
    all_ep_r = []

    start_time = time.time()
Example no. 9
if __name__ == '__main__':
    rospy.init_node('pelican_attitude_controller_policy_test',
                    anonymous=True,
                    log_level=rospy.WARN)
    parser = argparse.ArgumentParser()
    default_fpath = osp.join(
        osp.abspath(osp.dirname(__file__)),
        'data/Pelican_position_controller_dagger_for_ppo/Pelican_position_controller_dagger_for_ppo_s3'
    )
    parser.add_argument('--exp',
                        type=str,
                        default="PelicanAttControllerEnv-v0")
    parser.add_argument('--fpath', type=str, default=default_fpath)
    parser.add_argument('--len', '-l', type=int, default=500)
    parser.add_argument('--episodes', '-n', type=int, default=100)
    parser.add_argument('--itr', '-i', type=int, default=-1)
    parser.add_argument('--seed', '-s', type=int, default=None)
    parser.add_argument('--deterministic', '-d', action='store_true')
    args = parser.parse_args()
    env = gym.make(args.exp)
    outdir = '/tmp/openai_ros_experiments/'
    env = gym.wrappers.Monitor(env, outdir, force=True)
    get_action = load_policy(args.fpath, args.itr if args.itr >= 0 else 'last',
                             args.deterministic)
    logger_kwargs = setup_logger_kwargs(args.exp + '_test', args.seed)
    run_policy(env,
               get_action,
               args.len,
               args.episodes,
               logger_kwargs=logger_kwargs)
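
run_policy here is a SpinningUp-style test utility; a minimal sketch of what such a rollout loop does (ignoring the logger plumbing, and not the project's exact code):

def run_policy(env, get_action, max_ep_len, num_episodes, logger_kwargs=None):
    # Roll out the loaded policy and report per-episode returns.
    for ep in range(num_episodes):
        o, ep_ret, ep_len, done = env.reset(), 0.0, 0, False
        while not (done or ep_len == max_ep_len):
            o, r, done, _ = env.step(get_action(o))
            ep_ret += r
            ep_len += 1
        print("Episode %d: return=%.2f, length=%d" % (ep, ep_ret, ep_len))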
Example no. 10
                       task_name=args.task_name,
                       seed=args.seed,
                       visualize_reward=False,
                       from_pixels=(args.encoder_type == 'pixel'),
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.action_repeat)
    if args.encoder_type == 'pixel':
        env = DMCFrameStack(env, k=args.frame_stack)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    state_dim = env.observation_space.shape

    from utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    expert_data_file = os.path.join(logger_kwargs["output_dir"], "experts")
    with open(
            os.path.join(
                expert_data_file, args.domain_name + "_" + args.task_name +
                "_epoch" + str(args.expert_num) + ".pkl"), "rb") as f:
        expert_data = pickle.load(f)

    out_kwargs = setup_logger_kwargs(args.out_dir, args.seed)
    logger = EpochLogger(**out_kwargs)
    writer = SummaryWriter(os.path.join(logger.output_dir, "logs"))
    with open(os.path.join(logger.output_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)
    os.makedirs(os.path.join(logger.output_dir, "checkpoints"), exist_ok=True)
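
To populate the checkpoints directory created above, a hypothetical save helper (the model and itr arguments are assumptions, not from the snippet):

import os
import torch

def save_checkpoint(model, output_dir, itr):
    # Save model weights under <output_dir>/checkpoints/model_<itr>.pt (sketch).
    ckpt_dir = os.path.join(output_dir, "checkpoints")
    os.makedirs(ckpt_dir, exist_ok=True)
    path = os.path.join(ckpt_dir, "model_{}.pt".format(itr))
    torch.save(model.state_dict(), path)
    return path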