Example No. 1
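This test, from OpenAI's baselines repository, verifies that the MPI-aware MpiAdam optimizer reproduces TensorFlow's built-in AdamOptimizer step for step: both passes start from the same seed and freshly initialized variables, so the two printed loss sequences should match.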
import numpy as np
import tensorflow as tf
import baselines.common.tf_util as U
from baselines.common.mpi_adam import MpiAdam


def test_MpiAdam():
    # tf.get_default_session() below assumes an active default session.
    sess = U.single_threaded_session()
    sess.__enter__()

    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
    do_update = U.function([], loss, updates=[update_op])

    # First pass: plain TensorFlow Adam.
    tf.get_default_session().run(tf.global_variables_initializer())
    for i in range(10):
        print(i, do_update())

    # Re-seed and re-initialize so both optimizers start from the same state.
    tf.set_random_seed(0)
    tf.get_default_session().run(tf.global_variables_initializer())

    # Second pass: flat gradients applied through MpiAdam. Note that
    # lossandgrad must NOT run update_op as well, or the TF optimizer
    # would also step the variables and the comparison would be invalid.
    var_list = [a, b]
    lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)])
    adam = MpiAdam(var_list)

    for i in range(10):
        l, g = lossandgrad()
        adam.update(g, stepsize)
        print(i, l)
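MpiAdam averages gradients across MPI workers, so the comparison is most interesting with more than one process. Assuming the module runs this test under a __main__ guard, as in baselines, a hypothetical launch would be:

    mpirun -np 2 python -m baselines.common.mpi_adam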
Example No. 2
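This helper, in the style of the baselines test utilities, smoke-tests a learning algorithm end to end: it seeds every RNG, trains a model via learn_fn on a single vectorized environment, rolls the resulting policy out for n_trials steps (threading recurrent state through model.step when the policy has one), and asserts that the accumulated reward clears min_reward_fraction per trial.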
# Assumed imports for a standalone run; `np_random` is gym's space-sampling
# RNG (exposed as gym.spaces.prng in older gym releases).
import numpy as np
import tensorflow as tf
from gym.spaces.prng import np_random
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

N_TRIALS = 100  # module-level constant in the original file; value assumed


def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    np.random.seed(0)
    np_random.seed(0)

    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True)).as_default():
        tf.set_random_seed(0)

        model = learn_fn(env)

        sum_rew = 0
        done = True

        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state

            # Recurrent policies carry state between steps; feed-forward
            # policies return state=None and take plain observations.
            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)

        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(
                sum_rew, min_reward_fraction, n_trials)
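A hypothetical invocation; the environment name, learn function, and threshold below are illustrative, not from the original snippet:

    import gym
    from baselines import ppo2  # assumed available

    env_fn = lambda: gym.make('CartPole-v0')
    learn_fn = lambda env: ppo2.learn(env=env, network='mlp', total_timesteps=30000)
    simple_test(env_fn, learn_fn, min_reward_fraction=0.7)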
Example No. 3
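The constructor of a TensorFlow policy-optimization class from the Guided Policy Search (GPS) codebase: it merges the caller's hyperparameters into the POLICY_OPT_TF defaults, seeds TensorFlow, builds the network and solver, wraps everything in a TfPolicy, and records which slices of the observation vector hold state data versus image data.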
    def __init__(self, hyperparams, dO, dU):
        config = copy.deepcopy(POLICY_OPT_TF)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.feat_vals = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'])

        # List of indices for state (vector) data and image (tensor) data in observation.
        self.x_idx, self.img_idx, i = [], [], 0
        if 'obs_image_data' not in self._hyperparams['network_params']:
            self._hyperparams['network_params'].update({'obs_image_data': []})
        for sensor in self._hyperparams['network_params']['obs_include']:
            dim = self._hyperparams['network_params']['sensor_dims'][sensor]
            if sensor in self._hyperparams['network_params']['obs_image_data']:
                self.img_idx = self.img_idx + list(range(i, i + dim))
            else:
                self.x_idx = self.x_idx + list(range(i, i + dim))
            i += dim
        # tf.initialize_all_variables() is deprecated; the modern
        # equivalent is tf.global_variables_initializer().
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
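Example No. 4

A baselines-style global seeding helper: it derives a per-worker seed from the MPI rank (so parallel workers do not share RNG streams) and applies it to TensorFlow, numpy, and Python's random module in one call.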
import random

import numpy as np


def set_global_seeds(i):
    try:
        # mpi4py exposes the communicator module as mpi4py.MPI.
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
    except ImportError:
        rank = 0

    # Offset the seed by rank so each MPI worker gets a distinct stream.
    myseed = i + 1000 * rank if i is not None else None
    try:
        import tensorflow as tf
        tf.set_random_seed(myseed)
    except ImportError:
        pass
    np.random.seed(myseed)
    random.seed(myseed)
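Example No. 5

A TD3 training driver: each trial derives a seed (fixed, or freshly drawn when change_seed is set), seeds numpy, TensorFlow, and both gym environments with it, trains the agent, and appends the per-trial returns to a results file.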
def main(args):
    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:

            if args['change_seed']:
                # Draw a fresh random offset for this trial.
                rand_seed = np.random.randint(1, 1000)
            else:
                rand_seed = 0
            env = gym.make(args['env'])

            np.random.seed(int(args['random_seed']) + int(rand_seed))
            tf.set_random_seed(int(args['random_seed']) + int(rand_seed))
            env.seed(int(args['random_seed']) + int(rand_seed))

            env_test = gym.make(args['env'])
            env_test.seed(int(args['random_seed']) + int(rand_seed))

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            print('action_space.shape', env.action_space.shape)
            print('observation_space.shape', env.observation_space.shape)
            action_bound = env.action_space.high
            # The agent assumes a symmetric action bound.
            assert env.action_space.high[0] == -env.action_space.low[0]

            agent = TD3(sess,
                        env,
                        state_dim,
                        action_dim,
                        action_bound,
                        int(args['minibatch_size']),
                        tau=float(args['tau']),
                        actor_lr=float(args['actor_lr']),
                        critic_lr=float(args['critic_lr']),
                        gamma=float(args['gamma']),
                        hidden_dim=np.asarray(args['hidden_dim']))

            # actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

            if args['use_gym_monitor']:
                if not args['render_env']:
                    env = wrappers.Monitor(env,
                                           args['monitor_dir'],
                                           video_callable=False,
                                           force=True)
                else:
                    env = wrappers.Monitor(
                        env,
                        args['monitor_dir'],
                        video_callable=lambda episode_id: episode_id % 50 == 0,
                        force=True)

            step_R_i = train(sess, env, env_test, args, agent)

            result_path = "./results/trials/"
            try:
                import pathlib
                pathlib.Path(result_path).mkdir(parents=True, exist_ok=True)
            except Exception:
                print("The results directory does not exist and could not "
                      "be created; the trial results will not be saved.")

            result_filename = args['result_file'] + '_' + args[
                'env'] + '_trial_idx_' + str(int(args['trial_idx'])) + '.txt'

            if args['overwrite_result'] and ite == 0:
                np.savetxt(result_filename, np.asarray(step_R_i))
            else:
                data = np.loadtxt(result_filename, dtype=float)
                data_new = np.vstack((data, np.asarray(step_R_i)))
                np.savetxt(result_filename, data_new)

            if args['use_gym_monitor']:
                # Close the Monitor wrapper to flush recorded stats.
                env.close()
Example No. 6
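The matching TD3 evaluation driver: it rebuilds the seeded environment, optionally wraps it in gym's Monitor to record video, loads a saved agent checkpoint, and runs the test routine.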
def main(args):
    result_name = 'TD3_' + args['env'] + '_trial_idx_' + str(
        int(args['trial_idx']))

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:

            if args['change_seed']:
                rand_seed = 10 * ite
            else:
                rand_seed = 0

            np.random.seed(int(args['random_seed']) + int(rand_seed))
            tf.set_random_seed(int(args['random_seed']) + int(rand_seed))
            env = gym.make(args['env'])
            env.seed(int(args['random_seed']) + int(rand_seed))

            if args['save_video']:
                try:
                    import pathlib
                    pathlib.Path("./Video/" + args['env']).mkdir(parents=True,
                                                                 exist_ok=True)
                    video_relative_path = "./Video/" + args['env'] + "/"

                    ## To save video of the first episode
                    env = gym.wrappers.Monitor(
                        env,
                        video_relative_path,
                        video_callable=lambda episode_id: episode_id == 0,
                        force=True)
                    ## To save a video of every episode instead:
                    # env = gym.wrappers.Monitor(env, video_relative_path, \
                    #    video_callable=lambda episode_id: episode_id % 1 == 0, force=True)
                except Exception:
                    print(
                        "Cannot create video directories. Video will not be saved."
                    )

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # The agent assumes a symmetric action bound.
            assert env.action_space.high[0] == -env.action_space.low[0]

            if args['method_name'] == 'TD3':
                from TD3_keras_agent import TD3
                agent = TD3(
                    sess,
                    env,
                    state_dim,
                    action_dim,
                    action_bound,
                    int(args['minibatch_size']),
                    tau=float(args['tau']),
                    actor_lr=float(args['actor_lr']),
                    critic_lr=float(args['critic_lr']),
                    gamma=float(args['gamma']),
                    hidden_dim=np.asarray(args['hidden_dim']),
                )
            else:
                # Without this guard, `agent` would be undefined below and
                # agent.load_model would raise a NameError.
                raise ValueError('Unknown method_name: ' + str(args['method_name']))

            agent.load_model(iteration=int(args['load_model_iter']),
                             expname=result_name)

            # if args['use_gym_monitor']:
            #     if not args['render_env']:
            #         env = wrappers.Monitor(
            #                 env, args['monitor_dir'], video_callable=False, force=True)
            #     else:
            #         env = wrappers.Monitor(env, args['monitor_dir'], video_callable=lambda episode_id: episode_id==0, force=True)

            test(sess, env, args, agent, result_name)
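All six examples rely on the same TF1 contract: tf.set_random_seed sets the graph-level seed, making random ops deterministic across runs. A minimal standalone sketch:

    import tensorflow as tf

    tf.set_random_seed(0)
    x = tf.random_uniform([3])
    with tf.Session() as sess:
        print(sess.run(x))  # prints the same three numbers on every run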