Example #1
def main():
    #cur_dir = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])  # gps_filepath is defined elsewhere in the original script
    EXP_DIR = BASE_DIR + '/../experiments/laplace/'
    network_dir = EXP_DIR + 'data_files_pde/' + ('policy_itr_%02d' %
                                                 0) + '.pkl'
    hyperparams_file = EXP_DIR + 'hyperparams.py'
    hyperparams = imp.load_source('hyperparams',
                                  hyperparams_file).config['algorithm']
    #print(network_dir)
    #print(hyperparams.keys())
    pol_dict = pickle.load(open(network_dir, "rb"), encoding='latin1')
    print(pol_dict.keys(), pol_dict['scale'].shape, pol_dict['bias'].shape)
    #print(pol_dict['scale'])
    #print(pol_dict['bias'])
    network_config = hyperparams['policy_opt']['network_params']
    network_config['deg_action'] = 1050
    network_config['param_dim'] = network_config['deg_action']
    network_config['deg_obs'] = network_config['deg_action'] * (
        network_config['history_len'] * 2 + 1) + network_config['history_len']
    network = TfPolicy.load_policy(network_dir,
                                   first_derivative_network,
                                   network_config=network_config)
    np.random.seed(0)
    x = np.random.randn(network_config['deg_action'])
    np.random.seed(0)
    obs = np.random.randn(network_config['deg_obs'])
    act = network.act(x, obs, 0, None, usescale=False)
    print(x.shape, act.shape, obs.shape, act[0:20])
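
The observation dimension in this example is derived from the action dimension and the history length: deg_obs = deg_action * (2 * history_len + 1) + history_len. A quick worked instance of that arithmetic; the history_len value below is purely illustrative, the real one comes from hyperparams.py:

# Illustrative arithmetic for deg_obs; history_len here is made up.
deg_action = 1050
history_len = 25

deg_obs = deg_action * (history_len * 2 + 1) + history_len
print(deg_obs)  # 1050 * 51 + 25 = 53575
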
Example #2
    def __init__(self, hyperparams, dO, dU):
        config = copy.deepcopy(POLICY_OPT_TF)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        self.tf_iter = 0
        self.checkpoint_file = self._hyperparams['checkpoint_prefix']
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU), self.sess, self.device_string)
        init_op = tf.initialize_all_variables()  # deprecated alias; newer TF1 code uses tf.global_variables_initializer()
        self.sess.run(init_op)
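
The device selection in this constructor follows a simple pattern: default to the CPU device string and switch to a GPU string when use_gpu is set. A minimal, self-contained sketch of that logic (the dictionary keys mirror the example; everything else is illustrative):

# Illustrative device-string selection mirroring the constructor above.
hyperparams = {'use_gpu': 1, 'gpu_id': 0}

device_string = "/cpu:0"
if hyperparams['use_gpu'] == 1:
    device_string = "/gpu:" + str(hyperparams['gpu_id'])

print(device_string)  # "/gpu:0"
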
Example #3
    def run(self):

        #itr_start = 0
        #guided_steps = [0.5, 0.4, 0.3, 0.2, 0.1]
        self.algorithm.policy_opt.policy = TfPolicy.load_policy(
            policy_dict_path=self.policy_path,
            tf_generator=fully_connected_tf_network,
            network_config=self.network_config)

        #for itr in range(itr_start, self._hyperparams['iterations']):
        #for m, cond in enumerate(self._train_idx):
        #        for i in range(self._hyperparams['num_samples']):
        #             self._take_sample(itr, cond, m, i)
        #    print('Iteration %d' % (itr))
        #     traj_sample_lists = [self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx]
        #     # Clear agent samples.
        #     self.agent.clear_samples()
        #     self.algorithm.iteration(traj_sample_lists)
        #
        #     #pol_sample_lists = self._take_policy_samples(self._train_idx)
        #
        #     #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists)
        #     self.algorithm.policy_opt.policy.pickle_policy(self.algorithm.policy_opt._dO, self.algorithm.policy_opt._dU, self._data_files_dir + ('policy_itr_%02d' % itr))
        #     self._test_peformance(t_length=50)
        #self.algorithm.policy_opt.policy = TfPolicy.load_policy(policy_dict_path=self.policy_path, tf_generator=fully_connected_tf_network, network_config=self.network_config)
        self._test_peformance(t_length=50)

        #pol_sample_lists = self._take_policy_samples(self._test_idx)
        #self._prev_traj_costs, self._prev_pol_costs = self.disp.update(self.algorithm, self.agent, self._test_idx, pol_sample_lists)

        if 'on_exit' in self._hyperparams:
            self._hyperparams['on_exit'](self._hyperparams)
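
run() ends with an optional callback: if the hyperparameter dictionary contains an 'on_exit' callable, it is invoked with the dictionary itself. A tiny self-contained sketch of that hook pattern (the callback body below is made up):

# Illustrative 'on_exit' hook: the config can carry a callable that
# receives the whole config when the run finishes.
def on_exit(hyperparams):
    print('finished run for', hyperparams.get('experiment_name', '<unnamed>'))

hyperparams = {'experiment_name': 'laplace', 'on_exit': on_exit}

if 'on_exit' in hyperparams:
    hyperparams['on_exit'](hyperparams)
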
Example #4
    def __init__(self, hyperparams, dO, dU):
        config = copy.deepcopy(POLICY_OPT)
        config.update(hyperparams)

        self._hyperparams = config
        self._dO = dO
        self._dU = dU

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']
        ## this place may need to be changed later
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_ids']
            self.device_string = "/gpu:" + str(self.gpu_device[0])
        self.act_op = None  # mu_hat
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        #self.sess = tf.Session()
        self.sess = tf.Session(
            config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                  allow_soft_placement=True))
        self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU),
                               self.sess, self.device_string)
        #init_op = tf.initialize_all_variables()
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
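
This constructor differs from the earlier one mainly in the session setup: GPU memory is allocated on demand (allow_growth), ops fall back to CPU when they have no GPU kernel (allow_soft_placement), and the modern global_variables_initializer is used. A hedged sketch of just the session configuration, assuming TensorFlow 1.x as in the examples (use tf.compat.v1 on TF2):

# TF1-style session setup mirroring the constructor above (assumes TF 1.x).
import tensorflow as tf

config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True),  # allocate GPU memory on demand
    allow_soft_placement=True)                     # fall back to CPU if an op lacks a GPU kernel
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())        # current replacement for initialize_all_variables
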
Example #5
    def __init__(self, hyperparams, dO, dU):
        config = copy.deepcopy(POLICY_OPT_TF)
        if hyperparams is None:
            return

        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.feat_vals = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'])
        # List of indices for state (vector) data and image (tensor) data in observation.
        self.x_idx, self.img_idx, i = [], [], 0
        if 'obs_image_data' not in self._hyperparams['network_params']:
            self._hyperparams['network_params'].update({'obs_image_data': []})
        for sensor in self._hyperparams['network_params']['obs_include']:
            dim = self._hyperparams['network_params']['sensor_dims'][sensor]
            if sensor in self._hyperparams['network_params']['obs_image_data']:
                self.img_idx = self.img_idx + list(range(i, i + dim))
            else:
                self.x_idx = self.x_idx + list(range(i, i + dim))
            i += dim
        init_op = tf.initialize_all_variables()  # deprecated alias; newer TF1 code uses tf.global_variables_initializer()
        self.sess.run(init_op)

        self.normalize = self._hyperparams['normalize']
        self.policy.normalize = self.normalize
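
The loop over obs_include above builds two index lists that split the flat observation vector into state entries (x_idx) and image entries (img_idx). A self-contained sketch of that bookkeeping with made-up sensor names and dimensions:

# Illustrative observation index bookkeeping (sensor names and dims are made up).
sensor_dims = {'JOINT_ANGLES': 7, 'RGB_IMAGE': 12, 'END_EFFECTOR': 3}
obs_include = ['JOINT_ANGLES', 'RGB_IMAGE', 'END_EFFECTOR']
obs_image_data = ['RGB_IMAGE']

x_idx, img_idx, i = [], [], 0
for sensor in obs_include:
    dim = sensor_dims[sensor]
    if sensor in obs_image_data:
        img_idx += list(range(i, i + dim))
    else:
        x_idx += list(range(i, i + dim))
    i += dim

print(x_idx)    # [0..6] + [19..21]
print(img_idx)  # [7..18]
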
Example #6
    def __init__(self, hyperparams, dO, dU):
        config = copy.deepcopy(POLICY_OPT_TF)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.obs_tensor = None
        self.cost_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.feat_vals = None
        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.center_adv = self._hyperparams.get("center_adv", True)
        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tfconfig)
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'],
            policy_type=self.policy_type,
            log_std=self.log_std)
        # List of indices for state (vector) data and image (tensor) data in observation.
        self.x_idx, self.img_idx, i = [], [], 0
        if 'obs_image_data' not in self._hyperparams['network_params']:
            self._hyperparams['network_params'].update({'obs_image_data': []})
        for sensor in self._hyperparams['network_params']['obs_include']:
            dim = self._hyperparams['network_params']['sensor_dims'][sensor]
            if sensor in self._hyperparams['network_params']['obs_image_data']:
                self.img_idx = self.img_idx + list(range(i, i + dim))
            else:
                self.x_idx = self.x_idx + list(range(i, i + dim))
            i += dim
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
Example #7
def test_policy_load():
    tf_map = POLICY_OPT_TF['network_model']
    check_path = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint/_pol'
    pol = TfPolicy.load_policy(check_path, tf_map)

    deg_obs = 14
    deg_action = 7
    N = 20
    T = 30
    obs = np.random.randn(N, T, deg_obs)
    obs_reshaped = np.reshape(obs, (N*T, deg_obs))
    pol.scale = np.diag(1.0 / np.std(obs_reshaped, axis=0))
    pol.bias = -np.mean(obs_reshaped.dot(pol.scale), axis=0)
    noise = np.random.randn(deg_action)
    pol.act(None, obs[0, 0], None, None)
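
The scale/bias assignment in this test is the usual observation whitening: scale is a diagonal matrix of inverse standard deviations and bias shifts the scaled data to zero mean, so obs.dot(scale) + bias is roughly zero-mean, unit-variance per dimension. A self-contained numpy check of that property (the data below is synthetic):

import numpy as np

# Mirror the scale/bias construction from the test above on synthetic data.
obs = np.random.randn(600, 14) * 3.0 + 1.0
scale = np.diag(1.0 / np.std(obs, axis=0))
bias = -np.mean(obs.dot(scale), axis=0)

whitened = obs.dot(scale) + bias
print(np.allclose(whitened.mean(axis=0), 0.0))  # True
print(np.allclose(whitened.std(axis=0), 1.0))   # True
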
Example #8
    def __init__(self, hyperparams, dO, dU):

        config = copy.deepcopy(POLICY_OPT_TF)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        #self.debug=True
        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.checkpoint_file = self._hyperparams['checkpoint_prefix']
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.feat_vals = None
        ##
        self.conv_layer_0 = None
        self.conv_layer_1 = None
        self.conv_layer_2 = None
        # self.main_itr = None    # Set this value to None when training
        # self.main_itr = 10      # Set this value to i-th iteration when testing policy at i-th iteration
        # or when resuming training at i-th iteration.
        self.main_itr = 6
        ##

        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        #self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, self.feat_op,
        #                       np.zeros(dU), self.sess, self.device_string, copy_param_scope=self._hyperparams['copy_param_scope'])
        ##
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'],
            conv_layer_0=self.conv_layer_0,
            conv_layer_1=self.conv_layer_1,
            conv_layer_2=self.conv_layer_2)
        ##
        # List of indices for state (vector) data and image (tensor) data in observation.
        self.x_idx, self.img_idx, i = [], [], 0
        if 'obs_image_data' not in self._hyperparams['network_params']:
            self._hyperparams['network_params'].update({'obs_image_data': []})
        for sensor in self._hyperparams['network_params']['obs_include']:
            dim = self._hyperparams['network_params']['sensor_dims'][sensor]
            if sensor in self._hyperparams['network_params']['obs_image_data']:
                self.img_idx = self.img_idx + list(range(i, i + dim))
            else:
                self.x_idx = self.x_idx + list(range(i, i + dim))
            i += dim
        #init_op = tf.initialize_all_variables()
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
Example #9
def pde_policy_comp(hyperparams,
                    exp_dir,
                    pols,
                    input_dim_1=128,
                    input_dim_2=128,
                    a=1.0,
                    b=1.0,
                    learning_rate=0.0001,
                    mem_len=10,
                    momentum=0.9,
                    err=1e-4):
    dx = a / input_dim_1
    dy = b / input_dim_2
    x_pts = np.arange(0, a + dx / 2, dx)
    y_pts = np.arange(0, b + dy / 2, dy)
    k = np.random.rand(1)[0] + 0.5
    #session = tf.Session()
    session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True),
                                               allow_soft_placement=True))

    cost, fcn_family, b = def_cost(dx, dy, x_pts, y_pts, input_dim_1,
                                   input_dim_2, k, session)
    normb = np.linalg.norm(b)

    obs_flag = hyperparams.config['algorithm']['policy_opt']['network_params']

    history_len = hyperparams.config['agent']['history_len']
    param_dim = fcn_family.get_total_num_dim()
    #print(param_dim)
    init_loc = np.random.rand(param_dim, 1)
    fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]

    SENSOR_DIMS = {
        CUR_LOC: param_dim,
        PAST_OBJ_VAL_DELTAS: history_len,
        PAST_GRADS: history_len * param_dim,
        PAST_LOC_DELTAS: history_len * param_dim,
        CUR_GRAD: param_dim,
        ACTION: param_dim
    }
    obs_flag['sensor_dims'] = SENSOR_DIMS
    obs_flag['param_dim'] = param_dim

    agent = {
        'substeps': hyperparams.config['agent']['substeps'],
        'conditions': 1,
        'dt': hyperparams.config['agent']['dt'],
        'T': hyperparams.config['agent']['T'],
        'sensor_dims': SENSOR_DIMS,
        'state_include': hyperparams.config['agent']['state_include'],
        'obs_include': hyperparams.config['agent']['obs_include'],
        'history_len': history_len,
        'fcns': fcns,
        'fcn_family': fcn_family
    }

    network_config = hyperparams.config['algorithm']['policy_opt'][
        'network_params']
    network_config['deg_action'] = param_dim
    network_config['param_dim'] = network_config['deg_action']
    network_config['deg_obs'] = int(
        np.sum([SENSOR_DIMS[sensor] for sensor in agent['obs_include']]))
    print("****************************************************************")
    print('Initial relative error:',
          np.sqrt(cost.evaluate(fcns[0]['init_loc'])) / normb)

    gd_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    cg_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    lbfgs_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    mm_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    lr_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    #agent_gd = copy.deepcopy(agent)
    #agent_cg = copy.deepcopy(agent)
    #agent_lbfgs = copy.deepcopy(agent)
    #agent_mm = copy.deepcopy(agent)
    #agent_lr = copy.deepcopy(agent)
    for i in range(len(pols)):

        agent['fcns'] = gd_fcns
        Agent_gd = AgentLTO(agent)
        gd_pol = GradientDescentPolicy(Agent_gd, learning_rate, 0)
        agent['fcns'] = cg_fcns
        Agent_cg = AgentLTO(agent)
        cg_pol = ConjugateGradientPolicy(Agent_cg, learning_rate, 0)
        agent['fcns'] = lbfgs_fcns
        Agent_lbfgs = AgentLTO(agent)
        lbfgs_pol = LBFGSPolicy(Agent_lbfgs, learning_rate, mem_len, 0)
        agent['fcns'] = mm_fcns
        Agent_mm = AgentLTO(agent)
        mm_pol = MomentumPolicy(Agent_mm, learning_rate, momentum, 0)
        agent['fcns'] = lr_fcns
        Agent_lr = AgentLTO(agent)
        network_dir = exp_dir + 'data_files_pde/' + ('policy_itr_%02d' %
                                                     pols[i]) + '.pkl'
        lr_pol = TfPolicy.load_policy(network_dir,
                                      first_derivative_network,
                                      network_config=network_config)

        x_gd = np.expand_dims(Agent_gd.sample(gd_pol,
                                              0,
                                              verbose=False,
                                              save=False,
                                              noisy=False,
                                              usescale=False).get_X()[-1],
                              axis=1)
        gd_fcns[0]['init_loc'] = x_gd
        print('Relative error after', agent['T'],
              'iteration using GradientDescent Policy :',
              np.sqrt(cost.evaluate(x_gd)) / normb)
        x_cg = np.expand_dims(Agent_cg.sample(cg_pol,
                                              0,
                                              verbose=False,
                                              save=False,
                                              noisy=False,
                                              usescale=False).get_X()[-1],
                              axis=1)
        cg_fcns[0]['init_loc'] = x_cg
        print('Relative error after', agent['T'],
              'iteration using ConjugateGradient Policy:',
              np.sqrt(cost.evaluate(x_cg)) / normb)
        x_lbfgs = np.expand_dims(Agent_lbfgs.sample(
            lbfgs_pol,
            0,
            verbose=False,
            save=False,
            noisy=False,
            usescale=False).get_X()[-1],
                                 axis=1)
        lbfgs_fcns[0]['init_loc'] = x_lbfgs
        print('Relative error after', agent['T'],
              'iteration using LBFGS Policy        :',
              np.sqrt(cost.evaluate(x_lbfgs)) / normb)
        x_mm = np.expand_dims(Agent_mm.sample(mm_pol,
                                              0,
                                              verbose=False,
                                              save=False,
                                              noisy=False,
                                              usescale=False).get_X()[-1],
                              axis=1)
        mm_fcns[0]['init_loc'] = x_mm
        print('Relative error after', agent['T'],
              'iteration using Momentum Policy        :',
              np.sqrt(cost.evaluate(x_mm)) / normb)
        x_lr = np.expand_dims(Agent_lr.sample(lr_pol,
                                              0,
                                              verbose=False,
                                              save=False,
                                              noisy=False,
                                              usescale=False).get_X()[-1],
                              axis=1)
        lr_fcns[0]['init_loc'] = x_lr
        print('Relative error after', agent['T'],
              'iteration using Learned Policy         :',
              np.sqrt(cost.evaluate(x_lr)) / normb)
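
The "relative error" printed throughout this comparison is a relative residual: the square root of the cost (a squared residual norm) divided by ||b||. A hedged numpy illustration of the same metric on a toy linear system; cost.evaluate in the example is assumed to play the role of the squared residual here:

import numpy as np

# Toy version of the reported metric: ||A x - b|| / ||b||.
rng = np.random.RandomState(0)
A = rng.randn(50, 50) + 50 * np.eye(50)  # well-conditioned toy system
b = rng.randn(50)
x = np.linalg.solve(A, b)                # stand-in for a policy's final iterate

squared_residual = np.sum((A.dot(x) - b) ** 2)  # assumed analogue of cost.evaluate(x)
rel_err = np.sqrt(squared_residual) / np.linalg.norm(b)
print(rel_err)  # ~1e-15 for the exact solve
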