def main():
    """Load the iteration-0 ``laplace`` policy and print one sample action.

    Reads the experiment's ``hyperparams.py``, inspects the pickled policy
    dictionary, fixes the network dimensions for a 1050-dimensional action,
    restores the policy through ``TfPolicy.load_policy`` and prints the shapes
    of the inputs/outputs together with the first 20 action components.
    """
    base_dir = '/'.join(str.split(gps_filepath, '/')[:-2])
    exp_dir = base_dir + '/../experiments/laplace/'
    network_dir = exp_dir + 'data_files_pde/' + ('policy_itr_%02d' % 0) + '.pkl'
    hyperparams_file = exp_dir + 'hyperparams.py'
    hyperparams = imp.load_source('hyperparams', hyperparams_file).config['algorithm']

    # Open the pickle through a context manager so the file handle is closed
    # deterministically, even if unpickling raises.
    with open(network_dir, "rb") as policy_file:
        pol_dict = pickle.load(policy_file, encoding='latin1')
    print(pol_dict.keys(), pol_dict['scale'].shape, pol_dict['bias'].shape)

    network_config = hyperparams['policy_opt']['network_params']
    network_config['deg_action'] = 1050
    network_config['param_dim'] = network_config['deg_action']
    # Observation size: deg_action * (2 * history_len + 1) + history_len.
    network_config['deg_obs'] = network_config['deg_action'] * (
        network_config['history_len'] * 2 + 1) + network_config['history_len']
    network = TfPolicy.load_policy(network_dir, first_derivative_network,
                                   network_config=network_config)

    # The generator is re-seeded before each draw, so the printed action is
    # reproducible from run to run.
    np.random.seed(0)
    x = np.random.randn(network_config['deg_action'])
    np.random.seed(0)
    obs = np.random.randn(network_config['deg_obs'])

    act = network.act(x, obs, 0, None, usescale=False)
    print(x.shape, act.shape, obs.shape, act[0:20])
def __init__(self, hyperparams, dO, dU):
    """Build the TensorFlow policy optimizer.

    Args:
        hyperparams: overrides applied on top of a deep copy of
            ``POLICY_OPT_TF``.
        dO: observation dimension, forwarded to ``PolicyOpt.__init__``.
        dU: action dimension; sizes the variance vector and the policy.
    """
    settings = copy.deepcopy(POLICY_OPT_TF)
    settings.update(hyperparams)
    PolicyOpt.__init__(self, settings, dO, dU)

    self.tf_iter = 0
    self.checkpoint_file = self._hyperparams['checkpoint_prefix']
    self.batch_size = self._hyperparams['batch_size']

    # Device selection: CPU unless 'use_gpu' is exactly 1.
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    else:
        self.device_string = "/cpu:0"

    # Graph handles start out empty, before the network and solver are built.
    self.act_op = None  # mu_hat
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None

    self.init_network()
    self.init_solver()

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU),
                           self.sess, self.device_string)

    # Initialize every variable created while building the graph.
    self.sess.run(tf.initialize_all_variables())
def run(self):
    """Restore a saved policy and evaluate it.

    The sampling/training loop that used to live here is disabled (it was
    commented out); this method only loads the policy stored at
    ``self.policy_path``, runs ``_test_peformance`` with ``t_length=50`` and
    finally invokes the optional ``'on_exit'`` hook from the hyperparameters.
    """
    restored_policy = TfPolicy.load_policy(
        policy_dict_path=self.policy_path,
        tf_generator=fully_connected_tf_network,
        network_config=self.network_config,
    )
    self.algorithm.policy_opt.policy = restored_policy

    self._test_peformance(t_length=50)

    # Nothing more to do when no exit hook was configured.
    if 'on_exit' not in self._hyperparams:
        return
    exit_hook = self._hyperparams['on_exit']
    exit_hook(self._hyperparams)
def __init__(self, hyperparams, dO, dU):
    """Build the TensorFlow policy optimizer with a growth-limited GPU session.

    The merged configuration and the dimensions are stored directly on the
    instance (``_hyperparams``, ``_dO``, ``_dU``).

    Args:
        hyperparams: overrides applied on top of a deep copy of ``POLICY_OPT``.
        dO: observation dimension.
        dU: action dimension; sizes the variance vector and the policy.
    """
    settings = copy.deepcopy(POLICY_OPT)
    settings.update(hyperparams)
    self._hyperparams = settings
    self._dO = dO
    self._dU = dU

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.batch_size = self._hyperparams['batch_size']

    # NOTE (from the original author): this place may need to be changed later.
    # Only the first entry of 'gpu_ids' is used for the device string.
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_ids']
        self.device_string = "/gpu:" + str(self.gpu_device[0])
    else:
        self.device_string = "/cpu:0"

    # Graph handles start out empty, before the network and solver are built.
    self.act_op = None  # mu_hat
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None

    self.init_network()
    self.init_solver()

    self.var = self._hyperparams['init_var'] * np.ones(dU)

    # Allocate GPU memory on demand and allow soft device placement.
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
        allow_soft_placement=True)
    self.sess = tf.Session(config=session_config)

    self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU),
                           self.sess, self.device_string)

    self.sess.run(tf.global_variables_initializer())
def __init__(self, hyperparams, dO, dU):
    """Build the TensorFlow policy optimizer (feature-op / normalize variant).

    Args:
        hyperparams: overrides applied on top of a deep copy of
            ``POLICY_OPT_TF``. When ``None`` the constructor returns
            immediately and leaves the instance without any attributes set.
        dO: observation dimension, forwarded to ``PolicyOpt.__init__``.
        dU: action dimension; sizes the variance vector and the policy.
    """
    settings = copy.deepcopy(POLICY_OPT_TF)
    if hyperparams is None:
        return
    settings.update(hyperparams)
    PolicyOpt.__init__(self, settings, dO, dU)

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.batch_size = self._hyperparams['batch_size']

    # Device selection: CPU unless 'use_gpu' is exactly 1.
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    else:
        self.device_string = "/cpu:0"

    # Graph handles start out empty, before the network and solver are built.
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None

    self.init_network()
    self.init_solver()

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(
        dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
        self.sess, self.device_string,
        copy_param_scope=self._hyperparams['copy_param_scope'])

    # List of indices for state (vector) data and image (tensor) data in
    # observation.
    self.x_idx, self.img_idx = [], []
    net_params = self._hyperparams['network_params']
    if 'obs_image_data' not in net_params:
        net_params['obs_image_data'] = []
    offset = 0
    for sensor in net_params['obs_include']:
        width = net_params['sensor_dims'][sensor]
        columns = list(range(offset, offset + width))
        if sensor in net_params['obs_image_data']:
            self.img_idx = self.img_idx + columns
        else:
            self.x_idx = self.x_idx + columns
        offset += width

    self.sess.run(tf.initialize_all_variables())

    # Mirror the normalize flag onto the policy object.
    self.normalize = self._hyperparams['normalize']
    self.policy.normalize = self.normalize
def __init__(self, hyperparams, dO, dU):
    """Build the TensorFlow policy optimizer (cost-tensor / log-std variant).

    Args:
        hyperparams: overrides applied on top of a deep copy of
            ``POLICY_OPT_TF``.
        dO: observation dimension, forwarded to ``PolicyOpt.__init__``.
        dU: action dimension; sizes the variance vector and the policy.
    """
    settings = copy.deepcopy(POLICY_OPT_TF)
    settings.update(hyperparams)
    PolicyOpt.__init__(self, settings, dO, dU)

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.batch_size = self._hyperparams['batch_size']

    # Device selection: CPU unless 'use_gpu' is exactly 1.
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    else:
        self.device_string = "/cpu:0"

    # Graph handles start out empty, before the network and solver are built.
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.obs_tensor = None
    self.cost_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None

    self.init_network()
    self.init_solver()

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.center_adv = self._hyperparams.get("center_adv", True)

    # Let the session grow its GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=session_config)

    # policy_type and log_std are read from the instance here; presumably
    # they are set while the network/solver is built -- TODO confirm.
    self.policy = TfPolicy(
        dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
        self.sess, self.device_string,
        copy_param_scope=self._hyperparams['copy_param_scope'],
        policy_type=self.policy_type,
        log_std=self.log_std)

    # List of indices for state (vector) data and image (tensor) data in
    # observation.
    self.x_idx, self.img_idx = [], []
    net_params = self._hyperparams['network_params']
    if 'obs_image_data' not in net_params:
        net_params['obs_image_data'] = []
    cursor = 0
    for sensor in net_params['obs_include']:
        size = net_params['sensor_dims'][sensor]
        block = list(range(cursor, cursor + size))
        if sensor in net_params['obs_image_data']:
            self.img_idx = self.img_idx + block
        else:
            self.x_idx = self.x_idx + block
        cursor += size

    self.sess.run(tf.global_variables_initializer())
def test_policy_load():
    """Load the checkpointed policy and run a single ``act`` call on it.

    The policy's ``scale``/``bias`` are overwritten with statistics computed
    from random observations before acting. Nothing is asserted; the test
    passes as long as no call raises.
    """
    network_model = POLICY_OPT_TF['network_model']
    checkpoint = gps_path + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint/_pol'
    pol = TfPolicy.load_policy(checkpoint, network_model)

    deg_obs, deg_action = 14, 7
    N, T = 20, 30

    obs = np.random.randn(N, T, deg_obs)
    flat_obs = np.reshape(obs, (N * T, deg_obs))

    # Scale by the inverse per-dimension standard deviation, then shift by the
    # negated mean of the scaled observations.
    inv_std = 1.0 / np.std(flat_obs, axis=0)
    pol.scale = np.diag(inv_std)
    pol.bias = -np.mean(flat_obs.dot(pol.scale), axis=0)

    # Drawn as in the original but never passed on: act() receives noise=None.
    _noise = np.random.randn(deg_action)
    pol.act(None, obs[0, 0], None, None)
def test_policy_load():
    """Smoke test: restore a policy from its checkpoint and call ``act`` once.

    Random observations are used to set the policy's ``scale`` and ``bias``
    attributes; the test makes no assertions and succeeds if nothing raises.
    """
    check_path = (gps_path
                  + '/gps/algorithm/policy_opt/tf_checkpoint/policy_checkpoint/_pol')
    pol = TfPolicy.load_policy(check_path, POLICY_OPT_TF['network_model'])

    # Problem sizes for the synthetic batch.
    deg_obs = 14
    deg_action = 7
    num_samples = 20
    horizon = 30

    obs = np.random.randn(num_samples, horizon, deg_obs)
    stacked = obs.reshape((num_samples * horizon, deg_obs))

    pol.scale = np.diag(1.0 / stacked.std(axis=0))
    pol.bias = -(stacked.dot(pol.scale).mean(axis=0))

    # The noise vector is sampled (keeping RNG consumption unchanged) but the
    # policy is queried with noise=None.
    unused_noise = np.random.randn(deg_action)
    first_obs = obs[0, 0]
    pol.act(None, first_obs, None, None)
def __init__(self, hyperparams, dO, dU):
    """Build the TensorFlow policy optimizer (conv-layer exposing variant).

    Args:
        hyperparams: overrides applied on top of a deep copy of
            ``POLICY_OPT_TF``.
        dO: observation dimension, forwarded to ``PolicyOpt.__init__``.
        dU: action dimension; sizes the variance vector and the policy.
    """
    settings = copy.deepcopy(POLICY_OPT_TF)
    settings.update(hyperparams)
    PolicyOpt.__init__(self, settings, dO, dU)

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.checkpoint_file = self._hyperparams['checkpoint_prefix']
    self.batch_size = self._hyperparams['batch_size']

    # Device selection: CPU unless 'use_gpu' is exactly 1.
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    else:
        self.device_string = "/cpu:0"

    # Graph handles start out empty, before the network and solver are built.
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None

    # Handles for the three convolution layers, handed to the policy below.
    self.conv_layer_0 = None
    self.conv_layer_1 = None
    self.conv_layer_2 = None

    # Set main_itr to None when training; set it to i when testing the
    # policy at the i-th iteration or when resuming training at the i-th
    # iteration. It is currently hard-coded to 6.
    self.main_itr = 6

    self.init_network()
    self.init_solver()

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(
        dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
        self.sess, self.device_string,
        copy_param_scope=self._hyperparams['copy_param_scope'],
        conv_layer_0=self.conv_layer_0,
        conv_layer_1=self.conv_layer_1,
        conv_layer_2=self.conv_layer_2)

    # List of indices for state (vector) data and image (tensor) data in
    # observation.
    self.x_idx, self.img_idx = [], []
    net_params = self._hyperparams['network_params']
    if 'obs_image_data' not in net_params:
        net_params['obs_image_data'] = []
    start = 0
    for sensor in net_params['obs_include']:
        length = net_params['sensor_dims'][sensor]
        indices = list(range(start, start + length))
        if sensor in net_params['obs_image_data']:
            self.img_idx = self.img_idx + indices
        else:
            self.x_idx = self.x_idx + indices
        start += length

    self.sess.run(tf.global_variables_initializer())
def pde_policy_comp(hyperparams, exp_dir, pols, input_dim_1=128, input_dim_2=128,
                    a=1.0, b=1.0, learning_rate=0.0001, mem_len=10,
                    momentum=0.9, err=1e-4):
    """Compare baseline optimizers with learned policies on one PDE cost.

    A cost is built with ``def_cost`` on a grid over ``[0, a] x [0, b]``.
    For every entry of ``pols`` the gradient-descent, conjugate-gradient,
    L-BFGS, momentum and learned policies are each rolled out once and the
    relative error ``sqrt(cost(x)) / norm`` of the final iterate is printed.
    Each optimizer's stored ``'init_loc'`` is overwritten with its final
    iterate, so later entries of ``pols`` start from where the previous
    roll-out of that optimizer ended.

    Args:
        hyperparams: object exposing a ``config`` dict with ``'agent'`` and
            ``'algorithm'`` sections. The ``network_params`` sub-dict is
            modified in place.
        exp_dir: experiment directory prefix; the learned policies are read
            from ``exp_dir + 'data_files_pde/policy_itr_XX.pkl'``.
        pols: iterable of policy iteration numbers to evaluate.
        input_dim_1: number of grid intervals along the first axis.
        input_dim_2: number of grid intervals along the second axis.
        a: extent of the domain along the first axis.
        b: extent of the domain along the second axis.
        learning_rate: step size handed to the four baseline policies.
        mem_len: memory length handed to ``LBFGSPolicy``.
        momentum: momentum coefficient handed to ``MomentumPolicy``.
        err: unused; kept so existing callers remain valid.
    """
    dx = a / input_dim_1
    dy = b / input_dim_2
    x_pts = np.arange(0, a + dx / 2, dx)
    y_pts = np.arange(0, b + dy / 2, dy)
    k = np.random.rand(1)[0] + 0.5

    session = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
        allow_soft_placement=True))

    # The third return value used to be bound to ``b`` and shadowed the
    # domain-extent parameter; it now has its own name. Presumably it is the
    # right-hand side of the discretized system -- TODO confirm in def_cost.
    cost, fcn_family, b_vec = def_cost(dx, dy, x_pts, y_pts,
                                       input_dim_1, input_dim_2, k, session)
    normb = np.linalg.norm(b_vec)

    agent_config = hyperparams.config['agent']
    # One dict: it receives both the sensor layout and the network sizes.
    network_config = hyperparams.config['algorithm']['policy_opt']['network_params']

    history_len = agent_config['history_len']
    param_dim = fcn_family.get_total_num_dim()
    init_loc = np.random.rand(param_dim, 1)
    fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]

    SENSOR_DIMS = {
        CUR_LOC: param_dim,
        PAST_OBJ_VAL_DELTAS: history_len,
        PAST_GRADS: history_len * param_dim,
        PAST_LOC_DELTAS: history_len * param_dim,
        CUR_GRAD: param_dim,
        ACTION: param_dim,
    }
    network_config['sensor_dims'] = SENSOR_DIMS
    network_config['param_dim'] = param_dim

    agent = {
        'substeps': agent_config['substeps'],
        'conditions': 1,
        'dt': agent_config['dt'],
        'T': agent_config['T'],
        'sensor_dims': SENSOR_DIMS,
        'state_include': agent_config['state_include'],
        'obs_include': agent_config['obs_include'],
        'history_len': history_len,
        'fcns': fcns,
        'fcn_family': fcn_family,
    }

    network_config['deg_action'] = param_dim
    network_config['param_dim'] = network_config['deg_action']
    network_config['deg_obs'] = int(
        np.sum([SENSOR_DIMS[sensor] for sensor in agent['obs_include']]))

    def _relative_error(x):
        # Relative error of iterate ``x`` with respect to ``normb``.
        return np.sqrt(cost.evaluate(x)) / normb

    def _final_iterate(lto_agent, policy):
        # Roll ``policy`` out once and return the last state as a column.
        sample = lto_agent.sample(policy, 0, verbose=False, save=False,
                                  noisy=False, usescale=False)
        return np.expand_dims(sample.get_X()[-1], axis=1)

    print("****************************************************************")
    print('Initial relative error:', _relative_error(fcns[0]['init_loc']))

    # Every optimizer gets its own function list so that its starting point
    # can be advanced independently of the others.
    gd_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    cg_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    lbfgs_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    mm_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]
    lr_fcns = [{'fcn_obj': cost, 'dim': param_dim, 'init_loc': init_loc}]

    for pol_itr in pols:
        # Build all agents/policies first (same order as before), then run.
        agent['fcns'] = gd_fcns
        agent_gd = AgentLTO(agent)
        gd_pol = GradientDescentPolicy(agent_gd, learning_rate, 0)

        agent['fcns'] = cg_fcns
        agent_cg = AgentLTO(agent)
        cg_pol = ConjugateGradientPolicy(agent_cg, learning_rate, 0)

        agent['fcns'] = lbfgs_fcns
        agent_lbfgs = AgentLTO(agent)
        lbfgs_pol = LBFGSPolicy(agent_lbfgs, learning_rate, mem_len, 0)

        agent['fcns'] = mm_fcns
        agent_mm = AgentLTO(agent)
        mm_pol = MomentumPolicy(agent_mm, learning_rate, momentum, 0)

        agent['fcns'] = lr_fcns
        agent_lr = AgentLTO(agent)
        network_dir = exp_dir + 'data_files_pde/' + ('policy_itr_%02d' % pol_itr) + '.pkl'
        lr_pol = TfPolicy.load_policy(network_dir, first_derivative_network,
                                      network_config=network_config)

        runs = (
            ('iteration using GradientDescent Policy :', agent_gd, gd_pol, gd_fcns),
            # Label typo fixed: was "ConjuageGradient".
            ('iteration using ConjugateGradient Policy:', agent_cg, cg_pol, cg_fcns),
            ('iteration using LBFGS Policy :', agent_lbfgs, lbfgs_pol, lbfgs_fcns),
            ('iteration using Momentum Policy :', agent_mm, mm_pol, mm_fcns),
            ('iteration using Learned Policy :', agent_lr, lr_pol, lr_fcns),
        )
        for label, lto_agent, policy, fcn_list in runs:
            x_final = _final_iterate(lto_agent, policy)
            # Continue from this point on the next entry of ``pols``.
            fcn_list[0]['init_loc'] = x_final
            print('Relative error after', agent['T'], label,
                  _relative_error(x_final))