def __init__(self, config, quit_on_end=False):
    """
    Initialize GPSMain
    Args:
        config: Hyperparameters for experiment
        quit_on_end: When true, quit automatically on completion
    """
    self._quit_on_end = quit_on_end
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    if 'train_conditions' in config['common']:
        self._train_idx = config['common']['train_conditions']
        self._test_idx = config['common']['test_conditions']
    else:
        self._train_idx = range(self._conditions)
        config['common']['train_conditions'] = config['common']['conditions']
        self._hyperparams = config
        self._test_idx = self._train_idx
    self._data_files_dir = config['common']['data_files_dir']

    self.agent = config['agent']['type'](config['agent'])
    self.data_logger = DataLogger()
    self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

    config['algorithm']['agent'] = self.agent
    self.algorithm = config['algorithm']['type'](config['algorithm'])
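# Every snippet in this file persists and restores state through DataLogger. The
# real class lives in gps.utility.data_logger; the sketch below is a hypothetical
# stand-in inferred from how it is called here: pickle(filename, data) writes a
# pickle file, and unpickle(filename) returns the stored object or None when the
# file is missing (which is why callers test "if algorithm is None").
import os
import pickle as pickle_module


class DataLoggerSketch(object):
    """Hypothetical stand-in for DataLogger, matching its observed interface."""

    def pickle(self, filename, data):
        # Serialize 'data' to 'filename'.
        with open(filename, 'wb') as f:
            pickle_module.dump(data, f)

    def unpickle(self, filename):
        # Return the stored object, or None if the file does not exist.
        if not os.path.exists(filename):
            return None
        with open(filename, 'rb') as f:
            return pickle_module.load(f)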
def load_data(dir, load_algorithm_too=True):
    all_files = list_files(dir)
    itrs = {}
    for filename in all_files:
        itr = int(re.findall('[0-9]{2}', filename)[-1])
        itrs[itr] = filename
    if len(itrs) == 0:
        print 'No data found! Exiting.'
        exit()
    elif len(itrs) == 1:
        print 'Only one iteration found, so using that'
        itr = itrs.keys()[0]  # fixed: was itr.keys()[0], which raises AttributeError
    else:
        print 'Here are the iterations for which data has been collected:'
        print sorted(itrs)
        itr = input('Which iteration would you like to train on? ')
        assert isinstance(itr, int)
    data_logger = DataLogger()
    # return [data_logger.unpickle(osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr)) for itr in include]
    # adapted from gps_main.py
    traj_samples = data_logger.unpickle(
        osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr))
    if load_algorithm_too:
        algorithm_state = data_logger.unpickle(
            osp.join(dir, 'algorithm_itr_%02d.pkl' % itr))
    else:
        algorithm_state = None
    return traj_samples, algorithm_state, itr
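# A minimal usage sketch for load_data above. The experiment path is hypothetical,
# and list_files / DataLogger are assumed to be importable from the surrounding
# project, as in the function itself.
if __name__ == '__main__':
    data_dir = '/path/to/experiments/my_experiment/data_files'  # hypothetical
    traj_samples, algorithm_state, itr = load_data(data_dir, load_algorithm_too=True)
    print 'Loaded trajectory samples from iteration %02d' % itr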
def main():
    from gps import __file__ as gps_filepath
    BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
    EXP_DIR = BASE_DIR + '/../experiments/jaco_tf_example/'
    data_files_dir = EXP_DIR + 'data_files/'
    data_logger = DataLogger()

    itr = 1
    cond = 0
    print('Reading states (iteration = ' + str(itr) + ', condition = ' + str(cond) + ') ...')
    print('\n')

    #train_sample_lists = data_logger.unpickle(data_files_dir + ('_samplelist_itr%02d.pkl' % itr))
    lqr_sample_lists = data_logger.unpickle(data_files_dir + ('pol_lqr_sample_itr_%02d.pkl' % itr))
    badmm_sample_lists = data_logger.unpickle(
        data_files_dir + ('pol_badmm_sample_itr_%02d.pkl' % itr))

    print('lqr sample states ' + str(lqr_sample_lists[cond].get_X().shape) + ':')
    print(lqr_sample_lists[cond].get_X())
    print('\n')
    print('badmm sample states ' + str(badmm_sample_lists[cond].get_X().shape) + ':')
    print(badmm_sample_lists[cond].get_X())
    print('\n')
def __init__(self, config):
    """
    Initialize LQRTestMain
    Args:
        config: Test hyperparameters for experiment
    """
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    if 'train_conditions' in config['common']:
        self._train_idx = config['common']['train_conditions']
        self._test_idx = config['common']['test_conditions']
    else:
        self._train_idx = range(self._conditions)
        config['common']['train_conditions'] = config['common']['conditions']
        self._hyperparams = config
        self._test_idx = self._train_idx
    self._data_files_dir = config['common']['data_files_dir']

    self.agent = config['agent']['type'](config['agent'])
    self.data_logger = DataLogger()

    config['algorithm']['agent'] = self.agent
    self.algorithm = config['algorithm']['type'](config['algorithm'])
def plotMDTest():
    data_logger = DataLogger()
    position = data_logger.unpickle('./position/9/train_position_1.pkl')
    print(position)

    _, ax = plt.subplots()
    label = 'initial train positions'
    for i in range(position.shape[0]):
        if i == 0:
            ax.scatter(position[i][0], position[i][1], marker='s',
                       color='blue', s=100, label=label)
            label = None
        else:
            ax.scatter(position[i][0], position[i][1], marker='s',
                       color='blue', s=100, label=label)

    from matplotlib.patches import Ellipse
    center_position = np.mean(position, axis=0)
    radius = 0.05
    radius_grow = 0.025
    for i in range(7):
        ell = Ellipse(xy=center_position, width=radius * 2, height=radius * 2,
                      angle=0, fill=False)
        ax.add_artist(ell)
        ell.set_clip_box(ax.bbox)
        ell.set_facecolor(color='black')
        radius = radius + radius_grow
        print(radius)

    ax.set_xlim(0, 0.45)
    ax.set_ylim(-0.4, 0)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    plt.legend(loc='upper left', frameon=True)
    plt.show()
def run_multi_test(config, tests_from, tests_to, repetitions, itr=0):
    data_files_dir = '/media/cambel/Data/documents/gps/experiments/paper-08-18/'
    # /media/cambel/Data/documents/gps/experiments/paper-08-18/02/data_files/policy_itr_00.pkl
    data_logger = DataLogger()
    conditions = config['common']['conditions']
    policy_prefix = 'policy_itr_'
    policy_name = 'data_files/' + policy_prefix + '%02d.pkl' % itr
    print tests_from, tests_to
    for i in xrange(tests_from, tests_to):
        np.save("/tmp/gps_test.npy", np.array([i]))
        policy_file = data_files_dir + str(i).zfill(2) + "/" + policy_name
        print "Current policy", policy_file
        policy_opt = data_logger.unpickle(policy_file)
        pol = policy_opt.policy
        agent = config['agent']['type'](config['agent'])
        grasp_count = 0
        for cond in list(range(conditions)):
            if repetitions is None:
                reps = config['num_samples']
            else:
                reps = repetitions
            # 'rep' was originally also named 'i', shadowing the experiment index above.
            for rep in range(reps):
                gp = agent.execute(pol, cond,
                                   verbose=(rep < config['verbose_trials']),
                                   noisy=False)
                if gp:
                    grasp_count += 1
                print "test:", rep, grasp_count
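# A hedged usage sketch for run_multi_test above: load the experiment's hyperparams
# module the same way gps_main.py does elsewhere in this file, then replay the saved
# iteration-0 policies of experiments 00..04. The hyperparams path is hypothetical.
import imp

if __name__ == '__main__':
    hyperparams_file = '/path/to/experiments/my_experiment/hyperparams.py'  # hypothetical
    hyperparams = imp.load_source('hyperparams', hyperparams_file)
    # repetitions=None falls back to config['num_samples'] per condition.
    run_multi_test(hyperparams.config, tests_from=0, tests_to=5,
                   repetitions=None, itr=0)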
def plotPositionStep():
    data_logger = DataLogger()
    all_position = data_logger.unpickle('./position/6/train_position.pkl')
    color = 'blue'
    for i in range(all_position.shape[0]):
        cur_position_x = all_position[0:i+1, 0]
        cur_position_y = all_position[0:i+1, 1]
        print(cur_position_x)
        print(cur_position_y)
        if color == 'blue':
            color = 'red'
            plotpoint(x_data=cur_position_x, y_data=cur_position_y,
                      x_label='position x', y_label='position y',
                      title='position', color=color)
        else:
            color = 'blue'
            plotpoint(x_data=cur_position_x, y_data=cur_position_y,
                      x_label='position x', y_label='position y',
                      title='position', color=color)
        plt.show()
        temp_char = raw_input()
        plt.close(1)
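# plotpoint is called by several snippets here but not defined in any of them; the
# helper below is a hypothetical sketch inferred from its call sites (a scatter of
# 2D positions with axis labels, a title, and an optional color).
import matplotlib.pyplot as plt


def plotpoint(x_data, y_data, x_label='', y_label='', title='', color='blue'):
    """Hypothetical helper: scatter-plot a set of 2D positions on the current axes."""
    ax = plt.gca()
    ax.scatter(x_data, y_data, color=color)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)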
def plotCompareCostAlpha():
    """
    Compare the distance to the target with and without alpha.
    Returns:
    """
    data_logger = DataLogger()
    for i in range(1, 2):
        root_dir = './position/compare_alpha_%d/' % i
        for j in range(6):
            file_name = root_dir + 'alpha_distance_%d.pkl' % j
            distance = data_logger.unpickle(file_name)
            distance = np.expand_dims(distance, axis=0)
            if j == 0:
                distances = distance
            else:
                distances = np.concatenate((distances, distance), axis=0)
        # fixed: was 'if i == 2', which never holds for range(1, 2) and left
        # all_distances_alpha undefined in the else branch below
        if i == 1:
            all_distances_alpha = distances
        else:
            all_distances_alpha = np.concatenate((all_distances_alpha, distances), axis=1)
    # print(all_distances_alpha[1])
    all_distances_alpha[all_distances_alpha > 0.6] = 0.6
    mean_distances_alpha = np.mean(all_distances_alpha, axis=1)

    for i in range(1, 2):
        root_dir = './position/compare_alpha_%d/' % i
        for j in range(6):
            file_name = root_dir + 'without_alpha_distance_%d.pkl' % j
            distance = data_logger.unpickle(file_name)
            distance = np.expand_dims(distance, axis=0)
            if j == 0:
                distances = distance
            else:
                distances = np.concatenate((distances, distance), axis=0)
        if i == 1:
            all_distances_alpha = distances
        else:
            all_distances_alpha = np.concatenate((all_distances_alpha, distances), axis=1)
    # print(all_distances_alpha[1])
    all_distances_alpha[all_distances_alpha > 0.6] = 0.6
    mean_distances_alpha_without = np.mean(all_distances_alpha, axis=1)

    x_data = list()
    base_line = list()
    for i in range(mean_distances_alpha.shape[0]):
        x_data.append(i)
        base_line.append(0.06)
    x_data = np.array(x_data)
    base_line = np.array(base_line)
    plotline(x_data=x_data, y1_data=mean_distances_alpha,
             y2_data=mean_distances_alpha_without, y3_data=base_line,
             x_label="num of positions", y_label="distance to target")
    plt.show()
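# plotline is likewise undefined in these snippets; this hypothetical sketch matches
# its call sites (three or four y-series plotted against one x-axis, with labels and
# an optional title), and leaves plt.show() to the caller as the snippets do.
import matplotlib.pyplot as plt


def plotline(x_data, y1_data, y2_data, y3_data, y4_data=None,
             x_label='', y_label='', title=''):
    """Hypothetical helper: overlay up to four curves on a single axis."""
    _, ax = plt.subplots()
    ax.plot(x_data, y1_data, label='series 1')
    ax.plot(x_data, y2_data, label='series 2')
    ax.plot(x_data, y3_data, label='series 3')
    if y4_data is not None:
        ax.plot(x_data, y4_data, label='series 4')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend(loc='best')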
def __init__(self, config, quit_on_end=False): """ Initialize GPSMain Args: config: Hyperparameters for experiment quit_on_end: When true, quit automatically on completion """ self._quit_on_end = quit_on_end self._hyperparams = config # print(config) self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common'][ 'conditions'] self._hyperparams = config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI( config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent # hard code to pass the map_state and target_state config['algorithm']['cost']['costs'][1]['data_types'][3][ 'target_state'] = config['agent']['target_state'] config['algorithm']['cost']['costs'][1]['data_types'][3][ 'map_size'] = config['agent']['map_size'] # config['algorithm']['cost']['costs'][1]['data_types'][3]['map_size'] = CUT_MAP_SIZE if len(config['algorithm']['cost']['costs']) > 2: # temporarily deprecated, not considering collision cost # including cost_collision config['algorithm']['cost']['costs'][2]['data_types'][3][ 'target_state'] = config['agent']['target_state'] config['algorithm']['cost']['costs'][2]['data_types'][3][ 'map_size'] = config['agent']['map_size'] config['algorithm']['cost']['costs'][2]['data_types'][3][ 'map_state'] = config['agent']['map_state'] # print(config['algorithm']) self.algorithm = config['algorithm']['type'](config['algorithm']) # Modified by RH self.finishing_time = None self.U = None self.final_pos = None self.samples = [] self.quick_sample = None # self.map_size = config['agent']['map_size'] self.map_size = CUT_MAP_SIZE self.display_center = config['agent']['display_center']
def plotposition():
    data_logger = DataLogger()
    position = data_logger.unpickle('./position/train_position.pkl')
    print(position)
    plotpoint(x_data=position[:, 0], y_data=position[:, 1],
              x_label='position x', y_label='position y', title='position')
    plt.show()
def __init__(self, config):
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    self._data_files_dir = config['common']['data_files_dir']

    self.agent = config['agent']['type'](config['agent'])
    self.data_logger = DataLogger()
    self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

    config['algorithm']['agent'] = self.agent
    self.algorithm = config['algorithm']['type'](config['algorithm'])
def __init__(self, config):
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    # if 'train_conditions' in config['common']:
    #     self._train_idx = config['common']['train_conditions']
    #     self._test_idx = config['common']['test_conditions']
    # else:
    #     self._train_idx = range(self._conditions)
    #     config['common']['train_conditions'] = config['common']['conditions']
    #     self._hyperparams = config
    #     self._test_idx = self._train_idx
    self._data_files_dir = config['common']['data_files_dir']
    self._algorithm_files_dir = config['common']['demo_controller_file']
    self.data_logger = DataLogger()
def __init__(self, config, quit_on_end=False): """ Initialize GPSMain Args: config: Hyperparameters for experiment quit_on_end: When true, quit automatically on completion """ self._quit_on_end = quit_on_end self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common']['conditions'] self._hyperparams=config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm'])
def setUp(self): from gps import __file__ as gps_filepath gps_filepath = '/'.join(str.split(gps_filepath, '/')[:-1]) gps_filepath = os.path.abspath(gps_filepath) hyperparams_file = gps_filepath + '/test_files/hyperparams.py' hyperparams = imp.load_source('hyperparams', hyperparams_file) config = hyperparams.config seed = config.get('random_seed', 0) random.seed(seed) np.random.seed(seed) config['algorithm']['agent'] = DummyAgent(config['agent']) self.algorithm = config['algorithm']['type'](config['algorithm']) data_logger = DataLogger() self.traj_distr = data_logger.unpickle(gps_filepath + '/test_files/traj_distr') self.traj_info = data_logger.unpickle(gps_filepath + '/test_files/traj_info') self.new_traj_distr = data_logger.unpickle( gps_filepath + '/test_files/new_traj_distr') self.final_eta = data_logger.unpickle(gps_filepath + '/test_files/final_eta') self.mu, self.sigma = data_logger.unpickle(gps_filepath + '/test_files/mu_and_sigma') self.algorithm.cur[0].traj_distr = self.traj_distr self.algorithm.cur[0].traj_info = self.traj_info
def __init__(self, config, quit_on_end=False):
    """
    Initialize GPSMain
    Args:
        config: Hyperparameters for experiment
        quit_on_end: When true, quit automatically on completion
    """
    # quit_on_end was referenced in the body but missing from the signature;
    # it is added here with a default so the assignment below works.
    self._quit_on_end = quit_on_end
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    if 'train_conditions' in config['common']:
        self._train_idx = config['common']['train_conditions']
        self._test_idx = config['common']['test_conditions']
    else:
        self._train_idx = range(self._conditions)
        config['common']['train_conditions'] = config['common']['conditions']
        self._hyperparams = config
        self._test_idx = self._train_idx
    self._data_files_dir = config['common']['data_files_dir']

    self.agent = config['agent']['type'](config['agent'])
    self.data_logger = DataLogger()
    self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

    config['algorithm']['agent'] = self.agent
    self.algorithm = config['algorithm']['type'](config['algorithm'])
    self.algorithm.init_samples = self._hyperparams['num_samples']

    if self.algorithm._hyperparams['ioc']:
        demo_file = self._data_files_dir + 'demos.pkl'
        demos = self.data_logger.unpickle(demo_file)
        if demos is None:
            self.demo_gen = GenDemo(config)
            self.demo_gen.ioc_algo = self.algorithm
            self.demo_gen.generate()
            demo_file = self._data_files_dir + 'demos.pkl'
            demos = self.data_logger.unpickle(demo_file)
        config['agent']['pos_body_offset'] = demos['pos_body_offset']
        self.agent = config['agent']['type'](config['agent'])
        self.algorithm.demoX = demos['demoX']
        self.algorithm.demoU = demos['demoU']
        self.algorithm.demoO = demos['demoO']
def plotCountSuc():
    data_logger = DataLogger()
    #count_suc = data_logger.unpickle('./position/1/position_ol_alpha_count_step_5.pkl')
    count_suc = data_logger.unpickle('./position/1/position_md.pkl')
    rate_suc = np.sum(count_suc, axis=1) / count_suc.shape[1]
    print(rate_suc)

    x_data = np.zeros(0)  # fixed: was 'x_date', leaving x_data undefined below
    for i in range(rate_suc.shape[0]):
        x_data = np.concatenate((x_data, np.array([i])))

    # axis labels swapped to match the data (x: condition index, y: success rate)
    plotline(x_data=x_data, y1_data=rate_suc, y2_data=rate_suc,
             y3_data=rate_suc, y4_data=rate_suc,
             x_label='condition', y_label='rate', title='successful rate')
    plt.show()
def load_data(dir): all_files = list_files(dir) itrs = {} for filename in all_files: itr = int(re.findall('[0-9]{2}', filename)[-1]) itrs[itr] = filename if len(itrs) == 0: print 'No data found! Exiting.' exit() elif len(itrs) == 1: print 'Only one iteration found, so using that' include = itrs else: print 'Here are the iterations for which data has been collected:' print sorted(itrs) include = raw_input('Which iterations would you like to include? ') if include == "all": include = itrs else: include = eval(include) if type(include) == int: include = [include] elif type(include) in (list, tuple): pass else: raise TypeError( 'Input should be an int or list/tuple thereof, or the keyword "all".' ) data_logger = DataLogger() algorithm_states, traj_sample_lists = [], [] for itr in include: # adapted from gps_main.py algorithm_file = osp.join(dir, 'algorithm_itr_%02d.pkl' % itr) algorithm = data_logger.unpickle(algorithm_file) if algorithm is None: raise RuntimeError("Cannot find '%s'" % algorithm_file) traj_samples = data_logger.unpickle( osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr)) algorithm_states.append(algorithm) traj_sample_lists.append(traj_samples) return algorithm_states, traj_sample_lists
def runTest(itr_load): data_files_dir = config['common']['data_files_dir'] data_logger = DataLogger() algorithm_file = data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load algorithm = data_logger.unpickle(algorithm_file) if algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since this is in a thread #pol = algorithm.cur[0].traj_distr pol = algorithm.policy_opt.policy agent_hyperparams = deepcopy(AGENT) agent_hyperparams.update(config['agent']) cost_obstacle = CostObstacle(config['algorithm']['cost']['costs'][2]) cost_state = CostState(config['algorithm']['cost']['costs'][1]) x0s = agent_hyperparams["x0"] for cond in range(len(x0s)): T = agent_hyperparams['T'] dX = x0s[cond].shape[0] dU = agent_hyperparams['sensor_dims'][ACTION] agent_hyperparams['render'] = True agent = config['agent']['type'](agent_hyperparams) time.sleep(1) # Time for init node ''' while True: sample = agent.get_data() raw_input("Get data") ''' # Sample using offline trajectory distribution. for i in range(config['num_samples']): sample = agent.sample(pol, cond, noisy=False) cost_sum = CostSum(config['algorithm']['cost']) cost_obs = cost_obstacle.eval(sample)[0] cost_sta = cost_state.eval(sample)[0] total_cost = np.sum(cost_sum.eval(sample)[0]) weights = config['algorithm']['cost']['weights'] print "Total cost: ", total_cost, print "Cost state: ", np.sum(weights[1] * cost_sta), print "Cost obstacle: ", np.sum(weights[2] * cost_obs) '''
def __init__(self, config): self._hyperparams = config self._conditions = config['common']['conditions'] self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm'])
def __init__(self, config, quit_on_end=False, no_algorithm=False):
    """
    Initialize GPSMain
    Args:
        config: Hyperparameters for experiment
        quit_on_end: When true, quit automatically on completion
    """
    self._quit_on_end = quit_on_end
    self._hyperparams = config
    self._conditions = config['common']['conditions']
    if 'train_conditions' in config['common']:
        self._train_idx = config['common']['train_conditions']
        self._test_idx = config['common']['test_conditions']
    else:
        self._train_idx = range(self._conditions)
        config['common']['train_conditions'] = config['common']['conditions']
        self._hyperparams = config
        self._test_idx = self._train_idx
    self._data_files_dir = config['common']['data_files_dir']
    config['agent']['data_files_dir'] = self._data_files_dir
    config['algorithm']['data_files_dir'] = self._data_files_dir

    self.agent = config['agent']['type'](config['agent'])
    self.data_logger = DataLogger()
    self.gui = None
    if config['gui_on']:
        from gps.gui.gps_training_gui import GPSTrainingGUI  # Only import if necessary
        self.gui = GPSTrainingGUI(config['common'])
    self.mode = None

    config['algorithm']['agent'] = self.agent
    if not no_algorithm:
        self.algorithm = config['algorithm']['type'](config['algorithm'])
        self.algorithm._data_files_dir = self._data_files_dir
        if hasattr(self.algorithm, 'policy_opt'):
            self.algorithm.policy_opt._data_files_dir = self._data_files_dir
    self.session_id = None
def setUp(self): from gps import __file__ as gps_filepath gps_filepath = '/'.join(str.split(gps_filepath, '/')[:-1]) gps_filepath = os.path.abspath(gps_filepath) hyperparams_file = gps_filepath + '/test_files/hyperparams.py' hyperparams = imp.load_source('hyperparams', hyperparams_file) config = hyperparams.config seed = config.get('random_seed', 0) random.seed(seed) np.random.seed(seed) config['algorithm']['agent'] = DummyAgent(config['agent']) self.algorithm = config['algorithm']['type'](config['algorithm']) data_logger = DataLogger() cur_data = data_logger.unpickle(gps_filepath + '/test_files/sample_list') self.X = cur_data.get_X() self.U = cur_data.get_U() prior = data_logger.unpickle(gps_filepath + '/test_files/prior') self.algorithm.cur[0].traj_info.dynamics.prior = prior self.Fm, self.fv, self.dyn_covar = data_logger.unpickle( gps_filepath + '/test_files/dynamics_data')
def __init__(self, config, quit_on_end=False): """ Initialize GPSMain Args: config: Hyperparameters for experiment quit_on_end: When true, quit automatically on completion """ self.start_time = timeit.default_timer() self._quit_on_end = quit_on_end self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = list(range(self._conditions)) config['common']['train_conditions'] = config['common'][ 'conditions'] self._hyperparams = config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI( config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm']) #CB save image of cost w/ gui if not config['gui_on']: self.simplePlotter = SimplePlotter( config['common']['experiment_name'], config['common']['data_files_dir'])
def __init__(self, config, quit_on_end=False):
    """
    Initialize GPSMain
    Args:
        config: Hyperparameters for experiment
        quit_on_end: When true, quit automatically on completion
    """
    self._quit_on_end = quit_on_end
    self._hyperparams = config
    self._conditions = config['common']['conditions']  # here: 1 condition
    if 'train_conditions' in config['common']:  # False for this experiment
        self._train_idx = config['common']['train_conditions']
        self._test_idx = config['common']['test_conditions']
    else:
        self._train_idx = range(self._conditions)
        # Add a 'train_conditions' key to the common dict, set to the number of conditions.
        config['common']['train_conditions'] = config['common']['conditions']
        self._hyperparams = config  # Re-assign the hyperparameters because config was modified.
        self._test_idx = self._train_idx  # Test on the same conditions used for training.
    # Directory for data files, taken from the common dict.
    self._data_files_dir = config['common']['data_files_dir']

    # Instantiate the agent class specified in the hyperparams.
    self.agent = config['agent']['type'](config['agent'])
    #print(self.agent, 'self.agent')
    self.data_logger = DataLogger()
    # Instantiate the training GUI if it is enabled.
    self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

    # The config is modified again here: the agent object is added for the algorithm.
    config['algorithm']['agent'] = self.agent
    self.algorithm = config['algorithm']['type'](config['algorithm'])
def plotcost(): data_logger = DataLogger() cost_ol = data_logger.unpickle('./position/5/cost_ol.pkl') #with_alpha_costs = np.delete(with_alpha_costs, [10, 11, 12]) cost_ol_alpha = data_logger.unpickle('./position/5/cost_ol_alpha.pkl') cost_ol_alpha_step = data_logger.unpickle('./position/5/cost_ol_alpha_step.pkl') cost_md = data_logger.unpickle('./position/5/md_test_costs.pkl') #with_alpha_step_costs = data_logger.unpickle('./position/ol_with_alpha_step_costs.pkl') #with_alpha_step_costs = np.delete(with_alpha_step_costs, 4) print(cost_ol.shape[0]) for i in range(0, cost_ol.shape[0]): if cost_ol[i] > -200: cost_ol[i] = -200 #cost_ol = np.delete(cost_ol, i) for i in range(0, cost_ol_alpha.shape[0]): if cost_ol_alpha[i] > -200: #cost_ol_alpha = np.delete(cost_ol_alpha, i) cost_ol_alpha[i] = -200 for i in range(0, cost_ol_alpha_step.shape[0]): if cost_ol_alpha_step[i] > -200: #cost_ol_alpha_step = np.delete(cost_ol_alpha_step, i) cost_ol_alpha_step[i] = -200 for i in range(0, cost_md.shape[0]): if cost_md[i] > -200: #cost_md = np.delete(cost_md, i) cost_md[i] = -200 """ construct x axis""" num_positions = np.zeros(0) #max_len = min(with_alpha_costs.shape[0], without_alpha_costs.shape[0], md_costs.shape[0], with_alpha_step_costs.shape[0]) min_len = min(cost_ol.shape[0], cost_ol_alpha.shape[0], cost_ol_alpha_step.shape[0], cost_md.shape[0]) print('len: %d' % min_len) for i in range(min_len): num_positions = np.append(num_positions, np.array(i)) cost_ol = cost_ol[:min_len] cost_ol_alpha = cost_ol_alpha[:min_len] cost_ol_alpha_step = cost_ol_alpha_step[:min_len] cost_md = cost_md[:min_len] plotline(x_data=num_positions, y1_data=cost_ol, y2_data=cost_ol_alpha, y3_data=cost_ol_alpha_step, y4_data=cost_md, x_label='num of position', y_label='cost', title='compare') plt.show()
def plotCountSucAll(): """ plot varies of successful rate """ data_logger = DataLogger() count_suc1 = data_logger.unpickle('./position/1/position_ol_alpha_count_5.pkl') count_suc1 = data_logger.unpickle('./position/2/position_ol_alpha_count_5.pkl') count_suc1 = data_logger.unpickle('./position/3/position_ol_alpha_count_5.pkl') rate_suc1 = np.sum(count_suc1, axis=1)/count_suc1.shape[1] print(rate_suc1) count_suc2 = data_logger.unpickle('./position/2/position_ol_alpha_count_7.pkl') rate_suc2 = np.sum(count_suc2, axis=1)/count_suc2.shape[1] print(rate_suc2) count_suc3 = data_logger.unpickle('./position/2/position_ol_alpha_count_8.pkl') rate_suc3 = np.sum(count_suc3, axis=1)/count_suc3.shape[1] print(rate_suc3) count_suc4 = data_logger.unpickle('./position/2/position_ol_alpha_count_9.pkl') rate_suc4 = np.sum(count_suc4, axis=1)/count_suc4.shape[1] print(rate_suc4) min_len = min(count_suc1.shape[0], count_suc2.shape[0], count_suc3.shape[0], count_suc4.shape[0]) x_data = np.zeros(0) for i in range(min_len): x_data = np.concatenate((x_data, np.array([i]))) rate_suc1 = rate_suc1[:min_len] rate_suc2 = rate_suc2[:min_len] rate_suc3 = rate_suc3[:min_len] rate_suc4 = rate_suc4[:min_len] plotline(x_data=x_data, y1_data=rate_suc1, y2_data=rate_suc2, y3_data=rate_suc3, y4_data=rate_suc4, x_label='condition', y_label='rate', title='successful rate') plt.show()
def __init__(self): self._hyperparams = copy.deepcopy(hyperparams_config) self._conditions = self._hyperparams['common']['conditions'] if 'train_conditions' in self._hyperparams['common']: self._train_idx = self._hyperparams['common']['train_conditions'] self._test_idx = self._hyperparams['common']['test_conditions'] else: self._train_idx = range(self._conditions) self._hyperparams['common'][ 'train_conditions'] = self._hyperparams['common']['conditions'] self._test_idx = self._train_idx self.iteration_count = 0 self.dU = 6 self.dO = 270006 self.T = hyperparams_agent['T'] self.M = len(self._train_idx) self.resume_training = 27 self.agent = AgentBaxterPreTrain(hyperparams_agent) self.policy_opt = PolicyOptTf(hyperparams_policy_opt, self.dO, self.dU) self.data_logger = DataLogger() self.save_dir = '/hdd/gps-master/python/gps/pre_train/policy_opt/policy_opt_save/'
class GPSMain(object): """ Main class to run algorithms and experiments. """ def __init__(self, config): self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common']['conditions'] self._hyperparams=config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm']) def run(self, itr_load=None): """ Run training by iteratively sampling and taking an iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: None """ itr_start = self._initialize(itr_load) for itr in range(itr_start, self._hyperparams['iterations']): for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): self._take_sample(itr, cond, i) traj_sample_lists = [ self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx ] self._take_iteration(itr, traj_sample_lists) pol_sample_lists = self._take_policy_samples() self._log_data(itr, traj_sample_lists, pol_sample_lists) self._end() def test_policy(self, itr, N): """ Take N policy samples of the algorithm state at iteration itr, for testing the policy to see how it is behaving. (Called directly from the command line --policy flag). Args: itr: the iteration from which to take policy samples N: the number of policy samples to take Returns: None """ algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since t traj_sample_lists = self.data_logger.unpickle(self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr)) pol_sample_lists = self._take_policy_samples(N) self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists) ) if self.gui: self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.set_status_text(('Took %d policy sample(s) from ' + 'algorithm state at iteration %d.\n' + 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr)) def _initialize(self, itr_load): """ Initialize from the specified iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: itr_start: Iteration to start from. 
""" if itr_load is None: if self.gui: self.gui.set_status_text('Press \'go\' to begin.') return 0 else: algorithm_file = self._data_files_dir + 'algorithm_i_%02d.pkl' % itr_load self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since this is in a thread if self.gui: traj_sample_lists = self.data_logger.unpickle(self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr_load)) pol_sample_lists = self.data_logger.unpickle(self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr_load)) self.gui.update(itr_load, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.set_status_text( ('Resuming training from algorithm state at iteration %d.\n' + 'Press \'go\' to begin.') % itr_load) return itr_load + 1 def _take_sample(self, itr, cond, i): """ Collect a sample from the agent. Args: itr: Iteration number. cond: Condition number. i: Sample number. Returns: None """ pol = self.algorithm.cur[cond].traj_distr if self.gui: self.gui.set_image_overlays(cond) # Must call for each new cond. redo = True while redo: while self.gui.mode in ('wait', 'request', 'process'): if self.gui.mode in ('wait', 'process'): time.sleep(0.01) continue # 'request' mode. if self.gui.request == 'reset': try: self.agent.reset(cond) except NotImplementedError: self.gui.err_msg = 'Agent reset unimplemented.' elif self.gui.request == 'fail': self.gui.err_msg = 'Cannot fail before sampling.' self.gui.process_mode() # Complete request. self.gui.set_status_text( 'Sampling: iteration %d, condition %d, sample %d.' % (itr, cond, i) ) self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials']) ) if self.gui.mode == 'request' and self.gui.request == 'fail': redo = True self.gui.process_mode() self.agent.delete_last_sample(cond) else: redo = False else: self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials']) ) def _take_iteration(self, itr, sample_lists): """ Take an iteration of the algorithm. Args: itr: Iteration number. Returns: None """ if self.gui: self.gui.set_status_text('Calculating.') self.gui.start_display_calculating() self.algorithm.iteration(sample_lists) if self.gui: self.gui.stop_display_calculating() def _take_policy_samples(self, N=None): """ Take samples from the policy to see how it's doing. Args: N : number of policy samples to take per condition Returns: None """ if 'verbose_policy_trials' not in self._hyperparams: return None if not N: N = self._hyperparams['verbose_policy_trials'] if self.gui: self.gui.set_status_text('Taking policy samples.') pol_samples = [[None for _ in range(N)] for _ in range(self._conditions)] for cond in range(len(self._test_idx)): for i in range(N): pol_samples[cond][i] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=True, save=False) return [SampleList(samples) for samples in pol_samples] def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None): """ Log data and algorithm, and update the GUI. Args: itr: Iteration number. 
traj_sample_lists: trajectory samples as SampleList object pol_sample_lists: policy samples as SampleList object Returns: None """ if self.gui: self.gui.set_status_text('Logging data and updating GUI.') self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.save_figure( self._data_files_dir + ('figure_itr_%02d.png' % itr) ) if 'no_sample_logging' in self._hyperparams['common']: return self.data_logger.pickle( self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr), copy.copy(self.algorithm) ) self.data_logger.pickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr), copy.copy(traj_sample_lists) ) if pol_sample_lists: self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists) ) def _end(self): """ Finish running and exit. """ if self.gui: self.gui.set_status_text('Training complete.') self.gui.end_mode()
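# A hedged sketch of how this GPSMain variant is typically driven (mirroring the
# gps_main.py entry point shown later in this file): load the experiment's
# hyperparams module and call run(). The path is hypothetical; with gui_on enabled,
# the real driver instead wraps run() in a background thread next to the GUI.
import imp

if __name__ == '__main__':
    hyperparams_file = '/path/to/experiments/my_experiment/hyperparams.py'  # hypothetical
    hyperparams = imp.load_source('hyperparams', hyperparams_file)
    gps = GPSMain(hyperparams.config)
    gps.run(itr_load=None)  # pass an iteration number to resume from a saved state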
class GPSMain(object): """ Main class to run tensorflow_code-pytorch and experiments. """ def __init__(self, config, quit_on_end=False): """ Initialize GPSMain Args: config: Hyperparameters for experiment quit_on_end: When true, quit automatically on completion """ self._quit_on_end = quit_on_end self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common'][ 'conditions'] self._hyperparams = config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI( config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm']) def run(self, itr_load=None): """ Run training by iteratively sampling and taking an iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: None """ try: itr_start = self._initialize(itr_load) for itr in range(itr_start, self._hyperparams['iterations']): """ get samples """ for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): self._take_sample(itr, cond, i) traj_sample_lists = [ self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx ] """ Clear agent samples """ self.agent.clear_samples() """ interation """ self._take_iteration(itr, traj_sample_lists) """ test policy and samples """ pol_sample_lists = self._take_policy_samples() self._log_data(itr, traj_sample_lists, pol_sample_lists) except Exception as e: traceback.print_exception(*sys.exc_info()) finally: self._end() def test_policy(self, itr, N): """ Take N policy samples of the algorithm state at iteration itr, for testing the policy to see how it is behaving. (Called directly from the command line --policy flag). Args: itr: the iteration from which to take policy samples N: the number of policy samples to take Returns: None """ algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since t traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr)) pol_sample_lists = self._take_policy_samples(N) self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) if self.gui: self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.set_status_text( ('Took %d policy sample(s) from ' + 'algorithm state at iteration %d.\n' + 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr)) def _initialize(self, itr_load): """ Initialize from the specified iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: itr_start: Iteration to start from. 
""" if itr_load is None: if self.gui: self.gui.set_status_text('Press \'go\' to begin.') return 0 else: algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit( 1 ) # called instead of sys.exit(), since this is in a thread if self.gui: traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr_load)) if self.algorithm.cur[0].pol_info: pol_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr_load)) else: pol_sample_lists = None self.gui.set_status_text(( 'Resuming training from algorithm state at iteration %d.\n' + 'Press \'go\' to begin.') % itr_load) return itr_load + 1 def _take_sample(self, itr, cond, i): """ Collect a sample from the agent. Args: itr: Iteration number. cond: Condition number. i: Sample number. Returns: None """ if self.algorithm._hyperparams['sample_on_policy'] \ and self.algorithm.iteration_count > 0: pol = self.algorithm.policy_opt.policy print(" ========================== on policy ====================") else: pol = self.algorithm.cur[cond].traj_distr if self.gui: self.gui.set_image_overlays(cond) # Must call for each new cond. redo = True while redo: while self.gui.mode in ('wait', 'request', 'process'): if self.gui.mode in ('wait', 'process'): time.sleep(0.01) continue # 'request' mode. if self.gui.request == 'reset': try: self.agent.reset(cond) except NotImplementedError: self.gui.err_msg = 'Agent reset unimplemented.' elif self.gui.request == 'fail': self.gui.err_msg = 'Cannot fail before sampling.' self.gui.process_mode() # Complete request. self.gui.set_status_text( 'Sampling: iteration %d, condition %d, sample %d.' % (itr, cond, i)) self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) if self.gui.mode == 'request' and self.gui.request == 'fail': redo = True self.gui.process_mode() self.agent.delete_last_sample(cond) else: redo = False else: self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) def _take_iteration(self, itr, sample_lists): """ Take an iteration of the algorithm. Args: itr: Iteration number. Returns: None """ if self.gui: self.gui.set_status_text('Calculating.') self.gui.start_display_calculating() self.algorithm.iteration(sample_lists) if self.gui: self.gui.stop_display_calculating() def _take_policy_samples(self, N=None): """ Take samples from the policy to see how it's doing. Args: N : number of policy samples to take per condition Returns: None """ print( " ================================ test policy ====================================" ) if 'verbose_policy_trials' not in self._hyperparams: # AlgorithmTrajOpt return None verbose = self._hyperparams['verbose_policy_trials'] if self.gui: self.gui.set_status_text('Taking policy samples.') pol_samples = [[None] for _ in range(len(self._test_idx))] # Since this isn't noisy, just take one sample. # TODO: Make this noisy? Add hyperparam? # TODO: Take at all conditions for GUI? for cond in range(len(self._test_idx)): pol_samples[cond][0] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=verbose, save=False, noisy=False) return [SampleList(samples) for samples in pol_samples] def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None): """ Log data and algorithm, and update the GUI. Args: itr: Iteration number. 
traj_sample_lists: trajectory samples as SampleList object pol_sample_lists: policy samples as SampleList object Returns: None """ if self.gui: self.gui.set_status_text('Logging data and updating GUI.') self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.save_figure(self._data_files_dir + ('figure_itr_%02d.png' % itr)) if 'no_sample_logging' in self._hyperparams['common']: return self.data_logger.pickle( self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr), copy.copy(self.algorithm)) self.data_logger.pickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr), copy.copy(traj_sample_lists)) if pol_sample_lists: self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) def _end(self): """ Finish running and exit. """ if self.gui: self.gui.set_status_text('Training complete.') self.gui.end_mode() if self._quit_on_end: # Quit automatically (for running sequential expts) os._exit(1)
def save_data(output_dir, sample_lists, itr):
    if not osp.isdir(output_dir):
        os.makedirs(output_dir)
    DataLogger().pickle(
        osp.join(output_dir, 'pol_sample_itr_%02d.pkl' % itr), sample_lists)
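# A hedged usage sketch for save_data above: persist policy samples taken at some
# iteration to a (hypothetical) output directory, in the same pol_sample_itr_XX.pkl
# layout the other snippets read back with DataLogger().unpickle().
if __name__ == '__main__':
    output_dir = '/tmp/gps_replay/data_files'  # hypothetical
    itr = 4
    pol_sample_lists = []  # placeholder; normally the SampleLists from _take_policy_samples()
    save_data(output_dir, pol_sample_lists, itr)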
def main(): """ Main function to be run. """ parser = argparse.ArgumentParser( description='Run the Guided Policy Search algorithm.') parser.add_argument('experiment', type=str, help='experiment name') parser.add_argument('-n', '--new', action='store_true', help='create new experiment') parser.add_argument('-t', '--targetsetup', action='store_true', help='run target setup') parser.add_argument('-r', '--resume', metavar='N', type=int, help='resume training from iter N') parser.add_argument('-p', '--policy', metavar='N', type=int, help='take N policy samples (for BADMM/MDGPS only)') parser.add_argument('-s', '--silent', action='store_true', help='silent debug print outs') parser.add_argument('-q', '--quit', action='store_true', help='quit GUI automatically when finished') parser.add_argument('-c', '--condition', metavar='N', type=int, help='consider N position') parser.add_argument('-m', '--num', metavar='N', type=int, help='test\' N nums of experiment') parser.add_argument('-exper', '--exper', metavar='N', type=int, help='time of test experiment') parser.add_argument('-set', '--set_cond', metavar='N', type=int, help='train on special position setting') parser.add_argument('-algi', '--alg_itr', metavar='N', type=int, help='control the time of train NN') args = parser.parse_args() exp_name = args.experiment resume_training_itr = args.resume test_policy_N = args.policy from gps import __file__ as gps_filepath gps_filepath = os.path.abspath(gps_filepath) gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/' exp_dir = gps_dir + 'experiments/' + exp_name + '/' hyperparams_file = exp_dir + 'hyperparams.py' if args.silent: logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) else: logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG) if args.new: from shutil import copy if os.path.exists(exp_dir): sys.exit("Experiment '%s' already exists.\nPlease remove '%s'." % (exp_name, exp_dir)) os.makedirs(exp_dir) prev_exp_file = '.previous_experiment' prev_exp_dir = None try: with open(prev_exp_file, 'r') as f: prev_exp_dir = f.readline() copy(prev_exp_dir + 'hyperparams.py', exp_dir) if os.path.exists(prev_exp_dir + 'targets.npz'): copy(prev_exp_dir + 'targets.npz', exp_dir) except IOError as e: with open(hyperparams_file, 'w') as f: f.write( '# To get started, copy over hyperparams from another experiment.\n' + '# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.' ) with open(prev_exp_file, 'w') as f: f.write(exp_dir) exit_msg = ("Experiment '%s' created.\nhyperparams file: '%s'" % (exp_name, hyperparams_file)) if prev_exp_dir and os.path.exists(prev_exp_dir): exit_msg += "\ncopied from : '%shyperparams.py'" % prev_exp_dir sys.exit(exit_msg) if not os.path.exists(hyperparams_file): sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" 
% (exp_name, hyperparams_file)) hyperparams = imp.load_source('hyperparams', hyperparams_file) if args.targetsetup: try: import matplotlib.pyplot as plt from gps.agent.ros.agent_ros import AgentROS from gps.gui.target_setup_gui import TargetSetupGUI agent = AgentROS(hyperparams.config['agent']) TargetSetupGUI(hyperparams.config['common'], agent) plt.ioff() plt.show() except ImportError: sys.exit('ROS required for target setup.') elif test_policy_N: import random import numpy as np import matplotlib.pyplot as plt seed = hyperparams.config.get('random_seed', 0) random.seed(seed) np.random.seed(seed) data_files_dir = exp_dir + 'data_files/' data_filenames = os.listdir(data_files_dir) algorithm_prefix = 'algorithm_itr_' algorithm_filenames = [ f for f in data_filenames if f.startswith(algorithm_prefix) ] current_algorithm = sorted(algorithm_filenames, reverse=True)[0] current_itr = int( current_algorithm[len(algorithm_prefix):len(algorithm_prefix) + 2]) gps = GPSMain(hyperparams.config) if hyperparams.config['gui_on']: test_policy = threading.Thread(target=lambda: gps.test_policy( itr=current_itr, N=test_policy_N)) test_policy.daemon = True test_policy.start() plt.ioff() plt.show() else: gps.test_policy(itr=current_itr, N=test_policy_N) else: if args.condition: """ if specify the N training position""" num_position = args.condition data_logger = DataLogger() positions = data_logger.unpickle('./position/train_position.pkl') # positions = data_logger.unpickle('./position/suc_train_position.pkl') hyperparams.agent['conditions'] = num_position hyperparams.common['conditions'] = num_position hyperparams.algorithm['conditions'] = num_position pos_body_offset = list() for i in range(num_position): pos_body_offset.append(positions[i]) hyperparams.agent['pos_body_offset'] = pos_body_offset import random import numpy as np import matplotlib.pyplot as plt seed = hyperparams.config.get('random_seed', 0) random.seed(seed) np.random.seed(seed) # set the time of training NN if args.alg_itr: hyperparams.config['iterations'] = args.alg_itr """ set extend setting """ data_logger = DataLogger() train_position = data_logger.unpickle( './position/all_train_position.pkl') hyperparams.agent['pos_body_offset'] = list(train_position) hyperparams.agent['conditions'] = len(train_position) hyperparams.common['conditions'] = len(train_position) hyperparams.algorithm['conditions'] = len(train_position) gps = GPSMain(hyperparams.config, args.quit) if hyperparams.config['gui_on']: run_gps = threading.Thread( target=lambda: gps.run(itr_load=resume_training_itr)) run_gps.daemon = True run_gps.start() plt.ioff() plt.show() else: costs, mean_cost, position_suc_count, all_distance = gps.run( args.num, exper_condition=args.set_cond, itr_load=resume_training_itr) # gps.data_logger.pickle('./position/%d/experiment_%d/md_all_distance.pkl' # % (args.num, args.exper), all_distance) gps.data_logger.pickle('./position/md_all_distance.pkl', all_distance) gps.data_logger.pickle('./position/md_all_cost.pkl', costs) """
class GPSMain(object): """ Main class to run algorithms and experiments. """ def __init__(self, config, quit_on_end=False): """ Initialize GPSMain Args: config: Hyperparameters for experiment quit_on_end: When true, quit automatically on completion """ self._quit_on_end = quit_on_end self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common'][ 'conditions'] self._hyperparams = config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() self.gui = GPSTrainingGUI( config['common']) if config['gui_on'] else None config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm']) def run(self, time_experiment, exper_condition, itr_load=None): """ Run training by iteratively sampling and taking an iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: None """ itr_start = self._initialize(itr_load) # test_position = self.data_logger.unpickle('./position/%d/%d/test_position.pkl' # % (time_experiment, exper_condition)) self.target_ee_point = self.agent._hyperparams['target_ee_points'][:3] for itr in range(itr_start, self._hyperparams['iterations']): print('itr******: %d **********' % itr) for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): self._take_sample(itr, cond, i) traj_sample_lists = [ self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx ] # Clear agent samples. 
self.agent.clear_samples() self._take_iteration(itr, traj_sample_lists) pol_sample_lists = self._take_policy_samples() self._log_data(itr, traj_sample_lists, pol_sample_lists) """ test policy and collect costs""" """ gradually add the distance of agent position """ center_position = 0.02 radius = 0.02 max_error_bound = 0.02 directory = 9 for test_condition in range(7): # test_position = self.generate_position(center_position, radius, 30, max_error_bound) test_position = self.data_logger.unpickle( './position/test_position_%d.pkl' % (test_condition + 1)) costs, position_suc_count, distance = self.test_cost( test_position, len(pol_sample_lists)) print('distance:', distance) # add the position_suc_count if test_condition == 0: #augement array all_pos_suc_count = np.expand_dims(position_suc_count, axis=0) all_distance = np.expand_dims(distance, axis=0) else: all_pos_suc_count = np.vstack( (all_pos_suc_count, position_suc_count)) all_distance = np.vstack((all_distance, distance)) costs = costs.reshape(costs.shape[0] * costs.shape[1]) mean_cost = np.array([np.mean(costs)]) center_position = center_position + radius * 2 self._end() return costs, mean_cost, all_pos_suc_count, all_distance def generate_position(self, cposition, radius, conditions, max_error_bound): # all_positions = np.zeros(0) while True: all_positions = np.array([cposition, -cposition, 0]) center_position = np.array([cposition, -cposition, 0]) for i in range(conditions): position = np.random.uniform(cposition - radius, cposition + radius, 3) while True: position[2] = 0 position[1] = -position[1] area = (position - center_position).dot(position - center_position) # area = np.sum(np.multiply(position - center_position, position - center_position)) if area <= radius**2: # print(area) break position = np.random.uniform(cposition - radius, cposition + radius, 3) position = np.floor(position * 1000) / 1000.0 all_positions = np.concatenate((all_positions, position)) all_positions = np.reshape(all_positions, [all_positions.shape[0] / 3, 3]) # print(all_positions[:, 1]) # print('mean:') # print(np.mean(all_positions, axis=0)) mean_position = np.mean(all_positions, axis=0) # mean_error1 = np.fabs(mean_position[0] - 0.11) # mean_error2 = np.fabs(mean_position[1] + 0.11) mean_error1 = np.fabs(mean_position[0] - (cposition - max_error_bound)) mean_error2 = np.fabs(mean_position[1] + (cposition - max_error_bound)) if mean_error1 < max_error_bound and mean_error2 < max_error_bound: print('mean:') print(np.mean(all_positions, axis=0)) break print(all_positions) print(all_positions.shape) return all_positions def test_cost(self, positions, train_cond): """ test policy and collect costs Args: positions: test position from test_position.pkl Returns: cost: mean cost of all test position total_suc: successful pegging trial count 1:successful 0:fail """ iteration = positions.shape[0] / train_cond total_costs = list() total_ee_points = list() total_suc = np.zeros(0) total_distance = np.zeros(0) for itr in range(iteration): for cond in self._train_idx: self._hyperparams['agent']['pos_body_offset'][ cond] = positions[itr + cond] self.agent.reset_model(self._hyperparams) _, cost, ee_points = self._test_policy_samples() for cond in self._train_idx: total_ee_points.append(ee_points[cond]) total_costs.append(cost) print("total_costs:", total_costs) for i in range(len(total_ee_points)): ee_error = total_ee_points[i][:3] - self.target_ee_point distance = ee_error.dot(ee_error)**0.5 if (distance < 0.06): total_suc = np.concatenate((total_suc, np.array([1]))) 
else: total_suc = np.concatenate((total_suc, np.array([0]))) total_distance = np.concatenate( (total_distance, np.array([distance]))) return np.array(total_costs), total_suc, total_distance def test_policy(self, itr, N): """ Take N policy samples of the algorithm state at iteration itr, for testing the policy to see how it is behaving. (Called directly from the command line --policy flag). Args: itr: the iteration from which to take policy samples N: the number of policy samples to take Returns: None """ algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since t traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr)) pol_sample_lists = self._take_policy_samples(N) self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) if self.gui: self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.set_status_text( ('Took %d policy sample(s) from ' + 'algorithm state at iteration %d.\n' + 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr)) def _initialize(self, itr_load): """ Initialize from the specified iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: itr_start: Iteration to start from. """ if itr_load is None: if self.gui: self.gui.set_status_text('Press \'go\' to begin.') return 0 else: algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit( 1 ) # called instead of sys.exit(), since this is in a thread if self.gui: traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr_load)) if self.algorithm.cur[0].pol_info: pol_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr_load)) else: pol_sample_lists = None self.gui.set_status_text(( 'Resuming training from algorithm state at iteration %d.\n' + 'Press \'go\' to begin.') % itr_load) return itr_load + 1 def _take_sample(self, itr, cond, i): """ Collect a sample from the agent. Args: itr: Iteration number. cond: Condition number. i: Sample number. Returns: None """ if self.algorithm._hyperparams['sample_on_policy'] \ and self.algorithm.iteration_count > 0: pol = self.algorithm.policy_opt.policy else: pol = self.algorithm.cur[cond].traj_distr if self.gui: self.gui.set_image_overlays(cond) # Must call for each new cond. redo = True while redo: while self.gui.mode in ('wait', 'request', 'process'): if self.gui.mode in ('wait', 'process'): time.sleep(0.01) continue # 'request' mode. if self.gui.request == 'reset': try: self.agent.reset(cond) except NotImplementedError: self.gui.err_msg = 'Agent reset unimplemented.' elif self.gui.request == 'fail': self.gui.err_msg = 'Cannot fail before sampling.' self.gui.process_mode() # Complete request. self.gui.set_status_text( 'Sampling: iteration %d, condition %d, sample %d.' 
% (itr, cond, i)) self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) if self.gui.mode == 'request' and self.gui.request == 'fail': redo = True self.gui.process_mode() self.agent.delete_last_sample(cond) else: redo = False else: self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) def _take_iteration(self, itr, sample_lists): """ Take an iteration of the algorithm. Args: itr: Iteration number. Returns: None """ if self.gui: self.gui.set_status_text('Calculating.') self.gui.start_display_calculating() self.algorithm.iteration(sample_lists) if self.gui: self.gui.stop_display_calculating() def _take_policy_samples(self, N=None): """ Take samples from the policy to see how it's doing. Args: N : number of policy samples to take per condition Returns: None """ if 'verbose_policy_trials' not in self._hyperparams: # AlgorithmTrajOpt return None verbose = self._hyperparams['verbose_policy_trials'] if self.gui: self.gui.set_status_text('Taking policy samples.') pol_samples = [[None] for _ in range(len(self._test_idx))] # Since this isn't noisy, just take one sample. # TODO: Make this noisy? Add hyperparam? # TODO: Take at all conditions for GUI? for cond in range(len(self._test_idx)): pol_samples[cond][0] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=verbose, save=False, noisy=False) return [SampleList(samples) for samples in pol_samples] def _test_policy_samples(self, N=None): """ test sample from the policy and collect the costs Args: N: Returns: samples costs: list of cost for each condition ee_point: list of ee_point for each condition """ if 'verbose_policy_trials' not in self._hyperparams: return None verbose = self._hyperparams['verbose_policy_trials'] pol_samples = [[None] for _ in range(len(self._test_idx))] costs = list() ee_points = list() for cond in range(len(self._test_idx)): pol_samples[cond][0] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=verbose, save=False, noisy=False) # in algorithm.py: _eval_cost policy_cost = self.algorithm.cost[0].eval(pol_samples[cond][0])[0] policy_cost = np.sum(policy_cost) #100 step costs.append(policy_cost) ee_points.append(self.agent.get_ee_point(cond)) return [SampleList(samples) for samples in pol_samples], costs, ee_points def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None): """ Log data and algorithm, and update the GUI. Args: itr: Iteration number. traj_sample_lists: trajectory samples as SampleList object pol_sample_lists: policy samples as SampleList object Returns: None """ if self.gui: self.gui.set_status_text('Logging data and updating GUI.') self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.save_figure(self._data_files_dir + ('figure_itr_%02d.png' % itr)) if 'no_sample_logging' in self._hyperparams['common']: return self.data_logger.pickle( self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr), copy.copy(self.algorithm)) self.data_logger.pickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr), copy.copy(traj_sample_lists)) if pol_sample_lists: self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) def _end(self): """ Finish running and exit. """ if self.gui: self.gui.set_status_text('Training complete.') self.gui.end_mode() if self._quit_on_end: # Quit automatically (for running sequential expts) os._exit(1)
class GPSMain(object):
    """ Main class to run algorithms and experiments. """

    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common']['conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
        # Initialize the controller mixing weights (alpha1, alpha2).
        self.init_alpha()

    def run(self, config, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            config: Hyperparameters for the experiment.
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """
        self.target_points = self.agent._hyperparams['target_ee_points'][:3]
        itr_start = self._initialize(itr_load)

        # """ set pre"""
        # position_train = self.data_logger.unpickle('./position/position_train.pkl')

        # Generate random training positions inside a specified circle.
        center_position = np.array([0.05, -0.08, 0])
        position_train = self.generate_position_radius(center_position, 0.08, 7, 0.02)
        print('training position.....')
        print(position_train)

        # print('test all testing position....')
        # for i in xrange(position_train.shape[0]):
        #     test_positions = self.generate_position_radius(position_train[i], 0.03, 5, 0.01)
        #     if i == 0:
        #         all_test_positions = test_positions
        #     else:
        #         all_test_positions = np.concatenate((all_test_positions, test_positions))

        T = self.algorithm.T
        N = self._hyperparams['num_samples']
        dU = self.algorithm.dU

        for num_pos in range(position_train.shape[0]):
            """ load train position and reset agent model.
""" for cond in self._train_idx: self._hyperparams['agent']['pos_body_offset'][ cond] = position_train[num_pos] self.agent.reset_model(self._hyperparams) # initial train array train_prc = np.zeros((0, T, dU, dU)) train_mu = np.zeros((0, T, dU)) train_obs_data = np.zeros((0, T, self.algorithm.dO)) train_wt = np.zeros((0, T)) # initial variables count_suc = 0 for itr in range(itr_start, self._hyperparams['iterations']): print('******************num_pos:************', num_pos) print('______________________itr:____________', itr) for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): if num_pos == 0: self._take_sample(itr, cond, i) elif itr == 0: self._take_sample(itr, cond, i) else: self._take_train_sample(itr, cond, i) # self._take_sample(itr, cond, i) traj_sample_lists = [ self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx ] # calculate the distance of the end-effector to target position ee_pos = self.agent.get_ee_pos(cond)[:3] target_pos = self.agent._hyperparams['target_ee_pos'][:3] distance_pos = ee_pos - target_pos distance_ee = np.sqrt(distance_pos.dot(distance_pos)) print('distance ee:', distance_ee) # collect the successful sample to train global policy if distance_ee <= 0.06: count_suc += 1 tgt_mu, tgt_prc, obs_data, tgt_wt = self.train_prepare( traj_sample_lists) train_mu = np.concatenate((train_mu, tgt_mu)) train_prc = np.concatenate((train_prc, tgt_prc)) train_obs_data = np.concatenate((train_obs_data, obs_data)) train_wt = np.concatenate((train_wt, tgt_wt)) # Clear agent samples. self.agent.clear_samples() # if get enough sample, then break if count_suc > 8: break self._take_iteration(itr, traj_sample_lists) if self.algorithm.flag_reset: break # pol_sample_lists = self._take_policy_samples() # self._log_data(itr, traj_sample_lists, pol_sample_lists) if num_pos > 0: self.algorithm.fit_global_linear_policy(traj_sample_lists) if not self.algorithm.flag_reset: # train NN with good samples self.algorithm.policy_opt.update(train_obs_data, train_mu, train_prc, train_wt) # test the trained in the current position print('test current policy.....') self.test_current_policy() print('test all testing position....') for i in xrange(position_train.shape[0]): test_positions = self.generate_position_radius( position_train[i], 0.03, 5, 0.01) if i == 0: all_test_positions = test_positions else: all_test_positions = np.concatenate( (all_test_positions, test_positions)) self.test_cost(all_test_positions) # reset the algorithm to the initial algorithm for the next position # del self.algorithm # config['algorithm']['agent'] = self.agent # self.algorithm = config['algorithm']['type'](config['algorithm']) self.algorithm.reset_alg() self.next_iteration_prepare() self._end() def generate_position_radius(self, position_ori, radius, conditions, max_error_bound): """ Args: position_ori: original center position of generated positions radius: area's radius conditions: the quantity of generating positions max_error_bound: the mean of generated positions' error around cposition Returns: """ c_x = position_ori[0] c_y = position_ori[1] while True: all_positions = np.zeros(0) center_position = np.array([c_x, c_y, 0]) for i in range(conditions): position = np.random.uniform(radius, radius, 3) while True: position[2] = 0 position[1] = (position[1] + c_y) position[0] = position[0] + c_x area = (position - center_position).dot(position - center_position) if area <= (np.pi * radius**2) / 4.0: break position = np.random.uniform(-radius, radius, 3) if i 
== 0:
                    all_positions = position
                    all_positions = np.expand_dims(all_positions, axis=0)
                else:
                    all_positions = np.vstack((all_positions, position))
            mean_position = np.mean(all_positions, axis=0)
            mean_error = np.fabs(center_position - mean_position)
            print('mean_error:', mean_error)
            if mean_error[0] < max_error_bound and mean_error[1] < max_error_bound:
                break
        all_positions = np.floor(all_positions * 1000) / 1000.0
        print('all_position:', all_positions)
        return all_positions

    def test_cost(self, position):
        """
        Test the NN policy at all positions.
        Args:
            position: array of body offsets to evaluate the policy at
        Returns: total costs, success indicators, and distances to target
        """
        total_costs = np.zeros(0)
        total_distance = np.zeros(0)
        total_suc = np.zeros(0)
        print('calculate cost_________________')
        for itr in range(position.shape[0]):
            if itr % 51 == 0:
                print('****************')
            for cond in self._train_idx:
                self._hyperparams['agent']['pos_body_offset'][cond] = position[itr]
            self.agent.reset_model(self._hyperparams)
            _, cost, ee_points = self.take_nn_samples()
            ee_error = ee_points[:3] - self.target_points
            distance = np.sqrt(ee_error.dot(ee_error))
            error = np.sum(np.fabs(ee_error))
            if error < 0.02:
                total_suc = np.concatenate((total_suc, np.array([1])))
            else:
                total_suc = np.concatenate((total_suc, np.array([0])))
            total_costs = np.concatenate((total_costs, np.array(cost)))
            total_distance = np.concatenate((total_distance, np.array([distance])))
        # return np.mean(total_costs), total_suc, total_distance
        return total_costs, total_suc, total_distance

    def next_iteration_prepare(self):
        """
        Prepare for the next iteration.
        Returns: None
        """
        self.init_alpha()

    def init_alpha(self, val=None):
        """
        Initialize the mixing weights alpha1 and alpha2 (defaults 0.75 and 0.25).
        Args:
            val: currently unused; both branches set the default weights.
        Returns: None
        """
        if val is None:
            self.alpha1 = 0.75
            self.alpha2 = 0.25
        else:
            self.alpha1 = 0.75
            self.alpha2 = 0.25

    def pol_alpha(self):
        return self.alpha1, self.alpha2

    def train_prepare(self, sample_lists):
        """
        Prepare the training data from the sample lists.
        Args:
            sample_lists: sample list from agent
        Returns: target mu, prc, obs_data, wt
        """
        algorithm = self.algorithm
        dU, dO, T = algorithm.dU, algorithm.dO, algorithm.T
        obs_data, tgt_mu = np.zeros((0, T, dO)), np.zeros((0, T, dU))
        tgt_prc = np.zeros((0, T, dU, dU))
        tgt_wt = np.zeros((0, T))
        wt_origin = 0.01 * np.ones(T)
        for m in range(algorithm.M):
            samples = sample_lists[m]
            X = samples.get_X()
            N = len(samples)
            prc = np.zeros((N, T, dU, dU))
            mu = np.zeros((N, T, dU))
            wt = np.zeros((N, T))
            traj = algorithm.cur[m].traj_distr
            for t in range(T):
                prc[:, t, :, :] = np.tile(traj.inv_pol_covar[t, :, :], [N, 1, 1])
                for i in range(N):
                    mu[i, t, :] = traj.K[t, :, :].dot(X[i, t, :]) + traj.k[t, :]
                wt[:, t].fill(wt_origin[t])
            tgt_mu = np.concatenate((tgt_mu, mu))
            tgt_prc = np.concatenate((tgt_prc, prc))
            obs_data = np.concatenate((obs_data, samples.get_obs()))
            tgt_wt = np.concatenate((tgt_wt, wt))
        return tgt_mu, tgt_prc, obs_data, tgt_wt

    def test_policy(self, itr, N):
        """
        Take N policy samples of the algorithm state at iteration itr,
        for testing the policy to see how it is behaving.
        (Called directly from the command line --policy flag).
Args: itr: the iteration from which to take policy samples N: the number of policy samples to take Returns: None """ algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit(1) # called instead of sys.exit(), since t traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr)) pol_sample_lists = self._take_policy_samples(N) self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) if self.gui: self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.set_status_text( ('Took %d policy sample(s) from ' + 'algorithm state at iteration %d.\n' + 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr)) def _initialize(self, itr_load): """ Initialize from the specified iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: itr_start: Iteration to start from. """ if itr_load is None: if self.gui: self.gui.set_status_text('Press \'go\' to begin.') return 0 else: algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load self.algorithm = self.data_logger.unpickle(algorithm_file) if self.algorithm is None: print("Error: cannot find '%s.'" % algorithm_file) os._exit( 1 ) # called instead of sys.exit(), since this is in a thread if self.gui: traj_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr_load)) if self.algorithm.cur[0].pol_info: pol_sample_lists = self.data_logger.unpickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr_load)) else: pol_sample_lists = None self.gui.set_status_text(( 'Resuming training from algorithm state at iteration %d.\n' + 'Press \'go\' to begin.') % itr_load) return itr_load + 1 def _take_sample(self, itr, cond, i): """ Collect a sample from the agent. Args: itr: Iteration number. cond: Condition number. i: Sample number. Returns: None """ if self.algorithm._hyperparams['sample_on_policy'] \ and self.algorithm.iteration_count > 0: pol = self.algorithm.policy_opt.policy else: pol = self.algorithm.cur[cond].traj_distr if self.gui: self.gui.set_image_overlays(cond) # Must call for each new cond. redo = True while redo: while self.gui.mode in ('wait', 'request', 'process'): if self.gui.mode in ('wait', 'process'): time.sleep(0.01) continue # 'request' mode. if self.gui.request == 'reset': try: self.agent.reset(cond) except NotImplementedError: self.gui.err_msg = 'Agent reset unimplemented.' elif self.gui.request == 'fail': self.gui.err_msg = 'Cannot fail before sampling.' self.gui.process_mode() # Complete request. self.gui.set_status_text( 'Sampling: iteration %d, condition %d, sample %d.' 
% (itr, cond, i)) self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) if self.gui.mode == 'request' and self.gui.request == 'fail': redo = True self.gui.process_mode() self.agent.delete_last_sample(cond) else: redo = False else: self.agent.sample( pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) def _take_train_sample(self, itr, cond, i): """ collect sample with merge policy Args: itr: cond: i: Returns: """ alpha1, alpha2 = self.pol_alpha() print("alpha:********%03f, %03f******" % (alpha1, alpha2)) pol1 = self.algorithm.cur[cond].traj_distr pol2 = self.algorithm.cur[cond].last_pol if not self.gui: self.agent.merge_controller( pol1, alpha1, pol2, alpha2, cond, verbose=(i < self._hyperparams['verbose_trials'])) def _take_iteration(self, itr, sample_lists): """ Take an iteration of the algorithm. Args: itr: Iteration number. Returns: None """ if self.gui: self.gui.set_status_text('Calculating.') self.gui.start_display_calculating() self.algorithm.iteration(sample_lists) if self.gui: self.gui.stop_display_calculating() def _take_policy_samples(self, N=None): """ Take samples from the policy to see how it's doing. Args: N : number of policy samples to take per condition Returns: None """ if 'verbose_policy_trials' not in self._hyperparams: # AlgorithmTrajOpt return None verbose = self._hyperparams['verbose_policy_trials'] if self.gui: self.gui.set_status_text('Taking policy samples.') pol_samples = [[None] for _ in range(len(self._test_idx))] # Since this isn't noisy, just take one sample. # TODO: Make this noisy? Add hyperparam? # TODO: Take at all conditions for GUI? for cond in range(len(self._test_idx)): pol_samples[cond][0] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=verbose, save=False, noisy=False) return [SampleList(samples) for samples in pol_samples] def take_nn_samples(self, N=None): """ take the NN policy Args: N: Returns: samples, costs, ee_points """ """ Take samples from the policy to see how it's doing. Args: N : number of policy samples to take per condition Returns: None """ if 'verbose_policy_trials' not in self._hyperparams: # AlgorithmTrajOpt return None verbose = self._hyperparams['verbose_policy_trials'] if self.gui: self.gui.set_status_text('Taking policy samples.') pol_samples = [[None] for _ in range(len(self._test_idx))] # Since this isn't noisy, just take one sample. # TODO: Make this noisy? Add hyperparam? # TODO: Take at all conditions for GUI? costs = list() for cond in range(len(self._test_idx)): pol_samples[cond][0] = self.agent.sample( self.algorithm.policy_opt.policy, self._test_idx[cond], verbose=verbose, save=False, noisy=False) policy_cost = self.algorithm.cost[0].eval(pol_samples[cond][0])[0] policy_cost = np.sum(policy_cost) print "cost: %d" % policy_cost # wait to plot in gui in gps_training_gui.py costs.append(policy_cost) ee_points = self.agent.get_ee_point(cond) return [SampleList(samples) for samples in pol_samples], costs, ee_points def test_current_policy(self): """ test the current NN policy in the current position Returns: """ verbose = self._hyperparams['verbose_policy_trials'] for cond in self._train_idx: samples = self.agent.sample(self.algorithm.policy_opt.policy, cond, verbose=verbose, save=False, noisy=False) def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None): """ Log data and algorithm, and update the GUI. Args: itr: Iteration number. 
traj_sample_lists: trajectory samples as SampleList object pol_sample_lists: policy samples as SampleList object Returns: None """ if self.gui: self.gui.set_status_text('Logging data and updating GUI.') self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists, pol_sample_lists) self.gui.save_figure(self._data_files_dir + ('figure_itr_%02d.png' % itr)) if 'no_sample_logging' in self._hyperparams['common']: return self.data_logger.pickle( self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr), copy.copy(self.algorithm)) self.data_logger.pickle( self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr), copy.copy(traj_sample_lists)) if pol_sample_lists: self.data_logger.pickle( self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr), copy.copy(pol_sample_lists)) def _end(self): """ Finish running and exit. """ if self.gui: self.gui.set_status_text('Training complete.') self.gui.end_mode() if self._quit_on_end: # Quit automatically (for running sequential expts) os._exit(1)
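# A minimal sketch (an assumption, not the original agent.merge_controller) of
# the kind of blending _take_train_sample above relies on: the action is the
# convex combination alpha1*u1 + alpha2*u2 of two time-varying linear-Gaussian
# controllers.  pol1 and pol2 are assumed to expose K (T x dU x dX), k (T x dU)
# and chol_pol_covar (T x dU x dU), as the GPS LinearGaussianPolicy class does.
import numpy as np


def blend_linear_gaussian_action(pol1, alpha1, pol2, alpha2, x, t, noise=None):
    """Blended action at state x and timestep t."""
    u1 = pol1.K[t].dot(x) + pol1.k[t]
    u2 = pol2.K[t].dot(x) + pol2.k[t]
    u = alpha1 * u1 + alpha2 * u2
    if noise is not None:
        # Exploration noise injected through the first controller's covariance.
        u += pol1.chol_pol_covar[t].T.dot(noise)
    return u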
class GenDemo(object):
    """ Generator of demos. """

    def __init__(self, config):
        self._hyperparams = config
        self._conditions = config['common']['conditions']

        # if 'train_conditions' in config['common']:
        #     self._train_idx = config['common']['train_conditions']
        #     self._test_idx = config['common']['test_conditions']
        # else:
        #     self._train_idx = range(self._conditions)
        #     config['common']['train_conditions'] = config['common']['conditions']
        #     self._hyperparams = config
        #     self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']
        self._algorithm_files_dir = config['common']['demo_controller_file']
        self.data_logger = DataLogger()

    def generate(self):
        """
        Generate demos and save them in a file for experiment.
        Returns: None.
        """
        # Load the algorithm
        import pickle

        # This should give us the optimal controller. Maybe setting it to
        # 'controller_itr_%02d.pkl' % itr_load would be better?
        algorithm_file = self._algorithm_files_dir
        self.algorithm = pickle.load(open(algorithm_file))
        if self.algorithm is None:
            print("Error: cannot find '%s.'" % algorithm_file)
            os._exit(1)  # called instead of sys.exit(), since this is in a thread

        # Keep the initial states of the agent the same as the demonstrations.
        # Whether the experiment is learning from prior experience:
        self._learning = self.ioc_algo._hyperparams['learning_from_prior']
        agent_config = self._hyperparams['demo_agent']
        if agent_config['filename'] == './mjc_models/pr2_arm3d.xml' and not self._learning:
            agent_config['x0'] = self.algorithm._hyperparams['agent_x0']
            agent_config['pos_body_idx'] = self.algorithm._hyperparams['agent_pos_body_idx']
            agent_config['pos_body_offset'] = self.algorithm._hyperparams['agent_pos_body_offset']
        self.agent = agent_config['type'](agent_config)

        # Roll out the demonstrations from controllers
        var_mult = self.algorithm._hyperparams['var_mult']
        T = self.algorithm.T
        demos = []

        M = agent_config['conditions']
        N = self.ioc_algo._hyperparams['num_demos']
        if not self._learning:
            controllers = {}
            good_conds = self.ioc_algo._hyperparams['demo_cond']

            # Store each controller under M conditions into controllers.
            for i in xrange(M):
                controllers[i] = self.algorithm.cur[i].traj_distr
            controllers_var = copy.copy(controllers)
            for i in xrange(M):
                # Increase controller variance.
                controllers_var[i].chol_pol_covar *= var_mult
                # Gather demos.
                for j in xrange(N):
                    demo = self.agent.sample(
                        controllers_var[i], i,
                        verbose=(i < self.algorithm._hyperparams['demo_verbose']),
                        save=True)
                    demos.append(demo)
        else:
            # Extract the neural network policy.
            pol = self.algorithm.policy_opt.policy
            for i in xrange(M):
                # Gather demos.
                demo = self.agent.sample(
                    pol, i,
                    verbose=(i < self._hyperparams['verbose_trials']))
                demos.append(demo)

        # Filter out worst (M - good_conds) demos.
        target_position = agent_config['target_end_effector'][:3]
        dists_to_target = np.zeros(M)
        for i in xrange(M):
            demo_end_effector = demos[i].get(END_EFFECTOR_POINTS)
            dists_to_target[i] = np.amin(np.sqrt(np.sum(
                (demo_end_effector[:, :3] - target_position.reshape(1, -1))**2,
                axis=1)), axis=0)
        if not self._learning:
            good_indices = dists_to_target.argsort()[:good_conds - M].tolist()
        else:
            good_indicators = (dists_to_target <= agent_config['success_upper_bound']).tolist()
            good_indices = [i for i in xrange(len(good_indicators)) if good_indicators[i]]
            bad_indices = np.argmax(dists_to_target)
        self.ioc_algo._hyperparams['demo_cond'] = len(good_indices)
        filtered_demos = []
        self.ioc_algo.demo_conditions = []
        self.ioc_algo.failed_conditions = []
        exp_dir = self._data_files_dir.replace("data_files", "")
        with open(exp_dir + 'log.txt', 'a') as f:
            f.write('\nThe demo conditions are: \n')
        for i in good_indices:
            filtered_demos.append(demos[i])
            self.ioc_algo.demo_conditions.append(agent_config['pos_body_offset'][i])
            with open(exp_dir + 'log.txt', 'a') as f:
                f.write('\n' + str(agent_config['pos_body_offset'][i]) + '\n')
        with open(exp_dir + 'log.txt', 'a') as f:
            f.write('\nThe failed badmm conditions are: \n')
        for i in xrange(M):
            if i not in good_indices:
                self.ioc_algo.failed_conditions.append(agent_config['pos_body_offset'][i])
                with open(exp_dir + 'log.txt', 'a') as f:
                    f.write('\n' + str(agent_config['pos_body_offset'][i]) + '\n')
        # import pdb; pdb.set_trace()
        shuffle(filtered_demos)
        demo_list = SampleList(filtered_demos)
        demo_store = {'demoX': demo_list.get_X(),
                      'demoU': demo_list.get_U(),
                      'demoO': demo_list.get_obs()}
        if self._learning:
            demo_store['pos_body_offset'] = [agent_config['pos_body_offset'][bad_indices]]
        # Save the demos.
        self.data_logger.pickle(
            self._data_files_dir + 'demos.pkl',
            copy.copy(demo_store))
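# A minimal sketch (not part of the original code) of the demo-filtering rule
# used in GenDemo.generate() above: a demo is kept when the closest distance
# between its end-effector trajectory and the target point stays below a
# success bound.  ee_traj is assumed to be the T x 3 array obtained from
# demo.get(END_EFFECTOR_POINTS)[:, :3].
import numpy as np


def min_distance_to_target(ee_traj, target_position):
    """Smallest Euclidean distance from the end-effector trajectory to the target."""
    diffs = ee_traj - target_position.reshape(1, -1)
    return np.amin(np.sqrt(np.sum(diffs ** 2, axis=1)))


def is_successful_demo(ee_traj, target_position, success_upper_bound):
    return min_distance_to_target(ee_traj, target_position) <= success_upper_bound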
class LQRTestMain(object): """ Main class to run algorithms and experiments. """ def __init__(self, config): """ Initialize LQRTestMain Args: config: Test hyperparameters for experiment """ self._hyperparams = config self._conditions = config['common']['conditions'] if 'train_conditions' in config['common']: self._train_idx = config['common']['train_conditions'] self._test_idx = config['common']['test_conditions'] else: self._train_idx = range(self._conditions) config['common']['train_conditions'] = config['common'][ 'conditions'] self._hyperparams = config self._test_idx = self._train_idx self._data_files_dir = config['common']['data_files_dir'] self.agent = config['agent']['type'](config['agent']) self.data_logger = DataLogger() config['algorithm']['agent'] = self.agent self.algorithm = config['algorithm']['type'](config['algorithm']) def run(self, itr_load=None): """ Run training by iteratively sampling and taking an iteration. Args: itr_load: If specified, loads algorithm state from that iteration, and resumes training at the next iteration. Returns: None """ for itr in range(0, self._hyperparams['iterations']): for cond in self._train_idx: for i in range(self._hyperparams['num_samples']): self._take_sample(itr, cond, i) traj_sample_lists = [ self.agent.get_samples(cond, -self._hyperparams['num_samples']) for cond in self._train_idx ] self._take_iteration(traj_sample_lists) #self._log_data(itr, traj_sample_lists, pol_sample_lists) if (itr == 3): """ self.data_logger.pickle( self._data_files_dir + 'test_traj_distr.pkl', copy.copy(self.algorithm.prev[0].traj_distr) ) self.data_logger.pickle( self._data_files_dir + 'test_traj_info.pkl', copy.copy(self.algorithm.prev[0].traj_info) ) self.data_logger.pickle( self._data_files_dir + 'test_new_traj_distr.pkl', copy.copy(self.algorithm.prev[0].new_traj_distr) ) self.data_logger.pickle( self._data_files_dir + 'test_final_eta.pkl', copy.copy(self.algorithm.prev[0].eta) ) mu_and_sigma = self.algorithm.forward(self.algorithm.prev[0].new_traj_distr, self.algorithm.prev[0].traj_info) self.data_logger.pickle( self._data_files_dir + 'test_mu_and_sigma.pkl', copy.copy(mu_and_sigma) ) """ self.data_logger.pickle( self._data_files_dir + 'test_prior', copy.copy( self.algorithm.prev[0].traj_info.dynamics.get_prior())) self.data_logger.pickle( self._data_files_dir + 'test_sample_list', copy.copy(self.algorithm.prev[0].sample_list)) dynamics_data = self.algorithm.prev[ 0].traj_info.dynamics.Fm, self.algorithm.prev[ 0].traj_info.dynamics.fv, self.algorithm.prev[ 0].traj_info.dynamics.dyn_covar self.data_logger.pickle(self._data_files_dir + 'test_dynamics', copy.copy(dynamics_data)) def _take_sample(self, itr, cond, i): """ Collect a sample from the agent. Args: itr: Iteration number. cond: Condition number. i: Sample number. Returns: None """ if self.algorithm._hyperparams['sample_on_policy'] \ and self.algorithm.iteration_count > 0: pol = self.algorithm.policy_opt.policy else: pol = self.algorithm.cur[cond].traj_distr self.agent.sample(pol, cond, verbose=(i < self._hyperparams['verbose_trials'])) def _take_iteration(self, sample_lists): """ Take an iteration of the algorithm. """ self.algorithm.iteration(sample_lists) def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None): """ Log data and algorithm, and update the GUI. Args: itr: Iteration number. 
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm))
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists))
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists))
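# A minimal sketch (not part of the original code) of how the diagnostic
# pickles written by LQRTestMain.run() can be inspected afterwards.  It assumes
# the same DataLogger used throughout this file; 'test_dynamics' holds the
# (Fm, fv, dyn_covar) tuple pickled above, and the example path is hypothetical.
def inspect_lqr_test_dynamics(data_files_dir):
    """Print the shapes of the fitted linear dynamics saved by the LQR test."""
    data_logger = DataLogger()
    Fm, fv, dyn_covar = data_logger.unpickle(data_files_dir + 'test_dynamics')
    print('Fm shape:        ' + str(Fm.shape))         # T x dX x (dX + dU)
    print('fv shape:        ' + str(fv.shape))         # T x dX
    print('dyn_covar shape: ' + str(dyn_covar.shape))  # T x dX x dX

# Example (hypothetical path):
#   inspect_lqr_test_dynamics('experiments/jaco_tf_example/data_files/')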