    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """

        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']

        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])

        self.data_logger = DataLogger()

        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
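A minimal sketch of how an initializer like this is typically driven, assuming a hyperparams.py that exposes a config dict as in the later examples (the experiment path below is a placeholder):

import imp

# load the experiment's hyperparameters (placeholder path)
hyperparams = imp.load_source('hyperparams', '/path/to/experiment/hyperparams.py')

gps = GPSMain(hyperparams.config, quit_on_end=True)
gps.run()  # iteratively samples and takes algorithm iterations, as in the run() examples below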
Example 2
def load_data(dir, load_algorithm_too=True):
    all_files = list_files(dir)
    itrs = {}
    for filename in all_files:
        itr = int(re.findall('[0-9]{2}', filename)[-1])
        itrs[itr] = filename

    if len(itrs) == 0:
        print 'No data found! Exiting.'
        exit()
    elif len(itrs) == 1:
        print 'Only one iteration found, so using that'
        itr = list(itrs.keys())[0]
    else:
        print 'Here are the iterations for which data has been collected:'
        print sorted(itrs)
        itr = input('Which iteration would you like to train on? ')
        assert isinstance(itr, int)

    data_logger = DataLogger()
    # return [data_logger.unpickle(osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr)) for itr in include]
    # adapted from gps_main.py
    traj_samples = data_logger.unpickle(
        osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr))
    if load_algorithm_too:
        algorithm_state = data_logger.unpickle(
            osp.join(dir, 'algorithm_itr_%02d.pkl' % itr))
    else:
        algorithm_state = None
    return traj_samples, algorithm_state, itr
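A hedged usage sketch for load_data above, assuming an experiment's data_files directory holding traj_sample_itr_XX.pkl and algorithm_itr_XX.pkl files (the directory path is a placeholder):

# placeholder directory path
traj_samples, algorithm_state, itr = load_data('/path/to/experiment/data_files')
print 'Loaded trajectory samples from iteration %02d' % itr
if algorithm_state is not None:
    print 'Algorithm state for iteration %02d restored as well' % itr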
Example 3
def main():
    from gps import __file__ as gps_filepath

    BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
    EXP_DIR = BASE_DIR + '/../experiments/jaco_tf_example/'

    data_files_dir = EXP_DIR + 'data_files/'

    data_logger = DataLogger()

    itr = 1
    cond = 0
    print('Reading states (iteration = ' + str(itr) + ', condition = ' +
          str(cond) + ') ...')

    print('\n')

    #train_sample_lists = data_logger.unpickle(data_files_dir + ('_samplelist_itr%02d.pkl' % itr))
    lqr_sample_lists = data_logger.unpickle(data_files_dir +
                                            ('pol_lqr_sample_itr_%02d.pkl' %
                                             itr))
    badmm_sample_lists = data_logger.unpickle(
        data_files_dir + ('pol_badmm_sample_itr_%02d.pkl' % itr))

    print('lqr sample states ' + str(lqr_sample_lists[cond].get_X().shape) +
          ':')
    print(lqr_sample_lists[cond].get_X())
    print('\n')
    print('badmm sample states ' +
          str(badmm_sample_lists[cond].get_X().shape) + ':')
    print(badmm_sample_lists[cond].get_X())
    print('\n')
Example 4
    def __init__(self, config):
        """
        Initialize LQRTestMain
        Args:
            config: Test hyperparameters for experiment
        """
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
Example 5
def plotMDTest():
    data_logger = DataLogger()
    position = data_logger.unpickle('./position/9/train_position_1.pkl')
    print(position)
    _, ax = plt.subplots()
    label = 'initial train positions'
    for i in range(position.shape[0]):
        # only the first point carries the legend label
        ax.scatter(position[i][0], position[i][1], marker='s', color='blue', s=100, label=label)
        label = None

    from matplotlib.patches import Ellipse
    center_position = np.mean(position, axis=0)
    radius = 0.05
    radius_grow = 0.025
    for i in range(7):
        ell = Ellipse(xy=center_position, width=radius*2, height=radius*2, angle=0, fill=False)
        ax.add_artist(ell)
        ell.set_clip_box(ax.bbox)
        ell.set_facecolor(color='black')
        radius = radius + radius_grow
        print(radius)
    ax.set_xlim(0, 0.45)
    ax.set_ylim(-0.4, 0)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    plt.legend(loc='upper left', frameon=True)
    plt.show()
Example 6
def run_multi_test(config, tests_from, tests_to, repetitions, itr=0):
    data_files_dir = '/media/cambel/Data/documents/gps/experiments/paper-08-18/'
    # /media/cambel/Data/documents/gps/experiments/paper-08-18/02/data_files/policy_itr_00.pkl
    data_logger = DataLogger()

    conditions = config['common']['conditions']
    policy_prefix = 'policy_itr_'
    policy_name = 'data_files/' + policy_prefix + '%02d.pkl' % itr

    print tests_from, tests_to
    for i in xrange(tests_from, tests_to):
        np.save("/tmp/gps_test.npy", np.array([i]))
        policy_file = data_files_dir + str(i).zfill(2) + "/" + policy_name
        print "Current policy", policy_file
        policy_opt = data_logger.unpickle(policy_file)
        pol = policy_opt.policy

        agent = config['agent']['type'](config['agent'])

        grasp_count = 0
        for cond in list(range(conditions)):
            reps = 0
            if repetitions is None:
                reps = config['num_samples']
            else:
                reps = repetitions

            # a separate loop name so the outer test index i is not shadowed
            for rep in range(reps):
                gp = agent.execute(pol,
                                   cond,
                                   verbose=(rep < config['verbose_trials']),
                                   noisy=False)
                if gp:
                    grasp_count += 1
        print "test:", i, grasp_count
Example 7
def plotPositionStep():
    data_logger = DataLogger()
    all_position = data_logger.unpickle('./position/6/train_position.pkl')
    color = 'blue'
    for i in range(all_position.shape[0]):
        cur_position_x = all_position[0:i+1, 0]
        cur_position_y = all_position[0:i+1, 1]
        print(cur_position_x)
        print(cur_position_y)
        # alternate the plot color between red and blue on each step
        color = 'red' if color == 'blue' else 'blue'
        plotpoint(x_data=cur_position_x,
                  y_data=cur_position_y,
                  x_label='position x',
                  y_label='position y',
                  title='position',
                  color=color)
        plt.show()
        temp_char = raw_input()
        plt.close(1)
Example 8
def plotCompareCostAlpha():
    """
    compare two distance with alpha and without alpha
    Returns:

    """
    data_logger = DataLogger()
    for i in range(1, 2):
        root_dir = './position/compare_alpha_%d/' % i
        for j in range(6):
            file_name = root_dir + 'alpha_distance_%d.pkl' % j
            distance = data_logger.unpickle(file_name)
            distance = np.expand_dims(distance, axis=0)
            if j == 0:
                distances = distance
            else:
                distances = np.concatenate((distances, distance), axis=0)
        if i == 1:  # first pass initializes the accumulator, mirroring the loop below
            all_distances_alpha = distances
        else:
            all_distances_alpha = np.concatenate((all_distances_alpha, distances), axis=1)
    # print(all_distances_alpha[1])
    all_distances_alpha[all_distances_alpha > 0.6] = 0.6
    mean_distances_alpha = np.mean(all_distances_alpha, axis=1)

    for i in range(1, 2):
        root_dir = './position/compare_alpha_%d/' % i
        for j in range(6):
            file_name = root_dir + 'without_alpha_distance_%d.pkl' % j
            distance = data_logger.unpickle(file_name)
            distance = np.expand_dims(distance, axis=0)
            if j == 0:
                distances = distance
            else:
                distances = np.concatenate((distances, distance), axis=0)
        if i == 1:
            all_distances_alpha = distances
        else:
            all_distances_alpha = np.concatenate((all_distances_alpha, distances), axis=1)
    # print(all_distances_alpha[1])
    all_distances_alpha[all_distances_alpha > 0.6] = 0.6
    mean_distances_alpha_without = np.mean(all_distances_alpha, axis=1)

    x_data = list()
    base_line = list()
    for i in range(mean_distances_alpha.shape[0]):
        x_data.append(i)
        base_line.append(0.06)
    x_data = np.array(x_data)
    base_line = np.array(base_line)

    plotline(x_data=x_data,
             y1_data=mean_distances_alpha,
             y2_data=mean_distances_alpha_without,
             y3_data=base_line,
             x_label="num of positions",
             y_label="distance to target")
    plt.show()
Example 9
    def __init__(self, config, quit_on_end=False):
        """
		Initialize GPSMain
		Args:
			config: Hyperparameters for experiment
			quit_on_end: When true, quit automatically on completion
		"""
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        # print(config)
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']
        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        # hard code to pass the map_state and target_state
        config['algorithm']['cost']['costs'][1]['data_types'][3][
            'target_state'] = config['agent']['target_state']
        config['algorithm']['cost']['costs'][1]['data_types'][3][
            'map_size'] = config['agent']['map_size']
        # config['algorithm']['cost']['costs'][1]['data_types'][3]['map_size'] = CUT_MAP_SIZE

        if len(config['algorithm']['cost']['costs']) > 2:
            # temporarily deprecated, not considering collision cost
            # including cost_collision
            config['algorithm']['cost']['costs'][2]['data_types'][3][
                'target_state'] = config['agent']['target_state']
            config['algorithm']['cost']['costs'][2]['data_types'][3][
                'map_size'] = config['agent']['map_size']
            config['algorithm']['cost']['costs'][2]['data_types'][3][
                'map_state'] = config['agent']['map_state']
        # print(config['algorithm'])
        self.algorithm = config['algorithm']['type'](config['algorithm'])

        # Modified by RH
        self.finishing_time = None
        self.U = None
        self.final_pos = None
        self.samples = []
        self.quick_sample = None
        # self.map_size = config['agent']['map_size']
        self.map_size = CUT_MAP_SIZE
        self.display_center = config['agent']['display_center']
Example 10
def plotposition():
    data_logger = DataLogger()
    position = data_logger.unpickle('./position/train_position.pkl')
    print(position)
    plotpoint(x_data=position[:, 0],
             y_data=position[:, 1],
             x_label='position x',
             y_label='position y',
             title='position')
    plt.show()
Example 11
    def __init__(self, config):
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
Example 12
    def __init__(self, config):
        self._hyperparams = config
        self._conditions = config['common']['conditions']

        # if 'train_conditions' in config['common']:
        # 	self._train_idx = config['common']['train_conditions']
        # 	self._test_idx = config['common']['test_conditions']
        # else:
        # 	self._train_idx = range(self._conditions)
        # 	config['common']['train_conditions'] = config['common']['conditions']
        # 	self._hyperparams=config
        # 	self._test_idx = self._train_idx
        self._data_files_dir = config['common']['data_files_dir']
        self._algorithm_files_dir = config['common']['demo_controller_file']
        self.data_logger = DataLogger()
Example 13
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common']['conditions']
            self._hyperparams=config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
Example 14
    def setUp(self):
        from gps import __file__ as gps_filepath
        gps_filepath = '/'.join(str.split(gps_filepath, '/')[:-1])
        gps_filepath = os.path.abspath(gps_filepath)
        hyperparams_file = gps_filepath + '/test_files/hyperparams.py'
        hyperparams = imp.load_source('hyperparams', hyperparams_file)
        config = hyperparams.config

        seed = config.get('random_seed', 0)
        random.seed(seed)
        np.random.seed(seed)

        config['algorithm']['agent'] = DummyAgent(config['agent'])
        self.algorithm = config['algorithm']['type'](config['algorithm'])

        data_logger = DataLogger()
        self.traj_distr = data_logger.unpickle(gps_filepath +
                                               '/test_files/traj_distr')
        self.traj_info = data_logger.unpickle(gps_filepath +
                                              '/test_files/traj_info')
        self.new_traj_distr = data_logger.unpickle(
            gps_filepath + '/test_files/new_traj_distr')
        self.final_eta = data_logger.unpickle(gps_filepath +
                                              '/test_files/final_eta')
        self.mu, self.sigma = data_logger.unpickle(gps_filepath +
                                                   '/test_files/mu_and_sigma')

        self.algorithm.cur[0].traj_distr = self.traj_distr
        self.algorithm.cur[0].traj_info = self.traj_info
Example 15
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
        self.algorithm.init_samples = self._hyperparams['num_samples']

        if self.algorithm._hyperparams['ioc']:
            demo_file = self._data_files_dir + 'demos.pkl'
            demos = self.data_logger.unpickle(demo_file)
            if demos is None:
                self.demo_gen = GenDemo(config)
                self.demo_gen.ioc_algo = self.algorithm
                self.demo_gen.generate()
                demo_file = self._data_files_dir + 'demos.pkl'
                demos = self.data_logger.unpickle(demo_file)
            config['agent']['pos_body_offset'] = demos['pos_body_offset']
            self.agent = config['agent']['type'](config['agent'])
            self.algorithm.demoX = demos['demoX']
            self.algorithm.demoU = demos['demoU']
            self.algorithm.demoO = demos['demoO']
Example 16
def plotCountSuc():
    data_logger = DataLogger()
    #count_suc = data_logger.unpickle('./position/1/position_ol_alpha_count_step_5.pkl')
    count_suc = data_logger.unpickle('./position/1/position_md.pkl')
    rate_suc = np.sum(count_suc, axis=1)/count_suc.shape[1]
    print(rate_suc)
    x_data = np.zeros(0)
    for i in range(rate_suc.shape[0]):
        x_data = np.concatenate((x_data, np.array([i])))
    plotline(x_data=x_data,
             y1_data=rate_suc,
             y2_data=rate_suc,
             y3_data=rate_suc,
             y4_data=rate_suc,
             x_label='condition',
             y_label='rate',
             title='successful rate')
    plt.show()
Example 17
def load_data(dir):
    all_files = list_files(dir)
    itrs = {}
    for filename in all_files:
        itr = int(re.findall('[0-9]{2}', filename)[-1])
        itrs[itr] = filename

    if len(itrs) == 0:
        print 'No data found! Exiting.'
        exit()
    elif len(itrs) == 1:
        print 'Only one iteration found, so using that'
        include = itrs
    else:
        print 'Here are the iterations for which data has been collected:'
        print sorted(itrs)
        include = raw_input('Which iterations would you like to include? ')
        if include == "all":
            include = itrs
        else:
            include = eval(include)
            if type(include) == int:
                include = [include]
            elif type(include) in (list, tuple):
                pass
            else:
                raise TypeError(
                    'Input should be an int or list/tuple thereof, or the keyword "all".'
                )

    data_logger = DataLogger()
    algorithm_states, traj_sample_lists = [], []
    for itr in include:
        # adapted from gps_main.py
        algorithm_file = osp.join(dir, 'algorithm_itr_%02d.pkl' % itr)
        algorithm = data_logger.unpickle(algorithm_file)
        if algorithm is None:
            raise RuntimeError("Cannot find '%s'" % algorithm_file)
        traj_samples = data_logger.unpickle(
            osp.join(dir, 'traj_sample_itr_%02d.pkl' % itr))
        algorithm_states.append(algorithm)
        traj_sample_lists.append(traj_samples)
    return algorithm_states, traj_sample_lists
Example 18
def runTest(itr_load):
    data_files_dir = config['common']['data_files_dir']
    data_logger = DataLogger()

    algorithm_file = data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load
    algorithm = data_logger.unpickle(algorithm_file)
    if algorithm is None:
        print("Error: cannot find '%s.'" % algorithm_file)
        os._exit(1)  # called instead of sys.exit(), since this is in a thread

    #pol = algorithm.cur[0].traj_distr
    pol = algorithm.policy_opt.policy
    agent_hyperparams = deepcopy(AGENT)
    agent_hyperparams.update(config['agent'])
    cost_obstacle = CostObstacle(config['algorithm']['cost']['costs'][2])
    cost_state = CostState(config['algorithm']['cost']['costs'][1])

    x0s = agent_hyperparams["x0"]
    for cond in range(len(x0s)):
        T = agent_hyperparams['T']
        dX = x0s[cond].shape[0]
        dU = agent_hyperparams['sensor_dims'][ACTION]

        agent_hyperparams['render'] = True
        agent = config['agent']['type'](agent_hyperparams)
        time.sleep(1)  # Time for init node
        '''
		while True:
			sample = agent.get_data()		
			raw_input("Get data")
		'''
        # Sample using offline trajectory distribution.
        for i in range(config['num_samples']):
            sample = agent.sample(pol, cond, noisy=False)
            cost_sum = CostSum(config['algorithm']['cost'])
            cost_obs = cost_obstacle.eval(sample)[0]
            cost_sta = cost_state.eval(sample)[0]
            total_cost = np.sum(cost_sum.eval(sample)[0])
            weights = config['algorithm']['cost']['weights']
            print "Total cost: ", total_cost,
            print "Cost state: ", np.sum(weights[1] * cost_sta),
            print "Cost obstacle: ", np.sum(weights[2] * cost_obs)
Example 19
    def __init__(self, config):
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])
Example 20
    def __init__(self, config, quit_on_end=False, no_algorithm=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']
        config['agent']['data_files_dir'] = self._data_files_dir
        config['algorithm']['data_files_dir'] = self._data_files_dir

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = None
        if config['gui_on']:
            from gps.gui.gps_training_gui import GPSTrainingGUI  # Only import if necessary
            self.gui = GPSTrainingGUI(config['common'])
        self.mode = None

        config['algorithm']['agent'] = self.agent
        if not no_algorithm:
            self.algorithm = config['algorithm']['type'](config['algorithm'])
            self.algorithm._data_files_dir = self._data_files_dir
            if hasattr(self.algorithm, 'policy_opt'):
                self.algorithm.policy_opt._data_files_dir = self._data_files_dir

        self.session_id = None
Example 21
	def __init__(self, config):
		self._hyperparams = config
		self._conditions = config['common']['conditions']

		# if 'train_conditions' in config['common']:
		# 	self._train_idx = config['common']['train_conditions']
		# 	self._test_idx = config['common']['test_conditions']
		# else:
		# 	self._train_idx = range(self._conditions)
		# 	config['common']['train_conditions'] = config['common']['conditions']
		# 	self._hyperparams=config
		# 	self._test_idx = self._train_idx
		self._data_files_dir = config['common']['data_files_dir']
		self._algorithm_files_dir = config['common']['demo_controller_file']
		self.data_logger = DataLogger()
Example 22
    def setUp(self):
        from gps import __file__ as gps_filepath
        gps_filepath = '/'.join(str.split(gps_filepath, '/')[:-1])
        gps_filepath = os.path.abspath(gps_filepath)
        hyperparams_file = gps_filepath + '/test_files/hyperparams.py'
        hyperparams = imp.load_source('hyperparams', hyperparams_file)
        config = hyperparams.config

        seed = config.get('random_seed', 0)
        random.seed(seed)
        np.random.seed(seed)

        config['algorithm']['agent'] = DummyAgent(config['agent'])
        self.algorithm = config['algorithm']['type'](config['algorithm'])

        data_logger = DataLogger()
        cur_data = data_logger.unpickle(gps_filepath +
                                        '/test_files/sample_list')
        self.X = cur_data.get_X()
        self.U = cur_data.get_U()
        prior = data_logger.unpickle(gps_filepath + '/test_files/prior')
        self.algorithm.cur[0].traj_info.dynamics.prior = prior
        self.Fm, self.fv, self.dyn_covar = data_logger.unpickle(
            gps_filepath + '/test_files/dynamics_data')
Example 23
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self.start_time = timeit.default_timer()
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = list(range(self._conditions))
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

        # CB: save a cost figure when running without the GUI
        if not config['gui_on']:
            self.simplePlotter = SimplePlotter(
                config['common']['experiment_name'],
                config['common']['data_files_dir'])
Example 24
 def __init__(self, config, quit_on_end=False):
     """
     Initialize GPSMain
     Args:
         config: Hyperparameters for experiment
         quit_on_end: When true, quit automatically on completion
     """
     self._quit_on_end = quit_on_end
     self._hyperparams = config
     self._conditions = config['common']['conditions']
     #self._condition = 1
     if 'train_conditions' in config['common']:
         # explicit train/test condition indices were provided
         self._train_idx = config['common']['train_conditions']
         self._test_idx = config['common']['test_conditions']
     else:
         self._train_idx = range(self._conditions)
         config['common']['train_conditions'] = config['common'][
             'conditions']
         # add a 'train_conditions' key to common, mirroring 'conditions'
         self._hyperparams = config
         # re-assign hyperparams since config was modified above
         self._test_idx = self._train_idx
         # test on the same conditions used for training
     self._data_files_dir = config['common']['data_files_dir']
     # data files directory, taken from the common dict
     self.agent = config['agent']['type'](config['agent'])
     # instantiate the agent class specified in the config
     #print(self.agent,'self.agent')
     self.data_logger = DataLogger()
     # create the training GUI only if it is enabled
     self.gui = GPSTrainingGUI(
         config['common']) if config['gui_on'] else None
     # the agent object is added to the algorithm config
     config['algorithm']['agent'] = self.agent
     self.algorithm = config['algorithm']['type'](config['algorithm'])
Example 25
def plotcost():
    data_logger = DataLogger()
    cost_ol = data_logger.unpickle('./position/5/cost_ol.pkl')
    #with_alpha_costs = np.delete(with_alpha_costs, [10, 11, 12])
    cost_ol_alpha = data_logger.unpickle('./position/5/cost_ol_alpha.pkl')
    cost_ol_alpha_step = data_logger.unpickle('./position/5/cost_ol_alpha_step.pkl')
    cost_md = data_logger.unpickle('./position/5/md_test_costs.pkl')
    #with_alpha_step_costs = data_logger.unpickle('./position/ol_with_alpha_step_costs.pkl')
    #with_alpha_step_costs = np.delete(with_alpha_step_costs, 4)
    print(cost_ol.shape[0])
    for i in range(0, cost_ol.shape[0]):
        if cost_ol[i] > -200:
            cost_ol[i] = -200
            #cost_ol = np.delete(cost_ol, i)
    for i in range(0, cost_ol_alpha.shape[0]):
        if cost_ol_alpha[i] > -200:
            #cost_ol_alpha = np.delete(cost_ol_alpha, i)
            cost_ol_alpha[i] = -200
    for i in range(0, cost_ol_alpha_step.shape[0]):
        if cost_ol_alpha_step[i] > -200:
            #cost_ol_alpha_step = np.delete(cost_ol_alpha_step, i)
            cost_ol_alpha_step[i] = -200
    for i in range(0, cost_md.shape[0]):
        if cost_md[i] > -200:
            #cost_md = np.delete(cost_md, i)
            cost_md[i] = -200

    """ construct x axis"""
    num_positions = np.zeros(0)
    #max_len = min(with_alpha_costs.shape[0], without_alpha_costs.shape[0], md_costs.shape[0], with_alpha_step_costs.shape[0])
    min_len = min(cost_ol.shape[0], cost_ol_alpha.shape[0], cost_ol_alpha_step.shape[0], cost_md.shape[0])
    print('len: %d' % min_len)
    for i in range(min_len):
        num_positions = np.append(num_positions, np.array(i))
    cost_ol = cost_ol[:min_len]
    cost_ol_alpha = cost_ol_alpha[:min_len]
    cost_ol_alpha_step = cost_ol_alpha_step[:min_len]
    cost_md = cost_md[:min_len]

    plotline(x_data=num_positions,
             y1_data=cost_ol,
             y2_data=cost_ol_alpha,
             y3_data=cost_ol_alpha_step,
             y4_data=cost_md,
             x_label='num of position',
             y_label='cost',
             title='compare')

    plt.show()
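Not part of the original snippet, but the four element-wise clamping loops above can be written more compactly with NumPy; a behavior-equivalent sketch (capping every cost at -200):

# equivalent to the loops above: any cost greater than -200 becomes -200
cost_ol = np.minimum(cost_ol, -200)
cost_ol_alpha = np.minimum(cost_ol_alpha, -200)
cost_ol_alpha_step = np.minimum(cost_ol_alpha_step, -200)
cost_md = np.minimum(cost_md, -200)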
Example 26
def plotCountSucAll():
    """
    plot varies of successful rate
    """
    data_logger = DataLogger()
    # the first two loads are overwritten immediately; only the last is used
    #count_suc1 = data_logger.unpickle('./position/1/position_ol_alpha_count_5.pkl')
    #count_suc1 = data_logger.unpickle('./position/2/position_ol_alpha_count_5.pkl')
    count_suc1 = data_logger.unpickle('./position/3/position_ol_alpha_count_5.pkl')
    rate_suc1 = np.sum(count_suc1, axis=1)/count_suc1.shape[1]
    print(rate_suc1)

    count_suc2 = data_logger.unpickle('./position/2/position_ol_alpha_count_7.pkl')
    rate_suc2 = np.sum(count_suc2, axis=1)/count_suc2.shape[1]
    print(rate_suc2)

    count_suc3 = data_logger.unpickle('./position/2/position_ol_alpha_count_8.pkl')
    rate_suc3 = np.sum(count_suc3, axis=1)/count_suc3.shape[1]
    print(rate_suc3)

    count_suc4 = data_logger.unpickle('./position/2/position_ol_alpha_count_9.pkl')
    rate_suc4 = np.sum(count_suc4, axis=1)/count_suc4.shape[1]
    print(rate_suc4)

    min_len = min(count_suc1.shape[0], count_suc2.shape[0], count_suc3.shape[0], count_suc4.shape[0])
    x_data = np.zeros(0)
    for i in range(min_len):
        x_data = np.concatenate((x_data, np.array([i])))

    rate_suc1 = rate_suc1[:min_len]
    rate_suc2 = rate_suc2[:min_len]
    rate_suc3 = rate_suc3[:min_len]
    rate_suc4 = rate_suc4[:min_len]

    plotline(x_data=x_data,
             y1_data=rate_suc1,
             y2_data=rate_suc2,
             y3_data=rate_suc3,
             y4_data=rate_suc4,
             x_label='condition',
             y_label='rate',
             title='successful rate')
    plt.show()
Example 27
    def __init__(self):
        self._hyperparams = copy.deepcopy(hyperparams_config)
        self._conditions = self._hyperparams['common']['conditions']
        if 'train_conditions' in self._hyperparams['common']:
            self._train_idx = self._hyperparams['common']['train_conditions']
            self._test_idx = self._hyperparams['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            self._hyperparams['common'][
                'train_conditions'] = self._hyperparams['common']['conditions']
            self._test_idx = self._train_idx

        self.iteration_count = 0

        self.dU = 6
        self.dO = 270006
        self.T = hyperparams_agent['T']
        self.M = len(self._train_idx)
        self.resume_training = 27

        self.agent = AgentBaxterPreTrain(hyperparams_agent)
        self.policy_opt = PolicyOptTf(hyperparams_policy_opt, self.dO, self.dU)
        self.data_logger = DataLogger()
        self.save_dir = '/hdd/gps-master/python/gps/pre_train/policy_opt/policy_opt_save/'
Example 28
class GPSMain(object):
    """ Main class to run algorithms and experiments. """
    def __init__(self, config):
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common']['conditions']
            self._hyperparams=config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

    def run(self, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """
        itr_start = self._initialize(itr_load)

        for itr in range(itr_start, self._hyperparams['iterations']):
            for cond in self._train_idx:
                for i in range(self._hyperparams['num_samples']):
                    self._take_sample(itr, cond, i)

            traj_sample_lists = [
                self.agent.get_samples(cond, -self._hyperparams['num_samples'])
                for cond in self._train_idx
            ]
            self._take_iteration(itr, traj_sample_lists)
            pol_sample_lists = self._take_policy_samples()
            self._log_data(itr, traj_sample_lists, pol_sample_lists)

        self._end()

    def test_policy(self, itr, N):
        """
        Take N policy samples of the algorithm state at iteration itr,
        for testing the policy to see how it is behaving.
        (Called directly from the command line --policy flag).
        Args:
            itr: the iteration from which to take policy samples
            N: the number of policy samples to take
        Returns: None
        """
        algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr
        self.algorithm = self.data_logger.unpickle(algorithm_file)
        if self.algorithm is None:
            print("Error: cannot find '%s.'" % algorithm_file)
            os._exit(1) # called instead of sys.exit(), since this is in a thread
        traj_sample_lists = self.data_logger.unpickle(self._data_files_dir +
            ('traj_sample_itr_%02d.pkl' % itr))

        pol_sample_lists = self._take_policy_samples(N)
        self.data_logger.pickle(
            self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
            copy.copy(pol_sample_lists)
        )

        if self.gui:
            self.gui.update(itr, self.algorithm, self.agent,
                traj_sample_lists, pol_sample_lists)
            self.gui.set_status_text(('Took %d policy sample(s) from ' +
                'algorithm state at iteration %d.\n' +
                'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') % (N, itr, itr))

    def _initialize(self, itr_load):
        """
        Initialize from the specified iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns:
            itr_start: Iteration to start from.
        """
        if itr_load is None:
            if self.gui:
                self.gui.set_status_text('Press \'go\' to begin.')
            return 0
        else:
            algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load
            self.algorithm = self.data_logger.unpickle(algorithm_file)
            if self.algorithm is None:
                print("Error: cannot find '%s.'" % algorithm_file)
                os._exit(1) # called instead of sys.exit(), since this is in a thread
                
            if self.gui:
                traj_sample_lists = self.data_logger.unpickle(self._data_files_dir +
                    ('traj_sample_itr_%02d.pkl' % itr_load))
                pol_sample_lists = self.data_logger.unpickle(self._data_files_dir +
                    ('pol_sample_itr_%02d.pkl' % itr_load))
                self.gui.update(itr_load, self.algorithm, self.agent,
                    traj_sample_lists, pol_sample_lists)
                self.gui.set_status_text(
                    ('Resuming training from algorithm state at iteration %d.\n' +
                    'Press \'go\' to begin.') % itr_load)
            return itr_load + 1

    def _take_sample(self, itr, cond, i):
        """
        Collect a sample from the agent.
        Args:
            itr: Iteration number.
            cond: Condition number.
            i: Sample number.
        Returns: None
        """
        pol = self.algorithm.cur[cond].traj_distr
        if self.gui:
            self.gui.set_image_overlays(cond)   # Must call for each new cond.
            redo = True
            while redo:
                while self.gui.mode in ('wait', 'request', 'process'):
                    if self.gui.mode in ('wait', 'process'):
                        time.sleep(0.01)
                        continue
                    # 'request' mode.
                    if self.gui.request == 'reset':
                        try:
                            self.agent.reset(cond)
                        except NotImplementedError:
                            self.gui.err_msg = 'Agent reset unimplemented.'
                    elif self.gui.request == 'fail':
                        self.gui.err_msg = 'Cannot fail before sampling.'
                    self.gui.process_mode()  # Complete request.

                self.gui.set_status_text(
                    'Sampling: iteration %d, condition %d, sample %d.' %
                    (itr, cond, i)
                )
                self.agent.sample(
                    pol, cond,
                    verbose=(i < self._hyperparams['verbose_trials'])
                )

                if self.gui.mode == 'request' and self.gui.request == 'fail':
                    redo = True
                    self.gui.process_mode()
                    self.agent.delete_last_sample(cond)
                else:
                    redo = False
        else:
            self.agent.sample(
                pol, cond,
                verbose=(i < self._hyperparams['verbose_trials'])
            )

    def _take_iteration(self, itr, sample_lists):
        """
        Take an iteration of the algorithm.
        Args:
            itr: Iteration number.
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Calculating.')
            self.gui.start_display_calculating()
        self.algorithm.iteration(sample_lists)
        if self.gui:
            self.gui.stop_display_calculating()

    def _take_policy_samples(self, N=None):
        """
        Take samples from the policy to see how it's doing.
        Args:
            N  : number of policy samples to take per condition
        Returns: None
        """
        if 'verbose_policy_trials' not in self._hyperparams:
            return None
        if not N:
            N = self._hyperparams['verbose_policy_trials']
        if self.gui:
            self.gui.set_status_text('Taking policy samples.')
        pol_samples = [[None for _ in range(N)] for _ in range(self._conditions)]
        for cond in range(len(self._test_idx)):
            for i in range(N):
                pol_samples[cond][i] = self.agent.sample(
                    self.algorithm.policy_opt.policy, self._test_idx[cond],
                    verbose=True, save=False)
        return [SampleList(samples) for samples in pol_samples]

    def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
        """
        Log data and algorithm, and update the GUI.
        Args:
            itr: Iteration number.
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Logging data and updating GUI.')
            self.gui.update(itr, self.algorithm, self.agent,
                traj_sample_lists, pol_sample_lists)
            self.gui.save_figure(
                self._data_files_dir + ('figure_itr_%02d.png' % itr)
            )
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm)
        )
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists)
        )
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists)
            )

    def _end(self):
        """ Finish running and exit. """
        if self.gui:
            self.gui.set_status_text('Training complete.')
            self.gui.end_mode()
Example 29
class GPSMain(object):
    """ Main class to run tensorflow_code-pytorch and experiments. """
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """

        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']

        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])

        self.data_logger = DataLogger()

        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

    def run(self, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """
        try:
            itr_start = self._initialize(itr_load)
            for itr in range(itr_start, self._hyperparams['iterations']):
                """ get samples """
                for cond in self._train_idx:
                    for i in range(self._hyperparams['num_samples']):
                        self._take_sample(itr, cond, i)

                traj_sample_lists = [
                    self.agent.get_samples(cond,
                                           -self._hyperparams['num_samples'])
                    for cond in self._train_idx
                ]
                """ Clear agent samples """
                self.agent.clear_samples()
                """ interation """
                self._take_iteration(itr, traj_sample_lists)
                """ test policy and samples """
                pol_sample_lists = self._take_policy_samples()
                self._log_data(itr, traj_sample_lists, pol_sample_lists)

        except Exception as e:
            traceback.print_exception(*sys.exc_info())

        finally:
            self._end()

    def test_policy(self, itr, N):
        """
        Take N policy samples of the algorithm state at iteration itr,
        for testing the policy to see how it is behaving.
        (Called directly from the command line --policy flag).
        Args:
            itr: the iteration from which to take policy samples
            N: the number of policy samples to take
        Returns: None
        """
        algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr
        self.algorithm = self.data_logger.unpickle(algorithm_file)

        if self.algorithm is None:
            print("Error: cannot find '%s.'" % algorithm_file)
            os._exit(1)
            # called instead of sys.exit(), since this is in a thread
        traj_sample_lists = self.data_logger.unpickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr))

        pol_sample_lists = self._take_policy_samples(N)

        self.data_logger.pickle(
            self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
            copy.copy(pol_sample_lists))

        if self.gui:
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.set_status_text(
                ('Took %d policy sample(s) from ' +
                 'algorithm state at iteration %d.\n' +
                 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') %
                (N, itr, itr))

    def _initialize(self, itr_load):
        """
        Initialize from the specified iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns:
            itr_start: Iteration to start from.
        """
        if itr_load is None:
            if self.gui:
                self.gui.set_status_text('Press \'go\' to begin.')
            return 0
        else:
            algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load
            self.algorithm = self.data_logger.unpickle(algorithm_file)
            if self.algorithm is None:
                print("Error: cannot find '%s.'" % algorithm_file)
                os._exit(
                    1
                )  # called instead of sys.exit(), since this is in a thread

            if self.gui:
                traj_sample_lists = self.data_logger.unpickle(
                    self._data_files_dir +
                    ('traj_sample_itr_%02d.pkl' % itr_load))
                if self.algorithm.cur[0].pol_info:
                    pol_sample_lists = self.data_logger.unpickle(
                        self._data_files_dir +
                        ('pol_sample_itr_%02d.pkl' % itr_load))
                else:
                    pol_sample_lists = None
                self.gui.set_status_text((
                    'Resuming training from algorithm state at iteration %d.\n'
                    + 'Press \'go\' to begin.') % itr_load)
            return itr_load + 1

    def _take_sample(self, itr, cond, i):
        """
        Collect a sample from the agent.
        Args:
            itr: Iteration number.
            cond: Condition number.
            i: Sample number.
        Returns: None
        """
        if self.algorithm._hyperparams['sample_on_policy'] \
                and self.algorithm.iteration_count > 0:
            pol = self.algorithm.policy_opt.policy
            print(" ========================== on policy ====================")
        else:
            pol = self.algorithm.cur[cond].traj_distr

        if self.gui:
            self.gui.set_image_overlays(cond)  # Must call for each new cond.
            redo = True
            while redo:
                while self.gui.mode in ('wait', 'request', 'process'):
                    if self.gui.mode in ('wait', 'process'):
                        time.sleep(0.01)
                        continue
                    # 'request' mode.
                    if self.gui.request == 'reset':
                        try:
                            self.agent.reset(cond)
                        except NotImplementedError:
                            self.gui.err_msg = 'Agent reset unimplemented.'
                    elif self.gui.request == 'fail':
                        self.gui.err_msg = 'Cannot fail before sampling.'
                    self.gui.process_mode()  # Complete request.

                self.gui.set_status_text(
                    'Sampling: iteration %d, condition %d, sample %d.' %
                    (itr, cond, i))
                self.agent.sample(
                    pol,
                    cond,
                    verbose=(i < self._hyperparams['verbose_trials']))

                if self.gui.mode == 'request' and self.gui.request == 'fail':
                    redo = True
                    self.gui.process_mode()
                    self.agent.delete_last_sample(cond)
                else:
                    redo = False
        else:
            self.agent.sample(
                pol, cond, verbose=(i < self._hyperparams['verbose_trials']))

    def _take_iteration(self, itr, sample_lists):
        """
        Take an iteration of the algorithm.
        Args:
            itr: Iteration number.
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Calculating.')
            self.gui.start_display_calculating()

        self.algorithm.iteration(sample_lists)

        if self.gui:
            self.gui.stop_display_calculating()

    def _take_policy_samples(self, N=None):
        """
        Take samples from the policy to see how it's doing.
        Args:
            N  : number of policy samples to take per condition
        Returns: None
        """
        print(
            " ================================ test policy ===================================="
        )
        if 'verbose_policy_trials' not in self._hyperparams:
            # AlgorithmTrajOpt
            return None
        verbose = self._hyperparams['verbose_policy_trials']

        if self.gui:
            self.gui.set_status_text('Taking policy samples.')

        pol_samples = [[None] for _ in range(len(self._test_idx))]
        # Since this isn't noisy, just take one sample.
        # TODO: Make this noisy? Add hyperparam?
        # TODO: Take at all conditions for GUI?
        for cond in range(len(self._test_idx)):
            pol_samples[cond][0] = self.agent.sample(
                self.algorithm.policy_opt.policy,
                self._test_idx[cond],
                verbose=verbose,
                save=False,
                noisy=False)

        return [SampleList(samples) for samples in pol_samples]

    def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
        """
        Log data and algorithm, and update the GUI.
        Args:
            itr: Iteration number.
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Logging data and updating GUI.')
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.save_figure(self._data_files_dir +
                                 ('figure_itr_%02d.png' % itr))
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm))
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists))
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists))

    def _end(self):
        """ Finish running and exit. """
        if self.gui:
            self.gui.set_status_text('Training complete.')
            self.gui.end_mode()
            if self._quit_on_end:
                # Quit automatically (for running sequential expts)
                os._exit(1)
Example 30
def save_data(output_dir, sample_lists, itr):
    if not osp.isdir(output_dir):
        os.makedirs(output_dir)
    DataLogger().pickle(osp.join(output_dir, 'pol_sample_itr_%02d.pkl' % itr),
                        sample_lists)
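A short round-trip sketch for save_data above, pairing it with DataLogger.unpickle as used throughout these examples (the output directory and the sample lists are placeholders):

# pol_sample_lists stands in for the policy SampleLists an experiment produced
save_data('/tmp/gps_output', pol_sample_lists, itr=0)
restored = DataLogger().unpickle(osp.join('/tmp/gps_output', 'pol_sample_itr_00.pkl'))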
Example 31
def main():
    """ Main function to be run. """
    parser = argparse.ArgumentParser(
        description='Run the Guided Policy Search algorithm.')
    parser.add_argument('experiment', type=str, help='experiment name')
    parser.add_argument('-n',
                        '--new',
                        action='store_true',
                        help='create new experiment')
    parser.add_argument('-t',
                        '--targetsetup',
                        action='store_true',
                        help='run target setup')
    parser.add_argument('-r',
                        '--resume',
                        metavar='N',
                        type=int,
                        help='resume training from iter N')
    parser.add_argument('-p',
                        '--policy',
                        metavar='N',
                        type=int,
                        help='take N policy samples (for BADMM/MDGPS only)')
    parser.add_argument('-s',
                        '--silent',
                        action='store_true',
                        help='silent debug print outs')
    parser.add_argument('-q',
                        '--quit',
                        action='store_true',
                        help='quit GUI automatically when finished')
    parser.add_argument('-c',
                        '--condition',
                        metavar='N',
                        type=int,
                        help='train on the first N stored positions')
    parser.add_argument('-m',
                        '--num',
                        metavar='N',
                        type=int,
                        help='number of the test experiment')
    parser.add_argument('-exper',
                        '--exper',
                        metavar='N',
                        type=int,
                        help='index of the test experiment run')
    parser.add_argument('-set',
                        '--set_cond',
                        metavar='N',
                        type=int,
                        help='train on a specific position setting')
    parser.add_argument('-algi',
                        '--alg_itr',
                        metavar='N',
                        type=int,
                        help='number of training iterations for the NN')

    args = parser.parse_args()

    exp_name = args.experiment
    resume_training_itr = args.resume
    test_policy_N = args.policy

    from gps import __file__ as gps_filepath
    gps_filepath = os.path.abspath(gps_filepath)
    gps_dir = '/'.join(str.split(gps_filepath, '/')[:-3]) + '/'
    exp_dir = gps_dir + 'experiments/' + exp_name + '/'
    hyperparams_file = exp_dir + 'hyperparams.py'

    if args.silent:
        logging.basicConfig(format='%(levelname)s:%(message)s',
                            level=logging.INFO)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s',
                            level=logging.DEBUG)

    if args.new:
        from shutil import copy

        if os.path.exists(exp_dir):
            sys.exit("Experiment '%s' already exists.\nPlease remove '%s'." %
                     (exp_name, exp_dir))
        os.makedirs(exp_dir)

        prev_exp_file = '.previous_experiment'
        prev_exp_dir = None
        try:
            with open(prev_exp_file, 'r') as f:
                prev_exp_dir = f.readline()
            copy(prev_exp_dir + 'hyperparams.py', exp_dir)
            if os.path.exists(prev_exp_dir + 'targets.npz'):
                copy(prev_exp_dir + 'targets.npz', exp_dir)
        except IOError as e:
            with open(hyperparams_file, 'w') as f:
                f.write(
                    '# To get started, copy over hyperparams from another experiment.\n'
                    +
                    '# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.'
                )
        with open(prev_exp_file, 'w') as f:
            f.write(exp_dir)

        exit_msg = ("Experiment '%s' created.\nhyperparams file: '%s'" %
                    (exp_name, hyperparams_file))
        if prev_exp_dir and os.path.exists(prev_exp_dir):
            exit_msg += "\ncopied from     : '%shyperparams.py'" % prev_exp_dir
        sys.exit(exit_msg)

    if not os.path.exists(hyperparams_file):
        sys.exit("Experiment '%s' does not exist.\nDid you create '%s'?" %
                 (exp_name, hyperparams_file))

    hyperparams = imp.load_source('hyperparams', hyperparams_file)
    if args.targetsetup:
        try:
            import matplotlib.pyplot as plt
            from gps.agent.ros.agent_ros import AgentROS
            from gps.gui.target_setup_gui import TargetSetupGUI

            agent = AgentROS(hyperparams.config['agent'])
            TargetSetupGUI(hyperparams.config['common'], agent)

            plt.ioff()
            plt.show()
        except ImportError:
            sys.exit('ROS required for target setup.')
    elif test_policy_N:
        import random
        import numpy as np
        import matplotlib.pyplot as plt

        seed = hyperparams.config.get('random_seed', 0)
        random.seed(seed)
        np.random.seed(seed)

        data_files_dir = exp_dir + 'data_files/'
        data_filenames = os.listdir(data_files_dir)
        algorithm_prefix = 'algorithm_itr_'
        algorithm_filenames = [
            f for f in data_filenames if f.startswith(algorithm_prefix)
        ]
        current_algorithm = sorted(algorithm_filenames, reverse=True)[0]
        current_itr = int(
            current_algorithm[len(algorithm_prefix):len(algorithm_prefix) + 2])

        gps = GPSMain(hyperparams.config)
        if hyperparams.config['gui_on']:
            test_policy = threading.Thread(target=lambda: gps.test_policy(
                itr=current_itr, N=test_policy_N))
            test_policy.daemon = True
            test_policy.start()

            plt.ioff()
            plt.show()
        else:
            gps.test_policy(itr=current_itr, N=test_policy_N)
    else:
        if args.condition:
            """ if specify the N training position"""
            num_position = args.condition
            data_logger = DataLogger()
            positions = data_logger.unpickle('./position/train_position.pkl')
            # positions = data_logger.unpickle('./position/suc_train_position.pkl')
            hyperparams.agent['conditions'] = num_position
            hyperparams.common['conditions'] = num_position
            hyperparams.algorithm['conditions'] = num_position
            pos_body_offset = list()
            for i in range(num_position):
                pos_body_offset.append(positions[i])
            hyperparams.agent['pos_body_offset'] = pos_body_offset

        import random
        import numpy as np
        import matplotlib.pyplot as plt

        seed = hyperparams.config.get('random_seed', 0)
        random.seed(seed)
        np.random.seed(seed)

        # set the number of training iterations for the neural network
        if args.alg_itr:
            hyperparams.config['iterations'] = args.alg_itr
        """
        set up the extended set of training positions
        """
        data_logger = DataLogger()
        train_position = data_logger.unpickle(
            './position/all_train_position.pkl')
        hyperparams.agent['pos_body_offset'] = list(train_position)
        hyperparams.agent['conditions'] = len(train_position)
        hyperparams.common['conditions'] = len(train_position)
        hyperparams.algorithm['conditions'] = len(train_position)

        gps = GPSMain(hyperparams.config, args.quit)
        if hyperparams.config['gui_on']:
            run_gps = threading.Thread(
                target=lambda: gps.run(itr_load=resume_training_itr))
            run_gps.daemon = True
            run_gps.start()

            plt.ioff()
            plt.show()
        else:
            costs, mean_cost, position_suc_count, all_distance = gps.run(
                args.num,
                exper_condition=args.set_cond,
                itr_load=resume_training_itr)
            # gps.data_logger.pickle('./position/%d/experiment_%d/md_all_distance.pkl'
            #                        % (args.num, args.exper), all_distance)
            gps.data_logger.pickle('./position/md_all_distance.pkl',
                                   all_distance)
            gps.data_logger.pickle('./position/md_all_cost.pkl', costs)
            """
Example #32
class GPSMain(object):
    """ Main class to run algorithms and experiments. """
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

    def run(self, time_experiment, exper_condition, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            time_experiment: index of the experiment run (only referenced by
                the commented-out test-position loading below)
            exper_condition: index of the position setting (likewise only
                referenced by the commented-out loading below)
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """
        itr_start = self._initialize(itr_load)

        # test_position = self.data_logger.unpickle('./position/%d/%d/test_position.pkl'
        #                                           % (time_experiment, exper_condition))
        self.target_ee_point = self.agent._hyperparams['target_ee_points'][:3]

        for itr in range(itr_start, self._hyperparams['iterations']):
            print('itr******:  %d   **********' % itr)
            for cond in self._train_idx:
                for i in range(self._hyperparams['num_samples']):
                    self._take_sample(itr, cond, i)

            traj_sample_lists = [
                self.agent.get_samples(cond, -self._hyperparams['num_samples'])
                for cond in self._train_idx
            ]

            # Clear agent samples.
            self.agent.clear_samples()

            self._take_iteration(itr, traj_sample_lists)
            pol_sample_lists = self._take_policy_samples()

            self._log_data(itr, traj_sample_lists, pol_sample_lists)
        """ test policy and collect costs"""
        """
        gradually add the distance of agent position
        """
        center_position = 0.02
        radius = 0.02
        max_error_bound = 0.02
        directory = 9
        for test_condition in range(7):
            # test_position = self.generate_position(center_position, radius, 30, max_error_bound)
            test_position = self.data_logger.unpickle(
                './position/test_position_%d.pkl' % (test_condition + 1))
            costs, position_suc_count, distance = self.test_cost(
                test_position, len(pol_sample_lists))
            print('distance:', distance)
            # add the position_suc_count
            if test_condition == 0:
                # initialize the accumulation arrays on the first test condition
                all_pos_suc_count = np.expand_dims(position_suc_count, axis=0)
                all_distance = np.expand_dims(distance, axis=0)
            else:
                all_pos_suc_count = np.vstack(
                    (all_pos_suc_count, position_suc_count))
                all_distance = np.vstack((all_distance, distance))

            costs = costs.reshape(costs.shape[0] * costs.shape[1])
            mean_cost = np.array([np.mean(costs)])
            center_position = center_position + radius * 2

        self._end()
        return costs, mean_cost, all_pos_suc_count, all_distance

    def generate_position(self, cposition, radius, conditions,
                          max_error_bound):
        # all_positions = np.zeros(0)

        while True:
            all_positions = np.array([cposition, -cposition, 0])
            center_position = np.array([cposition, -cposition, 0])
            for i in range(conditions):
                position = np.random.uniform(cposition - radius,
                                             cposition + radius, 3)
                while True:
                    position[2] = 0
                    position[1] = -position[1]
                    area = (position - center_position).dot(position -
                                                            center_position)
                    # area = np.sum(np.multiply(position - center_position, position - center_position))
                    if area <= radius**2:
                        # print(area)
                        break
                    position = np.random.uniform(cposition - radius,
                                                 cposition + radius, 3)
                position = np.floor(position * 1000) / 1000.0
                all_positions = np.concatenate((all_positions, position))
            all_positions = np.reshape(all_positions,
                                       [all_positions.shape[0] // 3, 3])
            # print(all_positions[:, 1])
            # print('mean:')
            # print(np.mean(all_positions, axis=0))
            mean_position = np.mean(all_positions, axis=0)
            # mean_error1 = np.fabs(mean_position[0] - 0.11)
            # mean_error2 = np.fabs(mean_position[1] + 0.11)
            mean_error1 = np.fabs(mean_position[0] -
                                  (cposition - max_error_bound))
            mean_error2 = np.fabs(mean_position[1] +
                                  (cposition - max_error_bound))
            if mean_error1 < max_error_bound and mean_error2 < max_error_bound:
                print('mean:')
                print(np.mean(all_positions, axis=0))
                break
        print(all_positions)
        print(all_positions.shape)
        return all_positions

    def test_cost(self, positions, train_cond):
        """
        test policy and collect costs
        Args:
            positions: test position from test_position.pkl

        Returns:
            cost:   mean cost of all test position
            total_suc:  successful pegging trial count  1:successful    0:fail

        """
        iteration = positions.shape[0] // train_cond
        total_costs = list()
        total_ee_points = list()
        total_suc = np.zeros(0)
        total_distance = np.zeros(0)
        for itr in range(iteration):
            for cond in self._train_idx:
                self._hyperparams['agent']['pos_body_offset'][
                    cond] = positions[itr + cond]
            self.agent.reset_model(self._hyperparams)
            _, cost, ee_points = self._test_policy_samples()
            for cond in self._train_idx:
                total_ee_points.append(ee_points[cond])
            total_costs.append(cost)
        print("total_costs:", total_costs)
        for i in range(len(total_ee_points)):
            ee_error = total_ee_points[i][:3] - self.target_ee_point
            distance = ee_error.dot(ee_error)**0.5
            if (distance < 0.06):
                total_suc = np.concatenate((total_suc, np.array([1])))
            else:
                total_suc = np.concatenate((total_suc, np.array([0])))
            total_distance = np.concatenate(
                (total_distance, np.array([distance])))
        return np.array(total_costs), total_suc, total_distance

    def test_policy(self, itr, N):
        """
        Take N policy samples of the algorithm state at iteration itr,
        for testing the policy to see how it is behaving.
        (Called directly from the command line --policy flag).
        Args:
            itr: the iteration from which to take policy samples
            N: the number of policy samples to take
        Returns: None
        """
        algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr
        self.algorithm = self.data_logger.unpickle(algorithm_file)
        if self.algorithm is None:
            print("Error: cannot find '%s.'" % algorithm_file)
            os._exit(1)  # called instead of sys.exit(), since this is in a thread
        traj_sample_lists = self.data_logger.unpickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr))

        pol_sample_lists = self._take_policy_samples(N)
        self.data_logger.pickle(
            self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
            copy.copy(pol_sample_lists))

        if self.gui:
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.set_status_text(
                ('Took %d policy sample(s) from ' +
                 'algorithm state at iteration %d.\n' +
                 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') %
                (N, itr, itr))

    def _initialize(self, itr_load):
        """
        Initialize from the specified iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns:
            itr_start: Iteration to start from.
        """
        if itr_load is None:
            if self.gui:
                self.gui.set_status_text('Press \'go\' to begin.')
            return 0
        else:
            algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load
            self.algorithm = self.data_logger.unpickle(algorithm_file)
            if self.algorithm is None:
                print("Error: cannot find '%s.'" % algorithm_file)
                os._exit(
                    1
                )  # called instead of sys.exit(), since this is in a thread

            if self.gui:
                traj_sample_lists = self.data_logger.unpickle(
                    self._data_files_dir +
                    ('traj_sample_itr_%02d.pkl' % itr_load))
                if self.algorithm.cur[0].pol_info:
                    pol_sample_lists = self.data_logger.unpickle(
                        self._data_files_dir +
                        ('pol_sample_itr_%02d.pkl' % itr_load))
                else:
                    pol_sample_lists = None
                self.gui.set_status_text((
                    'Resuming training from algorithm state at iteration %d.\n'
                    + 'Press \'go\' to begin.') % itr_load)
            return itr_load + 1

    def _take_sample(self, itr, cond, i):
        """
        Collect a sample from the agent.
        Args:
            itr: Iteration number.
            cond: Condition number.
            i: Sample number.
        Returns: None
        """
        if self.algorithm._hyperparams['sample_on_policy'] \
                and self.algorithm.iteration_count > 0:
            pol = self.algorithm.policy_opt.policy
        else:
            pol = self.algorithm.cur[cond].traj_distr
        if self.gui:
            self.gui.set_image_overlays(cond)  # Must call for each new cond.
            redo = True
            while redo:
                while self.gui.mode in ('wait', 'request', 'process'):
                    if self.gui.mode in ('wait', 'process'):
                        time.sleep(0.01)
                        continue
                    # 'request' mode.
                    if self.gui.request == 'reset':
                        try:
                            self.agent.reset(cond)
                        except NotImplementedError:
                            self.gui.err_msg = 'Agent reset unimplemented.'
                    elif self.gui.request == 'fail':
                        self.gui.err_msg = 'Cannot fail before sampling.'
                    self.gui.process_mode()  # Complete request.

                self.gui.set_status_text(
                    'Sampling: iteration %d, condition %d, sample %d.' %
                    (itr, cond, i))
                self.agent.sample(
                    pol,
                    cond,
                    verbose=(i < self._hyperparams['verbose_trials']))

                if self.gui.mode == 'request' and self.gui.request == 'fail':
                    redo = True
                    self.gui.process_mode()
                    self.agent.delete_last_sample(cond)
                else:
                    redo = False
        else:
            self.agent.sample(
                pol, cond, verbose=(i < self._hyperparams['verbose_trials']))

    def _take_iteration(self, itr, sample_lists):
        """
        Take an iteration of the algorithm.
        Args:
            itr: Iteration number.
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Calculating.')
            self.gui.start_display_calculating()
        self.algorithm.iteration(sample_lists)
        if self.gui:
            self.gui.stop_display_calculating()

    def _take_policy_samples(self, N=None):
        """
        Take samples from the policy to see how it's doing.
        Args:
            N  : number of policy samples to take per condition
        Returns: list of SampleLists, one per test condition (None if
            'verbose_policy_trials' is not set)
        """
        if 'verbose_policy_trials' not in self._hyperparams:
            # AlgorithmTrajOpt
            return None
        verbose = self._hyperparams['verbose_policy_trials']
        if self.gui:
            self.gui.set_status_text('Taking policy samples.')
        pol_samples = [[None] for _ in range(len(self._test_idx))]
        # Since this isn't noisy, just take one sample.
        # TODO: Make this noisy? Add hyperparam?
        # TODO: Take at all conditions for GUI?
        for cond in range(len(self._test_idx)):
            pol_samples[cond][0] = self.agent.sample(
                self.algorithm.policy_opt.policy,
                self._test_idx[cond],
                verbose=verbose,
                save=False,
                noisy=False)
        return [SampleList(samples) for samples in pol_samples]

    def _test_policy_samples(self, N=None):
        """
        test sample from the policy and collect the costs
        Args:
            N:

        Returns:
            samples
            costs:      list of cost for each condition
            ee_point:   list of ee_point for each condition

        """
        if 'verbose_policy_trials' not in self._hyperparams:
            return None
        verbose = self._hyperparams['verbose_policy_trials']
        pol_samples = [[None] for _ in range(len(self._test_idx))]
        costs = list()
        ee_points = list()
        for cond in range(len(self._test_idx)):
            pol_samples[cond][0] = self.agent.sample(
                self.algorithm.policy_opt.policy,
                self._test_idx[cond],
                verbose=verbose,
                save=False,
                noisy=False)
            # in algorithm.py: _eval_cost
            policy_cost = self.algorithm.cost[0].eval(pol_samples[cond][0])[0]
            policy_cost = np.sum(policy_cost)  # sum over all time steps (e.g. 100)
            costs.append(policy_cost)
            ee_points.append(self.agent.get_ee_point(cond))
        return [SampleList(samples)
                for samples in pol_samples], costs, ee_points

    def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
        """
        Log data and algorithm, and update the GUI.
        Args:
            itr: Iteration number.
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Logging data and updating GUI.')
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.save_figure(self._data_files_dir +
                                 ('figure_itr_%02d.png' % itr))
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm))
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists))
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists))

    def _end(self):
        """ Finish running and exit. """
        if self.gui:
            self.gui.set_status_text('Training complete.')
            self.gui.end_mode()
            if self._quit_on_end:
                # Quit automatically (for running sequential expts)
                os._exit(1)
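As a small illustration of the success criterion used in test_cost above, a trial counts as successful when the end-effector's Euclidean distance to the target point is below 0.06; the values below are made up.

import numpy as np

# Hypothetical end-effector and target points (same units as the experiment).
ee_point = np.array([0.51, -0.32, 0.10])
target_ee_point = np.array([0.50, -0.30, 0.08])

ee_error = ee_point - target_ee_point
distance = ee_error.dot(ee_error) ** 0.5      # Euclidean norm, as in test_cost
success = 1 if distance < 0.06 else 0         # same threshold as above
print(distance, success)                      # ~0.03, 1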
Example #33
class GPSMain(object):
    """ Main class to run algorithms and experiments. """
    def __init__(self, config, quit_on_end=False):
        """
        Initialize GPSMain
        Args:
            config: Hyperparameters for experiment
            quit_on_end: When true, quit automatically on completion
        """
        self._quit_on_end = quit_on_end
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()
        self.gui = GPSTrainingGUI(
            config['common']) if config['gui_on'] else None

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

        self.init_alpha()

    def run(self, config, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            config: experiment hyperparameters (only referenced by the
                commented-out algorithm reset near the end of this method)
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """
        self.target_points = self.agent._hyperparams['target_ee_points'][:3]
        itr_start = self._initialize(itr_load)

        # """ set pre"""
        # position_train = self.data_logger.unpickle('./position/position_train.pkl')
        """ generate random training position in a specify circle"""
        center_position = np.array([0.05, -0.08, 0])
        position_train = self.generate_position_radius(center_position, 0.08,
                                                       7, 0.02)

        print('training position.....')
        print(position_train)

        # print('test all testing position....')
        # for i in xrange(position_train.shape[0]):
        #     test_positions = self.generate_position_radius(position_train[i], 0.03, 5, 0.01)
        #     if i == 0:
        #         all_test_positions = test_positions
        #     else:
        #         all_test_positions = np.concatenate((all_test_positions, test_positions))

        T = self.algorithm.T
        N = self._hyperparams['num_samples']
        dU = self.algorithm.dU
        for num_pos in range(position_train.shape[0]):
            """ load train position and reset agent model. """
            for cond in self._train_idx:
                self._hyperparams['agent']['pos_body_offset'][
                    cond] = position_train[num_pos]
            self.agent.reset_model(self._hyperparams)

            # initialize the training arrays (NN supervision targets)
            train_prc = np.zeros((0, T, dU, dU))
            train_mu = np.zeros((0, T, dU))
            train_obs_data = np.zeros((0, T, self.algorithm.dO))
            train_wt = np.zeros((0, T))

            # initialize counters
            count_suc = 0

            for itr in range(itr_start, self._hyperparams['iterations']):
                print('******************num_pos:************', num_pos)
                print('______________________itr:____________', itr)
                for cond in self._train_idx:
                    for i in range(self._hyperparams['num_samples']):
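                        # Sample with the plain local controller for the first
                        # position (or the first iteration at a new position);
                        # afterwards use the merged controller that blends the
                        # current and previous local policies.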
                        if num_pos == 0:
                            self._take_sample(itr, cond, i)
                        elif itr == 0:
                            self._take_sample(itr, cond, i)
                        else:
                            self._take_train_sample(itr, cond, i)
                            # self._take_sample(itr, cond, i)

                traj_sample_lists = [
                    self.agent.get_samples(cond,
                                           -self._hyperparams['num_samples'])
                    for cond in self._train_idx
                ]

                # calculate the distance from the end-effector to the target position
                ee_pos = self.agent.get_ee_pos(cond)[:3]
                target_pos = self.agent._hyperparams['target_ee_pos'][:3]
                distance_pos = ee_pos - target_pos
                distance_ee = np.sqrt(distance_pos.dot(distance_pos))
                print('distance ee:', distance_ee)

                # collect successful samples for training the global policy
                if distance_ee <= 0.06:
                    count_suc += 1
                    tgt_mu, tgt_prc, obs_data, tgt_wt = self.train_prepare(
                        traj_sample_lists)
                    train_mu = np.concatenate((train_mu, tgt_mu))
                    train_prc = np.concatenate((train_prc, tgt_prc))
                    train_obs_data = np.concatenate((train_obs_data, obs_data))
                    train_wt = np.concatenate((train_wt, tgt_wt))

                # Clear agent samples.
                self.agent.clear_samples()

                # stop once enough successful samples have been collected
                if count_suc > 8:
                    break

                self._take_iteration(itr, traj_sample_lists)
                if self.algorithm.flag_reset:
                    break
                # pol_sample_lists = self._take_policy_samples()
                # self._log_data(itr, traj_sample_lists, pol_sample_lists)
                if num_pos > 0:
                    self.algorithm.fit_global_linear_policy(traj_sample_lists)

            if not self.algorithm.flag_reset:
                # train NN with good samples
                self.algorithm.policy_opt.update(train_obs_data, train_mu,
                                                 train_prc, train_wt)

                # test the trained policy at the current position
                print('test current policy.....')
                self.test_current_policy()
                print('test all testing position....')
                for i in xrange(position_train.shape[0]):
                    test_positions = self.generate_position_radius(
                        position_train[i], 0.03, 5, 0.01)
                    if i == 0:
                        all_test_positions = test_positions
                    else:
                        all_test_positions = np.concatenate(
                            (all_test_positions, test_positions))
                self.test_cost(all_test_positions)

            # reset the algorithm to the initial algorithm for the next position
            # del self.algorithm
            # config['algorithm']['agent'] = self.agent
            # self.algorithm = config['algorithm']['type'](config['algorithm'])
            self.algorithm.reset_alg()
            self.next_iteration_prepare()

        self._end()

    def generate_position_radius(self, position_ori, radius, conditions,
                                 max_error_bound):
        """

        Args:
            position_ori: original center position of generated positions
            radius:     area's radius
            conditions: the quantity of generating positions
            max_error_bound: the mean of generated positions' error around cposition

        Returns:

        """
        c_x = position_ori[0]
        c_y = position_ori[1]
        while True:
            all_positions = np.zeros(0)
            center_position = np.array([c_x, c_y, 0])
            for i in range(conditions):
                position = np.random.uniform(-radius, radius, 3)
                while True:
                    position[2] = 0
                    position[1] = (position[1] + c_y)
                    position[0] = position[0] + c_x
                    area = (position - center_position).dot(position -
                                                            center_position)
                    if area <= (np.pi * radius**2) / 4.0:
                        break
                    position = np.random.uniform(-radius, radius, 3)
                if i == 0:
                    all_positions = position
                    all_positions = np.expand_dims(all_positions, axis=0)
                else:
                    all_positions = np.vstack((all_positions, position))

            mean_position = np.mean(all_positions, axis=0)
            mean_error = np.fabs(center_position - mean_position)
            print('mean_error:', mean_error)
            if mean_error[0] < max_error_bound and mean_error[
                    1] < max_error_bound:
                break

        all_positions = np.floor(all_positions * 1000) / 1000.0
        print('all_position:', all_positions)
        return all_positions

    def test_cost(self, position):
        """
        test the NN policy at all position
        Args:
            position:

        Returns:

        """
        total_costs = np.zeros(0)
        total_distance = np.zeros(0)
        total_suc = np.zeros(0)
        print('calculate cost_________________')
        for itr in range(position.shape[0]):
            if itr % 51 == 0:
                print('****************')
            for cond in self._train_idx:
                self._hyperparams['agent']['pos_body_offset'][cond] = position[
                    itr]
            self.agent.reset_model(self._hyperparams)
            _, cost, ee_points = self.take_nn_samples()
            ee_error = ee_points[:3] - self.target_points
            distance = np.sqrt(ee_error.dot(ee_error))
            error = np.sum(np.fabs(ee_error))
            if (error < 0.02):
                total_suc = np.concatenate((total_suc, np.array([1])))
            else:
                total_suc = np.concatenate((total_suc, np.array([0])))
            total_costs = np.concatenate((total_costs, np.array(cost)))
            total_distance = np.concatenate(
                (total_distance, np.array([distance])))
        # return np.mean(total_costs), total_suc, total_distance
        return total_costs, total_suc, total_distance

    def next_iteration_prepare(self):
        """
        prepare for the next iteration
        Returns:

        """
        self.init_alpha()

    def init_alpha(self, val=None):
        """
        initialize the alpha1, 2, the default is 0.7, 0.3
        Args:
            val:

        Returns:

        """
        if val is None:
            self.alpha1 = 0.75
            self.alpha2 = 0.25
        else:
            self.alpha1 = 0.75
            self.alpha2 = 0.25

    def pol_alpha(self):
        return self.alpha1, self.alpha2

    def train_prepare(self, sample_lists):
        """
        prepare the train data of the sample lists
        Args:
            sample_lists: sample list from agent

        Returns:
            target mu, prc, obs_data, wt

        """
        algorithm = self.algorithm
        dU, dO, T = algorithm.dU, algorithm.dO, algorithm.T
        obs_data, tgt_mu = np.zeros((0, T, dO)), np.zeros((0, T, dU))
        tgt_prc = np.zeros((0, T, dU, dU))
        tgt_wt = np.zeros((0, T))
        wt_origin = 0.01 * np.ones(T)
        for m in range(algorithm.M):
            samples = sample_lists[m]
            X = samples.get_X()
            N = len(samples)
            prc = np.zeros((N, T, dU, dU))
            mu = np.zeros((N, T, dU))
            wt = np.zeros((N, T))

            traj = algorithm.cur[m].traj_distr
            for t in range(T):
                prc[:, t, :, :] = np.tile(traj.inv_pol_covar[t, :, :],
                                          [N, 1, 1])
                for i in range(N):
                    mu[i,
                       t, :] = (traj.K[t, :, :].dot(X[i, t, :]) + traj.k[t, :])
                wt[:, t].fill(wt_origin[t])
            tgt_mu = np.concatenate((tgt_mu, mu))
            tgt_prc = np.concatenate((tgt_prc, prc))
            obs_data = np.concatenate((obs_data, samples.get_obs()))
            tgt_wt = np.concatenate((tgt_wt, wt))

        return tgt_mu, tgt_prc, obs_data, tgt_wt

    def test_policy(self, itr, N):
        """
        Take N policy samples of the algorithm state at iteration itr,
        for testing the policy to see how it is behaving.
        (Called directly from the command line --policy flag).
        Args:
            itr: the iteration from which to take policy samples
            N: the number of policy samples to take
        Returns: None
        """
        algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr
        self.algorithm = self.data_logger.unpickle(algorithm_file)
        if self.algorithm is None:
            print("Error: cannot find '%s.'" % algorithm_file)
            os._exit(1)  # called instead of sys.exit(), since this is in a thread
        traj_sample_lists = self.data_logger.unpickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr))

        pol_sample_lists = self._take_policy_samples(N)
        self.data_logger.pickle(
            self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
            copy.copy(pol_sample_lists))

        if self.gui:
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.set_status_text(
                ('Took %d policy sample(s) from ' +
                 'algorithm state at iteration %d.\n' +
                 'Saved to: data_files/pol_sample_itr_%02d.pkl.\n') %
                (N, itr, itr))

    def _initialize(self, itr_load):
        """
        Initialize from the specified iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns:
            itr_start: Iteration to start from.
        """
        if itr_load is None:
            if self.gui:
                self.gui.set_status_text('Press \'go\' to begin.')
            return 0
        else:
            algorithm_file = self._data_files_dir + 'algorithm_itr_%02d.pkl' % itr_load
            self.algorithm = self.data_logger.unpickle(algorithm_file)
            if self.algorithm is None:
                print("Error: cannot find '%s.'" % algorithm_file)
                os._exit(
                    1
                )  # called instead of sys.exit(), since this is in a thread

            if self.gui:
                traj_sample_lists = self.data_logger.unpickle(
                    self._data_files_dir +
                    ('traj_sample_itr_%02d.pkl' % itr_load))
                if self.algorithm.cur[0].pol_info:
                    pol_sample_lists = self.data_logger.unpickle(
                        self._data_files_dir +
                        ('pol_sample_itr_%02d.pkl' % itr_load))
                else:
                    pol_sample_lists = None
                self.gui.set_status_text((
                    'Resuming training from algorithm state at iteration %d.\n'
                    + 'Press \'go\' to begin.') % itr_load)
            return itr_load + 1

    def _take_sample(self, itr, cond, i):
        """
        Collect a sample from the agent.
        Args:
            itr: Iteration number.
            cond: Condition number.
            i: Sample number.
        Returns: None
        """
        if self.algorithm._hyperparams['sample_on_policy'] \
                and self.algorithm.iteration_count > 0:
            pol = self.algorithm.policy_opt.policy
        else:
            pol = self.algorithm.cur[cond].traj_distr
        if self.gui:
            self.gui.set_image_overlays(cond)  # Must call for each new cond.
            redo = True
            while redo:
                while self.gui.mode in ('wait', 'request', 'process'):
                    if self.gui.mode in ('wait', 'process'):
                        time.sleep(0.01)
                        continue
                    # 'request' mode.
                    if self.gui.request == 'reset':
                        try:
                            self.agent.reset(cond)
                        except NotImplementedError:
                            self.gui.err_msg = 'Agent reset unimplemented.'
                    elif self.gui.request == 'fail':
                        self.gui.err_msg = 'Cannot fail before sampling.'
                    self.gui.process_mode()  # Complete request.

                self.gui.set_status_text(
                    'Sampling: iteration %d, condition %d, sample %d.' %
                    (itr, cond, i))
                self.agent.sample(
                    pol,
                    cond,
                    verbose=(i < self._hyperparams['verbose_trials']))

                if self.gui.mode == 'request' and self.gui.request == 'fail':
                    redo = True
                    self.gui.process_mode()
                    self.agent.delete_last_sample(cond)
                else:
                    redo = False
        else:
            self.agent.sample(
                pol, cond, verbose=(i < self._hyperparams['verbose_trials']))

    def _take_train_sample(self, itr, cond, i):
        """
        collect sample with merge policy
        Args:
            itr:
            cond:
            i:

        Returns:

        """
        alpha1, alpha2 = self.pol_alpha()
        print("alpha:********%03f, %03f******" % (alpha1, alpha2))
        pol1 = self.algorithm.cur[cond].traj_distr
        pol2 = self.algorithm.cur[cond].last_pol
        if not self.gui:
            self.agent.merge_controller(
                pol1,
                alpha1,
                pol2,
                alpha2,
                cond,
                verbose=(i < self._hyperparams['verbose_trials']))

    def _take_iteration(self, itr, sample_lists):
        """
        Take an iteration of the algorithm.
        Args:
            itr: Iteration number.
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Calculating.')
            self.gui.start_display_calculating()
        self.algorithm.iteration(sample_lists)
        if self.gui:
            self.gui.stop_display_calculating()

    def _take_policy_samples(self, N=None):
        """
        Take samples from the policy to see how it's doing.
        Args:
            N  : number of policy samples to take per condition
        Returns: list of SampleLists, one per test condition (None if
            'verbose_policy_trials' is not set)
        """
        if 'verbose_policy_trials' not in self._hyperparams:
            # AlgorithmTrajOpt
            return None
        verbose = self._hyperparams['verbose_policy_trials']
        if self.gui:
            self.gui.set_status_text('Taking policy samples.')
        pol_samples = [[None] for _ in range(len(self._test_idx))]
        # Since this isn't noisy, just take one sample.
        # TODO: Make this noisy? Add hyperparam?
        # TODO: Take at all conditions for GUI?
        for cond in range(len(self._test_idx)):
            pol_samples[cond][0] = self.agent.sample(
                self.algorithm.policy_opt.policy,
                self._test_idx[cond],
                verbose=verbose,
                save=False,
                noisy=False)
        return [SampleList(samples) for samples in pol_samples]

    def take_nn_samples(self, N=None):
        """
        take the NN policy
        Args:
            N:

        Returns:
            samples, costs, ee_points

        """
        """
            Take samples from the policy to see how it's doing.
            Args:
                N  : number of policy samples to take per condition
            Returns: None
            """

        if 'verbose_policy_trials' not in self._hyperparams:
            # AlgorithmTrajOpt
            return None
        verbose = self._hyperparams['verbose_policy_trials']
        if self.gui:
            self.gui.set_status_text('Taking policy samples.')
        pol_samples = [[None] for _ in range(len(self._test_idx))]
        # Since this isn't noisy, just take one sample.
        # TODO: Make this noisy? Add hyperparam?
        # TODO: Take at all conditions for GUI?
        costs = list()
        for cond in range(len(self._test_idx)):
            pol_samples[cond][0] = self.agent.sample(
                self.algorithm.policy_opt.policy,
                self._test_idx[cond],
                verbose=verbose,
                save=False,
                noisy=False)
            policy_cost = self.algorithm.cost[0].eval(pol_samples[cond][0])[0]
            policy_cost = np.sum(policy_cost)
            print "cost: %d" % policy_cost  # wait to plot in gui in gps_training_gui.py
            costs.append(policy_cost)

            ee_points = self.agent.get_ee_point(cond)

        return [SampleList(samples)
                for samples in pol_samples], costs, ee_points

    def test_current_policy(self):
        """
        test the current NN policy in the current position
        Returns:

        """
        verbose = self._hyperparams['verbose_policy_trials']
        for cond in self._train_idx:
            samples = self.agent.sample(self.algorithm.policy_opt.policy,
                                        cond,
                                        verbose=verbose,
                                        save=False,
                                        noisy=False)

    def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
        """
        Log data and algorithm, and update the GUI.
        Args:
            itr: Iteration number.
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if self.gui:
            self.gui.set_status_text('Logging data and updating GUI.')
            self.gui.update(itr, self.algorithm, self.agent, traj_sample_lists,
                            pol_sample_lists)
            self.gui.save_figure(self._data_files_dir +
                                 ('figure_itr_%02d.png' % itr))
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm))
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists))
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists))

    def _end(self):
        """ Finish running and exit. """
        if self.gui:
            self.gui.set_status_text('Training complete.')
            self.gui.end_mode()
            if self._quit_on_end:
                # Quit automatically (for running sequential expts)
                os._exit(1)
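A toy numpy sketch of the supervision targets that train_prepare above computes from a local linear-Gaussian controller: at each time step the target action mean is K[t].dot(x) + k[t] and the precision is the controller's inverse covariance. All dimensions and values here are made up.

import numpy as np

T, dX, dU, N = 3, 4, 2, 2                 # made-up dimensions
K = np.random.randn(T, dU, dX)            # time-varying feedback gains
k = np.random.randn(T, dU)                # open-loop terms
inv_pol_covar = np.stack([np.eye(dU)] * T)
X = np.random.randn(N, T, dX)             # sampled states

mu = np.zeros((N, T, dU))
prc = np.zeros((N, T, dU, dU))
for t in range(T):
    prc[:, t, :, :] = np.tile(inv_pol_covar[t, :, :], [N, 1, 1])
    for i in range(N):
        mu[i, t, :] = K[t, :, :].dot(X[i, t, :]) + k[t, :]   # as in train_prepare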
Example #34
class GenDemo(object):
	""" Generator of demos. """
	def __init__(self, config):
		self._hyperparams = config
		self._conditions = config['common']['conditions']

		# if 'train_conditions' in config['common']:
		# 	self._train_idx = config['common']['train_conditions']
		# 	self._test_idx = config['common']['test_conditions']
		# else:
		# 	self._train_idx = range(self._conditions)
		# 	config['common']['train_conditions'] = config['common']['conditions']
		# 	self._hyperparams=config
		# 	self._test_idx = self._train_idx
		self._data_files_dir = config['common']['data_files_dir']
		self._algorithm_files_dir = config['common']['demo_controller_file']
		self.data_logger = DataLogger()


	def generate(self):
		"""
		 Generate demos and save them in a file for experiment.
		 Returns: None.
		"""
		# Load the algorithm
		import pickle

		algorithm_file = self._algorithm_files_dir # This should give us the optimal controller. Maybe set to 'controller_itr_%02d.pkl' % itr_load will be better?
		self.algorithm = pickle.load(open(algorithm_file, 'rb'))
		if self.algorithm is None:
			print("Error: cannot find '%s.'" % algorithm_file)
			os._exit(1) # called instead of sys.exit(), since this is in a thread

		# Keep the initial states of the agent the same as in the demonstrations.
		self._learning = self.ioc_algo._hyperparams['learning_from_prior'] # if the experiment is learning from prior experience
		agent_config = self._hyperparams['demo_agent']
		if agent_config['filename'] == './mjc_models/pr2_arm3d.xml' and not self._learning:
			agent_config['x0'] = self.algorithm._hyperparams['agent_x0']
			agent_config['pos_body_idx'] = self.algorithm._hyperparams['agent_pos_body_idx']
			agent_config['pos_body_offset'] = self.algorithm._hyperparams['agent_pos_body_offset']
		self.agent = agent_config['type'](agent_config)

		# Roll out the demonstrations from controllers
		var_mult = self.algorithm._hyperparams['var_mult']
		T = self.algorithm.T
		demos = []

		M = agent_config['conditions']
		N = self.ioc_algo._hyperparams['num_demos']
		if not self._learning:
			controllers = {}
			good_conds = self.ioc_algo._hyperparams['demo_cond']

			# Store each controller under M conditions into controllers.
			for i in xrange(M):
				controllers[i] = self.algorithm.cur[i].traj_distr
			controllers_var = copy.copy(controllers)
			for i in xrange(M):

				# Increase controller variance.
				controllers_var[i].chol_pol_covar *= var_mult
				# Gather demos.
				for j in xrange(N):
					demo = self.agent.sample(
						controllers_var[i], i,
						verbose=(i < self.algorithm._hyperparams['demo_verbose']),
						save = True
					)
					demos.append(demo)
		else:
			# Extract the neural network policy.
			pol = self.algorithm.policy_opt.policy
			for i in xrange(M):
				# Gather demos.
				demo = self.agent.sample(
					pol, i,
					verbose=(i < self._hyperparams['verbose_trials'])
					)
				demos.append(demo)

		# Filter out worst (M - good_conds) demos.
		target_position = agent_config['target_end_effector'][:3]
		dists_to_target = np.zeros(M)
		for i in xrange(M):
			demo_end_effector = demos[i].get(END_EFFECTOR_POINTS)
			dists_to_target[i] = np.amin(np.sqrt(np.sum((demo_end_effector[:, :3] - target_position.reshape(1, -1))**2, axis = 1)), axis = 0)
		if not self._learning:
			good_indices = dists_to_target.argsort()[:good_conds - M].tolist()
		else:
			good_indicators = (dists_to_target <= agent_config['success_upper_bound']).tolist()
			good_indices = [i for i in xrange(len(good_indicators)) if good_indicators[i]]
			bad_indices = np.argmax(dists_to_target)
			self.ioc_algo._hyperparams['demo_cond'] = len(good_indices)
		filtered_demos = []
		self.ioc_algo.demo_conditions = []
		self.ioc_algo.failed_conditions = []
		exp_dir = self._data_files_dir.replace("data_files", "")
		with open(exp_dir + 'log.txt', 'a') as f:
			f.write('\nThe demo conditions are: \n')
		for i in good_indices:
			filtered_demos.append(demos[i])
			self.ioc_algo.demo_conditions.append(agent_config['pos_body_offset'][i])
			with open(exp_dir + 'log.txt', 'a') as f:
				f.write('\n' + str(agent_config['pos_body_offset'][i]) + '\n')
		with open(exp_dir + 'log.txt', 'a') as f:
			f.write('\nThe failed badmm conditions are: \n')
		for i in xrange(M):
			if i not in good_indices:
				self.ioc_algo.failed_conditions.append(agent_config['pos_body_offset'][i])
				with open(exp_dir + 'log.txt', 'a') as f:
					f.write('\n' + str(agent_config['pos_body_offset'][i]) + '\n')
		# import pdb; pdb.set_trace()
		shuffle(filtered_demos)
		demo_list =  SampleList(filtered_demos)
		demo_store = {'demoX': demo_list.get_X(), 'demoU': demo_list.get_U(), 'demoO': demo_list.get_obs()}
		if self._learning:
			demo_store['pos_body_offset'] = [agent_config['pos_body_offset'][bad_indices]]
		# Save the demos.
		self.data_logger.pickle(
			self._data_files_dir + 'demos.pkl',
			copy.copy(demo_store)
		)
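A small numpy sketch of the filtering logic in generate() above, with synthetic distances: the non-learning branch keeps the good_conds demos closest to the target via argsort, while the learning branch keeps every demo below a success threshold (the threshold value here is an assumption).

import numpy as np

dists_to_target = np.array([0.03, 0.20, 0.01, 0.15, 0.05])   # synthetic
M, good_conds = len(dists_to_target), 3

# Non-learning branch: indices of the good_conds closest demos.
good_indices = dists_to_target.argsort()[:good_conds - M].tolist()   # [2, 0, 4]

# Learning branch: every demo within the success bound.
success_upper_bound = 0.06                                            # assumed
good_indicators = (dists_to_target <= success_upper_bound).tolist()
good_indices_learning = [i for i in range(len(good_indicators))
                         if good_indicators[i]]                       # [0, 2, 4]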
Example #35
class LQRTestMain(object):
    """ Main class to run algorithms and experiments. """
    def __init__(self, config):
        """
        Initialize LQRTestMain
        Args:
            config: Test hyperparameters for experiment
        """
        self._hyperparams = config
        self._conditions = config['common']['conditions']
        if 'train_conditions' in config['common']:
            self._train_idx = config['common']['train_conditions']
            self._test_idx = config['common']['test_conditions']
        else:
            self._train_idx = range(self._conditions)
            config['common']['train_conditions'] = config['common'][
                'conditions']
            self._hyperparams = config
            self._test_idx = self._train_idx

        self._data_files_dir = config['common']['data_files_dir']

        self.agent = config['agent']['type'](config['agent'])
        self.data_logger = DataLogger()

        config['algorithm']['agent'] = self.agent
        self.algorithm = config['algorithm']['type'](config['algorithm'])

    def run(self, itr_load=None):
        """
        Run training by iteratively sampling and taking an iteration.
        Args:
            itr_load: If specified, loads algorithm state from that
                iteration, and resumes training at the next iteration.
        Returns: None
        """

        for itr in range(0, self._hyperparams['iterations']):
            for cond in self._train_idx:
                for i in range(self._hyperparams['num_samples']):
                    self._take_sample(itr, cond, i)

            traj_sample_lists = [
                self.agent.get_samples(cond, -self._hyperparams['num_samples'])
                for cond in self._train_idx
            ]
            self._take_iteration(traj_sample_lists)
            #self._log_data(itr, traj_sample_lists, pol_sample_lists)
            if (itr == 3):
                """
                self.data_logger.pickle(
                    self._data_files_dir + 'test_traj_distr.pkl',
                    copy.copy(self.algorithm.prev[0].traj_distr)
                )
                self.data_logger.pickle(
                    self._data_files_dir + 'test_traj_info.pkl',
                    copy.copy(self.algorithm.prev[0].traj_info)
                )
                self.data_logger.pickle(
                    self._data_files_dir + 'test_new_traj_distr.pkl',
                    copy.copy(self.algorithm.prev[0].new_traj_distr)
                )
                self.data_logger.pickle(
                    self._data_files_dir + 'test_final_eta.pkl',
                    copy.copy(self.algorithm.prev[0].eta)
                )
                mu_and_sigma = self.algorithm.forward(self.algorithm.prev[0].new_traj_distr,
                                                      self.algorithm.prev[0].traj_info)
                self.data_logger.pickle(
                    self._data_files_dir + 'test_mu_and_sigma.pkl',
                    copy.copy(mu_and_sigma)
                )
                """
                self.data_logger.pickle(
                    self._data_files_dir + 'test_prior',
                    copy.copy(
                        self.algorithm.prev[0].traj_info.dynamics.get_prior()))
                self.data_logger.pickle(
                    self._data_files_dir + 'test_sample_list',
                    copy.copy(self.algorithm.prev[0].sample_list))
                dynamics_data = self.algorithm.prev[
                    0].traj_info.dynamics.Fm, self.algorithm.prev[
                        0].traj_info.dynamics.fv, self.algorithm.prev[
                            0].traj_info.dynamics.dyn_covar
                self.data_logger.pickle(self._data_files_dir + 'test_dynamics',
                                        copy.copy(dynamics_data))

    def _take_sample(self, itr, cond, i):
        """
        Collect a sample from the agent.
        Args:
            itr: Iteration number.
            cond: Condition number.
            i: Sample number.
        Returns: None
        """
        if self.algorithm._hyperparams['sample_on_policy'] \
                and self.algorithm.iteration_count > 0:
            pol = self.algorithm.policy_opt.policy
        else:
            pol = self.algorithm.cur[cond].traj_distr
        self.agent.sample(pol,
                          cond,
                          verbose=(i < self._hyperparams['verbose_trials']))

    def _take_iteration(self, sample_lists):
        """
        Take an iteration of the algorithm.
        """
        self.algorithm.iteration(sample_lists)

    def _log_data(self, itr, traj_sample_lists, pol_sample_lists=None):
        """
        Log data and algorithm, and update the GUI.
        Args:
            itr: Iteration number.
            traj_sample_lists: trajectory samples as SampleList object
            pol_sample_lists: policy samples as SampleList object
        Returns: None
        """
        if 'no_sample_logging' in self._hyperparams['common']:
            return
        self.data_logger.pickle(
            self._data_files_dir + ('algorithm_itr_%02d.pkl' % itr),
            copy.copy(self.algorithm))
        self.data_logger.pickle(
            self._data_files_dir + ('traj_sample_itr_%02d.pkl' % itr),
            copy.copy(traj_sample_lists))
        if pol_sample_lists:
            self.data_logger.pickle(
                self._data_files_dir + ('pol_sample_itr_%02d.pkl' % itr),
                copy.copy(pol_sample_lists))
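A minimal driver sketch for LQRTestMain; the experiment path is hypothetical, and the hyperparams module is assumed to define config['agent'], config['algorithm'] and config['common'] as in the other examples.

import imp

hyperparams = imp.load_source(
    'hyperparams', 'experiments/lqr_test/hyperparams.py')

lqr_test = LQRTestMain(hyperparams.config)
lqr_test.run()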