def bootstrap(self, expe, n, bootstap_range_div):
        """Feed the agent's sensorimotor model with ``n`` random trials.

        A narrowed configuration is built around the agent's motor and
        sensory centers: each range is divided by ``bootstap_range_div``.
        Motor commands are drawn with ``rand_bounds`` inside the narrowed
        motor bounds; the last ``n_dmps`` entries of every command are then
        replaced by the first ``n_dmps`` DMP default values plus ``randn``
        noise scaled by the narrowed motor ranges.  Each command is executed
        in the environment, the resulting (m, s) pair updates the
        sensorimotor model, and the 'choice', 'inference', 'movement' and
        'perception' events are emitted on the agent.

        Args:
            expe: experiment object; ``expe.ag``, ``expe.env`` and
                ``expe._update_logs`` are used.
            n: number of motor commands requested from ``rand_bounds``.
            bootstap_range_div: divisor applied to the agent's motor and
                sensory ranges.
        """
        agent = expe.ag
        full_conf = agent.conf

        # Half-widths of the narrowed motor / sensory boxes.
        m_half = full_conf.m_ranges / (2 * bootstap_range_div)
        s_half = full_conf.s_ranges / (2 * bootstap_range_div)
        conf = make_configuration(full_conf.m_centers - m_half,
                                  full_conf.m_centers + m_half,
                                  full_conf.s_centers - s_half,
                                  full_conf.s_centers + s_half)

        n_dmps = agent.dmp.n_dmps
        for m in rand_bounds(conf.m_bounds, n=n):
            # Overwrite the trailing n_dmps parameters with noisy defaults.
            noise = conf.m_ranges[-n_dmps:] * randn(n_dmps)
            m[-n_dmps:] = agent.dmp.default[:n_dmps] + noise

            mov = agent.motor_primitive(m)
            s = agent.sensory_primitive(expe.env.update(mov, log=True))
            agent.sensorimotor_model.update(m, s)

            # No goal was chosen for these trials, so 'choice' is all NaN.
            agent.emit('choice', array([nan] * len(agent.expl_dims)))
            agent.emit('inference', m)
            agent.emit('movement', mov)
            agent.emit('perception', s)
        expe._update_logs()
    def __init__(self, mid, m_space, s_space, env_conf):
        """Set up a module with a random-goal interest model and an NN sensorimotor model.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting this module's motor dimensions.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds, selecting the sensory dimensions.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
        """
        motor_mins = env_conf.m_mins[m_space]
        motor_maxs = env_conf.m_maxs[m_space]
        # Sensory bounds are taken from the concatenation
        # [selected motor bounds, environment sensory bounds].
        pooled_mins = array(list(motor_mins) + list(env_conf.s_mins))
        pooled_maxs = array(list(motor_maxs) + list(env_conf.s_maxs))
        self.conf = make_configuration(motor_mins,
                                       motor_maxs,
                                       pooled_mins[s_space],
                                       pooled_maxs[s_space])

        # Interest is computed over the sensory dimensions.
        self.im_dims = self.conf.s_dims

        self.mid = mid
        self.m_space = m_space
        self.s_space = s_space
        self.motor_babbling_n_iter = 10

        self.s = None
        self.last_interest = 0

        self.im = MiscRandomInterest(self.conf,
                                     self.im_dims,
                                     competence_measure=competence_dist,
                                     win_size=1000,
                                     competence_mode='knn',
                                     k=20,
                                     progress_mode='local')

        # Exploration noise ratio is fixed to 0.05 for this module.
        self.sm = NonParametric(self.conf,
                                fwd='NN',
                                inv='NN',
                                sigma_explo_ratio=0.05)

        Agent.__init__(self, self.conf, self.sm, self.im)
예제 #3
0
def get_params(n_bfs, starting_position, babbling_name, sm_name, im_name):
    """Assemble the agent parameters (configuration, models, DMP layout).

    Args:
        n_bfs: number of basis functions per DMP.
        starting_position: sequence with one entry per DMP; its length
            defines ``n_dmps``.
        babbling_name: ``'motor'`` makes the interest model work on the motor
            dimensions; any other value selects the sensory dimensions.
        sm_name: key into the module-level ``sms`` table giving
            ``(class, kwargs)`` for the sensorimotor model.
        im_name: name handed to ``InterestModel.from_configuration``.

    Returns:
        dict with keys ``'n_dmps'``, ``'n_bfs'``, ``'used'``, ``'default'``,
        ``'conf'``, ``'sm'`` and ``'im'``.
    """
    n_dmps = len(starting_position)
    n_weights = n_dmps * n_bfs

    # The default vector holds the starting position at both ends and zeros
    # for the n_dmps * n_bfs values in between.
    default = zeros(n_dmps * (n_bfs + 2))
    default[:n_dmps] = starting_position
    default[-n_dmps:] = starting_position

    start = default[:n_dmps]
    poppy_ag_conf = make_configuration(
        m_mins=[-600] * n_weights + list(start - 180.),
        m_maxs=[600] * n_weights + list(start + 180.),
        s_mins=[-1., -0.7, -0.1],
        s_maxs=[1., 0.7, 0.7])

    if babbling_name == 'motor':
        im_dims = poppy_ag_conf.m_dims
    else:
        im_dims = poppy_ag_conf.s_dims
    im = InterestModel.from_configuration(poppy_ag_conf, im_dims, im_name)

    sm_cls, sm_kwargs = sms[sm_name]
    sm = sm_cls(poppy_ag_conf, **sm_kwargs)

    # Mask over `default`: the first n_dmps entries are flagged unused,
    # everything after them is used.
    used = array([False] * n_dmps + [True] * (n_weights + n_dmps))

    return dict(n_dmps=n_dmps,
                n_bfs=n_bfs,
                used=used,
                default=default,
                conf=poppy_ag_conf,
                sm=sm,
                im=im)
예제 #4
0
    def __init__(self,
                 mid,
                 m_space,
                 s_space,
                 env_conf,
                 explo_noise=0.,
                 normalize_interests=True,
                 context_mode=None):
        """Set up a learning module, optionally context-aware.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting the motor dimensions.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds, selecting the sensory dimensions.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
            explo_noise: forwarded to the sensorimotor model as
                ``sigma_explo_ratio``.
            normalize_interests: accepted but not read in this constructor.
            context_mode: when not ``None``, a ``ContextRandomInterest`` is
                used and this value is forwarded to it and to ``Agent``.
        """
        motor_mins = env_conf.m_mins[m_space]
        motor_maxs = env_conf.m_maxs[m_space]
        # Sensory bounds come from [selected motor bounds, env sensory bounds].
        pooled_mins = array(list(motor_mins) + list(env_conf.s_mins))
        pooled_maxs = array(list(motor_maxs) + list(env_conf.s_maxs))
        self.conf = make_configuration(motor_mins, motor_maxs,
                                       pooled_mins[s_space],
                                       pooled_maxs[s_space])

        self.im_dims = self.conf.s_dims

        self.mid = mid
        self.m_space = m_space
        self.context_mode = context_mode
        self.s_space = s_space
        self.motor_babbling_n_iter = 0

        self.s = None
        self.last_interest = 0

        # Both interest models share these settings; the contextual one
        # additionally receives the context description.
        im_kwargs = {
            'win_size': 50,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': 'local',
        }
        if context_mode is None:
            im_cls = MiscRandomInterest
        else:
            im_cls = ContextRandomInterest
            im_kwargs['context_mode'] = context_mode
        self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

        self.sm = DemonstrableNN(self.conf,
                                 fwd='NN',
                                 inv='NN',
                                 sigma_explo_ratio=explo_noise)

        Agent.__init__(self,
                       self.conf,
                       self.sm,
                       self.im,
                       context_mode=self.context_mode)
    def __init__(self,
                 config,
                 n_motor_babbling=0,
                 explo_noise=0.1,
                 normalize_interests=False):
        """Create a supervisor holding a single learning module named "mod".

        Args:
            config: mapping expanded as keyword arguments into
                ``make_configuration``.
            n_motor_babbling: stored as ``self.n_motor_babbling``.
            explo_noise: exploration noise forwarded to the learning module.
            normalize_interests: forwarded to the learning module.
        """
        self.config = config
        self.n_motor_babbling = n_motor_babbling
        self.explo_noise = explo_noise
        self.normalize_interests = normalize_interests

        self.conf = make_configuration(**config)

        self.t = 0
        self.modules = {}
        self.chosen_modules = []
        self.progresses_evolution = {}
        self.interests_evolution = {}
        self.mid_control = ""

        # Define motor and sensory spaces:
        m_ndims = self.conf.m_ndims  # number of motor parameters

        self.m_space = range(m_ndims)
        self.c_dims = range(m_ndims, m_ndims + 2)
        self.s_hand = range(m_ndims + 2, m_ndims + 32)
        self.s_joystick_1 = range(m_ndims + 32, m_ndims + 52)
        self.s_joystick_2 = range(m_ndims + 52, m_ndims + 72)
        self.s_ergo = range(m_ndims + 72, m_ndims + 92)
        self.s_ball = range(m_ndims + 92, m_ndims + 112)
        self.s_light = range(m_ndims + 112, m_ndims + 122)
        self.s_sound = range(m_ndims + 122, m_ndims + 132)

        self.s_spaces = dict(s_hand=self.s_hand,
                             s_joystick_1=self.s_joystick_1,
                             s_joystick_2=self.s_joystick_2,
                             s_ergo=self.s_ergo,
                             s_ball=self.s_ball,
                             s_light=self.s_light,
                             s_sound=self.s_sound)

        # The module's sensory space is the context followed by every sensory
        # sub-space.  `range + range` only works where range() returns a list
        # (Python 2); on Python 3 it raises TypeError, so the concatenation is
        # built explicitly as a list, which yields the same result on both.
        s_space = []
        for dims in (self.c_dims, self.s_hand, self.s_joystick_1,
                     self.s_joystick_2, self.s_ergo, self.s_ball,
                     self.s_light, self.s_sound):
            s_space.extend(dims)

        self.modules["mod"] = LearningModule(
            "mod",
            self.m_space,
            s_space,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_n_dims=2,
                              context_sensory_bounds=[[-1., -1.], [1., 1.]]),
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)

        # One history list per module for progress and interest.
        for mid in self.modules:
            self.progresses_evolution[mid] = []
            self.interests_evolution[mid] = []
예제 #6
0
    def __init__(self, config, mid):
        """Build a module from the global config and its per-module entry.

        Args:
            config: global configuration; ``config.modules``, ``config.agent``,
                ``config.ims`` and ``config.sms`` are read.
            mid: module name; must start with ``'mod'``.

        Raises:
            ValueError: if ``mid`` does not start with ``'mod'``.
        """
        self.config = config  # global config
        self.mconf = config.modules[mid]  # module config
        if mid[0:3] != 'mod':
            raise ValueError('Module name must begin with mod')
        self.mid = mid

        # Bounds of this module: agent-wide mins/maxs restricted to the
        # module's motor ('m') and sensory ('s') dimensions.
        agent_conf = self.config.agent
        m_dims = self.mconf['m']
        s_dims = self.mconf['s']
        self.conf = make_configuration(agent_conf.mins[m_dims],
                                       agent_conf.maxs[m_dims],
                                       agent_conf.mins[s_dims],
                                       agent_conf.maxs[s_dims])

        if self.mconf['babbling_name'] == 'motor':
            self.im_dims = self.conf.m_dims
        else:
            self.im_dims = self.conf.s_dims

        self.im_mode = self.mconf["im_mode"]
        self.s = None
        self.sp = None
        self.snn = None
        self.last_module_to_credit = -1
        self.last_interest = 0

        im_cls, im_kwargs = config.ims[self.mconf['im_name']]
        # Work on a copy: the kwargs dict lives in the shared config, so
        # writing 'mode' into it would leak this module's mode into the config.
        im_kwargs = dict(im_kwargs, mode=self.im_mode)
        self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

        sm_cls, sm_kwargs = config.sms[self.mconf['sm_name']]
        self.sm = sm_cls(self.conf, **sm_kwargs)

        Agent.__init__(self, self.conf, self.sm, self.im)

        if self.mconf['from_log'] is not None:
            from_log_dir = self.mconf['from_log'][0]
            # Pickle data is binary, so the file is opened in 'rb'; the with
            # block closes it.
            # NOTE(security): unpickling can execute arbitrary code; only
            # point 'from_log' at trusted log files.
            with open(from_log_dir + '/log.pickle', 'rb') as f:
                log = cPickle.load(f)

            from_log_mod = self.mconf['from_log'][1]
            self.fast_forward_models(log, from_log_mod,
                                     self.mconf['from_log'][2])

        self.overall_interest = 0
        self.social_interest = 0
        self.top_down_interest = 0
        self.top_down_points = Queue.Queue()
        self.own_interest = 0
예제 #7
0
    def __init__(self, config, mid):
        """Build a module from the global config and its per-module entry.

        Args:
            config: global configuration; ``config.modules``, ``config.agent``,
                ``config.ims`` and ``config.sms`` are read.
            mid: module name; must start with ``'mod'``.

        Raises:
            ValueError: if ``mid`` does not start with ``'mod'``.
        """
        self.config = config  # global config
        self.mconf = config.modules[mid]  # module config
        if mid[0:3] != 'mod':
            raise ValueError('Module name must begin with mod')
        self.mid = mid

        # Bounds of this module: agent-wide mins/maxs restricted to the
        # module's motor ('m') and sensory ('s') dimensions.
        agent_conf = self.config.agent
        m_dims = self.mconf['m']
        s_dims = self.mconf['s']
        self.conf = make_configuration(agent_conf.mins[m_dims],
                                       agent_conf.maxs[m_dims],
                                       agent_conf.mins[s_dims],
                                       agent_conf.maxs[s_dims])

        if self.mconf['babbling_name'] == 'motor':
            self.im_dims = self.conf.m_dims
        else:
            self.im_dims = self.conf.s_dims

        self.im_mode = self.mconf["im_mode"]
        self.s = None
        self.sp = None
        self.snn = None
        self.last_module_to_credit = -1
        self.last_interest = 0

        im_cls, im_kwargs = config.ims[self.mconf['im_name']]
        # Work on a copy: the kwargs dict lives in the shared config, so
        # writing 'mode' into it would leak this module's mode into the config.
        im_kwargs = dict(im_kwargs, mode=self.im_mode)
        self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

        sm_cls, sm_kwargs = config.sms[self.mconf['sm_name']]
        self.sm = sm_cls(self.conf, **sm_kwargs)

        Agent.__init__(self, self.conf, self.sm, self.im)

        if self.mconf['from_log'] is not None:
            from_log_dir = self.mconf['from_log'][0]
            # Pickle data is binary, so the file is opened in 'rb'; the with
            # block closes it.
            # NOTE(security): unpickling can execute arbitrary code; only
            # point 'from_log' at trusted log files.
            with open(from_log_dir + '/log.pickle', 'rb') as f:
                log = cPickle.load(f)

            from_log_mod = self.mconf['from_log'][1]
            self.fast_forward_models(log, from_log_mod,
                                     self.mconf['from_log'][2])

        self.overall_interest = 0
        self.social_interest = 0
        self.top_down_interest = 0
        self.top_down_points = Queue.Queue()
        self.own_interest = 0
    def __init__(self, mid, m_space, s_space, env_conf, explo_noise, win_size, interest_model):
        """Set up a module whose interest model is chosen by name.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting the motor dimensions.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds, selecting the sensory dimensions.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
            explo_noise: forwarded to the sensorimotor model as
                ``sigma_explo_ratio``.
            win_size: forwarded to the interest model as ``win_size``.
            interest_model: ``'uniform'``, ``'normal'`` or ``'active'``.

        Raises:
            NotImplementedError: for any other ``interest_model`` value.
        """
        motor_mins = env_conf.m_mins[m_space]
        motor_maxs = env_conf.m_maxs[m_space]
        # Sensory bounds come from [selected motor bounds, env sensory bounds].
        pooled_mins = array(list(motor_mins) + list(env_conf.s_mins))
        pooled_maxs = array(list(motor_maxs) + list(env_conf.s_maxs))
        self.conf = make_configuration(motor_mins, motor_maxs,
                                       pooled_mins[s_space],
                                       pooled_maxs[s_space])

        self.im_dims = self.conf.s_dims

        self.mid = mid
        self.m_space = m_space
        self.s_space = s_space
        self.motor_babbling_n_iter = 10

        self.s = None
        self.last_interest = 0

        # Pick the interest-model class and the settings specific to it.
        if interest_model == 'uniform':
            im_cls = MiscRandomInterest
            im_kwargs = {}
        elif interest_model == 'normal':
            im_cls = MiscGaussianInterest
            im_kwargs = {}
        elif interest_model == 'active':
            im_cls = MiscDiscretizedInterest
            im_kwargs = {
                'x_card': 20 ** len(self.im_dims),  # 20 is the number of cells on each dimension
                'cells_win_size': 20,  # window size parameter (ws)
                'eps_random': 0.1,  # proportion of random choice of cell
                'measure': competence_dist,
            }
        else:
            raise NotImplementedError

        # Settings common to all three interest models.
        im_kwargs.update([
            ('competence_measure', competence_dist),
            ('win_size', win_size),
            ('competence_mode', 'knn'),
            ('k', 20),
            ('progress_mode', 'local'),
        ])
        self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

        self.sm = NonParametric(self.conf,
                                fwd='NN',
                                inv='NN',
                                sigma_explo_ratio=explo_noise)

        Agent.__init__(self, self.conf, self.sm, self.im)
예제 #9
0
    def __init__(self,
                 mid,
                 m_space,
                 s_space,
                 env_conf,
                 explo_noise=0.,
                 normalize_interests=True,
                 context_mode=None):
        """Set up a learning module backed by a ``BufferedDataset``.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting the motor dimensions.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds; ``len(s_space) // 10`` is stored
                as ``self.n_sdims``.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
            explo_noise: accepted but not read in this constructor.
            normalize_interests: accepted but not read in this constructor.
            context_mode: when not ``None``, a ``ContextRandomInterest`` is
                used and this value is forwarded to it and to ``Agent``.
        """
        motor_mins = env_conf.m_mins[m_space]
        motor_maxs = env_conf.m_maxs[m_space]
        # Sensory bounds come from [selected motor bounds, env sensory bounds].
        pooled_mins = array(list(motor_mins) + list(env_conf.s_mins))
        pooled_maxs = array(list(motor_maxs) + list(env_conf.s_maxs))
        self.conf = make_configuration(motor_mins, motor_maxs,
                                       pooled_mins[s_space],
                                       pooled_maxs[s_space])

        self.im_dims = self.conf.s_dims

        self.mid = mid
        self.m_space = m_space
        self.context_mode = context_mode
        self.s_space = s_space
        self.motor_babbling_n_iter = 0
        self.n_mdims = 4
        self.n_sdims = len(s_space) // 10
        # NOTE(review): the explo_noise argument is ignored and the attribute
        # is fixed to 0.2 -- confirm this is intended.
        self.explo_noise = 0.2

        self.s = None
        self.last_interest = 0

        if context_mode is not None:
            self.im = ContextRandomInterest(self.conf,
                                            self.conf.s_dims,
                                            self.n_sdims,
                                            100,
                                            context_mode)
        else:
            self.im = MiscRandomInterest(self.conf,
                                         self.conf.s_dims,
                                         self.n_sdims,
                                         win_size=100)

        self.sm = BufferedDataset(self.conf.m_ndims,
                                  self.conf.s_ndims,
                                  buffer_size=10000,
                                  lateness=10)

        Agent.__init__(self,
                       self.conf,
                       self.sm,
                       self.im,
                       context_mode=self.context_mode)
    def __init__(self, conf,
                 model_type_head='WNN', config_str_head='default',
                 model_type_arm='WNN', config_str_arm='default'):
        """Initialise the two sub-models (head and arm) of the Nao model.

        The head model maps the head dimensions to the image dimensions
        followed by the arm dimensions; the arm model maps the arm dimensions
        to the space dimensions.  Both are created through
        ``SensorimotorModel.from_configuration`` and put in 'explore' mode.

        Args:
            conf (NaoConfiguration): a configuration class object
            model_type_head (str, optional): model type of the head sub-model
            config_str_head (str, optional): config name of the head sub-model
            model_type_arm (str, optional): model type of the arm sub-model
            config_str_arm (str, optional): config name of the arm sub-model
        """
        SensorimotorModel.__init__(self, conf)

        # self.conf is read below; presumably it is set by the base-class
        # initialiser above -- TODO confirm.
        mins = self.conf.mins
        maxs = self.conf.maxs

        head_m_dims = self.conf.head_dims
        head_s_dims = self.conf.image_dims + self.conf.arm_dims
        conf_sm_model_head = make_configuration(mins[head_m_dims],
                                                maxs[head_m_dims],
                                                mins[head_s_dims],
                                                maxs[head_s_dims])

        arm_m_dims = self.conf.arm_dims
        arm_s_dims = self.conf.space_dims
        conf_sm_model_arm = make_configuration(mins[arm_m_dims],
                                               maxs[arm_m_dims],
                                               mins[arm_s_dims],
                                               maxs[arm_s_dims])

        build = SensorimotorModel.from_configuration
        self.sm_model_head = build(conf_sm_model_head,
                                   model_type_head,
                                   config_str_head)
        self.sm_model_arm = build(conf_sm_model_arm,
                                  model_type_arm,
                                  config_str_arm)

        self.t = 0
        self.mode = 'explore'
        for sub_model in (self.sm_model_head, self.sm_model_arm):
            sub_model.mode = 'explore'
        self.sigma_m = [0.1, 0.1]
    def bootstrap(self, expe, n, bootstap_range_div):
        """Feed the agent's sensorimotor model with ``n`` random trials.

        A narrowed configuration is built around the agent's motor and
        sensory centers: each range is divided by ``bootstap_range_div``.
        Motor commands are drawn with ``rand_bounds`` inside the narrowed
        motor bounds; the last ``n_dmps`` entries of every command are then
        replaced by the first ``n_dmps`` DMP default values plus ``randn``
        noise scaled by the narrowed motor ranges.  Each command is executed
        in the environment, the resulting (m, s) pair updates the
        sensorimotor model, and the 'choice', 'inference', 'movement' and
        'perception' events are emitted on the agent.

        Args:
            expe: experiment object; ``expe.ag``, ``expe.env`` and
                ``expe._update_logs`` are used.
            n: number of motor commands requested from ``rand_bounds``.
            bootstap_range_div: divisor applied to the agent's motor and
                sensory ranges.
        """
        agent = expe.ag
        full_conf = agent.conf

        # Half-widths of the narrowed motor / sensory boxes.
        m_half = full_conf.m_ranges / (2 * bootstap_range_div)
        s_half = full_conf.s_ranges / (2 * bootstap_range_div)
        conf = make_configuration(full_conf.m_centers - m_half,
                                  full_conf.m_centers + m_half,
                                  full_conf.s_centers - s_half,
                                  full_conf.s_centers + s_half)

        n_dmps = agent.dmp.n_dmps
        for m in rand_bounds(conf.m_bounds, n=n):
            # Overwrite the trailing n_dmps parameters with noisy defaults.
            noise = conf.m_ranges[-n_dmps:] * randn(n_dmps)
            m[-n_dmps:] = agent.dmp.default[:n_dmps] + noise

            mov = agent.motor_primitive(m)
            s = agent.sensory_primitive(expe.env.update(mov, log=True))
            agent.sensorimotor_model.update(m, s)

            # No goal was chosen for these trials, so 'choice' is all NaN.
            agent.emit('choice', array([nan] * len(agent.expl_dims)))
            agent.emit('inference', m)
            agent.emit('movement', mov)
            agent.emit('perception', s)
        expe._update_logs()
예제 #12
0
    def __init__(self, mid, m_space, s_space, env_conf):
        """Set up a module with a random-goal interest model and an NN sensorimotor model.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting this module's motor dimensions.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds, selecting the sensory dimensions.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
        """
        motor_mins = env_conf.m_mins[m_space]
        motor_maxs = env_conf.m_maxs[m_space]
        # Sensory bounds are taken from the concatenation
        # [selected motor bounds, environment sensory bounds].
        pooled_mins = array(list(motor_mins) + list(env_conf.s_mins))
        pooled_maxs = array(list(motor_maxs) + list(env_conf.s_maxs))
        self.conf = make_configuration(motor_mins,
                                       motor_maxs,
                                       pooled_mins[s_space],
                                       pooled_maxs[s_space])

        # Interest is computed over the sensory dimensions.
        self.im_dims = self.conf.s_dims

        self.mid = mid
        self.m_space = m_space
        self.s_space = s_space
        self.motor_babbling_n_iter = 10

        self.s = None
        self.last_interest = 0

        self.im = MiscRandomInterest(self.conf,
                                     self.im_dims,
                                     competence_measure=competence_dist,
                                     win_size=1000,
                                     competence_mode='knn',
                                     k=20,
                                     progress_mode='local')

        # Exploration noise ratio is fixed to 0.05 for this module.
        self.sm = NonParametric(self.conf,
                                fwd='NN',
                                inv='NN',
                                sigma_explo_ratio=0.05)

        Agent.__init__(self, self.conf, self.sm, self.im)
예제 #13
0
    def __init__(self,
                 mid,
                 m_space,
                 s_space,
                 max_steps,
                 env_conf,
                 explo_noise=0.05,
                 motor_babbling_n_iter=10,
                 optim_explo=None,
                 end_point=False):
        """Set up a module whose motor and sensory bounds are repeated ``max_steps`` times.

        Args:
            mid: module identifier, stored as ``self.mid``.
            m_space: index into ``env_conf.m_mins`` / ``env_conf.m_maxs``
                selecting the motor dimensions of one step.
            s_space: index into the selected motor bounds followed by the
                environment's sensory bounds, selecting the sensory dimensions
                of one step.
            max_steps: number of times the per-step bounds are repeated.
            env_conf: object exposing ``m_mins``, ``m_maxs``, ``s_mins`` and
                ``s_maxs``.
            explo_noise: stored as ``self.explo_noise``.
            motor_babbling_n_iter: stored as ``self.motor_babbling_n_iter``.
            optim_explo: stored as ``self.optim_explo``.
            end_point: when true, an extra dataset ``self.sm_end`` is created
                and the interest model only covers the last ``n_sdims``
                sensory dimensions.
        """
        self.mid = mid
        self.m_space = m_space
        self.s_space = s_space
        self.n_mdims = len(self.m_space)
        self.n_sdims = len(self.s_space)
        self.max_steps = max_steps
        self.env_conf = env_conf
        self.explo_noise = explo_noise
        self.motor_babbling_n_iter = motor_babbling_n_iter
        self.optim_explo = optim_explo
        self.end_point = end_point

        self.s = None
        self.sg = None
        self.last_interest = 0
        self.t = 0

        # Sensorimotor Model
        # Per-step bounds; sensory ones are picked with s_space out of
        # [selected motor bounds, environment sensory bounds].
        step_m_mins = list(env_conf.m_mins[m_space])
        step_m_maxs = list(env_conf.m_maxs[m_space])
        step_s_mins = list(
            np.array(step_m_mins + list(env_conf.s_mins))[s_space])
        step_s_maxs = list(
            np.array(step_m_maxs + list(env_conf.s_maxs))[s_space])

        # Repeat the per-step bounds once for every step.
        conf = make_configuration(step_m_mins * self.max_steps,
                                  step_m_maxs * self.max_steps,
                                  step_s_mins * self.max_steps,
                                  step_s_maxs * self.max_steps)

        # buffer_size: size of a small kdtree buffer, updated often so that
        #   data moves to the big kdtree less often.
        # lateness: the model can be "late" by this number of points, i.e.
        #   they are not yet taken into account (added to the small kdtree).
        self.sm = BufferedDataset(conf.m_ndims,
                                  conf.s_ndims,
                                  buffer_size=10000,
                                  lateness=100)

        if self.end_point:
            self.sm_end = BufferedDataset(conf.m_ndims,
                                          len(s_space),
                                          buffer_size=10000,
                                          lateness=100)
            interest_dims = conf.s_dims[-self.n_sdims:]
        else:
            interest_dims = conf.s_dims

        self.interest_model = MiscRandomInterest(conf,
                                                 interest_dims,
                                                 self.n_sdims,
                                                 win_size=200)
예제 #14
0
    def __init__(self,
                 name=None,
                 hierarchy_type=0,
                 babbling_name="goal",
                 supervisor_name="interest",
                 supervisor_explo="motor",
                 supervisor_n_explo_points=0,
                 supervisor_ccm="competence",
                 supervisor_ccl="local",
                 tdd=False,
                 ns=False,
                 perturbation=None,
                 from_log=None,
                 iterations=None):

        ################################### EXPERIMENT CONFIG ###################################

        self.name = name or 'Experiment'
        self.init_rest_trial = False
        self.bootstrap = 100
        self.bootstrap_range_div = 1.
        self.iter = iterations or 50
        self.log_each = self.iter  #must be <= iter
        self.eval_at = []
        self.n_eval = 0
        self.eval_modes = []

        self.gui = False

        self.hierarchy_type = hierarchy_type
        self.babbling_name = babbling_name
        if self.babbling_name == "goal":
            self.motor_babbling_n_iter = 0
        else:
            self.motor_babbling_n_iter = self.iter

        self.from_log = from_log

        ################################### AGENT CONFIG ###################################

        self.n_dyn_motors = 4
        self.n_dmps = self.n_dyn_motors
        self.dmp_use_initial = False
        self.dmp_use_goal = True
        self.n_bfs = 2
        self.n_static_motor = 0
        self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)

        self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
        if self.dmp_use_initial:
            self.motor_n_dims = self.motor_n_dims + self.n_dmps
        if self.dmp_use_goal:
            self.motor_n_dims = self.motor_n_dims + self.n_dmps

        self.move_steps = 50
        self.motor_dims = range(self.motor_n_dims)

        if self.hierarchy_type <= 1:
            self.s_n_dims = 5 * self.n_bfs + 5
        elif self.hierarchy_type == 2:
            self.s_n_dims = 7 * self.n_bfs + 5
        else:
            raise NotImplementedError

        self.sensori_dims = range(self.motor_n_dims,
                                  self.motor_n_dims + self.s_n_dims)
        self.used_dims = self.motor_n_dims + self.s_n_dims

        self.im_name = 'miscRandom_local'
        self.choose_children_local = (supervisor_ccl == 'local')

        self.sms = {
            'knn1': (NonParametric, {
                'fwd': 'NN',
                'inv': 'NN',
                'sigma_explo_ratio': 0.01
            }),
        }

        sm = 'knn1'
        im_mode = 'sg'
        self.std_range = [-1., 1.]

        m = self.motor_dims
        s = self.sensori_dims

        self.operators = ["par"]

        if self.hierarchy_type == 0:
            self.m_spaces = dict(m=m)
            self.s_spaces = dict(s=s)

            self.modules = dict(mod1=dict(
                m=m,
                s=s,
                m_list=[m],
                operator="par",
                babbling_name="goal",
                sm_name=sm,
                im_name=self.im_name,
                im_mode=im_mode,
                from_log=None,
                motor_babbling_n_iter=self.motor_babbling_n_iter), )
        elif self.hierarchy_type == 1:
            self.m_spaces = dict(m_arm=range(12))
            self.s_spaces = dict(s_h=range(self.motor_n_dims + 0,
                                           self.motor_n_dims + 9),
                                 s_t1=range(self.motor_n_dims + 9,
                                            self.motor_n_dims + 15),
                                 s_t2=range(self.motor_n_dims + 15,
                                            self.motor_n_dims + 21),
                                 s_o=range(self.motor_n_dims + 21,
                                           self.motor_n_dims + 23),
                                 s_b=range(self.motor_n_dims + 23,
                                           self.motor_n_dims + 25))

            self.modules = dict(
                mod1=dict(m=self.m_spaces["m_arm"],
                          s=self.s_spaces["s_h"],
                          m_list=[self.m_spaces["m_arm"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                mod2=dict(m=self.s_spaces["s_h"],
                          s=self.s_spaces["s_t1"],
                          m_list=[self.s_spaces["s_h"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                mod3=dict(m=self.s_spaces["s_t1"],
                          s=self.s_spaces["s_o"],
                          m_list=[self.s_spaces["s_t1"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                mod4=dict(m=self.s_spaces["s_o"],
                          s=self.s_spaces["s_b"],
                          m_list=[self.s_spaces["s_o"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                mod5=dict(m=self.s_spaces["s_h"],
                          s=self.s_spaces["s_t2"],
                          m_list=[self.s_spaces["s_h"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                mod6=dict(m=self.s_spaces["s_t2"],
                          s=self.s_spaces["s_o"],
                          m_list=[self.s_spaces["s_t2"]],
                          operator="par",
                          babbling_name="goal",
                          sm_name=sm,
                          im_name=self.im_name,
                          im_mode=im_mode,
                          from_log=None,
                          motor_babbling_n_iter=self.motor_babbling_n_iter),
            )
        else:
            raise NotImplementedError

        self.supervisor_name = supervisor_name
        self.supervisor_explo = supervisor_explo
        self.supervisor_n_explo_points = supervisor_n_explo_points
        self.supervisor_ccm = supervisor_ccm
        self.supervisor_ccl = supervisor_ccl

        if self.supervisor_name == "random":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(
                choice="random",
                llb=False,
                explo=self.supervisor_explo,
                n_explo_points=self.supervisor_n_explo_points,
                choose_children_mode=self.supervisor_ccm,
                choose_children_local=self.supervisor_ccl)
        elif self.supervisor_name == "interest":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(
                choice="prop",
                llb=False,
                explo=self.supervisor_explo,
                n_explo_points=self.supervisor_n_explo_points,
                choose_children_mode=self.supervisor_ccm,
                choose_children_local=self.supervisor_ccl)
        elif self.supervisor_name == "interest_greedy":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(
                choice="greedy",
                llb=False,
                explo=self.supervisor_explo,
                n_explo_points=self.supervisor_n_explo_points,
                choose_children_mode=self.supervisor_ccm,
                choose_children_local=self.supervisor_ccl)
        elif self.supervisor_name == "interest_bias":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(
                choice="prop",
                llb=True,
                explo=self.supervisor_explo,
                n_explo_points=self.supervisor_n_explo_points,
                choose_children_mode=self.supervisor_ccm,
                choose_children_local=self.supervisor_ccl)
        else:
            raise NotImplementedError

        self.eval_dims = s[-4:-2]
        self.eval_explo_dims = s[-4:-2]

        self.eval_range = np.array([[-1.], [1.]])
        self.eval_explo_eps = 0.02
        self.eval_explo_comp_eps = 0.02

        ################################### Env CONFIG ###################################

        self.max_param = 500.  # max DMP weight
        self.max_params = self.max_param * np.ones(
            (self.n_dmps * self.n_bfs, ))

        if self.dmp_use_initial:
            self.max_params = np.append([1] * self.n_dmps, self.max_params)
        if self.dmp_use_goal:
            self.max_params = np.append(self.max_params, [1] * self.n_dmps)

        self.env_cls = CogSci2016Environment
        self.env_cfg = dict(move_steps=self.move_steps,
                            max_params=self.max_params,
                            perturbation=perturbation,
                            gui=self.gui)

        self.rest_position = [0.] * self.motor_n_dims

        self.m_mins = [-1.] * (self.n_dyn_motors * (self.n_bfs + 1))
        self.m_maxs = [1.] * (self.n_dyn_motors * (self.n_bfs + 1))

        self.s_mins = [-1.] * (3 * (self.n_bfs + 1)) + [-1.5] * (
            self.n_bfs + 1) + [0.] * (self.n_bfs + 1) + [-1.5] * (
                self.n_bfs + 1) + [0.] * (self.n_bfs + 1) + [-2., -2., 0., 0.]
        self.s_maxs = [1.] * (3 * (self.n_bfs + 1)) + [1.5, 1.5] * (
            self.n_bfs + 1) + [1.5, 1.5] * (self.n_bfs + 1) + [
                2., 2., 10., 0.3
            ]

        ################################### Process CONFIG ###################################

        self.agent = make_configuration(self.m_mins, self.m_maxs, self.s_mins,
                                        self.s_maxs)
        self.tag = self.name
        self.log_dir = ''  #determined later
예제 #15
0
    def __init__(self,
                 config,
                 model_babbling="random",
                 n_motor_babbling=0,
                 explo_noise=0.1,
                 choice_eps=0.2,
                 proba_imitate=0.5,
                 tau=1.):
        """Build the supervisor state and its five learning modules.

        Index spaces are stored as lists (not bare ``range`` objects) because
        several of them are concatenated with ``+`` below; on Python 3
        ``range + range`` raises ``TypeError``.

        Args:
            config: Mapping expanded as keyword arguments into
                ``make_configuration``; the result is kept in ``self.conf``.
            model_babbling: Stored as ``self.model_babbling``.
            n_motor_babbling: Stored as ``self.n_motor_babbling``.
            explo_noise: Stored and forwarded to every ``LearningModule``.
            choice_eps: Stored as ``self.choice_eps``.
            proba_imitate: Stored and forwarded to module ``mod14`` only.
            tau: Accepted but not used by this constructor.

        Raises:
            AssertionError: If ``self.conf.m_ndims`` differs from
                ``arm_n_dims + diva_n_dims`` (40 + 28).
        """
        self.config = config
        self.model_babbling = model_babbling
        self.n_motor_babbling = n_motor_babbling
        self.explo_noise = explo_noise
        self.choice_eps = choice_eps
        self.proba_imitate = proba_imitate
        self.conf = make_configuration(**config)

        self.t = 0
        self.modules = {}
        self.chosen_modules = []
        self.goals = []
        self.cp_evolution = {}
        self.pp_evolution = {}

        self.mid_control = None
        self.last_cmd = None

        # Define motor and sensory spaces:
        m_ndims = self.conf.m_ndims  # number of motor parameters
        self.arm_n_dims = 40
        self.diva_n_dims = 28
        assert (m_ndims == self.arm_n_dims + self.diva_n_dims), (
            "conf.m_ndims must equal arm_n_dims + diva_n_dims")

        def dims_after_motor(start, stop):
            # Indices [m_ndims + start, m_ndims + stop) as a concrete list.
            return list(range(m_ndims + start, m_ndims + stop))

        self.m_arm = list(range(self.arm_n_dims))
        self.m_diva = list(
            range(self.arm_n_dims, self.arm_n_dims + self.diva_n_dims))

        self.m_space = list(range(m_ndims))
        self.c_dims = dims_after_motor(0, 3)
        self.s_hand = dims_after_motor(3, 33)
        self.s_culbuto_1 = dims_after_motor(33, 63)
        self.s_self_sound = dims_after_motor(63, 73)
        self.s_caregiver_sound = dims_after_motor(73, 83)

        self.s_spaces = dict(s_hand=self.s_hand,
                             s_culbuto_1=self.s_culbuto_1,
                             s_self_sound=self.s_self_sound,
                             s_caregiver_sound=self.s_caregiver_sound)

        self.arm_modules = ['mod1', 'mod3', 'mod6']
        self.diva_modules = ['mod12', 'mod14']
        self.arm_goal_selection = 0.10

        # Create the learning modules (insertion order: mod1, mod3, mod6,
        # mod12, mod14).
        self.modules['mod1'] = LearningModule("mod1",
                                              self.m_arm,
                                              self.s_hand,
                                              self.conf,
                                              explo_noise=self.explo_noise)

        # Modules whose sensory space is the 3 context dims followed by a
        # sensory space: (module id, motor dims, sensory dims).
        context_specs = [
            ("mod3", self.m_arm, self.s_culbuto_1),
            ("mod6", self.m_arm, self.s_caregiver_sound),
            ("mod12", self.m_diva, self.s_culbuto_1),
        ]
        for mid, m_dims, s_dims in context_specs:
            self.modules[mid] = LearningModule(
                mid,
                m_dims,
                self.c_dims + s_dims,
                self.conf,
                context_mode=dict(mode='mcs',
                                  context_dims=[0, 1, 2],
                                  context_n_dims=3,
                                  context_sensory_bounds=[[-2.] * 3,
                                                          [2.] * 3]),
                explo_noise=self.explo_noise)

        # NOTE: a "mod13" (m_diva -> s_self_sound, no imitation) existed here
        # as commented-out code and is not created.
        self.modules['mod14'] = LearningModule(
            "mod14",
            self.m_diva,
            self.s_self_sound,
            self.conf,
            imitate="mod6",
            explo_noise=self.explo_noise,
            proba_imitate=self.proba_imitate)

        for mid in self.modules:
            self.cp_evolution[mid] = []
            self.pp_evolution[mid] = []

        self.count_arm = 0
        self.count_diva = 0

        # Module ids in numeric order, restricted to the modules that exist.
        self.mids = [
            "mod" + str(i) for i in range(1, 15)
            if "mod" + str(i) in self.modules
        ]

        # Book-keeping
        self.actions = []
        self.observations = []
예제 #16
0
    def __init__(self,
                 config,
                 model_babbling="random",
                 n_motor_babbling=0,
                 explo_noise=0.1,
                 choice_eps=0.2,
                 proba_imitate=0.5):
        """Build the supervisor state and its ten learning modules.

        Index spaces are stored as lists (not bare ``range`` objects) because
        slices of them are concatenated with ``+`` below; on Python 3
        ``range + range`` raises ``TypeError``.

        Args:
            config: Mapping expanded as keyword arguments into
                ``make_configuration``; the result is kept in ``self.conf``.
            model_babbling: Stored as ``self.model_babbling``.
            n_motor_babbling: Stored as ``self.n_motor_babbling``.
            explo_noise: Stored and forwarded to every ``LearningModule``.
            choice_eps: Stored as ``self.choice_eps``.
            proba_imitate: Stored and forwarded to every ``LearningModule``.
        """
        self.config = config
        self.model_babbling = model_babbling
        self.n_motor_babbling = n_motor_babbling
        self.explo_noise = explo_noise
        self.choice_eps = choice_eps
        self.proba_imitate = proba_imitate
        self.conf = make_configuration(**config)

        self.t = 0
        self.modules = {}
        self.chosen_modules = []
        self.cp_evolution = {}
        self.pp_evolution = {}

        self.mid_control = None
        self.last_cmd = None

        # Define motor and sensory spaces:
        m_ndims = self.conf.m_ndims  # number of motor parameters

        self.arm_n_dims = 21
        self.diva_n_dims = 28

        def dims_after_motor(start, stop):
            # Indices [m_ndims + start, m_ndims + stop) as a concrete list.
            return list(range(m_ndims + start, m_ndims + stop))

        self.m_arm = list(range(self.arm_n_dims))
        self.m_diva = list(
            range(self.arm_n_dims, self.arm_n_dims + self.diva_n_dims))
        self.m_space = list(range(m_ndims))
        self.c_dims = dims_after_motor(0, 10)
        self.s_hand = dims_after_motor(10, 20)
        self.s_tool = dims_after_motor(20, 30)
        self.s_toy1 = dims_after_motor(30, 40)
        self.s_toy2 = dims_after_motor(40, 50)
        self.s_toy3 = dims_after_motor(50, 60)
        self.s_sound = dims_after_motor(60, 70)
        self.s_caregiver = dims_after_motor(70, 80)

        self.s_spaces = dict(s_hand=self.s_hand,
                             s_tool=self.s_tool,
                             s_toy1=self.s_toy1,
                             s_toy2=self.s_toy2,
                             s_toy3=self.s_toy3,
                             s_sound=self.s_sound,
                             s_caregiver=self.s_caregiver)

        # Create the 10 learning modules (insertion order: mod1..mod6,
        # mod10..mod13).
        self.modules['mod1'] = LearningModule("mod1",
                                              self.m_arm,
                                              self.s_hand,
                                              self.conf,
                                              explo_noise=self.explo_noise,
                                              proba_imitate=self.proba_imitate)

        # Context-conditioned modules:
        # (module id, motor dims, positions in c_dims used as context,
        #  sensory dims). Each module's sensory space is the selected context
        # dims followed by the sensory dims.
        context_specs = [
            ("mod2", self.m_arm, [0, 1], self.s_tool),
            ("mod3", self.m_arm, [0, 1, 2, 3], self.s_toy1),
            ("mod4", self.m_arm, [0, 1, 4, 5], self.s_toy2),
            ("mod5", self.m_arm, [0, 1, 6, 7], self.s_toy3),
            ("mod6", self.m_arm, [0, 1, 2, 3, 4, 5, 6, 7], self.s_sound),
            ("mod10", self.m_diva, [2, 3, 8, 9], self.s_toy1),
            ("mod11", self.m_diva, [4, 5, 8, 9], self.s_toy2),
            ("mod12", self.m_diva, [6, 7, 8, 9], self.s_toy3),
        ]
        for mid, m_dims, context_dims, s_dims in context_specs:
            n_context = len(context_dims)
            self.modules[mid] = LearningModule(
                mid,
                m_dims,
                [self.c_dims[i] for i in context_dims] + s_dims,
                self.conf,
                context_mode=dict(mode='mcs',
                                  context_dims=context_dims,
                                  context_n_dims=n_context,
                                  context_sensory_bounds=[[-1.] * n_context,
                                                          [1.] * n_context]),
                explo_noise=self.explo_noise,
                proba_imitate=self.proba_imitate)

        # mod13 has no context and is created with imitate="mod6".
        self.modules['mod13'] = LearningModule(
            "mod13",
            self.m_diva,
            self.s_sound,
            self.conf,
            imitate="mod6",
            explo_noise=self.explo_noise,
            proba_imitate=self.proba_imitate)

        # NOTE: full-context (all 10 c_dims) variants previously existed here
        # as commented-out code and are not created: mod1 (m_arm -> s_hand),
        # mod7 (m_arm -> s_caregiver), mod8 (m_diva -> s_hand),
        # mod9 (m_diva -> s_tool), mod13 (m_diva -> s_sound) and
        # mod14 (m_diva -> s_caregiver).

        for mid in self.modules:
            self.cp_evolution[mid] = []
            self.pp_evolution[mid] = []

        self.count_arm = 0
        self.count_diva = 0

        # Module ids in numeric order, restricted to the modules that exist.
        self.mids = [
            "mod" + str(i) for i in range(1, 15)
            if "mod" + str(i) in self.modules
        ]
예제 #17
0
    def __init__(self,
                 config,
                 babbling_mode="active",
                 n_motor_babbling=0,
                 explo_noise=0.1,
                 choice_eps=0.2,
                 normalize_interests=True):
        """Build the supervisor state and its seven learning modules.

        Index spaces are stored as lists (not bare ``range`` objects) because
        they are concatenated with ``+`` below; on Python 3 ``range + range``
        and ``list + range`` raise ``TypeError``.

        Args:
            config: Mapping expanded as keyword arguments into
                ``make_configuration``; the result is kept in ``self.conf``.
            babbling_mode: ``"active"`` is stored as ``"prop"``; any other
                value is stored as ``"random"``.
            n_motor_babbling: Stored as ``self.n_motor_babbling``.
            explo_noise: Stored and forwarded to every ``LearningModule``.
            choice_eps: Stored as ``self.choice_eps`` (a scalar).
            normalize_interests: Stored and forwarded to every
                ``LearningModule``.
        """
        self.config = config
        self.babbling_mode = "prop" if babbling_mode == "active" else "random"
        self.n_motor_babbling = n_motor_babbling
        self.explo_noise = explo_noise
        # A stray trailing comma used to turn this into the 1-tuple
        # ``(choice_eps,)``; store the scalar itself.
        self.choice_eps = choice_eps
        self.normalize_interests = normalize_interests

        self.conf = make_configuration(**config)

        self.t = 0
        self.modules = {}
        self.chosen_modules = []
        self.progresses_evolution = {}
        self.interests_evolution = {}

        self.have_to_replay_arm_demo = None

        self.mid_control = ''

        # Define motor and sensory spaces:
        m_ndims = self.conf.m_ndims  # number of motor parameters

        def dims_after_motor(start, stop):
            # Indices [m_ndims + start, m_ndims + stop) as a concrete list.
            return list(range(m_ndims + start, m_ndims + stop))

        self.m_space = list(range(m_ndims))
        self.c_dims = dims_after_motor(0, 2)
        self.s_hand = dims_after_motor(2, 32)
        self.s_joystick_1 = dims_after_motor(32, 52)
        self.s_joystick_2 = dims_after_motor(52, 72)
        self.s_ergo = dims_after_motor(72, 92)
        self.s_ball = dims_after_motor(92, 112)
        self.s_light = dims_after_motor(112, 122)
        self.s_sound = dims_after_motor(122, 132)

        self.s_spaces = dict(s_hand=self.s_hand,
                             s_joystick_1=self.s_joystick_1,
                             s_joystick_2=self.s_joystick_2,
                             s_ergo=self.s_ergo,
                             s_ball=self.s_ball,
                             s_light=self.s_light,
                             s_sound=self.s_sound)

        # Create the learning modules (insertion order: mod1..mod7).

        # Modules without context: (module id, sensory dims).
        plain_specs = [
            ("mod1", self.s_hand),
            ("mod2", self.s_joystick_1),
            ("mod3", self.s_joystick_2),
        ]
        for mid, s_dims in plain_specs:
            self.modules[mid] = LearningModule(
                mid,
                self.m_space,
                s_dims,
                self.conf,
                explo_noise=self.explo_noise,
                normalize_interests=self.normalize_interests)

        # Context-conditioned modules: (module id, context dims, sensory
        # dims). mod4 uses only the first context dim; the others use both.
        context_specs = [
            ("mod4", [self.c_dims[0]], self.s_ergo),
            ("mod5", self.c_dims, self.s_ball),
            ("mod6", self.c_dims, self.s_light),
            ("mod7", self.c_dims, self.s_sound),
        ]
        for mid, context, s_dims in context_specs:
            n_context = len(context)
            self.modules[mid] = LearningModule(
                mid,
                self.m_space,
                context + s_dims,
                self.conf,
                context_mode=dict(mode='mcs',
                                  context_n_dims=n_context,
                                  context_sensory_bounds=[[-1.] * n_context,
                                                          [1.] * n_context]),
                explo_noise=self.explo_noise,
                normalize_interests=self.normalize_interests)

        # Two-way lookup between sensory-space names and module ids.
        self.space2mid = dict(s_hand="mod1",
                              s_joystick_1="mod2",
                              s_joystick_2="mod3",
                              s_ergo="mod4",
                              s_ball="mod5",
                              s_light="mod6",
                              s_sound="mod7")

        self.mid2space = dict(mod1="s_hand",
                              mod2="s_joystick_1",
                              mod3="s_joystick_2",
                              mod4="s_ergo",
                              mod5="s_ball",
                              mod6="s_light",
                              mod7="s_sound")

        for mid in self.modules:
            self.progresses_evolution[mid] = []
            self.interests_evolution[mid] = []
예제 #18
0
    def __init__(self, 
                 name=None, 
                 hierarchy_type=0, 
                 babbling_name="goal", 
                 supervisor_name="interest", 
                 supervisor_explo="motor", 
                 supervisor_n_explo_points = 0,
                 supervisor_ccm="competence", 
                 supervisor_ccl="local", 
                 sm_model='NN',
                 im_model='miscRandom_local',
                 im_mode='sg',
                 tdd=False,
                 ns=False,
                 envnoise=0,
                 perturbation=None,
                 allow_split_mod1=False,
                 from_log=None,
                 bootstrap=0,
                 explo_noise=0.01,
                 iterations=None):
              
        ################################### EXPERIMENT CONFIG ###################################
    
        self.name = name or 'Experiment'
        self.init_rest_trial = False
        self.bootstrap = bootstrap
        self.bootstrap_range_div = 1.
        self.iter = iterations or 50
        self.log_each = self.iter #must be <= iter
        self.eval_at = []
        self.n_eval = 0
        self.eval_modes = []
        
        self.gui = False
        
        self.hierarchy_type = hierarchy_type
        self.babbling_name = babbling_name
        if self.babbling_name == "goal":
            self.motor_babbling_n_iter = 10
        else:
            self.motor_babbling_n_iter = self.iter
            
        self.from_log = from_log
        
        ################################### AGENT CONFIG ###################################
        
        self.n_dyn_motors = 4
        self.n_dmps = self.n_dyn_motors
        self.dmp_use_initial = False
        self.dmp_use_goal = True
        self.n_bfs = 2
        self.n_static_motor = 0
        self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)
        
        self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
        if self.dmp_use_initial: 
            self.motor_n_dims = self.motor_n_dims +  self.n_dmps
        if self.dmp_use_goal:
            self.motor_n_dims = self.motor_n_dims +  self.n_dmps
             
        
        self.move_steps = 50
        self.motor_dims = range(self.motor_n_dims)
        self.s_n_dims = 31 * 3
        
        self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
        self.used_dims = self.motor_n_dims + self.s_n_dims
        
        self.choose_children_local = (supervisor_ccl == 'local')
        
        self.ims = {'miscRandom_local': (MiscRandomInterest, {
                                  'competence_measure': competence_dist,
                                  #'competence_measure': lambda target, reached, dist_max:competence_exp(target, reached, dist_min=0.01, dist_max=dist_max, power=20.),
                                   'win_size': 1000,
                                   'competence_mode': 'knn',
                                   'k': 20,
                                   'progress_mode': 'local'}),
                    'miscRandom_global': (MiscRandomInterest, {
                                  'competence_measure': competence_dist,
                                  #'competence_measure': lambda target, reached, dist_max:competence_exp(target, reached, dist_min=0.01, dist_max=dist_max, power=20.),
                                   'win_size': 1000,
                                   'competence_mode': 'knn',
                                   'k': 20,
                                   'progress_mode': 'global'}),
            }
        
        self.sms = {
            'NN': (NonParametric, {'fwd': 'NN', 'inv': 'NN', 'sigma_explo_ratio':explo_noise}),
            'LWLR-BFGS-EXPLO': (NonParametric, {'fwd': 'LWLR', 'k':10, 'sigma':0.1, 'sigma_explo_ratio':explo_noise, 'inv': 'L-BFGS-B', 'maxfun':200, 'ftol':0, 'gtol':0}),
            'LWLR-BFGS-NOEXPLO': (NonParametric, {'fwd': 'LWLR', 'k':20, 'sigma':0.1, 'sigma_explo_ratio':0., 'inv': 'L-BFGS-B', 'maxfun':200, 'ftol':0, 'gtol':0}),
            'LWLR-CMAES': (NonParametric, {'fwd': 'LWLR', 'k':10, 'sigma':0.1, 'inv': 'CMAES', 'cmaes_sigma':0.05, 'sigma_explo_ratio':explo_noise, 'maxfevals':20}),
        }
          
        self.sm_model = sm_model
        self.im_model = im_model
        self.im_name = self.im_model
        
        sm = self.sm_model
        
        self.std_range = [-1.,1.]
        
        
        m = self.motor_dims
        s = self.sensori_dims
        
        self.operators = ["par"]
        
        if self.hierarchy_type == 0:
            self.m_spaces = dict(m=m)
            self.s_spaces = dict(s=s)
            
            self.modules = dict(mod1 = dict(m = m,
                                          s = s,     
                                          m_list = [m],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                )
        elif self.hierarchy_type == 1:
            self.m_spaces = dict(m_arm=range(12))
            self.s_spaces = dict(s_h=range(self.motor_n_dims + 0, self.motor_n_dims + 9),
                                 s_t1=range(self.motor_n_dims + 9, self.motor_n_dims + 15),
                                 s_t2=range(self.motor_n_dims + 15, self.motor_n_dims + 21),
                                 s_o1=range(self.motor_n_dims + 21, self.motor_n_dims + 27),
                                 s_o2=range(self.motor_n_dims + 27, self.motor_n_dims + 33),
                                 s_o3=range(self.motor_n_dims + 33, self.motor_n_dims + 39),
                                 s_o4=range(self.motor_n_dims + 39, self.motor_n_dims + 45),
                                 s_o5=range(self.motor_n_dims + 45, self.motor_n_dims + 51),
                                 s_o6=range(self.motor_n_dims + 51, self.motor_n_dims + 57),
                                 s_o7=range(self.motor_n_dims + 57, self.motor_n_dims + 63),
                                 s_o8=range(self.motor_n_dims + 63, self.motor_n_dims + 69),
                                 s_o9=range(self.motor_n_dims + 69, self.motor_n_dims + 75),
                                 s_o10=range(self.motor_n_dims + 75, self.motor_n_dims + 81),
                                 s_o11=range(self.motor_n_dims + 81, self.motor_n_dims + 87),
                                 s_o12=range(self.motor_n_dims + 87, self.motor_n_dims + 93))

            self.modules = dict(mod1 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_h"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod2 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_t1"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod3 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_t2"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                
                                mod4 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o1"],     
                                          m_list = [self.m_spaces["m_arm"]],        
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod5 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o2"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod6 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o3"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod7 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o4"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod8 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o5"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod9 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o6"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod10 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o7"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod11 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o8"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod12 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o9"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod13 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o10"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod14 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o11"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod15 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o12"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                )
        elif self.hierarchy_type == 2:
            self.m_spaces = dict(m_arm=range(12))
            self.s_spaces = dict(
                                 s_o1=[self.motor_n_dims + 23, self.motor_n_dims + 26],#range(self.motor_n_dims + 21, self.motor_n_dims + 27),
                                 s_o4=[self.motor_n_dims + 41, self.motor_n_dims + 44],#range(self.motor_n_dims + 39, self.motor_n_dims + 45),
                                 )

            self.modules = dict(
                                
                                
                                mod4 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o1"],     
                                          m_list = [self.m_spaces["m_arm"]],        
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                
                                mod7 = dict(m = self.m_spaces["m_arm"],
                                          s = self.s_spaces["s_o4"],     
                                          m_list = [self.m_spaces["m_arm"]],      
                                          operator = "par",                            
                                          babbling_name = "goal",
                                          sm_name = sm,
                                          im_name = self.im_name,
                                          im_mode = im_mode,
                                          from_log = None,
                                          motor_babbling_n_iter=self.motor_babbling_n_iter),
                                )
        else:
            raise NotImplementedError
        
        
        self.supervisor_name = supervisor_name
        self.supervisor_explo = supervisor_explo
        self.supervisor_n_explo_points = supervisor_n_explo_points
        self.supervisor_ccm = supervisor_ccm
        self.supervisor_ccl = supervisor_ccl
        
        if self.supervisor_name == "random":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(choice="random",
                                          llb=False,
                                          explo=self.supervisor_explo,
                                          n_explo_points=self.supervisor_n_explo_points,
                                          choose_children_mode=self.supervisor_ccm,
                                          choose_children_local=self.supervisor_ccl,
                                          allow_split_mod1=allow_split_mod1)
        elif self.supervisor_name == "interest":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(choice="prop",
                                          llb=False,
                                          explo=self.supervisor_explo,
                                          n_explo_points=self.supervisor_n_explo_points,
                                          choose_children_mode=self.supervisor_ccm,
                                          choose_children_local=self.supervisor_ccl,
                                          allow_split_mod1=allow_split_mod1)
        elif self.supervisor_name == "interest-pmin":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(choice="prop-min",
                                          llb=False,
                                          explo=self.supervisor_explo,
                                          n_explo_points=self.supervisor_n_explo_points,
                                          choose_children_mode=self.supervisor_ccm,
                                          choose_children_local=self.supervisor_ccl,
                                          allow_split_mod1=allow_split_mod1)
        elif self.supervisor_name == "interest_greedy":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(choice="greedy",
                                          llb=False,
                                          explo=self.supervisor_explo,
                                          n_explo_points=self.supervisor_n_explo_points,
                                          choose_children_mode=self.supervisor_ccm,
                                          choose_children_local=self.supervisor_ccl,
                                          allow_split_mod1=allow_split_mod1)
        elif self.supervisor_name == "interest_bias":
            self.supervisor_cls = Supervisor
            self.supervisor_config = dict(choice="prop",
                                          llb=True,
                                          explo=self.supervisor_explo,
                                          n_explo_points=self.supervisor_n_explo_points,
                                          choose_children_mode=self.supervisor_ccm,
                                          choose_children_local=self.supervisor_ccl,
                                          allow_split_mod1=allow_split_mod1)
        else:
            raise NotImplementedError
        
        
        self.eval_dims = s[-4:-2]
        self.eval_explo_dims = s[-4:-2]
        
        self.eval_range = np.array([[-1.],
                                 [1.]])
        self.eval_explo_eps = 0.02
        self.eval_explo_comp_eps = 0.02
        
        
        ################################### Env CONFIG ###################################
                
        self.max_param = 500. # max DMP weight 
        self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))  

        if self.dmp_use_initial: 
            self.max_params = np.append([1]*self.n_dmps, self.max_params)
        if self.dmp_use_goal:
            self.max_params = np.append(self.max_params, [1]*self.n_dmps)

        self.env_cls = IROS2016Environment
        self.env_cfg = dict(move_steps=self.move_steps, 
                            max_params=self.max_params,
                            noise=envnoise,
                            perturbation=perturbation,
                            gui=self.gui)
        
        self.rest_position = [0.] * self.motor_n_dims
        
        self.m_mins = [-1.] * (self.n_dyn_motors * (self.n_bfs+1))
        self.m_maxs = [1.] * (self.n_dyn_motors * (self.n_bfs+1))
        
        self.s_mins = [-1.5] * 31 * 3
        self.s_maxs = [1.5] * 31 * 3
        
        
        ################################### Process CONFIG ###################################
        
        self.agent = make_configuration(self.m_mins, 
                                        self.m_maxs, 
                                        self.s_mins, 
                                        self.s_maxs)
        self.tag = self.name
        self.log_dir = ''#determined later
예제 #19
0
    def __init__(self,
                 name=None,
                 hierarchy_type=0,
                 babbling_name="goal",
                 supervisor_name="interest",
                 supervisor_explo="motor",
                 supervisor_n_explo_points=0,
                 supervisor_ccm="competence",
                 supervisor_ccl="local",
                 im_model='miscRandom_local',
                 tdd=False,
                 ns=False,
                 perturbation=None,
                 from_log=None,
                 iterations=None):
        """Populate the experiment, agent, supervisor and environment settings.

        Every setting is stored as an attribute on ``self``; nothing is
        returned.

        Parameters
        ----------
        name : str or None
            Experiment name; falls back to ``'Experiment'`` when falsy.  Also
            stored as ``self.tag``.
        hierarchy_type : int
            ``0`` builds a single module from all motor dims to all sensory
            dims; ``1`` builds four chained modules (``mod1`` .. ``mod4``).
        babbling_name : str
            ``"goal"`` disables motor babbling (0 iterations); any other value
            sets the motor babbling length to ``self.iter``.
        supervisor_name : str
            One of ``"random"``, ``"interest"``, ``"interest_greedy"``,
            ``"interest_bias"``.
        supervisor_explo, supervisor_n_explo_points, supervisor_ccm, supervisor_ccl
            Stored on ``self`` and forwarded into ``self.supervisor_config`` as
            ``explo``, ``n_explo_points``, ``choose_children_mode`` and
            ``choose_children_local`` respectively.
        im_model : str
            Stored as ``self.im_model`` / ``self.im_name`` and used as the
            ``im_name`` of the non-context modules.
        tdd, ns, perturbation
            Accepted for signature compatibility; not used by this constructor.
        from_log
            Stored as ``self.from_log`` (the module dicts always get
            ``from_log=None``).
        iterations : int or None
            Number of iterations; falls back to ``50`` when falsy (so ``0``
            also yields ``50``).

        Raises
        ------
        NotImplementedError
            If ``hierarchy_type`` or ``supervisor_name`` is not one of the
            supported values listed above.
        """

        ################################### EXPERIMENT CONFIG ###################################

        self.name = name or 'Experiment'
        self.init_rest_trial = False
        self.bootstrap = 100
        self.bootstrap_range_div = 1.
        self.iter = iterations or 50
        self.log_each = self.iter  # must be <= iter
        self.eval_at = []
        self.n_eval = 0
        self.eval_modes = []

        self.gui = True

        self.hierarchy_type = hierarchy_type
        self.babbling_name = babbling_name
        self.motor_babbling_n_iter = 0 if self.babbling_name == "goal" else self.iter

        self.from_log = from_log

        ################################### AGENT CONFIG ###################################

        self.n_dyn_motors = 3
        self.n_dmps = self.n_dyn_motors
        self.dmp_use_initial = False
        self.dmp_use_goal = True
        self.n_bfs = 2
        self.n_static_motor = 0

        # One weight per basis function per dynamic motor, plus the static
        # motors, plus one extra value per DMP for each enabled initial/goal.
        self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
        if self.dmp_use_initial:
            self.motor_n_dims += self.n_dmps
        if self.dmp_use_goal:
            self.motor_n_dims += self.n_dmps

        self.n_context_dims = 2

        # This first context_mode dict is the one captured by self.ims,
        # self.sms and the module dicts below.
        self.context_mode = dict(mode='mcs',
                                 reset_iterations=20,
                                 context_n_dims=self.n_context_dims,
                                 context_sensory_bounds=np.array([[-1.5, -1.5], [1.5, 1.5]]))

        self.move_steps = 50
        self.motor_dims = range(self.motor_n_dims)

        self.s_n_dims = 17

        # Sensory dims are indexed right after the motor dims.
        self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
        self.used_dims = self.motor_n_dims + self.s_n_dims

        self.im_model = im_model
        self.im_name = self.im_model

        # NOTE(review): the context interest model has no 'competence_measure'
        # entry (its only candidate was commented out in the original).
        self.ims = {
            'miscRandom_local': (MiscRandomInterest, {
                'competence_measure': competence_dist,
                'win_size': 1000,
                'competence_mode': 'knn',
                'k': 20,
                'progress_mode': 'local'}),
            'context_miscRandom_local': (ContextRandomInterest, {
                'win_size': 1000,
                'competence_mode': 'knn',
                'k': 20,
                'progress_mode': 'local',
                'context_mode': self.context_mode}),
        }

        # NOTE(review): this boolean is stored but supervisor_config below is
        # given the raw supervisor_ccl string, not this flag.
        self.choose_children_local = (supervisor_ccl == 'local')

        self.sms = {
            'knn1': (NonParametric, {'fwd': 'NN',
                                     'inv': 'NN',
                                     'sigma_explo_ratio': 0.01}),
            'context_knn': (ContextNonParametric, {'fwd': 'NN',
                                                   'inv': 'NN',
                                                   'sigma_explo_ratio': 0.01,
                                                   'context_mode': self.context_mode}),
        }

        sm = 'knn1'
        im_mode = 'sg'
        self.std_range = [-1., 1.]

        m = self.motor_dims
        s = self.sensori_dims

        self.operators = ["par"]

        def make_module(m_dims, s_dims, sm_name, im_name, context_mode):
            # Build one module description; all modules share the operator,
            # babbling name, im_mode and motor babbling length.
            return dict(m=m_dims,
                        s=s_dims,
                        m_list=[m_dims],
                        operator="par",
                        babbling_name="goal",
                        sm_name=sm_name,
                        im_name=im_name,
                        im_mode=im_mode,
                        from_log=None,
                        context_mode=context_mode,
                        motor_babbling_n_iter=self.motor_babbling_n_iter)

        if self.hierarchy_type == 0:
            self.m_spaces = dict(m=m)
            self.s_spaces = dict(s=s)

            self.modules = dict(
                mod1=make_module(m, s, sm, self.im_name, self.context_mode),
            )
        elif self.hierarchy_type == 1:
            # Sensory dims that come after the context dims.
            post_context = self.motor_n_dims + self.n_context_dims

            self.m_spaces = dict(m_arm=range(9))
            self.s_spaces = dict(
                s_h=range(post_context + 0, post_context + 6),
                s_t1=range(post_context + 6, post_context + 12),
                # list(...) keeps the concatenation valid on Python 3, where
                # range objects do not support "+"; on Python 2 the result is
                # the same list as before.
                s_o=(list(range(self.motor_n_dims, self.motor_n_dims + 2))
                     + list(range(post_context + 12, post_context + 15))))

            m_arm = self.m_spaces["m_arm"]
            s_h = self.s_spaces["s_h"]
            s_t1 = self.s_spaces["s_t1"]
            s_o = self.s_spaces["s_o"]

            # The modules targeting s_o use the context-aware models; the
            # s_t2 space and its modules were disabled in the original.
            self.modules = dict(
                mod1=make_module(m_arm, s_h, sm, self.im_name, None),
                mod2=make_module(s_h, s_t1, sm, self.im_name, None),
                mod3=make_module(s_h, s_o, 'context_knn',
                                 'context_miscRandom_local', self.context_mode),
                mod4=make_module(s_t1, s_o, 'context_knn',
                                 'context_miscRandom_local', self.context_mode),
            )
        else:
            raise NotImplementedError(
                "unsupported hierarchy_type: %r" % (self.hierarchy_type,))

        self.supervisor_name = supervisor_name
        self.supervisor_explo = supervisor_explo
        self.supervisor_n_explo_points = supervisor_n_explo_points
        self.supervisor_ccm = supervisor_ccm
        self.supervisor_ccl = supervisor_ccl

        # supervisor_name -> (choice, llb); every other config field is shared.
        supervisor_choices = {
            "random": ("random", False),
            "interest": ("prop", False),
            "interest_greedy": ("greedy", False),
            "interest_bias": ("prop", True),
        }
        if self.supervisor_name not in supervisor_choices:
            raise NotImplementedError(
                "unsupported supervisor_name: %r" % (self.supervisor_name,))
        choice, llb = supervisor_choices[self.supervisor_name]

        self.supervisor_cls = Supervisor
        self.supervisor_config = dict(choice=choice,
                                      llb=llb,
                                      explo=self.supervisor_explo,
                                      n_explo_points=self.supervisor_n_explo_points,
                                      choose_children_mode=self.supervisor_ccm,
                                      choose_children_local=self.supervisor_ccl)

        self.eval_dims = s[-4:-2]
        self.eval_explo_dims = s[-4:-2]

        self.eval_range = np.array([[-1.],
                                    [1.]])
        self.eval_explo_eps = 0.02
        self.eval_explo_comp_eps = 0.02

        ################################### Env CONFIG ###################################

        self.max_param = 300.  # max DMP weight
        self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

        # Initial/goal parameters are bounded by 1 rather than max_param.
        if self.dmp_use_initial:
            self.max_params = np.append([1] * self.n_dmps, self.max_params)
        if self.dmp_use_goal:
            self.max_params = np.append(self.max_params, [1] * self.n_dmps)

        iccm_conf = dict(move_steps=self.move_steps,
                         max_params=self.max_params,
                         gui=self.gui)

        # NOTE(review): context_mode is rebound here with reset_iterations=10,
        # while the interest/sensorimotor models and module dicts above keep
        # the earlier dict (reset_iterations=20).  Preserved as-is; confirm
        # whether the two values are meant to differ.
        self.context_mode = dict(mode='mcs',
                                 reset_iterations=10,
                                 context_n_dims=2,
                                 context_sensory_bounds=np.array([[-1.5, -1.5], [1.5, 1.5]]))

        self.env_cls = ContextEnvironment
        self.env_cfg = dict(env_cls=ICDL2016Environment,
                            env_conf=iccm_conf,
                            context_mode=self.context_mode)

        # The original also assigned np.zeros(n_dmps + n_static_motor) to
        # rest_position earlier; that value was never read before being
        # replaced by this one, so only this assignment is kept.
        self.rest_position = [0.] * self.motor_n_dims

        self.m_mins = [-1.] * (self.n_dyn_motors * (self.n_bfs + 1))
        self.m_maxs = [1.] * (self.n_dyn_motors * (self.n_bfs + 1))

        self.s_mins = [-1.5] * self.s_n_dims
        self.s_maxs = [1.5] * self.s_n_dims

        ################################### Process CONFIG ###################################

        self.agent = make_configuration(self.m_mins,
                                        self.m_maxs,
                                        self.s_mins,
                                        self.s_maxs)
        self.tag = self.name
        self.log_dir = ''  # determined later
예제 #20
0
파일: supervisor.py 프로젝트: 4SkyNet/APEX
    def __init__(self,
                 config,
                 babbling_mode="active",
                 n_motor_babbling=0.1,
                 explo_noise=0.05,
                 choice_eps=0.2,
                 normalize_interests=True):
        self.config = config
        self.babbling_mode = babbling_mode
        self.n_motor_babbling = n_motor_babbling
        self.explo_noise = explo_noise
        self.choice_eps = choice_eps,
        self.normalize_interests = normalize_interests

        self.conf = make_configuration(**config)

        self.t = 0
        self.modules = {}
        self.chosen_modules = []
        self.goals = []
        self.progresses_evolution = {}
        self.interests_evolution = {}

        self.ms = None

        self.have_to_replay_arm_demo = None

        self.mid_control = ''
        self.measure_interest = False

        # Define motor and sensory spaces:
        m_ndims = self.conf.m_ndims  # number of motor parameters

        self.m_space = range(m_ndims)
        self.c_dims = range(m_ndims, m_ndims + 2)
        self.s_hand = range(m_ndims + 2, m_ndims + 32)
        self.s_joystick_1 = range(m_ndims + 32, m_ndims + 52)
        self.s_joystick_2 = range(m_ndims + 52, m_ndims + 72)
        self.s_ergo = range(m_ndims + 72, m_ndims + 92)
        self.s_ball = range(m_ndims + 92, m_ndims + 112)
        self.s_light = range(m_ndims + 112, m_ndims + 122)
        self.s_sound = range(m_ndims + 122, m_ndims + 132)

        self.s_hand_right = range(m_ndims + 132, m_ndims + 162)
        self.s_base = range(m_ndims + 162, m_ndims + 192)
        self.s_arena = range(m_ndims + 192, m_ndims + 212)
        self.s_obj1 = range(m_ndims + 212, m_ndims + 232)
        self.s_obj2 = range(m_ndims + 232, m_ndims + 252)
        self.s_obj3 = range(m_ndims + 252, m_ndims + 272)
        self.s_rdm1 = range(m_ndims + 272, m_ndims + 292)
        self.s_rdm2 = range(m_ndims + 292, m_ndims + 312)

        self.s_spaces = dict(s_hand=self.s_hand,
                             s_joystick_1=self.s_joystick_1,
                             s_joystick_2=self.s_joystick_2,
                             s_ergo=self.s_ergo,
                             s_ball=self.s_ball,
                             s_light=self.s_light,
                             s_sound=self.s_sound,
                             s_hand_right=self.s_hand_right,
                             s_base=self.s_base,
                             s_arena=self.s_arena,
                             s_obj1=self.s_obj1,
                             s_obj2=self.s_obj2,
                             s_obj3=self.s_obj3,
                             s_rdm1=self.s_rdm1,
                             s_rdm2=self.s_rdm2)

        #print
        #print "Initialize agent with spaces:"
        #print "Motor", self.m_space
        #print "Context", self.c_dims
        #print "Hand", self.s_hand
        #print "Joystick1", self.s_joystick_1
        #print "Joystick2", self.s_joystick_2
        #print "Ergo", self.s_ergo
        #print "Ball", self.s_ball
        #print "Light", self.s_light
        #print "Sound", self.s_sound

        # Create the 6 learning modules:
        self.modules['mod1'] = LearningModule(
            "mod1",
            self.m_space,
            self.s_hand,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod2'] = LearningModule(
            "mod2",
            self.m_space,
            self.s_joystick_1,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod3'] = LearningModule(
            "mod3",
            self.m_space,
            self.s_joystick_2,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod4'] = LearningModule(
            "mod4",
            self.m_space, [self.c_dims[0]] + self.s_ergo,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_n_dims=1,
                              context_sensory_bounds=[[-1.], [1.]]),
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod5'] = LearningModule(
            "mod5",
            self.m_space,
            self.c_dims + self.s_ball,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_n_dims=2,
                              context_sensory_bounds=[[-1., -1.], [1., 1.]]),
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod6'] = LearningModule(
            "mod6",
            self.m_space,
            self.c_dims + self.s_light,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_n_dims=2,
                              context_sensory_bounds=[[-1., -1.], [1., 1.]]),
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod7'] = LearningModule(
            "mod7",
            self.m_space,
            self.c_dims + self.s_sound,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_n_dims=2,
                              context_sensory_bounds=[[-1., -1.], [1., 1.]]),
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)

        self.modules['mod8'] = LearningModule(
            "mod8",
            self.m_space,
            self.s_hand_right,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod9'] = LearningModule(
            "mod9",
            self.m_space,
            self.s_base,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod10'] = LearningModule(
            "mod10",
            self.m_space,
            self.s_arena,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod11'] = LearningModule(
            "mod11",
            self.m_space,
            self.s_obj1,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod12'] = LearningModule(
            "mod12",
            self.m_space,
            self.s_obj2,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod13'] = LearningModule(
            "mod13",
            self.m_space,
            self.s_obj3,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod14'] = LearningModule(
            "mod14",
            self.m_space,
            self.s_rdm1,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)
        self.modules['mod15'] = LearningModule(
            "mod15",
            self.m_space,
            self.s_rdm2,
            self.conf,
            explo_noise=self.explo_noise,
            normalize_interests=self.normalize_interests)

        self.space2mid = dict(s_hand="mod1",
                              s_joystick_1="mod2",
                              s_joystick_2="mod3",
                              s_ergo="mod4",
                              s_ball="mod5",
                              s_light="mod6",
                              s_sound="mod7",
                              s_hand_right='mod8',
                              s_base='mod9',
                              s_arena='mod10',
                              s_obj1='mod11',
                              s_obj2='mod12',
                              s_obj3='mod13',
                              s_rdm1='mod14',
                              s_rdm2='mod15')

        self.mid2space = dict(
            mod1="s_hand",
            mod2="s_joystick_1",
            mod3="s_joystick_2",
            mod4="s_ergo",
            mod5="s_ball",
            mod6="s_light",
            mod7="s_sound",
            mod8="s_hand_right",
            mod9="s_base",
            mod10="s_arena",
            mod11="s_obj1",
            mod12="s_obj2",
            mod13="s_obj3",
            mod14="s_rdm1",
            mod15="s_rdm2",
        )

        for mid in self.modules.keys():
            self.progresses_evolution[mid] = []
            self.interests_evolution[mid] = []
예제 #21
0
    def __init__(self,
                 name=None,
                 hierarchy_type=0,
                 babbling_name="goal",
                 supervisor_name="interest",
                 supervisor_explo="motor",
                 supervisor_n_explo_points=0,
                 supervisor_ccm="competence",
                 supervisor_ccl="local",
                 sm_model='NN',
                 im_model='miscRandom_local',
                 im_mode='sg',
                 tdd=False,
                 ns=False,
                 envnoise=0,
                 perturbation=None,
                 allow_split_mod1=False,
                 from_log=None,
                 bootstrap=0,
                 explo_noise=0.01,
                 iterations=None):
        """Build the whole experiment configuration as attributes of ``self``.

        Args:
            name: Experiment name; ``'Experiment'`` is used when falsy. Also
                stored as ``tag``.
            hierarchy_type: Selects the module layout. ``0`` creates a single
                module over all motor and sensory dimensions, ``1`` creates
                fifteen modules (``mod1``..``mod15``), one per sensory space,
                ``2`` creates only ``mod4`` and ``mod7`` on two-dimensional
                sensory spaces.
            babbling_name: Stored as-is. ``"goal"`` sets
                ``motor_babbling_n_iter`` to 10, anything else sets it to
                ``iter``.
            supervisor_name: One of ``"random"``, ``"interest"``,
                ``"interest-pmin"``, ``"interest_greedy"``,
                ``"interest_bias"``; selects ``choice``/``llb`` in
                ``supervisor_config``.
            supervisor_explo: Stored and forwarded as ``explo``.
            supervisor_n_explo_points: Stored and forwarded as
                ``n_explo_points``.
            supervisor_ccm: Stored and forwarded as ``choose_children_mode``.
            supervisor_ccl: Stored and forwarded as ``choose_children_local``;
                also compared with ``'local'`` to set the boolean
                ``self.choose_children_local``.
            sm_model: Key stored as ``sm_model`` and used as ``sm_name`` of
                every module.
            im_model: Key stored as ``im_model``/``im_name`` and used as
                ``im_name`` of every module.
            im_mode: Used as ``im_mode`` of every module.
            tdd: Accepted but not used by this constructor.
            ns: Accepted but not used by this constructor.
            envnoise: Forwarded as ``noise`` in ``env_cfg``.
            perturbation: Forwarded in ``env_cfg``.
            allow_split_mod1: Forwarded in ``supervisor_config``.
            from_log: Stored as ``self.from_log`` (the modules themselves are
                always configured with ``from_log=None``).
            bootstrap: Stored as ``self.bootstrap``.
            explo_noise: ``sigma_explo_ratio`` of every entry of ``self.sms``
                except ``'LWLR-BFGS-NOEXPLO'``, which uses ``0.``.
            iterations: Stored as ``self.iter``; 50 is used when falsy.

        Raises:
            NotImplementedError: If ``hierarchy_type`` is not 0, 1 or 2, or if
                ``supervisor_name`` is not one of the names listed above.
        """

        ################################### EXPERIMENT CONFIG ###################################

        self.name = name or 'Experiment'
        self.init_rest_trial = False
        self.bootstrap = bootstrap
        self.bootstrap_range_div = 1.
        self.iter = iterations or 50
        self.log_each = self.iter  # must be <= iter
        self.eval_at = []
        self.n_eval = 0
        self.eval_modes = []

        self.gui = True

        self.hierarchy_type = hierarchy_type
        self.babbling_name = babbling_name
        # Goal babbling gets 10 motor-babbling iterations; every other mode
        # gets as many as the whole run.
        self.motor_babbling_n_iter = 10 if self.babbling_name == "goal" else self.iter

        self.from_log = from_log

        ################################### AGENT CONFIG ###################################

        self.n_dyn_motors = 4
        self.n_dmps = self.n_dyn_motors
        self.dmp_use_initial = False
        self.dmp_use_goal = True
        self.n_bfs = 2
        self.n_static_motor = 0
        # rest_position is assigned once, in the Env section below; an earlier
        # np.zeros(...) placeholder here was overwritten before ever being read.

        # One weight per basis function per motor, plus the static motors,
        # plus one extra parameter per DMP for each enabled initial/goal term.
        self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
        if self.dmp_use_initial:
            self.motor_n_dims += self.n_dmps
        if self.dmp_use_goal:
            self.motor_n_dims += self.n_dmps

        self.move_steps = 50
        self.motor_dims = range(self.motor_n_dims)
        self.s_n_dims = 31 * 3

        # Sensory dimensions are indexed right after the motor dimensions.
        self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
        self.used_dims = self.motor_n_dims + self.s_n_dims

        self.choose_children_local = (supervisor_ccl == 'local')

        def misc_random_interest(progress_mode):
            # Interest-model entry; the two variants differ only in progress_mode.
            return (MiscRandomInterest, {
                'competence_measure': competence_dist,
                #'competence_measure': lambda target, reached, dist_max:competence_exp(target, reached, dist_min=0.01, dist_max=dist_max, power=20.),
                'win_size': 1000,
                'competence_mode': 'knn',
                'k': 20,
                'progress_mode': progress_mode})

        self.ims = {'miscRandom_local': misc_random_interest('local'),
                    'miscRandom_global': misc_random_interest('global')}

        self.sms = {
            'NN': (NonParametric, {'fwd': 'NN',
                                   'inv': 'NN',
                                   'sigma_explo_ratio': explo_noise}),
            'LWLR-BFGS-EXPLO': (NonParametric, {'fwd': 'LWLR',
                                                'k': 10,
                                                'sigma': 0.1,
                                                'sigma_explo_ratio': explo_noise,
                                                'inv': 'L-BFGS-B',
                                                'maxfun': 200,
                                                'ftol': 0,
                                                'gtol': 0}),
            'LWLR-BFGS-NOEXPLO': (NonParametric, {'fwd': 'LWLR',
                                                  'k': 20,
                                                  'sigma': 0.1,
                                                  'sigma_explo_ratio': 0.,
                                                  'inv': 'L-BFGS-B',
                                                  'maxfun': 200,
                                                  'ftol': 0,
                                                  'gtol': 0}),
            'LWLR-CMAES': (NonParametric, {'fwd': 'LWLR',
                                           'k': 10,
                                           'sigma': 0.1,
                                           'inv': 'CMAES',
                                           'cmaes_sigma': 0.05,
                                           'sigma_explo_ratio': explo_noise,
                                           'maxfevals': 20}),
        }

        self.sm_model = sm_model
        self.im_model = im_model
        self.im_name = self.im_model

        sm = self.sm_model

        self.std_range = [-1., 1.]

        m = self.motor_dims
        s = self.sensori_dims

        self.operators = ["par"]

        def make_module(motor_space, sensory_space):
            # Every module shares the same settings; only the spaces vary.
            return dict(m=motor_space,
                        s=sensory_space,
                        m_list=[motor_space],
                        operator="par",
                        babbling_name="goal",
                        sm_name=sm,
                        im_name=self.im_name,
                        im_mode=im_mode,
                        from_log=None,
                        motor_babbling_n_iter=self.motor_babbling_n_iter)

        offset = self.motor_n_dims  # index of the first sensory dimension

        if self.hierarchy_type == 0:
            self.m_spaces = dict(m=m)
            self.s_spaces = dict(s=s)
            self.modules = dict(mod1=make_module(m, s))
        elif self.hierarchy_type == 1:
            self.m_spaces = dict(m_arm=range(12))

            # (space name, first sensory offset, one-past-last sensory offset).
            # A 9-dim block and two 6-dim blocks, followed by twelve 6-dim
            # blocks s_o1..s_o12 covering offsets 21..93.
            layout = [("s_h", 0, 9), ("s_t1", 9, 15), ("s_t2", 15, 21)]
            layout += [("s_o%d" % obj_idx, 21 + 6 * (obj_idx - 1), 21 + 6 * obj_idx)
                       for obj_idx in range(1, 13)]

            self.s_spaces = {}
            for space_name, lo, hi in layout:
                self.s_spaces[space_name] = range(offset + lo, offset + hi)

            # modN learns the N-th space of the layout, all from the arm motor space.
            arm = self.m_spaces["m_arm"]
            self.modules = {}
            for mod_idx, (space_name, _lo, _hi) in enumerate(layout, 1):
                self.modules["mod%d" % mod_idx] = make_module(arm, self.s_spaces[space_name])
        elif self.hierarchy_type == 2:
            self.m_spaces = dict(m_arm=range(12))
            # Two single dimensions per space, taken from inside the blocks
            # that hierarchy 1 calls s_o1 (offsets 21..27) and s_o4 (39..45).
            self.s_spaces = dict(s_o1=[offset + 23, offset + 26],
                                 s_o4=[offset + 41, offset + 44])

            arm = self.m_spaces["m_arm"]
            self.modules = dict(mod4=make_module(arm, self.s_spaces["s_o1"]),
                                mod7=make_module(arm, self.s_spaces["s_o4"]))
        else:
            raise NotImplementedError(
                "unsupported hierarchy_type: %r" % (self.hierarchy_type,))

        self.supervisor_name = supervisor_name
        self.supervisor_explo = supervisor_explo
        self.supervisor_n_explo_points = supervisor_n_explo_points
        self.supervisor_ccm = supervisor_ccm
        self.supervisor_ccl = supervisor_ccl

        # supervisor_name -> (choice, llb); everything else in the supervisor
        # configuration is identical across supervisors.
        supervisor_variants = {
            "random": ("random", False),
            "interest": ("prop", False),
            "interest-pmin": ("prop-min", False),
            "interest_greedy": ("greedy", False),
            "interest_bias": ("prop", True),
        }
        if self.supervisor_name not in supervisor_variants:
            raise NotImplementedError(
                "unsupported supervisor_name: %r" % (self.supervisor_name,))
        choice, llb = supervisor_variants[self.supervisor_name]

        self.supervisor_cls = Supervisor
        # NOTE(review): choose_children_local receives the raw supervisor_ccl
        # value (e.g. "local"), not the boolean self.choose_children_local --
        # confirm which one Supervisor expects.
        self.supervisor_config = dict(choice=choice,
                                      llb=llb,
                                      explo=self.supervisor_explo,
                                      n_explo_points=self.supervisor_n_explo_points,
                                      choose_children_mode=self.supervisor_ccm,
                                      choose_children_local=self.supervisor_ccl,
                                      allow_split_mod1=allow_split_mod1)

        # Two separate slices so the attributes never share one object.
        self.eval_dims = s[-4:-2]
        self.eval_explo_dims = s[-4:-2]

        self.eval_range = np.array([[-1.],
                                    [1.]])
        self.eval_explo_eps = 0.02
        self.eval_explo_comp_eps = 0.02

        ################################### Env CONFIG ###################################

        self.max_param = 500.  # max DMP weight
        self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

        # Initial/goal parameters are bounded by 1 and placed before/after the
        # DMP weights respectively.
        if self.dmp_use_initial:
            self.max_params = np.append([1] * self.n_dmps, self.max_params)
        if self.dmp_use_goal:
            self.max_params = np.append(self.max_params, [1] * self.n_dmps)

        self.env_cls = IROS2016Environment
        self.env_cfg = dict(move_steps=self.move_steps,
                            max_params=self.max_params,
                            noise=envnoise,
                            perturbation=perturbation,
                            gui=self.gui)

        self.rest_position = [0.] * self.motor_n_dims

        # Bounds are sized from the dimension counts computed above so they
        # cannot drift from motor_n_dims / s_n_dims.
        self.m_mins = [-1.] * self.motor_n_dims
        self.m_maxs = [1.] * self.motor_n_dims

        self.s_mins = [-1.5] * self.s_n_dims
        self.s_maxs = [1.5] * self.s_n_dims

        ################################### Process CONFIG ###################################

        self.agent = make_configuration(self.m_mins,
                                        self.m_maxs,
                                        self.s_mins,
                                        self.s_maxs)
        self.tag = self.name
        self.log_dir = ''  # determined later
예제 #22
0
    def __init__(
        self,
        name=None,
        hierarchy_type=0,
        babbling_name="goal",
        supervisor_name="interest",
        supervisor_explo="motor",
        supervisor_n_explo_points=0,
        supervisor_ccm="competence",
        supervisor_ccl="local",
        tdd=False,
        ns=False,
        perturbation=None,
        from_log=None,
        iterations=None,
    ):
        """Populate every experiment, agent, supervisor and environment setting.

        Args:
            name: Experiment label, also copied to ``self.tag``; a falsy value
                falls back to ``"Experiment"``.
            hierarchy_type: ``0`` builds a single module mapping the whole
                motor space to the whole sensory space; ``1`` builds the
                six-module hierarchy wired below.
            babbling_name: ``"goal"`` sets ``motor_babbling_n_iter`` to 0, any
                other value sets it to the number of iterations.  The
                per-module ``babbling_name`` entry is always ``"goal"``.
            supervisor_name: One of ``"random"``, ``"interest"``,
                ``"interest_greedy"`` or ``"interest_bias"``; selects the
                ``choice`` / ``llb`` pair stored in ``supervisor_config``.
            supervisor_explo: Stored and forwarded as ``explo``.
            supervisor_n_explo_points: Stored and forwarded as
                ``n_explo_points``.
            supervisor_ccm: Stored and forwarded as ``choose_children_mode``.
            supervisor_ccl: Stored and forwarded as ``choose_children_local``;
                also compared to ``"local"`` to set
                ``self.choose_children_local``.
            tdd: Accepted but not read by this constructor.
            ns: Accepted but not read by this constructor.
            perturbation: Forwarded unchanged inside ``env_cfg``.
            from_log: Stored on ``self.from_log``; the per-module ``from_log``
                entry is always ``None``.
            iterations: Number of iterations; a falsy value falls back to 50.

        Raises:
            NotImplementedError: If ``hierarchy_type`` is not 0 or 1, or if
                ``supervisor_name`` is not one of the four names above.
        """

        # ------------------------------ Experiment ------------------------------

        self.name = name if name else "Experiment"
        self.init_rest_trial = False
        self.bootstrap = 100
        self.bootstrap_range_div = 1.0
        self.iter = iterations if iterations else 50
        self.log_each = self.iter  # must be <= iter
        self.eval_at = []
        self.n_eval = 0
        self.eval_modes = []

        self.gui = False

        self.hierarchy_type = hierarchy_type
        self.babbling_name = babbling_name
        self.motor_babbling_n_iter = 0 if self.babbling_name == "goal" else self.iter

        self.from_log = from_log

        # -------------------------------- Agent ---------------------------------

        self.n_dyn_motors = 4
        self.n_dmps = self.n_dyn_motors
        self.dmp_use_initial = False
        self.dmp_use_goal = True
        self.n_bfs = 2
        self.n_static_motor = 0
        # Placeholder value only: rest_position is reassigned in the
        # environment section further down.
        self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)

        # One weight per basis function and motor, plus optional per-DMP
        # initial and goal parameters.
        self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
        if self.dmp_use_initial:
            self.motor_n_dims += self.n_dmps
        if self.dmp_use_goal:
            self.motor_n_dims += self.n_dmps

        self.move_steps = 50
        self.motor_dims = range(self.motor_n_dims)

        if self.hierarchy_type <= 1:
            per_bf_dims = 5
        elif self.hierarchy_type == 2:
            per_bf_dims = 7
        else:
            raise NotImplementedError
        self.s_n_dims = per_bf_dims * self.n_bfs + 5

        first_sensory_dim = self.motor_n_dims
        self.sensori_dims = range(first_sensory_dim, first_sensory_dim + self.s_n_dims)
        self.used_dims = self.motor_n_dims + self.s_n_dims

        self.im_name = "miscRandom_local"
        self.choose_children_local = supervisor_ccl == "local"

        self.sms = {"knn1": (NonParametric, {"fwd": "NN", "inv": "NN", "sigma_explo_ratio": 0.01})}

        sm_key = "knn1"
        interest_mode = "sg"
        self.std_range = [-1.0, 1.0]

        motor_space = self.motor_dims
        sensory_space = self.sensori_dims

        self.operators = ["par"]

        def module_spec(m_dims, s_dims):
            # Every module shares the same settings; only its input space
            # (m_dims) and output space (s_dims) differ.
            return {
                "m": m_dims,
                "s": s_dims,
                "m_list": [m_dims],
                "operator": "par",
                "babbling_name": "goal",
                "sm_name": sm_key,
                "im_name": self.im_name,
                "im_mode": interest_mode,
                "from_log": None,
                "motor_babbling_n_iter": self.motor_babbling_n_iter,
            }

        if self.hierarchy_type == 0:
            self.m_spaces = {"m": motor_space}
            self.s_spaces = {"s": sensory_space}
            self.modules = {"mod1": module_spec(motor_space, sensory_space)}
        elif self.hierarchy_type == 1:
            self.m_spaces = {"m_arm": range(12)}

            # (label, start, stop) offsets relative to the first sensory dim.
            sensory_slices = (
                ("s_h", 0, 9),
                ("s_t1", 9, 15),
                ("s_t2", 15, 21),
                ("s_o", 21, 23),
                ("s_b", 23, 25),
            )
            self.s_spaces = {
                label: range(self.motor_n_dims + start, self.motor_n_dims + stop)
                for label, start, stop in sensory_slices
            }

            # (module, input space, output space): each module maps the
            # space named second onto the space named third.
            named_spaces = dict(self.m_spaces, **self.s_spaces)
            wiring = (
                ("mod1", "m_arm", "s_h"),
                ("mod2", "s_h", "s_t1"),
                ("mod3", "s_t1", "s_o"),
                ("mod4", "s_o", "s_b"),
                ("mod5", "s_h", "s_t2"),
                ("mod6", "s_t2", "s_o"),
            )
            self.modules = {
                mod_name: module_spec(named_spaces[src], named_spaces[dst]) for mod_name, src, dst in wiring
            }
        else:
            raise NotImplementedError

        # ------------------------------ Supervisor ------------------------------

        self.supervisor_name = supervisor_name
        self.supervisor_explo = supervisor_explo
        self.supervisor_n_explo_points = supervisor_n_explo_points
        self.supervisor_ccm = supervisor_ccm
        self.supervisor_ccl = supervisor_ccl

        # (supervisor name, choice, llb); all variants use the same class and
        # share every other option.
        supervisor_presets = (
            ("random", "random", False),
            ("interest", "prop", False),
            ("interest_greedy", "greedy", False),
            ("interest_bias", "prop", True),
        )
        for preset_name, choice, llb in supervisor_presets:
            if self.supervisor_name == preset_name:
                self.supervisor_cls = Supervisor
                self.supervisor_config = {
                    "choice": choice,
                    "llb": llb,
                    "explo": self.supervisor_explo,
                    "n_explo_points": self.supervisor_n_explo_points,
                    "choose_children_mode": self.supervisor_ccm,
                    "choose_children_local": self.supervisor_ccl,
                }
                break
        else:
            raise NotImplementedError

        # Two separate slices on purpose, so the attributes never alias.
        self.eval_dims = sensory_space[-4:-2]
        self.eval_explo_dims = sensory_space[-4:-2]

        self.eval_range = np.array([[-1.0], [1.0]])
        self.eval_explo_eps = 0.02
        self.eval_explo_comp_eps = 0.02

        # ------------------------------ Environment -----------------------------

        self.max_param = 500.0  # max DMP weight
        self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

        # Optional initial / goal parameters are bounded by 1, not max_param.
        unit_bounds = [1] * self.n_dmps
        if self.dmp_use_initial:
            self.max_params = np.append(unit_bounds, self.max_params)
        if self.dmp_use_goal:
            self.max_params = np.append(self.max_params, unit_bounds)

        self.env_cls = CogSci2016Environment
        self.env_cfg = {
            "move_steps": self.move_steps,
            "max_params": self.max_params,
            "perturbation": perturbation,
            "gui": self.gui,
        }

        self.rest_position = [0.0] * self.motor_n_dims

        n_motor_bounds = self.n_dyn_motors * (self.n_bfs + 1)
        self.m_mins = [-1.0] * n_motor_bounds
        self.m_maxs = [1.0] * n_motor_bounds

        group = self.n_bfs + 1  # entries per sensory group
        self.s_mins = (
            [-1.0] * (3 * group)
            + [-1.5] * group
            + [0.0] * group
            + [-1.5] * group
            + [0.0] * group
            + [-2.0, -2.0, 0.0, 0.0]
        )
        self.s_maxs = (
            [1.0] * (3 * group)
            + [1.5, 1.5] * group
            + [1.5, 1.5] * group
            + [2.0, 2.0, 10.0, 0.3]
        )

        # ------------------------------- Process --------------------------------

        self.agent = make_configuration(self.m_mins, self.m_maxs, self.s_mins, self.s_maxs)
        self.tag = self.name
        self.log_dir = ""  # determined later