def bootstrap(self, expe, n, bootstap_range_div):
    """Feed the agent's sensorimotor model with ``n`` random motor commands.

    A reduced configuration is built around the centers of the agent's motor
    and sensory bounds, each range being divided by ``bootstap_range_div``.
    ``n`` motor commands are drawn inside the reduced motor bounds, executed
    in the environment, and the resulting (m, s) pairs are added to the
    sensorimotor model. The agent events are emitted and the experiment logs
    updated after every command.

    Args:
        expe: experiment object providing ``ag``, ``env`` and ``_update_logs``.
        n: number of motor commands to draw.
        bootstap_range_div: divisor applied to the agent's ranges.
    """
    agent = expe.ag
    n_dmps = agent.dmp.n_dmps

    # Half-widths of the reduced motor / sensory boxes.
    m_half = agent.conf.m_ranges / (2 * bootstap_range_div)
    s_half = agent.conf.s_ranges / (2 * bootstap_range_div)
    conf = make_configuration(agent.conf.m_centers - m_half,
                              agent.conf.m_centers + m_half,
                              agent.conf.s_centers - s_half,
                              agent.conf.s_centers + s_half)

    for m in rand_bounds(conf.m_bounds, n=n):
        # The last n_dmps motor values are replaced by the DMP defaults plus
        # Gaussian noise scaled by the reduced motor ranges.
        noise = conf.m_ranges[-n_dmps:] * randn(n_dmps)
        m[-n_dmps:] = agent.dmp.default[:n_dmps] + noise

        mov = agent.motor_primitive(m)
        s = agent.sensory_primitive(expe.env.update(mov, log=True))
        agent.sensorimotor_model.update(m, s)

        # No goal was chosen for these points, hence the NaN "choice".
        agent.emit('choice', array([nan] * len(agent.expl_dims)))
        agent.emit('inference', m)
        agent.emit('movement', mov)
        agent.emit('perception', s)
        expe._update_logs()
def __init__(self, mid, m_space, s_space, env_conf):
    """Create a learning module over a subset of the environment's dimensions.

    Args:
        mid: identifier of the module.
        m_space: indices selecting the module's motor dimensions in
            ``env_conf.m_mins`` / ``env_conf.m_maxs``.
        s_space: indices selecting the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        env_conf: environment configuration providing ``m_mins``, ``m_maxs``,
            ``s_mins`` and ``s_maxs``.
    """
    explo_noise = 0.05

    m_mins = env_conf.m_mins[m_space]
    m_maxs = env_conf.m_maxs[m_space]
    # s_space indexes the (selected motor + sensory) concatenated bounds.
    all_mins = array(list(m_mins) + list(env_conf.s_mins))
    all_maxs = array(list(m_maxs) + list(env_conf.s_maxs))
    self.conf = make_configuration(m_mins, m_maxs,
                                   all_mins[s_space], all_maxs[s_space])

    self.im_dims = self.conf.s_dims
    self.mid = mid
    self.m_space = m_space
    self.s_space = s_space
    self.motor_babbling_n_iter = 10
    self.s = None
    self.last_interest = 0

    # Interest model: random goals with a k-NN, local-progress competence.
    self.im = MiscRandomInterest(self.conf,
                                 self.im_dims,
                                 competence_measure=competence_dist,
                                 win_size=1000,
                                 competence_mode='knn',
                                 k=20,
                                 progress_mode='local')

    # Sensorimotor model: nearest-neighbour forward and inverse models.
    self.sm = NonParametric(self.conf,
                            fwd='NN',
                            inv='NN',
                            sigma_explo_ratio=explo_noise)

    Agent.__init__(self, self.conf, self.sm, self.im)
def get_params(n_bfs, starting_position, babbling_name, sm_name, im_name):
    """Build the agent parameters for a DMP-controlled setup.

    Args:
        n_bfs: number of basis functions per DMP.
        starting_position: one starting value per DMP.
        babbling_name: ``'motor'`` selects the motor dimensions for the
            interest model, anything else selects the sensory dimensions.
        sm_name: key of the sensorimotor model in the module-level ``sms``.
        im_name: interest model name given to
            ``InterestModel.from_configuration``.

    Returns:
        dict with keys ``n_dmps``, ``n_bfs``, ``used``, ``default``, ``conf``,
        ``sm`` and ``im``.
    """
    n_dmps = len(starting_position)
    n_weights = n_dmps * n_bfs

    # Default vector: starting position in both the first and last n_dmps slots.
    default = zeros(n_dmps * (n_bfs + 2))
    default[:n_dmps] = starting_position
    default[-n_dmps:] = starting_position

    start = default[:n_dmps]
    poppy_ag_conf = make_configuration(
        m_mins=[-600] * n_weights + list(start - 180.),
        m_maxs=[600] * n_weights + list(start + 180.),
        s_mins=[-1., -0.7, -0.1],
        s_maxs=[1., 0.7, 0.7])

    if babbling_name == 'motor':
        im_dims = poppy_ag_conf.m_dims
    else:
        im_dims = poppy_ag_conf.s_dims
    im = InterestModel.from_configuration(poppy_ag_conf, im_dims, im_name)

    sm_cls, sm_kwargs = sms[sm_name]
    sm = sm_cls(poppy_ag_conf, **sm_kwargs)

    # Mask: first n_dmps entries unused, every following entry used.
    used = array([False] * n_dmps + [True] * n_weights + [True] * n_dmps)

    return {
        'n_dmps': n_dmps,
        'n_bfs': n_bfs,
        'used': used,
        'default': default,
        'conf': poppy_ag_conf,
        'sm': sm,
        'im': im,
    }
def __init__(self,
             mid,
             m_space,
             s_space,
             env_conf,
             explo_noise=0.,
             normalize_interests=True,
             context_mode=None):
    """Create a learning module, optionally conditioned on a context.

    Args:
        mid: identifier of the module.
        m_space: indices of the module's motor dimensions in ``env_conf``.
        s_space: indices of the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        env_conf: environment configuration (``m_mins``, ``m_maxs``,
            ``s_mins``, ``s_maxs``).
        explo_noise: passed as ``sigma_explo_ratio`` to the sensorimotor model.
        normalize_interests: accepted but not used by this constructor.
        context_mode: when not None, a ``ContextRandomInterest`` is used and
            this value is forwarded to it and to ``Agent.__init__``.
    """
    m_mins = env_conf.m_mins[m_space]
    m_maxs = env_conf.m_maxs[m_space]
    all_mins = array(list(m_mins) + list(env_conf.s_mins))
    all_maxs = array(list(m_maxs) + list(env_conf.s_maxs))
    self.conf = make_configuration(m_mins, m_maxs,
                                   all_mins[s_space], all_maxs[s_space])

    self.im_dims = self.conf.s_dims
    self.mid = mid
    self.m_space = m_space
    self.context_mode = context_mode
    self.s_space = s_space
    self.motor_babbling_n_iter = 0
    self.s = None
    self.last_interest = 0

    # Both interest models share these settings; only the contextual one
    # additionally receives the context description.
    im_kwargs = {
        'win_size': 50,
        'competence_mode': 'knn',
        'k': 20,
        'progress_mode': 'local',
    }
    if context_mode is None:
        im_cls = MiscRandomInterest
    else:
        im_cls = ContextRandomInterest
        im_kwargs['context_mode'] = context_mode
    self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

    self.sm = DemonstrableNN(self.conf,
                             fwd='NN',
                             inv='NN',
                             sigma_explo_ratio=explo_noise)

    Agent.__init__(self,
                   self.conf,
                   self.sm,
                   self.im,
                   context_mode=self.context_mode)
def __init__(self,
             config,
             n_motor_babbling=0,
             explo_noise=0.1,
             normalize_interests=False):
    """Create a supervisor holding a single learning module over all spaces.

    Args:
        config: mapping of keyword arguments forwarded to
            ``make_configuration``.
        n_motor_babbling: stored on the instance as-is.
        explo_noise: exploration noise forwarded to the learning module.
        normalize_interests: forwarded to the learning module.
    """
    self.config = config
    self.n_motor_babbling = n_motor_babbling
    self.explo_noise = explo_noise
    self.normalize_interests = normalize_interests

    self.conf = make_configuration(**config)

    self.t = 0
    self.modules = {}
    self.chosen_modules = []
    self.progresses_evolution = {}
    self.interests_evolution = {}
    self.mid_control = ""

    # Define motor and sensory spaces.
    # The spaces are materialised as lists: on Python 3 ``range`` objects
    # cannot be concatenated with ``+`` (TypeError), while on Python 2
    # ``list(range(...))`` is identical to ``range(...)``.
    m_ndims = self.conf.m_ndims  # number of motor parameters

    self.m_space = list(range(m_ndims))
    self.c_dims = list(range(m_ndims, m_ndims + 2))
    self.s_hand = list(range(m_ndims + 2, m_ndims + 32))
    self.s_joystick_1 = list(range(m_ndims + 32, m_ndims + 52))
    self.s_joystick_2 = list(range(m_ndims + 52, m_ndims + 72))
    self.s_ergo = list(range(m_ndims + 72, m_ndims + 92))
    self.s_ball = list(range(m_ndims + 92, m_ndims + 112))
    self.s_light = list(range(m_ndims + 112, m_ndims + 122))
    self.s_sound = list(range(m_ndims + 122, m_ndims + 132))

    self.s_spaces = dict(s_hand=self.s_hand,
                         s_joystick_1=self.s_joystick_1,
                         s_joystick_2=self.s_joystick_2,
                         s_ergo=self.s_ergo,
                         s_ball=self.s_ball,
                         s_light=self.s_light,
                         s_sound=self.s_sound)

    # Single module: all motors -> context followed by every sensory space.
    all_sensory = (self.c_dims + self.s_hand + self.s_joystick_1 +
                   self.s_joystick_2 + self.s_ergo + self.s_ball +
                   self.s_light + self.s_sound)
    self.modules["mod"] = LearningModule(
        "mod",
        self.m_space,
        all_sensory,
        self.conf,
        context_mode=dict(mode='mcs',
                          context_n_dims=2,
                          context_sensory_bounds=[[-1., -1.], [1., 1.]]),
        explo_noise=self.explo_noise,
        normalize_interests=self.normalize_interests)

    for mid in self.modules:
        self.progresses_evolution[mid] = []
        self.interests_evolution[mid] = []
def __init__(self, config, mid):
    """Create a module from its entry in the global configuration.

    Args:
        config: global configuration; ``modules``, ``agent``, ``ims`` and
            ``sms`` are read from it.
        mid: module name, used as key in ``config.modules``. Must begin
            with ``'mod'``.

    Raises:
        ValueError: if ``mid`` does not begin with ``'mod'``.
    """
    self.config = config  # global config
    self.mconf = config.modules[mid]  # module config

    if mid[0:3] != 'mod':
        raise ValueError('Module name must begin with mod')
    self.mid = mid

    # Bounds of the module, taken from the agent-wide bounds.
    m_dims = self.mconf['m']
    s_dims = self.mconf['s']
    self.conf = make_configuration(self.config.agent.mins[m_dims],
                                   self.config.agent.maxs[m_dims],
                                   self.config.agent.mins[s_dims],
                                   self.config.agent.maxs[s_dims])

    if self.mconf['babbling_name'] == 'motor':
        self.im_dims = self.conf.m_dims
    else:
        self.im_dims = self.conf.s_dims

    self.im_mode = self.mconf["im_mode"]

    self.s = None
    self.sp = None
    self.snn = None
    self.last_module_to_credit = -1
    self.last_interest = 0

    im_cls, im_kwargs = config.ims[self.mconf['im_name']]
    # Work on a copy: the kwargs dict belongs to the shared configuration and
    # must not be modified by one module on behalf of the others.
    im_kwargs = dict(im_kwargs, mode=self.im_mode)
    self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

    sm_cls, sm_kwargs = config.sms[self.mconf['sm_name']]
    self.sm = sm_cls(self.conf, **sm_kwargs)

    Agent.__init__(self, self.conf, self.sm, self.im)

    if self.mconf['from_log'] is not None:
        # from_log = (log directory, module name in the log, third argument
        # forwarded to fast_forward_models).
        from_log_dir = self.mconf['from_log'][0]
        from_log_mod = self.mconf['from_log'][1]
        # Pickle data is binary, so the file is opened in 'rb' mode; the
        # ``with`` block closes it.
        # NOTE(security): unpickling executes arbitrary code; only load logs
        # coming from a trusted source.
        with open(from_log_dir + '/log.pickle', 'rb') as f:
            log = cPickle.load(f)
        self.fast_forward_models(log, from_log_mod,
                                 self.mconf['from_log'][2])

    self.overall_interest = 0
    self.social_interest = 0
    self.top_down_interest = 0
    self.top_down_points = Queue.Queue()
    self.own_interest = 0
def __init__(self, config, mid):
    """Create a module from its entry in the global configuration.

    Args:
        config: global configuration; ``modules``, ``agent``, ``ims`` and
            ``sms`` are read from it.
        mid: module name, used as key in ``config.modules``. Must begin
            with ``'mod'``.

    Raises:
        ValueError: if ``mid`` does not begin with ``'mod'``.
    """
    self.config = config  # global config
    self.mconf = config.modules[mid]  # module config

    if mid[0:3] != 'mod':
        raise ValueError('Module name must begin with mod')
    self.mid = mid

    # Bounds of the module, taken from the agent-wide bounds.
    m_dims = self.mconf['m']
    s_dims = self.mconf['s']
    self.conf = make_configuration(self.config.agent.mins[m_dims],
                                   self.config.agent.maxs[m_dims],
                                   self.config.agent.mins[s_dims],
                                   self.config.agent.maxs[s_dims])

    if self.mconf['babbling_name'] == 'motor':
        self.im_dims = self.conf.m_dims
    else:
        self.im_dims = self.conf.s_dims

    self.im_mode = self.mconf["im_mode"]

    self.s = None
    self.sp = None
    self.snn = None
    self.last_module_to_credit = -1
    self.last_interest = 0

    im_cls, im_kwargs = config.ims[self.mconf['im_name']]
    # Work on a copy: the kwargs dict belongs to the shared configuration and
    # must not be modified by one module on behalf of the others.
    im_kwargs = dict(im_kwargs, mode=self.im_mode)
    self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

    sm_cls, sm_kwargs = config.sms[self.mconf['sm_name']]
    self.sm = sm_cls(self.conf, **sm_kwargs)

    Agent.__init__(self, self.conf, self.sm, self.im)

    if self.mconf['from_log'] is not None:
        # from_log = (log directory, module name in the log, third argument
        # forwarded to fast_forward_models).
        from_log_dir = self.mconf['from_log'][0]
        from_log_mod = self.mconf['from_log'][1]
        # Pickle data is binary, so the file is opened in 'rb' mode; the
        # ``with`` block closes it.
        # NOTE(security): unpickling executes arbitrary code; only load logs
        # coming from a trusted source.
        with open(from_log_dir + '/log.pickle', 'rb') as f:
            log = cPickle.load(f)
        self.fast_forward_models(log, from_log_mod,
                                 self.mconf['from_log'][2])

    self.overall_interest = 0
    self.social_interest = 0
    self.top_down_interest = 0
    self.top_down_points = Queue.Queue()
    self.own_interest = 0
def __init__(self, mid, m_space, s_space, env_conf, explo_noise, win_size,
             interest_model):
    """Create a learning module with a selectable interest model.

    Args:
        mid: identifier of the module.
        m_space: indices of the module's motor dimensions in ``env_conf``.
        s_space: indices of the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        env_conf: environment configuration (``m_mins``, ``m_maxs``,
            ``s_mins``, ``s_maxs``).
        explo_noise: passed as ``sigma_explo_ratio`` to the sensorimotor model.
        win_size: window size passed to the interest model.
        interest_model: ``'uniform'``, ``'normal'`` or ``'active'``.

    Raises:
        NotImplementedError: for any other ``interest_model`` value.
    """
    m_mins = env_conf.m_mins[m_space]
    m_maxs = env_conf.m_maxs[m_space]
    all_mins = array(list(m_mins) + list(env_conf.s_mins))
    all_maxs = array(list(m_maxs) + list(env_conf.s_maxs))
    self.conf = make_configuration(m_mins, m_maxs,
                                   all_mins[s_space], all_maxs[s_space])

    self.im_dims = self.conf.s_dims
    self.mid = mid
    self.m_space = m_space
    self.s_space = s_space
    self.motor_babbling_n_iter = 10
    self.s = None
    self.last_interest = 0

    if interest_model not in ('uniform', 'normal', 'active'):
        raise NotImplementedError

    # Settings common to the three interest models.
    common = {
        'competence_measure': competence_dist,
        'win_size': win_size,
        'competence_mode': 'knn',
        'k': 20,
        'progress_mode': 'local',
    }
    if interest_model == 'uniform':
        im_cls, im_kwargs = MiscRandomInterest, common
    elif interest_model == 'normal':
        im_cls, im_kwargs = MiscGaussianInterest, common
    else:  # 'active'
        im_cls = MiscDiscretizedInterest
        im_kwargs = {
            # 20 is the number of cells on each dimension
            'x_card': 20 ** len(self.im_dims),
            'cells_win_size': 20,  # window size parameter (ws)
            'eps_random': 0.1,  # proportion of random choice of cell
            'measure': competence_dist,
        }
        im_kwargs.update(common)
    self.im = im_cls(self.conf, self.im_dims, **im_kwargs)

    self.sm = NonParametric(self.conf,
                            fwd='NN',
                            inv='NN',
                            sigma_explo_ratio=explo_noise)

    Agent.__init__(self, self.conf, self.sm, self.im)
def __init__(self,
             mid,
             m_space,
             s_space,
             env_conf,
             explo_noise=0.,
             normalize_interests=True,
             context_mode=None):
    """Create a learning module backed by a ``BufferedDataset``.

    Args:
        mid: identifier of the module.
        m_space: indices of the module's motor dimensions in ``env_conf``.
        s_space: indices of the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        env_conf: environment configuration (``m_mins``, ``m_maxs``,
            ``s_mins``, ``s_maxs``).
        explo_noise: accepted but not used here; ``self.explo_noise`` is
            fixed to 0.2.
        normalize_interests: accepted but not used by this constructor.
        context_mode: when not None, a ``ContextRandomInterest`` is used and
            this value is forwarded to it and to ``Agent.__init__``.
    """
    m_mins = env_conf.m_mins[m_space]
    m_maxs = env_conf.m_maxs[m_space]
    all_mins = array(list(m_mins) + list(env_conf.s_mins))
    all_maxs = array(list(m_maxs) + list(env_conf.s_maxs))
    self.conf = make_configuration(m_mins, m_maxs,
                                   all_mins[s_space], all_maxs[s_space])

    self.im_dims = self.conf.s_dims
    self.mid = mid
    self.m_space = m_space
    self.context_mode = context_mode
    self.s_space = s_space
    self.motor_babbling_n_iter = 0
    self.n_mdims = 4
    self.n_sdims = len(s_space) // 10
    self.explo_noise = 0.2
    self.s = None
    self.last_interest = 0

    # Interest model: plain or context-aware random goal sampling.
    if context_mode is not None:
        self.im = ContextRandomInterest(self.conf, self.conf.s_dims,
                                        self.n_sdims, 100, context_mode)
    else:
        self.im = MiscRandomInterest(self.conf, self.conf.s_dims,
                                     self.n_sdims, win_size=100)

    # Sensorimotor memory.
    self.sm = BufferedDataset(self.conf.m_ndims,
                              self.conf.s_ndims,
                              buffer_size=10000,
                              lateness=10)

    Agent.__init__(self,
                   self.conf,
                   self.sm,
                   self.im,
                   context_mode=self.context_mode)
def __init__(self,
             conf,
             model_type_head='WNN',
             config_str_head='default',
             model_type_arm='WNN',
             config_str_arm='default'):
    """Initialise the NaoSensorimotorModel with a head and an arm sub-model.

    Args:
        conf (NaoConfiguration): a configuration class object
        model_type_head (str, optional): model type of the head sub-model
        config_str_head (str, optional): configuration name of the head
            sub-model
        model_type_arm (str, optional): model type of the arm sub-model
        config_str_arm (str, optional): configuration name of the arm
            sub-model
    """
    SensorimotorModel.__init__(self, conf)

    mins = self.conf.mins
    maxs = self.conf.maxs

    # Head model: head dimensions -> image dimensions followed by arm ones.
    head_m = self.conf.head_dims
    head_s = self.conf.image_dims + self.conf.arm_dims
    conf_sm_model_head = make_configuration(mins[head_m], maxs[head_m],
                                            mins[head_s], maxs[head_s])

    # Arm model: arm dimensions -> space dimensions.
    arm_m = self.conf.arm_dims
    arm_s = self.conf.space_dims
    conf_sm_model_arm = make_configuration(mins[arm_m], maxs[arm_m],
                                           mins[arm_s], maxs[arm_s])

    self.sm_model_head = SensorimotorModel.from_configuration(
        conf_sm_model_head, model_type_head, config_str_head)
    self.sm_model_arm = SensorimotorModel.from_configuration(
        conf_sm_model_arm, model_type_arm, config_str_arm)

    self.t = 0
    self.mode = 'explore'
    self.sm_model_head.mode = 'explore'
    self.sm_model_arm.mode = 'explore'
    self.sigma_m = [0.1, 0.1]
def bootstrap(self, expe, n, bootstap_range_div):
    """Feed the agent's sensorimotor model with ``n`` random motor commands.

    A reduced configuration is built around the centers of the agent's motor
    and sensory bounds, each range being divided by ``bootstap_range_div``.
    ``n`` motor commands are drawn inside the reduced motor bounds, executed
    in the environment, and the resulting (m, s) pairs are added to the
    sensorimotor model. The agent events are emitted and the experiment logs
    updated after every command.

    Args:
        expe: experiment object providing ``ag``, ``env`` and ``_update_logs``.
        n: number of motor commands to draw.
        bootstap_range_div: divisor applied to the agent's ranges.
    """
    agent = expe.ag
    n_dmps = agent.dmp.n_dmps

    # Half-widths of the reduced motor / sensory boxes.
    m_half = agent.conf.m_ranges / (2 * bootstap_range_div)
    s_half = agent.conf.s_ranges / (2 * bootstap_range_div)
    conf = make_configuration(agent.conf.m_centers - m_half,
                              agent.conf.m_centers + m_half,
                              agent.conf.s_centers - s_half,
                              agent.conf.s_centers + s_half)

    for m in rand_bounds(conf.m_bounds, n=n):
        # The last n_dmps motor values are replaced by the DMP defaults plus
        # Gaussian noise scaled by the reduced motor ranges.
        noise = conf.m_ranges[-n_dmps:] * randn(n_dmps)
        m[-n_dmps:] = agent.dmp.default[:n_dmps] + noise

        mov = agent.motor_primitive(m)
        s = agent.sensory_primitive(expe.env.update(mov, log=True))
        agent.sensorimotor_model.update(m, s)

        # No goal was chosen for these points, hence the NaN "choice".
        agent.emit('choice', array([nan] * len(agent.expl_dims)))
        agent.emit('inference', m)
        agent.emit('movement', mov)
        agent.emit('perception', s)
        expe._update_logs()
def __init__(self, mid, m_space, s_space, env_conf):
    """Create a learning module over a subset of the environment's dimensions.

    Args:
        mid: identifier of the module.
        m_space: indices selecting the module's motor dimensions in
            ``env_conf.m_mins`` / ``env_conf.m_maxs``.
        s_space: indices selecting the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        env_conf: environment configuration providing ``m_mins``, ``m_maxs``,
            ``s_mins`` and ``s_maxs``.
    """
    explo_noise = 0.05

    m_mins = env_conf.m_mins[m_space]
    m_maxs = env_conf.m_maxs[m_space]
    # s_space indexes the (selected motor + sensory) concatenated bounds.
    all_mins = array(list(m_mins) + list(env_conf.s_mins))
    all_maxs = array(list(m_maxs) + list(env_conf.s_maxs))
    self.conf = make_configuration(m_mins, m_maxs,
                                   all_mins[s_space], all_maxs[s_space])

    self.im_dims = self.conf.s_dims
    self.mid = mid
    self.m_space = m_space
    self.s_space = s_space
    self.motor_babbling_n_iter = 10
    self.s = None
    self.last_interest = 0

    # Interest model: random goals with a k-NN, local-progress competence.
    self.im = MiscRandomInterest(self.conf,
                                 self.im_dims,
                                 competence_measure=competence_dist,
                                 win_size=1000,
                                 competence_mode='knn',
                                 k=20,
                                 progress_mode='local')

    # Sensorimotor model: nearest-neighbour forward and inverse models.
    self.sm = NonParametric(self.conf,
                            fwd='NN',
                            inv='NN',
                            sigma_explo_ratio=explo_noise)

    Agent.__init__(self, self.conf, self.sm, self.im)
def __init__(self,
             mid,
             m_space,
             s_space,
             max_steps,
             env_conf,
             explo_noise=0.05,
             motor_babbling_n_iter=10,
             optim_explo=None,
             end_point=False):
    """Create a learning module whose bounds are repeated over several steps.

    Args:
        mid: identifier of the module.
        m_space: indices of the module's motor dimensions in ``env_conf``.
        s_space: indices of the module's sensory dimensions in the
            concatenation of the selected motor bounds and the sensory bounds.
        max_steps: number of times the per-step motor and sensory bounds are
            repeated in the configuration.
        env_conf: environment configuration (``m_mins``, ``m_maxs``,
            ``s_mins``, ``s_maxs``).
        explo_noise: stored on the instance.
        motor_babbling_n_iter: stored on the instance.
        optim_explo: stored on the instance.
        end_point: when true, an extra dataset ``sm_end`` is created and the
            interest model only covers the last ``len(s_space)`` sensory
            dimensions.
    """
    self.mid = mid
    self.m_space = m_space
    self.s_space = s_space
    self.n_mdims = len(self.m_space)
    self.n_sdims = len(self.s_space)
    self.max_steps = max_steps
    self.env_conf = env_conf
    self.explo_noise = explo_noise
    self.motor_babbling_n_iter = motor_babbling_n_iter
    self.optim_explo = optim_explo
    self.end_point = end_point
    self.s = None
    self.sg = None
    self.last_interest = 0
    self.t = 0

    # Sensorimotor Model: per-step bounds, repeated max_steps times.
    step_m_mins = list(env_conf.m_mins[m_space])
    step_m_maxs = list(env_conf.m_maxs[m_space])
    step_s_mins = list(
        np.array(step_m_mins + list(env_conf.s_mins))[s_space])
    step_s_maxs = list(
        np.array(step_m_maxs + list(env_conf.s_maxs))[s_space])
    conf = make_configuration(step_m_mins * self.max_steps,
                              step_m_maxs * self.max_steps,
                              step_s_mins * self.max_steps,
                              step_s_maxs * self.max_steps)

    # buffer_size: size of a small kdtree buffer to update this one often and
    # move the data to the big kdtree less often.
    # lateness: the model can be "late" by this number of points: they are
    # not yet taken into account (added to the small kdtree).
    self.sm = BufferedDataset(conf.m_ndims,
                              conf.s_ndims,
                              buffer_size=10000,
                              lateness=100)

    if self.end_point:
        self.sm_end = BufferedDataset(conf.m_ndims,
                                      len(s_space),
                                      buffer_size=10000,
                                      lateness=100)
        goal_dims = conf.s_dims[-self.n_sdims:]
    else:
        goal_dims = conf.s_dims
    self.interest_model = MiscRandomInterest(conf,
                                             goal_dims,
                                             self.n_sdims,
                                             win_size=200)
def __init__(self,
             name=None,
             hierarchy_type=0,
             babbling_name="goal",
             supervisor_name="interest",
             supervisor_explo="motor",
             supervisor_n_explo_points=0,
             supervisor_ccm="competence",
             supervisor_ccl="local",
             tdd=False,
             ns=False,
             perturbation=None,
             from_log=None,
             iterations=None):
    """Build the experiment, agent, supervisor and environment settings.

    Args:
        name: experiment name; ``'Experiment'`` when falsy.
        hierarchy_type: 0 (one flat module) or 1 (six chained modules).
        babbling_name: ``"goal"`` sets ``motor_babbling_n_iter`` to 0, any
            other value sets it to the number of iterations.
        supervisor_name: ``"random"``, ``"interest"``, ``"interest_greedy"``
            or ``"interest_bias"``.
        supervisor_explo: stored in the supervisor configuration (``explo``).
        supervisor_n_explo_points: stored in the supervisor configuration.
        supervisor_ccm: stored as ``choose_children_mode``.
        supervisor_ccl: stored as ``choose_children_local``.
        tdd: accepted but not used by this constructor.
        ns: accepted but not used by this constructor.
        perturbation: forwarded to the environment configuration.
        from_log: stored on the instance.
        iterations: number of iterations; 50 when falsy.

    Raises:
        NotImplementedError: for an unsupported ``hierarchy_type`` or
            ``supervisor_name``.
    """
    # ---------------------------- EXPERIMENT CONFIG ----------------------------
    self.name = name or 'Experiment'
    self.init_rest_trial = False
    self.bootstrap = 100
    self.bootstrap_range_div = 1.
    self.iter = iterations or 50
    self.log_each = self.iter  # must be <= iter
    self.eval_at = []
    self.n_eval = 0
    self.eval_modes = []
    self.gui = False

    self.hierarchy_type = hierarchy_type
    self.babbling_name = babbling_name
    self.motor_babbling_n_iter = (0 if self.babbling_name == "goal"
                                  else self.iter)
    self.from_log = from_log

    # ------------------------------ AGENT CONFIG -------------------------------
    self.n_dyn_motors = 4
    self.n_dmps = self.n_dyn_motors
    self.dmp_use_initial = False
    self.dmp_use_goal = True
    self.n_bfs = 2
    self.n_static_motor = 0
    self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)

    motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
    if self.dmp_use_initial:
        motor_n_dims += self.n_dmps
    if self.dmp_use_goal:
        motor_n_dims += self.n_dmps
    self.motor_n_dims = motor_n_dims

    self.move_steps = 50
    self.motor_dims = range(self.motor_n_dims)

    if self.hierarchy_type <= 1:
        dims_per_bf = 5
    elif self.hierarchy_type == 2:
        dims_per_bf = 7
    else:
        raise NotImplementedError
    self.s_n_dims = dims_per_bf * self.n_bfs + 5

    self.sensori_dims = range(self.motor_n_dims,
                              self.motor_n_dims + self.s_n_dims)
    self.used_dims = self.motor_n_dims + self.s_n_dims

    self.im_name = 'miscRandom_local'
    self.choose_children_local = (supervisor_ccl == 'local')

    self.sms = {
        'knn1': (NonParametric, {
            'fwd': 'NN',
            'inv': 'NN',
            'sigma_explo_ratio': 0.01
        }),
    }

    sm = 'knn1'
    im_mode = 'sg'
    self.std_range = [-1., 1.]

    m = self.motor_dims
    s = self.sensori_dims

    self.operators = ["par"]

    def module_conf(m_dims, s_dims):
        # Settings of one module mapping m_dims to s_dims; everything except
        # the two spaces is the same for all modules.
        return dict(m=m_dims,
                    s=s_dims,
                    m_list=[m_dims],
                    operator="par",
                    babbling_name="goal",
                    sm_name=sm,
                    im_name=self.im_name,
                    im_mode=im_mode,
                    from_log=None,
                    motor_babbling_n_iter=self.motor_babbling_n_iter)

    if self.hierarchy_type == 0:
        self.m_spaces = dict(m=m)
        self.s_spaces = dict(s=s)
        self.modules = dict(mod1=module_conf(m, s))
    elif self.hierarchy_type == 1:
        base = self.motor_n_dims
        self.m_spaces = dict(m_arm=range(12))
        self.s_spaces = dict(s_h=range(base + 0, base + 9),
                             s_t1=range(base + 9, base + 15),
                             s_t2=range(base + 15, base + 21),
                             s_o=range(base + 21, base + 23),
                             s_b=range(base + 23, base + 25))

        m_arm = self.m_spaces["m_arm"]
        s_h = self.s_spaces["s_h"]
        s_t1 = self.s_spaces["s_t1"]
        s_t2 = self.s_spaces["s_t2"]
        s_o = self.s_spaces["s_o"]
        s_b = self.s_spaces["s_b"]

        # Each module uses the sensory space of a previous one as motor space.
        self.modules = dict(
            mod1=module_conf(m_arm, s_h),
            mod2=module_conf(s_h, s_t1),
            mod3=module_conf(s_t1, s_o),
            mod4=module_conf(s_o, s_b),
            mod5=module_conf(s_h, s_t2),
            mod6=module_conf(s_t2, s_o),
        )
    else:
        raise NotImplementedError

    self.supervisor_name = supervisor_name
    self.supervisor_explo = supervisor_explo
    self.supervisor_n_explo_points = supervisor_n_explo_points
    self.supervisor_ccm = supervisor_ccm
    self.supervisor_ccl = supervisor_ccl

    # supervisor name -> (choice, llb)
    supervisor_modes = {
        "random": ("random", False),
        "interest": ("prop", False),
        "interest_greedy": ("greedy", False),
        "interest_bias": ("prop", True),
    }
    if self.supervisor_name not in supervisor_modes:
        raise NotImplementedError
    choice, llb = supervisor_modes[self.supervisor_name]
    self.supervisor_cls = Supervisor
    self.supervisor_config = dict(
        choice=choice,
        llb=llb,
        explo=self.supervisor_explo,
        n_explo_points=self.supervisor_n_explo_points,
        choose_children_mode=self.supervisor_ccm,
        choose_children_local=self.supervisor_ccl)

    self.eval_dims = s[-4:-2]
    self.eval_explo_dims = s[-4:-2]

    self.eval_range = np.array([[-1.], [1.]])
    self.eval_explo_eps = 0.02
    self.eval_explo_comp_eps = 0.02

    # ------------------------------- Env CONFIG --------------------------------
    self.max_param = 500.  # max DMP weight
    self.max_params = self.max_param * np.ones(
        (self.n_dmps * self.n_bfs, ))

    if self.dmp_use_initial:
        self.max_params = np.append([1] * self.n_dmps, self.max_params)
    if self.dmp_use_goal:
        self.max_params = np.append(self.max_params, [1] * self.n_dmps)

    self.env_cls = CogSci2016Environment
    self.env_cfg = dict(move_steps=self.move_steps,
                        max_params=self.max_params,
                        perturbation=perturbation,
                        gui=self.gui)

    self.rest_position = [0.] * self.motor_n_dims

    n_pts = self.n_bfs + 1
    self.m_mins = [-1.] * (self.n_dyn_motors * n_pts)
    self.m_maxs = [1.] * (self.n_dyn_motors * n_pts)

    self.s_mins = ([-1.] * (3 * n_pts) +
                   2 * ([-1.5] * n_pts + [0.] * n_pts) +
                   [-2., -2., 0., 0.])
    self.s_maxs = ([1.] * (3 * n_pts) +
                   [1.5] * (4 * n_pts) +
                   [2., 2., 10., 0.3])

    # ----------------------------- Process CONFIG ------------------------------
    self.agent = make_configuration(self.m_mins, self.m_maxs, self.s_mins,
                                    self.s_maxs)
    self.tag = self.name
    self.log_dir = ''  # determined later
def __init__(self,
             config,
             model_babbling="random",
             n_motor_babbling=0,
             explo_noise=0.1,
             choice_eps=0.2,
             proba_imitate=0.5,
             tau=1.):
    """Create the supervisor and its arm / diva learning modules.

    Args:
        config: mapping of keyword arguments forwarded to
            ``make_configuration``. The resulting configuration must have
            ``arm_n_dims + diva_n_dims`` (68) motor dimensions.
        model_babbling: stored on the instance.
        n_motor_babbling: stored on the instance.
        explo_noise: exploration noise forwarded to every learning module.
        choice_eps: stored on the instance.
        proba_imitate: forwarded to the imitating module ``mod14``.
        tau: accepted but not used by this constructor.
    """
    self.config = config
    self.model_babbling = model_babbling
    self.n_motor_babbling = n_motor_babbling
    self.explo_noise = explo_noise
    self.choice_eps = choice_eps
    self.proba_imitate = proba_imitate

    self.conf = make_configuration(**config)

    self.t = 0
    self.modules = {}
    self.chosen_modules = []
    self.goals = []
    self.cp_evolution = {}
    self.pp_evolution = {}

    self.mid_control = None
    self.last_cmd = None

    # Define motor and sensory spaces.
    # The spaces are materialised as lists: on Python 3 ``range`` objects
    # cannot be concatenated with ``+`` (TypeError), while on Python 2
    # ``list(range(...))`` is identical to ``range(...)``.
    m_ndims = self.conf.m_ndims  # number of motor parameters
    self.arm_n_dims = 40
    self.diva_n_dims = 28
    assert (m_ndims == self.arm_n_dims + self.diva_n_dims)

    self.m_arm = list(range(self.arm_n_dims))
    self.m_diva = list(
        range(self.arm_n_dims, self.arm_n_dims + self.diva_n_dims))

    self.m_space = list(range(m_ndims))
    self.c_dims = list(range(m_ndims, m_ndims + 3))
    self.s_hand = list(range(m_ndims + 3, m_ndims + 33))
    self.s_culbuto_1 = list(range(m_ndims + 33, m_ndims + 63))
    self.s_self_sound = list(range(m_ndims + 63, m_ndims + 73))
    self.s_caregiver_sound = list(range(m_ndims + 73, m_ndims + 83))

    self.s_spaces = dict(s_hand=self.s_hand,
                         s_culbuto_1=self.s_culbuto_1,
                         s_self_sound=self.s_self_sound,
                         s_caregiver_sound=self.s_caregiver_sound)

    self.arm_modules = ['mod1', 'mod3', 'mod6']
    self.diva_modules = ['mod12', 'mod14']

    self.arm_goal_selection = 0.10

    def context_module(mid, m_dims, s_dims):
        # Module whose sensory space is the 3 context dimensions followed by
        # s_dims.
        return LearningModule(
            mid,
            m_dims,
            self.c_dims + s_dims,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_dims=[0, 1, 2],
                              context_n_dims=3,
                              context_sensory_bounds=[[-2.] * 3, [2.] * 3]),
            explo_noise=self.explo_noise)

    # Create the learning modules (mod13 is intentionally not created):
    self.modules['mod1'] = LearningModule("mod1",
                                          self.m_arm,
                                          self.s_hand,
                                          self.conf,
                                          explo_noise=self.explo_noise)
    self.modules['mod3'] = context_module("mod3", self.m_arm,
                                          self.s_culbuto_1)
    self.modules['mod6'] = context_module("mod6", self.m_arm,
                                          self.s_caregiver_sound)
    self.modules['mod12'] = context_module("mod12", self.m_diva,
                                           self.s_culbuto_1)
    self.modules['mod14'] = LearningModule(
        "mod14",
        self.m_diva,
        self.s_self_sound,
        self.conf,
        imitate="mod6",
        explo_noise=self.explo_noise,
        proba_imitate=self.proba_imitate)

    for mid in self.modules:
        self.cp_evolution[mid] = []
        self.pp_evolution[mid] = []

    self.count_arm = 0
    self.count_diva = 0

    # Existing module ids, ordered by module number.
    self.mids = [
        "mod" + str(i) for i in range(1, 15)
        if "mod" + str(i) in self.modules
    ]

    # Book-keeping
    self.actions = []
    self.observations = []
def __init__(self,
             config,
             model_babbling="random",
             n_motor_babbling=0,
             explo_noise=0.1,
             choice_eps=0.2,
             proba_imitate=0.5):
    """Create the supervisor and its 10 arm / diva learning modules.

    Args:
        config: mapping of keyword arguments forwarded to
            ``make_configuration``.
        model_babbling: stored on the instance.
        n_motor_babbling: stored on the instance.
        explo_noise: exploration noise forwarded to every learning module.
        choice_eps: stored on the instance.
        proba_imitate: forwarded to every learning module.
    """
    self.config = config
    self.model_babbling = model_babbling
    self.n_motor_babbling = n_motor_babbling
    self.explo_noise = explo_noise
    self.choice_eps = choice_eps
    self.proba_imitate = proba_imitate

    self.conf = make_configuration(**config)

    self.t = 0
    self.modules = {}
    self.chosen_modules = []
    self.cp_evolution = {}
    self.pp_evolution = {}

    self.mid_control = None
    self.last_cmd = None

    # Define motor and sensory spaces.
    # The spaces are materialised as lists: on Python 3 ``range`` objects
    # (and their slices) cannot be concatenated with ``+`` (TypeError), while
    # on Python 2 ``list(range(...))`` is identical to ``range(...)``.
    m_ndims = self.conf.m_ndims  # number of motor parameters
    self.arm_n_dims = 21
    self.diva_n_dims = 28

    self.m_arm = list(range(self.arm_n_dims))
    self.m_diva = list(
        range(self.arm_n_dims, self.arm_n_dims + self.diva_n_dims))

    self.m_space = list(range(m_ndims))
    self.c_dims = list(range(m_ndims, m_ndims + 10))
    self.s_hand = list(range(m_ndims + 10, m_ndims + 20))
    self.s_tool = list(range(m_ndims + 20, m_ndims + 30))
    self.s_toy1 = list(range(m_ndims + 30, m_ndims + 40))
    self.s_toy2 = list(range(m_ndims + 40, m_ndims + 50))
    self.s_toy3 = list(range(m_ndims + 50, m_ndims + 60))
    self.s_sound = list(range(m_ndims + 60, m_ndims + 70))
    self.s_caregiver = list(range(m_ndims + 70, m_ndims + 80))

    self.s_spaces = dict(s_hand=self.s_hand,
                         s_tool=self.s_tool,
                         s_toy1=self.s_toy1,
                         s_toy2=self.s_toy2,
                         s_toy3=self.s_toy3,
                         s_sound=self.s_sound,
                         s_caregiver=self.s_caregiver)

    def context_module(mid, m_dims, context_dims, s_dims):
        # Module whose sensory space is the selected context dimensions
        # (positions within c_dims) followed by s_dims.
        n_ctx = len(context_dims)
        return LearningModule(
            mid,
            m_dims,
            [self.c_dims[i] for i in context_dims] + s_dims,
            self.conf,
            context_mode=dict(mode='mcs',
                              context_dims=context_dims,
                              context_n_dims=n_ctx,
                              context_sensory_bounds=[[-1.] * n_ctx,
                                                      [1.] * n_ctx]),
            explo_noise=self.explo_noise,
            proba_imitate=self.proba_imitate)

    # Create the 10 learning modules.
    # mod7, mod8, mod9 and mod14 are intentionally not created.
    self.modules['mod1'] = LearningModule("mod1",
                                          self.m_arm,
                                          self.s_hand,
                                          self.conf,
                                          explo_noise=self.explo_noise,
                                          proba_imitate=self.proba_imitate)
    self.modules['mod2'] = context_module("mod2", self.m_arm,
                                          [0, 1], self.s_tool)
    self.modules['mod3'] = context_module("mod3", self.m_arm,
                                          [0, 1, 2, 3], self.s_toy1)
    self.modules['mod4'] = context_module("mod4", self.m_arm,
                                          [0, 1, 4, 5], self.s_toy2)
    self.modules['mod5'] = context_module("mod5", self.m_arm,
                                          [0, 1, 6, 7], self.s_toy3)
    self.modules['mod6'] = context_module("mod6", self.m_arm,
                                          [0, 1, 2, 3, 4, 5, 6, 7],
                                          self.s_sound)

    self.modules['mod10'] = context_module("mod10", self.m_diva,
                                           [2, 3, 8, 9], self.s_toy1)
    self.modules['mod11'] = context_module("mod11", self.m_diva,
                                           [4, 5, 8, 9], self.s_toy2)
    self.modules['mod12'] = context_module("mod12", self.m_diva,
                                           [6, 7, 8, 9], self.s_toy3)
    self.modules['mod13'] = LearningModule(
        "mod13",
        self.m_diva,
        self.s_sound,
        self.conf,
        imitate="mod6",
        explo_noise=self.explo_noise,
        proba_imitate=self.proba_imitate)

    for mid in self.modules:
        self.cp_evolution[mid] = []
        self.pp_evolution[mid] = []

    self.count_arm = 0
    self.count_diva = 0

    # Existing module ids, ordered by module number.
    self.mids = [
        "mod" + str(i) for i in range(1, 15)
        if "mod" + str(i) in self.modules
    ]
def __init__(self, config, babbling_mode="active", n_motor_babbling=0, explo_noise=0.1, choice_eps=0.2, normalize_interests=True):
    """Initialise the agent: configuration, bookkeeping, spaces and modules.

    Args:
        config: mapping expanded as keyword arguments into
            ``make_configuration``.
        babbling_mode: ``"active"`` is stored as ``"prop"``; every other
            value is stored as ``"random"``.
        n_motor_babbling: stored unchanged on the instance.
        explo_noise: exploration noise forwarded to every ``LearningModule``.
        choice_eps: stored unchanged on the instance (as a scalar, not a
            tuple).
        normalize_interests: forwarded to every ``LearningModule``.
    """
    self.config = config
    self.babbling_mode = "prop" if babbling_mode == "active" else "random"
    self.n_motor_babbling = n_motor_babbling
    self.explo_noise = explo_noise
    # The original had a stray trailing comma here, which silently stored
    # the 1-tuple ``(choice_eps,)`` instead of the number itself.
    self.choice_eps = choice_eps
    self.normalize_interests = normalize_interests

    self.conf = make_configuration(**config)

    self.t = 0
    self.modules = {}
    self.chosen_modules = []
    self.progresses_evolution = {}
    self.interests_evolution = {}

    self.have_to_replay_arm_demo = None
    self.mid_control = ''

    # Define motor and sensory spaces as index lists. Sensory indices are
    # offsets placed right after the motor parameters.
    m_ndims = self.conf.m_ndims  # number of motor parameters

    def span(start, stop):
        # Explicit list so the spaces can be concatenated with ``+`` on
        # Python 3 as well (``range`` objects do not support ``+`` there).
        return list(range(m_ndims + start, m_ndims + stop))

    self.m_space = list(range(m_ndims))
    self.c_dims = span(0, 2)
    self.s_hand = span(2, 32)
    self.s_joystick_1 = span(32, 52)
    self.s_joystick_2 = span(52, 72)
    self.s_ergo = span(72, 92)
    self.s_ball = span(92, 112)
    self.s_light = span(112, 122)
    self.s_sound = span(122, 132)

    self.s_spaces = dict(s_hand=self.s_hand,
                         s_joystick_1=self.s_joystick_1,
                         s_joystick_2=self.s_joystick_2,
                         s_ergo=self.s_ergo,
                         s_ball=self.s_ball,
                         s_light=self.s_light,
                         s_sound=self.s_sound)

    def mcs_context(n_dims):
        # A fresh dict per module, so modules never share mutable settings.
        return dict(mode='mcs',
                    context_n_dims=n_dims,
                    context_sensory_bounds=[[-1.] * n_dims, [1.] * n_dims])

    # Create the 7 learning modules, in a fixed order. Modules 4-7 see
    # context dimensions prepended to their sensory space.
    module_specs = [
        ("mod1", self.s_hand, None),
        ("mod2", self.s_joystick_1, None),
        ("mod3", self.s_joystick_2, None),
        ("mod4", [self.c_dims[0]] + self.s_ergo, mcs_context(1)),
        ("mod5", self.c_dims + self.s_ball, mcs_context(2)),
        ("mod6", self.c_dims + self.s_light, mcs_context(2)),
        ("mod7", self.c_dims + self.s_sound, mcs_context(2)),
    ]
    for mid, s_space, context_mode in module_specs:
        module_kwargs = dict(explo_noise=self.explo_noise,
                             normalize_interests=self.normalize_interests)
        if context_mode is not None:
            module_kwargs["context_mode"] = context_mode
        self.modules[mid] = LearningModule(
            mid, self.m_space, s_space, self.conf, **module_kwargs)

    # Two-way lookup between sensory space names and module ids.
    self.space2mid = dict(s_hand="mod1",
                          s_joystick_1="mod2",
                          s_joystick_2="mod3",
                          s_ergo="mod4",
                          s_ball="mod5",
                          s_light="mod6",
                          s_sound="mod7")

    self.mid2space = dict(mod1="s_hand",
                          mod2="s_joystick_1",
                          mod3="s_joystick_2",
                          mod4="s_ergo",
                          mod5="s_ball",
                          mod6="s_light",
                          mod7="s_sound")

    # One history list per module for progress and interest.
    for mid in self.modules:
        self.progresses_evolution[mid] = []
        self.interests_evolution[mid] = []
def __init__(self, name=None, hierarchy_type=0, babbling_name="goal", supervisor_name="interest", supervisor_explo="motor", supervisor_n_explo_points = 0, supervisor_ccm="competence", supervisor_ccl="local", sm_model='NN', im_model='miscRandom_local', im_mode='sg', tdd=False, ns=False, envnoise=0, perturbation=None, allow_split_mod1=False, from_log=None, bootstrap=0, explo_noise=0.01, iterations=None):
    """Fill in the experiment, agent, supervisor and environment settings.

    Args:
        name: experiment name; falls back to ``'Experiment'`` when falsy.
        hierarchy_type: 0, 1 or 2, selecting the module layout; any other
            value raises ``NotImplementedError``.
        babbling_name: ``"goal"`` gives 10 motor-babbling iterations,
            anything else gives ``self.iter``.
        supervisor_name: one of ``"random"``, ``"interest"``,
            ``"interest-pmin"``, ``"interest_greedy"``, ``"interest_bias"``;
            anything else raises ``NotImplementedError``.
        supervisor_explo, supervisor_n_explo_points, supervisor_ccm,
        supervisor_ccl, allow_split_mod1: copied into the supervisor config.
        sm_model, im_model, im_mode: names written into each module spec.
        tdd, ns: accepted but not used in this constructor.
        envnoise, perturbation: copied into the environment config.
        from_log: stored on the instance.
        bootstrap: stored on the instance.
        explo_noise: ``sigma_explo_ratio`` for the exploring sensorimotor
            models.
        iterations: number of iterations; falls back to 50 when falsy.

    Raises:
        NotImplementedError: unknown ``hierarchy_type`` or
            ``supervisor_name``.
    """
    # ------------------------- experiment -------------------------
    self.name = name or 'Experiment'
    self.init_rest_trial = False
    self.bootstrap = bootstrap
    self.bootstrap_range_div = 1.
    self.iter = iterations or 50
    self.log_each = self.iter  # has to stay <= self.iter
    self.eval_at = []
    self.n_eval = 0
    self.eval_modes = []
    self.gui = False

    self.hierarchy_type = hierarchy_type
    self.babbling_name = babbling_name
    self.motor_babbling_n_iter = 10 if self.babbling_name == "goal" else self.iter
    self.from_log = from_log

    # --------------------------- agent ----------------------------
    self.n_dyn_motors = 4
    self.n_dmps = self.n_dyn_motors
    self.dmp_use_initial = False
    self.dmp_use_goal = True
    self.n_bfs = 2
    self.n_static_motor = 0
    self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)

    # One extra block of n_dmps motor dims per enabled DMP option.
    self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
    for option_enabled in (self.dmp_use_initial, self.dmp_use_goal):
        if option_enabled:
            self.motor_n_dims = self.motor_n_dims + self.n_dmps

    self.move_steps = 50
    self.motor_dims = range(self.motor_n_dims)
    self.s_n_dims = 31 * 3
    self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
    self.used_dims = self.motor_n_dims + self.s_n_dims

    self.choose_children_local = (supervisor_ccl == 'local')

    def misc_random(progress_mode):
        # The interest-model entries differ only in their progress mode.
        return (MiscRandomInterest, {
            'competence_measure': competence_dist,
            'win_size': 1000,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': progress_mode})

    self.ims = {
        'miscRandom_local': misc_random('local'),
        'miscRandom_global': misc_random('global'),
    }

    self.sms = {
        'NN': (NonParametric, {
            'fwd': 'NN', 'inv': 'NN', 'sigma_explo_ratio': explo_noise}),
        'LWLR-BFGS-EXPLO': (NonParametric, {
            'fwd': 'LWLR', 'k': 10, 'sigma': 0.1,
            'sigma_explo_ratio': explo_noise,
            'inv': 'L-BFGS-B', 'maxfun': 200, 'ftol': 0, 'gtol': 0}),
        'LWLR-BFGS-NOEXPLO': (NonParametric, {
            'fwd': 'LWLR', 'k': 20, 'sigma': 0.1,
            'sigma_explo_ratio': 0.,
            'inv': 'L-BFGS-B', 'maxfun': 200, 'ftol': 0, 'gtol': 0}),
        'LWLR-CMAES': (NonParametric, {
            'fwd': 'LWLR', 'k': 10, 'sigma': 0.1,
            'inv': 'CMAES', 'cmaes_sigma': 0.05,
            'sigma_explo_ratio': explo_noise, 'maxfevals': 20}),
    }

    self.sm_model = sm_model
    self.im_model = im_model
    self.im_name = self.im_model

    sm = self.sm_model

    self.std_range = [-1.,1.]

    m = self.motor_dims
    s = self.sensori_dims

    self.operators = ["par"]

    def module_spec(m_space, s_space):
        # Every module shares these settings; only the spaces change.
        return dict(m=m_space,
                    s=s_space,
                    m_list=[m_space],
                    operator="par",
                    babbling_name="goal",
                    sm_name=sm,
                    im_name=self.im_name,
                    im_mode=im_mode,
                    from_log=None,
                    motor_babbling_n_iter=self.motor_babbling_n_iter)

    def span(start, stop):
        # Sensory indices are offsets placed after the motor dimensions.
        return range(self.motor_n_dims + start, self.motor_n_dims + stop)

    if self.hierarchy_type == 0:
        # Flat layout: one module from all motor dims to all sensory dims.
        self.m_spaces = dict(m=m)
        self.s_spaces = dict(s=s)
        self.modules = dict(mod1=module_spec(m, s))
    elif self.hierarchy_type == 1:
        # One module per sensory sub-space, all driven by the arm space.
        self.m_spaces = dict(m_arm=range(12))
        self.s_spaces = dict(s_h=span(0, 9),
                             s_t1=span(9, 15),
                             s_t2=span(15, 21),
                             s_o1=span(21, 27),
                             s_o2=span(27, 33),
                             s_o3=span(33, 39),
                             s_o4=span(39, 45),
                             s_o5=span(45, 51),
                             s_o6=span(51, 57),
                             s_o7=span(57, 63),
                             s_o8=span(63, 69),
                             s_o9=span(69, 75),
                             s_o10=span(75, 81),
                             s_o11=span(81, 87),
                             s_o12=span(87, 93))
        arm = self.m_spaces["m_arm"]
        spaces = self.s_spaces
        self.modules = dict(mod1=module_spec(arm, spaces["s_h"]),
                            mod2=module_spec(arm, spaces["s_t1"]),
                            mod3=module_spec(arm, spaces["s_t2"]),
                            mod4=module_spec(arm, spaces["s_o1"]),
                            mod5=module_spec(arm, spaces["s_o2"]),
                            mod6=module_spec(arm, spaces["s_o3"]),
                            mod7=module_spec(arm, spaces["s_o4"]),
                            mod8=module_spec(arm, spaces["s_o5"]),
                            mod9=module_spec(arm, spaces["s_o6"]),
                            mod10=module_spec(arm, spaces["s_o7"]),
                            mod11=module_spec(arm, spaces["s_o8"]),
                            mod12=module_spec(arm, spaces["s_o9"]),
                            mod13=module_spec(arm, spaces["s_o10"]),
                            mod14=module_spec(arm, spaces["s_o11"]),
                            mod15=module_spec(arm, spaces["s_o12"]))
    elif self.hierarchy_type == 2:
        # Two modules, each on two hand-picked sensory dimensions.
        self.m_spaces = dict(m_arm=range(12))
        self.s_spaces = dict(
            s_o1=[self.motor_n_dims + 23, self.motor_n_dims + 26],
            s_o4=[self.motor_n_dims + 41, self.motor_n_dims + 44],
        )
        arm = self.m_spaces["m_arm"]
        self.modules = dict(
            mod4=module_spec(arm, self.s_spaces["s_o1"]),
            mod7=module_spec(arm, self.s_spaces["s_o4"]),
        )
    else:
        raise NotImplementedError

    # ------------------------- supervisor -------------------------
    self.supervisor_name = supervisor_name
    self.supervisor_explo = supervisor_explo
    self.supervisor_n_explo_points = supervisor_n_explo_points
    self.supervisor_ccm = supervisor_ccm
    self.supervisor_ccl = supervisor_ccl

    # (supervisor name, choice, llb): the only settings that vary.
    supervisor_variants = (
        ("random", "random", False),
        ("interest", "prop", False),
        ("interest-pmin", "prop-min", False),
        ("interest_greedy", "greedy", False),
        ("interest_bias", "prop", True),
    )
    for known_name, choice, llb in supervisor_variants:
        if self.supervisor_name == known_name:
            break
    else:
        raise NotImplementedError

    self.supervisor_cls = Supervisor
    self.supervisor_config = dict(choice=choice,
                                  llb=llb,
                                  explo=self.supervisor_explo,
                                  n_explo_points=self.supervisor_n_explo_points,
                                  choose_children_mode=self.supervisor_ccm,
                                  choose_children_local=self.supervisor_ccl,
                                  allow_split_mod1=allow_split_mod1)

    # ------------------------- evaluation -------------------------
    self.eval_dims = s[-4:-2]
    self.eval_explo_dims = s[-4:-2]

    self.eval_range = np.array([[-1.],
                                [1.]])
    self.eval_explo_eps = 0.02
    self.eval_explo_comp_eps = 0.02

    # ------------------------ environment -------------------------
    self.max_param = 500.  # max DMP weight
    self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

    if self.dmp_use_initial:
        self.max_params = np.append([1] * self.n_dmps, self.max_params)
    if self.dmp_use_goal:
        self.max_params = np.append(self.max_params, [1] * self.n_dmps)

    self.env_cls = IROS2016Environment
    self.env_cfg = dict(move_steps=self.move_steps,
                        max_params=self.max_params,
                        noise=envnoise,
                        perturbation=perturbation,
                        gui=self.gui)

    # Replaces the array assigned in the agent section above.
    self.rest_position = [0.] * self.motor_n_dims

    n_motor_bounds = self.n_dyn_motors * (self.n_bfs + 1)
    self.m_mins = [-1.] * n_motor_bounds
    self.m_maxs = [1.] * n_motor_bounds
    self.s_mins = [-1.5] * 31 * 3
    self.s_maxs = [1.5] * 31 * 3

    # -------------------------- process ---------------------------
    self.agent = make_configuration(self.m_mins,
                                    self.m_maxs,
                                    self.s_mins,
                                    self.s_maxs)
    self.tag = self.name
    self.log_dir = ''  # determined later
def __init__(self, name=None, hierarchy_type=0, babbling_name="goal", supervisor_name="interest", supervisor_explo="motor", supervisor_n_explo_points = 0, supervisor_ccm="competence", supervisor_ccl="local", im_model='miscRandom_local', tdd=False, ns=False, perturbation=None, from_log=None, iterations=None):
    """Fill in the settings of the context-based experiment.

    Args:
        name: experiment name; falls back to ``'Experiment'`` when falsy.
        hierarchy_type: 0 (single flat module) or 1 (four chained modules);
            any other value raises ``NotImplementedError``.
        babbling_name: ``"goal"`` gives 0 motor-babbling iterations,
            anything else gives ``self.iter``.
        supervisor_name: one of ``"random"``, ``"interest"``,
            ``"interest_greedy"``, ``"interest_bias"``; anything else raises
            ``NotImplementedError``.
        supervisor_explo, supervisor_n_explo_points, supervisor_ccm,
        supervisor_ccl: copied into the supervisor config.
        im_model: interest model name written into the module specs.
        tdd, ns, perturbation: accepted but not used in this constructor.
        from_log: stored on the instance.
        iterations: number of iterations; falls back to 50 when falsy.

    Raises:
        NotImplementedError: unknown ``hierarchy_type`` or
            ``supervisor_name``.
    """
    ################################### EXPERIMENT CONFIG ###################################
    self.name = name or 'Experiment'
    self.init_rest_trial = False
    self.bootstrap = 100
    self.bootstrap_range_div = 1.
    self.iter = iterations or 50
    self.log_each = self.iter  # must be <= iter
    self.eval_at = []
    self.n_eval = 0
    self.eval_modes = []
    self.gui = True

    self.hierarchy_type = hierarchy_type
    self.babbling_name = babbling_name
    if self.babbling_name == "goal":
        self.motor_babbling_n_iter = 0
    else:
        self.motor_babbling_n_iter = self.iter
    self.from_log = from_log

    ################################### AGENT CONFIG ###################################
    self.n_dyn_motors = 3
    self.n_dmps = self.n_dyn_motors
    self.dmp_use_initial = False
    self.dmp_use_goal = True
    self.n_bfs = 2
    self.n_static_motor = 0
    self.rest_position = np.zeros(self.n_dmps + self.n_static_motor)

    self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
    if self.dmp_use_initial:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps
    if self.dmp_use_goal:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps

    self.n_context_dims = 2

    # This dict is the one captured by self.ims, self.sms and the module
    # specs below. self.context_mode is rebound to a different dict in the
    # Env section.
    self.context_mode = dict(mode='mcs',
                             reset_iterations=20,
                             context_n_dims=self.n_context_dims,
                             context_sensory_bounds=np.array([[-1.5, -1.5], [1.5, 1.5]]))

    self.move_steps = 50
    # Index spaces are explicit lists: identical to range() on Python 2,
    # and still concatenable with ``+`` on Python 3.
    self.motor_dims = list(range(self.motor_n_dims))
    self.s_n_dims = 17

    self.sensori_dims = list(range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims))
    self.used_dims = self.motor_n_dims + self.s_n_dims

    self.im_model = im_model
    self.im_name = self.im_model

    self.ims = {
        'miscRandom_local': (MiscRandomInterest, {
            'competence_measure': competence_dist,
            'win_size': 1000,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': 'local'}),
        'context_miscRandom_local': (ContextRandomInterest, {
            'win_size': 1000,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': 'local',
            'context_mode': self.context_mode}),
    }

    self.choose_children_local = (supervisor_ccl == 'local')

    self.sms = {
        'knn1': (NonParametric, {
            'fwd': 'NN', 'inv': 'NN', 'sigma_explo_ratio': 0.01}),
        'context_knn': (ContextNonParametric, {
            'fwd': 'NN', 'inv': 'NN', 'sigma_explo_ratio': 0.01,
            'context_mode': self.context_mode}),
    }

    sm = 'knn1'
    im_mode = 'sg'
    self.std_range = [-1., 1.]

    m = self.motor_dims
    s = self.sensori_dims

    self.operators = ["par"]

    def module_spec(m_space, s_space, sm_name, im_name, context_mode):
        # Settings common to every module; only the arguments vary.
        return dict(m=m_space,
                    s=s_space,
                    m_list=[m_space],
                    operator="par",
                    babbling_name="goal",
                    sm_name=sm_name,
                    im_name=im_name,
                    im_mode=im_mode,
                    from_log=None,
                    context_mode=context_mode,
                    motor_babbling_n_iter=self.motor_babbling_n_iter)

    if self.hierarchy_type == 0:
        self.m_spaces = dict(m=m)
        self.s_spaces = dict(s=s)
        self.modules = dict(
            mod1=module_spec(m, s, sm, self.im_name, self.context_mode))
    elif self.hierarchy_type == 1:
        # Sensory layout: the context dims come first, then the s_h,
        # s_t1 and object dims.
        ctx_start = self.motor_n_dims
        obs_start = self.motor_n_dims + self.n_context_dims

        self.m_spaces = dict(m_arm=list(range(9)))
        self.s_spaces = dict(
            s_h=list(range(obs_start + 0, obs_start + 6)),
            s_t1=list(range(obs_start + 6, obs_start + 12)),
            # s_o is the two context dims followed by three object dims.
            # The original added two range() objects, which raises
            # TypeError on Python 3.
            s_o=(list(range(ctx_start, ctx_start + 2))
                 + list(range(obs_start + 12, obs_start + 15))))

        arm = self.m_spaces["m_arm"]
        s_h = self.s_spaces["s_h"]
        s_t1 = self.s_spaces["s_t1"]
        s_o = self.s_spaces["s_o"]

        # mod1 and mod2 use the plain models; mod3 and mod4 use the
        # context-aware ones.
        self.modules = dict(
            mod1=module_spec(arm, s_h, sm, self.im_name, None),
            mod2=module_spec(s_h, s_t1, sm, self.im_name, None),
            mod3=module_spec(s_h, s_o, 'context_knn',
                             'context_miscRandom_local', self.context_mode),
            mod4=module_spec(s_t1, s_o, 'context_knn',
                             'context_miscRandom_local', self.context_mode),
        )
    else:
        raise NotImplementedError

    self.supervisor_name = supervisor_name
    self.supervisor_explo = supervisor_explo
    self.supervisor_n_explo_points = supervisor_n_explo_points
    self.supervisor_ccm = supervisor_ccm
    self.supervisor_ccl = supervisor_ccl

    # (supervisor name, choice, llb): the only settings that vary.
    supervisor_variants = (
        ("random", "random", False),
        ("interest", "prop", False),
        ("interest_greedy", "greedy", False),
        ("interest_bias", "prop", True),
    )
    for known_name, choice, llb in supervisor_variants:
        if self.supervisor_name == known_name:
            break
    else:
        raise NotImplementedError

    self.supervisor_cls = Supervisor
    self.supervisor_config = dict(choice=choice,
                                  llb=llb,
                                  explo=self.supervisor_explo,
                                  n_explo_points=self.supervisor_n_explo_points,
                                  choose_children_mode=self.supervisor_ccm,
                                  choose_children_local=self.supervisor_ccl)

    self.eval_dims = s[-4:-2]
    self.eval_explo_dims = s[-4:-2]

    self.eval_range = np.array([[-1.],
                                [1.]])
    self.eval_explo_eps = 0.02
    self.eval_explo_comp_eps = 0.02

    ################################### Env CONFIG ###################################
    self.max_param = 300.  # max DMP weight
    self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

    if self.dmp_use_initial:
        self.max_params = np.append([1] * self.n_dmps, self.max_params)
    if self.dmp_use_goal:
        self.max_params = np.append(self.max_params, [1] * self.n_dmps)

    iccm_conf = dict(move_steps=self.move_steps,
                     max_params=self.max_params,
                     gui=self.gui)

    # NOTE(review): this rebinds self.context_mode with reset_iterations=10.
    # The models and modules built above still hold the earlier dict
    # (reset_iterations=20). Kept as in the original; confirm this is
    # intended.
    self.context_mode = dict(mode='mcs',
                             reset_iterations=10,
                             context_n_dims=2,
                             context_sensory_bounds=np.array([[-1.5, -1.5], [1.5, 1.5]]))

    self.env_cls = ContextEnvironment
    self.env_cfg = dict(env_cls=ICDL2016Environment,
                        env_conf=iccm_conf,
                        context_mode=self.context_mode)

    # Replaces the array assigned in the agent section above.
    self.rest_position = [0.] * self.motor_n_dims

    self.m_mins = [-1.] * (self.n_dyn_motors * (self.n_bfs + 1))
    self.m_maxs = [1.] * (self.n_dyn_motors * (self.n_bfs + 1))

    self.s_mins = [-1.5] * (17)
    self.s_maxs = [1.5] * (17)

    ################################### Process CONFIG ###################################
    self.agent = make_configuration(self.m_mins,
                                    self.m_maxs,
                                    self.s_mins,
                                    self.s_maxs)
    self.tag = self.name
    self.log_dir = ''  # determined later
def __init__(self, config, babbling_mode="active", n_motor_babbling=0.1, explo_noise=0.05, choice_eps=0.2, normalize_interests=True):
    """Initialise the agent: configuration, bookkeeping, spaces and modules.

    Args:
        config: mapping expanded as keyword arguments into
            ``make_configuration``.
        babbling_mode: stored unchanged on the instance.
        n_motor_babbling: stored unchanged on the instance.
        explo_noise: exploration noise forwarded to every ``LearningModule``.
        choice_eps: stored unchanged on the instance (as a scalar, not a
            tuple).
        normalize_interests: forwarded to every ``LearningModule``.
    """
    self.config = config
    self.babbling_mode = babbling_mode
    self.n_motor_babbling = n_motor_babbling
    self.explo_noise = explo_noise
    # The original had a stray trailing comma here, which silently stored
    # the 1-tuple ``(choice_eps,)`` instead of the number itself.
    self.choice_eps = choice_eps
    self.normalize_interests = normalize_interests

    self.conf = make_configuration(**config)

    self.t = 0
    self.modules = {}
    self.chosen_modules = []
    self.goals = []
    self.progresses_evolution = {}
    self.interests_evolution = {}

    self.ms = None
    self.have_to_replay_arm_demo = None
    self.mid_control = ''
    self.measure_interest = False

    # Define motor and sensory spaces as index lists. Sensory indices are
    # offsets placed right after the motor parameters.
    m_ndims = self.conf.m_ndims  # number of motor parameters

    def span(start, stop):
        # Explicit list so the spaces can be concatenated with ``+`` on
        # Python 3 as well (``range`` objects do not support ``+`` there).
        return list(range(m_ndims + start, m_ndims + stop))

    self.m_space = list(range(m_ndims))
    self.c_dims = span(0, 2)
    self.s_hand = span(2, 32)
    self.s_joystick_1 = span(32, 52)
    self.s_joystick_2 = span(52, 72)
    self.s_ergo = span(72, 92)
    self.s_ball = span(92, 112)
    self.s_light = span(112, 122)
    self.s_sound = span(122, 132)
    self.s_hand_right = span(132, 162)
    self.s_base = span(162, 192)
    self.s_arena = span(192, 212)
    self.s_obj1 = span(212, 232)
    self.s_obj2 = span(232, 252)
    self.s_obj3 = span(252, 272)
    self.s_rdm1 = span(272, 292)
    self.s_rdm2 = span(292, 312)

    self.s_spaces = dict(s_hand=self.s_hand,
                         s_joystick_1=self.s_joystick_1,
                         s_joystick_2=self.s_joystick_2,
                         s_ergo=self.s_ergo,
                         s_ball=self.s_ball,
                         s_light=self.s_light,
                         s_sound=self.s_sound,
                         s_hand_right=self.s_hand_right,
                         s_base=self.s_base,
                         s_arena=self.s_arena,
                         s_obj1=self.s_obj1,
                         s_obj2=self.s_obj2,
                         s_obj3=self.s_obj3,
                         s_rdm1=self.s_rdm1,
                         s_rdm2=self.s_rdm2)

    def mcs_context(n_dims):
        # A fresh dict per module, so modules never share mutable settings.
        return dict(mode='mcs',
                    context_n_dims=n_dims,
                    context_sensory_bounds=[[-1.] * n_dims, [1.] * n_dims])

    # Create the 15 learning modules, in a fixed order. Modules 4-7 see
    # context dimensions prepended to their sensory space.
    module_specs = [
        ("mod1", self.s_hand, None),
        ("mod2", self.s_joystick_1, None),
        ("mod3", self.s_joystick_2, None),
        ("mod4", [self.c_dims[0]] + self.s_ergo, mcs_context(1)),
        ("mod5", self.c_dims + self.s_ball, mcs_context(2)),
        ("mod6", self.c_dims + self.s_light, mcs_context(2)),
        ("mod7", self.c_dims + self.s_sound, mcs_context(2)),
        ("mod8", self.s_hand_right, None),
        ("mod9", self.s_base, None),
        ("mod10", self.s_arena, None),
        ("mod11", self.s_obj1, None),
        ("mod12", self.s_obj2, None),
        ("mod13", self.s_obj3, None),
        ("mod14", self.s_rdm1, None),
        ("mod15", self.s_rdm2, None),
    ]
    for mid, s_space, context_mode in module_specs:
        module_kwargs = dict(explo_noise=self.explo_noise,
                             normalize_interests=self.normalize_interests)
        if context_mode is not None:
            module_kwargs["context_mode"] = context_mode
        self.modules[mid] = LearningModule(
            mid, self.m_space, s_space, self.conf, **module_kwargs)

    # Two-way lookup between sensory space names and module ids.
    self.space2mid = dict(s_hand="mod1",
                          s_joystick_1="mod2",
                          s_joystick_2="mod3",
                          s_ergo="mod4",
                          s_ball="mod5",
                          s_light="mod6",
                          s_sound="mod7",
                          s_hand_right='mod8',
                          s_base='mod9',
                          s_arena='mod10',
                          s_obj1='mod11',
                          s_obj2='mod12',
                          s_obj3='mod13',
                          s_rdm1='mod14',
                          s_rdm2='mod15')

    self.mid2space = dict(
        mod1="s_hand",
        mod2="s_joystick_1",
        mod3="s_joystick_2",
        mod4="s_ergo",
        mod5="s_ball",
        mod6="s_light",
        mod7="s_sound",
        mod8="s_hand_right",
        mod9="s_base",
        mod10="s_arena",
        mod11="s_obj1",
        mod12="s_obj2",
        mod13="s_obj3",
        mod14="s_rdm1",
        mod15="s_rdm2",
    )

    # One history list per module for progress and interest.
    for mid in self.modules:
        self.progresses_evolution[mid] = []
        self.interests_evolution[mid] = []
def __init__(self, name=None, hierarchy_type=0, babbling_name="goal", supervisor_name="interest",
             supervisor_explo="motor", supervisor_n_explo_points=0, supervisor_ccm="competence",
             supervisor_ccl="local", sm_model='NN', im_model='miscRandom_local', im_mode='sg',
             tdd=False, ns=False, envnoise=0, perturbation=None, allow_split_mod1=False,
             from_log=None, bootstrap=0, explo_noise=0.01, iterations=None):
    """Fill in the experiment, agent, supervisor and environment settings.

    Everything is stored as attributes on ``self``; nothing is returned.

    Args:
        name: Experiment name; falls back to ``'Experiment'`` when falsy.
            Also stored as ``self.tag``.
        hierarchy_type: Selects the module layout. ``0`` is a single module
            over all motor and sensory dims, ``1`` is fifteen modules that
            each map the arm motor space to one sensory sub-space, ``2`` is
            two modules (``mod4`` and ``mod7``) on two-index sensory spaces.
        babbling_name: ``"goal"`` sets ``motor_babbling_n_iter`` to 10; any
            other value sets it to ``self.iter``.
        supervisor_name: One of ``"random"``, ``"interest"``,
            ``"interest-pmin"``, ``"interest_greedy"``, ``"interest_bias"``.
        supervisor_explo: Forwarded as ``explo`` in ``supervisor_config``.
        supervisor_n_explo_points: Forwarded as ``n_explo_points``.
        supervisor_ccm: Forwarded as ``choose_children_mode``.
        supervisor_ccl: Forwarded unchanged as ``choose_children_local``;
            ``self.choose_children_local`` holds ``supervisor_ccl == 'local'``.
        sm_model: Key written as ``sm_name`` into every module dict.
        im_model: Key written as ``im_name`` into every module dict.
        im_mode: Written as ``im_mode`` into every module dict.
        tdd: Accepted but not used by this method.
        ns: Accepted but not used by this method.
        envnoise: Forwarded as ``noise`` in ``env_cfg``.
        perturbation: Forwarded as ``perturbation`` in ``env_cfg``.
        allow_split_mod1: Forwarded in ``supervisor_config``.
        from_log: Stored as ``self.from_log``.
        bootstrap: Stored as ``self.bootstrap``.
        explo_noise: Used as ``sigma_explo_ratio`` in the ``self.sms``
            entries (except ``'LWLR-BFGS-NOEXPLO'``, which uses ``0.``).
        iterations: Stored as ``self.iter``; falls back to 50 when falsy.

    Raises:
        NotImplementedError: If ``hierarchy_type`` or ``supervisor_name`` is
            not one of the supported values.
    """
    ################################### EXPERIMENT CONFIG ###################################

    self.name = name or 'Experiment'
    self.init_rest_trial = False
    self.bootstrap = bootstrap
    self.bootstrap_range_div = 1.
    self.iter = iterations or 50
    self.log_each = self.iter  # must be <= iter
    self.eval_at = []
    self.n_eval = 0
    self.eval_modes = []
    self.gui = True

    self.hierarchy_type = hierarchy_type
    self.babbling_name = babbling_name
    if self.babbling_name == "goal":
        self.motor_babbling_n_iter = 10
    else:
        self.motor_babbling_n_iter = self.iter

    self.from_log = from_log

    ################################### AGENT CONFIG ###################################

    self.n_dyn_motors = 4
    self.n_dmps = self.n_dyn_motors
    self.dmp_use_initial = False
    self.dmp_use_goal = True
    self.n_bfs = 2
    self.n_static_motor = 0
    # self.rest_position is set once, in the Env CONFIG section below. The
    # earlier np.zeros(n_dmps + n_static_motor) value was overwritten before
    # anything read it.

    # One weight per (motor, basis function), plus one extra value per DMP
    # for each of the optional initial / goal positions.
    self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
    if self.dmp_use_initial:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps
    if self.dmp_use_goal:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps

    self.move_steps = 50
    self.motor_dims = range(self.motor_n_dims)
    self.s_n_dims = 31 * 3

    # Sensory dims are indexed right after the motor dims.
    self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
    self.used_dims = self.motor_n_dims + self.s_n_dims

    self.choose_children_local = (supervisor_ccl == 'local')

    # Interest-model registry: name -> (class, constructor kwargs).
    # An alternative competence measure that was tried here:
    #   lambda target, reached, dist_max: competence_exp(
    #       target, reached, dist_min=0.01, dist_max=dist_max, power=20.)
    self.ims = {
        'miscRandom_local': (MiscRandomInterest, {
            'competence_measure': competence_dist,
            'win_size': 1000,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': 'local'}),
        'miscRandom_global': (MiscRandomInterest, {
            'competence_measure': competence_dist,
            'win_size': 1000,
            'competence_mode': 'knn',
            'k': 20,
            'progress_mode': 'global'}),
    }

    # Sensorimotor-model registry: name -> (class, constructor kwargs).
    self.sms = {
        'NN': (NonParametric, {'fwd': 'NN', 'inv': 'NN', 'sigma_explo_ratio': explo_noise}),
        'LWLR-BFGS-EXPLO': (NonParametric, {'fwd': 'LWLR', 'k': 10, 'sigma': 0.1,
                                            'sigma_explo_ratio': explo_noise,
                                            'inv': 'L-BFGS-B', 'maxfun': 200,
                                            'ftol': 0, 'gtol': 0}),
        'LWLR-BFGS-NOEXPLO': (NonParametric, {'fwd': 'LWLR', 'k': 20, 'sigma': 0.1,
                                              'sigma_explo_ratio': 0.,
                                              'inv': 'L-BFGS-B', 'maxfun': 200,
                                              'ftol': 0, 'gtol': 0}),
        'LWLR-CMAES': (NonParametric, {'fwd': 'LWLR', 'k': 10, 'sigma': 0.1,
                                       'inv': 'CMAES', 'cmaes_sigma': 0.05,
                                       'sigma_explo_ratio': explo_noise,
                                       'maxfevals': 20}),
    }

    self.sm_model = sm_model
    self.im_model = im_model
    self.im_name = self.im_model

    sm = self.sm_model

    self.std_range = [-1., 1.]

    m = self.motor_dims
    s = self.sensori_dims

    self.operators = ["par"]

    def _goal_module(m_space, s_space):
        # Config dict of one module mapping m_space to s_space. Every module
        # uses "goal" babbling here, whatever the babbling_name argument is.
        return dict(m=m_space,
                    s=s_space,
                    m_list=[m_space],
                    operator="par",
                    babbling_name="goal",
                    sm_name=sm,
                    im_name=self.im_name,
                    im_mode=im_mode,
                    from_log=None,
                    motor_babbling_n_iter=self.motor_babbling_n_iter)

    if self.hierarchy_type == 0:
        self.m_spaces = dict(m=m)
        self.s_spaces = dict(s=s)
        self.modules = dict(mod1=_goal_module(m, s))
    elif self.hierarchy_type == 1:
        # NOTE(review): the literal 12 equals motor_n_dims for the constants
        # set above; it is kept literal to preserve the original behaviour.
        self.m_spaces = dict(m_arm=range(12))
        arm = self.m_spaces["m_arm"]

        # Sensory layout after the motor dims: one 9-dim space (s_h)
        # followed by fourteen consecutive 6-dim spaces.
        s_names = ["s_h", "s_t1", "s_t2"] + ["s_o%d" % i for i in range(1, 13)]
        s_start = self.motor_n_dims
        self.s_spaces = dict(s_h=range(s_start + 0, s_start + 9))
        for k, s_name in enumerate(s_names[1:]):
            lo = s_start + 9 + 6 * k
            self.s_spaces[s_name] = range(lo, lo + 6)

        # mod1..mod15 follow the order of s_names, all driven by the arm.
        self.modules = dict(
            ("mod%d" % (i + 1), _goal_module(arm, self.s_spaces[s_name]))
            for i, s_name in enumerate(s_names))
    elif self.hierarchy_type == 2:
        self.m_spaces = dict(m_arm=range(12))
        arm = self.m_spaces["m_arm"]
        # Two explicit indices per space, taken from inside the hierarchy-1
        # slices s_o1 (offsets 21..26) and s_o4 (offsets 39..44).
        self.s_spaces = dict(
            s_o1=[self.motor_n_dims + 23, self.motor_n_dims + 26],
            s_o4=[self.motor_n_dims + 41, self.motor_n_dims + 44],
        )
        self.modules = dict(
            mod4=_goal_module(arm, self.s_spaces["s_o1"]),
            mod7=_goal_module(arm, self.s_spaces["s_o4"]),
        )
    else:
        raise NotImplementedError("unsupported hierarchy_type: %r" % (self.hierarchy_type,))

    self.supervisor_name = supervisor_name
    self.supervisor_explo = supervisor_explo
    self.supervisor_n_explo_points = supervisor_n_explo_points
    self.supervisor_ccm = supervisor_ccm
    self.supervisor_ccl = supervisor_ccl

    # supervisor_name -> (choice, llb); the remaining config is shared.
    supervisor_modes = (
        ("random", "random", False),
        ("interest", "prop", False),
        ("interest-pmin", "prop-min", False),
        ("interest_greedy", "greedy", False),
        ("interest_bias", "prop", True),
    )
    # Compared with == (like the original if/elif chain) rather than hashed.
    for known_name, choice, llb in supervisor_modes:
        if self.supervisor_name == known_name:
            break
    else:
        raise NotImplementedError("unsupported supervisor_name: %r" % (self.supervisor_name,))

    self.supervisor_cls = Supervisor
    # NOTE(review): choose_children_local receives the raw supervisor_ccl
    # string, not the boolean self.choose_children_local -- confirm which one
    # Supervisor expects.
    self.supervisor_config = dict(choice=choice,
                                  llb=llb,
                                  explo=self.supervisor_explo,
                                  n_explo_points=self.supervisor_n_explo_points,
                                  choose_children_mode=self.supervisor_ccm,
                                  choose_children_local=self.supervisor_ccl,
                                  allow_split_mod1=allow_split_mod1)

    self.eval_dims = s[-4:-2]
    self.eval_explo_dims = s[-4:-2]

    self.eval_range = np.array([[-1.], [1.]])
    self.eval_explo_eps = 0.02
    self.eval_explo_comp_eps = 0.02

    ################################### Env CONFIG ###################################

    self.max_param = 500.  # max DMP weight
    self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

    # The optional initial / goal entries are bounded by 1 instead.
    if self.dmp_use_initial:
        self.max_params = np.append([1] * self.n_dmps, self.max_params)
    if self.dmp_use_goal:
        self.max_params = np.append(self.max_params, [1] * self.n_dmps)

    self.env_cls = IROS2016Environment
    self.env_cfg = dict(move_steps=self.move_steps,
                        max_params=self.max_params,
                        noise=envnoise,
                        perturbation=perturbation,
                        gui=self.gui)

    self.rest_position = [0.] * self.motor_n_dims

    self.m_mins = [-1.] * (self.n_dyn_motors * (self.n_bfs + 1))
    self.m_maxs = [1.] * (self.n_dyn_motors * (self.n_bfs + 1))

    # s_n_dims is 31 * 3, the same length the original literal produced.
    self.s_mins = [-1.5] * self.s_n_dims
    self.s_maxs = [1.5] * self.s_n_dims

    ################################### Process CONFIG ###################################

    self.agent = make_configuration(self.m_mins,
                                    self.m_maxs,
                                    self.s_mins,
                                    self.s_maxs)

    self.tag = self.name
    self.log_dir = ''  # determined later
def __init__(
    self,
    name=None,
    hierarchy_type=0,
    babbling_name="goal",
    supervisor_name="interest",
    supervisor_explo="motor",
    supervisor_n_explo_points=0,
    supervisor_ccm="competence",
    supervisor_ccl="local",
    tdd=False,
    ns=False,
    perturbation=None,
    from_log=None,
    iterations=None,
):
    """Fill in the experiment, agent, supervisor and environment settings.

    Everything is stored as attributes on ``self``; nothing is returned.

    Args:
        name: Experiment name; falls back to ``"Experiment"`` when falsy.
            Also stored as ``self.tag``.
        hierarchy_type: Selects the module layout. ``0`` is a single module
            over all motor and sensory dims; ``1`` is six chained modules
            (arm -> s_h -> s_t1/s_t2 -> s_o -> s_b). Other values raise.
        babbling_name: ``"goal"`` sets ``motor_babbling_n_iter`` to 0; any
            other value sets it to ``self.iter``.
        supervisor_name: One of ``"random"``, ``"interest"``,
            ``"interest_greedy"``, ``"interest_bias"``.
        supervisor_explo: Forwarded as ``explo`` in ``supervisor_config``.
        supervisor_n_explo_points: Forwarded as ``n_explo_points``.
        supervisor_ccm: Forwarded as ``choose_children_mode``.
        supervisor_ccl: Forwarded unchanged as ``choose_children_local``;
            ``self.choose_children_local`` holds ``supervisor_ccl == "local"``.
        tdd: Accepted but not used by this method.
        ns: Accepted but not used by this method.
        perturbation: Forwarded as ``perturbation`` in ``env_cfg``.
        from_log: Stored as ``self.from_log``.
        iterations: Stored as ``self.iter``; falls back to 50 when falsy.

    Raises:
        NotImplementedError: If ``hierarchy_type`` or ``supervisor_name`` is
            not one of the supported values.
    """
    ################################### EXPERIMENT CONFIG ###################################

    self.name = name or "Experiment"
    self.init_rest_trial = False
    self.bootstrap = 100
    self.bootstrap_range_div = 1.0
    self.iter = iterations or 50
    self.log_each = self.iter  # must be <= iter
    self.eval_at = []
    self.n_eval = 0
    self.eval_modes = []
    self.gui = False

    self.hierarchy_type = hierarchy_type
    self.babbling_name = babbling_name
    if self.babbling_name == "goal":
        self.motor_babbling_n_iter = 0
    else:
        self.motor_babbling_n_iter = self.iter

    self.from_log = from_log

    ################################### AGENT CONFIG ###################################

    self.n_dyn_motors = 4
    self.n_dmps = self.n_dyn_motors
    self.dmp_use_initial = False
    self.dmp_use_goal = True
    self.n_bfs = 2
    self.n_static_motor = 0
    # self.rest_position is set once, in the Env CONFIG section below. The
    # earlier np.zeros(n_dmps + n_static_motor) value was overwritten before
    # anything read it.

    # One weight per (motor, basis function), plus one extra value per DMP
    # for each of the optional initial / goal positions.
    self.motor_n_dims = self.n_dyn_motors * self.n_bfs + self.n_static_motor
    if self.dmp_use_initial:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps
    if self.dmp_use_goal:
        self.motor_n_dims = self.motor_n_dims + self.n_dmps

    self.move_steps = 50
    self.motor_dims = range(self.motor_n_dims)

    # NOTE(review): hierarchy_type == 2 is accepted here but rejected by the
    # module-layout branch further down, so it always ends in
    # NotImplementedError. Also, with n_bfs == 2 this gives s_n_dims == 15,
    # while s_mins / s_maxs and the hierarchy-1 spaces below cover 25
    # sensory dims -- confirm the intended size.
    if self.hierarchy_type <= 1:
        self.s_n_dims = 5 * self.n_bfs + 5
    elif self.hierarchy_type == 2:
        self.s_n_dims = 7 * self.n_bfs + 5
    else:
        raise NotImplementedError("unsupported hierarchy_type: %r" % (self.hierarchy_type,))

    # Sensory dims are indexed right after the motor dims.
    self.sensori_dims = range(self.motor_n_dims, self.motor_n_dims + self.s_n_dims)
    self.used_dims = self.motor_n_dims + self.s_n_dims

    self.im_name = "miscRandom_local"
    self.choose_children_local = supervisor_ccl == "local"

    # Sensorimotor-model registry: name -> (class, constructor kwargs).
    self.sms = {
        "knn1": (NonParametric, {"fwd": "NN", "inv": "NN", "sigma_explo_ratio": 0.01}),
    }

    sm = "knn1"
    im_mode = "sg"
    self.std_range = [-1.0, 1.0]

    m = self.motor_dims
    s = self.sensori_dims

    self.operators = ["par"]

    def _goal_module(m_space, s_space):
        # Config dict of one module mapping m_space to s_space. Every module
        # uses "goal" babbling here, whatever the babbling_name argument is.
        return dict(
            m=m_space,
            s=s_space,
            m_list=[m_space],
            operator="par",
            babbling_name="goal",
            sm_name=sm,
            im_name=self.im_name,
            im_mode=im_mode,
            from_log=None,
            motor_babbling_n_iter=self.motor_babbling_n_iter,
        )

    if self.hierarchy_type == 0:
        self.m_spaces = dict(m=m)
        self.s_spaces = dict(s=s)
        self.modules = dict(mod1=_goal_module(m, s))
    elif self.hierarchy_type == 1:
        # NOTE(review): the literal 12 equals motor_n_dims for the constants
        # set above; it is kept literal to preserve the original behaviour.
        self.m_spaces = dict(m_arm=range(12))
        s_start = self.motor_n_dims
        self.s_spaces = dict(
            s_h=range(s_start + 0, s_start + 9),
            s_t1=range(s_start + 9, s_start + 15),
            s_t2=range(s_start + 15, s_start + 21),
            s_o=range(s_start + 21, s_start + 23),
            s_b=range(s_start + 23, s_start + 25),
        )

        arm = self.m_spaces["m_arm"]
        sp = self.s_spaces
        # Chained layout: the sensory space of one module is the "motor"
        # space of the next.
        self.modules = dict(
            mod1=_goal_module(arm, sp["s_h"]),
            mod2=_goal_module(sp["s_h"], sp["s_t1"]),
            mod3=_goal_module(sp["s_t1"], sp["s_o"]),
            mod4=_goal_module(sp["s_o"], sp["s_b"]),
            mod5=_goal_module(sp["s_h"], sp["s_t2"]),
            mod6=_goal_module(sp["s_t2"], sp["s_o"]),
        )
    else:
        raise NotImplementedError("unsupported hierarchy_type: %r" % (self.hierarchy_type,))

    self.supervisor_name = supervisor_name
    self.supervisor_explo = supervisor_explo
    self.supervisor_n_explo_points = supervisor_n_explo_points
    self.supervisor_ccm = supervisor_ccm
    self.supervisor_ccl = supervisor_ccl

    # supervisor_name -> (choice, llb); the remaining config is shared.
    supervisor_modes = (
        ("random", "random", False),
        ("interest", "prop", False),
        ("interest_greedy", "greedy", False),
        ("interest_bias", "prop", True),
    )
    # Compared with == (like the original if/elif chain) rather than hashed.
    for known_name, choice, llb in supervisor_modes:
        if self.supervisor_name == known_name:
            break
    else:
        raise NotImplementedError("unsupported supervisor_name: %r" % (self.supervisor_name,))

    self.supervisor_cls = Supervisor
    # NOTE(review): choose_children_local receives the raw supervisor_ccl
    # string, not the boolean self.choose_children_local -- confirm which one
    # Supervisor expects.
    self.supervisor_config = dict(
        choice=choice,
        llb=llb,
        explo=self.supervisor_explo,
        n_explo_points=self.supervisor_n_explo_points,
        choose_children_mode=self.supervisor_ccm,
        choose_children_local=self.supervisor_ccl,
    )

    self.eval_dims = s[-4:-2]
    self.eval_explo_dims = s[-4:-2]

    self.eval_range = np.array([[-1.0], [1.0]])
    self.eval_explo_eps = 0.02
    self.eval_explo_comp_eps = 0.02

    ################################### Env CONFIG ###################################

    self.max_param = 500.0  # max DMP weight
    self.max_params = self.max_param * np.ones((self.n_dmps * self.n_bfs,))

    # The optional initial / goal entries are bounded by 1 instead.
    if self.dmp_use_initial:
        self.max_params = np.append([1] * self.n_dmps, self.max_params)
    if self.dmp_use_goal:
        self.max_params = np.append(self.max_params, [1] * self.n_dmps)

    self.env_cls = CogSci2016Environment
    self.env_cfg = dict(
        move_steps=self.move_steps,
        max_params=self.max_params,
        perturbation=perturbation,
        gui=self.gui,
    )

    self.rest_position = [0.0] * self.motor_n_dims

    self.m_mins = [-1.0] * (self.n_dyn_motors * (self.n_bfs + 1))
    self.m_maxs = [1.0] * (self.n_dyn_motors * (self.n_bfs + 1))

    # Per-dimension sensory bounds; both lists have the same length.
    self.s_mins = (
        [-1.0] * (3 * (self.n_bfs + 1))
        + [-1.5] * (self.n_bfs + 1)
        + [0.0] * (self.n_bfs + 1)
        + [-1.5] * (self.n_bfs + 1)
        + [0.0] * (self.n_bfs + 1)
        + [-2.0, -2.0, 0.0, 0.0]
    )
    self.s_maxs = (
        [1.0] * (3 * (self.n_bfs + 1))
        + [1.5, 1.5] * (self.n_bfs + 1)
        + [1.5, 1.5] * (self.n_bfs + 1)
        + [2.0, 2.0, 10.0, 0.3]
    )

    ################################### Process CONFIG ###################################

    self.agent = make_configuration(self.m_mins, self.m_maxs, self.s_mins, self.s_maxs)

    self.tag = self.name
    self.log_dir = ""  # determined later