def choose_babbling_module(self):
    """Select the learning module to babble with, following self.model_babbling.

    "random" picks a module uniformly; "active" picks proportionally to each
    module's current interest (with self.choice_eps chance of a random pick).
    The chosen module id is appended to self.chosen_modules and returned.

    Raises:
        ValueError: if self.model_babbling is neither "random" nor "active".
    """
    if self.model_babbling == "random":
        mode = "random"
    elif self.model_babbling == "active":
        mode = "prop"
    else:
        # Previously `mode` was simply left unbound here, producing a
        # confusing NameError further down; fail fast instead.
        raise ValueError("Unknown model babbling: " + str(self.model_babbling))
    # Current interest of each module.
    interests = {}
    for mid in self.modules.keys():
        interests[mid] = self.modules[mid].interest()
    # Materialize keys/values from the SAME dict so indices stay aligned
    # with the weights; list(...) also makes this work on Python 3, where
    # dict views cannot be indexed or passed to np.random.choice.
    mids = list(interests.keys())
    if mode == 'random':
        mid = np.random.choice(mids)
    elif mode == 'greedy':
        if np.random.random() < self.choice_eps:
            mid = np.random.choice(mids)
        else:
            mid = max(interests, key=interests.get)
    elif mode == 'softmax':
        temperature = self.choice_eps
        w = list(interests.values())
        mid = mids[softmax_choice(w, temperature)]
    elif mode == 'prop':
        w = list(interests.values())
        mid = mids[prop_choice(w, eps=self.choice_eps)]
    self.chosen_modules.append(mid)
    return mid
def choose_babbling_module(self, mode='prop'):
    """Select a module to babble with.

    The interest of "mod1" is forced to 0 when self.enable_hand is False,
    so the hand module is (almost) never chosen by the active modes.
    Selection mode: 'random', 'greedy', 'softmax' or 'prop'. The chosen
    module id is appended to self.chosen_modules and returned.
    """
    interests = {}
    for mid in self.modules.keys():
        # Equivalent to the original double negation
        # `not ((not self.enable_hand) and mid == "mod1")`.
        if self.enable_hand or mid != "mod1":
            interests[mid] = self.modules[mid].interest()
        else:
            interests[mid] = 0.
    # BUG FIX: the random/greedy branches previously read the non-existent
    # attribute `self.interests`; the local `interests` dict was intended.
    # list(...) keeps keys/values aligned and works on Python 3 dict views.
    mids = list(interests.keys())
    if mode == 'random':
        mid = np.random.choice(mids)
    elif mode == 'greedy':
        eps = 0.2
        if np.random.random() < eps:
            mid = np.random.choice(mids)
        else:
            mid = max(interests, key=interests.get)
    elif mode == 'softmax':
        temperature = 0.1
        w = list(interests.values())
        mid = mids[softmax_choice(w, temperature)]
    elif mode == 'prop':
        w = list(interests.values())
        mid = mids[prop_choice(w, eps=self.choice_eps)]
    self.chosen_modules.append(mid)
    return mid
def choose_space_child(self, s_space, s, mode="competence", local="local"):
    """Choose the child module of sensory space s_space among the modules
    that have the right sensori spaces, maximizing competence or interest.

    Returns a module id, or None when s_space is unknown to the hierarchy.
    `local` selects point-wise ("local") vs global competence/interest.
    """
    try:
        possible_mids = self.hierarchy.space_children(s_space)
    except KeyError:
        # s_space has no registered children.
        return None
    # Single candidate: no choice to make (and no need to compute y).
    if len(possible_mids) == 1:
        return possible_mids[0]
    # Project the full sensory context onto the target space.
    y = self.set_ms(s=s)[s_space]
    if mode == "competence":
        # NOTE(review): this branch tests truthiness (`if local:`), so any
        # non-empty string -- including "global" -- takes the local path,
        # while the interest branches below compare `local == "local"`.
        # Looks inconsistent; confirm intended semantics before changing.
        if local:
            # Local competence: negated distance to the nearest achieved
            # point in each module's dataset (closer -> more competent).
            competences = [
                -self.modules[pmid].sensorimotor_model.model.imodel.fmodel.
                dataset.nn_y(y, k=1)[0][0] for pmid in possible_mids
            ]
        else:
            competences = [
                self.modules[pmid].competence() for pmid in possible_mids
            ]
        return possible_mids[np.array(competences).argmax()]
    elif mode == "interest_greedy":
        # Epsilon-greedy on interest.
        eps = 0.1
        if np.random.random() < eps:
            return np.random.choice(possible_mids)
        else:
            if local == "local":
                interests = [
                    self.modules[pmid].interest_pt(y) for pmid in possible_mids
                ]
            else:
                interests = [
                    self.modules[pmid].interest() for pmid in possible_mids
                ]
            return possible_mids[np.array(interests).argmax()]
    elif mode == "interest_prop":
        # Epsilon of pure random choice, otherwise proportional to interest.
        eps = 0.1
        if np.random.random() < eps:
            return np.random.choice(possible_mids)
        else:
            if local == "local":
                interests = [
                    self.modules[pmid].interest_pt(y) for pmid in possible_mids
                ]
            else:
                interests = [
                    self.modules[pmid].interest() for pmid in possible_mids
                ]
            return possible_mids[prop_choice(interests, eps=0.1)]
    elif mode == "random":
        mid = np.random.choice(possible_mids)
        return mid
def choose_babbling_module(self, auto_create=False, progress_threshold=1e-2, mode='softmax', weight_by_level=False):
    """Select the module to babble with ('random', 'greedy', 'softmax' or
    'prop'), emitting interest/competence events for every module.

    When auto_create is True and no module's interest exceeds
    progress_threshold, a new module is created instead of babbling.
    weight_by_level biases the softmax/prop weights towards lower levels
    of the module hierarchy.
    """
    interests = {}
    for mid in self.modules.keys():
        interests[mid] = self.modules[mid].interest()
        self.emit('interest_' + mid, [self.t, interests[mid]])
        self.emit('competence_' + mid, [self.t, self.modules[mid].competence()])
    max_progress = max(interests.values())
    # self.emit('babbling_module', "mod2")
    # return "mod2"
    #print "max_progress", max_progress
    if not auto_create or max_progress > progress_threshold:
        if mode == 'random':
            # NOTE: Python 2 only -- np.random.choice on a dict view.
            mid = np.random.choice(self.modules.keys())
        elif mode == 'greedy':
            eps = 0.1
            if np.random.random() < eps:
                mid = np.random.choice(self.modules.keys())
            else:
                mid = max(interests, key=interests.get)
        elif mode == 'softmax':
            temperature = 0.1
            # Python 2 semantics: keys() and values() of the same dict
            # iterate in the same order, keeping mids and w aligned.
            mids = interests.keys()
            w = interests.values()
            #print "progresses", w
            #print "competences", [mod.competence() for mod in self.modules.values()]
            if weight_by_level:
                # Multiply each weight by f^(depth below the deepest level).
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    f = 2.0
                    w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
            #print w
            mid = mids[softmax_choice(w, temperature)]
        elif mode == 'prop':
            mids = interests.keys()
            w = interests.values()
            #print "progresses", w
            #print "competences", [mod.competence() for mod in self.modules.values()]
            if weight_by_level:
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    f = 10.0
                    w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
            #print w
            mid = mids[prop_choice(w, eps=0.1)]
        # NOTE(review): an unrecognized `mode` leaves `mid` unbound and
        # raises NameError on the next line.
        self.chosen_modules[mid] = self.chosen_modules[mid] + 1
        #print self.chosen_modules
        self.emit('babbling_module', mid)
        return mid
    else:
        # No module is making enough progress: grow the architecture.
        return self.create_module()
def act(self, n_iter=1, **kwargs):
    """Run n_iter iterations of modular goal exploration.

    Each step consists of self._n_explore noisy exploring rollouts plus
    one exploiting rollout, so n_iter is consumed in chunks of
    (self._n_explore + 1). Extra kwargs are forwarded to self._env.act.
    Side effects: updates every learning module's sensorimotor model,
    the babbling module's interest model, and appends to the various
    self._* logs (actions, outcomes, interests, explo stats).
    """
    assert n_iter > 0
    # Steps of (4 exploring and 1 exploiting iterations):
    for step in range(n_iter // (self._n_explore + 1)):
        # Compute the interest of modules
        interests = [module.interest() for module in self._learning_modules]
        self._interests_evolution.append(interests)
        # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
        choice = prop_choice(interests, eps=self._explo_ratio)
        babbling_module = self._learning_modules[choice]
        # The babbling module picks a random goal in its sensory space and returns 4 noisy motor commands:
        m_list = babbling_module.produce(n=self._n_explore)
        goal = babbling_module.s
        # Log the hidden state of the nearest already-observed outcome to the goal.
        _, indexes = babbling_module.sensorimotor_model.model.imodel.fmodel.dataset.nn_y(goal)
        self._goals_states.append(self._outcomes_states[indexes[0]])
        for m in m_list:
            # We perform the actions and observe outcomes
            self._env.reset()
            self._env.act(action=m, **kwargs)
            self._actions.append(m)
            outcome = self._env.observation
            # We represent the raw outcome
            self._rep.act(X_pred=outcome)
            s = self._rep.representation.ravel()
            # self._outcomes.append(outcome)
            self._outcomes_reps.append(s)
            self._outcomes_states.append(self._env.hidden_state)
            # Update each sensorimotor models:
            for module in self._learning_modules:
                module.update_sm(m, module.get_s(np.concatenate([m, s])))
        # Choose the best motor command to reach current goal (with no noise):
        m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims, babbling_module.x, n=1, explore=False)
        # We perform the action and observe outcomes
        self._env.reset()
        self._env.act(action=m, **kwargs)
        self._actions.append(m)
        outcome = self._env.observation
        # We represent the raw outcome
        self._rep.act(X_pred=outcome)
        s = self._rep.representation.ravel()
        # self._outcomes.append(outcome)
        self._outcomes_reps.append(s)
        self._outcomes_states.append(self._env.hidden_state)
        # Update the interest of the babbling module:
        babbling_module.update_im(m, babbling_module.get_s(np.concatenate([m, s])))
        # Update each sensorimotor models:
        for module in self._learning_modules:
            module.update_sm(m, module.get_s(np.concatenate([m, s])))
        # Percentage of iterations spent on each module so far.
        explos_modules = [int(100. * (self._n_explore + 1) * module.im.n_points() / float(module.sm.t)) for module in self._learning_modules]
        self._explo_evolution.append(explos_modules)
def choose_space_child(self, s_space, s, mode="competence", local="local"):
    """Pick which child module of sensory space s_space to use, maximizing
    competence or interest depending on `mode`.

    Returns a module id, or None when s_space is unknown to the hierarchy.
    """
    try:
        candidates = self.hierarchy.space_children(s_space)
    except KeyError:
        return None
    # No decision needed for a single candidate.
    if len(candidates) == 1:
        return candidates[0]
    # Target point in the chosen sensory space.
    y = self.set_ms(s=s)[s_space]
    if mode == "competence":
        if local:
            # Closer nearest-neighbour in a module's dataset means higher
            # (less negative) local competence.
            scores = []
            for pmid in candidates:
                dataset = self.modules[pmid].sensorimotor_model.model.imodel.fmodel.dataset
                scores.append(-dataset.nn_y(y, k=1)[0][0])
        else:
            scores = [self.modules[pmid].competence() for pmid in candidates]
        return candidates[np.array(scores).argmax()]
    elif mode == "interest_greedy":
        # Epsilon-greedy over interest.
        eps = 0.1
        if np.random.random() < eps:
            return np.random.choice(candidates)
        if local == "local":
            scores = [self.modules[pmid].interest_pt(y) for pmid in candidates]
        else:
            scores = [self.modules[pmid].interest() for pmid in candidates]
        return candidates[np.array(scores).argmax()]
    elif mode == "interest_prop":
        # Epsilon random, otherwise proportional to interest.
        eps = 0.1
        if np.random.random() < eps:
            return np.random.choice(candidates)
        if local == "local":
            scores = [self.modules[pmid].interest_pt(y) for pmid in candidates]
        else:
            scores = [self.modules[pmid].interest() for pmid in candidates]
        return candidates[prop_choice(scores, eps=0.1)]
    elif mode == "random":
        return np.random.choice(candidates)
def choose_interesting_space(self, mode='softmax'):
    """Select the sensory space to explore, based on module interests.

    Interests per space: s_h <- mod1, s_t1 <- mod2, s_o <- mod3 + mod4.
    Selection mode: 'random', 'greedy', 'softmax' or 'prop'. Increments
    self.chosen_spaces[s_space] and returns the chosen space name.
    """
    s_spaces = self.config.s_spaces
    interests = {}
    for s_space in s_spaces.keys():
        if s_space == "s_h":
            interests[s_space] = self.modules["mod1"].interest()
        elif s_space == "s_t1":
            interests[s_space] = self.modules["mod2"].interest()
        # elif s_space == "s_t2":
        #     interests[s_space] = self.modules["mod3"].interest()
        elif s_space == "s_o":
            interests[s_space] = np.sum([self.modules[mid].interest() for mid in ["mod3", "mod4"]])
    self.emit('interests', [self.t, interests])
    # BUG FIX: random/greedy previously read the non-existent
    # `self.interests`, and softmax/prop indexed `s_spaces.keys()` with a
    # position computed from `interests.values()` -- misaligned whenever
    # s_spaces contains a space with no interest entry (e.g. the
    # commented-out s_t2). Index the `interests` dict itself instead;
    # list(...) also works with Python 3 dict views.
    names = list(interests.keys())
    w = list(interests.values())
    if mode == 'random':
        s_space = np.random.choice(names)
    elif mode == 'greedy':
        eps = 0.2
        if np.random.random() < eps:
            s_space = np.random.choice(names)
        else:
            s_space = max(interests, key=interests.get)
    elif mode == 'softmax':
        temperature = 0.1
        s_space = names[softmax_choice(w, temperature)]
    elif mode == 'prop':
        s_space = names[prop_choice(w, eps=0.2)]
    # Periodic debug report (print() calls, matching the Python 3 style
    # already used elsewhere in this file).
    if self.t % 200 == 1:
        print()
        print('iterations', self.t - 1)
        print("competences", np.array([self.modules[mid].competence() for mid in self.modules.keys()]))
        print("progresses", np.array([self.modules[mid].interest_model.current_progress for mid in self.modules.keys()]))
        print("interests", np.array([self.modules[mid].interest() for mid in self.modules.keys()]))
        print("sm db n points", [len(self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset) for mid in self.modules.keys()])
        print("im db n points", [len(self.modules[mid].interest_model.data_xc) for mid in self.modules.keys()])
        print(self.chosen_modules)
        print("made tool moved object", self.credit_tool_move)
        print("made hand moved object", self.credit_hand_move)
    self.chosen_spaces[s_space] = self.chosen_spaces[s_space] + 1
    return s_space
def choose_babbling_module(self):
    """Select the module to babble with, following self.model_babbling.

    Strategies: 'random' (arm vs diva modules, weighted by
    self.arm_goal_selection), 'hand_object_sound', 'object_sound',
    'greedy', 'softmax' and 'prop'. The numeric module index
    (int(mid[3:])) is appended to self.chosen_modules.

    Raises:
        ValueError: if self.model_babbling is not a known strategy.
    """
    # Current interest of every module (used by the active strategies).
    interests = {}
    for mid in self.modules.keys():
        interests[mid] = self.modules[mid].interest()
    # Take keys and values from the SAME dict so positions stay aligned;
    # list(...) also works on Python 3, where dict views cannot be
    # indexed or passed to np.random.choice.
    mids = list(interests.keys())
    if self.model_babbling == 'random':
        #mid = np.random.choice(mids)
        if np.random.random() < self.arm_goal_selection:
            mid = np.random.choice(self.arm_modules)
        else:
            mid = np.random.choice(self.diva_modules)
    elif self.model_babbling == 'hand_object_sound':
        # Three equiprobable groups: the two draws are independent, so
        # P(hand) = 1/3, P(object) = (2/3)*(1/2) = 1/3, P(sound) = 1/3.
        if np.random.random() < 1. / 3.:
            mid = 'mod1'
        elif np.random.random() < 1. / 2.:
            mid = np.random.choice(['mod2', 'mod3', 'mod4', 'mod5', 'mod10', 'mod11', 'mod12'])
        else:
            mid = np.random.choice(['mod6', 'mod13', 'mod14'])
    elif self.model_babbling == 'object_sound':
        if np.random.random() < 1. / 2.:
            mid = np.random.choice(['mod1', 'mod2', 'mod3', 'mod4', 'mod5', 'mod10', 'mod11', 'mod12'])
        else:
            mid = np.random.choice(['mod6', 'mod13', 'mod14'])
    elif self.model_babbling == 'greedy':
        if np.random.random() < self.choice_eps:
            mid = np.random.choice(mids)
        else:
            mid = max(interests, key=interests.get)
    elif self.model_babbling == 'softmax':
        temperature = self.choice_eps
        w = list(interests.values())
        mid = mids[softmax_choice(w, temperature)]
    elif self.model_babbling == 'prop':
        w = list(interests.values())
        mid = mids[prop_choice(w, eps=self.choice_eps)]
    else:
        # Previously an unknown strategy raised a confusing NameError on
        # `mid` below; fail fast with a clear message.
        raise ValueError("Unknown model babbling: " + str(self.model_babbling))
    self.chosen_modules.append(int(mid[3:]))
    return mid
def active_model_babbling(trial, iterations):
    """Run one trial of active model babbling on the ArmStickBalls env.

    Alternates n_explore noisy goal-directed rollouts with one exploiting
    rollout per step. Returns `res`, a list of 10 exploration measures
    (one every 10% of the run) computed on dimensions [14, 17] of the
    explored sensory states.

    NOTE: Python 2 code -- `iterations / (n_explore + 1)` relies on
    integer division, and `learning_modules.values()[...]` indexes a
    dict view; both break on Python 3.
    Depends on module-level names: ArmStickBalls, LearningModule,
    prop_choice, compute_explo, array, grid_size.
    """
    env = ArmStickBalls()
    np.random.seed(trial)
    explored_s = []
    res = []
    n_explore = 4
    m_ndims = env.conf.m_ndims  # number of motor parameters
    # Sensory space layout: 6 dims per tracked entity, laid out after the
    # motor dimensions.
    m_space = range(m_ndims)
    s_hand = range(m_ndims, m_ndims + 6)
    s_tool = range(m_ndims + 6, m_ndims + 12)
    s_ball1 = range(m_ndims + 12, m_ndims + 18)
    s_ball2 = range(m_ndims + 18, m_ndims + 24)
    s_ball3 = range(m_ndims + 24, m_ndims + 30)
    s_ball4 = range(m_ndims + 30, m_ndims + 36)
    # One learning module per sensory sub-space.
    learning_modules = {}
    learning_modules['mod1'] = LearningModule("mod1", m_space, s_hand, env.conf)
    learning_modules['mod2'] = LearningModule("mod2", m_space, s_tool, env.conf)
    learning_modules['mod3'] = LearningModule("mod3", m_space, s_ball1, env.conf)
    learning_modules['mod4'] = LearningModule("mod4", m_space, s_ball2, env.conf)
    learning_modules['mod5'] = LearningModule("mod5", m_space, s_ball3, env.conf)
    learning_modules['mod6'] = LearningModule("mod6", m_space, s_ball4, env.conf)
    for step in range(iterations / (n_explore + 1)):
        # Choose babbling module proportionally to interest (eps random).
        interests = [
            learning_modules[mid].interest() for mid in learning_modules.keys()
        ]
        #interests_evolution.append(interests)
        babbling_module = learning_modules.values()[prop_choice(interests, eps=0.2)]
        # n_explore noisy motor commands towards the module's sampled goal.
        m_list = babbling_module.produce(n=n_explore)
        for m in m_list:
            s = env.update(
                m
            )  # execute this command and observe the corresponding sensory effect
            # Filter out states where s[17] sits at its rest value 0.6
            # (keeps only informative explored states) -- presumably the
            # first ball's resting coordinate; confirm against env layout.
            if (len(explored_s) == 0) or abs(s[17] - 0.6) > 0.001:
                explored_s += [s]
            # Every module learns from every observed (m, s) pair.
            for mid in learning_modules.keys():
                learning_modules[mid].update_sm(
                    m, learning_modules[mid].get_s(array(list(m) + list(s))))
        # One exploiting rollout (no exploration noise) to update interest.
        m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims, babbling_module.x, n=1, explore=False)
        s = env.update(
            m
        )  # execute this command and observe the corresponding sensory effect
        babbling_module.update_im(
            m, babbling_module.get_s(array(list(m) + list(s))))
        for mid in learning_modules.keys():
            learning_modules[mid].update_sm(
                m, learning_modules[mid].get_s(array(list(m) + list(s))))
        # Every 10% of the run, record the exploration measure.
        if (step + 1) % ((iterations / (n_explore + 1)) / 10) == 0:
            res += [
                int(
                    compute_explo(array(explored_s)[:, [14, 17]],
                                  array([-2., -2.]),
                                  array([2., 2.]),
                                  gs=grid_size))
            ]
    return res


#from multiprocessing import Pool
#from subprocess import call
#import cPickle
#import numpy as np
#trials = 30
#iterations = 100000
#def f(condition, trial):
#    call("python run.py " + condition + " " + str(trial) + " " + str(iterations), shell=True)
#    log_dir = './logs/'
#    filename = condition + str(trial) + '.pickle'
#    with open(log_dir + filename, 'r') as f:
#        res = cPickle.load(f)
#    return res
#def run_rmb(trial): return f("rmb", trial)
#def run_rgb(trial): return f("rgb", trial)
#def run_amb(trial): return f("amb", trial)
#if __name__ == '__main__':
#    pool = Pool(30)
#    res_rmb = np.array(pool.map(run_rmb, range(trials)))
#    res_rgb = np.array(pool.map(run_rgb, range(trials)))
#    res_amb = np.array(pool.map(run_amb, range(trials)))
#%matplotlib inline
#fig, ax = plt.subplots()
#x = np.linspace(0, iterations, 11)
#plt.errorbar(x, np.append([0], np.mean(res_amb, axis=0)), np.append([0], np.std(res_amb, axis=0)), lw=2, label="Active Model Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rgb, axis=0)), np.append([0], np.std(res_rgb, axis=0)), lw=2, label="Random Goal Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rmb, axis=0)), np.append([0], np.std(res_rmb, axis=0)), lw=2, label="Random Motor Babbling")
#ax.legend(loc="upper left")
#plt.savefig('exploration_stats')
def choose_babbling_module(self, auto_create=False, progress_threshold=1e-2, mode="softmax", weight_by_level=False):
    """Select the module to babble with ('random', 'greedy', 'softmax',
    'prop' or 'prop-min'), emitting an 'interests' event first.

    NOTE(review): auto_create and progress_threshold are accepted but
    never used in this variant -- presumably kept for signature
    compatibility with the auto-creating variant; confirm.
    weight_by_level biases softmax/prop weights towards lower hierarchy
    levels. Increments self.chosen_modules[mid] and returns mid.
    """
    interests = {}
    for mid in self.modules.keys():
        interests[mid] = self.modules[mid].interest()
    self.emit("interests", [self.t, interests])
    if mode == "random":
        # NOTE: Python 2 only -- np.random.choice on a dict view.
        mid = np.random.choice(self.modules.keys())
    elif mode == "greedy":
        eps = 0.1
        if np.random.random() < eps:
            mid = np.random.choice(self.modules.keys())
        else:
            mid = max(interests, key=interests.get)
    elif mode == "softmax":
        temperature = 0.1
        # Python 2 semantics: keys() and values() of the same dict
        # iterate in the same order, keeping mids and w aligned.
        mids = interests.keys()
        w = interests.values()
        if weight_by_level:
            # Multiply each weight by f^(depth below the deepest level).
            levels = self.hierarchy.module_levels()
            for i in range(len(mids)):
                f = 2.0
                w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
        # print w
        mid = mids[softmax_choice(w, temperature)]
    elif mode == "prop":
        mids = interests.keys()
        w = interests.values()
        # if self.t % 100 == 0:
        #     print
        #     print 'iteration', self.t
        #     print "competences", np.array([self.modules[mid].competence() for mid in self.modules.keys()])
        #     if sum(w) > 0:
        #         print "interests", np.array(w)
        #         print "interesting objects:", int(((w[3] + w[6]) / sum(w))*100), "%"
        #         print "cumulated:", int((self.chosen_modules["mod4"] + self.chosen_modules["mod7"]) / float(sum(self.chosen_modules.values())) * 100), "%"
        #     print self.chosen_modules
        # print "competences", [mod.competence() for mod in self.modules.values()]
        if weight_by_level:
            levels = self.hierarchy.module_levels()
            for i in range(len(mids)):
                f = 10.0
                w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
        # print w
        mid = mids[prop_choice(w, eps=0.1)]
        # print
        # print
        # print self.chosen_modules
        # print "chosen module:", mid
    elif mode == "prop-min":
        mids = interests.keys()
        w = interests.values()
        # if self.t % 100 == 0:
        #     print "interests", np.array(w), 'iteration', self.t, "competences", np.array([self.modules[mid].competence() for mid in self.modules.keys()]), int(((w[3] + w[4]-2*min(w)) / sum(np.array(w)-min(w)))*100), "%"
        # Shift weights so the least interesting module has weight 0.
        mid = mids[prop_choice(np.array(w) - min(w), eps=0.1)]
    # NOTE(review): an unrecognized `mode` leaves `mid` unbound and
    # raises NameError on the next line.
    self.chosen_modules[mid] = self.chosen_modules[mid] + 1
    # print self.chosen_modules
    self.emit("babbling_module", mid)
    return mid
def choose_babbling_module(self, auto_create=False, progress_threshold=1e-2, mode='softmax', weight_by_level=False):
    """Pick the module to babble with.

    Modes: 'random', 'greedy' (epsilon-greedy on interest), 'softmax',
    'prop' (proportional to interest) and 'prop-min' (proportional to
    interest shifted so the minimum is zero). An 'interests' event is
    emitted first; the chosen module counter is incremented and a
    'babbling_module' event emitted before returning the module id.
    auto_create and progress_threshold are unused in this variant.
    """
    interests = {mid: self.modules[mid].interest() for mid in self.modules.keys()}
    self.emit('interests', [self.t, interests])
    if mode == 'random':
        selected = np.random.choice(self.modules.keys())
    elif mode == 'greedy':
        eps = 0.1
        explore = np.random.random() < eps
        if explore:
            selected = np.random.choice(self.modules.keys())
        else:
            selected = max(interests, key=interests.get)
    elif mode in ('softmax', 'prop', 'prop-min'):
        mids = interests.keys()
        w = interests.values()
        # Optional hierarchy bias: multiply each weight by base^(distance
        # below the deepest level). Base 2 for softmax, 10 for prop;
        # prop-min never applies the bias (as in the other variants).
        if weight_by_level and mode in ('softmax', 'prop'):
            base = 2.0 if mode == 'softmax' else 10.0
            levels = self.hierarchy.module_levels()
            deepest = max(levels.values())
            for i in range(len(mids)):
                w[i] = w[i] * np.power(base, deepest - levels[mids[i]])
        if mode == 'softmax':
            selected = mids[softmax_choice(w, 0.1)]
        elif mode == 'prop':
            selected = mids[prop_choice(w, eps=0.1)]
        else:
            selected = mids[prop_choice(np.array(w) - min(w), eps=0.1)]
    self.chosen_modules[selected] = self.chosen_modules[selected] + 1
    self.emit('babbling_module', selected)
    return selected
def choose_space_child(self, s_space, s, mode="competence", local="local", k=1):
    """Choose the child module of sensory space s_space among the modules
    that have the right sensori spaces.

    With k == 1, returns a single module id; otherwise returns a list /
    array of selection probabilities over the candidate modules
    (epsilon-smoothed). Returns None if s_space is unknown.
    NOTE: Python 2 only -- this body uses a `print` statement.
    """
    try:
        possible_mids = self.hierarchy.space_children(s_space)
    except KeyError:
        print "s_space not found in hierarchy"
        return None
    # Single candidate: nothing to choose.
    if len(possible_mids) == 1:
        mid = possible_mids[0]
        return mid
    eps = 0.05
    if mode == "competence":
        # NOTE(review): truthiness test (`if local:`) -- any non-empty
        # string takes the local path, unlike the `local == "local"`
        # comparisons in the interest branches below. Confirm intended.
        if local:
            # for mid in ["mod2", "mod5", 'mod6']:
            #     dists, idxs = self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset.nn_y(s, k=1)
            #     print mid, dists, idxs, self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset.get_xy(idxs[0]), y, s
            competences = [self.modules[pmid].competence_reached(s) for pmid in possible_mids]
            # print "sm db n points", [len(self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset) for mid in self.modules.keys()]
        else:
            competences = [self.modules[pmid].competence() for pmid in possible_mids]
        #print "choose space child", competences
        if k == 1:
            # Epsilon-greedy pick over competences.
            mid = possible_mids[greedy(competences, eps)]
            #print "chosen mid", mid
            return mid
        else:
            # Return smoothed probabilities: 1 - eps/2 on the greedy pick,
            # eps/2 on the others.
            mid = possible_mids[greedy(competences)]
            return [(1. - (eps/2.) if pmid == mid else eps/2.) for pmid in possible_mids]
    # NOTE(review): plain `if` here (not `elif`) -- harmless because the
    # "competence" branch always returns, but inconsistent with the rest.
    if mode == "competence_prop":
        if local:
            # for mid in ["mod2", "mod5", 'mod6']:
            #     dists, idxs = self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset.nn_y(s, k=1)
            #     print mid, dists, idxs, self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset.get_xy(idxs[0]), y, s
            competences = [self.modules[pmid].competence_reached(s) for pmid in possible_mids]
            #print "choose space child", competences
            # print "sm db n points", [len(self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset) for mid in self.modules.keys()]
        else:
            competences = [self.modules[pmid].competence() for pmid in possible_mids]
        if k == 1:
            mid = possible_mids[prop_choice(competences, eps)]
            return mid
        else:
            # Probabilities inversely proportional to competence values,
            # epsilon-smoothed.
            rectified = 1. / np.array(competences)
            probas = rectified / np.sum(rectified)
            return ((1. - eps) * probas + eps/2.)
    elif mode == "interest":
        if local=="local":
            interests = [self.modules[pmid].interest_pt(s) for pmid in possible_mids]
        else:
            interests = [self.modules[pmid].interest() for pmid in possible_mids]
        #print "choose space child", interests
        if k == 1:
            mid = possible_mids[greedy(interests, eps=eps)]
            #print "chosen mid", mid
            return mid
        else:
            mid = possible_mids[greedy(interests)]
            return [(1. - (eps/2.) if pmid == mid else eps/2.) for pmid in possible_mids]
    elif mode == "interest_prop":
        if local=="local":
            interests = [self.modules[pmid].interest_pt(s) for pmid in possible_mids]
        else:
            interests = [self.modules[pmid].interest() for pmid in possible_mids]
        #print "choose space child", interests
        if k == 1:
            mid = possible_mids[prop_choice(interests, eps=eps)]
            #print "chosen mid", mid
            return mid
        else:
            return ((1. - eps) * (np.array(interests) / np.sum(interests)) + eps/2.)
    elif mode == "random":
        mid = np.random.choice(possible_mids)
    # Only reachable via the "random" branch (all other branches return
    # earlier); counts how often each module is selected.
    self.chosen_modules[mid] = self.chosen_modules[mid] + 1
    return mid
def ModularGoalExplorationFIExperiment(static_env, env_config, explauto_config, representation, interest_model, n_explore, explo_ratio, explo_noise_sdev, win_size, n_exploration_iterations, n_bootstrap, seed, logdir='test', logger=None):
    """Run a modular goal-exploration experiment with a flat or modular
    sensory representation ("fixed interest" variant).

    Bootstraps with n_bootstrap random rollouts, then alternates
    n_explore noisy goal-directed rollouts with one exploiting rollout,
    choosing the babbling module proportionally to interest
    (eps=explo_ratio). Saves explored_states / interests_evolution /
    explo_evolution as .npy and the goal states as a pickle in logdir.

    NOTE(review): `logger=None` default but logger.info is called
    unconditionally -- a caller must always pass a logger.
    """
    np.random.seed(seed)
    logger.info("Bootstrapping phase")
    a = exploactors.RandomParameterizationExploration(static_env=static_env, **env_config)
    a.reset()
    a.act(n_iter=n_bootstrap, render=False)
    # Define motor and sensory spaces:
    explauto_env = ExplautoEnv(**explauto_config)
    m_ndims = explauto_env.conf.m_ndims  # number of motor parameters
    m_space = range(m_ndims)
    # We divide the explo noise by 2 to match explauto implementation with respect to our implementation
    explo_noise_sdev = explo_noise_sdev / 2
    # Create the learning modules:
    learning_modules = []
    if representation == 'flat':
        # One module over the whole 4-d sensory space.
        s_distractball = range(m_ndims, m_ndims + 4)
        learning_modules.append(
            LearningModule("mod1", m_space, s_distractball, explauto_env.conf, explo_noise=explo_noise_sdev, win_size=win_size, interest_model=interest_model))
    elif representation == 'modular':
        # Two modules: one per 2-d sensory sub-space.
        s_distract = range(m_ndims, m_ndims + 2)
        s_ball = range(m_ndims + 2, m_ndims + 4)
        learning_modules.append(
            LearningModule("mod1", m_space, s_distract, explauto_env.conf, explo_noise=explo_noise_sdev, win_size=win_size, interest_model=interest_model))
        learning_modules.append(
            LearningModule("mod2", m_space, s_ball, explauto_env.conf, explo_noise=explo_noise_sdev, win_size=win_size, interest_model=interest_model))
    else:
        raise NotImplementedError
    # We update the learning modules with the bootstrap outcomes
    for i, m in enumerate(a.actions):
        s = a.outcomes[i]
        for module in learning_modules:
            module.update_sm(m, module.get_s(np.concatenate([m, s])))
    env = static_env(**env_config)
    env.reset()
    outcomes_states = a.outcomes_states
    interests_evolution = []
    explo_evolution = []
    goals_states = []
    logger.info("Starting exploration")
    # Steps of (4 exploring and 1 exploiting iterations):
    for step in range(n_exploration_iterations // (n_explore + 1)):
        if (step + 1) % 100 == 0:
            logger.info("Iteration: %i / %i" % ((step + 1) * (n_explore + 1), n_exploration_iterations))
        # Compute the interest of modules
        interests = [module.interest() for module in learning_modules]
        interests_evolution.append(interests)
        # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
        babbling_choice = prop_choice(interests, eps=explo_ratio)
        babbling_module = learning_modules[babbling_choice]
        # The babbling module picks a random goal in its sensory space and returns 4 noisy motor commands:
        m_list = babbling_module.produce(n=n_explore)
        goal = babbling_module.s
        goals_states.append([babbling_choice, goal])
        for m in m_list:
            env.reset()
            env.act(action=m, render=False)
            s = env.observation
            outcomes_states += [env.hidden_state]
            # Update each sensorimotor models:
            for module in learning_modules:
                module.update_sm(m, module.get_s(np.concatenate([m, s])))
        # Choose the best motor command to reach current goal (with no noise):
        m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims, babbling_module.x, n=1, explore=False)
        env.reset()
        env.act(action=m, render=False)
        s = env.observation
        outcomes_states += [env.hidden_state]
        # Update the interest of the babbling module:
        babbling_module.update_im(
            m, babbling_module.get_s(np.concatenate([m, s])))
        # Update each sensorimotor models:
        for module in learning_modules:
            module.update_sm(m, module.get_s(np.concatenate([m, s])))
        # Percentage of iterations spent on each module so far.
        explos_modules = [
            int(100. * (n_explore + 1) * module.im.n_points() / float(module.sm.t))
            for module in learning_modules
        ]
        explo_evolution.append(explos_modules)
    logger.info("Exploration finished, saving data")
    # We save the set of explored states and interests evolution for each representation
    explored_states = np.array(outcomes_states)
    np.save(os.path.join(logdir, 'explored_states'), explored_states.astype(np.float32))
    interests_evolution = np.array(interests_evolution)
    np.save(os.path.join(logdir, 'interests_evolution'), interests_evolution.astype(np.float32))
    explo_evolution = np.array(explo_evolution)
    np.save(os.path.join(logdir, 'explo_evolution'), explo_evolution.astype(np.float32))
    # We save the set of goals states
    with open(logdir + '/goal_states', 'wb') as f:
        pickle.dump(goals_states, f)
def run(self, iterations=100000, profile=False, print_logs=False):
    """Main experiment loop: repeatedly choose a babbling module according
    to self.condition and execute/perceive its motor commands until
    self.i reaches `iterations`.

    Conditions: "RMB" (random module), "AMB" (active, proportional to
    interest), "FRGB"/"rmb" (single flat module), "SGS" (mod4 only),
    "FC" (fixed curriculum over 5 modules).
    NOTE(review): self.i is presumably advanced inside
    self.execute_perceive -- it is never incremented here; confirm.
    NOTE: time.clock() was removed in Python 3.8 (use
    time.perf_counter() when porting).
    """
    if profile:
        cp = cProfile.Profile()
        cp.enable()
    t = time.clock()
    while self.i < iterations:
        if print_logs:
            # Print number of iterations up to now:
            if self.i - self.last_print > 1000:
                self.last_print = 1000 * (self.i // 1000)
                print("\nIteration:", self.i)
                print("Time:", int(10. * (time.clock() - t)) / 10.)
                print("Average steps", int(10. * self.avg_steps) / 10.)
                # Per-entity movement counters since the last report.
                print(
                    "n_stick1_moved",
                    self.environment.n_stick1_moved - self.n_stick1_moved)
                print(
                    "n_stick2_moved",
                    self.environment.n_stick2_moved - self.n_stick2_moved)
                print("n_obj1_moved",
                      self.environment.n_obj1_moved - self.n_obj1_moved)
                print("n_obj2_moved",
                      self.environment.n_obj2_moved - self.n_obj2_moved)
                self.n_stick1_moved = self.environment.n_stick1_moved
                self.n_stick2_moved = self.environment.n_stick2_moved
                self.n_obj1_moved = self.environment.n_obj1_moved
                self.n_obj2_moved = self.environment.n_obj2_moved
                if self.condition == "AMB":
                    for mid in [
                            "mod1", "mod2", "mod3", "mod4", "mod7", "mod10"
                    ]:
                        if mid in self.learning_modules:
                            print(
                                "Interest of module", mid, ":",
                                int(1000. * self.learning_modules[mid].
                                    interest_model.current_interest) / 1000.)
                # Reset the interval timer after reporting.
                # NOTE(review): placement inferred from a collapsed
                # source line -- confirm it belongs inside this block.
                t = time.clock()
        # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
        if self.condition == "RMB":
            # Get the interest of modules
            interests = [
                self.learning_modules[mid].interest()
                for mid in self.learning_modules.keys()
            ]
            self.interests_evolution.append(interests)
            babbling_module = np.random.choice(
                list(self.learning_modules.values()))
        elif self.condition == "AMB":
            # Get the interest of modules
            interests = [
                self.learning_modules[mid].interest()
                for mid in self.learning_modules.keys()
            ]
            self.interests_evolution.append(interests)
            babbling_module = list(
                self.learning_modules.values())[prop_choice(interests, eps=0.2)]
            #babbling_module = self.learning_modules["mod1"]
        elif self.condition == "FRGB" or self.condition == "rmb":
            babbling_module = self.learning_modules["mod1"]
        elif self.condition == "SGS":
            babbling_module = self.learning_modules["mod4"]
        elif self.condition == "FC":
            # Fixed curriculum: split the run into len(fc) equal phases.
            fc = ["mod1", "mod2", "mod4", "mod3", "mod7"]
            m = self.i // (iterations // len(fc))
            babbling_module = self.learning_modules[fc[m]]
        else:
            raise NotImplementedError
        # The babbling module picks a random goal in its sensory space and returns 4 noisy motor commands:
        if babbling_module.t < babbling_module.motor_babbling_n_iter or np.random.random(
        ) < self.rmb_prop or self.condition == "rmb":
            # Pure motor babbling (bootstrap or rmb_prop chance).
            m = babbling_module.motor_babbling(steps=1)
            ms_array, steps = self.execute_perceive(m)
            self.chosen_modules.append("random")
        else:
            self.chosen_modules.append(babbling_module.mid)
            # Sample a goal, then n_explore noisy attempts to reach it.
            sg = babbling_module.interest_model.sample()
            babbling_module.sg = sg
            for _ in range(self.n_explore):
                m = babbling_module.inverse(sg)
                ms_array, steps = self.execute_perceive(m)
                # Exponential moving average of rollout lengths.
                self.avg_steps = self.avg_steps * 0.99 + 0.01 * steps
            # Update Interest
            if self.condition == "AMB":
                # One exploiting attempt (no noise) to measure progress.
                m = babbling_module.inverse(sg, explore=False)
                ms_array, steps = self.execute_perceive(m)
                babbling_module.update_im(
                    m, np.concatenate(ms_array[:, babbling_module.s_space]))
    if profile:
        cp.disable()
        cp.dump_stats("test.cprof")
    if print_logs:
        print("n stick1_moved", self.environment.n_stick1_moved)
        print("n stick2_moved", self.environment.n_stick2_moved)
        print("n obj1_moved", self.environment.n_obj1_moved)
        print("n obj2_moved", self.environment.n_obj2_moved)
        print()
        print("Parameters:", iterations, self.explo_noise,
              self.optim_explo, self.condition, self.distractors)
def active_model_babbling(trial, iterations):
    """Run one trial of active model babbling on the ArmStickBalls env
    (near-duplicate of the variant above, with denser formatting).

    Returns `res`, 10 exploration measures (one every 10% of the run)
    computed on dimensions [14, 17] of the explored states.
    NOTE: Python 2 code -- relies on integer `/` division and on
    indexing `learning_modules.values()`.
    Depends on module-level names: ArmStickBalls, LearningModule,
    prop_choice, compute_explo, array, grid_size.
    """
    env = ArmStickBalls()
    np.random.seed(trial)
    explored_s = []
    res = []
    n_explore=4
    m_ndims = env.conf.m_ndims  # number of motor parameters
    # Sensory layout: 6 dims per entity after the motor dimensions.
    m_space = range(m_ndims)
    s_hand = range(m_ndims, m_ndims+6)
    s_tool = range(m_ndims+6, m_ndims+12)
    s_ball1 = range(m_ndims+12, m_ndims+18)
    s_ball2 = range(m_ndims+18, m_ndims+24)
    s_ball3 = range(m_ndims+24, m_ndims+30)
    s_ball4 = range(m_ndims+30, m_ndims+36)
    # One learning module per sensory sub-space.
    learning_modules = {}
    learning_modules['mod1'] = LearningModule("mod1", m_space, s_hand, env.conf)
    learning_modules['mod2'] = LearningModule("mod2", m_space, s_tool, env.conf)
    learning_modules['mod3'] = LearningModule("mod3", m_space, s_ball1, env.conf)
    learning_modules['mod4'] = LearningModule("mod4", m_space, s_ball2, env.conf)
    learning_modules['mod5'] = LearningModule("mod5", m_space, s_ball3, env.conf)
    learning_modules['mod6'] = LearningModule("mod6", m_space, s_ball4, env.conf)
    for step in range(iterations / (n_explore + 1)):
        # Choose the babbling module proportionally to interest (eps random).
        interests = [learning_modules[mid].interest() for mid in learning_modules.keys()]
        #interests_evolution.append(interests)
        babbling_module = learning_modules.values()[prop_choice(interests, eps=0.2)]
        # n_explore noisy motor commands towards the module's sampled goal.
        m_list = babbling_module.produce(n=n_explore)
        for m in m_list:
            s = env.update(m)  # execute this command and observe the corresponding sensory effect
            # Keep only informative states (s[17] away from its rest value 0.6).
            if (len(explored_s) == 0) or abs(s[17] - 0.6) > 0.001:
                explored_s += [s]
            # Every module learns from every observed (m, s) pair.
            for mid in learning_modules.keys():
                learning_modules[mid].update_sm(m, learning_modules[mid].get_s(array(list(m) + list(s))))
        # One exploiting rollout (no noise) to update the interest model.
        m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims, babbling_module.x, n=1, explore=False)
        s = env.update(m)  # execute this command and observe the corresponding sensory effect
        babbling_module.update_im(m, babbling_module.get_s(array(list(m)+list(s))))
        for mid in learning_modules.keys():
            learning_modules[mid].update_sm(m, learning_modules[mid].get_s(array(list(m) + list(s))))
        # Every 10% of the run, record the exploration measure.
        if (step+1) % ((iterations / (n_explore + 1))/10) == 0:
            res += [int(compute_explo(array(explored_s)[:,[14,17]], array([-2., -2.]), array([2., 2.]), gs=grid_size))]
    return res


#from multiprocessing import Pool
#from subprocess import call
#import cPickle
#import numpy as np
#trials = 30
#iterations = 100000
#def f(condition, trial):
#    call("python run.py " + condition + " " + str(trial) + " " + str(iterations), shell=True)
#    log_dir = './logs/'
#    filename = condition + str(trial) + '.pickle'
#    with open(log_dir + filename, 'r') as f:
#        res = cPickle.load(f)
#    return res
#def run_rmb(trial): return f("rmb", trial)
#def run_rgb(trial): return f("rgb", trial)
#def run_amb(trial): return f("amb", trial)
#if __name__ == '__main__':
#    pool = Pool(30)
#    res_rmb = np.array(pool.map(run_rmb, range(trials)))
#    res_rgb = np.array(pool.map(run_rgb, range(trials)))
#    res_amb = np.array(pool.map(run_amb, range(trials)))
#%matplotlib inline
#fig, ax = plt.subplots()
#x = np.linspace(0, iterations, 11)
#plt.errorbar(x, np.append([0], np.mean(res_amb, axis=0)), np.append([0], np.std(res_amb, axis=0)), lw=2, label="Active Model Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rgb, axis=0)), np.append([0], np.std(res_rgb, axis=0)), lw=2, label="Random Goal Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rmb, axis=0)), np.append([0], np.std(res_rmb, axis=0)), lw=2, label="Random Motor Babbling")
#ax.legend(loc="upper left")
#plt.savefig('exploration_stats')