Example #1
    def choose_babbling_module(self):
        # Map the configured babbling strategy onto a choice mode.
        if self.model_babbling == "random":
            mode = "random"
        elif self.model_babbling == "active":
            mode = "prop"
        else:
            raise ValueError("Unknown model_babbling: %s" % self.model_babbling)
        interests = {}
        for mid in self.modules.keys():
            interests[mid] = self.modules[mid].interest()

        mids = list(interests.keys())
        if mode == 'random':
            mid = np.random.choice(mids)
        elif mode == 'greedy':
            # Epsilon-greedy: random module with probability choice_eps,
            # otherwise the module with maximal interest.
            if np.random.random() < self.choice_eps:
                mid = np.random.choice(mids)
            else:
                mid = max(interests, key=interests.get)
        elif mode == 'softmax':
            temperature = self.choice_eps
            w = list(interests.values())
            mid = mids[softmax_choice(w, temperature)]
        elif mode == 'prop':
            # Probability proportional to interest, with an epsilon of random choice.
            w = list(interests.values())
            mid = mids[prop_choice(w, eps=self.choice_eps)]

        self.chosen_modules.append(mid)
        return mid
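
Note: prop_choice and softmax_choice are utility functions that are not defined in any of these examples. The following is a minimal sketch of their assumed semantics, inferred from the call sites (both return an index into the weight list; prop_choice mixes sampling proportional to the weights with an eps chance of a uniform random pick):

import numpy as np

def prop_choice(w, eps=0.):
    # Assumed behavior: with probability eps pick a uniform random index,
    # otherwise sample index i with probability proportional to w[i].
    w = np.array(w, dtype=float)
    if np.random.random() < eps or w.sum() <= 0.:
        return np.random.randint(len(w))
    return np.random.choice(len(w), p=w / w.sum())

def softmax_choice(w, temperature):
    # Assumed behavior: sample index i with probability
    # proportional to exp(w[i] / temperature).
    w = np.array(w, dtype=float)
    p = np.exp(w / temperature)
    return np.random.choice(len(w), p=p / p.sum())
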
Example #2
    def choose_babbling_module(self, mode='prop'):
        interests = {}
        for mid in self.modules.keys():
            # The hand module ("mod1") gets zero interest when the hand is disabled.
            if self.enable_hand or mid != "mod1":
                interests[mid] = self.modules[mid].interest()
            else:
                interests[mid] = 0.

        mids = list(interests.keys())
        if mode == 'random':
            mid = np.random.choice(mids)
        elif mode == 'greedy':
            eps = 0.2
            if np.random.random() < eps:
                mid = np.random.choice(mids)
            else:
                mid = max(interests, key=interests.get)
        elif mode == 'softmax':
            temperature = 0.1
            w = list(interests.values())
            mid = mids[softmax_choice(w, temperature)]
        elif mode == 'prop':
            w = list(interests.values())
            mid = mids[prop_choice(w, eps=self.choice_eps)]

        self.chosen_modules.append(mid)
        return mid
Example #3
    def choose_space_child(self, s_space, s, mode="competence", local="local"):
        """ 
        Choose the children of space s_space among modules that have
        the good sensori spaces, maximizing competence.
        """
        try:
            possible_mids = self.hierarchy.space_children(s_space)
        except KeyError:
            return None
        if len(possible_mids) == 1:
            return possible_mids[0]
        y = self.set_ms(s=s)[s_space]
        if mode == "competence":
            if local:
                competences = [
                    -self.modules[pmid].sensorimotor_model.model.imodel.fmodel.
                    dataset.nn_y(y, k=1)[0][0] for pmid in possible_mids
                ]
            else:
                competences = [
                    self.modules[pmid].competence() for pmid in possible_mids
                ]
            return possible_mids[np.array(competences).argmax()]

        elif mode == "interest_greedy":
            eps = 0.1
            if np.random.random() < eps:
                return np.random.choice(possible_mids)
            else:
                if local == "local":
                    interests = [
                        self.modules[pmid].interest_pt(y)
                        for pmid in possible_mids
                    ]
                else:
                    interests = [
                        self.modules[pmid].interest() for pmid in possible_mids
                    ]
                return possible_mids[np.array(interests).argmax()]

        elif mode == "interest_prop":
            eps = 0.1
            if np.random.random() < eps:
                return np.random.choice(possible_mids)
            else:
                if local == "local":
                    interests = [
                        self.modules[pmid].interest_pt(y)
                        for pmid in possible_mids
                    ]
                else:
                    interests = [
                        self.modules[pmid].interest() for pmid in possible_mids
                    ]
                return possible_mids[prop_choice(interests, eps=eps)]

        elif mode == "random":
            mid = np.random.choice(possible_mids)
            return mid
    def choose_babbling_module(self, auto_create=False, progress_threshold=1e-2, mode='softmax', weight_by_level=False):
        interests = {}
        for mid in self.modules.keys():
            interests[mid] = self.modules[mid].interest()
            self.emit('interest_' + mid, [self.t, interests[mid]])
            self.emit('competence_' + mid, [self.t, self.modules[mid].competence()])
        max_progress = max(interests.values())

        if not auto_create or max_progress > progress_threshold:
            mids = list(interests.keys())
            if mode == 'random':
                mid = np.random.choice(mids)
            elif mode == 'greedy':
                eps = 0.1
                if np.random.random() < eps:
                    mid = np.random.choice(mids)
                else:
                    mid = max(interests, key=interests.get)
            elif mode == 'softmax':
                temperature = 0.1
                w = list(interests.values())
                if weight_by_level:
                    # Boost lower-level modules: multiply each interest by
                    # f to the power of the depth below the deepest level.
                    f = 2.0
                    levels = self.hierarchy.module_levels()
                    for i in range(len(mids)):
                        w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
                mid = mids[softmax_choice(w, temperature)]

            elif mode == 'prop':
                w = list(interests.values())
                if weight_by_level:
                    f = 10.0
                    levels = self.hierarchy.module_levels()
                    for i in range(len(mids)):
                        w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
                mid = mids[prop_choice(w, eps=0.1)]

            self.chosen_modules[mid] = self.chosen_modules[mid] + 1
            self.emit('babbling_module', mid)
            return mid
        else:
            # No module is making enough progress: create a new module.
            return self.create_module()
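
To make the weight_by_level factor concrete: a module's interest is multiplied by f raised to its depth below the deepest level, so lower-level modules are boosted. A small worked example with hypothetical level values:

levels = {'mod1': 0, 'mod2': 1, 'mod3': 2}  # hypothetical hierarchy levels
f = 2.0
factors = {m: f ** (max(levels.values()) - l) for m, l in levels.items()}
# factors == {'mod1': 4.0, 'mod2': 2.0, 'mod3': 1.0}
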
    def act(self, n_iter=1, **kwargs):

        assert n_iter > 0

        # Steps of (n_explore exploring and 1 exploiting iterations):
        for step in range(n_iter // (self._n_explore + 1)):
            # Compute the interest of modules
            interests = [module.interest() for module in self._learning_modules]
            self._interests_evolution.append(interests)
            # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
            choice = prop_choice(interests, eps=self._explo_ratio)
            babbling_module = self._learning_modules[choice]
            # The babbling module picks a random goal in its sensory space and returns n_explore noisy motor commands:
            m_list = babbling_module.produce(n=self._n_explore)
            goal = babbling_module.s
            _, indexes = babbling_module.sensorimotor_model.model.imodel.fmodel.dataset.nn_y(goal)
            self._goals_states.append(self._outcomes_states[indexes[0]])
            for m in m_list:
                # We perform the actions and observe outcomes
                self._env.reset()
                self._env.act(action=m, **kwargs)
                self._actions.append(m)
                outcome = self._env.observation
                # We represent the raw outcome
                self._rep.act(X_pred=outcome)
                s = self._rep.representation.ravel()
                # self._outcomes.append(outcome)
                self._outcomes_reps.append(s)
                self._outcomes_states.append(self._env.hidden_state)
                # Update each sensorimotor model:
                for module in self._learning_modules:
                    module.update_sm(m, module.get_s(np.concatenate([m, s])))
            # Choose the best motor command to reach current goal (with no noise):
            m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims,
                                      babbling_module.x, n=1, explore=False)
            # We perform the action and observe outcomes
            self._env.reset()
            self._env.act(action=m, **kwargs)
            self._actions.append(m)
            outcome = self._env.observation
            # We represent the raw outcome
            self._rep.act(X_pred=outcome)
            s = self._rep.representation.ravel()
            # self._outcomes.append(outcome)
            self._outcomes_reps.append(s)
            self._outcomes_states.append(self._env.hidden_state)
            # Update the interest of the babbling module:
            babbling_module.update_im(m, babbling_module.get_s(np.concatenate([m, s])))
            # Update each sensorimotor model:
            for module in self._learning_modules:
                module.update_sm(m, module.get_s(np.concatenate([m, s])))
            explos_modules = [int(100. * (self._n_explore + 1) * module.im.n_points() / float(module.sm.t)) for module in
                              self._learning_modules]
            self._explo_evolution.append(explos_modules)
    def choose_space_child(self, s_space, s, mode="competence", local="local"):
        """ 
        Choose the children of space s_space among modules that have
        the good sensori spaces, maximizing competence.
        """
        try:
            possible_mids = self.hierarchy.space_children(s_space)
        except KeyError:
            return None
        if len(possible_mids) == 1:
            return possible_mids[0]
        y = self.set_ms(s=s)[s_space]
        if mode == "competence":
            if local:
                competences = [
                    -self.modules[pmid].sensorimotor_model.model.imodel.fmodel.dataset.nn_y(y, k=1)[0][0]
                    for pmid in possible_mids
                ]
            else:
                competences = [self.modules[pmid].competence() for pmid in possible_mids]
            return possible_mids[np.array(competences).argmax()]

        elif mode == "interest_greedy":
            eps = 0.1
            if np.random.random() < eps:
                return np.random.choice(possible_mids)
            else:
                if local == "local":
                    interests = [self.modules[pmid].interest_pt(y) for pmid in possible_mids]
                else:
                    interests = [self.modules[pmid].interest() for pmid in possible_mids]
                return possible_mids[np.array(interests).argmax()]

        elif mode == "interest_prop":
            eps = 0.1
            if np.random.random() < eps:
                return np.random.choice(possible_mids)
            else:
                if local == "local":
                    interests = [self.modules[pmid].interest_pt(y) for pmid in possible_mids]
                else:
                    interests = [self.modules[pmid].interest() for pmid in possible_mids]
                return possible_mids[prop_choice(interests, eps=eps)]

        elif mode == "random":
            mid = np.random.choice(possible_mids)
            return mid
    def choose_interesting_space(self, mode='softmax'):
        s_spaces = self.config.s_spaces
        interests = {}
        for s_space in s_spaces.keys():
            if s_space == "s_h":
                interests[s_space] = self.modules["mod1"].interest()
            elif s_space == "s_t1":
                interests[s_space] = self.modules["mod2"].interest()
            elif s_space == "s_o":
                interests[s_space] = np.sum([self.modules[mid].interest() for mid in ["mod3", "mod4"]])

        self.emit('interests', [self.t, interests])

        spaces = list(interests.keys())
        if mode == 'random':
            s_space = np.random.choice(spaces)
        elif mode == 'greedy':
            eps = 0.2
            if np.random.random() < eps:
                s_space = np.random.choice(spaces)
            else:
                s_space = max(interests, key=interests.get)
        elif mode == 'softmax':
            temperature = 0.1
            w = list(interests.values())
            s_space = spaces[softmax_choice(w, temperature)]
        elif mode == 'prop':
            w = list(interests.values())
            s_space = spaces[prop_choice(w, eps=0.2)]
            if self.t % 200 == 1:
                # Periodic debug summary of the learning state.
                print()
                print('iterations', self.t - 1)
                print("competences", np.array([self.modules[mid].competence() for mid in self.modules.keys()]))
                print("progresses", np.array([self.modules[mid].interest_model.current_progress for mid in self.modules.keys()]))
                print("interests", np.array([self.modules[mid].interest() for mid in self.modules.keys()]))
                print("sm db n points", [len(self.modules[mid].sensorimotor_model.model.imodel.fmodel.dataset) for mid in self.modules.keys()])
                print("im db n points", [len(self.modules[mid].interest_model.data_xc) for mid in self.modules.keys()])
                print(self.chosen_modules)
                print("made tool moved object", self.credit_tool_move)
                print("made hand moved object", self.credit_hand_move)

        self.chosen_spaces[s_space] = self.chosen_spaces[s_space] + 1
        return s_space
Example #8
    def choose_babbling_module(self):
        interests = {}
        for mid in self.modules.keys():
            interests[mid] = self.modules[mid].interest()
        mids = list(interests.keys())
        if self.model_babbling == 'random':
            if np.random.random() < self.arm_goal_selection:
                mid = np.random.choice(self.arm_modules)
            else:
                mid = np.random.choice(self.diva_modules)
        elif self.model_babbling == 'hand_object_sound':
            # Each of the three groups (hand, objects, sounds) is chosen with
            # probability 1/3: the second test draws a fresh random number,
            # so the middle branch has probability (2/3) * (1/2) = 1/3.
            if np.random.random() < 1. / 3.:
                mid = 'mod1'
            elif np.random.random() < 1. / 2.:
                mid = np.random.choice([
                    'mod2', 'mod3', 'mod4', 'mod5', 'mod10', 'mod11', 'mod12'
                ])
            else:
                mid = np.random.choice(['mod6', 'mod13', 'mod14'])
        elif self.model_babbling == 'object_sound':
            if np.random.random() < 1. / 2.:
                mid = np.random.choice([
                    'mod1', 'mod2', 'mod3', 'mod4', 'mod5', 'mod10', 'mod11',
                    'mod12'
                ])
            else:
                mid = np.random.choice(['mod6', 'mod13', 'mod14'])
        elif self.model_babbling == 'greedy':
            if np.random.random() < self.choice_eps:
                mid = np.random.choice(mids)
            else:
                mid = max(interests, key=interests.get)
        elif self.model_babbling == 'softmax':
            temperature = self.choice_eps
            w = list(interests.values())
            mid = mids[softmax_choice(w, temperature)]
        elif self.model_babbling == 'prop':
            w = list(interests.values())
            mid = mids[prop_choice(w, eps=self.choice_eps)]

        # Store the numeric module id ("mod12" -> 12).
        self.chosen_modules.append(int(mid[3:]))
        return mid
Example #9
def active_model_babbling(trial, iterations):
    env = ArmStickBalls()
    np.random.seed(trial)
    explored_s = []
    res = []
    n_explore = 4
    m_ndims = env.conf.m_ndims  # number of motor parameters
    m_space = range(m_ndims)
    s_hand = range(m_ndims, m_ndims + 6)
    s_tool = range(m_ndims + 6, m_ndims + 12)
    s_ball1 = range(m_ndims + 12, m_ndims + 18)
    s_ball2 = range(m_ndims + 18, m_ndims + 24)
    s_ball3 = range(m_ndims + 24, m_ndims + 30)
    s_ball4 = range(m_ndims + 30, m_ndims + 36)
    learning_modules = {}
    learning_modules['mod1'] = LearningModule("mod1", m_space, s_hand,
                                              env.conf)
    learning_modules['mod2'] = LearningModule("mod2", m_space, s_tool,
                                              env.conf)
    learning_modules['mod3'] = LearningModule("mod3", m_space, s_ball1,
                                              env.conf)
    learning_modules['mod4'] = LearningModule("mod4", m_space, s_ball2,
                                              env.conf)
    learning_modules['mod5'] = LearningModule("mod5", m_space, s_ball3,
                                              env.conf)
    learning_modules['mod6'] = LearningModule("mod6", m_space, s_ball4,
                                              env.conf)
    for step in range(iterations // (n_explore + 1)):
        interests = [
            learning_modules[mid].interest()
            for mid in learning_modules.keys()
        ]
        #interests_evolution.append(interests)
        babbling_module = list(learning_modules.values())[prop_choice(interests, eps=0.2)]
        m_list = babbling_module.produce(n=n_explore)
        for m in m_list:
            s = env.update(
                m
            )  # execute this command and observe the corresponding sensory effect
            if (len(explored_s) == 0) or abs(s[17] - 0.6) > 0.001:
                explored_s += [s]
            for mid in learning_modules.keys():
                learning_modules[mid].update_sm(
                    m, learning_modules[mid].get_s(array(list(m) + list(s))))
        m = babbling_module.infer(babbling_module.expl_dims,
                                  babbling_module.inf_dims,
                                  babbling_module.x,
                                  n=1,
                                  explore=False)
        s = env.update(
            m
        )  # execute this command and observe the corresponding sensory effect
        babbling_module.update_im(
            m, babbling_module.get_s(array(list(m) + list(s))))
        for mid in learning_modules.keys():
            learning_modules[mid].update_sm(
                m, learning_modules[mid].get_s(array(list(m) + list(s))))
        if (step + 1) % ((iterations // (n_explore + 1)) // 10) == 0:
            res += [
                int(
                    compute_explo(array(explored_s)[:, [14, 17]],
                                  array([-2., -2.]),
                                  array([2., 2.]),
                                  gs=grid_size))
            ]
    return res
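
compute_explo and grid_size are defined outside this example. From the call above, compute_explo appears to measure exploration as the number of cells of a gs-per-dimension grid over [mins, maxs] that contain at least one explored point; a minimal sketch under that assumption:

import numpy as np

def compute_explo(data, mins, maxs, gs=100):
    # Assumed behavior: count the grid cells (gs per dimension over
    # [mins, maxs]) that contain at least one of the explored points.
    cell_sizes = (maxs - mins) / gs
    grid = np.zeros([gs] * len(mins))
    for point in data:
        idxs = np.clip(((point - mins) / cell_sizes).astype(int), 0, gs - 1)
        grid[tuple(idxs)] += 1
    return np.sum(grid > 0)
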


#from multiprocessing import Pool
#from subprocess import call
#import cPickle
#import numpy as np

#trials = 30
#iterations = 100000

#def f(condition, trial):
#    call("python run.py " + condition + " " + str(trial) + " " + str(iterations), shell=True)
#    log_dir = './logs/'
#    filename = condition + str(trial) + '.pickle'
#    with open(log_dir + filename, 'r') as f:
#        res = cPickle.load(f)
#    return res

#def run_rmb(trial): return f("rmb", trial)
#def run_rgb(trial): return f("rgb", trial)
#def run_amb(trial): return f("amb", trial)

#if __name__ == '__main__':
#    pool = Pool(30)
#    res_rmb = np.array(pool.map(run_rmb, range(trials)))
#    res_rgb = np.array(pool.map(run_rgb, range(trials)))
#    res_amb = np.array(pool.map(run_amb, range(trials)))

#%matplotlib inline
#fig, ax = plt.subplots()
#x = np.linspace(0, iterations, 11)
#plt.errorbar(x, np.append([0], np.mean(res_amb, axis=0)), np.append([0], np.std(res_amb, axis=0)), lw=2, label="Active Model Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rgb, axis=0)), np.append([0], np.std(res_rgb, axis=0)), lw=2, label="Random Goal Babbling")
#plt.errorbar(x, np.append([0], np.mean(res_rmb, axis=0)), np.append([0], np.std(res_rmb, axis=0)), lw=2, label="Random Motor Babbling")
#ax.legend(loc="upper left")
#plt.savefig('exploration_stats')
    def choose_babbling_module(self, auto_create=False, progress_threshold=1e-2, mode="softmax", weight_by_level=False):
        interests = {}
        for mid in self.modules.keys():
            interests[mid] = self.modules[mid].interest()
        self.emit("interests", [self.t, interests])

        mids = list(interests.keys())
        if mode == "random":
            mid = np.random.choice(mids)
        elif mode == "greedy":
            eps = 0.1
            if np.random.random() < eps:
                mid = np.random.choice(mids)
            else:
                mid = max(interests, key=interests.get)
        elif mode == "softmax":
            temperature = 0.1
            w = list(interests.values())
            if weight_by_level:
                # Boost lower-level modules: multiply each interest by
                # f to the power of the depth below the deepest level.
                f = 2.0
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
            mid = mids[softmax_choice(w, temperature)]

        elif mode == "prop":
            w = list(interests.values())
            if weight_by_level:
                f = 10.0
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    w[i] = w[i] * np.power(f, max(levels.values()) - levels[mids[i]])
            mid = mids[prop_choice(w, eps=0.1)]

        elif mode == "prop-min":
            # Same as "prop" but with interests shifted so the minimum is zero.
            w = list(interests.values())
            mid = mids[prop_choice(np.array(w) - min(w), eps=0.1)]

        self.chosen_modules[mid] = self.chosen_modules[mid] + 1
        self.emit("babbling_module", mid)
        return mid
Example #11
    def choose_babbling_module(self,
                               auto_create=False,
                               progress_threshold=1e-2,
                               mode='softmax',
                               weight_by_level=False):
        interests = {}
        for mid in self.modules.keys():
            interests[mid] = self.modules[mid].interest()
        self.emit('interests', [self.t, interests])

        mids = list(interests.keys())
        if mode == 'random':
            mid = np.random.choice(mids)
        elif mode == 'greedy':
            eps = 0.1
            if np.random.random() < eps:
                mid = np.random.choice(mids)
            else:
                mid = max(interests, key=interests.get)
        elif mode == 'softmax':
            temperature = 0.1
            w = list(interests.values())
            if weight_by_level:
                # Boost lower-level modules: multiply each interest by
                # f to the power of the depth below the deepest level.
                f = 2.0
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    w[i] = w[i] * np.power(
                        f,
                        max(levels.values()) - levels[mids[i]])
            mid = mids[softmax_choice(w, temperature)]

        elif mode == 'prop':
            w = list(interests.values())
            if weight_by_level:
                f = 10.0
                levels = self.hierarchy.module_levels()
                for i in range(len(mids)):
                    w[i] = w[i] * np.power(
                        f,
                        max(levels.values()) - levels[mids[i]])
            mid = mids[prop_choice(w, eps=0.1)]

        elif mode == 'prop-min':
            # Same as 'prop' but with interests shifted so the minimum is zero.
            w = list(interests.values())
            mid = mids[prop_choice(np.array(w) - min(w), eps=0.1)]

        self.chosen_modules[mid] = self.chosen_modules[mid] + 1
        self.emit('babbling_module', mid)
        return mid
    def choose_space_child(self, s_space, s, mode="competence", local="local", k=1):
        """
        Choose the child of space s_space among the modules that have
        the right sensory spaces, maximizing competence.
        """
        try:
            possible_mids = self.hierarchy.space_children(s_space)
        except KeyError:
            print("s_space not found in hierarchy")
            return None
        if len(possible_mids) == 1:
            return possible_mids[0]

        eps = 0.05
        if mode == "competence":
            if local == "local":
                competences = [self.modules[pmid].competence_reached(s) for pmid in possible_mids]
            else:
                competences = [self.modules[pmid].competence() for pmid in possible_mids]
            if k == 1:
                return possible_mids[greedy(competences, eps)]
            else:
                # Return a probability distribution over children instead of a single choice.
                mid = possible_mids[greedy(competences)]
                return [(1. - (eps / 2.) if pmid == mid else eps / 2.) for pmid in possible_mids]

        elif mode == "competence_prop":
            if local == "local":
                competences = [self.modules[pmid].competence_reached(s) for pmid in possible_mids]
            else:
                competences = [self.modules[pmid].competence() for pmid in possible_mids]
            if k == 1:
                return possible_mids[prop_choice(competences, eps)]
            else:
                rectified = 1. / np.array(competences)
                probas = rectified / np.sum(rectified)
                return ((1. - eps) * probas + eps / 2.)

        elif mode == "interest":
            if local == "local":
                interests = [self.modules[pmid].interest_pt(s) for pmid in possible_mids]
            else:
                interests = [self.modules[pmid].interest() for pmid in possible_mids]
            if k == 1:
                return possible_mids[greedy(interests, eps=eps)]
            else:
                mid = possible_mids[greedy(interests)]
                return [(1. - (eps / 2.) if pmid == mid else eps / 2.) for pmid in possible_mids]

        elif mode == "interest_prop":
            if local == "local":
                interests = [self.modules[pmid].interest_pt(s) for pmid in possible_mids]
            else:
                interests = [self.modules[pmid].interest() for pmid in possible_mids]
            if k == 1:
                return possible_mids[prop_choice(interests, eps=eps)]
            else:
                return ((1. - eps) * (np.array(interests) / np.sum(interests)) + eps / 2.)

        elif mode == "random":
            mid = np.random.choice(possible_mids)
            self.chosen_modules[mid] = self.chosen_modules[mid] + 1
            return mid
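
The greedy helper used above is also external to these examples. Both call forms seen here (greedy(values, eps) and greedy(values), the latter implying no random exploration) are consistent with this epsilon-greedy sketch (assumed semantics):

import numpy as np

def greedy(w, eps=0.):
    # Assumed behavior: return the argmax index with probability 1 - eps,
    # a uniform random index with probability eps.
    if np.random.random() < eps:
        return np.random.randint(len(w))
    return int(np.argmax(w))
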
def ModularGoalExplorationFIExperiment(static_env,
                                       env_config,
                                       explauto_config,
                                       representation,
                                       interest_model,
                                       n_explore,
                                       explo_ratio,
                                       explo_noise_sdev,
                                       win_size,
                                       n_exploration_iterations,
                                       n_bootstrap,
                                       seed,
                                       logdir='test',
                                       logger=None):
    np.random.seed(seed)

    logger.info("Bootstrapping phase")
    a = exploactors.RandomParameterizationExploration(static_env=static_env,
                                                      **env_config)
    a.reset()
    a.act(n_iter=n_bootstrap, render=False)

    # Define motor and sensory spaces:
    explauto_env = ExplautoEnv(**explauto_config)
    m_ndims = explauto_env.conf.m_ndims  # number of motor parameters
    m_space = range(m_ndims)

    # We divide the explo noise by 2 so that the explauto implementation matches ours
    explo_noise_sdev = explo_noise_sdev / 2
    # Create the learning modules:
    learning_modules = []
    if representation == 'flat':
        s_distractball = range(m_ndims, m_ndims + 4)
        learning_modules.append(
            LearningModule("mod1",
                           m_space,
                           s_distractball,
                           explauto_env.conf,
                           explo_noise=explo_noise_sdev,
                           win_size=win_size,
                           interest_model=interest_model))
    elif representation == 'modular':
        s_distract = range(m_ndims, m_ndims + 2)
        s_ball = range(m_ndims + 2, m_ndims + 4)
        learning_modules.append(
            LearningModule("mod1",
                           m_space,
                           s_distract,
                           explauto_env.conf,
                           explo_noise=explo_noise_sdev,
                           win_size=win_size,
                           interest_model=interest_model))
        learning_modules.append(
            LearningModule("mod2",
                           m_space,
                           s_ball,
                           explauto_env.conf,
                           explo_noise=explo_noise_sdev,
                           win_size=win_size,
                           interest_model=interest_model))
    else:
        raise NotImplementedError

    # We update the learning modules with the bootstrap outcomes
    for i, m in enumerate(a.actions):
        s = a.outcomes[i]
        for module in learning_modules:
            module.update_sm(m, module.get_s(np.concatenate([m, s])))

    env = static_env(**env_config)
    env.reset()

    outcomes_states = a.outcomes_states
    interests_evolution = []
    explo_evolution = []
    goals_states = []

    logger.info("Starting exploration")
    # Steps of (n_explore exploring and 1 exploiting iterations):
    for step in range(n_exploration_iterations // (n_explore + 1)):
        if (step + 1) % 100 == 0:
            logger.info("Iteration: %i / %i" %
                        ((step + 1) *
                         (n_explore + 1), n_exploration_iterations))
        # Compute the interest of modules
        interests = [module.interest() for module in learning_modules]
        interests_evolution.append(interests)
        # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
        babbling_choice = prop_choice(interests, eps=explo_ratio)
        babbling_module = learning_modules[babbling_choice]
        # The babbling module picks a random goal in its sensory space and returns n_explore noisy motor commands:
        m_list = babbling_module.produce(n=n_explore)
        goal = babbling_module.s
        goals_states.append([babbling_choice, goal])
        for m in m_list:
            env.reset()
            env.act(action=m, render=False)
            s = env.observation
            outcomes_states += [env.hidden_state]
            # Update each sensorimotor models:
            for module in learning_modules:
                module.update_sm(m, module.get_s(np.concatenate([m, s])))
        # Choose the best motor command to reach current goal (with no noise):
        m = babbling_module.infer(babbling_module.expl_dims,
                                  babbling_module.inf_dims,
                                  babbling_module.x,
                                  n=1,
                                  explore=False)
        env.reset()
        env.act(action=m, render=False)
        s = env.observation
        outcomes_states += [env.hidden_state]
        # Update the interest of the babbling module:
        babbling_module.update_im(
            m, babbling_module.get_s(np.concatenate([m, s])))
        # Update each sensorimotor models:
        for module in learning_modules:
            module.update_sm(m, module.get_s(np.concatenate([m, s])))
        explos_modules = [
            int(100. * (n_explore + 1) * module.im.n_points() /
                float(module.sm.t)) for module in learning_modules
        ]
        explo_evolution.append(explos_modules)

    logger.info("Exploration finished, saving data")
    # We save the set of explored states and interests evolution for each representation
    explored_states = np.array(outcomes_states)
    np.save(os.path.join(logdir, 'explored_states'),
            explored_states.astype(np.float32))
    interests_evolution = np.array(interests_evolution)
    np.save(os.path.join(logdir, 'interests_evolution'),
            interests_evolution.astype(np.float32))
    explo_evolution = np.array(explo_evolution)
    np.save(os.path.join(logdir, 'explo_evolution'),
            explo_evolution.astype(np.float32))
    # We save the set of goals states
    with open(logdir + '/goal_states', 'wb') as f:
        pickle.dump(goals_states, f)
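
For later analysis, the arrays saved above can be reloaded directly (np.save appends the .npy extension); a minimal usage sketch with the same default logdir:

import os
import pickle
import numpy as np

logdir = 'test'
explored_states = np.load(os.path.join(logdir, 'explored_states.npy'))
interests_evolution = np.load(os.path.join(logdir, 'interests_evolution.npy'))
explo_evolution = np.load(os.path.join(logdir, 'explo_evolution.npy'))
with open(os.path.join(logdir, 'goal_states'), 'rb') as f:
    goals_states = pickle.load(f)
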
Example #14
    def run(self, iterations=100000, profile=False, print_logs=False):

        if profile:
            cp = cProfile.Profile()
            cp.enable()

        t = time.perf_counter()
        while self.i < iterations:
            if print_logs:
                # Print number of iterations up to now:
                if self.i - self.last_print > 1000:
                    self.last_print = 1000 * (self.i // 1000)
                    print("\nIteration:", self.i)
                    print("Time:", int(10. * (time.clock() - t)) / 10.)
                    print("Average steps", int(10. * self.avg_steps) / 10.)
                    print(
                        "n_stick1_moved",
                        self.environment.n_stick1_moved - self.n_stick1_moved)
                    print(
                        "n_stick2_moved",
                        self.environment.n_stick2_moved - self.n_stick2_moved)
                    print("n_obj1_moved",
                          self.environment.n_obj1_moved - self.n_obj1_moved)
                    print("n_obj2_moved",
                          self.environment.n_obj2_moved - self.n_obj2_moved)
                    self.n_stick1_moved = self.environment.n_stick1_moved
                    self.n_stick2_moved = self.environment.n_stick2_moved
                    self.n_obj1_moved = self.environment.n_obj1_moved
                    self.n_obj2_moved = self.environment.n_obj2_moved

                    if self.condition == "AMB":
                        for mid in [
                                "mod1", "mod2", "mod3", "mod4", "mod7", "mod10"
                        ]:
                            if mid in self.learning_modules:
                                print(
                                    "Interest of module", mid, ":",
                                    int(1000. * self.learning_modules[mid].
                                        interest_model.current_interest) /
                                    1000.)

                    t = time.perf_counter()

            # Choose the babbling module (probabilities proportional to interests, with epsilon of random choice):
            if self.condition == "RMB":
                # Get the interest of modules
                interests = [
                    self.learning_modules[mid].interest()
                    for mid in self.learning_modules.keys()
                ]
                self.interests_evolution.append(interests)
                babbling_module = np.random.choice(
                    list(self.learning_modules.values()))
            elif self.condition == "AMB":
                # Get the interest of modules
                interests = [
                    self.learning_modules[mid].interest()
                    for mid in self.learning_modules.keys()
                ]
                self.interests_evolution.append(interests)
                babbling_module = list(
                    self.learning_modules.values())[prop_choice(interests,
                                                                eps=0.2)]
                #babbling_module = self.learning_modules["mod1"]
            elif self.condition == "FRGB" or self.condition == "rmb":
                babbling_module = self.learning_modules["mod1"]
            elif self.condition == "SGS":
                babbling_module = self.learning_modules["mod4"]
            elif self.condition == "FC":
                fc = ["mod1", "mod2", "mod4", "mod3", "mod7"]
                m = self.i // (iterations // len(fc))
                babbling_module = self.learning_modules[fc[m]]
            else:
                raise NotImplementedError

            # The babbling module picks a random goal in its sensory space and returns 4 noisy motor commands:
            if (babbling_module.t < babbling_module.motor_babbling_n_iter
                    or np.random.random() < self.rmb_prop
                    or self.condition == "rmb"):
                m = babbling_module.motor_babbling(steps=1)
                ms_array, steps = self.execute_perceive(m)
                self.chosen_modules.append("random")
            else:
                self.chosen_modules.append(babbling_module.mid)
                sg = babbling_module.interest_model.sample()
                babbling_module.sg = sg
                for _ in range(self.n_explore):
                    m = babbling_module.inverse(sg)
                    ms_array, steps = self.execute_perceive(m)
                    self.avg_steps = self.avg_steps * 0.99 + 0.01 * steps

                # Update Interest
                if self.condition == "AMB":
                    m = babbling_module.inverse(sg, explore=False)
                    ms_array, steps = self.execute_perceive(m)
                    babbling_module.update_im(
                        m, np.concatenate(ms_array[:, babbling_module.s_space]))

        if profile:
            cp.disable()
            cp.dump_stats("test.cprof")

        if print_logs:
            print("n stick1_moved", self.environment.n_stick1_moved)
            print("n stick2_moved", self.environment.n_stick2_moved)
            print("n obj1_moved", self.environment.n_obj1_moved)
            print("n obj2_moved", self.environment.n_obj2_moved)

            print()
            print("Parameters:", iterations, self.explo_noise,
                  self.optim_explo, self.condition, self.distractors)
def active_model_babbling(trial, iterations):
    env = ArmStickBalls()
    np.random.seed(trial)
    explored_s = []
    res = []
    n_explore=4
    m_ndims = env.conf.m_ndims # number of motor parameters
    m_space = range(m_ndims)
    s_hand  = range(m_ndims, m_ndims+6)
    s_tool  = range(m_ndims+6, m_ndims+12)
    s_ball1 = range(m_ndims+12, m_ndims+18)
    s_ball2 = range(m_ndims+18, m_ndims+24)
    s_ball3 = range(m_ndims+24, m_ndims+30)
    s_ball4 = range(m_ndims+30, m_ndims+36)
    learning_modules = {}
    learning_modules['mod1'] = LearningModule("mod1", m_space, s_hand, env.conf)
    learning_modules['mod2'] = LearningModule("mod2", m_space, s_tool, env.conf)
    learning_modules['mod3'] = LearningModule("mod3", m_space, s_ball1, env.conf)
    learning_modules['mod4'] = LearningModule("mod4", m_space, s_ball2, env.conf)
    learning_modules['mod5'] = LearningModule("mod5", m_space, s_ball3, env.conf)
    learning_modules['mod6'] = LearningModule("mod6", m_space, s_ball4, env.conf)
    for step in range(iterations // (n_explore + 1)):
        interests = [learning_modules[mid].interest() for mid in learning_modules.keys()]
        #interests_evolution.append(interests)
        babbling_module = list(learning_modules.values())[prop_choice(interests, eps=0.2)]
        m_list = babbling_module.produce(n=n_explore)
        for m in m_list:
            s = env.update(m) # execute this command and observe the corresponding sensory effect
            if (len(explored_s) == 0) or abs(s[17] - 0.6) > 0.001:
                explored_s += [s]
            for mid in learning_modules.keys():
                learning_modules[mid].update_sm(m, learning_modules[mid].get_s(array(list(m) + list(s))))
        m = babbling_module.infer(babbling_module.expl_dims, babbling_module.inf_dims, babbling_module.x, n=1, explore=False)    
        s = env.update(m) # execute this command and observe the corresponding sensory effect
        babbling_module.update_im(m, babbling_module.get_s(array(list(m)+list(s))))
        for mid in learning_modules.keys():
            learning_modules[mid].update_sm(m, learning_modules[mid].get_s(array(list(m) + list(s))))
        if (step+1) % ((iterations // (n_explore + 1)) // 10) == 0:
            res += [int(compute_explo(array(explored_s)[:,[14,17]], array([-2., -2.]), array([2., 2.]), gs=grid_size))]
    return res
