Example #1
0
    def habituation(self, i_select, int_sal):
        '''
        update intrinsic saliences based on performed action

        i_select is index of selected action (int)
        int_sal is previous intrinsic saliences (ndarray of floats)

        returns: int_sal_new, updated intrinsic saliences (ndarray of floats)
        '''
        assert aux.isNotFalse(i_select),\
            "\nInput 'i_select' to habituation() is False/None!"
        assert isinstance(i_select, int),\
            "\nInput 'i_select' ({}) to habituation() has wrong type ({})!"\
            "\nType must be 'int'!".format(i_select, type(i_select))
        assert i_select in range(self.N_actions),\
            "\nInput 'i_select' ({}) to habituation() is out of bounds!"\
            "\ni_select must be in range({})!".format(i_select, self.N_actions)
        aux.check_that_1d_float_array(int_sal, 'int_sal')
        assert len(int_sal) == self.N_actions,\
            "\nInput 'int_sal' ({}) to habituation() doesn't match number of actions!"\
            "\nLength is {} and must be {}!".format(int_sal, len(int_sal), self.N_actions)
        assert (int_sal >= 0.0).all(),\
            "\nInput 'int_sal' ({}) to habituation() contains negative entries!".format(int_sal)

        int_sal_new = int_sal.copy()
        int_sal_new[i_select] = int_sal[i_select] * self.hab_slowness[i_select]

        return int_sal_new
Example #2
0
    def get_novelty_saliences(self, probs):
        '''
        calculate novelty saliences from contingency predictions

        probs is ndarray of contingency predictions, i.e. probabilities (ndarray of floats)

        returns: nov_saliences, contains novelty saliences, 
            i.e. expected information per action (ndarray of floats)
        '''

        aux.check_that_1d_float_array(probs, 'probs')
        aux.check_that_contains_probabilities(probs, 'probs')
        assert len(probs) == self.N_actions,\
            "\nInput 'probs' ({}) of get_novelty_saliences() doesn't match number of actions!"\
            "\nLength is {} and must be {}!".format(probs, len(probs), self.N_actions)

        nov_saliences = np.zeros([self.N_actions])
        for i in xrange(self.N_actions):
            p_i = probs[i]
            if p_i > 0.0 and p_i < 1.0:  # p_i=0 and p_i=1 cause unstable log computations,
                # nov_salience is 0 anyway in those cases
                nov_saliences[i] = 0.5 * (-p_i * np.log2(p_i) -
                                          (1.0 - p_i) * np.log2(1.0 - p_i))
                # scaled binary entropy

        return nov_saliences
Example #3
0
    def select_action(self, tot_sal):
        '''
        softmax action selection

        tot_sal is ndarray of combined intrinsic and novelty saliences (ndarray of floats)

        returns i_selected_action, which is index of selected action (int)
        '''

        aux.check_that_1d_float_array(tot_sal, 'tot_sal')

        assert len(tot_sal)==self.N_actions,\
            "\nLength of salience vector input to select_action() ({}) does not match "\
            "number of actions ({})!".format(len(tot_sal), self.N_actions)

        ###### actual softmax
        action_probabilities = np.exp(tot_sal / self.exploration_rate)
        action_probabilities /= action_probabilities.sum()
        # normalize entries

        aux.check_that_contains_probabilities(action_probabilities,
                                              'action_probabilities')
        assert abs(action_probabilities.sum() - 1.0) < 1e-6,\
            "\nAction probabilities don't sum up to 1: {}!"\
            "\nSum is {}!".format(action_probabilities, action_probabilities.sum())

        i_selected_action = np.random.choice(range(self.N_actions),
                                             p=action_probabilities)
        # samples an action given the probability distribution

        return i_selected_action
Example #4
0
    def update_predictions(self, i_action, env_response, probs):
        '''
        update prediction variables based on selected action and environment response

        i_action is index of selected action (int)
        env_response is environment response to action (binary int)
        probs is current prediction variables (ndarray of floats)

        returns: new_probs, updated prediction variables (ndarray of floats)
        '''
        assert aux.isNotFalse(i_action),\
            "\nInput 'i_action' to update_predictions() is False/None!"
        assert isinstance(i_action, int),\
            "\nInput 'i_action' ({}) to update_predictions() has wrong type ({})!"\
            "\nType must be 'int'!".format(i_action, type(i_action))
        assert i_action in range(self.N_actions),\
            "\nInput 'i_action' ({}) to update_predictions() is out of bounds!"\
            "\ni_action must be in range({})!".format(i_action, self.N_actions)
        assert aux.isNotFalse(env_response),\
            "\nInput 'env_response' to update_predictions() is False/None!"
        assert isinstance(env_response, int),\
            "\nInput 'env_response' ({}) to update_predictions() has wrong type ({})!"\
            "\nType must be 'int'!".format(env_response, type(env_response))
        assert env_response in range(2),\
            "\nInput 'env_response' ({}) to update_predictions() is out of bounds!"\
            "\nenv_reponse must be 0 or 1!".format(env_response)
        aux.check_that_1d_float_array(probs, 'probs')
        aux.check_that_contains_probabilities(probs, 'probs')
        assert len(probs) == self.N_actions,\
            "\nInput 'probs' ({}) to update_predictions() doesn't match number of actions!"\
            "\nLength is {} and must be {}!".format(probs, len(probs), self.N_actions)

        new_probs = probs.copy()
        # exponential prediction adaptation
        new_probs[i_action] = env_response - self.learning_speed * (
            env_response - probs[i_action])

        return new_probs
Example #5
0
    def __init__(self,
                 initial_int_sal=None,
                 initial_probs=None,
                 exploration_rate=1.,
                 learning_speed=0.9,
                 hab_slowness=0.9,
                 response_probs=None,
                 T_max=int(1e4),
                 triggered_int_sal=None,
                 verbose=False,
                 output=False):
        '''
        constructor

        initial_int_sal is initial distribution of intrinsic saliences for all actions
            type is list or np.ndarray, default is np.ones([4])
        initial_probs is initial values of contingency predictions for all actions
            type is list or np.ndarray, length must match initial_int_sal, default is np.zeros([4])
        exploration_rate is used for softmax action selection
            type is float, default is 1
        learning_speed is adaptation rate of prediction system (float), default is 0.9
        hab_slowness is factor for action habituation (float or ndarray), default is 0.9,
            high means slow habituation, low means fast habituation
        response_probs contains probabilities with which responses are triggered by actions,
            type is ndarray with probability value for each action, default is np.ones([4])
        triggered_int_sal specifies intrinsic salience values for each action triggered by
            contingent environment responses,
            type is ndarray with shape (number of actions,) if salience changes are equal for all
            responses or (number of actions, number of actions) if salience changes differ for
            different responses,
            put negative numbers where saliences should not be updated
            default is False
        T_max is number of time steps (int), default is 1e4
        verbose is verbose output mode
            type is boolean, default is True
        output is file output mode
            type is boolean, default is True
        '''

        if verbose:
            print 'Constructing Agent object. Received arguments:'
            if isinstance(initial_int_sal, np.ndarray) or initial_int_sal:
                print ' initial_int_sal:', initial_int_sal
            if isinstance(initial_probs, np.ndarray) or initial_probs:
                print ' initial_probs:', initial_probs
            if isinstance(exploration_rate,
                          np.ndarray) and exploration_rate != 1.:
                print ' exploration_rate:', exploration_rate
            if isinstance(learning_speed,
                          np.ndarray) and learning_speed != 0.9:
                print ' learning_speed:', learning_speed
            if isinstance(hab_slowness, np.ndarray) or hab_slowness != 0.9:
                print ' hab_slowness:', hab_slowness
            if isinstance(response_probs, np.ndarray) or response_probs:
                print ' response_probs:', response_probs
            if isinstance(triggered_int_sal, np.ndarray) or triggered_int_sal:
                print ' triggered_int_sal:', triggered_int_sal
            if isinstance(T_max, np.ndarray) or T_max != 1e4:
                print ' T_max:', T_max

            ####### initial_int_sal
        if aux.isNotFalse(initial_int_sal):  # user specified initial_int_sal
            assert isinstance(initial_int_sal, list) or isinstance(initial_int_sal, np.ndarray),\
                "\nInput 'initial_int_sal' ({}) of Agent constructor has wrong type ({})!\
                 \nType must be 'list' or 'np.ndarray'!"                                                        .format(initial_int_sal, type(initial_int_sal))

            if isinstance(initial_int_sal, list):
                initial_int_sal = np.array(initial_int_sal)

            aux.check_that_1d_float_array(initial_int_sal, 'initial_int_sal')

            self.initial_int_sal = np.array(initial_int_sal, dtype=float)

        elif initial_int_sal == None:  # user didn't specify initial_probs
            if aux.isNotFalse(initial_probs):
                assert isinstance(initial_probs, list) or isinstance(initial_probs, np.ndarray),\
                    "\nInput 'initial_probs' ({}) of Agent constructor has wrong type ({})!\
                    \nType must be 'list' or 'np.ndarray'!"                                                           .format(initial_probs, type(initial_probs))
                self.initial_int_sal = np.ones([len(initial_probs)])
            else:
                self.initial_int_sal = np.ones([4])
        else:
            raise AssertionError,\
                "\nInput 'initial_int_sal' ({}) of Agent constructor has wrong type ({})!\
                 \nType must be 'list' or 'np.ndarray'!"                                                        .format(initial_int_sal, type(initial_int_sal))

        self.int_sal = self.initial_int_sal  # initial saliences that are updated
        self.N_actions = len(self.initial_int_sal)

        ####### initial_probs
        if aux.isNotFalse(initial_probs):  # user specified initial_probs
            assert isinstance(initial_probs, list) or isinstance(initial_probs, np.ndarray),\
                "\nInput 'initial_probs' ({}) of Agent constructor has wrong type ({})!\
                \nType must be 'list' or 'np.ndarray'!"                                                       .format(initial_probs, type(initial_probs))

            if isinstance(initial_probs, list):
                initial_probs = np.array(initial_probs)

            aux.check_that_1d_float_array(initial_probs, 'initial_probs')
            aux.check_that_contains_probabilities(initial_probs,
                                                  'initial_probs')

            self.initial_probs = np.array(initial_probs, dtype=float)

            assert len(initial_probs) == self.N_actions,\
                "\nInput 'initial_probs' ({}) of Agent constructor has wrong length ({})!\
                \nLenth must match length of 'initial_int_sal' ({})!"                                                                     .format(initial_probs,
                    len(initial_probs), self.N_actions)

        elif initial_probs == None:  # user didn't specify initial_probs
            self.initial_probs = np.zeros([self.N_actions])

        else:
            raise AssertionError,\
                "\nInput 'initial_probs' ({}) of Agent constructor has wrong type ({})!\
                \nType must be 'list' or 'np.ndarray'!"                                                       .format(initial_probs, type(initial_probs))

        self.probs = self.initial_probs  # probs that are updated
        self.nov_sal = self.get_novelty_saliences(self.probs)

        ####### exploration_rate
        assert aux.isNotFalse(exploration_rate),\
            "\nInput 'exploration_rate' of Agent constructor is False/None!"
        assert isinstance(exploration_rate, float) or isinstance(exploration_rate, int),\
            "\nInput 'exploration_rate' ({}) of Agent constructor has wrong type ({})!"\
            "\nType must be 'float'!".format(exploration_rate, type(exploration_rate))

        if isinstance(exploration_rate, int):
            exploration_rate = float(exploration_rate)

        assert exploration_rate > 0.0,\
            "\nInput 'exploration_rate' ({}) of Agent constructor must be greater "\
            "than 0!".format(exploration_rate)

        self.exploration_rate = exploration_rate

        ####### learning_speed
        assert aux.isNotFalse(learning_speed),\
            "\nInput 'learning_speed' of Agent constructor is False/None!"
        assert isinstance(learning_speed, float),\
            "\nInput 'learning_speed' ({}) of Agent constructor has wrong type ({})!"\
            "\nType must be 'float'!".format(learning_speed, type(learning_speed))
        assert learning_speed > 0.0 and learning_speed < 1.0,\
            "\nInput 'learning_speed' ({}) of Agent constructor is out of bounds!"\
            "\nlearning_speed must be in ]0;1[!".format(learning_speed)

        self.learning_speed = learning_speed

        ####### hab_slowness
        assert aux.isNotFalse(hab_slowness),\
            "\nInput 'hab_slowness' of Agent constructor is False/None!"
        assert isinstance(hab_slowness, float) or isinstance(hab_slowness, np.ndarray),\
            "\nInput 'hab_slowness' ({}) of Agent constructor has wrong type ({})!"\
            "\nType must be 'float' or 'ndarray'!".format(hab_slowness, type(hab_slowness))

        if isinstance(hab_slowness, float):
            assert hab_slowness > 0.0 and hab_slowness < 1.0,\
                "\nInput 'hab_slowness' ({}) of Agent constructor is out of bounds!"\
                "\nhab_slowness must be in ]0;1[!".format(hab_slowness)
            hab_slowness = np.ones([self.N_actions]) * hab_slowness

        elif isinstance(hab_slowness, np.ndarray):
            aux.check_that_1d_float_array(hab_slowness, 'hab_slowness')
            assert len(hab_slowness) == self.N_actions,\
                "\nInput 'hab_slowness' ({}) of Agent constructor doesn't match number "\
                "of actions!\nLength is {} and must be {}!".format(hab_slowness,
                    len(hab_slowness), self.N_actions)
            assert (hab_slowness < 1.0).all() and (hab_slowness > 0.0).all(),\
                "\nInput 'hab_slowness' ({}) of Agent constructor contains invalid entries!"\
                "\nEntries must be in ]0;1[!".format(hab_slowness)

        self.hab_slowness = hab_slowness

        ####### response_probs
        if aux.isNotFalse(response_probs):
            aux.check_that_1d_float_array(response_probs, 'response_probs')
            assert len(response_probs) == self.N_actions,\
                "\nInput 'response_probs' ({}) of Agent constructor doesn't match number "\
                "of actions!\nLength is {} and must be {}!".format(response_probs, len(response_probs),
                    self.N_actions)
            assert (response_probs >= 0.0).all() and (response_probs <= 1.0).all(),\
                "\nInput 'response_probs' ({}) of Agent constructor contains invalid elements!"\
                "\nElements must be in [0.0; 1.0]!".format(response_probs)

            self.response_probs = response_probs
        else:
            self.response_probs = np.ones([self.N_actions])

            ####### triggered_int_sal
        if aux.isNotFalse(triggered_int_sal):
            assert isinstance(triggered_int_sal, np.ndarray),\
                "\nInput 'triggered_int_sal' ({}) of Agent constructor has wrong type ({})!"\
                "\nType must be 'ndarray'!".format(triggered_int_sal, type(triggered_int_sal))
            assert triggered_int_sal.ndim in [1,2],\
                "\nInput 'triggered_int_sal' ({}) of Agent constructor has wrong dimension ({})!"\
                "\nDimension must be either 1 or 2!".format(triggered_int_sal, triggered_int_sal.ndim)
            assert triggered_int_sal.shape in [(self.N_actions,), (self.N_actions, self.N_actions)],\
                "\nInput 'triggered_int_sal' ({}) of Agent constructor has wrong shape ({})!"\
                "\nShape must be either ({},) or ({}, {})!".format(triggered_int_sal,
                    triggered_int_sal.shape, self.N_actions, self.N_actions, self.N_actions)
            assert triggered_int_sal.dtype == float,\
                "\nInput 'triggered_int_sal' ({}) of Agent constructor has wrong dtype ({})!"\
                "\nDtype must be 'float'!".format(triggered_int_sal, triggered_int_sal.dtype)
            assert np.isfinite(triggered_int_sal).all(),\
                "\nInput 'triggered_int_sal' ({}) of Agent constructor contains invalid elements!"\
                "".format(triggered_int_sal)

            if triggered_int_sal.ndim == 1:  # convert 1d input to general 2d array
                triggered_int_sal = triggered_int_sal.repeat(self.N_actions)
                triggered_int_sal = triggered_int_sal.reshape(
                    self.N_actions, self.N_actions)
                self.triggered_int_sal = triggered_int_sal.swapaxes(0, 1)
            elif triggered_int_sal.ndim == 2:
                self.triggered_int_sal = triggered_int_sal
        else:
            self.triggered_int_sal = False

            ####### T_max
        assert isinstance(T_max, int),\
            "\nInput 'T_max' ({}) of Agent constructor has wrong type ({})!"\
            "\nType must be 'int'!".format(T_max, type(T_max))
        assert T_max > 0,\
            "\nInput 'T_max' ({}) of Agent constructor is too small!"\
            "\nT_max must be positive!".format(T_max)

        self.t = 0
        self.T_max = T_max

        ####### output
        assert isinstance(output, bool),\
            "\nInput 'output' ({}) of Agent constructor has wrong type ({})!"\
            "\nType must be 'bool'!".format(output, type(output))
        self.output = output
        outputpath, time_string = aux.get_outputpath()

        # dictionary for storing all data
        self.data = {
            'exploration_rate': self.exploration_rate,
            'learning_speed': self.learning_speed,
            'hab_slowness': self.hab_slowness,
            'response_probs': self.response_probs,
            'triggered_int_sal': self.triggered_int_sal
        }
        for label in ['int_sal', 'nov_sal', 'tot_sal', 'probs']:
            self.data.update({label: np.zeros([self.N_actions, self.T_max])})
        for label in ['i_select', 'env_response']:
            self.data.update({label: np.zeros([self.T_max], dtype=int)})

        if output:
            # output files are created in ./output/[date]/[time]/
            os.system('touch {}output.txt'.format(outputpath))
            self.outputfile_txt = open('{}output.txt'.format(outputpath), 'w')
            outputfile_txt_const = open(
                '{}output_constants.txt'.format(outputpath), 'w')
            self.outputfilename_dat = '{}output.dat'.format(outputpath)
            outputfile_dat = open(self.outputfilename_dat, 'wb')

            cPickle.dump(self.data, outputfile_dat)
            outputfile_dat.close()

            # create header of text file
            self.outputfile_txt.write(
                'Time step\tSelected action\tEnvironment response')
            for name in [
                    'Intrinsic saliences', 'Novelty saliences',
                    'Total saliences', 'Predictions'
            ]:
                self.outputfile_txt.write('\t{}'.format(name))
                for i in xrange(1, self.N_actions):
                    self.outputfile_txt.write('\t')
            self.outputfile_txt.flush()

            # write constants
            outputfile_txt_const.write('Start time:\t{}\nExploration rate:\t{}\nLearning speed:\t{}'\
                '\nHabituation slowness:'.format(time_string, self.exploration_rate,
                self.learning_speed))
            for i_action in xrange(self.N_actions):
                outputfile_txt_const.write('\t{}'.format(
                    self.hab_slowness[i_action]))
            outputfile_txt_const.write('\nResponse probabilities:')
            for i_action in xrange(self.N_actions):
                outputfile_txt_const.write('\t{}'.format(
                    self.response_probs[i_action]))
            outputfile_txt_const.write('\nTriggered intrinsic saliences:')
            if aux.isNotFalse(self.triggered_int_sal):
                for i_action in xrange(self.N_actions):
                    outputfile_txt_const.write('\t{}'.format(
                        self.triggered_int_sal[i_action]))
            else:
                outputfile_txt_const.write('\tNone')
            outputfile_txt_const.close()

        self.random_numbers = np.random.rand(T_max)
        self.tot_sal = self.int_sal + self.nov_sal

        self.verbose = verbose

        if verbose:
            print 'Agent object constructed. Attributes are:'
            print 'initial_int_sal:', self.initial_int_sal
            print 'N_actions:', self.N_actions
            print 'initial_probs:', self.initial_probs
            print 'nov_sal:', self.nov_sal
            print 'tot_sal:', self.tot_sal
            print 'exploration_rate:', self.exploration_rate
            print 'learning_speed:', self.learning_speed
            print 'hab_slowness:', self.hab_slowness
            print 'response_probs:', self.response_probs
            print 'triggered_int_sal:', self.triggered_int_sal
            print 'T_max:', self.T_max
            print 'output:', self.output
            print 'Start time is', time_string