コード例 #1
0
ファイル: workmate_LJ04.py プロジェクト: lieke2020/phd_start
    def construct_input(self):
        """
        Build the latent input vector ``self.l`` for the current observation.

        The observation (together with the time step) is encoded through
        ``inputs.get_obs``, projected with ``W_lx`` and passed through
        ``self.transfer``. One match node per memory block and a constant
        bias entry of 1.0 are appended to that latent activation.

        Sets ``self.x_sens``, ``self.x``, ``self.l_sens`` and ``self.l``;
        returns None.
        """
        # encoded observation; this variant appends no bias unit, so x and
        # x_sens refer to the same object
        encoded = inputs.get_obs(self.obs, self.t)
        self.x_sens = encoded
        self.x = encoded

        # x -> l: projection followed by the transfer function
        self.l_sens = self.transfer(self.W_lx.dot(encoded))

        # the same projected vector is compared against every memory block
        candidate = self.W_Sl.dot(self.l_sens)
        stored_blocks = self.S.reshape((self.nblocks, self.block_size))
        match_nodes = np.zeros(self.nblocks)
        for idx, block in enumerate(stored_blocks):
            match_nodes[idx] = 1 - WorkMATe.match(candidate, block)

        # latent activation + match nodes + bias
        self.l = np.r_[self.l_sens, match_nodes, 1.0]
コード例 #2
0
ファイル: workmate_LJ03.py プロジェクト: lieke2020/phd_start
    def construct_input(self):
        """
        Build the input vector ``self.l`` for the current observation.

        The observation (together with the time step) is encoded through
        ``inputs.get_obs`` and projected linearly with ``W_lx``; no transfer
        function is applied in this variant. One match node per memory block
        and a constant bias entry of 1.0 are appended to the projection.

        Sets ``self.x_sens``, ``self.l_in``, ``self.l_sens`` and ``self.l``;
        returns None.
        """
        # input consists of: observation and time t (no bias unit appended)
        self.x_sens = inputs.get_obs(self.obs, self.t)

        # linear input encoding: l_in and l_sens are the same array
        self.l_in = self.W_lx.dot(self.x_sens)
        self.l_sens = l_sens = self.l_in

        # Compute match values. Each projected block is compared only with
        # the memory block at the same index; evaluating the full pairwise
        # distance matrix and keeping its diagonal would call the metric
        # nblocks**2 times for the same result.
        shape = (self.nblocks, self.block_size)
        Sproj = np.asarray(self.W_Sl.dot(l_sens), dtype=float).reshape(shape)
        Smem = np.asarray(self.S, dtype=float).reshape(shape)
        dists = np.empty(self.nblocks)
        for i, (proj_row, mem_row) in enumerate(zip(Sproj, Smem)):
            dists[i] = WorkMATe.match(proj_row, mem_row)
        matches = 1 - dists

        # add match nodes + bias to input vector
        self.l = np.r_[l_sens, matches, 1.0]
        return
コード例 #3
0
    def construct_input(self):
        """
        Build the latent input vector ``self.l`` for the current observation.

        The observation (together with the time step) is encoded through
        ``inputs.get_obs``, extended with a bias entry, projected with
        ``W_lx`` and passed through ``self.transfer``. One match node per
        memory block and a constant bias entry of 1.0 are appended to the
        latent activation.

        Sets ``self.x_sens``, ``self.x``, ``self.l_sens`` and ``self.l``;
        returns None.
        """
        # input consists of: observation and time t
        self.x_sens = inputs.get_obs(self.obs, self.t)
        self.x = np.append(self.x_sens, 1.0)  # add bias neuron

        # x -> l: projection followed by the transfer function
        l_in = self.W_lx.dot(self.x)
        self.l_sens = l_sens = self.transfer(l_in)

        # Compute match values. Each projected block is compared only with
        # the memory block at the same index; evaluating the full pairwise
        # distance matrix and keeping its diagonal would call the metric
        # nblocks**2 times for the same result.
        shape = (self.nblocks, self.block_size)
        Sproj = np.asarray(self.W_Sl.dot(l_sens), dtype=float).reshape(shape)
        Smem = np.asarray(self.S, dtype=float).reshape(shape)
        dists = np.empty(self.nblocks)
        for i, (proj_row, mem_row) in enumerate(zip(Sproj, Smem)):
            dists[i] = WorkMATe.match(proj_row, mem_row)
        matches = 1 - dists

        # add match nodes + bias to input vector
        self.l = np.r_[l_sens, matches, 1.0]
        return
コード例 #4
0
ファイル: workmate_LJ07b.py プロジェクト: lieke2020/phd_start
 def construct_input(self):
     """
     Encode the current observation and append the bias value.

     Stores the output of ``inputs.get_obs`` in ``self.x_sens`` and the
     same values followed by ``self.bias`` in ``self.x``; returns None.
     """
     # encoded observation for the current time step
     sensory = inputs.get_obs(self.obs, self.t)
     self.x_sens = sensory
     # only the bias is added; this variant has no match nodes
     self.x = np.r_[sensory, self.bias]
コード例 #5
0
    def construct_input(self):
        """
        Build the input vector ``self.x`` for the current observation.

        The observation (together with the time step) is encoded through
        ``inputs.get_obs``. It is projected with ``W_Sx`` and compared block
        by block with the memory content ``self.S``. One match node per
        memory block and a constant bias entry of 1.0 are appended to the
        encoded observation.

        Sets ``self.x_sens`` and ``self.x``; returns None.
        """
        # input consists of: observation and time t
        self.x_sens = x_sens = inputs.get_obs(self.obs, self.t)

        # Compute match values. Each projected block is compared only with
        # the memory block at the same index; evaluating the full pairwise
        # distance matrix and keeping its diagonal would call the metric
        # nblocks**2 times for the same result.
        shape = (self.nblocks, self.block_size)
        Sproj = np.asarray(self.W_Sx.dot(x_sens), dtype=float).reshape(shape)
        Smem = np.asarray(self.S, dtype=float).reshape(shape)
        dists = np.empty(self.nblocks)
        for i, (proj_row, mem_row) in enumerate(zip(Sproj, Smem)):
            dists[i] = WorkMATe.match2(proj_row, mem_row)
        matches = 1 - dists

        # add match nodes + bias to input vector
        self.x = np.r_[x_sens, matches, 1.0]
        return
コード例 #6
0
    def construct_input(self):
        """
        Build the latent input vector ``self.l`` for the current observation.

        The observation (together with the time step) is encoded through
        ``inputs.get_obs``, extended with a bias entry, projected with
        ``W_lx`` and passed through ``self.transfer``. One match node per
        memory block and a constant bias entry of 1.0 are appended to the
        latent activation.

        Sets ``self.x_sens``, ``self.x``, ``self.l_sens`` and ``self.l``;
        returns None.
        """
        # input consists of: observation and time t
        self.x_sens = inputs.get_obs(self.obs, self.t)
        self.x = np.append(self.x_sens, 1.0)  # add bias unit

        # sensory input is mapped onto a latent variable
        # x -> l: projection followed by the transfer function
        l_in = self.W_lx.dot(self.x)
        self.l_sens = self.transfer(l_in)

        # Compute match values. Each projected block is compared only with
        # the memory block at the same index; evaluating the full pairwise
        # distance matrix and keeping its diagonal would call the metric
        # nblocks**2 times for the same result.
        shape = (self.nblocks, self.block_size)
        Sproj = np.asarray(
            self.W_Sl.dot(self.l_sens), dtype=float).reshape(shape)
        Smem = np.asarray(self.S, dtype=float).reshape(shape)
        dists = np.empty(self.nblocks)
        for i, (proj_row, mem_row) in enumerate(zip(Sproj, Smem)):
            dists[i] = WorkMATe.match(proj_row, mem_row)
        matches = 1 - dists

        # add match nodes + bias to latent input vector
        self.l = np.r_[self.l_sens, matches, 1.0]
        return
コード例 #7
0
    def __init__(self, env=None, nhidden=20, nblocks=2, block_size=20):
        """
        Set up learning parameters, transfer functions, layers and weights.

        env: environment object; must not be None. Its ``n_actions``
            attribute sizes the external-action module of the q layer.
        nhidden: number of units in the hidden layer.
        nblocks: number of memory blocks.
        block_size: number of units per memory block; also used as the size
            of the latent layer.
        """
        super(WorkMATe, self).__init__()
        assert env is not None
        self.env = env

        ## learning params (adopted from Rombouts et al., 2015)
        self.beta = 0.15
        self.gamma = 0.90
        self.L = 0.8
        self.epsilon = 0.025  # exploration
        self.bias = 1

        ## transfer functions and their derivatives
        sigmoid_offset = 2.5
        tanh_offset = 2.5

        def sigmoid(x):
            # sigmoid transfer function, offset at 2.5
            return 1 / (1. + np.exp(sigmoid_offset - x))

        def sigmoid_derivative(x):
            return x * (1. - x)

        def relu(x):
            return np.maximum(x, 0)

        def relu_derivative(x):
            return np.greater(x, 0).astype(int)

        def shifted_tanh(x):
            return np.tanh(x - tanh_offset)

        def shifted_tanh_derivative(x):
            return 1 - np.tanh(x - tanh_offset)**2

        def softmax(x):
            # softmax normalization; for action selection - boltzmann
            # controller. The maximum is subtracted before exponentiating.
            exps = np.exp(x - x.max())
            return exps / exps.sum()

        self.transfer = sigmoid
        self.dtransfer = sigmoid_derivative
        self.transfer_r = relu
        self.dtransfer_r = relu_derivative
        self.transfer_t = shifted_tanh
        self.dtransfer_t = shifted_tanh_derivative
        self.softmaxnorm = softmax

        ## network architecture -- input and output shape from env
        n_in = inputs.get_obs('a').size
        n_latent = block_size
        n_hidden = nhidden
        # memory cell properties:
        self.nblocks = nblocks
        self.block_size = block_size
        n_mem = nblocks * block_size

        # q layer consists of 2 modules: one for the external actions and
        # one for the internal actions (nblocks + 1, the extra one is null)
        module_sizes = (env.n_actions, nblocks + 1)
        n_q = np.sum(module_sizes)
        # index of the module each q node belongs to
        self.zmods = np.hstack(
            [[mod] * size for mod, size in enumerate(module_sizes)])

        ## network layers (activations 0)
        # (x will be constructed when processing 'new_obs')
        self.S = np.zeros(n_mem)
        self.l = np.zeros(n_latent)
        self.h = np.zeros(n_hidden)
        self.q = np.zeros(n_q)

        ## weights (+1 in a shape indicates projection from a bias node)
        def uniform(shape, low=-.5, high=.5):
            # uniform samples in [low, high)
            return np.random.sample(shape) * (high - low) + low

        # NOTE: the order of the draws below is kept fixed so that a seeded
        # random state produces the same weights.
        # input projection with bias node (x > l)
        self.W_lx = uniform((n_latent, n_in + 1))
        self.W_lx_start = np.copy(self.W_lx)
        # memory projection (x > S); time and sensory input cells are not
        # separated in memory
        self.W_Sx = uniform((n_mem, n_in))

        # PLASTIC CONNECTIONS (all except memory projection)
        # l -> h; latent units + match nodes + bias
        self.W_hl = uniform((n_hidden, n_latent + nblocks + 1))
        self.W_hl_start = np.copy(self.W_hl)
        # S -> h
        self.W_hS = uniform((n_hidden, n_mem))
        # h -> q; hidden units + bias
        self.W_qh = uniform((n_q, n_hidden + 1))

        # tags and traces are shaped like their weights, initialized at 0
        self.Tag_W_lx = np.zeros_like(self.W_lx)
        self.Trace_W_lx = np.zeros_like(self.W_lx)
        self.Tag_W_hl = np.zeros_like(self.W_hl)
        self.Trace_W_hl = np.zeros_like(self.W_hl)
        self.Tag_W_hS = np.zeros_like(self.W_hS)
        self.Trace_W_hS = np.zeros_like(self.W_hS)
        self.Tag_W_qh = np.zeros_like(self.W_qh)
        self.Trace_W_qh = np.zeros_like(self.W_qh)

        # action state
        self.action = -1
        # (prev) predicted reward:
        self.qat_1 = None
        self.qat = None
        self.t = 0
コード例 #8
0
ファイル: premate_LJ01.py プロジェクト: lieke2020/phd_start
    def __init__(self, env=None, nhidden=20):
        """
        Set up learning parameters, transfer functions, layers and weights.

        env: environment object; must not be None.
        nhidden: number of units in both the latent and the hidden layer.
        """
        super(PreMATe, self).__init__()
        assert env is not None
        self.env = env

        ## learning params (adopted from Rombouts et al., 2015)
        self.beta = 0.4
        self.gamma = 0.90
        self.L = 0.8
        self.epsilon = 0.025  # exploration rate
        self.bias = 1

        ## transfer function, its derivative, and softmax
        sigmoid_offset = 2.5

        def sigmoid(x):
            # sigmoid transfer function, offset at 2.5
            return 1 / (1. + np.exp(sigmoid_offset - x))

        def sigmoid_derivative(x):
            return x * (1. - x)

        def softmax(x):
            # softmax normalization; for action selection - boltzmann
            # controller. The maximum is subtracted before exponentiating.
            exps = np.exp(x - x.max())
            return exps / exps.sum()

        self.transfer = sigmoid
        self.dtransfer = sigmoid_derivative
        self.softmaxnorm = softmax

        ## network architecture
        n_in = inputs.get_obs('a').size
        n_latent = nhidden
        n_hidden = nhidden
        # q layer has as many nodes as inputs that need to be discriminated
        # (np.sum over a scalar simply yields that scalar)
        n_q = np.sum(len(all_stim))

        ## network layers (activations 0)
        # (x will be constructed when processing 'new_obs')
        self.l = np.zeros(n_latent)
        self.h = np.zeros(n_hidden)
        self.q = np.zeros(n_q)

        ## weights (+1 in a shape indicates projection from a bias node)
        def uniform(shape, low=-.5, high=.5):
            # uniform samples in [low, high)
            return np.random.sample(shape) * (high - low) + low

        # ALL PLASTIC CONNECTIONS
        # NOTE: the order of the draws below is kept fixed so that a seeded
        # random state produces the same weights.
        # x -> l; inputs + bias
        self.W_lx = uniform((n_latent, n_in + 1))
        self.W_lx_start = np.copy(self.W_lx)
        # l -> h; latent units + bias
        self.W_hl = uniform((n_hidden, n_latent + 1))
        self.W_hl_start = np.copy(self.W_hl)
        # h -> q; hidden units + bias
        self.W_qh = uniform((n_q, n_hidden + 1))
        self.W_qh_start = np.copy(self.W_qh)

        # tags and traces are shaped like their weights, initialized at 0
        self.Tag_W_lx = np.zeros_like(self.W_lx)
        self.Trace_W_lx = np.zeros_like(self.W_lx)
        self.Tag_W_hl = np.zeros_like(self.W_hl)
        self.Trace_W_hl = np.zeros_like(self.W_hl)
        self.Tag_W_qh = np.zeros_like(self.W_qh)
        self.Trace_W_qh = np.zeros_like(self.W_qh)

        # action state
        self.action = -1
        # (prev) predicted reward:
        self.qat_1 = None
        self.qat = None
        self.t = 0
コード例 #9
0
    def __init__(self, env=None, nhidden=15, nblocks=2, block_size=15):
        """
        Set up learning parameters, transfer functions, layers and weights.

        env: environment object; must not be None. Its ``n_actions``
            attribute sizes the external-action module of the q layer.
        nhidden: number of units in the hidden layer.
        nblocks: number of memory blocks.
        block_size: number of units per memory block; also used as the size
            of the latent layer.
        """
        super(WorkMATe, self).__init__()
        assert env is not None
        self.env = env

        ## learning params (adopted from Rombouts et al., 2015)
        self.beta = 0.15
        self.gamma = 0.90
        self.L = 0.8
        # exploration:
        self.epsilon = 0.025

        ## member lambda functions:
        # sigmoid transfer function, offset at 2.5
        sigmoid_offset = 2.5
        self.transfer = lambda x: 1 / (1. + np.exp(sigmoid_offset - x))
        self.dtransfer = lambda x: x * (1. - x)  # derivative
        # softmax normalization; for action selection - boltzmann controller
        self.softmaxnorm = lambda x: (np.exp(x - x.max()) / np.exp(x - x.max())
                                      .sum())

        ## init network architecture -- inputs and output shape from env
        # input, latent and hidden
        nx = inputs.get_obs('a').size
        nl = block_size  # latent layer has the size of one memory block
        nh = nhidden
        # memory cell properties:
        self.nblocks = nblocks
        self.block_size = block_size
        nS = nblocks * block_size

        # output -- q layer consisting of 2 modules
        # module for n external actions, internal actions for nblocks + 1 (null)
        mod_sz = env.n_actions, nblocks + 1
        nq = np.sum(mod_sz)
        # indices of module for each node:
        self.zmods = np.hstack([[i] * sz for i, sz in enumerate(mod_sz)])

        ## init network layers (activations 0)
        # (x will be constructed when processing 'new_obs')
        self.l = np.zeros(nl)
        self.S = np.zeros(nS)
        self.h = np.zeros(nh)
        self.q = np.zeros(nq)

        ## init weights, tags traces, (+1 indicates projection from bias node)

        wl, wh = -.50, .50
        # Input projection (x > l); nx + bias
        self.W_lx = np.random.sample((nl, nx + 1)) * (wh - wl) + wl

        # Memory projection (l > S)
        # Note that time and sensory input cells are not separated in memory
        # This projection is not random but a fixed one-on-one mapping: one
        # identity matrix per block, stacked to shape (nS, nl), so every
        # block receives an unchanged copy of the latent vector. Built in a
        # single step so it is defined and correctly sized for any nblocks.
        self.W_Sl = np.tile(np.identity(nl), (nblocks, 1))

        # PLASTIC CONNECTIONS (all except memory projection)
        wl, wh = -.25, .25
        # connections l->h; nl + match nodes + bias
        self.W_hl = np.random.sample((nh, nl + nblocks + 1)) * (wh - wl) + wl
        # connections S->h;
        self.W_hS = np.random.sample((nh, nS)) * (wh - wl) + wl
        # connections h->q; nh + bias
        self.W_qh = np.random.sample((nq, nh + 1)) * (wh - wl) + wl
        # tags are shaped like weights but initialized at 0:
        zeros_ = np.zeros_like
        self.Tag_W_hl, self.Trace_W_hl = zeros_(self.W_hl), zeros_(self.W_hl)
        self.Tag_W_hS, self.Trace_W_hS = zeros_(self.W_hS), zeros_(self.W_hS)
        self.Tag_W_qh, self.Trace_W_qh = zeros_(self.W_qh), zeros_(self.W_qh)
        # tag/trace for the memory projection (added by LJC)
        self.Tag_W_Sl, self.Trace_W_Sl = zeros_(self.W_Sl), zeros_(self.W_Sl)

        # Init action state
        self.action = -1
        # (prev) predicted reward:
        self.qat_1 = self.qat = None
        self.t = 0
        return