Example #1
    def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0, activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
        # random number stream for sampling
        self.rng = rng
        self.rng_std = rng_std
        # n * W * H --> n * dim_input --> n * dim_hidden
        self.glimpse_shape = glimpse_shape
        dim_input = np.prod(glimpse_shape)

        W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
        W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))

        w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
        b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

        def forward(times, s_prev, C_prev, x, W_x, W_h, b_h, w_l, b_l): #, w_l0, b_l0): 
            # args: step index, previous hidden and cell state; non-sequences: x and the weights
            # x.shape = n * W * H 
            # s_prev, C_prev.shape = N * dim_hidden

            # get location vector
#           loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
#           loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2  TODO
            loc_mean = s_prev.dot(w_l) + b_l  # n * 2  TODO
            # glimpse
            glimpse, loc = self._glimpse(x, loc_mean) # n * dim_hidden, n * 2
            # input

            # LSTM
            res    =   glimpse.dot(W_x) +  s_prev.dot(W_h)      + b_h.dimshuffle('x', 0) 
            f = activation(res[:, 0*dim_hidden:1*dim_hidden]) # N * dh
            i = activation(res[:, 1*dim_hidden:2*dim_hidden]) # N * dh
            C_hat = T.tanh(res[:, 2*dim_hidden:3*dim_hidden]) # N * dh
            o = activation(res[:, 3*dim_hidden:4*dim_hidden]) # N * dh
            C = f*C_prev + i*C_hat # N * dh
            s = o * T.tanh(C)      # N * dh
            return s, C, loc, loc_mean # n*dim_h, n*dim_h, n * 2, n * 2

        [s, C, loc, loc_mean], updates = theano.scan(
            fn=forward,
            sequences = T.arange(glimpse_times), #x.swapaxes(0, 1),
            outputs_info = [T.zeros((x.shape[0], dim_hidden)), 
                            T.zeros((x.shape[0], dim_hidden)), 
                            None, None], 
            non_sequences = [x, W_x, W_h, b_h, w_location, b_location],#w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict = True)
        # s: Time * n * dim_hidden
        # loc: Time * n * 2

        self.output = s.swapaxes(0, 1) # N * Time * dim_hidden
        self.cell = C.swapaxes(0, 1)
        self.location = loc.swapaxes(0, 1) # N * T * 2
        self.location_mean = loc_mean.swapaxes(0, 1) + T.stack(glimpse_shape[0]/2, glimpse_shape[1]/2).dimshuffle('x', 'x', 0) # N * T * 2
        self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std)*T.exp(-((loc-loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0,1) # N * T * 2  locx and locy are independent
#       self.location_logp = - float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0,1)
                # this part is useless in training >> - T.log(T.sqrt(2*T.pi)*rng_std) 
        self.params = [W_x, W_h, b_h]
        self.reinforceParams = [w_location, b_location] #, w_location0, b_location0]
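
Note: the LSTM body in this example computes all four gate pre-activations with one fused projection of width 4*dim_hidden and then slices the result. Below is a minimal NumPy sketch of that slicing; the shapes and the sigmoid gating are illustrative only (the example above applies its configurable `activation` to the gates).

import numpy as np

def lstm_step(glimpse, s_prev, C_prev, W_x, W_h, b_h, dim_hidden):
    # one fused matmul: N x 4*dim_hidden, then one slice per gate
    res = glimpse.dot(W_x) + s_prev.dot(W_h) + b_h
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    f     = sigmoid(res[:, 0*dim_hidden:1*dim_hidden])   # forget gate
    i     = sigmoid(res[:, 1*dim_hidden:2*dim_hidden])   # input gate
    C_hat = np.tanh(res[:, 2*dim_hidden:3*dim_hidden])   # candidate cell state
    o     = sigmoid(res[:, 3*dim_hidden:4*dim_hidden])   # output gate
    C = f * C_prev + i * C_hat                           # new cell state
    s = o * np.tanh(C)                                   # new hidden state
    return s, C
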
Example #2
    def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0, activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
        # random number stream for sampling
        self.rng = rng
        self.rng_std = rng_std
        # n * W * H --> n * dim_input --> n * dim_hidden
        self.glimpse_shape = glimpse_shape
        dim_input = np.prod(glimpse_shape)
        w_input = generate_wb(dim_input, dim_hidden, '{}->input'.format(name), params=['w'])
        w_hidden, b_hidden = generate_wb(dim_hidden, dim_hidden, '{}->hidden'.format(name), params=['w', 'b'])

#       w_location0, b_location0 = generate_wb(dim_hidden, 100, '{}->location0'.format(name), params=['w', 'b'])
#       w_location, b_location = generate_wb(100, 2, '{}->location'.format(name), params=['w', 'b'])
        w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
        b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

        def forward(times, s_prev, x, w_i, w_h, b_h, w_l, b_l): #, w_l0, b_l0): 
            # args: step index, previous hidden state; non-sequences: x and the weights
            # x.shape = n * W * H 

            # get location vector
#           loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
#           loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2  TODO
            loc_mean = s_prev.dot(w_l) + b_l  # n * 2  TODO
            # glimpse
            glimpse, loc = self._glimpse(x, loc_mean) # n * dim_hidden, n * 2
            # input
            s = activation( glimpse.dot(w_i) + s_prev.dot(w_h) + b_h ) # n * dim_hidden
            return s, loc, loc_mean # n*dim_h, n * 2, n * 2

        [s, loc, loc_mean], updates = theano.scan(
            fn=forward,
            sequences = T.arange(glimpse_times), #x.swapaxes(0, 1),
            outputs_info = [T.zeros((x.shape[0], dim_hidden)), None, None], 
            non_sequences = [x, w_input, w_hidden, b_hidden, w_location, b_location],#w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict = True)
        # s: Time * n * dim_hidden
        # loc: Time * n * 2

        self.output = s.swapaxes(0, 1) # N * Time * dim_hidden
        self.location = loc.swapaxes(0, 1) # N * T * 2
        self.location_mean = loc_mean.swapaxes(0, 1) + T.stack(glimpse_shape[0]/2, glimpse_shape[1]/2).dimshuffle('x', 'x', 0) # N * T * 2
        self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std)*T.exp(-((loc-loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0,1) # N * T * 2  locx and locy are independent
#       self.location_logp = - float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0,1)
                # this part is useless in training >> - T.log(T.sqrt(2*T.pi)*rng_std) 
        self.params = [w_input, w_hidden, b_hidden]
        self.reinforceParams = [w_location, b_location] #, w_location0, b_location0]
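
Note: `location_p` above is the density of an isotropic Gaussian with mean `loc_mean` and standard deviation `rng_std`, evaluated per coordinate (locx and locy treated as independent). A small, purely illustrative NumPy check of that formula against scipy.stats.norm (values are made up):

import numpy as np
from scipy.stats import norm

rng_std = 1.0
loc      = np.array([0.3, -1.2])   # sampled location
loc_mean = np.array([0.0,  0.5])   # predicted mean
p_manual = 1.0 / (np.sqrt(2 * np.pi) * rng_std) * np.exp(-((loc - loc_mean) ** 2) / (2.0 * rng_std ** 2))
assert np.allclose(p_manual, norm.pdf(loc, loc_mean, rng_std))
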
Example #3
    def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0, activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
        # random number stream for sampling
        self.rng = rng
        self.rng_std = rng_std
        # n * W * H --> n * dim_input --> n * dim_hidden
        self.glimpse_shape = glimpse_shape
        dim_input = np.prod(glimpse_shape)
        w_input = generate_wb(dim_input, dim_hidden, '{}->input'.format(name), params=['w'])
        w_hidden, b_hidden = generate_wb(dim_hidden, dim_hidden, '{}->hidden'.format(name), params=['w', 'b'])

#       w_location0, b_location0 = generate_wb(dim_hidden, 100, '{}->location0'.format(name), params=['w', 'b'])
#       w_location, b_location = generate_wb(100, 2, '{}->location'.format(name), params=['w', 'b'])
        w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
        b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

        def forward(times, s_prev, x, w_i, w_h, b_h, w_l, b_l): #, w_l0, b_l0): 
            # args: step index, previous hidden state; non-sequences: x and the weights
            # x.shape = n * W * H 

            # get location vector
#           loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
#           loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2  TODO
            loc_mean = s_prev.dot(w_l) + b_l  # n * 2  TODO
            # glimpse
            glimpse, loc = self._glimpse(x, loc_mean) # n * dim_hidden, n * 2
            # input
            s = activation( glimpse.dot(w_i) + s_prev.dot(w_h) + b_h ) # n * dim_hidden
            return s, loc, loc_mean # n*dim_h, n * 2, n * 2

        [s, loc, loc_mean], updates = theano.scan(
            fn=forward,
            sequences = T.arange(glimpse_times), #x.swapaxes(0, 1),
            outputs_info = [T.zeros((x.shape[0], dim_hidden)), None, None], 
            non_sequences = [x, w_input, w_hidden, b_hidden, w_location, b_location],#w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict = True)
        # s: Time * n * dim_hidden
        # loc: Time * n * 2

        self.output = s.swapaxes(0, 1) # N * Time * dim_hidden
        self.location = loc.swapaxes(0, 1) # N * T * 2
        self.location_mean = loc_mean.swapaxes(0, 1) # N * T * 2
        self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std)*T.exp(-((loc-loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0,1) # N * T * 2  locx and locy are independent
#       self.location_logp = - float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0,1)
                # this part is useless in training >> - T.log(T.sqrt(2*T.pi)*rng_std) 
        self.params = [w_input, w_hidden, b_hidden]
        self.reinforceParams = [w_location, b_location] #, w_location0, b_location0]
Example #4
    def __init__(self,
                 x,
                 item_count,
                 dim_input,
                 glimpse_times,
                 dim_hidden,
                 rng,
                 activation=T.tanh,
                 bptt_truncate=-1,
                 name='AttentionModel',
                 minimum_p=1e-10):
        '''
            Itemwise hard attention

            Only one item (a feature vector of length dim_input) is attended to at each glimpse.
        '''
        # random number stream for sampling
        self.rng = rng
        self.glimpse_times = glimpse_times

        #       W_x0 = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
        #       W_h0, b_h0 = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
        W_x = generate_wb(dim_input,
                          4 * dim_hidden,
                          '{}_x'.format(name),
                          params=['w'])
        W_c = generate_wb(dim_hidden,
                          3 * dim_hidden,
                          '{}_c'.format(name),
                          params=['w'])
        W_h, b_h = generate_wb(dim_hidden, 4 * dim_hidden,
                               '{}_hidden'.format(name))

        w_location, b_location = generate_wb(dim_hidden,
                                             item_count,
                                             '{}->item'.format(name),
                                             params=['w', 'b'])

        # example
        #       In [121]: y, u = theano.scan(fn=lambda p, x: rng.choice(size=(1,), a=x, p=p)[0], sequences=p/p.sum(1).dimshuffle(0, 'x'), outputs_info=None, non_sequences=x)
        #       In [122]: f = theano.function([p, x], y, updates=u)

        #       def get_pick(p, item_count):
        #           return rng.choice(size=[1], a=item_count, p=p)[0]

        def forward(times, s_prev, C_prev, x, W_x, W_c, W_h, b_h, w_l,
                    b_l):  #, w_l0, b_l0):
            # args: step index, previous hidden and cell state; non-sequences: x and the weights

            # get item p
            item_p = T.nnet.softmax(s_prev.dot(w_l) + b_l)  # n * item_count
            # max pick
            # picked = item_p.argmax(1)

            # random pick, something wrong here
            #           picked, _ = theano.scan(fn=get_pick,
            #                               sequences=[item_p],
            #                               outputs_info=None,
            #                               non_sequences=[x.shape[2]],
            #                               strict=True)

            # random pick via threshold
            accp, _ = theano.scan(fn=lambda last, current: last + current,
                                  sequences=item_p.T,
                                  outputs_info=T.zeros((item_p.shape[0], )),
                                  strict=True)  # item_count * N
            thres = rng.uniform(size=(x.shape[0], 1),
                                low=0,
                                high=1,
                                dtype=theano.config.floatX)
            picked = (-1.0 / (accp.T - thres)).argmin(1)

            items = x[T.arange(x.shape[0]), times, picked]  # n * dim_input

            # LSTM
            res = items.dot(W_x) + s_prev.dot(W_h) + b_h.dimshuffle('x', 0)
            peephole = C_prev.dot(W_c)
            f = T.nnet.sigmoid(
                res[:, 0 * dim_hidden:1 * dim_hidden] +
                peephole[:, 0 * dim_hidden:1 * dim_hidden])  # N * dh
            i = T.nnet.sigmoid(
                res[:, 1 * dim_hidden:2 * dim_hidden] +
                peephole[:, 1 * dim_hidden:2 * dim_hidden])  # N * dh
            C_hat = T.tanh(res[:, 2 * dim_hidden:3 * dim_hidden])  # N * dh
            o = T.nnet.sigmoid(
                res[:, 3 * dim_hidden:4 * dim_hidden] +
                peephole[:, 2 * dim_hidden:3 * dim_hidden])  # N * dh
            C = f * C_prev + i * C_hat  # N * dh
            s = o * T.tanh(C)  # N * dh
            return s, C, items, picked, item_p

        [s, C, items, picked, item_p], updates = theano.scan(
            fn=forward,
            #           sequences = x.swapaxes(0, 1),  #x:  Time * N * item count * item feature len(1)
            sequences=T.arange(glimpse_times),  #x.swapaxes(0, 1),
            outputs_info=[
                T.zeros((x.shape[0], dim_hidden)),
                T.zeros((x.shape[0], dim_hidden)), None, None, None
            ],
            non_sequences=[x, W_x, W_c, W_h, b_h, w_location,
                           b_location],  #w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict=True)

        self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
        self.cell = C.swapaxes(0, 1)

        self.params = [W_x, W_c, W_h, b_h]
        self.reinforceParams = [w_location,
                                b_location]  #, w_location0, b_location0]

        # for debug
        self.item_p = item_p.swapaxes(0, 1)  # N * Time * item_count
        self.picked = picked.swapaxes(0, 1)  # N * Time (picked item index)
        self.items = items.swapaxes(0, 1)  # N * Time * dim_input
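
Note: the "random pick via threshold" block above samples an item index with probability `item_p` by comparing a uniform threshold against the running cumulative probability (inverse-CDF sampling): `-1/(accp - thres)` is most negative at the first index where the cumulative sum crosses the threshold, so `argmin` returns exactly that index. A NumPy sketch of the same trick (illustrative, not the original Theano graph):

import numpy as np

rng = np.random.default_rng(0)
item_p = np.array([[0.1, 0.6, 0.3]])            # N=1 distribution over 3 items
accp = np.cumsum(item_p, axis=1)                # running CDF: [0.1, 0.7, 1.0]
thres = rng.uniform(size=(item_p.shape[0], 1))  # one uniform threshold per sample
# entries before the crossing give positive values, the first crossing gives a
# large negative value, later entries give smaller negative values
picked = (-1.0 / (accp - thres)).argmin(axis=1)
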
Example #5
    def __init__(
            self,
            glimpse_shape,
            glimpse_times,
            dim_hidden,
            dim_fc,
            dim_out,
            reward_base,
            rng_std=1.0,
            activation=T.tanh,
            bptt_truncate=-1,
            lmbd=0.1  # gdupdate + lmbd*rlupdate
    ):
        if reward_base is None:
            reward_base = np.zeros((glimpse_times)).astype('float32')
            reward_base[-1] = 1.0
        x = T.ftensor3('x')  # N * W * H
        y = T.ivector('y')  # label
        lr = T.fscalar('lr')
        reward_base = theano.shared(name='reward_base',
                                    value=np.array(reward_base).astype(
                                        theano.config.floatX),
                                    borrow=True)  # Time (vector)
        reward_bias = T.fvector('reward_bias')
        rng = MRG_RandomStreams(np.random.randint(9999999))
        #       rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

        # ============================================================================================
        #            Output of RNN in each time is connected to a shared FC to output the logloss
        i = InputLayer(x)
        # shared w and b
        w_fc, b_fc = generate_wb(dim_hidden, dim_out, 'fc', params=['w', 'b'])
        # attention unit
        au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng,
                           rng_std, activation, bptt_truncate)
        # au.output: N * Time * dim_hidden
        fcs = [
            FullConnectLayer(au.output[:, i, :],
                             dim_hidden,
                             dim_out,
                             activation,
                             'FC[{}]'.format(i),
                             givens={
                                 'w': w_fc,
                                 'b': b_fc
                             }) for i in xrange(glimpse_times)
        ]
        # fc.output: N * dim_out
        sms = [SoftmaxLayer(fcs[i].output) for i in xrange(glimpse_times)]
        # sm.output: N * dim_out
        layers = [au] + sms + [fcs[0]]
        # ==============================================================================================

        output = T.stack(*[sms[i].output for i in xrange(glimpse_times)]).sum(
            0)  # sum over glimpse_times -> N * classes
        hidoutput = au.output  # N * Time * dim_hidden
        location = au.location  # N * T * 2
        prediction = output.argmax(1)  # N

        # calc
        equalvec = T.eq(prediction, y)  # [0, 1, 0, 0, 1 ...]
        correct = T.cast(T.sum(equalvec), 'float32')
        #       noequalvec = T.neq(prediction, y)
        #       nocorrect = T.cast(T.sum(noequalvec), 'float32')
        logLoss = T.log(output)[T.arange(y.shape[0]), y]  # N, log-probability of the true class
        reward_biased = T.outer(equalvec,
                                reward_base) - reward_bias.dimshuffle('x', 0)
        # N * Time
        # (R_t - b_t), where b = E[R]

        # gradient descent
        gdobjective = logLoss.sum() / x.shape[
            0]  # mean log-likelihood of the true class over the batch
        gdparams = reduce(lambda x, y: x + y.params, layers, [])
        gdupdates = map(lambda x: (x, x + lr * T.grad(gdobjective, x)),
                        gdparams)

        # reinforce learning
        rlobjective = (reward_biased.dimshuffle(0, 1, 'x') *
                       T.log(au.location_p)).sum() / x.shape[0]
        # location_p: N * Time * 2
        # location_logp: N * Time
        # reward_biased: N * Time
        rlparams = au.reinforceParams
        rlupdates = map(lambda x: (x, x + lr * lmbd * T.grad(rlobjective, x)),
                        rlparams)

        print 'compile step()'
        self.step = theano.function([x, y, lr, reward_bias], [
            gdobjective, rlobjective, correct,
            T.outer(equalvec, reward_base)
        ],
                                    updates=gdupdates + rlupdates)
        #       print 'compile gdstep()'
        #       self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
        #       print 'compile rlstep()'
        #       self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
        print 'compile predict()'
        self.predict = theano.function([x], prediction)
        #       print 'compile forward()'
        #       self.forward = theano.function([x], map(lambda x: x.output, layers)) #[layers[-3].output, fc.output])
        #       print 'compile error()'
        #       self.error = theano.function([x, y], gdobjective)
        print 'compile locate()'
        self.locate = theano.function(
            [x],
            [au.location_mean, location])  #[layers[-3].output, fc.output])
        print 'compile debug()'
        self.debug = theano.function([x, y, lr, reward_bias],
                                     [reward_biased, au.location_p],
                                     on_unused_input='warn')

        # self.xxx
        self.glimpse_times = glimpse_times
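
Note: `step()` above applies two updates at once: the shared FC and attention parameters follow the supervised log-likelihood gradient (`gdupdates`), while the location parameters follow a REINFORCE gradient scaled by `lmbd` and weighted by the reward minus the `reward_bias` baseline (`rlupdates`). For a Gaussian location policy that REINFORCE gradient has a simple closed form; a minimal NumPy sketch with made-up values (the model itself lets `T.grad` derive this):

import numpy as np

std, lr, lmbd = 1.0, 0.1, 0.1
loc_mean = np.array([0.0, 0.0])                  # policy mean for one glimpse
loc = loc_mean + std * np.random.randn(2)        # sampled location
reward, baseline = 1.0, 0.4                      # e.g. correct prediction vs. E[R]
# d/d(loc_mean) of (reward - baseline) * log N(loc | loc_mean, std^2)
grad_loc_mean = (reward - baseline) * (loc - loc_mean) / std**2
loc_mean = loc_mean + lr * lmbd * grad_loc_mean  # gradient ascent, as in rlupdates
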
Example #6
    def __init__(self, x, item_count, dim_input, glimpse_times, dim_hidden, rng, activation=T.tanh, bptt_truncate=-1, name='AttentionModel', minimum_p=1e-10):
        '''
            Itemwise hard attention

            Only one item (a feature vector of length dim_input) is attended to at each glimpse.
        '''
        # random number stream for sampling
        self.rng = rng
        self.glimpse_times = glimpse_times

#       W_x0 = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
#       W_h0, b_h0 = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
        W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
        W_c = generate_wb(dim_hidden, 3*dim_hidden, '{}_c'.format(name), params=['w'])
        W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))

        w_location, b_location = generate_wb(dim_hidden, item_count, '{}->item'.format(name), params=['w', 'b'])

        # example
#       In [121]: y, u = theano.scan(fn=lambda p, x: rng.choice(size=(1,), a=x, p=p)[0], sequences=p/p.sum(1).dimshuffle(0, 'x'), outputs_info=None, non_sequences=x)
#       In [122]: f = theano.function([p, x], y, updates=u)

#       def get_pick(p, item_count):
#           return rng.choice(size=[1], a=item_count, p=p)[0]

        def forward(times, s_prev, C_prev, x, W_x, W_c, W_h, b_h, w_l, b_l): #, w_l0, b_l0): 
            # args: step index, previous hidden and cell state; non-sequences: x and the weights

            # get item p 
            item_p = T.nnet.softmax( s_prev.dot(w_l) + b_l )  # n * item_count 
            # max pick
            # picked = item_p.argmax(1)

            # random pick, something wrong here
#           picked, _ = theano.scan(fn=get_pick, 
#                               sequences=[item_p],
#                               outputs_info=None,
#                               non_sequences=[x.shape[2]],
#                               strict=True)

            # random pick via threshold
            accp, _ =theano.scan(fn=lambda last, current: last+current, 
                    sequences=item_p.T, 
                    outputs_info=T.zeros((item_p.shape[0],)), 
                    strict=True) # item_count * N
            thres = rng.uniform(size=(x.shape[0],1), low=0, high=1, dtype=theano.config.floatX)
            picked = (-1.0/(accp.T-thres)).argmin(1)

            items = x[T.arange(x.shape[0]), times, picked] # n * dim_input

            # LSTM
            res    =   items.dot(W_x) +  s_prev.dot(W_h)      + b_h.dimshuffle('x', 0) 
            peephole = C_prev.dot(W_c)
            f = T.nnet.sigmoid(res[:, 0*dim_hidden:1*dim_hidden] + peephole[:, 0*dim_hidden:1*dim_hidden]) # N * dh
            i = T.nnet.sigmoid(res[:, 1*dim_hidden:2*dim_hidden] + peephole[:, 1*dim_hidden:2*dim_hidden]) # N * dh
            C_hat =     T.tanh(res[:, 2*dim_hidden:3*dim_hidden]) # N * dh
            o = T.nnet.sigmoid(res[:, 3*dim_hidden:4*dim_hidden] + peephole[:, 2*dim_hidden:3*dim_hidden]) # N * dh
            C = f*C_prev + i*C_hat # N * dh
            s = o * T.tanh(C)      # N * dh
            return s, C, items, picked, item_p

        [s, C, items, picked, item_p], updates = theano.scan(
            fn=forward,
#           sequences = x.swapaxes(0, 1),  #x:  Time * N * item count * item feature len(1)
            sequences = T.arange(glimpse_times), #x.swapaxes(0, 1),
            outputs_info = [T.zeros((x.shape[0], dim_hidden)), 
                            T.zeros((x.shape[0], dim_hidden)), 
                            None, None, None], 
            non_sequences = [x, W_x, W_c, W_h, b_h, w_location, b_location],#w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict = True)

        self.output = s.swapaxes(0, 1) # N * Time * dim_hidden
        self.cell = C.swapaxes(0, 1)

        self.params = [W_x, W_c, W_h, b_h]
        self.reinforceParams = [w_location, b_location] #, w_location0, b_location0]

        # for debug
        self.item_p = item_p.swapaxes(0, 1)  # N * Time * item_count 
        self.picked = picked.swapaxes(0, 1) # N * Time (picked item index)
        self.items = items.swapaxes(0, 1) # N * Time * dim_input 
Example #7
    def __init__(self, 
        glimpse_shape, glimpse_times, 
        dim_hidden, dim_fc, dim_out, 
        reward_base, 
        rng_std=1.0, activation=T.tanh, bptt_truncate=-1, 
        lmbd=0.1 # gdupdate + lmbd*rlupdate
        ): 
        if reward_base is None:
            reward_base = np.zeros((glimpse_times)).astype('float32')
            reward_base[-1] = 1.0
        x = T.ftensor3('x')  # N * W * H 
        y = T.ivector('y')  # label 
        lr = T.fscalar('lr')
        reward_base = theano.shared(name='reward_base', value=np.array(reward_base).astype(theano.config.floatX), borrow=True) # Time (vector)
        reward_bias = T.fvector('reward_bias')
        rng = MRG_RandomStreams(np.random.randint(9999999))
#       rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))


# ============================================================================================
#            Output of RNN in each time is connected to a shared FC to output the logloss
        i = InputLayer(x)
        # shared w and b
        w_fc, b_fc = generate_wb(dim_hidden, dim_out, 'fc', params=['w', 'b'])
        # attention unit
        au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std, activation, bptt_truncate)
        # au.output: N * Time * dim_hidden
        fcs = [FullConnectLayer(au.output[:,i,:], dim_hidden, dim_out, activation, 'FC[{}]'.format(i), givens={'w':w_fc, 'b':b_fc}) for i in xrange(glimpse_times)]
        # fc.output: N * dim_out
        sms = [SoftmaxLayer(fcs[i].output) for i in xrange(glimpse_times)]
        # sm.output: N * dim_out
        layers = [au]+sms+[fcs[0]]
# ==============================================================================================

        output = T.stack(*[sms[i].output for i in xrange(glimpse_times)]).sum(0) # sum over glimpse_times -> N * classes
        hidoutput = au.output    # N * Time * dim_hidden
        location = au.location   # N * T * 2
        prediction = output.argmax(1) # N

        # calc
        equalvec = T.eq(prediction, y) # [0, 1, 0, 0, 1 ...]
        correct = T.cast(T.sum(equalvec), 'float32')
#       noequalvec = T.neq(prediction, y)
#       nocorrect = T.cast(T.sum(noequalvec), 'float32')
        logLoss = T.log(output)[T.arange(y.shape[0]), y] # N, log-probability of the true class
        reward_biased = T.outer(equalvec, reward_base)-reward_bias.dimshuffle('x', 0)
            # N * Time
            # (R_t - b_t), where b = E[R]
        
        # gradient descent
        gdobjective = logLoss.sum()/x.shape[0]  # mean log-likelihood of the true class over the batch
        gdparams = reduce(lambda x, y: x+y.params, layers, []) 
        gdupdates = map(lambda x: (x, x+lr*T.grad(gdobjective, x)), gdparams)

        # reinforce learning
        rlobjective = (reward_biased.dimshuffle(0, 1, 'x') * T.log(au.location_p)).sum() / x.shape[0]
            # location_p: N * Time * 2
            # location_logp: N * Time
            # reward_biased: N * Time
        rlparams = au.reinforceParams 
        rlupdates = map(lambda x: (x, x+lr*lmbd*T.grad(rlobjective, x)), rlparams)
         
        print 'compile step()'
        self.step = theano.function([x, y, lr, reward_bias], [gdobjective, rlobjective, correct, T.outer(equalvec, reward_base)], updates=gdupdates+rlupdates)
    #       print 'compile gdstep()'
    #       self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    #       print 'compile rlstep()'
    #       self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
        print 'compile predict()'
        self.predict = theano.function([x], prediction)
#       print 'compile forward()'
#       self.forward = theano.function([x], map(lambda x: x.output, layers)) #[layers[-3].output, fc.output])
#       print 'compile error()'
#       self.error = theano.function([x, y], gdobjective)
        print 'compile locate()'
        self.locate = theano.function([x], [au.location_mean, location]) #[layers[-3].output, fc.output])
        print 'compile debug()'
        self.debug = theano.function([x, y, lr, reward_bias], [reward_biased, au.location_p], on_unused_input='warn')

        # self.xxx
        self.glimpse_times = glimpse_times
Example #8
    def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0, activation=T.tanh, bptt_truncate=-1, name='AttentionModel', minimum_p=1e-10):
        # random number stream for sampling
        self.rng = rng
        self.rng_std = rng_std
        # n * W * H --> n * dim_input --> n * dim_hidden
        self.glimpse_shape = glimpse_shape
        dim_input = np.prod(glimpse_shape)

#       W_x0 = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
#       W_h0, b_h0 = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
        W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
        W_c = generate_wb(dim_hidden, 3*dim_hidden, '{}_c'.format(name), params=['w'])
        W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))

        w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
#       b_location.set_value([14, 14])

        def forward(times, s_prev, C_prev, x, W_x, W_c, W_h, b_h, w_l, b_l): #, w_l0, b_l0): 
            # args: step index, previous hidden and cell state; non-sequences: x and the weights
            # x.shape = n * W * H 
            # s_prev, C_prev.shape = N * dim_hidden

            # get location vector
#           loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
#           loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2  TODO
            loc_mean = s_prev.dot(w_l) + b_l  # n * 2  TODO
            # glimpse
            glimpse, loc = self._glimpse(x, loc_mean) # n * dim_hidden, n * 2
            # input

            # LSTM
            res    =   glimpse.dot(W_x) +  s_prev.dot(W_h)      + b_h.dimshuffle('x', 0) 
            peephole = C_prev.dot(W_c)
            f = T.nnet.sigmoid(res[:, 0*dim_hidden:1*dim_hidden] + peephole[:, 0*dim_hidden:1*dim_hidden]) # N * dh
            i = T.nnet.sigmoid(res[:, 1*dim_hidden:2*dim_hidden] + peephole[:, 1*dim_hidden:2*dim_hidden]) # N * dh
            C_hat =     T.tanh(res[:, 2*dim_hidden:3*dim_hidden]) # N * dh
            o = T.nnet.sigmoid(res[:, 3*dim_hidden:4*dim_hidden] + peephole[:, 2*dim_hidden:3*dim_hidden]) # N * dh
            C = f*C_prev + i*C_hat # N * dh
            s = o * T.tanh(C)      # N * dh
            return s, C, loc, loc_mean, glimpse, \
                    T.concatenate([\
                    res[:, 0*dim_hidden:1*dim_hidden], 
                    res[:, 1*dim_hidden:2*dim_hidden], 
                    res[:, 2*dim_hidden:3*dim_hidden], 
                    res[:, 3*dim_hidden:4*dim_hidden],
                    f, i, C_hat, o, C, s]) # returns s, C, loc, loc_mean, glimpse, concatenated inner state


        [s, C, loc, loc_mean, glimpse, innerstate], updates = theano.scan(
            fn=forward,
            sequences = T.arange(glimpse_times), #x.swapaxes(0, 1),
            outputs_info = [T.zeros((x.shape[0], dim_hidden)), 
                            T.zeros((x.shape[0], dim_hidden)), 
                            None, None, None, None], 
            non_sequences = [x, W_x, W_c, W_h, b_h, w_location, b_location],#w_location0, b_location0],
            truncate_gradient=bptt_truncate,
            strict = True)
        # s: Time * n * dim_hidden
        # loc: Time * n * 2

        self.output = s.swapaxes(0, 1) # N * Time * dim_hidden
        self.cell = C.swapaxes(0, 1)
        self.location = loc.swapaxes(0, 1) # N * T * 2
        self.params = [W_x, W_c, W_h, b_h]
        self.reinforceParams = [w_location, b_location] #, w_location0, b_location0]

        # for debug
        self.location_mean = loc_mean.swapaxes(0, 1) # N * T * 2
        self.glimpse = glimpse.swapaxes(0, 1) # N * Time * glimpse_shape
        self.location_p = T.maximum( 1.0/(T.sqrt(2*np.pi)*rng_std)*T.exp(-((loc-loc_mean)**2)/(2.0*rng_std**2)), minimum_p ).swapaxes(0,1) # N * T * 2  locx and locy are independent
#       self.location_logp = - float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0,1)
                # this part is useless in training >> - T.log(T.sqrt(2*T.pi)*rng_std) 
        self.innerstate = innerstate.swapaxes(0, 1)
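
Note: unlike the earlier variants, this example clamps `location_p` with `minimum_p`. Since the REINFORCE objective takes `T.log(location_p)`, a density that underflows to zero would otherwise yield -inf values and NaN gradients. A small NumPy illustration of the effect of the clamp (illustrative values only; np.log(0) emits a warning and returns -inf):

import numpy as np

minimum_p = 1e-10
p = np.array([0.3, 1e-300, 0.0])            # densities, including underflow / exact zero
print(np.log(p))                            # second entry ~ -690.8, last entry is -inf
print(np.log(np.maximum(p, minimum_p)))     # all entries bounded below by log(1e-10)
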