# Shared imports for the snippets below (assumed; the repo's own module
# headers are not shown in this excerpt):
import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
# The snippets also rely on helpers defined elsewhere in the repo:
# generate_wb, self._glimpse, InputLayer, FullConnectLayer, SoftmaxLayer.


# AttentionUnit variant: LSTM core (note: the gates use `activation`, tanh by
# default, rather than the conventional sigmoid; compare the peephole variant
# at the end of this section)
def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0,
             activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
    # random for rng
    self.rng = rng
    self.rng_std = rng_std
    # n * W * H --> n * dim_input --> n * dim_hidden
    self.glimpse_shape = glimpse_shape
    dim_input = np.prod(glimpse_shape)
    W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
    W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
    w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
    # initialise the first location bias at (14, 14); cast to floatX so
    # set_value accepts it
    b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

    def forward(times, s_prev, C_prev, x, W_x, W_h, b_h, w_l, b_l):  #, w_l0, b_l0):
        # current input, previous hidden state, w_input, w_hidden, w_output
        # x.shape = n * W * H
        # s_prev, C_prev.shape = N * dim_hidden
        # get location vector
        # loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
        # loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2 TODO
        loc_mean = s_prev.dot(w_l) + b_l  # n * 2 TODO
        # glimpse
        glimpse, loc = self._glimpse(x, loc_mean)  # n * dim_hidden, n * 2
        # input
        # LSTM
        res = glimpse.dot(W_x) + s_prev.dot(W_h) + b_h.dimshuffle('x', 0)
        f = activation(res[:, 0*dim_hidden:1*dim_hidden])  # N * dh
        i = activation(res[:, 1*dim_hidden:2*dim_hidden])  # N * dh
        C_hat = T.tanh(res[:, 2*dim_hidden:3*dim_hidden])  # N * dh
        o = activation(res[:, 3*dim_hidden:4*dim_hidden])  # N * dh
        C = f*C_prev + i*C_hat  # N * dh
        s = o * T.tanh(C)  # N * dh
        return s, C, loc, loc_mean  # n*dim_h, n*dim_h, n * 2, n * 2

    [s, C, loc, loc_mean], updates = theano.scan(
        fn=forward,
        sequences=T.arange(glimpse_times),  # x.swapaxes(0, 1),
        outputs_info=[T.zeros((x.shape[0], dim_hidden)),
                      T.zeros((x.shape[0], dim_hidden)),
                      None, None],
        non_sequences=[x, W_x, W_h, b_h, w_location, b_location],  # w_location0, b_location0],
        truncate_gradient=bptt_truncate,
        strict=True)
    # s: Time * n * dim_hidden
    # loc: Time * n * 2
    self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
    self.cell = C.swapaxes(0, 1)    # bug fix: was s.swapaxes(0, 1)
    self.location = loc.swapaxes(0, 1)  # N * T * 2
    self.location_mean = loc_mean.swapaxes(0, 1) \
        + T.stack(glimpse_shape[0]/2, glimpse_shape[1]/2).dimshuffle('x', 'x', 0)  # N * T * 2
    self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std) \
        * T.exp(-((loc - loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0, 1)
    # N * T * 2; locx and locy are independent
    # self.location_logp = -float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0, 1)
    # the constant term -T.log(T.sqrt(2*T.pi)*rng_std) is useless in training
    self.params = [W_x, W_h, b_h]
    self.reinforceParams = [w_location, b_location]  #, w_location0, b_location0]
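
# ----------------------------------------------------------------------------
# Sanity-check sketch (not part of the model): location_p above is an
# elementwise normal density evaluated per coordinate. A minimal numpy
# reproduction, assuming loc is drawn as loc_mean + N(0, rng_std**2) per
# coordinate; all names here are illustrative, not from the repo.
def _check_location_density():
    import numpy as np
    rng = np.random.RandomState(0)
    rng_std = 1.0
    loc_mean = rng.randn(4, 2)                      # N * 2 predicted means
    loc = loc_mean + rng_std * rng.randn(4, 2)      # sampled glimpse centres
    # per-coordinate density; x and y are independent, so the joint density
    # of a location would be the product over the last axis
    p = 1.0/(np.sqrt(2*np.pi)*rng_std) * np.exp(-((loc - loc_mean)**2)/(2.0*rng_std**2))
    assert p.shape == (4, 2)                        # matches the N * 2 per-step shape
    return p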

# AttentionUnit variant: plain-RNN core; location_mean is reported with a
# half-glimpse offset added
def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0,
             activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
    # random for rng
    self.rng = rng
    self.rng_std = rng_std
    # n * W * H --> n * dim_input --> n * dim_hidden
    self.glimpse_shape = glimpse_shape
    dim_input = np.prod(glimpse_shape)
    w_input = generate_wb(dim_input, dim_hidden, '{}->input'.format(name), params=['w'])
    w_hidden, b_hidden = generate_wb(dim_hidden, dim_hidden, '{}->hidden'.format(name), params=['w', 'b'])
    # w_location0, b_location0 = generate_wb(dim_hidden, 100, '{}->location0'.format(name), params=['w', 'b'])
    # w_location, b_location = generate_wb(100, 2, '{}->location'.format(name), params=['w', 'b'])
    w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
    b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

    def forward(times, s_prev, x, w_i, w_h, b_h, w_l, b_l):  #, w_l0, b_l0):
        # current input, previous hidden state, w_input, w_hidden, w_output
        # x.shape = n * W * H
        # get location vector
        # loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
        # loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2 TODO
        loc_mean = s_prev.dot(w_l) + b_l  # n * 2 TODO
        # glimpse
        glimpse, loc = self._glimpse(x, loc_mean)  # n * dim_hidden, n * 2
        # input
        s = activation(glimpse.dot(w_i) + s_prev.dot(w_h) + b_h)  # n * dim_hidden
        return s, loc, loc_mean  # n*dim_h, n * 2, n * 2

    [s, loc, loc_mean], updates = theano.scan(
        fn=forward,
        sequences=T.arange(glimpse_times),  # x.swapaxes(0, 1),
        outputs_info=[T.zeros((x.shape[0], dim_hidden)), None, None],
        non_sequences=[x, w_input, w_hidden, b_hidden, w_location, b_location],  # w_location0, b_location0],
        truncate_gradient=bptt_truncate,
        strict=True)
    # s: Time * n * dim_hidden
    # loc: Time * n * 2
    self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
    self.location = loc.swapaxes(0, 1)  # N * T * 2
    self.location_mean = loc_mean.swapaxes(0, 1) \
        + T.stack(glimpse_shape[0]/2, glimpse_shape[1]/2).dimshuffle('x', 'x', 0)  # N * T * 2
    self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std) \
        * T.exp(-((loc - loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0, 1)
    # N * T * 2; locx and locy are independent
    # self.location_logp = -float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0, 1)
    # the constant term -T.log(T.sqrt(2*T.pi)*rng_std) is useless in training
    self.params = [w_input, w_hidden, b_hidden]
    self.reinforceParams = [w_location, b_location]  #, w_location0, b_location0]

# AttentionUnit variant: plain-RNN core; identical to the previous variant
# except that location_mean carries no half-glimpse offset
def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0,
             activation=T.tanh, bptt_truncate=-1, name='AttentionModel'):
    # random for rng
    self.rng = rng
    self.rng_std = rng_std
    # n * W * H --> n * dim_input --> n * dim_hidden
    self.glimpse_shape = glimpse_shape
    dim_input = np.prod(glimpse_shape)
    w_input = generate_wb(dim_input, dim_hidden, '{}->input'.format(name), params=['w'])
    w_hidden, b_hidden = generate_wb(dim_hidden, dim_hidden, '{}->hidden'.format(name), params=['w', 'b'])
    # w_location0, b_location0 = generate_wb(dim_hidden, 100, '{}->location0'.format(name), params=['w', 'b'])
    # w_location, b_location = generate_wb(100, 2, '{}->location'.format(name), params=['w', 'b'])
    w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
    b_location.set_value(np.array([14, 14], dtype=theano.config.floatX))

    def forward(times, s_prev, x, w_i, w_h, b_h, w_l, b_l):  #, w_l0, b_l0):
        # current input, previous hidden state, w_input, w_hidden, w_output
        # x.shape = n * W * H
        # get location vector
        # loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
        # loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2 TODO
        loc_mean = s_prev.dot(w_l) + b_l  # n * 2 TODO
        # glimpse
        glimpse, loc = self._glimpse(x, loc_mean)  # n * dim_hidden, n * 2
        # input
        s = activation(glimpse.dot(w_i) + s_prev.dot(w_h) + b_h)  # n * dim_hidden
        return s, loc, loc_mean  # n*dim_h, n * 2, n * 2

    [s, loc, loc_mean], updates = theano.scan(
        fn=forward,
        sequences=T.arange(glimpse_times),  # x.swapaxes(0, 1),
        outputs_info=[T.zeros((x.shape[0], dim_hidden)), None, None],
        non_sequences=[x, w_input, w_hidden, b_hidden, w_location, b_location],  # w_location0, b_location0],
        truncate_gradient=bptt_truncate,
        strict=True)
    # s: Time * n * dim_hidden
    # loc: Time * n * 2
    self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
    self.location = loc.swapaxes(0, 1)  # N * T * 2
    self.location_mean = loc_mean.swapaxes(0, 1)  # N * T * 2
    self.location_p = 1.0/(T.sqrt(2*np.pi)*rng_std) \
        * T.exp(-((loc - loc_mean)**2)/(2.0*rng_std**2)).swapaxes(0, 1)
    # N * T * 2; locx and locy are independent
    # self.location_logp = -float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0, 1)
    # the constant term -T.log(T.sqrt(2*T.pi)*rng_std) is useless in training
    self.params = [w_input, w_hidden, b_hidden]
    self.reinforceParams = [w_location, b_location]  #, w_location0, b_location0]
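
# ----------------------------------------------------------------------------
# A minimal numpy sketch of one step of the plain-RNN recurrence above
# (assumptions: glimpse_fn stands in for self._glimpse and returns the
# flattened patch plus the sampled location; all names are illustrative):
def _rnn_glimpse_step(s_prev, x, w_i, w_h, b_h, w_l, b_l, glimpse_fn):
    import numpy as np
    loc_mean = s_prev.dot(w_l) + b_l                        # n * 2 location head
    glimpse, loc = glimpse_fn(x, loc_mean)                  # n * dim_input, n * 2
    s = np.tanh(glimpse.dot(w_i) + s_prev.dot(w_h) + b_h)   # n * dim_hidden
    return s, loc, loc_mean
# theano.scan above simply iterates this step glimpse_times times, threading
# s through outputs_info while collecting loc and loc_mean at every step.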

def __init__(self, x, item_count, dim_input, glimpse_times, dim_hidden, rng,
             activation=T.tanh, bptt_truncate=-1, name='AttentionModel', minimum_p=1e-10):
    '''
    Itemwise hard attention
    Only one item with dim_input will be considered each glimpse.
    '''
    # random for rng
    self.rng = rng
    self.glimpse_times = glimpse_times
    # W_x0 = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
    # W_h0, b_h0 = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
    W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
    W_c = generate_wb(dim_hidden, 3*dim_hidden, '{}_c'.format(name), params=['w'])
    W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
    w_location, b_location = generate_wb(dim_hidden, item_count, '{}->item'.format(name), params=['w', 'b'])
    # note: minimum_p is accepted but unused in this variant

    # example
    # In [121]: y, u = theano.scan(fn=lambda p, x: rng.choice(size=(1,), a=x, p=p)[0],
    #                              sequences=p/p.sum(1).dimshuffle(0, 'x'),
    #                              outputs_info=None, non_sequences=x)
    # In [122]: f = theano.function([p, x], y, updates=u)
    # def get_pick(p, item_count):
    #     return rng.choice(size=[1], a=item_count, p=p)[0]

    def forward(times, s_prev, C_prev, x, W_x, W_c, W_h, b_h, w_l, b_l):  #, w_l0, b_l0):
        # current input, previous hidden state, w_input, w_hidden, w_output
        # get item p
        item_p = T.nnet.softmax(s_prev.dot(w_l) + b_l)  # n * item_count
        # max pick
        # picked = item_p.argmax(1)
        # random pick, something wrong here
        # picked, _ = theano.scan(fn=get_pick,
        #                         sequences=[item_p],
        #                         outputs_info=None,
        #                         non_sequences=[x.shape[2]],
        #                         strict=True)
        # random pick via threshold: accumulate the softmax into a running CDF,
        # then take the first item whose cumulative mass exceeds a uniform draw
        accp, _ = theano.scan(fn=lambda last, current: last + current,
                              sequences=item_p.T,
                              outputs_info=T.zeros((item_p.shape[0],)),
                              strict=True)  # item_count * N
        thres = rng.uniform(size=(x.shape[0], 1), low=0, high=1, dtype=theano.config.floatX)
        # -1/(accp - thres) is most negative at the first crossing point
        picked = (-1.0/(accp.T - thres)).argmin(1)
        items = x[T.arange(x.shape[0]), times, picked]  # n * dim_input
        # LSTM with peephole connections from the previous cell state
        res = items.dot(W_x) + s_prev.dot(W_h) + b_h.dimshuffle('x', 0)
        peephole = C_prev.dot(W_c)
        f = T.nnet.sigmoid(res[:, 0*dim_hidden:1*dim_hidden] + peephole[:, 0*dim_hidden:1*dim_hidden])  # N * dh
        i = T.nnet.sigmoid(res[:, 1*dim_hidden:2*dim_hidden] + peephole[:, 1*dim_hidden:2*dim_hidden])  # N * dh
        C_hat = T.tanh(res[:, 2*dim_hidden:3*dim_hidden])  # N * dh
        o = T.nnet.sigmoid(res[:, 3*dim_hidden:4*dim_hidden] + peephole[:, 2*dim_hidden:3*dim_hidden])  # N * dh
        C = f*C_prev + i*C_hat  # N * dh
        s = o * T.tanh(C)  # N * dh
        return s, C, items, picked, item_p

    [s, C, items, picked, item_p], updates = theano.scan(
        fn=forward,
        # sequences=x.swapaxes(0, 1),  # x: Time * N * item count * item feature len(1)
        sequences=T.arange(glimpse_times),
        outputs_info=[T.zeros((x.shape[0], dim_hidden)),
                      T.zeros((x.shape[0], dim_hidden)),
                      None, None, None],
        non_sequences=[x, W_x, W_c, W_h, b_h, w_location, b_location],  # w_location0, b_location0],
        truncate_gradient=bptt_truncate,
        strict=True)

    self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
    self.cell = C.swapaxes(0, 1)
    self.params = [W_x, W_c, W_h, b_h]
    self.reinforceParams = [w_location, b_location]  #, w_location0, b_location0]
    # for debug
    self.item_p = item_p.swapaxes(0, 1)  # N * Time * item_count
    self.picked = picked.swapaxes(0, 1)  # N * Time
    self.items = items.swapaxes(0, 1)    # N * Time * dim_input
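
# ----------------------------------------------------------------------------
# The "random pick via threshold" above is inverse-CDF sampling from the
# softmax: build the running cumulative probability, draw one uniform
# threshold per row, and select the first item whose cumulative mass exceeds
# it. A numpy sketch (assumes each row of p sums to 1; names illustrative):
def _pick_by_threshold(p, rng):
    import numpy as np
    accp = np.cumsum(p, axis=1)                  # n * item_count running CDF
    thres = rng.uniform(size=(p.shape[0], 1))    # one threshold per row
    # accp - thres is negative before the crossing and positive after it,
    # so -1/(accp - thres) is most negative at the first index where the
    # CDF just exceeds the threshold, which argmin selects
    return (-1.0/(accp - thres)).argmin(1)
# e.g. _pick_by_threshold(np.array([[0.1, 0.6, 0.3]]), np.random.RandomState(0))
# returns index 1 with probability 0.6 over repeated draws.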

# Model wrapper: connects the attention unit to a shared FC + softmax head at
# every time step and compiles the training/prediction functions
def __init__(self, glimpse_shape, glimpse_times, dim_hidden, dim_fc, dim_out,
             reward_base, rng_std=1.0, activation=T.tanh, bptt_truncate=-1,
             lmbd=0.1):  # gdupdate + lmbd*rlupdate
    # dim_fc is accepted but unused here
    if reward_base is None:
        reward_base = np.zeros((glimpse_times,)).astype('float32')
        reward_base[-1] = 1.0
    x = T.ftensor3('x')  # N * W * H
    y = T.ivector('y')   # label
    lr = T.fscalar('lr')
    reward_base = theano.shared(name='reward_base',
                                value=np.array(reward_base).astype(theano.config.floatX),
                                borrow=True)  # Time (vector)
    reward_bias = T.fvector('reward_bias')
    rng = MRG_RandomStreams(np.random.randint(9999999))
    # rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

    # ============================================================================================
    # The RNN output at each time step is fed through a shared FC layer to produce the log loss
    i = InputLayer(x)  # unused below; note `i` is also reused as the loop index in the comprehensions
    # shared w and b
    w_fc, b_fc = generate_wb(dim_hidden, dim_out, 'fc', params=['w', 'b'])
    # attention unit
    au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std,
                       activation, bptt_truncate)
    # au.output: N * Time * dim_hidden
    fcs = [FullConnectLayer(au.output[:, i, :], dim_hidden, dim_out, activation,
                            'FC[{}]'.format(i), givens={'w': w_fc, 'b': b_fc})
           for i in xrange(glimpse_times)]
    # fc.output: N * dim_out
    sms = [SoftmaxLayer(fcs[i].output) for i in xrange(glimpse_times)]
    # sm.output: N * dim_out
    # all fcs share w_fc/b_fc, so fcs[0] alone contributes the FC parameters
    layers = [au] + sms + [fcs[0]]

    # ==============================================================================================
    output = T.stack(*[sms[i].output for i in xrange(glimpse_times)]).sum(0)
    # stack: glimpse_times * N * classes; after sum(0): N * classes (sum of per-step softmaxes)
    hidoutput = au.output     # N * Time * dim_hidden
    location = au.location    # N * T * 2
    prediction = output.argmax(1)  # N

    # calc
    equalvec = T.eq(prediction, y)  # [0, 1, 0, 0, 1 ...]
    correct = T.cast(T.sum(equalvec), 'float32')
    # noequalvec = T.neq(prediction, y)
    # nocorrect = T.cast(T.sum(noequalvec), 'float32')
    logLoss = T.log(output)[T.arange(y.shape[0]), y]
    # note: output is a sum of softmaxes, not renormalised
    reward_biased = T.outer(equalvec, reward_base) - reward_bias.dimshuffle('x', 0)
    # N * Time; (R_t - b_t), where b = E[R]

    # gradient descent
    gdobjective = logLoss.sum()/x.shape[0]  # mean log-probability of the true class over the batch
    gdparams = reduce(lambda x, y: x + y.params, layers, [])
    gdupdates = map(lambda x: (x, x + lr*T.grad(gdobjective, x)), gdparams)

    # reinforce learning
    rlobjective = (reward_biased.dimshuffle(0, 1, 'x') * T.log(au.location_p)).sum() / x.shape[0]
    # location_p: N * Time * 2
    # reward_biased: N * Time
    rlparams = au.reinforceParams
    rlupdates = map(lambda x: (x, x + lr*lmbd*T.grad(rlobjective, x)), rlparams)

    print 'compile step()'
    self.step = theano.function([x, y, lr, reward_bias],
                                [gdobjective, rlobjective, correct,
                                 T.outer(equalvec, reward_base)],
                                updates=gdupdates + rlupdates)
    # print 'compile gdstep()'
    # self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    # print 'compile rlstep()'
    # self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
    # print 'compile forward()'
    # self.forward = theano.function([x], map(lambda x: x.output, layers))  # [layers[-3].output, fc.output]
    # print 'compile error()'
    # self.error = theano.function([x, y], gdobjective)
    print 'compile locate()'
    self.locate = theano.function([x], [au.location_mean, location])  # [layers[-3].output, fc.output]
    print 'compile debug()'
    self.debug = theano.function([x, y, lr, reward_bias],
                                 [reward_biased, au.location_p],
                                 on_unused_input='warn')

    # self.xxx
    self.glimpse_times = glimpse_times
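
# ----------------------------------------------------------------------------
# What rlupdates implements above is the REINFORCE estimator: the
# log-probability of each sampled location is weighted by the baselined
# reward (R_t - b_t) and averaged over the batch, then ascended. A numpy
# sketch of the objective with toy shapes (all values illustrative):
def _reinforce_objective():
    import numpy as np
    rng = np.random.RandomState(0)
    N, T_steps = 4, 5
    reward = rng.rand(N, T_steps)                # R_t per sample, per step
    baseline = reward.mean(0, keepdims=True)     # b_t = E[R_t] over the batch
    log_p = np.log(rng.rand(N, T_steps, 2))      # log p(loc) per coordinate
    # ascending this raises the probability of locations whose reward beat
    # the baseline and lowers it otherwise
    return ((reward - baseline)[:, :, None] * log_p).sum() / N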

# AttentionUnit variant: peephole LSTM core with extra debug outputs
def __init__(self, x, glimpse_shape, glimpse_times, dim_hidden, rng, rng_std=1.0,
             activation=T.tanh, bptt_truncate=-1, name='AttentionModel', minimum_p=1e-10):
    # random for rng
    self.rng = rng
    self.rng_std = rng_std
    # n * W * H --> n * dim_input --> n * dim_hidden
    self.glimpse_shape = glimpse_shape
    dim_input = np.prod(glimpse_shape)
    # W_x0 = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
    # W_h0, b_h0 = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
    W_x = generate_wb(dim_input, 4*dim_hidden, '{}_x'.format(name), params=['w'])
    W_c = generate_wb(dim_hidden, 3*dim_hidden, '{}_c'.format(name), params=['w'])
    W_h, b_h = generate_wb(dim_hidden, 4*dim_hidden, '{}_hidden'.format(name))
    w_location, b_location = generate_wb(dim_hidden, 2, '{}->location'.format(name), params=['w', 'b'])
    # b_location.set_value([14, 14])

    def forward(times, s_prev, C_prev, x, W_x, W_c, W_h, b_h, w_l, b_l):  #, w_l0, b_l0):
        # current input, previous hidden state, w_input, w_hidden, w_output
        # x.shape = n * W * H
        # s_prev, C_prev.shape = N * dim_hidden
        # get location vector
        # loc_mean = activation( s_prev.dot(w_l) + b_l )  # n * 2
        # loc_mean = activation(s_prev.dot(w_l0)+b_l0).dot(w_l) + b_l  # n * 2 TODO
        loc_mean = s_prev.dot(w_l) + b_l  # n * 2 TODO
        # glimpse
        glimpse, loc = self._glimpse(x, loc_mean)  # n * dim_hidden, n * 2
        # LSTM with peephole connections from the previous cell state
        res = glimpse.dot(W_x) + s_prev.dot(W_h) + b_h.dimshuffle('x', 0)
        peephole = C_prev.dot(W_c)
        f = T.nnet.sigmoid(res[:, 0*dim_hidden:1*dim_hidden] + peephole[:, 0*dim_hidden:1*dim_hidden])  # N * dh
        i = T.nnet.sigmoid(res[:, 1*dim_hidden:2*dim_hidden] + peephole[:, 1*dim_hidden:2*dim_hidden])  # N * dh
        C_hat = T.tanh(res[:, 2*dim_hidden:3*dim_hidden])  # N * dh
        o = T.nnet.sigmoid(res[:, 3*dim_hidden:4*dim_hidden] + peephole[:, 2*dim_hidden:3*dim_hidden])  # N * dh
        C = f*C_prev + i*C_hat  # N * dh
        s = o * T.tanh(C)  # N * dh
        return s, C, loc, loc_mean, glimpse, \
            T.concatenate([res[:, 0*dim_hidden:1*dim_hidden],
                           res[:, 1*dim_hidden:2*dim_hidden],
                           res[:, 2*dim_hidden:3*dim_hidden],
                           res[:, 3*dim_hidden:4*dim_hidden],
                           f, i, C_hat, o, C, s])
        # n*dim_h, n*dim_h, n * 2, n * 2, glimpse, stacked gate activations for debugging

    [s, C, loc, loc_mean, glimpse, innerstate], updates = theano.scan(
        fn=forward,
        sequences=T.arange(glimpse_times),  # x.swapaxes(0, 1),
        outputs_info=[T.zeros((x.shape[0], dim_hidden)),
                      T.zeros((x.shape[0], dim_hidden)),
                      None, None, None, None],
        non_sequences=[x, W_x, W_c, W_h, b_h, w_location, b_location],  # w_location0, b_location0],
        truncate_gradient=bptt_truncate,
        strict=True)
    # s: Time * n * dim_hidden
    # loc: Time * n * 2
    self.output = s.swapaxes(0, 1)  # N * Time * dim_hidden
    self.cell = C.swapaxes(0, 1)    # bug fix: was s.swapaxes(0, 1)
    self.location = loc.swapaxes(0, 1)  # N * T * 2
    self.params = [W_x, W_c, W_h, b_h]
    self.reinforceParams = [w_location, b_location]  #, w_location0, b_location0]
    # for debug
    self.location_mean = loc_mean.swapaxes(0, 1)  # N * T * 2
    self.glimpse = glimpse.swapaxes(0, 1)  # N * Time * glimpse_shape
    # clamp the density at minimum_p so T.log(location_p) stays finite
    self.location_p = T.maximum(
        1.0/(T.sqrt(2*np.pi)*rng_std) * T.exp(-((loc - loc_mean)**2)/(2.0*rng_std**2)),
        minimum_p).swapaxes(0, 1)  # N * T * 2; locx and locy are independent
    # self.location_logp = -float(1.0/(2.0*rng_std**2)) * ((loc-loc_mean)**2).swapaxes(0, 1)
    # the constant term -T.log(T.sqrt(2*T.pi)*rng_std) is useless in training
    self.innerstate = innerstate.swapaxes(0, 1)
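
# ----------------------------------------------------------------------------
# Why the T.maximum(..., minimum_p) clamp in location_p matters: the REINFORCE
# objective takes T.log(location_p), and a sampled location far from loc_mean
# underflows the Gaussian density to exactly 0, making the log (and its
# gradient) non-finite. A numpy illustration (deviation values are made up):
def _show_minimum_p_effect(rng_std=1.0, minimum_p=1e-10):
    import numpy as np
    dev = np.array([1.0, 5.0, 40.0])             # |loc - loc_mean| per sample
    p = 1.0/(np.sqrt(2*np.pi)*rng_std) * np.exp(-dev**2/(2.0*rng_std**2))
    unclamped = np.log(p)                        # -inf for the 40-sigma entry
    clamped = np.log(np.maximum(p, minimum_p))   # bounded below by log(1e-10)
    return unclamped, clamped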