def __init__(self, dim_in, dim_hidden, dim_out, activation=T.tanh):
    x = T.fmatrix('x')
    y = T.ivector('y')
    lr = T.fscalar('lr')

    layers = [InputLayer(x)]
    for ind, (Idim, Odim) in enumerate(
            zip([dim_in] + dim_hidden, dim_hidden + [dim_out])):
        fc = FullConnectLayer(layers[-1].output, Idim, Odim,
                              activation=activation,
                              name='FC[{}]'.format(ind))
        layers.append(fc)
    sm = SoftmaxLayer(layers[-1].output)
    layers.append(sm)

    output = sm.output
    logloss = T.nnet.categorical_crossentropy(
        T.clip(output, 1e-15, 1 - 1e-15), y)
    loss = T.sum(logloss) / y.shape[0]
    prediction = output.argmax(1)

    params = reduce(lambda acc, layer: acc + layer.params, layers, [])
    updates = [(p, p - lr * T.grad(loss, p)) for p in params]

    print 'compile step()'
    self.step = theano.function([x, y, lr], [loss], updates=updates)
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
    print 'compile predict_proba()'
    self.predict_proba = theano.function([x], output)

    # for saving
    self.params = params
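
# Usage sketch (illustrative only): assuming this constructor belongs to a
# feed-forward classifier class -- called MLP below, a hypothetical name --
# this is roughly how the compiled functions would be driven. Dtypes must
# match the symbolic variables: float32 inputs, int32 labels, float32 lr.
#
#   import numpy as np
#
#   model = MLP(dim_in=20, dim_hidden=[64, 64], dim_out=3)
#   X = np.random.randn(128, 20).astype('float32')
#   Y = np.random.randint(0, 3, size=128).astype('int32')
#   for epoch in xrange(10):
#       loss, = model.step(X, Y, np.float32(0.1))   # one SGD update
#   probs = model.predict_proba(X)                   # N * dim_out softmax output
#   labels = model.predict(X)                        # argmax class per sample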
def __init__(self, dim_input, shapes, dim_output, activation=T.tanh,
             bptt_truncate=-1):
    x = T.tensor3('x')    # number of sequences * time span * feature size
    y = T.ivector('y')    # only one label (the last one) is available
    lr = T.scalar('lr', dtype=theano.config.floatX)

    layers = [InputLayer(x)]
    shapes = [dim_input] + shapes
    # layer sizes: Input, dim1, dim2, ..., dimL, Output
    for ind, (Idim, Odim) in enumerate(zip(shapes[:-1], shapes[1:])):
        layer = LSTM(layers[-1].output, Idim, Odim, activation,
                     bptt_truncate, 'LSTM[{}]'.format(ind))
        layers.append(layer)
    fc = FullConnectLayer(layers[-1].output[:, -1, :], shapes[-1],
                          dim_output, activation, 'FC')
    sm = SoftmaxLayer(fc.output)
    layers.extend([fc, sm])

    output = sm.output
    loss = T.sum(T.nnet.categorical_crossentropy(output, y))
    prediction = output.argmax(1)

    params = reduce(lambda acc, layer: acc + layer.params, layers, [])
    updates = [(p, p - lr * T.grad(loss, p)) for p in params]

    print 'compile step()'
    self.step = theano.function([x, y, lr], [loss], updates=updates)
    print 'compile error()'
    self.error = theano.function([x, y], loss)
    print 'compile forward()'
    self.forward = theano.function([x], [layer.output for layer in layers])
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
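
# Usage sketch (illustrative only): assuming this constructor belongs to an
# LSTM sequence classifier -- called LSTMClassifier below, a hypothetical name.
# x has shape (num_sequences, time_span, feature_size) and only the label of
# the last time step is used.
#
#   import numpy as np
#
#   model = LSTMClassifier(dim_input=16, shapes=[32, 32], dim_output=4)
#   X = np.random.randn(8, 50, 16).astype(theano.config.floatX)
#   Y = np.random.randint(0, 4, size=8).astype('int32')
#   lr = np.asarray(0.05, dtype=theano.config.floatX)
#   loss, = model.step(X, Y, lr)            # one SGD update
#   per_layer = model.forward(X)            # output of every layer, input included
#   pred = model.predict(X)                 # class index per sequence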
def buildmodel(self, shape=[9, 10, 9], activation=T.nnet.sigmoid):
    x = T.fmatrix('x')    # observed states
    a = T.ivector('a')    # index of the action taken at each state
    r = T.fvector('r')    # reward assigned to each (state, action) pair
    lr = T.fscalar('lr')

    layers = [InputLayer(x)]
    for ind, (Idim, Odim) in enumerate(zip(shape[:-1], shape[1:])):
        fc = FullConnectLayer(layers[-1].output, Idim, Odim,
                              activation=activation,
                              name='FC[{}]'.format(ind))
        layers.append(fc)
    sm = SoftmaxLayer(layers[-1].output)
    layers.append(sm)

    output = sm.output
    # REINFORCE-style objective: reward-weighted log-probability of the
    # chosen actions, maximized below by gradient ascent (note the +lr).
    loss = T.sum(T.log(sm.output[T.arange(a.shape[0]), a]) * r)
    prediction = output.argmax(1)

    params = reduce(lambda acc, layer: acc + layer.params, layers, [])
    updates = [(p, p + lr * T.grad(loss, p)) for p in params]

    self.debug = theano.function([x, a, r], loss)
    self.train = theano.function([x, a, r, lr], loss, updates=updates)
    self.react = theano.function([x], prediction)
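
# Usage sketch (illustrative only): the class that owns buildmodel() is not
# shown, so the name PolicyNet and the reward discounting below are assumptions.
# The grounded part is the calling convention: states as float32 rows, the
# sampled action index per row, and a float32 reward signal of the same length.
#
#   import numpy as np
#
#   net = PolicyNet()
#   net.buildmodel(shape=[9, 10, 9])
#   states = np.random.randn(5, 9).astype('float32')         # one episode, 5 steps
#   actions = np.random.randint(0, 9, size=5).astype('int32')
#   rewards = np.array([0, 0, 0, 0, 1], dtype='float32')      # terminal reward only
#   for t in reversed(xrange(4)):                             # discounted returns
#       rewards[t] += 0.9 * rewards[t + 1]
#   net.train(states, actions, rewards, np.float32(0.01))     # gradient ascent step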
def __init__(
        self,
        glimpse_shape,
        glimpse_times,
        dim_hidden,
        dim_fc,
        dim_out,
        reward_base,
        rng_std=1.0,
        activation=T.tanh,
        bptt_truncate=-1,
        lmbd=0.1    # gdupdate + lmbd*rlupdate
):
    if reward_base is None:
        reward_base = np.zeros((glimpse_times)).astype('float32')
        reward_base[-1] = 1.0

    x = T.ftensor3('x')    # N * W * H
    y = T.ivector('y')     # label
    lr = T.fscalar('lr')
    reward_base = theano.shared(name='reward_base',
                                value=np.array(reward_base).astype(
                                    theano.config.floatX),
                                borrow=True)    # Time (vector)
    reward_bias = T.fvector('reward_bias')
    rng = MRG_RandomStreams(np.random.randint(9999999))
    # rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

    i = InputLayer(x)
    au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng,
                       rng_std, activation, bptt_truncate)

    # All hidden states are fed to the decoder:
    # layers = [i, au, InputLayer(au.output[:,:,:].flatten(2))]
    # dim_fc = [glimpse_times*dim_hidden] + dim_fc + [dim_out]
    # Only the last hidden state:
    layers = [i, au, InputLayer(au.output[:, -1, :])]
    dim_fc = [dim_hidden] + dim_fc + [dim_out]
    for Idim, Odim in zip(dim_fc[:-1], dim_fc[1:]):
        fc = FullConnectLayer(layers[-1].output, Idim, Odim, activation, 'FC')
        layers.append(fc)
    sm = SoftmaxLayer(layers[-1].output)
    layers.append(sm)

    output = sm.output         # N * classes
    hidoutput = au.output      # N * dim_output
    location = au.location     # N * T * dim_hidden
    prediction = output.argmax(1)    # N

    # calc
    equalvec = T.eq(prediction, y)    # [0, 1, 0, 0, 1 ...]
    correct = T.cast(T.sum(equalvec), 'float32')
    # noequalvec = T.neq(prediction, y)
    # nocorrect = T.cast(T.sum(noequalvec), 'float32')
    logLoss = T.log(output)[T.arange(y.shape[0]), y]
    # (R_t - b_t), where b = E[R]
    reward_biased = (T.outer(equalvec, reward_base)
                     - reward_bias.dimshuffle('x', 0))    # N * Time

    # gradient descent
    gdobjective = logLoss.sum() / x.shape[0]
    gdparams = reduce(lambda acc, layer: acc + layer.params, layers, [])
    gdupdates = [(p, p + lr * T.grad(gdobjective, p)) for p in gdparams]

    # reinforcement learning
    rlobjective = (reward_biased.dimshuffle(0, 1, 'x')
                   * T.log(au.location_p)).sum() / x.shape[0]
    # location_p:    N * Time * 2
    # location_logp: N * Time
    # reward_biased: N * Time
    rlparams = au.reinforceParams
    rlupdates = [(p, p + lr * lmbd * T.grad(rlobjective, p)) for p in rlparams]

    # Diagnostic: how much the mean hidden state changes between time steps
    deltas = T.stack(*[((au.output[:, t, :].mean(0)
                         - au.output[:, t + 1, :].mean(0))**2).sum()
                       for t in xrange(glimpse_times - 1)])
    # au.output: N * Time * dim_hidden

    print 'compile step()'
    self.step = theano.function(
        [x, y, lr, reward_bias],
        [gdobjective, rlobjective, correct, T.outer(equalvec, reward_base)],
        updates=gdupdates + rlupdates)
    # print 'compile gdstep()'
    # self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    # print 'compile rlstep()'
    # self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
    # print 'compile forward()'
    # self.forward = theano.function([x], map(lambda x: x.output, layers))
    # print 'compile error()'
    # self.error = theano.function([x, y], gdobjective)
    print 'compile locate()'
    self.locate = theano.function([x], [au.location_mean, location])
    print 'compile debug()'
    self.debug = theano.function([x, y, lr, reward_bias],
                                 [deltas, au.location_p],
                                 on_unused_input='warn')

    # self.xxx
    self.glimpse_times = glimpse_times
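
# Training-loop sketch (illustrative only): the surrounding class name (RAM here)
# and the moving-average baseline are assumptions; what is grounded is the
# interface of step(), which takes a per-time-step reward bias b_t and returns
# the raw reward matrix R (N * glimpse_times) so the caller can track E[R_t].
# `batches` is a placeholder for any iterator of (float32 N*W*H, int32 N) pairs.
#
#   import numpy as np
#
#   model = RAM(glimpse_shape=(8, 8), glimpse_times=5, dim_hidden=128,
#               dim_fc=[64], dim_out=10, reward_base=None)
#   reward_bias = np.zeros(5, dtype='float32')     # b_t, an estimate of E[R_t]
#   for X, Y in batches:
#       gd, rl, correct, R = model.step(X, Y, np.float32(0.01), reward_bias)
#       # exponential moving average of the observed rewards as the new baseline
#       reward_bias = 0.9 * reward_bias + 0.1 * R.mean(0).astype('float32')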
def __init__(
        self,
        item_count,
        dim_input,
        glimpse_times,
        dim_hidden,
        dim_fc,
        dim_out,
        reward_base,
        activation=T.tanh,
        bptt_truncate=-1,
        lmbd=0.1,      # gdupdate + lmbd*rlupdate
        DEBUG=False,
):
    # super(AttentionUnit, self).__init__()
    if reward_base is None:
        reward_base = np.zeros((glimpse_times)).astype('float32')
        reward_base[-1] = 1.0

    x = T.ftensor4('x')    # x: N * time span * item count * item feature length (1)
                           # old x: (N, item_count, dim_input)
    y = T.ivector('y')     # label
    lr = T.fscalar('lr')
    reward_base = theano.shared(name='reward_base',
                                value=np.array(reward_base).astype(
                                    theano.config.floatX),
                                borrow=True)    # Time (vector)
    reward_bias = T.fvector('reward_bias')
    rng = T.shared_randomstreams.RandomStreams(123)
    # rng = MRG_RandomStreams(np.random.randint(9999999))
    self.glimpse_times = glimpse_times

    i = InputLayer(x)
    au = AttentionUnit(x, item_count, dim_input, glimpse_times, dim_hidden,
                       rng, activation, bptt_truncate)
    layers = [i, au]

    # only the last hidden state counts
    layers.append(InputLayer(au.output[:, -1, :]))
    dim_fc = [dim_hidden] + dim_fc + [dim_out]
    # all hidden states count:
    # layers.append(InputLayer(au.output[:,:,:].flatten(2)))
    # dim_fc = [dim_hidden*glimpse_times] + dim_fc + [dim_out]

    for Idim, Odim in zip(dim_fc[:-1], dim_fc[1:]):
        fc = FullConnectLayer(layers[-1].output, Idim, Odim, activation, 'FC')
        layers.append(fc)
    sm = SoftmaxLayer(layers[-1].output)
    layers.append(sm)

    output = sm.output       # N * classes
    hidoutput = au.output    # N * dim_output
    prediction = output.argmax(1)    # N

    # calc
    equalvec = T.eq(prediction, y)    # [0, 1, 0, 0, 1 ...]
    correct = T.cast(T.sum(equalvec), 'float32')
    logLoss = T.log(output)[T.arange(y.shape[0]), y]
    # (R_t - b_t), where b = E[R]
    reward_biased = (T.outer(equalvec, reward_base)
                     - reward_bias.dimshuffle('x', 0))    # N * Time

    # gradient descent
    gdobjective = logLoss.sum() / x.shape[0]
    gdparams = reduce(lambda acc, layer: acc + layer.params, layers, [])
    gdupdates = [(p, p + lr * T.grad(gdobjective, p)) for p in gdparams]

    # reinforcement learning
    # Without the maximum, the -log(p) term of misclassified samples would
    # push down the probability of every picked item.
    # rlobjective = (reward_biased.dimshuffle(0, 1, 'x') * T.log(au.item_p)).sum() / x.shape[0]
    rlobjective = (T.maximum(reward_biased.dimshuffle(0, 1, 'x'), 0)
                   * T.log(au.item_p)).sum() / correct
    # item_p:        N * Time * item_count
    # reward_biased: N * Time
    rlparams = au.reinforceParams
    rlupdates = [(p, p + lr * lmbd * T.grad(rlobjective, p)) for p in rlparams]

    # Diagnostic: how much the mean hidden state changes between time steps
    deltas = T.stack(*[((au.output[:, t, :].mean(0)
                         - au.output[:, t + 1, :].mean(0))**2).sum()
                       for t in xrange(glimpse_times - 1)])
    # au.output: N * Time * dim_hidden

    print 'compile step()'
    self.step = theano.function(
        [x, y, lr, reward_bias],
        [gdobjective, rlobjective, correct, T.outer(equalvec, reward_base)],
        updates=gdupdates + rlupdates + rng.updates())
    # print 'compile gdstep()'
    # self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    # print 'compile rlstep()'
    # self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
    print 'compile picked()'
    self.picked = theano.function([x], au.picked)    # item indices
    print 'compile item_p()'
    self.item_p = theano.function([x], au.item_p)    # item probabilities
    if DEBUG:
        print 'compile error()'
        self.error = theano.function([x, y, reward_bias],
                                     [gdobjective, rlobjective])
        print 'compile forward()'
        self.forward = theano.function([x], [layer.output for layer in layers])
        # print 'compile glimpse()'
        # self.glimpse = theano.function([x], au.glimpse)
        # print 'compile innerstate()'
        # self.getinnerstate = theano.function([x], au.innerstate)
        # print 'compile locate()'
        # self.locate = theano.function([x], [au.location_mean, location])
        # print 'compile debug()'
        # self.debug = theano.function([x, y, lr, reward_bias], [deltas, au.location_p], on_unused_input='warn')

    # self.xxx
    self.layers = layers
    self.params = gdparams + rlparams
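
# Usage sketch (illustrative only): the class name ItemAttentionModel is
# hypothetical, and the time dimension of x is assumed to match glimpse_times.
# The grounded parts are the input layout declared above -- a float32 4-D tensor
# (N, time span, item_count, 1) -- and the compiled picked()/item_p() functions,
# which expose the sampled item indices and the per-item probabilities.
#
#   import numpy as np
#
#   model = ItemAttentionModel(item_count=6, dim_input=1, glimpse_times=4,
#                              dim_hidden=64, dim_fc=[32], dim_out=2,
#                              reward_base=None)
#   X = np.random.randn(10, 4, 6, 1).astype('float32')
#   picked = model.picked(X)      # sampled item index per sample and time step
#   probs = model.item_p(X)       # N * glimpse_times * item_count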
def __init__(
        self,
        glimpse_shape,
        glimpse_times,
        dim_hidden,
        dim_fc,
        dim_out,
        reward_base,
        rng_std=1.0,
        activation=T.tanh,
        bptt_truncate=-1,
        lmbd=0.1    # gdupdate + lmbd*rlupdate
):
    if reward_base is None:
        reward_base = np.zeros((glimpse_times)).astype('float32')
        reward_base[-1] = 1.0

    x = T.ftensor3('x')    # N * W * H
    y = T.ivector('y')     # label
    lr = T.fscalar('lr')
    reward_base = theano.shared(name='reward_base',
                                value=np.array(reward_base).astype(
                                    theano.config.floatX),
                                borrow=True)    # Time (vector)
    reward_bias = T.fvector('reward_bias')
    rng = MRG_RandomStreams(np.random.randint(9999999))
    # rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

    # ============================================================================================
    # The RNN output at every time step goes through a shared FC + softmax,
    # and the per-step class distributions are summed before the loss.
    i = InputLayer(x)

    # shared w and b
    w_fc, b_fc = generate_wb(dim_hidden, dim_out, 'fc', params=['w', 'b'])

    # attention unit
    au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng,
                       rng_std, activation, bptt_truncate)
    # au.output: N * Time * dim_hidden

    fcs = [FullConnectLayer(au.output[:, t, :], dim_hidden, dim_out,
                            activation, 'FC[{}]'.format(t),
                            givens={'w': w_fc, 'b': b_fc})
           for t in xrange(glimpse_times)]
    # fc.output: N * dim_out
    sms = [SoftmaxLayer(fcs[t].output) for t in xrange(glimpse_times)]
    # sm.output: N * dim_out

    # the FC layers share w/b, so fcs[0] is added once for parameter collection
    layers = [au] + sms + [fcs[0]]
    # ==============================================================================================

    # stack: glimpse_times * N * classes -> sum over time: N * classes
    output = T.stack(*[sms[t].output for t in xrange(glimpse_times)]).sum(0)
    hidoutput = au.output      # N * dim_output
    location = au.location     # N * T * dim_hidden
    prediction = output.argmax(1)    # N

    # calc
    equalvec = T.eq(prediction, y)    # [0, 1, 0, 0, 1 ...]
    correct = T.cast(T.sum(equalvec), 'float32')
    # noequalvec = T.neq(prediction, y)
    # nocorrect = T.cast(T.sum(noequalvec), 'float32')
    logLoss = T.log(output)[T.arange(y.shape[0]), y]
    # (R_t - b_t), where b = E[R]
    reward_biased = (T.outer(equalvec, reward_base)
                     - reward_bias.dimshuffle('x', 0))    # N * Time

    # gradient descent
    gdobjective = logLoss.sum() / x.shape[0]
    gdparams = reduce(lambda acc, layer: acc + layer.params, layers, [])
    gdupdates = [(p, p + lr * T.grad(gdobjective, p)) for p in gdparams]

    # reinforcement learning
    rlobjective = (reward_biased.dimshuffle(0, 1, 'x')
                   * T.log(au.location_p)).sum() / x.shape[0]
    # location_p:    N * Time * 2
    # location_logp: N * Time
    # reward_biased: N * Time
    rlparams = au.reinforceParams
    rlupdates = [(p, p + lr * lmbd * T.grad(rlobjective, p)) for p in rlparams]

    print 'compile step()'
    self.step = theano.function(
        [x, y, lr, reward_bias],
        [gdobjective, rlobjective, correct, T.outer(equalvec, reward_base)],
        updates=gdupdates + rlupdates)
    # print 'compile gdstep()'
    # self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
    # print 'compile rlstep()'
    # self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
    print 'compile predict()'
    self.predict = theano.function([x], prediction)
    # print 'compile forward()'
    # self.forward = theano.function([x], map(lambda x: x.output, layers))
    # print 'compile error()'
    # self.error = theano.function([x, y], gdobjective)
    print 'compile locate()'
    self.locate = theano.function([x], [au.location_mean, location])
    print 'compile debug()'
    self.debug = theano.function([x, y, lr, reward_bias],
                                 [reward_biased, au.location_p],
                                 on_unused_input='warn')

    # self.xxx
    self.glimpse_times = glimpse_times
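
# Aggregation sketch (plain numpy, illustrative only): this mirrors what the
# T.stack(...).sum(0) line above computes -- per-glimpse class distributions
# from the shared FC + softmax are summed over time, and the argmax of the
# summed scores is the prediction. The shapes below are made up for the demo.
#
#   import numpy as np
#
#   glimpse_times, N, classes = 3, 2, 4
#   per_step = np.random.rand(glimpse_times, N, classes).astype('float32')
#   per_step /= per_step.sum(axis=2, keepdims=True)   # each step is a softmax
#   summed = per_step.sum(axis=0)                      # N * classes
#   prediction = summed.argmax(axis=1)                 # one label per sample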