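# The layer code below relies on a few small helpers (_p, numpy_floatX, sliceT)
# that are defined elsewhere in the codebase. The block below is a minimal sketch
# of what they are assumed to do, included here only for readability; the exact
# definitions live outside this section and may differ in detail.
import numpy as np
import theano
import theano.tensor as tensor
from theano.ifelse import ifelse


def _p(prefix, name):
  # Join a layer prefix and a parameter name, e.g. _p('lstm', 'W_hid') -> 'lstm_W_hid'.
  return '%s_%s' % (prefix, name)


def numpy_floatX(data):
  # Cast python scalars/arrays to the configured theano float type.
  return np.asarray(data, dtype=theano.config.floatX)


def sliceT(x, n, dim):
  # Return the n-th contiguous block of width `dim` along the last axis,
  # i.e. x[..., n*dim:(n+1)*dim]; used to split the fused LSTM pre-activations
  # and the stacked per-layer hidden/cell states.
  if x.ndim == 3:
    return x[:, :, n * dim:(n + 1) * dim]
  if x.ndim == 2:
    return x[:, n * dim:(n + 1) * dim]
  return x[n * dim:(n + 1) * dim]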
# Encoder LSTM: scans a (possibly multi-layer) LSTM over the input sequence and
# returns the per-step hidden and cell states, with optional residual connections
# between layers.
def lstm_enc_layer(self, tparams, state_below, prefix='lstm'):
  nsteps = state_below.shape[0]
  h_depth = self.hidden_depth
  h_sz = self.hidden_size
  if state_below.ndim == 3:
    n_samples = state_below.shape[1]
  else:
    n_samples = 1

  def _step(x_in, h_, c_):
    preact = tensor.dot(sliceT(h_, 0, h_sz), tparams[_p(prefix, 'W_hid')])
    preact += x_in
    # preact += tparams[_p(prefix, 'b')]

    h = [[]] * h_depth
    c = [[]] * h_depth
    outp = [[]] * h_depth
    for di in xrange(h_depth):
      i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
      c[di] = tensor.tanh(sliceT(preact, 3, h_sz))
      c[di] = f * sliceT(c_, di, h_sz) + i * c[di]
      h[di] = o * tensor.tanh(c[di])
      outp[di] = h[di]
      if self.en_residual_conn:
        if (di > 0):
          outp[di] += outp[di - 1]
          print "Connecting residual at %d" % (di)
      if di < (h_depth - 1):
        preact = tensor.dot(sliceT(h_, di + 1, h_sz), tparams[_p(prefix, 'W_hid_' + str(di + 1))]) + \
                 tensor.dot(outp[di], tparams[_p(prefix, 'W_inp_' + str(di + 1))])

    c_out = tensor.concatenate(c, axis=1)
    h_out = tensor.concatenate(h + [outp[-1]], axis=1)
    return h_out, c_out

  state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W_inp')]) +
                 tparams[_p(prefix, 'b')])

  rval, updates = theano.scan(_step,
                              sequences=[state_below],
                              outputs_info=[
                                  tensor.alloc(numpy_floatX(0.), n_samples,
                                               (h_depth + 1) * h_sz),
                                  tensor.alloc(numpy_floatX(0.), n_samples,
                                               h_depth * h_sz),
                              ],
                              name=_p(prefix, '_layers'),
                              n_steps=nsteps)
  return rval, updates
# Training-time LSTM layer: scans over the masked input sequence and optionally
# adds a projected auxiliary input (e.g. the image feature) to every step.
def lstm_layer(self, tparams, state_below, aux_input, use_noise, options,
               prefix='lstm', mask=None):
  nsteps = state_below.shape[0]
  h_depth = options.get('hidden_depth', 1)
  h_sz = options['hidden_size']
  if state_below.ndim == 3:
    n_samples = state_below.shape[1]
  else:
    n_samples = 1

  assert mask is not None

  def _step(m_, x_, h_, c_, xAux):
    preact = tensor.dot(sliceT(h_, 0, h_sz), tparams[_p(prefix, 'W_hid')])
    preact += x_
    if options.get('en_aux_inp', 0):
      preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])
    # preact += tparams[_p(prefix, 'b')]

    h = [[]] * h_depth
    c = [[]] * h_depth
    for di in xrange(h_depth):
      i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
      c[di] = tensor.tanh(sliceT(preact, 3, h_sz))
      c[di] = f * sliceT(c_, di, h_sz) + i * c[di]
      h[di] = o * tensor.tanh(c[di])
      if di < (h_depth - 1):
        preact = tensor.dot(sliceT(h_, di + 1, h_sz), tparams[_p(prefix, 'W_hid_' + str(di + 1))]) + \
                 tensor.dot(h[di], tparams[_p(prefix, 'W_inp_' + str(di + 1))])

    c_out = tensor.concatenate(c, axis=1)
    h_out = tensor.concatenate(h, axis=1)
    return h_out, c_out

  state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W_inp')]) +
                 tparams[_p(prefix, 'b')])

  if options.get('en_aux_inp', 0) == 0:
    aux_input = []

  rval, updates = theano.scan(_step,
                              sequences=[mask, state_below],
                              outputs_info=[
                                  tensor.alloc(numpy_floatX(0.), n_samples,
                                               h_depth * h_sz),
                                  tensor.alloc(numpy_floatX(0.), n_samples,
                                               h_depth * h_sz),
                                  # tensor.alloc(numpy_floatX(0.), n_samples, options['output_size'])],
                              ],
                              non_sequences=[aux_input],
                              name=_p(prefix, '_layers'),
                              n_steps=nsteps)
  return rval, updates
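# A compact numpy reference (illustrative sketch only, not used by the theano graph)
# of the arithmetic carried out for one layer inside _step above: the fused
# pre-activation is split into the input, forget and output gates plus the candidate
# cell, and the new cell/hidden states follow the usual LSTM update.
import numpy as np


def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))


def lstm_step_np(preact, c_prev, h_sz):
  # preact: (n_samples, 4 * h_sz) = x.dot(W_inp) + h_prev.dot(W_hid) + b (+ aux term)
  i = sigmoid(preact[:, 0 * h_sz:1 * h_sz])   # input gate
  f = sigmoid(preact[:, 1 * h_sz:2 * h_sz])   # forget gate
  o = sigmoid(preact[:, 2 * h_sz:3 * h_sz])   # output gate
  g = np.tanh(preact[:, 3 * h_sz:4 * h_sz])   # candidate cell
  c = f * c_prev + i * g
  h = o * np.tanh(c)
  return h, c


h_demo, c_demo = lstm_step_np(np.random.randn(2, 4 * 3), np.zeros((2, 3)), 3)
print(h_demo.shape)  # (2, 3)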
# Beam-search decoder for the single-layer LSTM generator: at every scan step each
# beam is extended with its beam_size best words, and the overall beam_size best
# extensions are kept; decoding stops when all beams emit the END token (index 0).
def lstm_predict_layer(self, tparams, Xi, aux_input, options, beam_size,
                       prefix='lstm'):
  nMaxsteps = 30
  n_samples = 1

  # ---------------------- STEP FUNCTION ---------------------- #
  def _stepP(x_, h_, c_, lP_, dV_, xAux):
    preact = tensor.dot(h_, tparams[_p(prefix, 'W_hid')])
    preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
               tparams[_p(prefix, 'b')])
    if options.get('en_aux_inp', 0):
      preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])

    i = tensor.nnet.sigmoid(sliceT(preact, 0, options['hidden_size']))
    f = tensor.nnet.sigmoid(sliceT(preact, 1, options['hidden_size']))
    o = tensor.nnet.sigmoid(sliceT(preact, 2, options['hidden_size']))
    c = tensor.tanh(sliceT(preact, 3, options['hidden_size']))
    c = f * c_ + i * c
    h = o * tensor.tanh(c)

    p = tensor.dot(h, tparams['Wd']) + tparams['bd']
    p = tensor.nnet.softmax(p)
    lProb = tensor.log(p + 1e-20)

    def _FindB_best(lPLcl, lPprev, dVLcl):
      srtLcl = tensor.argsort(-lPLcl)
      srtLcl = srtLcl[:beam_size]
      deltaVec = tensor.fill(lPLcl[srtLcl], numpy_floatX(-10000.))
      deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
      lProbBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                         lPLcl[srtLcl] + lPprev, deltaVec)
      xWIdxBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                         srtLcl, tensor.zeros_like(srtLcl))
      return lProbBest, xWIdxBest

    rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                      sequences=[lProb, lP_, dV_],
                                      name=_p(prefix, 'FindBest'),
                                      n_steps=x_.shape[0])
    xWIdxBest = rvalLcl[1]
    lProbBest = rvalLcl[0]

    xWIdxBest = xWIdxBest.flatten()
    lProb = lProbBest.flatten()

    # Now sort and find the best among these best extensions for the current beams
    srtIdx = tensor.argsort(-lProb)
    srtIdx = srtIdx[:beam_size]
    xWlogProb = lProb[srtIdx]
    xWIdx = xWIdxBest[srtIdx]
    xCandIdx = srtIdx // beam_size  # Floor division
    xW = tparams['Wemb'][xWIdx.flatten()]
    doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))
    h = h.take(xCandIdx.flatten(), axis=0)
    c = c.take(xCandIdx.flatten(), axis=0)

    return [xW, h, c, xWlogProb, doneVec, xWIdx, xCandIdx], theano.scan_module.until(doneVec.all())
  # ------------------- END of STEP FUNCTION -------------------- #

  if options.get('en_aux_inp', 0) == 0:
    aux_input = []

  hidden_size = options['hidden_size']
  h = tensor.alloc(numpy_floatX(0.), beam_size, hidden_size)
  c = tensor.alloc(numpy_floatX(0.), beam_size, hidden_size)
  lP = tensor.alloc(numpy_floatX(0.), beam_size)
  dV = tensor.alloc(np.int8(0.), beam_size)

  # Propagate the image feature vector
  [xW, h, c, _, _, _, _], _ = _stepP(Xi, h[:1, :], c[:1, :], lP, dV, aux_input)
  xWStart = tparams['Wemb'][[0]]
  [xW, h, c, lP, dV, idx0, cand0], _ = _stepP(xWStart, h[:1, :], c[:1, :], lP, dV, aux_input)

  aux_input = tensor.extra_ops.repeat(aux_input, beam_size, axis=0)

  # Now let's do the loop.
  rval, updates = theano.scan(_stepP,
                              outputs_info=[xW, h, c, lP, dV, None, None],
                              non_sequences=[aux_input],
                              name=_p(prefix, 'predict_layers'),
                              n_steps=nMaxsteps)

  return (rval[3][-1],
          tensor.concatenate([idx0.reshape([1, beam_size]), rval[5]], axis=0),
          tensor.concatenate([cand0.reshape([1, beam_size]), rval[6]], axis=0))
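# Minimal sketch (an illustrative helper, not part of the original code) showing how
# the word-index and candidate-index (back-pointer) matrices returned above can be
# unrolled into full beam hypotheses with plain numpy: at each step, follow the
# parent-beam index backwards and collect the chosen word.
import numpy as np


def backtrack_beams(word_idx, cand_idx):
  # word_idx, cand_idx: arrays of shape (n_steps, beam_size); row t holds the word
  # chosen by each beam at step t and the index of the parent beam at step t-1.
  n_steps, beam_size = word_idx.shape
  seqs = []
  for b in range(beam_size):
    seq = []
    cur = b
    for t in range(n_steps - 1, -1, -1):
      seq.append(int(word_idx[t, cur]))
      cur = int(cand_idx[t, cur])
    seqs.append(seq[::-1])  # reverse into forward order
  return seqs


# toy example: 3 steps, 2 beams
print(backtrack_beams(np.array([[4, 7], [2, 9], [0, 0]]),
                      np.array([[0, 0], [1, 0], [0, 1]])))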
# Argmax (greedy) caption generator used in the adversarial setup: at each step the
# most probable word is fed back, optionally as a probability-weighted embedding,
# and decoding stops when the END token (index 0) is emitted.
def lstm_advers_gen_layer(self, tparams, Xi, aux_input, options, beam_size, prefix='lstm'):
  nMaxsteps = options.get('maxlen', 15)
  n_samples = 1
  h_depth = options.get('hidden_depth', 1)
  h_sz = options['hidden_size']

  # ---------------------- STEP FUNCTION ---------------------- #
  def _stepP(x_, h_, c_, lP_, dV_, xAux):
    preact = tensor.dot(sliceT(h_, 0, h_sz), tparams[_p(prefix, 'W_hid')])
    preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
               tparams[_p(prefix, 'b')])
    if options.get('en_aux_inp', 0):
      preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])

    hL = [[]] * h_depth
    cL = [[]] * h_depth
    for di in xrange(h_depth):
      i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
      cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
      cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
      hL[di] = o * tensor.tanh(cL[di])
      if di < (h_depth - 1):
        preact = tensor.dot(sliceT(h_, di + 1, h_sz), tparams[_p(prefix, 'W_hid_' + str(di + 1))]) + \
                 tensor.dot(hL[di], tparams[_p(prefix, 'W_inp_' + str(di + 1))])

    c = tensor.concatenate(cL, axis=1)
    h = tensor.concatenate(hL, axis=1)

    p = tensor.dot(hL[-1], tparams['Wd']) + tparams['bd']
    smooth_factor = tensor.as_tensor_variable(
        numpy_floatX(options.get('softmax_smooth_factor', 1.0)), name='sm_f')
    p = tensor.nnet.softmax(p * smooth_factor)
    lProb = tensor.log(p + 1e-20)

    # xCandIdx = tensor.as_tensor_variable([0])
    lProb = lProb.flatten()
    xWIdx = tensor.argmax(lProb, keepdims=True)
    xWlogProb = lProb[xWIdx] + lP_

    if options.get('softmax_propogate', 0) == 0:
      xW = tparams['Wemb'][xWIdx.flatten()]
    else:
      xW = p.dot(tparams['Wemb'])
    doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

    return [xW, h, c, xWlogProb, doneVec, xWIdx, p], theano.scan_module.until(doneVec.all())
  # ------------------- END of STEP FUNCTION -------------------- #

  if options.get('en_aux_inp', 0) == 0:
    aux_input = []

  h = tensor.alloc(numpy_floatX(0.), n_samples, h_sz * h_depth)
  c = tensor.alloc(numpy_floatX(0.), n_samples, h_sz * h_depth)

  lP = tensor.alloc(numpy_floatX(0.), beam_size)
  dV = tensor.alloc(np.int8(0.), beam_size)

  # Propagate the image feature vector
  [xW, h, c, _, _, _, _], _ = _stepP(Xi, h, c, lP, dV, aux_input)
  xWStart = tparams['Wemb'][0, :]
  [xW, h, c, lP, dV, idx0, p0], _ = _stepP(xWStart, h, c, lP, dV, aux_input)

  # if options.get('en_aux_inp',0) == 1:
  #   aux_input = tensor.extra_ops.repeat(aux_input,beam_size,axis=0)

  # Now let's do the loop.
  rval, updates = theano.scan(_stepP,
                              outputs_info=[xW, h, c, lP, dV, None, None],
                              non_sequences=[aux_input],
                              name=_p(prefix, 'predict_layers'),
                              n_steps=nMaxsteps - 1)

  return (rval[3][-1],
          tensor.concatenate([idx0.reshape([1, beam_size]), rval[5]], axis=0),
          tensor.concatenate([tensor.shape_padleft(p0, n_ones=1), rval[6]], axis=0),
          updates)
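# Sketch (toy numpy data, illustrative only) of the two feedback modes selected by
# the 'softmax_propogate' option above: either look up the embedding of the argmax
# word, or feed back the probability-weighted average of all word embeddings,
# p.dot(Wemb).
import numpy as np

rng = np.random.RandomState(0)
Wemb = rng.randn(6, 4)                              # toy vocabulary of 6 words, embedding size 4
p = np.array([[0.05, 0.6, 0.1, 0.1, 0.1, 0.05]])    # softmax output for one sample

xW_hard = Wemb[p.argmax(axis=-1)]   # 'softmax_propogate' == 0: embedding of the best word
xW_soft = p.dot(Wemb)               # 'softmax_propogate' != 0: expected embedding
print(xW_hard.shape, xW_soft.shape)  # both (1, 4)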
# Beam-search / argmax decoder for the multi-layer LSTM, with optional residual
# connections between layers and an optional class-factored softmax output
# (class_out_factoring): first predict a word class, then a word within that class.
def lstm_predict_layer(self, tparams, Xi, aux_input, options, beam_size, prefix='lstm'):
  nMaxsteps = options.get('maxlen', 30)
  if nMaxsteps is None:
    nMaxsteps = 30
  n_samples = 1
  h_depth = options.get('hidden_depth', 1)
  h_sz = options['hidden_size']

  # ---------------------- STEP FUNCTION ---------------------- #
  def _stepP(x_, h_, c_, lP_, dV_, xAux):
    preact = tensor.dot(sliceT(h_, 0, h_sz), tparams[_p(prefix, 'W_hid')])
    preact += (tensor.dot(x_, tparams[_p(prefix, 'W_inp')]) +
               tparams[_p(prefix, 'b')])
    if options.get('en_aux_inp', 0):
      preact += tensor.dot(xAux, tparams[_p(prefix, 'W_aux')])

    hL = [[]] * h_depth
    cL = [[]] * h_depth
    outp = [[]] * h_depth
    for di in xrange(h_depth):
      i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
      cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
      cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
      hL[di] = o * tensor.tanh(cL[di])
      outp[di] = hL[di]
      if options.get('en_residual_conn', 1):
        if (di > 0):
          outp[di] += outp[di - 1]
          print "Connecting residual at %d" % (di)
      if di < (h_depth - 1):
        preact = tensor.dot(sliceT(h_, di + 1, h_sz), tparams[_p(prefix, 'W_hid_' + str(di + 1))]) + \
                 tensor.dot(outp[di], tparams[_p(prefix, 'W_inp_' + str(di + 1))])

    c = tensor.concatenate(cL, axis=1)
    h = tensor.concatenate(hL, axis=1)

    if options.get('class_out_factoring', 0) == 1:
      pC = tensor.dot(outp[-1], tparams['WdCls']) + tparams['bdCls']
      pCSft = tensor.nnet.softmax(pC)
      xCIdx = tensor.argmax(pCSft)
      pW = tensor.dot(outp[-1], tparams['Wd'][:, xCIdx, :]) + tparams['bd'][:, xCIdx, :]
      smooth_factor = tensor.as_tensor_variable(
          numpy_floatX(options.get('softmax_smooth_factor', 1.0)), name='sm_f')
      pWSft = tensor.nnet.softmax(pW * smooth_factor)
      lProb = tensor.log(pWSft + 1e-20) + tensor.log(pCSft[0, xCIdx] + 1e-20)
    else:
      p = tensor.dot(outp[-1], tparams['Wd']) + tparams['bd']
      smooth_factor = tensor.as_tensor_variable(
          numpy_floatX(options.get('softmax_smooth_factor', 1.0)), name='sm_f')
      p = tensor.nnet.softmax(p * smooth_factor)
      lProb = tensor.log(p + 1e-20)

    if beam_size > 1:
      def _FindB_best(lPLcl, lPprev, dVLcl):
        srtLcl = tensor.argsort(-lPLcl)
        srtLcl = srtLcl[:beam_size]
        deltaVec = tensor.fill(lPLcl[srtLcl], numpy_floatX(-10000.))
        deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
        lProbBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                           lPLcl[srtLcl] + lPprev, deltaVec)
        xWIdxBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                           srtLcl, tensor.zeros_like(srtLcl))
        return lProbBest, xWIdxBest

      rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                        sequences=[lProb, lP_, dV_],
                                        name=_p(prefix, 'FindBest'),
                                        n_steps=x_.shape[0])
      xWIdxBest = rvalLcl[1]
      lProbBest = rvalLcl[0]

      xWIdxBest = xWIdxBest.flatten()
      lProb = lProbBest.flatten()

      # Now sort and find the best among these best extensions for the current beams
      srtIdx = tensor.argsort(-lProb)
      srtIdx = srtIdx[:beam_size]
      xCandIdx = srtIdx // beam_size  # Floor division
      h = h.take(xCandIdx.flatten(), axis=0)
      c = c.take(xCandIdx.flatten(), axis=0)
      xWlogProb = lProb[srtIdx]
      xWIdx = xWIdxBest[srtIdx]
    else:
      xCandIdx = tensor.as_tensor_variable([0])
      lProb = lProb.flatten()
      xWIdx = tensor.argmax(lProb, keepdims=True)
      xWlogProb = lProb[xWIdx] + lP_
      if options.get('class_out_factoring', 0) == 1:
        clsoffset = tensor.as_tensor_variable(options['ixtoclsinfo'][:, 0])
        xWIdx += clsoffset[xCIdx]
      h = h.take(xCandIdx.flatten(), axis=0)
      c = c.take(xCandIdx.flatten(), axis=0)

    if options.get('softmax_propogate', 0) == 0:
      xW = tparams['Wemb'][xWIdx.flatten()]
    else:
      xW = p.dot(tparams['Wemb'])
    doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

    return [xW, h, c, xWlogProb, doneVec, xWIdx, xCandIdx], theano.scan_module.until(doneVec.all())
  # ------------------- END of STEP FUNCTION -------------------- #

  if options.get('en_aux_inp', 0) == 0:
    aux_input = []

  h = tensor.alloc(numpy_floatX(0.), beam_size, h_sz * h_depth)
  c = tensor.alloc(numpy_floatX(0.), beam_size, h_sz * h_depth)

  lP = tensor.alloc(numpy_floatX(0.), beam_size)
  dV = tensor.alloc(np.int8(0.), beam_size)

  # Propagate the image feature vector
  [xW, h, c, _, _, _, _], _ = _stepP(Xi, h[:1, :], c[:1, :], lP, dV, aux_input)
  xWStart = tparams['Wemb'][[0]]
  [xW, h, c, lP, dV, idx0, cand0], _ = _stepP(xWStart, h[:1, :], c[:1, :], lP, dV, aux_input)

  if options.get('en_aux_inp', 0) == 1:
    aux_input = tensor.extra_ops.repeat(aux_input, beam_size, axis=0)

  # Now let's do the loop.
  rval, updates = theano.scan(_stepP,
                              outputs_info=[xW, h, c, lP, dV, None, None],
                              non_sequences=[aux_input],
                              name=_p(prefix, 'predict_layers'),
                              n_steps=nMaxsteps)

  return (rval[3][-1],
          tensor.concatenate([idx0.reshape([1, beam_size]), rval[5]], axis=0),
          tensor.concatenate([cand0.reshape([1, beam_size]), rval[6]], axis=0),
          tensor.concatenate([tensor.shape_padleft(xW, n_ones=1), rval[0]], axis=0),
          updates)
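# Sketch (toy numpy, illustrative only) of the class-factored output used when
# 'class_out_factoring' is enabled above: the word index produced by the within-class
# softmax is local to the predicted class, and options['ixtoclsinfo'][:, 0] is assumed
# (as in the code above) to hold the first vocabulary index of each class, so adding
# that offset recovers a global word index; the score is log P(class) + log P(word | class).
import numpy as np

cls_offset = np.array([0, 3, 7])          # class 0 -> words 0..2, class 1 -> 3..6, class 2 -> 7..9
pC = np.array([0.2, 0.7, 0.1])            # class softmax
cls = pC.argmax()                         # predicted class: 1
pW = np.array([0.1, 0.6, 0.2, 0.1])       # within-class softmax over the 4 words of class 1
local_idx = pW.argmax()                   # 1
global_idx = cls_offset[cls] + local_idx  # 3 + 1 = 4
log_prob = np.log(pW[local_idx]) + np.log(pC[cls])
print(global_idx, log_prob)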
# Beam-search decoding with an ensemble of models: each model's LSTM state is advanced
# in parallel and the per-model softmax outputs are combined with per-model weights
# ('comb_weight') before extending the beams.
def lstm_multi_model_pred(self, tparams, Xi, aux_input, options, beam_size, nmodels, prefix='lstm'):
  nMaxsteps = 30

  # ---------------------- STEP FUNCTION ---------------------- #
  def _stepP(*in_list):
    x_inp = []
    h_inp = []
    c_inp = []
    for i in xrange(nmodels):
      x_inp.append(in_list[i])
      h_inp.append(in_list[nmodels + i])
      c_inp.append(in_list[2 * nmodels + i])
    lP_ = in_list[3 * nmodels]
    dV_ = in_list[3 * nmodels + 1]

    p_comb = tensor.alloc(numpy_floatX(0.), options[0]['output_size'])
    cf = []
    h = []
    xW = []
    for i in xrange(nmodels):
      preact = tensor.dot(h_inp[i], tparams[i][_p(prefix, 'W_hid')])
      preact += (tensor.dot(x_inp[i], tparams[i][_p(prefix, 'W_inp')]) +
                 tparams[i][_p(prefix, 'b')])
      if options[i].get('en_aux_inp', 0):
        preact += tensor.dot(aux_input2[i], tparams[i][_p(prefix, 'W_aux')])

      inp = tensor.nnet.sigmoid(sliceT(preact, 0, options[i]['hidden_size']))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, options[i]['hidden_size']))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, options[i]['hidden_size']))
      c = tensor.tanh(sliceT(preact, 3, options[i]['hidden_size']))
      cf.append(f * c_inp[i] + inp * c)
      h.append(o * tensor.tanh(cf[i]))

      p = tensor.dot(h[i], tparams[i]['Wd']) + tparams[i]['bd']
      if i == 0:
        p_comb = tparams[i]['comb_weight'] * tensor.nnet.softmax(p)
      else:
        p_comb += tparams[i]['comb_weight'] * tensor.nnet.softmax(p)

    lProb = tensor.log(p_comb + 1e-20)

    def _FindB_best(lPLcl, lPprev, dVLcl):
      srtLcl = tensor.argsort(-lPLcl)
      srtLcl = srtLcl[:beam_size]
      deltaVec = tensor.fill(lPLcl[srtLcl], numpy_floatX(-10000.))
      deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
      lProbBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                         lPLcl[srtLcl] + lPprev, deltaVec)
      xWIdxBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                         srtLcl, tensor.zeros_like(srtLcl))
      return lProbBest, xWIdxBest

    rvalLcl, updatesLcl = theano.scan(_FindB_best,
                                      sequences=[lProb, lP_, dV_],
                                      name=_p(prefix, 'FindBest'),
                                      n_steps=x_inp[0].shape[0])
    xWIdxBest = rvalLcl[1]
    lProbBest = rvalLcl[0]

    xWIdxBest = xWIdxBest.flatten()
    lProb = lProbBest.flatten()

    # Now sort and find the best among these best extensions for the current beams
    srtIdx = tensor.argsort(-lProb)
    srtIdx = srtIdx[:beam_size]
    xWlogProb = lProb[srtIdx]
    xWIdx = xWIdxBest[srtIdx]
    xCandIdx = srtIdx // beam_size  # Floor division

    doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

    x_out = []
    h_out = []
    c_out = []
    for i in xrange(nmodels):
      x_out.append(tparams[i]['Wemb'][xWIdx.flatten()])
      h_out.append(h[i].take(xCandIdx.flatten(), axis=0))
      c_out.append(cf[i].take(xCandIdx.flatten(), axis=0))

    out_list = []
    out_list.extend(x_out)
    out_list.extend(h_out)
    out_list.extend(c_out)
    out_list.extend([xWlogProb, doneVec, xWIdx, xCandIdx])

    return out_list, theano.scan_module.until(doneVec.all())
  # ------------------- END of STEP FUNCTION -------------------- #

  # Xi = tensor.extra_ops.repeat(Xi,beam_size,axis=0)
  lP = tensor.alloc(numpy_floatX(0.), beam_size)
  dV = tensor.alloc(np.int8(0.), beam_size)

  h_inp = []
  c_inp = []
  x_inp = []
  for i in xrange(nmodels):
    hidden_size = options[i]['hidden_size']
    h = theano.shared(np.zeros((1, hidden_size), dtype='float32'))
    c = theano.shared(np.zeros((1, hidden_size), dtype='float32'))
    h_inp.append(h)
    c_inp.append(c)
    x_inp.append(Xi[i])

  aux_input2 = aux_input

  in_list = []
  in_list.extend(x_inp)
  in_list.extend(h_inp)
  in_list.extend(c_inp)
  in_list.append(lP)
  in_list.append(dV)

  # Propagate the image feature vector
  out_list, _ = _stepP(*in_list)
  for i in xrange(nmodels):
    h_inp[i] = out_list[nmodels + i]
    c_inp[i] = out_list[2 * nmodels + i]

  x_inp = []
  for i in xrange(nmodels):
    x_inp.append(tparams[i]['Wemb'][[0]])
    h_inp[i] = h_inp[i][:1, :]
    c_inp[i] = c_inp[i][:1, :]
    # if options[i].get('en_aux_inp',0):
    #   aux_input2.append(aux_input[i])

  in_list = []
  in_list.extend(x_inp)
  in_list.extend(h_inp)
  in_list.extend(c_inp)
  in_list.append(lP)
  in_list.append(dV)

  out_list, _ = _stepP(*in_list)

  aux_input2 = []
  for i in xrange(nmodels):
    x_inp[i] = out_list[i]
    h_inp[i] = out_list[nmodels + i]
    c_inp[i] = out_list[2 * nmodels + i]
    aux_input2.append(tensor.extra_ops.repeat(aux_input[i], beam_size, axis=0))

  lP = out_list[3 * nmodels]
  dV = out_list[3 * nmodels + 1]
  idx0 = out_list[3 * nmodels + 2]
  cand0 = out_list[3 * nmodels + 3]

  in_list = []
  in_list.extend(x_inp)
  in_list.extend(h_inp)
  in_list.extend(c_inp)
  in_list.append(lP)
  in_list.append(dV)
  in_list.append(None)
  in_list.append(None)

  # Now let's do the loop.
  rval, updates = theano.scan(_stepP,
                              outputs_info=in_list,
                              name=_p(prefix, 'predict_layers'),
                              n_steps=nMaxsteps)

  return (rval[3 * nmodels][-1],
          tensor.concatenate([idx0.reshape([1, beam_size]), rval[3 * nmodels + 2]], axis=0),
          tensor.concatenate([cand0.reshape([1, beam_size]), rval[3 * nmodels + 3]], axis=0),
          rval[3 * nmodels])
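# Toy numpy sketch (illustrative only) of the probability-level ensembling done in the
# multi-model step above: each model's softmax is scaled by its 'comb_weight' and the
# weighted sums are decoded; the weights are assumed to sum to one.
import numpy as np

p_models = [np.array([0.7, 0.2, 0.1]), np.array([0.4, 0.5, 0.1])]
comb_weight = [0.5, 0.5]
p_comb = sum(w * p for w, p in zip(comb_weight, p_models))
print(p_comb, p_comb.argmax())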
# Adversarial caption generator: samples sequences for nBatchSamps images
# (n_gen_samples each), optionally using a Gumbel-softmax relaxation, input noise,
# and random state initialisation; also returns the sampled sequence lengths.
def lstm_advers_gen_layer(self, tparams, xI, xAux, options, prefix='lstm'):
  nBatchSamps = xI.shape[0]
  nMaxsteps = options.get('maxlen', 15)
  if nMaxsteps is None:
    nMaxsteps = 30
  n_samp = options.get('n_gen_samples', 1)
  h_depth = options.get('hidden_depth', 1)
  h_sz = options['hidden_size']

  # ---------------------- STEP FUNCTION ---------------------- #
  def _stepP(U, xW_, h_, c_, lP_, dV_, xAux, xNoise):
    preact = tensor.dot(sliceT(h_, 0, h_sz), tparams[_p(prefix, 'W_hid')])
    preact += (tensor.dot(xW_, tparams[_p(prefix, 'W_inp')]) +
               tparams[_p(prefix, 'b')])
    preact += xAux
    if options.get('gen_input_noise', 0):
      preact += xNoise

    hL = [[]] * h_depth
    cL = [[]] * h_depth
    outp = [[]] * h_depth
    for di in xrange(h_depth):
      i = tensor.nnet.sigmoid(sliceT(preact, 0, h_sz))
      f = tensor.nnet.sigmoid(sliceT(preact, 1, h_sz))
      o = tensor.nnet.sigmoid(sliceT(preact, 2, h_sz))
      cL[di] = tensor.tanh(sliceT(preact, 3, h_sz))
      cL[di] = f * sliceT(c_, di, h_sz) + i * cL[di]
      hL[di] = o * tensor.tanh(cL[di])
      outp[di] = hL[di]
      if options.get('en_residual_conn', 1):
        if (di > 0):
          outp[di] += outp[di - 1]
          print "Connecting residual at %d" % (di)
      if di < (h_depth - 1):
        preact = tensor.dot(sliceT(h_, di + 1, h_sz), tparams[_p(prefix, 'W_hid_' + str(di + 1))]) + \
                 tensor.dot(outp[di], tparams[_p(prefix, 'W_inp_' + str(di + 1))])

    c = tensor.concatenate(cL, axis=1)
    h = tensor.concatenate(hL, axis=1)

    logits = tensor.dot(outp[-1], tparams['Wd']) + tparams['bd']
    # p = tensor.dot(outp[-1],l2norm(tparams['Wd'],axis=0))# + tparams['bd']
    if options.get('use_gumbel_mse', 0) == 0 or options.get('greedy', 0):
      p = tensor.nnet.softmax(logits)
    else:
      p = gumbel_softmax_sample(self.trng, logits * self.softmax_smooth_factor,
                                self.gumb_temp, U, options.get('use_gumbel_hard', False))

    if options.get('computelogprob', 0):
      lProb = tensor.log(tensor.nnet.softmax(logits * self.softmax_smooth_factor) + 1e-20)
    else:
      lProb = logits

    # Index of the next word comes from the (sampled or argmax) distribution p;
    # samples that are already finished (dV_) are forced back to the END token.
    xWIdx = ~dV_ * tensor.argmax(p, axis=-1)
    xWlogProb = ~dV_ * lProb[tensor.arange(nBatchSamps * n_samp), xWIdx] + lP_
    # xW = tparams['Wemb'][xWIdx.flatten()]
    if options.get('use_gumbel_hard', 0) and options.get('use_gumbel_mse', 0) and not options.get('greedy', 0):
      xW = p.dot(tparams['Wemb'])
    else:
      xW = theano.gradient.disconnected_grad(
          tparams['Wemb'][xWIdx.flatten()].reshape([xWIdx.shape[0], -1]))

    doneVec = tensor.eq(xWIdx, tensor.zeros_like(xWIdx))

    return [xW, h, c, xWlogProb, doneVec, xWIdx, p], theano.scan_module.until(doneVec.all())
  # ------------------- END of STEP FUNCTION -------------------- #

  if options.get('use_gumbel_mse', 0) == 0:
    U = self.trng.uniform((nMaxsteps, 1), low=0., high=1., dtype=theano.config.floatX)
  else:
    U = self.trng.uniform((nMaxsteps + 1, nBatchSamps * n_samp, options['vocabulary_size']),
                          low=0., high=1., dtype=theano.config.floatX)

  xI = tensor.extra_ops.repeat(xI, n_samp, axis=0)
  xAux = tensor.extra_ops.repeat(tensor.dot(xAux, tparams[_p(prefix, 'W_aux')]), n_samp, axis=0)

  if options.get('gen_input_noise', 0):
    xNoise = tensor.dot(self.trng.normal([nBatchSamps * n_samp, self.noise_dim]),
                        tparams[_p(prefix, 'W_noise')])
  else:
    xNoise = []

  if options.get('gen_use_rand_init', 0) and not options.get('gen_input_noise', 0):
    h = tensor.unbroadcast(self.trng.uniform([nBatchSamps * n_samp, h_sz * h_depth],
                                             low=-0.1, high=0.1), 0, 1)
    c = tensor.unbroadcast(self.trng.uniform([nBatchSamps * n_samp, h_sz * h_depth],
                                             low=-0.1, high=0.1), 0, 1)
  else:
    h = tensor.zeros([nBatchSamps * n_samp, h_sz * h_depth])
    c = tensor.zeros([nBatchSamps * n_samp, h_sz * h_depth])

  lP = tensor.alloc(numpy_floatX(0.), nBatchSamps * n_samp)
  dV = tensor.alloc(np.bool_(0.), nBatchSamps * n_samp)

  # Propagate the image feature vector
  [_, h, c, _, _, _, _], _ = _stepP(U[0, :], xI, h, c, lP, dV, xAux, xNoise)

  xWStart = tensor.unbroadcast(tensor.tile(tparams['Wemb'][[0]], [nBatchSamps * n_samp, 1]), 0, 1)

  # Now let's do the loop.
  rval, updates = theano.scan(_stepP,
                              sequences=[U[1:, :]],
                              outputs_info=[xWStart, h, c, lP, dV, None, None],
                              non_sequences=[xAux, xNoise],
                              name=_p(prefix, 'adv_predict_layers'),
                              n_steps=nMaxsteps)

  seq_lengths = theano.gradient.disconnected_grad(
      tensor.argmax(tensor.concatenate([rval[4][:-1, :],
                                        tensor.ones((1, nBatchSamps * n_samp))], axis=0),
                    axis=0) + 1)

  return rval[3][-1], rval[5], rval[6], updates, seq_lengths
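# The generator above calls gumbel_softmax_sample(...), which is defined elsewhere in
# the codebase. The numpy sketch below shows the standard Gumbel-softmax formulation
# that such a helper is assumed to implement: perturb the logits with Gumbel noise
# derived from uniform samples U and take a temperature-controlled softmax; the
# "hard" variant additionally snaps to a one-hot sample.
import numpy as np


def gumbel_softmax_sample_np(logits, temperature, U, hard=False):
  # U: uniform(0,1) samples with the same shape as logits.
  g = -np.log(-np.log(U + 1e-20) + 1e-20)      # Gumbel(0, 1) noise
  y = (logits + g) / temperature
  y = np.exp(y - y.max(axis=-1, keepdims=True))
  y = y / y.sum(axis=-1, keepdims=True)        # soft sample
  if hard:
    y_hard = np.zeros_like(y)
    y_hard[np.arange(y.shape[0]), y.argmax(axis=-1)] = 1.0
    return y_hard  # (the theano version keeps gradients flowing via the soft sample)
  return y


rng = np.random.RandomState(0)
logits = rng.randn(2, 5)
print(gumbel_softmax_sample_np(logits, 0.5, rng.uniform(size=(2, 5))))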