def build_model(self, tparams, options):
    trng = RandomStreams(1234)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    embW_rev = tparams['Wemb'][xW[::-1, :].flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    xI = tensor.matrix('xI', dtype=config.floatX)
    xAux = tensor.matrix('xAux', dtype=config.floatX)

    if options.get('swap_aux', 0):
        xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
    else:
        xAuxEmb = xAux

    embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
        [1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)
    # Place the image embedding one step before the position where the reversed
    # mask first becomes 1, i.e. just before the reversed sentence starts.
    emb_rev = tensor.set_subtensor(
        embW_rev[mask[::-1, :].argmax(axis=0) - 1, tensor.arange(n_samples), :],
        embImg[0, :, :])

    # This is the implementation of input dropout.
    if options['use_dropout']:
        emb = dropout_layer(emb, use_noise, trng, options['drop_prob_encoder'],
                            shp=emb.shape)
        if options.get('en_aux_inp', 0):
            xAuxEmb = dropout_layer(xAuxEmb, use_noise, trng,
                                    options['drop_prob_aux'], shp=xAuxEmb.shape)

    ############################################################################
    # This implements the core (forward) LSTM.
    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :],
                                         xAuxEmb, use_noise, options,
                                         prefix='lstm', sched_prob_mask=[])
    ############################################################################
    # This implements the core reverse LSTM.
    rev_rval, rev_updatesLSTM = basic_lstm_layer(
        tparams, emb_rev[:n_timesteps, :, :], xAuxEmb, use_noise, options,
        prefix='rev_lstm', sched_prob_mask=[])
    ############################################################################

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless.
    if options['use_dropout']:
        # XXX: the shape given to dropout is missing the time dimension, so the
        # same units stay dropped across all timesteps. Is this a good bug?
        p = dropout_layer(
            sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size']), use_noise, trng,
            options['drop_prob_decoder'], (n_samples, options['hidden_size']))
        rev_p = dropout_layer(
            sliceT(rev_rval[0][:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size']), use_noise, trng,
            options['drop_prob_decoder'], (n_samples, options['hidden_size']))
    else:
        p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size'])
        rev_p = sliceT(rev_rval[0][:, :, :], options.get('hidden_depth', 1),
                       options['hidden_size'])

    n_out_samps = (n_timesteps - 2) * n_samples
    if options.get('class_out_factoring', 0) == 0:
        pW = (tensor.dot(p[:-1, :, :] + rev_p[::-1, :, :][2:, :, :],
                         tparams['Wd']) + tparams['bd']).reshape(
                             [n_out_samps, options['output_size']])
        pWSft = tensor.nnet.softmax(pW)
        totProb = pWSft[tensor.arange(n_out_samps), xW[1:-1, :].flatten()]
        out_list = [pWSft, totProb, p]
    else:
        ixtoclsinfo_t = tensor.as_tensor_variable(options['ixtoclsinfo'])
        xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
        pW = ((tparams['Wd'][:, xC, :].T *
               (p.reshape([1, n_out_samps, options['hidden_size']]))).sum(
                   axis=-1).T + tparams['bd'][:, xC, :])
        pWSft = tensor.nnet.softmax(pW[0, :, :])
        pC = (tensor.dot(p, tparams['WdCls']) + tparams['bdCls']).reshape(
            [n_out_samps, options['nClasses']])
        pCSft = tensor.nnet.softmax(pC)
        totProb = pWSft[tensor.arange(n_out_samps),
                        ixtoclsinfo_t[xW[1:, :].flatten(), 3]] * \
                  pCSft[tensor.arange(n_out_samps), xC]
        out_list = [pWSft, pCSft, totProb, p]

    # XXX: THIS IS VERY FISHY, CHECK THE MASK INDEXING AGAIN.
    probs_valid = tensor.log(totProb + 1e-10) * mask[1:-1, :].flatten()
    tot_cost = -(probs_valid.sum())
    tot_pplx = -(tensor.log2(totProb + 1e-10) * mask[1:-1, :].flatten()).sum()
    cost = [tot_cost / options['batch_size'], tot_pplx]

    inp_list = [xW, mask, xI]
    if options.get('en_aux_inp', 0):
        inp_list.append(xAux)
    if options.get('sched_sampling_mode', None) is not None:
        inp_list.append(curr_epoch)

    per_sent_prob = probs_valid.reshape([n_timesteps - 2, n_samples]).sum(axis=0)
    f_per_sentLogP = theano.function(inp_list, per_sent_prob,
                                     name='f_pred_logprob', updates=updatesLSTM)
    f_pred_prob = ['', f_per_sentLogP, '']

    return use_noise, inp_list, f_pred_prob, cost, out_list, updatesLSTM
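
# Illustration only, not part of the model code: because the shape handed to
# dropout_layer above is (n_samples, hidden_size) with no time dimension, the
# sampled binary mask broadcasts over the leading time axis, which is why the
# same hidden units stay dropped at every timestep. A minimal numpy sketch of
# that broadcasting behaviour, with made-up sizes:
def _example_time_consistent_dropout_mask():
    import numpy as np
    rng = np.random.RandomState(0)
    h = rng.rand(5, 4, 3)                       # (time, samples, hidden)
    mask = rng.binomial(1, 0.5, size=(4, 3))    # one mask, no time axis
    dropped = h * mask                          # mask is broadcast over time
    # Every timestep has zeros at the same (sample, unit) positions.
    return dropped
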
def build_eval_other_sent(self, tparams, options, model_npy):
    zipp(model_npy, self.model_th)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]
    n_out_samps = (n_timesteps - 1) * n_samples

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    xI = tensor.matrix('xI', dtype=config.floatX)
    xAux = tensor.matrix('xAux', dtype=config.floatX)

    if options.get('swap_aux', 0):
        xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
    else:
        xAuxEmb = xAux

    embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
        [1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)

    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :],
                                         xAuxEmb, use_noise, options,
                                         prefix=options['generator'])
    p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
               options['hidden_size'])

    pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
        [n_out_samps, options['output_size']])
    pWSft = tensor.nnet.softmax(pW)
    totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]

    # #pred = tensor.nnet.softmax(p)
    #
    # #pred = rval[2]
    #
    # #pred = pred[1:,:,:]
    #
    # def accumCost(pred, xW, m, c_sum, ppl_sum):
    #     pred = tensor.nnet.softmax(pred)
    #     c_sum += (tensor.log(pred[tensor.arange(n_samples), xW] + 1e-20) * m)
    #     ppl_sum += -(tensor.log2(pred[tensor.arange(n_samples), xW] + 1e-10) * m)
    #     return c_sum, ppl_sum
    #
    # sums, upd = theano.scan(fn=accumCost,
    #                         outputs_info=[tensor.alloc(numpy_floatX(0.), 1, n_samples),
    #                                       tensor.alloc(numpy_floatX(0.), 1, n_samples)],
    #                         sequences=[p, xW[1:, :], mask[1:, :]])

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless. The cost below is the masked negative log
    # probability (natural log), normalised by the batch size.
    tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()).sum()
    cost = tot_cost / options['batch_size']

    inp_list = [xW, mask, xI]
    if options.get('en_aux_inp', 0):
        inp_list.append(xAux)

    self.f_pred_prob_other = theano.function(inp_list, p, name='f_pred_prob',
                                             updates=updatesLSTM)
    # f_pred = theano.function([xW, mask], pred.argmax(axis=1), name='f_pred')
    # cost = -tensor.log(pred[tensor.arange(n_timesteps), tensor.arange(n_samples), xW] + 1e-8).mean()
    self.f_eval_other = theano.function(inp_list, cost, name='f_eval')

    return use_noise, inp_list, self.f_pred_prob_other, cost, pW, updatesLSTM
def build_model(self, tparams, options):
    trng = RandomStreams(1234)

    # Used for dropout.
    self.use_noise = theano.shared(numpy_floatX(0.))

    if not self.use_shared_features:
        xI = tensor.tensor3('xI', dtype=config.floatX)
        xIemb = xI
        n_timesteps = xI.shape[0]
        n_samples = xI.shape[1]
    else:
        xI = tensor.matrix('xI', dtype='int64')
        n_timesteps = xI.shape[0]
        n_samples = xI.shape[1]
        # feats = tensor.concatenate([self.features, tensor.alloc(numpy_floatX(0.), self.image_feat_size, 1)], axis=1).T
        xIemb = self.features[xI.flatten(), :].reshape(
            [n_timesteps, n_samples, self.image_feat_size])

    samp_lens = tensor.vector('sL', dtype='int64')

    # This is the implementation of input dropout.
    if options['use_dropout']:
        emb = dropout_layer(xIemb, self.use_noise, trng,
                            options['drop_prob_encoder'], shp=xIemb.shape)
    else:
        emb = xIemb

    ############################################################################
    # This implements the core LSTM.
    rval, updatesLSTM = self.lstm_enc_layer(tparams, emb, prefix=self.mp + 'lstm')
    ############################################################################
    # This implements the core reverse LSTM.
    if self.encoder == 'bilstm':
        rev_rval, rev_updatesLSTM = basic_lstm_layer(tparams, emb[::-1, :, :],
                                                     prefix=self.mp + 'rev_lstm')
    ############################################################################

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless.
    p = sliceT(rval[0][samp_lens, tensor.arange(n_samples), :],
               self.hidden_depth, self.hidden_size)
    if self.encoder == 'bilstm':
        rev_p = sliceT(rev_rval[0][-1, :, :], self.hidden_depth, self.hidden_size)

    feat_enc = p + rev_p if self.encoder == 'bilstm' else p

    if options.get('encoder_add_mean', 0):
        feat_enc = feat_enc + (sliceT(rval[0], self.hidden_depth,
                                      self.hidden_size).sum(axis=0) /
                               samp_lens[:, None])

    inp_list = [xI, samp_lens]

    return self.use_noise, inp_list, feat_enc, updatesLSTM
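
# Illustration only, not part of the model code: the slice
# rval[0][samp_lens, tensor.arange(n_samples), :] above picks, for each sample
# in the batch, the hidden state at the timestep given by its entry in
# samp_lens (presumably that sample's own sequence end). A numpy sketch of the
# same fancy-indexing pattern, with made-up sizes:
def _example_gather_per_sample_hidden_state():
    import numpy as np
    h = np.arange(5 * 3 * 2).reshape(5, 3, 2)   # (time, samples, hidden)
    samp_lens = np.array([4, 2, 3])             # one timestep index per sample
    last_h = h[samp_lens, np.arange(3), :]      # shape (samples, hidden)
    return last_h
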
def build_eval_other_sent(self, tparams, options, model_npy):
    zipp(model_npy, self.model_th)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]
    n_out_samps = (n_timesteps - 1) * n_samples

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    xI = tensor.matrix('xI', dtype=config.floatX)
    xAux = tensor.matrix('xAux', dtype=config.floatX)

    if options.get('swap_aux', 0):
        xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
    else:
        xAuxEmb = xAux

    embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
        [1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)

    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :],
                                         xAuxEmb, use_noise, options,
                                         prefix=options['generator'])
    p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
               options['hidden_size'])

    if options.get('class_out_factoring', 0) == 0:
        pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
            [n_out_samps, options['output_size']])
        pWSft = tensor.nnet.softmax(pW)
        totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]
        out_list = [pWSft, totProb, p]
    else:
        ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
        xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
        pW = ((tparams['Wd'][:, xC, :].T *
               ((p.reshape([1, n_out_samps, options['hidden_size']]) -
                 tparams['WdCls'][:, xC].T))).sum(axis=-1).T +
              tparams['bd'][:, xC, :])
        pWSft = tensor.nnet.softmax(pW[0, :, :])
        pC = (tensor.dot(p, tparams['WdCls']) + tparams['bdCls']).reshape(
            [n_out_samps, options['nClasses']])
        pCSft = tensor.nnet.softmax(pC)
        totProb = pWSft[tensor.arange(n_out_samps),
                        ixtoclsinfo_t[xW[1:, :].flatten(), 3]] * \
                  pCSft[tensor.arange(n_out_samps), xC]

    tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()
                 ).reshape([n_timesteps - 1, n_samples])
    cost = tot_cost.sum(axis=0)

    inp_list = [xW, mask, xI]
    if options.get('en_aux_inp', 0):
        inp_list.append(xAux)

    self.f_pred_prob_other = theano.function([xW, xI, xAux], pWSft,
                                             name='f_pred_prob',
                                             updates=updatesLSTM)
    # f_pred = theano.function([xW, mask], pred.argmax(axis=1), name='f_pred')
    # cost = -tensor.log(pred[tensor.arange(n_timesteps), tensor.arange(n_samples), xW] + 1e-8).mean()
    self.f_eval_other = theano.function(inp_list, cost, name='f_eval')

    return use_noise, inp_list, self.f_pred_prob_other, cost, pW, updatesLSTM
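
# Illustration only, not part of the model code: with class_out_factoring the
# probability of a word is factored into P(class | h) * P(word | class, h),
# which is what the product of pCSft and pWSft above computes. A small numpy
# sketch of that factorisation, with made-up sizes and scores:
def _example_class_factored_softmax():
    import numpy as np

    def softmax(x):
        e = np.exp(x - x.max())
        return e / e.sum()

    cls_scores = np.array([1.0, 0.5])          # scores over 2 word classes
    word_scores = np.array([0.2, 0.9, 0.1])    # scores over words within a class
    p_class = softmax(cls_scores)
    p_word_in_class = softmax(word_scores)
    # Probability of the word at in-class index 1, assuming it belongs to class 0.
    return p_class[0] * p_word_in_class[1]
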
def build_model(self, tparams, options, xI=None, xAux=None, attn_nw=None):
    self.trng = RandomStreams(int(time.time()))

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])

    if xI is None:
        xI = tensor.matrix('xI', dtype=config.floatX)
        embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img'])
        xI_is_inp = True
    else:
        embImg = xI
        xI_is_inp = False

    if xAux is None:
        xAux = (tensor.matrix('xAux', dtype=config.floatX) if attn_nw is None
                else tensor.tensor3('xAux', dtype=config.floatX))
        if (options.get('swap_aux', 1)) and (attn_nw is None):
            xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
        else:
            xAuxEmb = xAux
        xA_is_inp = True
    else:
        xA_is_inp = False
        if options.get('encode_gt_sentences', 0):
            xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
        else:
            xAuxEmb = xAux

    embImg = embImg.reshape([1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)

    # This is the implementation of input dropout.
    if options['use_dropout']:
        emb = dropout_layer(emb, use_noise, self.trng,
                            options['drop_prob_encoder'], shp=emb.shape)
        if (options.get('en_aux_inp', 0)) and (attn_nw is None):
            xAuxEmb = dropout_layer(xAuxEmb, use_noise, self.trng,
                                    options['drop_prob_aux'], shp=xAuxEmb.shape)

    # Implement scheduled sampling!
    if options.get('sched_sampling_mode', None) is not None:
        curr_epoch = tensor.scalar(name='curr_epoch', dtype=config.floatX)

        # Assign the probabilities according to the scheduling mode.
        if options['sched_sampling_mode'] == 'linear':
            prob = tensor.maximum(options['sslin_min'],
                                  options['sched_sampling_const'] -
                                  options['sslin_slope'] * curr_epoch)
        elif options['sched_sampling_mode'] == 'exp':
            raise ValueError('ERROR: %s --> This solver type is not yet supported'
                             % (options['sched_sampling_mode']))
        elif options['sched_sampling_mode'] == 'invsig':
            raise ValueError('ERROR: %s --> This solver type is not yet supported'
                             % (options['sched_sampling_mode']))
        else:
            raise ValueError('ERROR: %s --> This scheduling type is unknown'
                             % (options['sched_sampling_mode']))

        # Now build the mask. We don't want to do this coin toss when feeding
        # in the image feature and the start symbol.
        sched_mask = self.trng.binomial((n_timesteps - 2, n_samples), p=prob,
                                        n=1, dtype='int64')
        sched_mask = tensor.concatenate(
            [sched_mask, tensor.alloc(1, 2, n_samples)], axis=0)
    else:
        sched_mask = []

    ############################################################################
    # This implements the core LSTM.
    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :],
                                         xAuxEmb, use_noise, options,
                                         prefix=options['generator'],
                                         sched_prob_mask=sched_mask,
                                         attn_nw=attn_nw)
    ############################################################################

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless.
    if options['use_dropout']:
        # XXX: the shape given to dropout is missing the time dimension, so the
        # same units stay dropped across all timesteps. Is this a good bug?
        p = dropout_layer(
            sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size']), use_noise, self.trng,
            options['drop_prob_decoder'], (n_samples, options['hidden_size']))
    else:
        p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1),
                   options['hidden_size'])

    if options.get('class_out_factoring', 0) == 1:
        if options.get('cls_diff_layer', 0) == 1:
            pC_inp = dropout_layer(
                sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1) - 2,
                       options['hidden_size']), use_noise, self.trng,
                options['drop_prob_decoder'], (n_samples, options['hidden_size']))
        else:
            pC_inp = p

    n_out_samps = (n_timesteps - 1) * n_samples
    if options.get('class_out_factoring', 0) == 0:
        pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
            [n_out_samps, options['output_size']])
        if options.get('use_gumbel_mse', 0) == 0:
            pWSft = tensor.nnet.softmax(pW)
        else:
            w_out = ifelse(self.usegumbel,
                           gumbel_softmax_sample(self.trng, pW, self.gumb_temp,
                                                 hard=options.get('use_gumbel_hard', False)),
                           tensor.nnet.softmax(pW))
            # This is not exactly right, but just testing.
            pWSft = w_out
        totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]
        out_list = [pWSft, totProb, pW]
    else:
        ixtoclsinfo_t = tensor.as_tensor_variable(self.clsinfo)
        xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
        if options.get('cls_zmean', 1):
            pW = ((tparams['Wd'][:, xC, :].T *
                   ((p.reshape([1, n_out_samps, options['hidden_size']]) -
                     tparams['WdCls'][:, xC].T))).sum(axis=-1).T +
                  tparams['bd'][:, xC, :])
        else:
            pW = ((tparams['Wd'][:, xC, :].T *
                   (p.reshape([1, n_out_samps, options['hidden_size']]))
                   ).sum(axis=-1).T + tparams['bd'][:, xC, :])
        pWSft = tensor.nnet.softmax(pW[0, :, :])

        pC = (tensor.dot(pC_inp, tparams['WdCls']) + tparams['bdCls']).reshape(
            [n_out_samps, options['nClasses']])
        pCSft = tensor.nnet.softmax(pC)

        totProb = pWSft[tensor.arange(n_out_samps),
                        ixtoclsinfo_t[xW[1:, :].flatten(), 3]] * \
                  pCSft[tensor.arange(n_out_samps), xC]
        out_list = [pWSft, pCSft, totProb, p]

    tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()).sum()
    tot_pplx = -(tensor.log2(totProb + 1e-10) * mask[1:, :].flatten()).sum()
    cost = [tot_cost / tensor.cast(n_samples, dtype=config.floatX), tot_pplx]

    inp_list = [xW, mask]
    if xI_is_inp:
        inp_list.append(xI)
    if options.get('en_aux_inp', 0) and xA_is_inp:
        inp_list.append(xAux)
    if options.get('sched_sampling_mode', None) is not None:
        inp_list.append(curr_epoch)

    f_pred_prob = theano.function([xW, xI, xAux], out_list, name='f_pred_prob',
                                  updates=updatesLSTM)

    return use_noise, inp_list, f_pred_prob, cost, out_list, updatesLSTM
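
# Illustration only, not part of the model code: under the 'linear' scheduled
# sampling mode above, the coin-toss probability decays with the epoch down to
# a floor, and the resulting mask is forced to 1 for the two steps that are
# never sampled (the image feature and the start symbol). A numpy sketch of
# that schedule and mask construction, with made-up option values and sizes:
def _example_scheduled_sampling_mask(curr_epoch=3.0):
    import numpy as np
    sslin_min, sched_sampling_const, sslin_slope = 0.1, 1.0, 0.2  # made-up values
    prob = max(sslin_min, sched_sampling_const - sslin_slope * curr_epoch)
    rng = np.random.RandomState(0)
    n_timesteps, n_samples = 7, 4
    sched_mask = rng.binomial(1, prob, size=(n_timesteps - 2, n_samples))
    # No coin toss for the two reserved steps: append rows of ones.
    sched_mask = np.concatenate(
        [sched_mask, np.ones((2, n_samples), dtype=int)], axis=0)
    return prob, sched_mask
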
def build_model(self, tparams, options):
    trng = RandomStreams(1234)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    xI = tensor.matrix('xI', dtype=config.floatX)
    xAux = tensor.matrix('xAux', dtype=config.floatX)

    if options.get('swap_aux', 0):
        xAuxEmb = tensor.dot(xAux, tparams['WIemb_aux']) + tparams['b_Img_aux']
    else:
        xAuxEmb = xAux

    embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
        [1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)

    # This is the implementation of input dropout.
    if options['use_dropout']:
        emb = dropout_layer(emb, use_noise, trng, options['drop_prob_encoder'],
                            shp=emb.shape)
        if options.get('en_aux_inp', 0):
            xAuxEmb = dropout_layer(xAuxEmb, use_noise, trng,
                                    options['drop_prob_aux'], shp=xAuxEmb.shape)

    # Implement scheduled sampling!
    if options.get('sched_sampling_mode', None) is not None:
        curr_epoch = tensor.scalar(name='curr_epoch', dtype=config.floatX)

        # Assign the probabilities according to the scheduling mode.
        if options['sched_sampling_mode'] == 'linear':
            prob = tensor.maximum(options['sslin_min'],
                                  options['sched_sampling_const'] -
                                  options['sslin_slope'] * curr_epoch)
        elif options['sched_sampling_mode'] == 'exp':
            raise ValueError('ERROR: %s --> This solver type is not yet supported'
                             % (options['sched_sampling_mode']))
        elif options['sched_sampling_mode'] == 'invsig':
            raise ValueError('ERROR: %s --> This solver type is not yet supported'
                             % (options['sched_sampling_mode']))
        else:
            raise ValueError('ERROR: %s --> This scheduling type is unknown'
                             % (options['sched_sampling_mode']))

        # Now build the mask. We don't want to do this coin toss when feeding
        # in the image feature and the start symbol.
        sched_mask = trng.binomial((n_timesteps - 2, n_samples), p=prob, n=1,
                                   dtype='int64')
        sched_mask = tensor.concatenate(
            [sched_mask, tensor.alloc(1, 2, n_samples)], axis=0)
    else:
        sched_mask = []

    ############################################################################
    # This implements the core LSTM.
    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :],
                                         xAuxEmb, use_noise, options,
                                         prefix=options['generator'],
                                         sched_prob_mask=sched_mask)
    ############################################################################

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless.
    if options['use_dropout']:
        # XXX: the shape given to dropout is missing the time dimension, so the
        # same units stay dropped across all timesteps. Is this a good bug?
        p = dropout_layer(
            sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1) - 1,
                   options['hidden_size']), use_noise, trng,
            options['drop_prob_decoder'], (n_samples, options['hidden_size']))
    else:
        p = sliceT(rval[0][1:, :, :], options.get('hidden_depth', 1) - 1,
                   options['hidden_size'])

    n_out_samps = (n_timesteps - 1) * n_samples
    if options.get('class_out_factoring', 0) == 0:
        pW = (tensor.dot(p, tparams['Wd']) + tparams['bd']).reshape(
            [n_out_samps, options['output_size']])
        pWSft = tensor.nnet.softmax(pW)
        totProb = pWSft[tensor.arange(n_out_samps), xW[1:, :].flatten()]
        out_list = [pWSft, totProb, p]
    else:
        ixtoclsinfo_t = tensor.as_tensor_variable(options['ixtoclsinfo'])
        xC = ixtoclsinfo_t[xW[1:, :].flatten(), 0]
        pW = ((tparams['Wd'][:, xC, :].T *
               (p.reshape([1, n_out_samps, options['hidden_size']]))).sum(
                   axis=-1).T + tparams['bd'][:, xC, :])
        pWSft = tensor.nnet.softmax(pW[0, :, :])
        pC = (tensor.dot(p, tparams['WdCls']) + tparams['bdCls']).reshape(
            [n_out_samps, options['nClasses']])
        pCSft = tensor.nnet.softmax(pC)
        totProb = pWSft[tensor.arange(n_out_samps),
                        ixtoclsinfo_t[xW[1:, :].flatten(), 3]] * \
                  pCSft[tensor.arange(n_out_samps), xC]
        out_list = [pWSft, pCSft, totProb, p]

    tot_cost = -(tensor.log(totProb + 1e-10) * mask[1:, :].flatten()).sum()
    tot_pplx = -(tensor.log2(totProb + 1e-10) * mask[1:, :].flatten()).sum()
    cost = [tot_cost / options['batch_size'], tot_pplx]

    inp_list = [xW, mask, xI]
    if options.get('en_aux_inp', 0):
        inp_list.append(xAux)
    if options.get('sched_sampling_mode', None) is not None:
        inp_list.append(curr_epoch)

    f_pred_prob = []  # theano.function(inp_list, p, name='f_pred_prob', updates=updatesLSTM)

    return use_noise, inp_list, f_pred_prob, cost, out_list, updatesLSTM
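
# Illustration only, not part of the model code: the cost above is a masked
# negative log-likelihood (natural log) together with a log2 sum used for
# perplexity, both counted only where mask == 1. A numpy sketch with made-up
# per-position probabilities of the correct word:
def _example_masked_nll_and_perplexity():
    import numpy as np
    tot_prob = np.array([0.5, 0.25, 0.9, 0.1])   # P(correct word) per position
    mask = np.array([1.0, 1.0, 1.0, 0.0])        # last position is padding
    tot_cost = -(np.log(tot_prob + 1e-10) * mask).sum()
    tot_pplx = -(np.log2(tot_prob + 1e-10) * mask).sum()
    perplexity = 2 ** (tot_pplx / mask.sum())    # per-word perplexity
    return tot_cost, perplexity
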
def build_eval_other_sent(self, tparams, options, model_npy):
    zipp(model_npy, self.model_th)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    xW = tensor.matrix('xW', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    n_timesteps = xW.shape[0]
    n_samples = xW.shape[1]

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_timesteps, n_samples, options['word_encoding_size']])
    xI = tensor.matrix('xI', dtype=config.floatX)
    xAux = tensor.matrix('xAux', dtype=config.floatX)

    embImg = (tensor.dot(xI, tparams['WIemb']) + tparams['b_Img']).reshape(
        [1, n_samples, options['image_encoding_size']])
    emb = tensor.concatenate([embImg, embW], axis=0)

    rval, updatesLSTM = basic_lstm_layer(tparams, emb[:n_timesteps, :, :], xAux,
                                         use_noise, options,
                                         prefix=options['generator'])
    p = rval[0]
    p = tensor.dot(p, tparams['Wd']) + tparams['bd']

    # pred = tensor.nnet.softmax(p)
    # pred = rval[2]
    # pred = pred[1:,:,:]
    p = p[1:, :, :]

    def accumCost(pred, xW, m, c_sum, ppl_sum):
        pred = tensor.nnet.softmax(pred)
        c_sum += (tensor.log(pred[tensor.arange(n_samples), xW] + 1e-20) * m)
        ppl_sum += -(tensor.log2(pred[tensor.arange(n_samples), xW] + 1e-10) * m)
        return c_sum, ppl_sum

    sums, upd = theano.scan(fn=accumCost,
                            outputs_info=[tensor.alloc(numpy_floatX(0.), 1, n_samples),
                                          tensor.alloc(numpy_floatX(0.), 1, n_samples)],
                            sequences=[p, xW[1:, :], mask[1:, :]])

    # NOTE1: we are leaving out the first prediction, which was made for the
    # image and is meaningless. sums[0] accumulates the log probability
    # (natural log) and sums[1] the perplexity terms (log2); cost takes the
    # final value of sums[0].
    cost = sums[0][-1]

    inp_list = [xW, mask, xI]
    if options.get('en_aux_inp', 0):
        inp_list.append(xAux)

    self.f_pred_prob_other = theano.function(inp_list, p, name='f_pred_prob',
                                             updates=updatesLSTM)
    # f_pred = theano.function([xW, mask], pred.argmax(axis=1), name='f_pred')
    # cost = -tensor.log(pred[tensor.arange(n_timesteps), tensor.arange(n_samples), xW] + 1e-8).mean()
    self.f_eval_other = theano.function(inp_list, cost, name='f_eval')

    return use_noise, inp_list, self.f_pred_prob_other, cost, p, updatesLSTM
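
# Illustration only, not part of the model code: the theano.scan above keeps a
# running sum of masked log-probabilities over timesteps. The numpy loop below
# mirrors that accumulation for a single batch, with made-up sizes:
def _example_accumulate_log_probs():
    import numpy as np
    rng = np.random.RandomState(0)
    n_timesteps, n_samples, vocab = 4, 3, 5
    scores = rng.rand(n_timesteps, n_samples, vocab)       # unnormalised scores
    words = rng.randint(vocab, size=(n_timesteps, n_samples))
    mask = np.ones((n_timesteps, n_samples))
    c_sum = np.zeros(n_samples)
    for t in range(n_timesteps):
        e = np.exp(scores[t] - scores[t].max(axis=1, keepdims=True))
        pred = e / e.sum(axis=1, keepdims=True)             # per-sample softmax
        c_sum += np.log(pred[np.arange(n_samples), words[t]] + 1e-20) * mask[t]
    return c_sum
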