def fflayer(self,
            tfparams,
            state_below,
            options,
            prefix='rconv',
            activ='lambda x: tf.tanh(x)',
            **kwargs):
    return eval(activ)(
        batch_matmul(state_below, tfparams[_p(prefix, 'W')]) +
        tfparams[_p(prefix, 'b')])
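# Usage sketch (illustrative, not from the model code): assuming parameters
# 'ff_logit_W' and 'ff_logit_b' were created elsewhere with matching shapes,
# a logit layer over the LSTM state would look like:
#
#   logit = self.fflayer(tfparams, proj_h, options,
#                        prefix='ff_logit', activ='lambda x: x')
#
# `activ` is eval'd from a string so the activation can be serialized along
# with the model options, a convention inherited from the Theano original.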
def step(prev, elems):
    # gather previous internal state and output state
    if options['use_dropout']:
        m_, x_, dp_ = elems
    else:
        m_, x_ = elems
    h_, c_, _, _, _ = prev
    preact = tf.matmul(h_, U, name="MatMul_preact")  # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
    preact = preact + x_
    i = _slice(preact, 0, dim)  # (64,512), columns 0-511, or (m,512) in sampling
    f = _slice(preact, 1, dim)  # (64,512), columns 512-1023, or (m,512) in sampling
    o = _slice(preact, 2, dim)  # (64,512), columns 1024-1535, or (m,512) in sampling
    if options['use_dropout']:
        i = i * _slice(dp_, 0, dim)
        f = f * _slice(dp_, 1, dim)
        o = o * _slice(dp_, 2, dim)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    c = tf.tanh(_slice(preact, 3, dim))  # (64,512), columns 1536-2047, or (m,512) in sampling
    c = f * c_ + i * c
    c = m_[:, None] * c + (1. - m_)[:, None] * c_  # (m,1)*(m,512) + (m,1)*(m,512) = (m,512)
    h = o * tf.tanh(c)  # (m,512)*(m,512) = (m,512)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_

    # attention
    pstate_ = tf.matmul(h, Wd_att)  # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
    pctx_t = pctx_ + pstate_[:, None, :]  # (64,28,2048)+(64,1,2048) = (64,28,2048); (1,28,2048)+(m,1,2048) = (m,28,2048) in sampling
    pctx_t = tf.tanh(pctx_t)
    alpha = batch_matmul(pctx_t, U_att) + c_att  # (64,28,2048)*(2048,1)+(1,) = (64,28,1) or (m,28,1) in sampling
    alpha_pre = alpha
    alpha_shape = tf.shape(alpha)
    alpha = tf.nn.softmax(tf.reshape(alpha, [alpha_shape[0], alpha_shape[1]]))  # softmax over frames: (64,28) or (m,28) in sampling
    ctx_ = tf.reduce_sum(context * alpha[:, :, None], 1)  # (64,28,2048)*(64,28,1) summed over frames = (64,2048) or (m,2048) in sampling
    if options['selector']:
        sel_ = tf.sigmoid(tf.matmul(h_, W_sel) + b_sel)  # (64,512)*(512,1)+(1,) = (64,1) or (m,1) in sampling
        sel_shape = tf.shape(sel_)
        sel_ = tf.reshape(sel_, [sel_shape[0]])  # (64,) or (m,) in sampling
        ctx_ = sel_[:, None] * ctx_  # (64,1)*(64,2048) = (64,2048) or (m,2048) in sampling
    else:
        sel_ = tf.zeros(shape=(n_samples,), dtype=tf.float32)
    rval = [h, c, alpha, ctx_, sel_]
    return rval
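# Shape check for the attention pooling above (a standalone numpy sketch
# under assumed sizes m=4, f=28 frames, ctx_dim=2048; not part of the graph):
#
#   import numpy as np
#   m, f, ctx_dim = 4, 28, 2048
#   context = np.random.randn(m, f, ctx_dim).astype('float32')
#   e = np.random.randn(m, f).astype('float32')          # unnormalized scores
#   alpha = np.exp(e) / np.exp(e).sum(1, keepdims=True)  # softmax over frames
#   ctx = (context * alpha[:, :, None]).sum(1)           # weighted frame average
#   assert ctx.shape == (m, ctx_dim) and np.allclose(alpha.sum(1), 1.0)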
def lstm_layer(self,
               tfparams,
               state_below,
               mask=None,
               init_state=None,
               init_memory=None,
               one_step=False,
               prefix='lstm',
               **kwargs):
    # state_below (t, m, dim_word), or (m, dim_word) in sampling
    if one_step:
        if init_memory is None:
            raise ValueError('previous memory must be provided')
        if init_state is None:
            raise ValueError('previous state must be provided')

    dim = tfparams[_p(prefix, 'U')].shape[0]
    state_below_shape = tf.shape(state_below)
    if state_below.shape.ndims == 3:
        n_samples = state_below_shape[1]
    else:
        n_samples = 1

    # mask and initial state/memory default to all-ones / all-zeros
    # (a zero tf.constant gives the same result as tf.zeros here)
    if mask is None:
        mask = tf.ones(shape=[state_below_shape[0]], dtype=tf.float32)
    if init_state is None:
        init_state = tf.zeros(shape=[n_samples, dim], dtype=tf.float32)
    if init_memory is None:
        init_memory = tf.zeros(shape=[n_samples, dim], dtype=tf.float32)

    def _slice(_x, n, dim):
        if _x.shape.ndims == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        elif _x.shape.ndims == 2:
            return _x[:, n * dim:(n + 1) * dim]
        return _x[n * dim:(n + 1) * dim]

    U = tfparams[_p(prefix, 'U')]
    b = tfparams[_p(prefix, 'b')]

    def step(prev, elems):
        m_, x_ = elems
        h_, c_ = prev
        preact = tf.matmul(h_, U)  # (64,512)*(512,2048) = (64,2048) or (1,2048) in sampling
        preact = preact + x_
        i = tf.sigmoid(_slice(preact, 0, dim))  # (64,512)
        f = tf.sigmoid(_slice(preact, 1, dim))  # (64,512)
        o = tf.sigmoid(_slice(preact, 2, dim))  # (64,512)
        c = tf.tanh(_slice(preact, 3, dim))     # (64,512)
        c = f * c_ + i * c
        h = o * tf.tanh(c)
        if m_.shape.ndims == 0:
            # scalar mask, e.g. when the minibatch size is 1
            h = m_ * h + (1. - m_) * h_
            c = m_ * c + (1. - m_) * c_
        else:
            h = m_[:, None] * h + (1. - m_)[:, None] * h_
            c = m_[:, None] * c + (1. - m_)[:, None] * c_
        return [h, c]

    state_below = batch_matmul(state_below, tfparams[_p(prefix, 'W')]) + b  # (19,64,512)*(512,2048)+(2048,) = (19,64,2048) or (m,2048) in sampling

    if one_step:
        rval = step(elems=[mask, state_below], prev=[init_state, init_memory])
    else:
        rval = tf.scan(step, (mask, state_below),
                       initializer=[init_state, init_memory],
                       name=_p(prefix, '_layers'))
    return rval
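# Usage sketch (illustrative; `emb`, `h0`, `c0`, and the 'encoder' prefix are
# hypothetical names): the same layer serves both training and sampling.
#
#   # full sequence: emb is (t, m, dim_word), returns h/c stacked over time
#   h_all, c_all = self.lstm_layer(tfparams, emb, mask=mask, prefix='encoder')
#
#   # one sampling step: emb_t is (m, dim_word), state is fed back explicitly
#   h1, c1 = self.lstm_layer(tfparams, emb_t, mask=mask_t,
#                            init_state=h0, init_memory=c0,
#                            one_step=True, prefix='encoder')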
def lstm_cond_layer(self,
                    tfparams,
                    state_below,
                    options,
                    prefix='lstm',
                    mask=None,
                    context=None,
                    context_pca=None,
                    one_step=False,
                    init_memory=None,
                    init_state=None,
                    trng=None,
                    use_noise=None,
                    mode=None,
                    **kwargs):
    # state_below (t, m, dim_word), or (m, dim_word) in sampling
    # mask (t, m)
    # context (m, f, dim_ctx), or (1, f, dim_ctx) in sampling
    # init_memory, init_state (m, dim)
    # t = time steps, m = batch size, f = number of frames
    if context is None:
        raise ValueError('Context must be provided')
    if one_step:
        if init_memory is None:
            raise ValueError('previous memory must be provided')
        if init_state is None:
            raise ValueError('previous state must be provided')

    state_below_shape = tf.shape(state_below, name="state_below_shape")
    if state_below.shape.ndims == 3:
        n_samples = state_below_shape[1]
    else:
        n_samples = 1
    dim = tfparams[_p(prefix, 'U')].shape[0]

    if mask is None:
        mask = tf.ones(shape=[state_below_shape[0]],
                       dtype=tf.float32,
                       name="mask_fill")  # (m,) in sampling
    # a zero tf.constant gives the same result as tf.zeros for these defaults
    if init_state is None:
        init_state = tf.zeros(shape=(n_samples, dim),
                              dtype=tf.float32,
                              name="init_state_const")
    if init_memory is None:
        init_memory = tf.zeros(shape=(n_samples, dim),
                               dtype=tf.float32,
                               name="init_memory_const")

    # projected context; context_pca is the PCA-reduced context:
    # (64,512)*(512,512)+(512,) = (64,512), or (1,512) in sampling
    # (pre-PCA variant projected the raw (64,28,2048) context with Wc_att)
    with tf.name_scope("pctx_"):
        pctx_ = batch_matmul(context_pca, tfparams[_p(prefix, 'Wc_att')]) \
            + tfparams[_p(prefix, 'b_att')]

    # projected x: (19,64,512)*(512,2048)+(2048,) = (19,64,2048),
    # or (m,2048) in sampling
    with tf.name_scope("state_below"):
        state_below = batch_matmul(state_below, tfparams[_p(prefix, 'W')]) \
            + tfparams[_p(prefix, 'b')]

    Wd_att = tfparams[_p(prefix, 'Wd_att')]  # (512,512)
    U_att = tfparams[_p(prefix, 'U_att')]  # (512,28)
    c_att = tfparams[_p(prefix, 'c_att')]  # (1,)
    if options['selector']:
        W_sel = tfparams[_p(prefix, 'W_sel')]
        b_sel = tfparams[_p(prefix, 'b_sel')]
    U = tfparams[_p(prefix, 'U')]  # (512,2048)

    context_shape = tf.shape(context, name="context_shape")
    init_alpha = tf.zeros(shape=(n_samples, context_shape[1]),
                          dtype=tf.float32,
                          name="init_alpha_fill")
    init_ctx = tf.zeros(shape=(n_samples, context_shape[2]),
                        dtype=tf.float32,
                        name="init_ctx_fill")
    init_beta = tf.zeros(shape=(n_samples,),
                         dtype=tf.float32,
                         name="init_beta_fill")

    def _slice(_x, n, dim):
        if _x.shape.ndims == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def step(prev, elems):
        # gather previous internal state and output state
        if options['use_dropout']:
            m_, x_, dp_ = elems
        else:
            m_, x_ = elems
        h_, c_, _, _, _ = prev
        preact = tf.matmul(h_, U, name="MatMul_preact")  # (64,512)*(512,2048) = (64,2048) or (m,2048) in sampling
        preact = preact + x_
        i = _slice(preact, 0, dim)  # (64,512), columns 0-511, or (m,512) in sampling
        f = _slice(preact, 1, dim)  # (64,512), columns 512-1023, or (m,512) in sampling
        o = _slice(preact, 2, dim)  # (64,512), columns 1024-1535, or (m,512) in sampling
        if options['use_dropout']:
            i = i * _slice(dp_, 0, dim)
            f = f * _slice(dp_, 1, dim)
            o = o * _slice(dp_, 2, dim)
        i = tf.sigmoid(i)
        f = tf.sigmoid(f)
        o = tf.sigmoid(o)
        c = tf.tanh(_slice(preact, 3, dim))  # (64,512), columns 1536-2047, or (m,512) in sampling
        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_  # (m,1)*(m,512) + (m,1)*(m,512) = (m,512)
        h = o * tf.tanh(c)  # (m,512)*(m,512) = (m,512)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        # attention over the f frames
        pstate_ = tf.matmul(h, Wd_att)  # (64,512)*(512,512) = (64,512) or (m,512) in sampling
        pctx_t = pctx_ + pstate_  # (64,512)+(64,512) = (64,512)
        pctx_t = tf.tanh(pctx_t)
        alpha = tf.expand_dims(batch_matmul(pctx_t, U_att), -1) + c_att  # (64,512)*(512,28) -> (64,28,1), or (m,28,1) in sampling
        alpha_pre = alpha
        alpha_shape = tf.shape(alpha)
        alpha = tf.nn.softmax(tf.reshape(alpha, [alpha_shape[0], alpha_shape[1]]))  # softmax over frames: (64,28) or (m,28) in sampling
        ctx_ = tf.reduce_sum(context * alpha[:, :, None], 1)  # (64,28,2048)*(64,28,1) summed over frames = (64,2048) or (m,2048) in sampling
        if options['selector']:
            sel_ = tf.sigmoid(tf.matmul(h_, W_sel) + b_sel)  # (64,512)*(512,1)+(1,) = (64,1) or (m,1) in sampling
            sel_shape = tf.shape(sel_)
            sel_ = tf.reshape(sel_, [sel_shape[0]])  # (64,) or (m,) in sampling
            ctx_ = sel_[:, None] * ctx_  # (64,1)*(64,2048) = (64,2048) or (m,2048) in sampling
        else:
            sel_ = tf.zeros(shape=(n_samples,), dtype=tf.float32)
        rval = [h, c, alpha, ctx_, sel_]
        return rval

    if options['use_dropout']:
        dp_shape = tf.shape(state_below, name="dp_shape")
        if one_step:
            dp_mask = tf.cond(
                use_noise,
                lambda: tf.nn.dropout(
                    tf.fill([dp_shape[0], 3 * dim], np.float32(0.5)),
                    keep_prob=0.5),
                lambda: tf.fill([dp_shape[0], 3 * dim], np.float32(0.5)),
                name="one_step_dp_cond")
        else:
            dp_mask = tf.cond(
                use_noise,
                lambda: tf.nn.dropout(
                    tf.fill([dp_shape[0], dp_shape[1], 3 * dim],
                            np.float32(0.5)),
                    keep_prob=0.5),
                lambda: tf.fill([dp_shape[0], dp_shape[1], 3 * dim],
                                np.float32(0.5)),
                name="dp_cond")

    if one_step:
        if options['use_dropout']:
            rval = step(elems=[mask, state_below, dp_mask],
                        prev=[init_state, init_memory, init_alpha,
                              init_ctx, init_beta])
        else:
            rval = step(elems=[mask, state_below],
                        prev=[init_state, init_memory, init_alpha,
                              init_ctx, init_beta])
    else:
        seqs = [mask, state_below]
        if options['use_dropout']:
            seqs.append(dp_mask)
        rval = tf.scan(step,
                       seqs,
                       initializer=[init_state, init_memory, init_alpha,
                                    init_ctx, init_beta],
                       name=_p(prefix, 'layers'))
    return rval
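# Usage sketch (illustrative; `emb`, `ctx`, `ctx_pca`, and the 'decoder'
# prefix are hypothetical names): in training one scan covers all t steps,
# while the sampler calls the layer with one_step=True and feeds h/c back
# between words.
#
#   rval = self.lstm_cond_layer(tfparams, emb, options, prefix='decoder',
#                               mask=mask, context=ctx, context_pca=ctx_pca,
#                               one_step=False, trng=trng, use_noise=use_noise)
#   h, c, alpha, ctx_t, beta = rval  # h: (t,m,512), alpha: (t,m,28), ctx_t: (t,m,2048)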