Code Example #1
File: model.py  Project: vyraun/Attentive_reader
def build_attention(tparams,
                    options,
                    desc,
                    desc_mask,
                    dlen,
                    q,
                    q_mask=None,
                    sfx=None,
                    name=None):

    # broadcast the mask over the feature axis if it lacks one; otherwise
    # use it as-is (the original left desc_mask_ undefined in that case)
    if desc.ndim != desc_mask.ndim:
        desc_mask_ = desc_mask.dimshuffle(0, 1, 'x')
    else:
        desc_mask_ = desc_mask

    assert desc.ndim == desc_mask_.ndim

    if q_mask is not None:
        assert q.ndim == q_mask.ndim
        q *= q_mask

    masked_desc = desc * desc_mask_

    desc_in = desc.reshape((-1, desc.shape[-1]))
    projd = get_layer('ff')[1](tparams=tparams,
                               state_below=desc_in,
                               options=options,
                               prefix='ff_att_ctx',
                               activ='Linear')

    projq = get_layer('ff')[1](tparams, q,
                               options,
                               prefix='ff_att_q',
                               use_bias=False,
                               activ='Linear')

    """
    Unnormalized dist metric between the rep of desc and q.
    """
    sim_vals = 0
    if options['use_dq_sims']:
        q_proj = dot(q, tparams['ff_att_bi_dq'])
        desc_proj = dot(masked_desc, tparams['ff_att_bi_dq'])
        desc_proj = desc_proj.reshape((masked_desc.shape[0],
                                       masked_desc.shape[1], -1))
        sim_vals = (desc_proj * q_proj.dimshuffle('x', 0, 1)).sum(-1)
        sim_vals = sim_vals.dimshuffle(0, 1, 'x')

    projd = projd.reshape((masked_desc.shape[0], masked_desc.shape[1], -1))

    # Intermediate layer for the annotation values.
    proj_att = Tanh(projd + projq.dimshuffle('x', 0, 1) + sim_vals)
    W_proj = tparams['ff_att_proj'].dimshuffle('x', 'x', 0)
    dot_proj = (W_proj * proj_att).sum(-1)
    pre_softmax = dot_proj
    alphas = Masked_Softmax(pre_softmax, mask=desc_mask, ax=0).dimshuffle(0, 1, 'x')
    ctx = (masked_desc * alphas).sum(0)

    return ctx, alphas
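
build_attention scores every description timestep against the query with a Tanh layer (plus the optional bilinear desc-query term above), normalizes the scores over the time axis, and returns the alpha-weighted sum of the description states. A minimal NumPy sketch of that scoring path, assuming Masked_Softmax is a softmax over axis 0 that zeroes padded positions and assuming shapes (T, B, dim):

import numpy as np

def attention_sketch(projd, projq, w_proj, desc_mask):
    # projd: (T, B, dim) projected desc, projq: (B, dim) projected query,
    # w_proj: (dim,) scoring vector, desc_mask: (T, B) binary mask
    e = np.tanh(projd + projq[None, :, :])           # (T, B, dim)
    scores = (e * w_proj[None, None, :]).sum(-1)     # (T, B)
    scores = np.exp(scores - scores.max(0)) * desc_mask
    alphas = scores / scores.sum(0, keepdims=True)   # masked softmax over time
    return alphas
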
Code Example #2
def fflayer(tparams,
            state_below,
            options,
            prefix='rconv',
            use_bias=True,
            activ='lambda x: tensor.tanh(x)',
            **kwargs):

    # 'activ' arrives as a string (e.g. 'Linear' or a lambda source) and is
    # resolved with eval in the enclosing scope, as elsewhere in this codebase
    preact = dot(state_below, tparams[prfx(prefix, 'W')])
    if use_bias:
        preact += tparams[prfx(prefix, 'b')]
    return eval(activ)(preact)
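
fflayer is a single affine map followed by an activation; 'Linear' must name an identity function defined elsewhere in the codebase. A hedged NumPy equivalent with hypothetical weights:

import numpy as np

def ff_sketch(x, W, b=None, activ=np.tanh):
    # x: (n_samples, n_in), W: (n_in, n_out), b: (n_out,)
    pre = np.dot(x, W)
    if b is not None:
        pre = pre + b
    return activ(pre)  # pass activ=lambda v: v for a 'Linear' layer
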
Code Example #3
    def _step_slice(mask, sbelow, sbelowx, sbefore, U, Ux):
        preact = dot(sbefore, U)
        preact += sbelow

        r = Sigmoid(_slice(preact, 0, dim))  # reset gate
        u = Sigmoid(_slice(preact, 1, dim))  # update gate

        # candidate state, with the reset gate applied to the previous state
        preactx = dot(r * sbefore, Ux)
        preactx = preactx + sbelowx

        h = Tanh(preactx)

        h = u * sbefore + (1. - u) * h
        # padded positions (mask == 0) keep the previous state
        h = mask[:, None] * h + (1. - mask)[:, None] * sbefore

        return h
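
Each _step_slice call advances the GRU by one timestep; sbelow and sbelowx are the input projections precomputed outside the scan loop (see Example #8). The same update as a NumPy reference sketch:

import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def gru_step_sketch(mask, sbelow, sbelowx, sbefore, U, Ux, dim):
    preact = np.dot(sbefore, U) + sbelow
    r = sigmoid(preact[:, :dim])          # reset gate
    u = sigmoid(preact[:, dim:2 * dim])   # update gate
    h_tilde = np.tanh(np.dot(r * sbefore, Ux) + sbelowx)
    h = u * sbefore + (1. - u) * h_tilde
    # padded positions (mask == 0) carry the previous state forward
    return mask[:, None] * h + (1. - mask)[:, None] * sbefore
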
Code Example #4
File: model.py  Project: vyraun/Attentive_reader
def build_bidir_model(inp,
                      inp_mask,
                      tparams,
                      options,
                      sfx=None,
                      nsteps=None,
                      use_dropout=False,
                      use_noise=None,
                      truncate=None,
                      name=None):

    if use_dropout:
        assert use_noise is not None

    assert name is not None
    assert sfx is not None

    # reverse the mask along the time axis for the backward RNN
    inpr_mask = inp_mask[::-1]

    n_timesteps = inp.shape[0]
    n_samples = inp.shape[1]

    emb = dot(inp, tparams['Wemb_%s' % sfx])
    emb = emb.reshape([n_timesteps, n_samples, -1])

    if use_dropout:
        emb = dropout_layer(emb, use_noise,
                            p=options['dropout_rate'])

    """
    Forward RNN
    """
    proj = get_layer(options[name])[1](tparams=tparams,
                                       state_below=emb,
                                       options=options,
                                       prefix=name,
                                       nsteps=nsteps,
                                       truncate=truncate,
                                       mask=inp_mask)

    """
    Reverse RNN.
    """
    # the backward RNN reads the same embeddings in reversed time order
    embr = emb[::-1]
    projr = get_layer(options[name])[1](tparams=tparams,
                                        state_below=embr,
                                        options=options,
                                        prefix=name + "_r",
                                        nsteps=nsteps,
                                        truncate=truncate,
                                        mask=inpr_mask)
    return proj, projr
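
build_bidir_model runs the same recurrent layer twice, on the embeddings and on their time reversal with the reversed mask; callers typically combine proj and projr. A hedged sketch of one common combination (the [0] indexing assumes the layer returns its hidden-state sequence first):

# proj[0], projr[0]: (n_timesteps, n_samples, dim) hidden-state sequences
h_fwd = proj[0]
h_bwd = projr[0][::-1]  # re-align the backward states in forward time order
h_bidir = tensor.concatenate([h_fwd, h_bwd], axis=2)
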
Code Example #5
    def _step(mask, sbelow, sbefore, cell_before):
        preact = dot(sbefore, param('U'))
        preact += sbelow
        preact += tparams[prfx(prefix, 'b')]

        f = Sigmoid(_slice(preact, 0, dim))  # forget gate
        o = Sigmoid(_slice(preact, 1, dim))  # output gate
        c = Tanh(_slice(preact, 2, dim))     # candidate cell state

        # input gate tied to the forget gate: i = 1 - f
        c = f * cell_before + (1 - f) * c
        c = mask * c + (1. - mask) * cell_before
        h = o * tensor.tanh(c)
        h = mask * h + (1. - mask) * sbefore

        return h, c
Code Example #6
    def _step(mask, sbelow, sbefore, cell_before, *args):
        preact = dot(sbefore, param('U'))
        preact += sbelow
        preact += param('b')

        i = Sigmoid(_slice(preact, 0, dim))  # input gate
        f = Sigmoid(_slice(preact, 1, dim))  # forget gate
        o = Sigmoid(_slice(preact, 2, dim))  # output gate
        c = Tanh(_slice(preact, 3, dim))     # candidate cell state

        c = f * cell_before + i * c
        c = mask * c + (1. - mask) * cell_before
        h = o * tensor.tanh(c)
        h = mask * h + (1. - mask) * sbefore

        return h, c
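
Examples #5 and #6 differ only in the input gate: the tied variant reuses the forget gate (i = 1 - f), making the cell state a convex combination of the old cell and the candidate, while the untied variant learns i separately. Side by side, with f, i, and the candidate c_cand assumed precomputed from the gate slices:

# tied input gate (Example #5): old and new content always sum to one
c_tied = f * cell_before + (1. - f) * c_cand

# independent input gate (Example #6): each gate learned separately
c_untied = f * cell_before + i * c_cand
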
Code Example #7
    def _step_slice(mask,
                    sbelow,
                    sbelowx,
                    xc_, sbefore,
                    ctx_, alpha_,
                    pctx_, cc_,
                    U, Wc,
                    Wd_att, U_att,
                    c_tt, Ux, Wcx):
        # attention
        pstate_ = dot(sbefore, Wd_att)
        pctx__ = pctx_ + pstate_[None, :, :]
        pctx__ += xc_
        pctx__ = Tanh(pctx__)
        alpha = dot(pctx__, U_att) + c_tt
        alpha = alpha.reshape([alpha.shape[0], alpha.shape[1]])
        alpha = tensor.exp(alpha)
        if context_mask is not None:
            alpha = alpha * context_mask

        alpha = alpha / alpha.sum(0, keepdims=True)
        # current context: attention-weighted sum of the annotations
        ctx_ = (cc_ * alpha[:, :, None]).sum(0)

        preact = dot(sbefore, U)
        preact += sbelow
        preact += dot(ctx_, Wc)
        preact = Sigmoid(preact)

        r = _slice(preact, 0, dim)
        u = _slice(preact, 1, dim)

        preactx = dot(sbefore, Ux)
        preactx *= r
        preactx += sbelowx
        preactx += dot(ctx_, Wcx)

        h = Tanh(preactx)

        h = u * sbefore + (1. - u) * h
        h = mask[:, None] * h + (1. - mask)[:, None] * sbefore

        return h, ctx_, alpha.T
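
The attention block at the top of this step turns the projected annotations pctx_, the projected previous state, and the projected input into scores, exponentiates them, masks and renormalizes them over the annotation axis, and takes the score-weighted sum of the raw annotations cc_ as the context. A NumPy sketch of just that block, with a max-subtraction added for numerical stability:

import numpy as np

def soft_attention_sketch(pctx_, pstate_, cc_, U_att, c_tt, context_mask=None):
    # pctx_: (A, B, dim) projected annotations, pstate_: (B, dim) projected
    # previous state, cc_: (A, B, ctx_dim) raw annotations, U_att: (dim, 1)
    e = np.tanh(pctx_ + pstate_[None, :, :])
    alpha = np.dot(e, U_att).reshape(e.shape[0], e.shape[1]) + c_tt
    alpha = np.exp(alpha - alpha.max(0))
    if context_mask is not None:
        alpha = alpha * context_mask
    alpha = alpha / alpha.sum(0, keepdims=True)      # softmax over annotations
    ctx = (cc_ * alpha[:, :, None]).sum(0)           # (B, ctx_dim)
    return ctx, alpha
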
Code Example #8
def gru_layer(tparams,
              state_below,
              options,
              prefix='gru',
              mask=None,
              nsteps=None,
              truncate=None,
              init_state=None,
              **kwargs):

    if nsteps is None:
        nsteps = state_below.shape[0]

    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    param = lambda name: tparams[prfx(prefix, name)]
    dim = param('Ux').shape[1]

    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    if mask.ndim == 3 and mask.ndim == state_below.ndim:
        mask = mask.reshape((mask.shape[0],
                             mask.shape[1] * mask.shape[2])).dimshuffle(0, 1, 'x')
    elif mask.ndim == 2:
        mask = mask.dimshuffle(0, 1, 'x')

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = dot(state_below, param('W')) + param('b')
    state_belowx = dot(state_below, param('Wx')) + param('bx')

    # initial/previous state
    if init_state is None:
        if not options['learn_h0']:
            init_state = tensor.alloc(0., n_samples, dim)
        else:
            init_state0 = sharedX(numpy.zeros((options['dim'],)),
                                  name=prfx(prefix, "h0"))
            init_state = tensor.concatenate([[init_state0]
                                             for i in xrange(options['batch_size'])],
                                            axis=0)
            tparams[prfx(prefix, 'h0')] = init_state0

    U = tparams[prfx(prefix, 'U')]
    Ux = tparams[prfx(prefix, 'Ux')]

    def _step_slice(mask, sbelow, sbelowx, sbefore, U, Ux):
        preact = dot(sbefore, U)
        preact += sbelow

        r = Sigmoid(_slice(preact, 0, dim))
        u = Sigmoid(_slice(preact, 1, dim))

        preactx = dot(r * sbefore, Ux)
        preactx = preactx + sbelowx

        h = Tanh(preactx)

        h = u * sbefore + (1. - u) * h
        h = mask[:, None] * h + (1. - mask)[:, None] * sbefore

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[init_state],
                                non_sequences=[U, Ux],
                                name=prfx(prefix, '_layers'),
                                n_steps=nsteps,
                                truncate_gradient=truncate,
                                profile=profile,
                                strict=True)
    # wrap the single output in a list to match the multi-output layers
    rval = [rval]
    return rval
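
theano.scan iterates _step_slice over the leading (time) axis: each sequence in seqs contributes one timestep slice per call, outputs_info seeds the recurrent output that returns as sbefore, and non_sequences are passed unchanged. A plain-Python rendering of that calling contract (a sketch of the contract, not of scan itself):

def scan_sketch(step, seqs, init_state, non_seqs):
    state = init_state
    outputs = []
    for t in range(len(seqs[0])):
        # one timestep slice of every sequence, then the recurrent
        # state, then the unchanging non-sequences
        args = [s[t] for s in seqs] + [state] + list(non_seqs)
        state = step(*args)
        outputs.append(state)
    return outputs
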
Code Example #9
def gru_cond_layer(tparams,
                   state_below,
                   options,
                   prefix='gru',
                   mask=None,
                   context=None,
                   one_step=False,
                   init_memory=None,
                   init_state=None,
                   context_mask=None,
                   nsteps=None,
                   **kwargs):

    assert context is not None, 'Context must be provided'

    if one_step:
        assert init_state is not None, 'previous state must be provided'

    if nsteps is None:
        nsteps = state_below.shape[0]

    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    # mask
    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    dim = tparams[prfx(prefix, 'Wcx')].shape[1]

    # initial/previous state
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)

    # projected context
    assert context.ndim == 3, 'Context must be 3-d: #annotation x #sample x dim'
    pctx_ = dot(context, tparams[prfx(prefix, 'Wc_att')]) + tparams[prfx(prefix, 'b_att')]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # projected x
    state_belowx = dot(state_below, tparams[prfx(prefix, 'Wx')]) + \
            tparams[prfx(prefix, 'bx')]

    state_below_ = dot(state_below, tparams[prfx(prefix, 'W')]) + \
            tparams[prfx(prefix, 'b')]

    state_belowc = dot(state_below, tparams[prfx(prefix, 'Wi_att')])

    def _step_slice(mask,
                    sbelow,
                    sbelowx,
                    xc_, sbefore,
                    ctx_, alpha_,
                    pctx_, cc_,
                    U, Wc,
                    Wd_att, U_att,
                    c_tt, Ux, Wcx):
        # attention
        pstate_ = dot(sbefore, Wd_att)
        pctx__ = pctx_ + pstate_[None, :, :]
        pctx__ += xc_
        pctx__ = Tanh(pctx__)
        alpha = dot(pctx__, U_att) + c_tt
        alpha = alpha.reshape([alpha.shape[0], alpha.shape[1]])
        alpha = tensor.exp(alpha)
        if context_mask is not None:
            alpha = alpha * context_mask

        alpha = alpha / alpha.sum(0, keepdims=True)
        # current context: attention-weighted sum of the annotations
        ctx_ = (cc_ * alpha[:, :, None]).sum(0)

        preact = dot(sbefore, U)
        preact += sbelow
        preact += dot(ctx_, Wc)
        preact = Sigmoid(preact)

        r = _slice(preact, 0, dim)
        u = _slice(preact, 1, dim)

        preactx = dot(sbefore, Ux)
        preactx *= r
        preactx += sbelowx
        preactx += dot(ctx_, Wcx)

        h = Tanh(preactx)

        h = u * sbefore + (1. - u) * h
        h = mask[:, None] * h + (1. - mask)[:, None] * sbefore

        return h, ctx_, alpha.T

    seqs = [mask, state_below_, state_belowx, state_belowc]
    _step = _step_slice

    shared_vars = [tparams[prfx(prefix, 'U')],
                   tparams[prfx(prefix, 'Wc')],
                   tparams[prfx(prefix, 'Wd_att')],
                   tparams[prfx(prefix, 'U_att')],
                   tparams[prfx(prefix, 'c_tt')],
                   tparams[prfx(prefix, 'Ux')],
                   tparams[prfx(prefix, 'Wcx')]]

    if one_step:
        rval = _step(*(seqs + [init_state, None, None, pctx_, context] + shared_vars))
    else:
        rval, updates = theano.scan(_step,
                                    sequences=seqs,
                                    outputs_info=[init_state,
                                                  tensor.alloc(0., n_samples, context.shape[2]),
                                                  tensor.alloc(0., n_samples, context.shape[0])],
                                    non_sequences=[pctx_,
                                                   context]+shared_vars,
                                    name=prfx(prefix, '_layers'),
                                    n_steps=nsteps,
                                    profile=profile,
                                    strict=True)
    return rval
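
With one_step=True the layer applies _step once to externally supplied state, which is how a sampling loop would drive the decoder; otherwise scan seeds three recurrent outputs: the hidden state, a zero context of width context.shape[2], and a zero attention vector with one entry per annotation. A hedged single-step call (the 'decoder' prefix and the annotations variable are assumptions about the surrounding code):

# one decoder step at sampling time (a sketch)
h_prev = tensor.matrix('h_prev')      # (n_samples, dim)
emb_prev = tensor.matrix('emb_prev')  # (n_samples, emb_dim)
h, ctx, alpha = gru_cond_layer(tparams, emb_prev, options,
                               prefix='decoder',
                               context=annotations,
                               one_step=True,
                               init_state=h_prev)
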
Code Example #10
def lstm_tied_layer(tparams,
                    state_below,
                    options,
                    prefix='lstm_tied',
                    mask=None,
                    one_step=False,
                    init_state=None,
                    init_memory=None,
                    nsteps=None,
                    **kwargs):

    if nsteps is None:
        nsteps = state_below.shape[0]

    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    param = lambda name: tparams[prfx(prefix, name)]
    dim = param('U').shape[0]

    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    # initial/previous state
    if init_state is None:
        if not options['learn_h0']:
            init_state = tensor.alloc(0., n_samples, dim)
        else:
            init_state0 = sharedX(numpy.zeros((options['dim'],)),
                                  name=prfx(prefix, "h0"))
            init_state = tensor.concatenate([[init_state0]
                                             for i in xrange(options['batch_size'])],
                                            axis=0)
            tparams[prfx(prefix, 'h0')] = init_state0

    # initial/previous memory
    if init_memory is None:
        init_memory = tensor.alloc(0., n_samples, dim)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    def _step(mask, sbelow, sbefore, cell_before):
        preact = dot(sbefore, param('U'))
        preact += sbelow
        preact += tparams[prfx(prefix, 'b')]

        f = Sigmoid(_slice(preact, 0, dim))
        o = Sigmoid(_slice(preact, 1, dim))
        c = Tanh(_slice(preact, 2, dim))

        c = f * cell_before + (1 - f) * c
        c = mask * c + (1. - mask) * cell_before
        h = o * tensor.tanh(c)
        h = mask * h + (1. - mask) * sbefore

        return h, c

    # project the input once, outside the scan loop (note that the bias
    # is also added again inside _step)
    state_below = dot(state_below, param('W')) + param('b')

    if one_step:
        mask = mask.dimshuffle(0, 'x')
        h, c = _step(mask, state_below, init_state, init_memory)
        rval = [h, c]
    else:
        if mask.ndim == 3 and mask.ndim == state_below.ndim:
            mask = mask.reshape((mask.shape[0],
                                 mask.shape[1] * mask.shape[2])).dimshuffle(0, 1, 'x')
        elif mask.ndim == 2:
            mask = mask.dimshuffle(0, 1, 'x')
        rval, updates = theano.scan(_step,
                                    sequences=[mask, state_below],
                                    outputs_info=[init_state,
                                                  init_memory],
                                    name=prfx(prefix, '_layers'),
                                    n_steps=nsteps)
    return rval
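
lstm_tied_layer returns [h, c]: single states in one_step mode, full (n_timesteps, n_samples, dim) trajectories otherwise. A hedged call sketch ('emb' and 'x_mask' are assumed symbolic inputs):

# full-sequence mode: hidden and cell trajectories for a padded minibatch
rval = lstm_tied_layer(tparams, emb, options,
                       prefix='encoder',
                       mask=x_mask)
h_seq, c_seq = rval[0], rval[1]  # each (n_timesteps, n_samples, dim)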