Example #1
def AttMemLayer(incomings,
                params,
                linear=0,
                w_name=None,
                w=None,
                w_initializer=init.HeUniform()):
    '''
  Single attention hop over a memory: computes softmax attention weights p over
  the input memory A given the query u, and returns the attention-weighted sum
  of the output memory C.
  incomings = ((u, u_shape), (A, A_shape), (C, C_shape))
  '''
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1],
                                  1)
    Au = T.concatenate((A, u_repeat), axis=2)

    w_name = w_name or 'AttMem_%d' % len(params)
    w_name = add_param((C_shape[-1] + u_shape[-1], 1), params, w_name, w,
                       w_initializer)
    # score each memory slot with a linear map over [A; u], then normalize
    p = nnet.softmax(
        T.tensordot(Au, params[w_name], axes=[len(C_shape) - 1, 0]).reshape(
            (-1, C_shape[1])))

    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)

    return ((O, u_shape), (p, p_shape))
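A minimal usage sketch for a single attention hop, with hypothetical sizes; it assumes this module's usual imports (theano, theano.tensor as T, theano.tensor.nnet as nnet, numpy as np) and the local init/add_param helpers:

params = {}
u = T.matrix('u')      # query embedding, (batch, emb_dim)
A = T.tensor3('A')     # input memory,    (batch, mem_size, emb_dim)
C = T.tensor3('C')     # output memory,   (batch, mem_size, emb_dim)
(o, o_shape), (p, p_shape) = AttMemLayer(
    ((u, (32, 20)), (A, (32, 50, 20)), (C, (32, 50, 20))), params)
# o: attention-weighted sum over C, shape (32, 20); p: attention weights, (32, 50)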
Example #2
def TemporalEncodeLayer(incoming,
                        params,
                        T_name=None,
                        T_val=None,
                        T_init=init.HeUniform()):
    incoming, input_shape = incoming
    output_shape = input_shape
    T_name = add_param(input_shape[-2:],
                       params,
                       name=T_name,
                       val=T_val,
                       initializer=T_init)
    output = incoming + params[T_name]
    return (output, output_shape)
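A minimal usage sketch under the same import assumptions, with hypothetical shapes:

params = {}
mem = T.tensor3('mem')                  # (batch, mem_size, emb_dim)
mem_te, mem_te_shape = TemporalEncodeLayer((mem, (32, 50, 20)), params,
                                           T_name='T_A')
# a learned (50, 20) temporal-encoding matrix is broadcast-added to every batch element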
Example #3
def Conv2DLayer(incoming,
                params,
                num_out,
                filter_h,
                filter_w=None,
                filter=None,
                filter_name=None,
                stride_h=None,
                stride_w=None,
                padding='half',
                activation=nnet.relu,
                w_initializer=init.HeUniform(),
                b_initializer=init.Const(0.)):
    '''
  incoming should be a tensor4: (batch_size, channel_size, height, width)
  filter should be None, an ndarray or a shared variable
  num_in (the channel_size) is taken from input_shape
  '''
    incoming, input_shape = incoming
    num_in, input_h, input_w = input_shape[-3:]

    assert filter_h % 2 == 1
    if not filter_w:
        filter_w = filter_h
    if not stride_h:
        stride_h = 1
    if not stride_w:
        stride_w = stride_h
    assert filter is None or \
        (isinstance(filter, np.ndarray) and
         filter.shape == (num_out, num_in, filter_h, filter_w)) or \
        (isinstance(filter, theano.tensor.sharedvar.TensorSharedVariable) and
         filter.get_value().shape == (num_out, num_in, filter_h, filter_w))
    filter_name = add_param((num_out, num_in, filter_h, filter_w), params,
                            filter_name or 'conv2d_filter_%d' % len(params),
                            filter, w_initializer)
    if padding == 'half':
        output_h, output_w = input_h, input_w
    else:
        raise NotImplementedError(
            "not implemented output shape for padding patterns other than 'half'"
        )
    output_shape = (input_shape[0], num_out, output_h, output_w)
    output = activation(
        nnet.conv2d(incoming,
                    params[filter_name],
                    border_mode=padding,
                    subsample=(stride_h, stride_w)))
    return (output, output_shape)
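A minimal usage sketch with hypothetical shapes; with the default 'half' padding the spatial size is preserved:

params = {}
imgs = T.tensor4('imgs')                # (batch, channels, height, width)
feat, feat_shape = Conv2DLayer((imgs, (8, 3, 32, 32)), params,
                               num_out=16, filter_h=3)
# feat_shape == (8, 16, 32, 32)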
Example #4
def EmbeddingLayer(incoming,
                   params,
                   num_in,
                   num_out,
                   w_name=None,
                   w=None,
                   initializer=init.HeUniform()):
    '''
  Input: a (batch of) integer index i. Output: the corresponding embedding
  vector, i.e. the i-th row of the embedding matrix w.
  num_in is the number of possible inputs (upper bound on i, the vocabulary size)
  '''
    incoming, input_shape = incoming
    #output_shape = (input_shape[0], input_shape[1], num_out)
    output_shape = tuple(list(input_shape) + [num_out])

    w_name = add_param((num_in, num_out), params, w_name
                       or 'emb_%d' % len(params), w, initializer)

    return (params[w_name][incoming], output_shape)
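A minimal usage sketch with hypothetical vocabulary and batch sizes, under the same import assumptions:

params = {}
tokens = T.imatrix('tokens')            # integer word ids, (batch, seq_len)
emb, emb_shape = EmbeddingLayer((tokens, (32, 20)), params,
                                num_in=10000, num_out=128)
# emb_shape == (32, 20, 128); each id is replaced by a row of the embedding matrix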
Example #5
def LinearLayer(incoming,
                params,
                num_out,
                activation=lambda x: x,
                w_name=None,
                w=None,
                w_initializer=init.HeUniform()):
    incoming, input_shape = incoming
    num_in = input_shape[-1]

    #output_shape = (input_shape[0], num_out)
    output_shape = input_shape[:-1] + (num_out, )
    w_name = w_name or 'fc_w_%d' % len(params)
    w_name = add_param((num_in, num_out), params, w_name, w, w_initializer)
    '''
  if incoming.ndim > 2:
    incoming = incoming.flatten(2)
  return (activation(T.dot(incoming, params[w_name])), output_shape)
  '''
    return (activation(
        T.tensordot(incoming, params[w_name], axes=[len(input_shape) - 1,
                                                    0])), output_shape)
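A minimal usage sketch with hypothetical shapes: because the projection uses tensordot over the last axis, it can be applied to every time step of a sequence:

params = {}
seq = T.tensor3('seq')                  # (batch, seq_len, emb_dim)
proj, proj_shape = LinearLayer((seq, (32, 50, 20)), params, num_out=64)
# proj_shape == (32, 50, 64)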
Example #6
def FCLayer(incoming,
            params,
            num_out,
            activation=nnet.relu,
            w_name=None,
            b_name=None,
            w=None,
            b=None,
            w_initializer=init.HeUniform(),
            b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = np.prod(input_shape[1:])

    output_shape = (input_shape[0], num_out)
    w_name = w_name or 'fc_w_%d' % len(params)
    b_name = b_name or 'b_fc_%d' % len(params)
    w_name = add_param((num_in, num_out), params, w_name, w, w_initializer)
    b_name = add_param((num_out, ), params, b_name, b, b_initializer)
    if incoming.ndim > 2:
        incoming = incoming.flatten(2)
    return (activation(T.dot(incoming, params[w_name]) + params[b_name]),
            output_shape)
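A minimal usage sketch with hypothetical shapes: two fully-connected layers stacked into a small classifier, under the same import assumptions:

params = {}
x = T.matrix('x')                       # (batch, num_features)
h, h_shape = FCLayer((x, (64, 784)), params, num_out=256)
y, y_shape = FCLayer((h, h_shape), params, num_out=10,
                     activation=nnet.softmax)
# y_shape == (64, 10); params now holds two weight matrices and two bias vectors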
Example #7
def MultAttMemLayer(incomings,
                    params,
                    num_hid,
                    linear=0,
                    w_name=None,
                    w=None,
                    w_initializer=None):
    '''
  num_hid should be a tuple of hidden sizes; w_name, w and w_initializer, if
  given, should each have length len(num_hid) + 1
  incomings = ((u, u_shape), (A, A_shape), (C, C_shape))
  '''
    if not w_name:
        _w_name = [None for _ in range(len(num_hid) + 1)]
    else:
        _w_name = [wn for wn in w_name]
    if not w:
        w = [None for _ in range(len(num_hid) + 1)]
    if not w_initializer:
        w_initializer = [init.HeUniform() for _ in range(len(num_hid) + 1)]
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1],
                                  1)
    Au = T.concatenate((A, u_repeat), axis=2)

    _num_hid = (C_shape[-1] + u_shape[-1], ) + num_hid + (1, )
    for i, nh in enumerate(_num_hid[:-1]):
        _w_name[i] = _w_name[i] or 'AttMem_%d' % len(params)
        _w_name[i] = add_param((nh, _num_hid[i + 1]), params, _w_name[i], w[i],
                               w_initializer[i])
        Au = T.tensordot(Au, params[_w_name[i]], axes=[len(C_shape) - 1, 0])
    p = nnet.softmax(Au.reshape((-1, C_shape[1])))

    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)

    return ((O, u_shape), (p, p_shape))
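A minimal usage sketch with hypothetical shapes, scoring each memory slot with two stacked linear maps (40 -> 32 -> 1) before the softmax:

params = {}
u = T.matrix('u')      # query embedding, (batch, emb_dim)
A = T.tensor3('A')     # input memory,    (batch, mem_size, emb_dim)
C = T.tensor3('C')     # output memory,   (batch, mem_size, emb_dim)
(o, o_shape), (p, p_shape) = MultAttMemLayer(
    ((u, (32, 20)), (A, (32, 50, 20)), (C, (32, 50, 20))), params,
    num_hid=(32, ))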
Example #8
def add_param(shape,
              params,
              name=None,
              val=None,
              initializer=init.HeUniform()):
    if name not in params:
        if name is None:
            name = name_suf(name, '_%d' % len(params))
        if isinstance(val, theano.tensor.sharedvar.TensorSharedVariable):
            assert (shape == val.get_value().shape)
            assert (val.dtype == theano.config.floatX)
            '''
      if val.dtype != theano.config.floatX:
        val = val.astype(theano.config.floatX)
      '''
            params[name] = val
            return name
        if val is None:
            val = cast_floatX(initializer(shape))
        else:
            val = cast_floatX(val)
        assert (val.shape == shape)
        params[name] = theano.shared(val)
    return name
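A minimal usage sketch with hypothetical names and shapes: register a randomly initialized weight matrix and an explicitly provided bias in a shared params dict:

params = {}
w_name = add_param((128, 64), params, name='proj_w')        # HeUniform init
b_name = add_param((64, ), params, name='proj_b',
                   val=np.zeros((64, )))                    # explicit value
# params[w_name] and params[b_name] are now theano shared variables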
Example #9
def LSTMLayer(incoming,
              cell_init,
              hid_init,
              params,
              num_hidden,
              mask=None,
              activation=T.tanh,
              gate_act=nnet.sigmoid,
              only_return_final=False,
              w_xi_name=None,
              w_hi_name=None,
              b_i_name=None,
              w_xi=None,
              w_hi=None,
              b_i=None,
              w_xf_name=None,
              w_hf_name=None,
              b_f_name=None,
              w_xf=None,
              w_hf=None,
              b_f=None,
              w_xo_name=None,
              w_ho_name=None,
              b_o_name=None,
              w_xo=None,
              w_ho=None,
              b_o=None,
              w_xc_name=None,
              w_hc_name=None,
              b_c_name=None,
              w_xc=None,
              w_hc=None,
              b_c=None,
              w_initializer=init.HeUniform(),
              b_initializer=init.Const(0.)):
    '''
  hid_init and cell_init can be a number, an array or a tensor expression
  '''
    incoming, input_shape = incoming
    num_in = input_shape[-1]

    # add parameters
    wxi_name = add_param((num_in, num_hidden), params, w_xi_name
                         or 'lstm_wxi_%d' % len(params), w_xi, w_initializer)
    whi_name = add_param((num_hidden, num_hidden), params, w_hi_name
                         or 'lstm_whi_%d' % len(params), w_hi, w_initializer)
    bi_name = add_param((num_hidden, ), params, b_i_name
                        or 'lstm_bi_%d' % len(params), b_i, b_initializer)
    wxf_name = add_param((num_in, num_hidden), params, w_xf_name
                         or 'lstm_wxf_%d' % len(params), w_xf, w_initializer)
    whf_name = add_param((num_hidden, num_hidden), params, w_hf_name
                         or 'lstm_whf_%d' % len(params), w_hf, w_initializer)
    bf_name = add_param((num_hidden, ), params, b_f_name
                        or 'lstm_bf_%d' % len(params), b_f, b_initializer)
    wxo_name = add_param((num_in, num_hidden), params, w_xo_name
                         or 'lstm_wxo_%d' % len(params), w_xo, w_initializer)
    who_name = add_param((num_hidden, num_hidden), params, w_ho_name
                         or 'lstm_who_%d' % len(params), w_ho, w_initializer)
    bo_name = add_param((num_hidden, ), params, b_o_name
                        or 'lstm_bo_%d' % len(params), b_o, b_initializer)
    wxc_name = add_param((num_in, num_hidden), params, w_xc_name
                         or 'lstm_wxc_%d' % len(params), w_xc, w_initializer)
    whc_name = add_param((num_hidden, num_hidden), params, w_hc_name
                         or 'lstm_whc_%d' % len(params), w_hc, w_initializer)
    bc_name = add_param((num_hidden, ), params, b_c_name
                        or 'lstm_bc_%d' % len(params), b_c, b_initializer)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    wx_concat = T.concatenate((params[wxi_name], params[wxf_name],
                               params[wxo_name], params[wxc_name]),
                              axis=1)
    wh_concat = T.concatenate((params[whi_name], params[whf_name],
                               params[who_name], params[whc_name]),
                              axis=1)
    b_concat = T.concatenate(
        (params[bi_name], params[bf_name], params[bo_name], params[bc_name]),
        axis=0)

    # define step function to be used in the loop
    def step(income, hid_prev, cell_prev):
        lin_trans = income.dot(wx_concat) + hid_prev.dot(wh_concat) + b_concat
        i = gate_act(_slice(lin_trans, 0, num_hidden))
        f = gate_act(_slice(lin_trans, 1, num_hidden))
        o = gate_act(_slice(lin_trans, 2, num_hidden))
        c = activation(_slice(lin_trans, 3, num_hidden))

        cell = f * cell_prev + i * c
        hid = o * activation(cell)
        return [hid, cell]

    def step_mask(income, m, hid_prev, cell_prev):
        hid, cell = step(income, hid_prev, cell_prev)
        hid = T.switch(m, hid, hid_prev)
        cell = T.switch(m, cell, cell_prev)
        return [hid, cell]

    # setup hid_init and cell_init
    if isinstance(hid_init, int) or isinstance(hid_init, float):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, int) or isinstance(cell_init, float):
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, np.ndarray):
        assert cell_init.shape == (num_hidden, )
        cell_init = np.array(cell_init, dtype=theano.config.floatX)
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))

    # compose loop
    if mask is not None:
        results, updates = theano.scan(
            fn=step_mask,
            outputs_info=[hid_init, cell_init],
            #outputs_info={'initial':[hid_init, cell_init], 'taps':[-1]},
            sequences=[
                incoming.dimshuffle((1, 0, 2)),
                mask.dimshuffle(1, 0, 'x')
            ])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[hid_init, cell_init],
            #outputs_info=[{'initial':[hid_init, cell_init], 'taps':[-1]}],
            sequences=[incoming.dimshuffle((1, 0, 2))])
    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[0][-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        #cell_stat = results[1].dimshuffle((1, 0, 2))
        hid_state = results[0].dimshuffle((1, 0, 2))
        return (hid_state, output_shape)
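A minimal usage sketch with hypothetical shapes, running a masked LSTM over a padded batch and keeping only the final hidden state; same import assumptions as above:

params = {}
x = T.tensor3('x')                      # (batch, seq_len, features)
m = T.matrix('mask')                    # (batch, seq_len), 1 for valid steps
h, h_shape = LSTMLayer((x, (16, 30, 100)), cell_init=0., hid_init=0.,
                       params=params, num_hidden=64, mask=m,
                       only_return_final=True)
# h_shape == (16, 64)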
Example #10
def RNNLayer(incoming,
             hid_init,
             params,
             num_hidden,
             mask=None,
             activation=nnet.relu,
             only_return_final=False,
             w_xh_name=None,
             w_hh_name=None,
             b_name=None,
             w_xh=None,
             w_hh=None,
             b=None,
             w_initializer=init.HeUniform(),
             b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = input_shape[-1]

    rnnwxh_name = add_param((num_in, num_hidden), params, w_xh_name
                            or 'rnn_wxh_%d' % len(params), w_xh, w_initializer)
    rnnwhh_name = add_param((num_hidden, num_hidden), params, w_hh_name
                            or 'rnn_whh_%d' % len(params), w_hh, w_initializer)
    rnnb_name = add_param((num_hidden, ), params, b_name
                          or 'rnn_b_%d' % len(params), b, b_initializer)

    # setup hid_init
    if isinstance(hid_init, int) or isinstance(hid_init, float):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))

    # setup step function
    def step(income, hid_prev):
        return activation(
            income.dot(params[rnnwxh_name]) +
            hid_prev.dot(params[rnnwhh_name]) + params[rnnb_name])

    def step_mask(income, m, hid_prev):
        return T.switch(m, step(income, hid_prev), hid_prev)

    if mask is not None:
        results, updates = theano.scan(fn=step_mask,
                                       outputs_info=[{
                                           'initial': hid_init,
                                           'taps': [-1]
                                       }],
                                       sequences=[
                                           incoming.dimshuffle((1, 0, 2)),
                                           mask.dimshuffle(1, 0, 'x')
                                       ])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[{
                'initial': hid_init,
                'taps': [-1]
            }],
            sequences=[incoming.dimshuffle((1, 0, 2))])

    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        return (results.dimshuffle((1, 0, 2)), output_shape)
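A minimal usage sketch with hypothetical shapes, returning the full sequence of hidden states:

params = {}
x = T.tensor3('x')                      # (batch, seq_len, features)
seq, seq_shape = RNNLayer((x, (16, 30, 100)), hid_init=0., params=params,
                          num_hidden=64)
# seq_shape == (16, 30, 64)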