def AttMemLayer(incomings, params, linear=0, w_name=None, w=None,
                w_initializer=init.HeUniform()):
    '''
    incomings = ((u, u_shape), (A, A_shape), (C, C_shape))
    '''
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    # tile the query u along the memory axis and concatenate it with the memory A
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1], 1)
    Au = T.concatenate((A, u_repeat), axis=2)
    w_name = w_name or 'AttMem_%d' % len(params)
    w_name = add_param((C_shape[-1] + u_shape[-1], 1), params, w_name, w,
                       w_initializer)
    # attention weights over the memory slots
    p = nnet.softmax(
        T.tensordot(Au, params[w_name], axes=[len(C_shape) - 1, 0]).reshape(
            (-1, C_shape[1])))
    p_shape = A_shape[:2]
    # weighted sum of the output memory C (assumes C_shape[-1] == u_shape[-1])
    O = (C * p[:, :, None]).sum(axis=1)
    return ((O, u_shape), (p, p_shape))
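
# Illustrative usage sketch for AttMemLayer (not part of the original module).
# Assumes the module-level imports of this file (theano, T, nnet, np, init);
# the function name `_example_att_mem` and the shapes (batch 32, 10 memory
# slots, 20-dim embeddings) are made up for the example.
def _example_att_mem():
    params = {}
    u = T.matrix('u')        # query:         (batch, 20)
    A = T.tensor3('A')       # input memory:  (batch, 10, 20)
    C = T.tensor3('C')       # output memory: (batch, 10, 20)
    (o, o_shape), (p, p_shape) = AttMemLayer(
        ((u, (32, 20)), (A, (32, 10, 20)), (C, (32, 10, 20))), params)
    # o: (32, 20) attended memory readout, p: (32, 10) attention weights
    return theano.function([u, A, C], [o, p])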
def TemporalEncodeLayer(incoming, params, T_name=None, T_val=None,
                        T_init=init.HeUniform()):
    incoming, input_shape = incoming
    output_shape = input_shape
    # learn one (memory_size, embedding_size) matrix and add it to every example
    T_name = add_param(input_shape[-2:], params, name=T_name, val=T_val,
                       initializer=T_init)
    output = incoming + params[T_name]
    return (output, output_shape)
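
# Illustrative usage sketch for TemporalEncodeLayer (not part of the original
# module). An explicit T_name is passed so the parameter gets a readable key;
# all names and shapes here are made up for the example.
def _example_temporal_encode():
    params = {}
    A = T.tensor3('A')       # memory: (batch, 10, 20)
    out, out_shape = TemporalEncodeLayer((A, (32, 10, 20)), params,
                                         T_name='temporal_enc')
    return theano.function([A], out)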
def Conv2DLayer(incoming, params, num_out, filter_h, filter_w=None, filter=None,
                filter_name=None, stride_h=None, stride_w=None, padding='half',
                activation=nnet.relu, w_initializer=init.HeUniform(),
                b_initializer=init.Const(0.)):
    '''
    incoming should be a tensor4: (batch_size, channel_size, height, width)
    filter should be None, an ndarray, or a shared variable
    num_in == channel_size; how to infer it automatically?
    '''
    incoming, input_shape = incoming
    num_in, input_h, input_w = input_shape[-3:]
    assert filter_h % 2 == 1
    if not filter_w:
        filter_w = filter_h
    if not stride_h:
        stride_h = 1
    if not stride_w:
        stride_w = stride_h
    assert filter is None \
        or (isinstance(filter, np.ndarray)
            and filter.shape == (num_out, num_in, filter_h, filter_w)) \
        or (isinstance(filter, theano.tensor.sharedvar.TensorSharedVariable)
            and filter.get_value().shape == (num_out, num_in, filter_h, filter_w))
    filter_name = add_param((num_out, num_in, filter_h, filter_w), params,
                            filter_name or 'conv2d_filter_%d' % len(params),
                            filter, w_initializer)
    if padding == 'half':
        output_h, output_w = input_h, input_w
    else:
        raise NotImplementedError(
            "output shape is only implemented for the 'half' padding pattern")
    output_shape = (input_shape[0], num_out, output_h, output_w)
    output = activation(
        nnet.conv2d(incoming, params[filter_name], border_mode=padding,
                    subsample=(stride_h, stride_w)))
    return (output, output_shape)
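
# Illustrative usage sketch for Conv2DLayer (not part of the original module).
# The shapes (batch 32, 3 input channels, 28x28 images, 16 output channels)
# are made up; 'half' padding keeps the spatial size unchanged.
def _example_conv2d():
    params = {}
    x = T.tensor4('x')       # (batch, 3, 28, 28)
    out, out_shape = Conv2DLayer((x, (32, 3, 28, 28)), params,
                                 num_out=16, filter_h=3)
    # out: (32, 16, 28, 28)
    return theano.function([x], out)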
def EmbeddingLayer(incoming, params, num_in, num_out, w_name=None, w=None,
                   initializer=init.HeUniform()):
    '''
    Input a (batch of) iscalar i; output the corresponding embedding vector,
    i.e. the i-th row of the embedding matrix w.
    num_in is the number of possible inputs (upper bound of i, the vocabulary size).
    '''
    incoming, input_shape = incoming
    output_shape = tuple(list(input_shape) + [num_out])
    w_name = add_param((num_in, num_out), params,
                       w_name or 'emb_%d' % len(params), w, initializer)
    return (params[w_name][incoming], output_shape)
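
# Illustrative usage sketch for EmbeddingLayer (not part of the original
# module). Maps a batch of word ids to 20-dim vectors; vocabulary size 1000,
# batch 32, sequence length 7 are made up for the example.
def _example_embedding():
    params = {}
    idx = T.imatrix('idx')   # (batch, seq_len) of integer word ids
    emb, emb_shape = EmbeddingLayer((idx, (32, 7)), params,
                                    num_in=1000, num_out=20)
    # emb: (32, 7, 20)
    return theano.function([idx], emb)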
def LinearLayer(incoming, params, num_out, activation=lambda x: x, w_name=None,
                w=None, w_initializer=init.HeUniform()):
    incoming, input_shape = incoming
    num_in = input_shape[-1]
    output_shape = input_shape[:-1] + (num_out, )
    w_name = w_name or 'fc_w_%d' % len(params)
    w_name = add_param((num_in, num_out), params, w_name, w, w_initializer)
    # contract over the last axis only, so inputs of any rank are supported
    return (activation(
        T.tensordot(incoming, params[w_name], axes=[len(input_shape) - 1, 0])),
        output_shape)
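
# Illustrative usage sketch for LinearLayer (not part of the original module).
# Unlike FCLayer below, this keeps all leading axes and only transforms the
# last one; the shapes here are made up for the example.
def _example_linear():
    params = {}
    x = T.tensor3('x')       # (batch, seq_len, 20)
    out, out_shape = LinearLayer((x, (32, 7, 20)), params, num_out=5)
    # out: (32, 7, 5)
    return theano.function([x], out)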
def FCLayer(incoming, params, num_out, activation=nnet.relu, w_name=None,
            b_name=None, w=None, b=None, w_initializer=init.HeUniform(),
            b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = np.prod(input_shape[1:])
    output_shape = (input_shape[0], num_out)
    w_name = w_name or 'fc_w_%d' % len(params)
    b_name = b_name or 'b_fc_%d' % len(params)
    w_name = add_param((num_in, num_out), params, w_name, w, w_initializer)
    b_name = add_param((num_out, ), params, b_name, b, b_initializer)
    # flatten everything but the batch axis before the affine transform
    if incoming.ndim > 2:
        incoming = incoming.flatten(2)
    return (activation(T.dot(incoming, params[w_name]) + params[b_name]),
            output_shape)
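
# Illustrative usage sketch for FCLayer (not part of the original module):
# a two-layer classifier sharing one params dict. Layer sizes and the batch
# size are made up for the example.
def _example_fc():
    params = {}
    x = T.matrix('x')        # (batch, 784)
    h, h_shape = FCLayer((x, (32, 784)), params, num_out=256)
    y, y_shape = FCLayer((h, h_shape), params, num_out=10,
                         activation=nnet.softmax)
    return theano.function([x], y)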
def MultAttMemLayer(incomings, params, num_hid, linear=0, w_name=None, w=None,
                    w_initializer=None):
    '''
    num_hid should be a tuple with length = len(w_name) - 1
    incomings = ((u, u_shape), (A, A_shape), (C, C_shape))
    '''
    if not w_name:
        _w_name = [None for _ in range(len(num_hid) + 1)]
    else:
        _w_name = [wn for wn in w_name]
    if not w:
        w = [None for _ in range(len(num_hid) + 1)]
    if not w_initializer:
        w_initializer = [init.HeUniform() for _ in range(len(num_hid) + 1)]
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])), C_shape[1], 1)
    Au = T.concatenate((A, u_repeat), axis=2)
    # chain of linear maps: (C_dim + u_dim) -> num_hid[0] -> ... -> 1
    _num_hid = (C_shape[-1] + u_shape[-1], ) + num_hid + (1, )
    for i, nh in enumerate(_num_hid[:-1]):
        _w_name[i] = _w_name[i] or 'AttMem_%d' % len(params)
        _w_name[i] = add_param((nh, _num_hid[i + 1]), params, _w_name[i], w[i],
                               w_initializer[i])
        Au = T.tensordot(Au, params[_w_name[i]], axes=[len(C_shape) - 1, 0])
    p = nnet.softmax(Au.reshape((-1, C_shape[1])))
    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)
    return ((O, u_shape), (p, p_shape))
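
# Illustrative usage sketch for MultAttMemLayer (not part of the original
# module): same setup as the AttMemLayer example but with one 64-unit hidden
# layer in the attention scorer. All shapes are made up for the example.
def _example_mult_att_mem():
    params = {}
    u = T.matrix('u')        # (batch, 20)
    A = T.tensor3('A')       # (batch, 10, 20)
    C = T.tensor3('C')       # (batch, 10, 20)
    (o, o_shape), (p, p_shape) = MultAttMemLayer(
        ((u, (32, 20)), (A, (32, 10, 20)), (C, (32, 10, 20))), params,
        num_hid=(64, ))
    return theano.function([u, A, C], [o, p])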
def add_param(shape, params, name=None, val=None, initializer=init.HeUniform()):
    if name not in params:
        if name is None:
            name = name_suf(name, '_%d' % len(params))
        if isinstance(val, theano.tensor.sharedvar.TensorSharedVariable):
            # reuse an existing shared variable directly
            assert (shape == val.get_value().shape)
            assert (val.dtype == theano.config.floatX)
            params[name] = val
            return name
        if val is None:
            val = cast_floatX(initializer(shape))
        else:
            val = cast_floatX(val)
            assert (val.shape == shape)
        params[name] = theano.shared(val)
    return name
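
# Illustrative usage sketch for add_param (not part of the original module).
# The parameter name 'proj_w' and the shape are made up; with val=None the
# value comes from the initializer and is wrapped in a theano shared variable.
def _example_add_param():
    params = {}
    name = add_param((100, 50), params, name='proj_w')
    return params[name]      # shared variable of shape (100, 50)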
def LSTMLayer(incoming, cell_init, hid_init, params, num_hidden, mask=None,
              activation=T.tanh, gate_act=nnet.sigmoid, only_return_final=False,
              w_xi_name=None, w_hi_name=None, b_i_name=None, w_xi=None, w_hi=None, b_i=None,
              w_xf_name=None, w_hf_name=None, b_f_name=None, w_xf=None, w_hf=None, b_f=None,
              w_xo_name=None, w_ho_name=None, b_o_name=None, w_xo=None, w_ho=None, b_o=None,
              w_xc_name=None, w_hc_name=None, b_c_name=None, w_xc=None, w_hc=None, b_c=None,
              w_initializer=init.HeUniform(), b_initializer=init.Const(0.)):
    '''
    hid_init and cell_init can be a number, an array, or a tensor expression
    '''
    incoming, input_shape = incoming
    num_in = input_shape[-1]

    # add parameters
    wxi_name = add_param((num_in, num_hidden), params,
                         w_xi_name or 'lstm_wxi_%d' % len(params), w_xi, w_initializer)
    whi_name = add_param((num_hidden, num_hidden), params,
                         w_hi_name or 'lstm_whi_%d' % len(params), w_hi, w_initializer)
    bi_name = add_param((num_hidden, ), params,
                        b_i_name or 'lstm_bi_%d' % len(params), b_i, b_initializer)
    wxf_name = add_param((num_in, num_hidden), params,
                         w_xf_name or 'lstm_wxf_%d' % len(params), w_xf, w_initializer)
    whf_name = add_param((num_hidden, num_hidden), params,
                         w_hf_name or 'lstm_whf_%d' % len(params), w_hf, w_initializer)
    bf_name = add_param((num_hidden, ), params,
                        b_f_name or 'lstm_bf_%d' % len(params), b_f, b_initializer)
    wxo_name = add_param((num_in, num_hidden), params,
                         w_xo_name or 'lstm_wxo_%d' % len(params), w_xo, w_initializer)
    who_name = add_param((num_hidden, num_hidden), params,
                         w_ho_name or 'lstm_who_%d' % len(params), w_ho, w_initializer)
    bo_name = add_param((num_hidden, ), params,
                        b_o_name or 'lstm_bo_%d' % len(params), b_o, b_initializer)
    wxc_name = add_param((num_in, num_hidden), params,
                         w_xc_name or 'lstm_wxc_%d' % len(params), w_xc, w_initializer)
    whc_name = add_param((num_hidden, num_hidden), params,
                         w_hc_name or 'lstm_whc_%d' % len(params), w_hc, w_initializer)
    bc_name = add_param((num_hidden, ), params,
                        b_c_name or 'lstm_bc_%d' % len(params), b_c, b_initializer)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    # concatenate the four gates' weights so each step does a single matrix product
    wx_concat = T.concatenate((params[wxi_name], params[wxf_name],
                               params[wxo_name], params[wxc_name]), axis=1)
    wh_concat = T.concatenate((params[whi_name], params[whf_name],
                               params[who_name], params[whc_name]), axis=1)
    b_concat = T.concatenate((params[bi_name], params[bf_name],
                              params[bo_name], params[bc_name]), axis=0)

    # define step function to be used in the loop
    def step(income, hid_prev, cell_prev):
        lin_trans = income.dot(wx_concat) + hid_prev.dot(wh_concat) + b_concat
        i = gate_act(_slice(lin_trans, 0, num_hidden))
        f = gate_act(_slice(lin_trans, 1, num_hidden))
        o = gate_act(_slice(lin_trans, 2, num_hidden))
        c = activation(_slice(lin_trans, 3, num_hidden))
        cell = f * cell_prev + i * c
        hid = o * activation(cell)
        return [hid, cell]

    def step_mask(income, m, hid_prev, cell_prev):
        # where the mask is 0, carry the previous state forward unchanged
        hid, cell = step(income, hid_prev, cell_prev)
        hid = T.switch(m, hid, hid_prev)
        cell = T.switch(m, cell, cell_prev)
        return [hid, cell]

    # setup hid_init and cell_init
    if isinstance(hid_init, (int, float)):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, (int, float)):
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, np.ndarray):
        assert cell_init.shape == (num_hidden, )
        cell_init = np.array(cell_init, dtype=theano.config.floatX)
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))

    # compose loop (scan iterates over the time axis, so put it first)
    if mask is not None:
        results, updates = theano.scan(
            fn=step_mask,
            outputs_info=[hid_init, cell_init],
            sequences=[incoming.dimshuffle((1, 0, 2)),
                       mask.dimshuffle(1, 0, 'x')])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[hid_init, cell_init],
            sequences=[incoming.dimshuffle((1, 0, 2))])

    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[0][-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        hid_state = results[0].dimshuffle((1, 0, 2))
        return (hid_state, output_shape)
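
# Illustrative usage sketch for LSTMLayer (not part of the original module).
# Zero scalars are used for cell_init and hid_init; the shapes (batch 32,
# 7 timesteps, 20 inputs, 64 hidden units) are made up for the example.
def _example_lstm():
    params = {}
    x = T.tensor3('x')       # (batch, seq_len, num_in)
    mask = T.matrix('mask')  # (batch, seq_len): 1 for real steps, 0 for padding
    h, h_shape = LSTMLayer((x, (32, 7, 20)), 0., 0., params, num_hidden=64,
                           mask=mask, only_return_final=True)
    # h: (32, 64), the final hidden state of each sequence
    return theano.function([x, mask], h)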
def RNNLayer(incoming, hid_init, params, num_hidden, mask=None, activation=nnet.relu,
             only_return_final=False, w_xh_name=None, w_hh_name=None, b_name=None,
             w_xh=None, w_hh=None, b=None, w_initializer=init.HeUniform(),
             b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = input_shape[-1]
    rnnwxh_name = add_param((num_in, num_hidden), params,
                            w_xh_name or 'rnn_wxh_%d' % len(params), w_xh, w_initializer)
    rnnwhh_name = add_param((num_hidden, num_hidden), params,
                            w_hh_name or 'rnn_whh_%d' % len(params), w_hh, w_initializer)
    rnnb_name = add_param((num_hidden, ), params,
                          b_name or 'rnn_b_%d' % len(params), b, b_initializer)

    # setup hid_init
    if isinstance(hid_init, (int, float)):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))

    # setup step function
    def step(income, hid_prev):
        return activation(income.dot(params[rnnwxh_name]) +
                          hid_prev.dot(params[rnnwhh_name]) + params[rnnb_name])

    def step_mask(income, m, hid_prev):
        # where the mask is 0, carry the previous hidden state forward
        return T.switch(m, step(income, hid_prev), hid_prev)

    if mask is not None:
        results, updates = theano.scan(
            fn=step_mask,
            outputs_info=[{'initial': hid_init, 'taps': [-1]}],
            sequences=[incoming.dimshuffle((1, 0, 2)),
                       mask.dimshuffle(1, 0, 'x')])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[{'initial': hid_init, 'taps': [-1]}],
            sequences=[incoming.dimshuffle((1, 0, 2))])

    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        return (results.dimshuffle((1, 0, 2)), output_shape)
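
# Illustrative usage sketch for RNNLayer (not part of the original module).
# A zero scalar is used for hid_init; the shapes (batch 32, 7 timesteps,
# 20 inputs, 64 hidden units) are made up for the example.
def _example_rnn():
    params = {}
    x = T.tensor3('x')       # (batch, seq_len, num_in)
    h, h_shape = RNNLayer((x, (32, 7, 20)), 0., params, num_hidden=64)
    # h: (32, 7, 64), the hidden state at every timestep
    return theano.function([x], h)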