def Conv2DLayer(incoming, params, num_out, filter_h, filter_w=None,
                filter=None, filter_name=None, stride_h=None, stride_w=None,
                padding='half', activation=nnet.relu,
                w_initializer=init.HeUniform(), b_initializer=init.Const(0.)):
    '''
    incoming should be a tensor4: (batch_size, channel_size, height, width)
    filter should be None, an ndarray, or a shared variable
    num_in == channel_size, inferred from input_shape
    '''
    incoming, input_shape = incoming
    num_in, input_h, input_w = input_shape[-3:]
    assert filter_h % 2 == 1
    if filter_w is None:
        filter_w = filter_h
    if stride_h is None:
        stride_h = 1
    if stride_w is None:
        stride_w = stride_h
    assert filter is None or \
        (isinstance(filter, np.ndarray) and
         filter.shape == (num_out, num_in, filter_h, filter_w)) or \
        (isinstance(filter, theano.tensor.sharedvar.TensorSharedVariable) and
         filter.get_value().shape == (num_out, num_in, filter_h, filter_w))
    filter_name = add_param((num_out, num_in, filter_h, filter_w), params,
                            filter_name or 'conv2d_filter_%d' % len(params),
                            filter, w_initializer)
    if padding == 'half':
        # 'half' padding pads by filter // 2 on each side, so the output
        # spatial size is ceil(input / stride)
        output_h = (input_h - 1) // stride_h + 1
        output_w = (input_w - 1) // stride_w + 1
    else:
        raise NotImplementedError(
            "output shape is not implemented for padding patterns other "
            "than 'half'")
    output_shape = (input_shape[0], num_out, output_h, output_w)
    output = activation(
        nnet.conv2d(incoming, params[filter_name], border_mode=padding,
                    subsample=(stride_h, stride_w)))
    return (output, output_shape)
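# A minimal usage sketch (not part of the original code): it assumes
# `add_param` stores a shared variable in the plain dict `params` under the
# returned name, as the calls above imply, and that the shape entries are
# plain ints. `x`, `params`, `out`, and `out_shape` are hypothetical names.
#
#   x = T.tensor4('x')                    # (batch, channel, height, width)
#   params = {}
#   out, out_shape = Conv2DLayer((x, (64, 3, 32, 32)), params,
#                                num_out=16, filter_h=3)
#   # with 'half' padding and stride 1: out_shape == (64, 16, 32, 32)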
def FCLayer(incoming, params, num_out, activation=nnet.relu, w_name=None,
            b_name=None, w=None, b=None, w_initializer=init.HeUniform(),
            b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = np.prod(input_shape[1:])
    output_shape = (input_shape[0], num_out)
    w_name = w_name or 'fc_w_%d' % len(params)
    b_name = b_name or 'fc_b_%d' % len(params)
    w_name = add_param((num_in, num_out), params, w_name, w, w_initializer)
    b_name = add_param((num_out, ), params, b_name, b, b_initializer)
    if incoming.ndim > 2:
        # flatten all trailing dimensions into a single feature axis
        incoming = incoming.flatten(2)
    return (activation(T.dot(incoming, params[w_name]) + params[b_name]),
            output_shape)
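# Hedged example of the (tensor, shape) chaining convention, continuing the
# hypothetical conv sketch above: FCLayer flattens trailing dimensions
# itself, so a conv output tuple can be fed in directly.
#
#   fc_out, fc_shape = FCLayer((out, out_shape), params, num_out=10,
#                              activation=nnet.softmax)
#   # fc_shape == (64, 10); num_in was inferred as 16 * 32 * 32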
def LSTMLayer(incoming, cell_init, hid_init, params, num_hidden, mask=None,
              activation=T.tanh, gate_act=nnet.sigmoid,
              only_return_final=False,
              w_xi_name=None, w_hi_name=None, b_i_name=None,
              w_xi=None, w_hi=None, b_i=None,
              w_xf_name=None, w_hf_name=None, b_f_name=None,
              w_xf=None, w_hf=None, b_f=None,
              w_xo_name=None, w_ho_name=None, b_o_name=None,
              w_xo=None, w_ho=None, b_o=None,
              w_xc_name=None, w_hc_name=None, b_c_name=None,
              w_xc=None, w_hc=None, b_c=None,
              w_initializer=init.HeUniform(), b_initializer=init.Const(0.)):
    '''
    hid_init and cell_init can be a number, an array, or a tensor expression
    '''
    incoming, input_shape = incoming
    num_in = input_shape[-1]
    # add parameters for the input (i), forget (f), output (o) gates and the
    # cell candidate (c)
    wxi_name = add_param((num_in, num_hidden), params,
                         w_xi_name or 'lstm_wxi_%d' % len(params), w_xi,
                         w_initializer)
    whi_name = add_param((num_hidden, num_hidden), params,
                         w_hi_name or 'lstm_whi_%d' % len(params), w_hi,
                         w_initializer)
    bi_name = add_param((num_hidden, ), params,
                        b_i_name or 'lstm_bi_%d' % len(params), b_i,
                        b_initializer)
    wxf_name = add_param((num_in, num_hidden), params,
                         w_xf_name or 'lstm_wxf_%d' % len(params), w_xf,
                         w_initializer)
    whf_name = add_param((num_hidden, num_hidden), params,
                         w_hf_name or 'lstm_whf_%d' % len(params), w_hf,
                         w_initializer)
    bf_name = add_param((num_hidden, ), params,
                        b_f_name or 'lstm_bf_%d' % len(params), b_f,
                        b_initializer)
    wxo_name = add_param((num_in, num_hidden), params,
                         w_xo_name or 'lstm_wxo_%d' % len(params), w_xo,
                         w_initializer)
    who_name = add_param((num_hidden, num_hidden), params,
                         w_ho_name or 'lstm_who_%d' % len(params), w_ho,
                         w_initializer)
    bo_name = add_param((num_hidden, ), params,
                        b_o_name or 'lstm_bo_%d' % len(params), b_o,
                        b_initializer)
    wxc_name = add_param((num_in, num_hidden), params,
                         w_xc_name or 'lstm_wxc_%d' % len(params), w_xc,
                         w_initializer)
    whc_name = add_param((num_hidden, num_hidden), params,
                         w_hc_name or 'lstm_whc_%d' % len(params), w_hc,
                         w_initializer)
    bc_name = add_param((num_hidden, ), params,
                        b_c_name or 'lstm_bc_%d' % len(params), b_c,
                        b_initializer)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    # concatenate the four gates' weights so each step does one big matmul
    wx_concat = T.concatenate((params[wxi_name], params[wxf_name],
                               params[wxo_name], params[wxc_name]), axis=1)
    wh_concat = T.concatenate((params[whi_name], params[whf_name],
                               params[who_name], params[whc_name]), axis=1)
    b_concat = T.concatenate(
        (params[bi_name], params[bf_name], params[bo_name], params[bc_name]),
        axis=0)

    # define step function to be used in the loop
    def step(income, hid_prev, cell_prev):
        lin_trans = income.dot(wx_concat) + hid_prev.dot(wh_concat) + b_concat
        i = gate_act(_slice(lin_trans, 0, num_hidden))
        f = gate_act(_slice(lin_trans, 1, num_hidden))
        o = gate_act(_slice(lin_trans, 2, num_hidden))
        c = activation(_slice(lin_trans, 3, num_hidden))
        cell = f * cell_prev + i * c
        hid = o * activation(cell)
        return [hid, cell]

    def step_mask(income, m, hid_prev, cell_prev):
        hid, cell = step(income, hid_prev, cell_prev)
        # keep the previous state wherever the mask is 0 (padding steps)
        hid = T.switch(m, hid, hid_prev)
        cell = T.switch(m, cell, cell_prev)
        return [hid, cell]

    # set up hid_init and cell_init
    if isinstance(hid_init, (int, float)):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, (int, float)):
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(cell_init, np.ndarray):
        assert cell_init.shape == (num_hidden, )
        cell_init = np.array(cell_init, dtype=theano.config.floatX)
        cell_init = cell_init * T.ones((incoming.shape[0], num_hidden))

    # compose loop; scan iterates over the leading axis, so move time first
    if mask is not None:
        results, updates = theano.scan(
            fn=step_mask,
            outputs_info=[hid_init, cell_init],
            sequences=[incoming.dimshuffle((1, 0, 2)),
                       mask.dimshuffle(1, 0, 'x')])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[hid_init, cell_init],
            sequences=[incoming.dimshuffle((1, 0, 2))])
    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[0][-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        hid_state = results[0].dimshuffle((1, 0, 2))
        return (hid_state, output_shape)
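# Usage sketch for variable-length batches (hypothetical names): `msk` is
# (batch, time) with 1 for real steps and 0 for padding, so padded steps
# carry the previous hidden/cell state forward via T.switch.
#
#   seq = T.tensor3('seq')                # (batch, time, features)
#   msk = T.matrix('msk')
#   last_hid, hid_shape = LSTMLayer((seq, (64, 20, 100)), cell_init=0.,
#                                   hid_init=0., params=params,
#                                   num_hidden=128, mask=msk,
#                                   only_return_final=True)
#   # hid_shape == (64, 128): only the final hidden state is returned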
def RNNLayer(incoming, hid_init, params, num_hidden, mask=None,
             activation=nnet.relu, only_return_final=False, w_xh_name=None,
             w_hh_name=None, b_name=None, w_xh=None, w_hh=None, b=None,
             w_initializer=init.HeUniform(), b_initializer=init.Const(0.)):
    incoming, input_shape = incoming
    num_in = input_shape[-1]
    rnnwxh_name = add_param((num_in, num_hidden), params,
                            w_xh_name or 'rnn_wxh_%d' % len(params), w_xh,
                            w_initializer)
    rnnwhh_name = add_param((num_hidden, num_hidden), params,
                            w_hh_name or 'rnn_whh_%d' % len(params), w_hh,
                            w_initializer)
    rnnb_name = add_param((num_hidden, ), params,
                          b_name or 'rnn_b_%d' % len(params), b,
                          b_initializer)

    # set up hid_init
    if isinstance(hid_init, (int, float)):
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))
    if isinstance(hid_init, np.ndarray):
        assert hid_init.shape == (num_hidden, )
        hid_init = np.array(hid_init, dtype=theano.config.floatX)
        hid_init = hid_init * T.ones((incoming.shape[0], num_hidden))

    # set up step function
    def step(income, hid_prev):
        return activation(income.dot(params[rnnwxh_name]) +
                          hid_prev.dot(params[rnnwhh_name]) +
                          params[rnnb_name])

    def step_mask(income, m, hid_prev):
        # keep the previous hidden state wherever the mask is 0
        return T.switch(m, step(income, hid_prev), hid_prev)

    if mask is not None:
        results, updates = theano.scan(
            fn=step_mask,
            outputs_info=[{'initial': hid_init, 'taps': [-1]}],
            sequences=[incoming.dimshuffle((1, 0, 2)),
                       mask.dimshuffle(1, 0, 'x')])
    else:
        results, updates = theano.scan(
            fn=step,
            outputs_info=[{'initial': hid_init, 'taps': [-1]}],
            sequences=[incoming.dimshuffle((1, 0, 2))])
    if only_return_final:
        output_shape = (input_shape[0], num_hidden)
        return (results[-1], output_shape)
    else:
        output_shape = (input_shape[0], input_shape[1], num_hidden)
        return (results.dimshuffle((1, 0, 2)), output_shape)
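# Same calling convention as LSTMLayer, minus the cell state (hypothetical
# names, reusing `seq` and `params` from the sketch above). With
# only_return_final=False the full hidden sequence comes back:
#
#   hids, hids_shape = RNNLayer((seq, (64, 20, 100)), hid_init=0.,
#                               params=params, num_hidden=128)
#   # hids_shape == (64, 20, 128), i.e. (batch, time, num_hidden)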