def _rnn(X, n_filters, parameters, memory):
    # Convolutional LSTM cell with explicitly threaded (h, c) memory.
    X_weight, h_weight, bias = parameters
    previous_h, previous_c = memory
    if isinstance(previous_h, int):  # memory is initialised to (0, 0) before the first step
        array = _rnn_convolution(X, n_filters * 4, X_weight)
    else:
        array = _rnn_convolution(X, n_filters * 4, X_weight) + \
            _rnn_convolution(previous_h, n_filters * 4, h_weight)
    array = layers.broadcast_plus(array, bias)
    # Slice the pre-activations into the four LSTM gates, n_filters channels each.
    group = layers.slice(X=array, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])
    f = layers.sigmoid(group[1])
    o = layers.sigmoid(group[2])
    g = layers.tanh(group[3])
    next_c = f * previous_c + i * g
    next_h = o * layers.tanh(next_c)
    memory = next_h, next_c
    return memory
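# The `_rnn_convolution` helper is referenced but not defined here. Below is a
# minimal sketch of what it plausibly does, assuming the project's `layers`
# module wraps an MXNet-style symbolic API: a convolution with an externally
# created, shared weight and no bias of its own (each cell adds its bias
# separately). `layers.convolution`, its keyword names, and the 1x1 kernel are
# assumptions, not part of the original source.
def _rnn_convolution(X, n_filters, weight):
    return layers.convolution(
        X=X, n_filters=n_filters, kernel=(1, 1), weight=weight, no_bias=True)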
def lstm(X, n_filters, cache):
    # Convolutional LSTM cell; weights and recurrent state live in `cache`.
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault(
        'bias', layers.variable('lstm_bias', shape=(1, n_filters * 4, 1, 1)))
    # Input-to-hidden convolution uses WX, hidden-to-hidden uses WH.
    network = _rnn_convolution(X, n_filters * 4, WX) + \
        (_rnn_convolution(cache['h'], n_filters * 4, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)
    group = layers.slice(X=network, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])
    f = layers.sigmoid(group[1])
    o = layers.sigmoid(group[2])
    g = layers.tanh(group[3])
    cache['c'] = f * cache.get('c', 0) + i * g
    cache['h'] = o * layers.tanh(cache['c'])
    return cache
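# Usage sketch for the cache-style cells: the dict doubles as parameter store
# and recurrent state, so unrolling over a sequence is a plain loop. Weights
# are created by `setdefault` on the first call and reused afterwards.
# `sequence` (a list of per-step input symbols) is a hypothetical placeholder.
cache = {}
for X in sequence:
    cache = lstm(X, 64, cache)
final_h = cache['h']  # hidden state after the last step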
def lstm(X, D, cache):
    # Fully connected LSTM cell; `D` is the hidden-state width.
    cache['time'] = cache.get('time', -1) + 1  # step counter, kept for layer naming
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault('bias', layers.variable('lstm_bias', shape=(1, D * 4)))
    network = _rnn_linearity(X, D * 4, WX) + \
        (_rnn_linearity(cache['h'], D * 4, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)
    group = layers.slice(X=network, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])
    f = layers.sigmoid(group[1])
    o = layers.sigmoid(group[2])
    g = layers.tanh(group[3])
    cache['c'] = f * cache.get('c', 0) + i * g
    cache['h'] = o * layers.tanh(cache['c'])
    return cache
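# `_rnn_linearity` is the fully connected counterpart of `_rnn_convolution`
# and is likewise not defined here. A minimal sketch under the same
# assumptions (`layers.fully_connected` and its keyword names are guesses at
# the wrapper's API, not the original helper):
def _rnn_linearity(X, D, weight):
    return layers.fully_connected(
        X=X, n_hidden_units=D, weight=weight, no_bias=True)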
def elman(X, D, cache):
    # Fully connected Elman cell: a single tanh layer with recurrence.
    cache['time'] = cache.get('time', -1) + 1  # step counter, kept for layer naming
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault('bias', layers.variable('elman_bias', shape=(1, D)))
    network = _rnn_linearity(X, D, WX) + \
        (_rnn_linearity(cache['h'], D, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)
    cache['h'] = layers.tanh(network)
    return cache
def elman(X, n_filters, cache):
    # Convolutional Elman cell.
    cache['time'] = cache.get('time', -1) + 1
    time = cache['time']  # read after the increment so the first step is numbered 0
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault(
        'bias', layers.variable('elman_bias', shape=(1, n_filters, 1, 1)))
    network = _rnn_convolution(X, n_filters, WX) + \
        (_rnn_convolution(cache['h'], n_filters, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)
    # network = layers.batch_normalization(network, fix_gamma=False, id='ElmanBN%d' % time)
    cache['h'] = layers.tanh(network)
    return cache
def _gru(X, settings, parameters, memory):
    # Despite the name, this cell follows the four-gate LSTM layout; the only
    # difference from `_rnn` is the sigmoid (rather than tanh) candidate gate.
    n_filters = settings['n_filters'] * 4  # four gate pre-activations
    X_weight, h_weight, bias = parameters
    previous_h, previous_c = memory
    # TODO normalization
    if isinstance(previous_h, int):  # memory is initialised to (0, 0) before the first step
        array = _gru_convolution(X, n_filters, X_weight)
    else:
        array = _gru_convolution(X, n_filters, X_weight) + \
            _gru_convolution(previous_h, n_filters, h_weight)
    array = layers.broadcast_plus(array, bias)
    group = layers.slice(X=array, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])
    f = layers.sigmoid(group[1])
    o = layers.sigmoid(group[2])
    g = layers.sigmoid(group[3])
    next_c = f * previous_c + i * g
    next_h = o * layers.tanh(next_c)
    memory = next_h, next_c
    return memory
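# Usage sketch for the tuple-state cells (`_rnn`, `_gru`): memory starts as
# (0, 0) and is threaded through the loop by hand, with the weights created
# once up front. `sequence` and the bias shape are hypothetical placeholders;
# `_gru_convolution` is presumably the same shared-weight convolution as
# `_rnn_convolution`.
parameters = (layers.variable('X_weight'),
              layers.variable('H_weight'),
              layers.variable('gru_bias', shape=(1, 64 * 4, 1, 1)))
memory = 0, 0
for X in sequence:
    memory = _gru(X, {'n_filters': 64}, parameters, memory)
h, c = memory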