Esempio n. 1
0
def _rnn(X, n_filters, parameters, memory):
    """Run one convolutional LSTM step.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    n_filters : int
        Number of filters for the gate convolution; the result is sliced
        into 4 gates along axis 1 (assumes n_filters already accounts for
        the 4-way split — TODO confirm against `_rnn_convolution`).
    parameters : tuple
        `(X_weight, h_weight, bias)` shared across time steps.
    memory : tuple
        `(previous_h, previous_c)`; the integer ``0`` is the sentinel for
        the initial (empty) state.

    Returns
    -------
    tuple
        `(next_h, next_c)` to feed into the next step.
    """
    X_weight, h_weight, bias = parameters
    previous_h, previous_c = memory

    # The original used `previous_h is 0`, which relies on CPython's
    # small-int caching and emits a SyntaxWarning on Python >= 3.8.
    # Test the sentinel by type and value instead.
    if isinstance(previous_h, int) and previous_h == 0:
        array = _rnn_convolution(X, n_filters, X_weight)
    else:
        array = (_rnn_convolution(X, n_filters, X_weight)
                 + _rnn_convolution(previous_h, n_filters, h_weight))
    array = layers.broadcast_plus(array, bias)

    # Split the pre-activation into the four LSTM gates along the channel axis.
    group = layers.slice(X=array, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])  # input gate
    f = layers.sigmoid(group[1])  # forget gate
    o = layers.sigmoid(group[2])  # output gate
    g = layers.tanh(group[3])     # candidate cell state

    next_c = f * previous_c + i * g
    next_h = o * layers.tanh(next_c)
    return next_h, next_c
Esempio n. 2
0
def lstm(X, n_filters, cache):
    """Run one convolutional LSTM step, keeping state in `cache`.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    n_filters : int
        Number of output filters per gate; the gate convolution produces
        ``n_filters * 4`` channels which are sliced into i, f, o, g.
    cache : dict
        Carries the shared weights ('WX', 'WH', 'bias') and the recurrent
        state ('h', 'c') between calls; mutated in place.

    Returns
    -------
    dict
        The same `cache`, with 'h' and 'c' updated.
    """
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault(
        'bias', layers.variable('lstm_bias', shape=(1, n_filters * 4, 1, 1)))

    # BUG FIX: the input convolution previously used WH (the hidden-state
    # weight) instead of WX, leaving WX dead; the linear lstm variant in
    # this file applies WX to X and WH to the previous hidden state.
    network = _rnn_convolution(X, n_filters * 4, WX) + \
      (_rnn_convolution(cache['h'], n_filters * 4, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)

    # Split the pre-activation into the four LSTM gates along the channel axis.
    group = layers.slice(X=network, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])  # input gate
    f = layers.sigmoid(group[1])  # forget gate
    o = layers.sigmoid(group[2])  # output gate
    g = layers.tanh(group[3])     # candidate cell state

    cache['c'] = f * cache.get('c', 0) + i * g
    cache['h'] = o * layers.tanh(cache['c'])

    return cache
Esempio n. 3
0
def lstm(X, D, cache):
    """Run one fully-connected LSTM step, keeping state in `cache`.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    D : int
        Hidden dimensionality per gate; the linearity produces ``D * 4``
        units which are sliced into i, f, o, g.
    cache : dict
        Carries the shared weights ('WX', 'WH', 'bias'), the step counter
        ('time') and the recurrent state ('h', 'c'); mutated in place.

    Returns
    -------
    dict
        The same `cache`, with 'h' and 'c' updated.
    """
    # Keep the step counter up to date for callers; the local binding the
    # original made (`time = ...`) was never read, so it is dropped.
    cache.setdefault('time', -1)
    cache['time'] += 1

    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault('bias', layers.variable('lstm_bias', shape=(1, D * 4)))

    network = _rnn_linearity(X, D * 4, WX) + (_rnn_linearity(cache['h'], D * 4, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)

    # Split the pre-activation into the four LSTM gates.
    group = layers.slice(X=network, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])  # input gate
    f = layers.sigmoid(group[1])  # forget gate
    o = layers.sigmoid(group[2])  # output gate
    g = layers.tanh(group[3])     # candidate cell state

    cache['c'] = f * cache.get('c', 0) + i * g
    cache['h'] = o * layers.tanh(cache['c'])

    return cache
Esempio n. 4
0
def elman(X, D, cache):
    """Run one fully-connected Elman RNN step, keeping state in `cache`.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    D : int
        Hidden dimensionality.
    cache : dict
        Carries the shared weights ('WX', 'WH', 'bias'), the step counter
        ('time') and the hidden state ('h'); mutated in place.

    Returns
    -------
    dict
        The same `cache`, with 'h' updated to ``tanh(WX·X + WH·h + b)``.
    """
    # Keep the step counter up to date for callers; the local binding the
    # original made (`time = ...`) was never read, so it is dropped.
    cache.setdefault('time', -1)
    cache['time'] += 1

    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault('bias', layers.variable('elman_bias', shape=(1, D)))

    network = _rnn_linearity(X, D, WX) + (_rnn_linearity(cache['h'], D, WH) if 'h' in cache else 0)
    network = layers.broadcast_plus(network, bias)
    cache['h'] = layers.tanh(network)

    return cache
Esempio n. 5
0
def elman(X, n_filters, cache):
    """Run one convolutional Elman RNN step, keeping state in `cache`.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    n_filters : int
        Number of output filters.
    cache : dict
        Carries the shared weights ('WX', 'WH', 'bias'), the step counter
        ('time') and the hidden state ('h'); mutated in place.

    Returns
    -------
    dict
        The same `cache`, with 'h' updated.
    """
    # `time` is retained (not just the setdefault side effect) because the
    # commented-out batch-norm toggle below references it by name.
    time = cache.setdefault('time', -1)
    cache['time'] += 1

    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault(
        'bias', layers.variable('elman_bias', shape=(1, n_filters, 1, 1)))

    # Recurrent term only exists after the first step ('h' absent initially).
    if 'h' in cache:
        recurrent = _rnn_convolution(cache['h'], n_filters, WH)
    else:
        recurrent = 0
    network = _rnn_convolution(X, n_filters, WX) + recurrent

    network = layers.broadcast_plus(network, bias)
    # network = layers.batch_normalization(network, fix_gamma=False, id='ElmanBN%d' % time)

    cache['h'] = layers.tanh(network)

    return cache
Esempio n. 6
0
def _gru(X, settings, parameters, memory):
    """Run one convolutional recurrent step with four sigmoid/gated slices.

    NOTE(review): despite the name, this follows the same i/f/o/g cell
    update as `_rnn` (an LSTM-style cell), and gate `g` uses sigmoid here
    where `_rnn` uses tanh — confirm that difference is intentional.

    Parameters
    ----------
    X : symbol
        Input for the current time step.
    settings : dict
        Must contain 'n_filters'; the gate convolution uses 4x that many
        filters and the result is sliced into 4 gates along axis 1.
    parameters : tuple
        `(X_weight, h_weight, bias)` shared across time steps.
    memory : tuple
        `(previous_h, previous_c)`; the integer ``0`` is the sentinel for
        the initial (empty) state.

    Returns
    -------
    tuple
        `(next_h, next_c)` to feed into the next step.
    """
    n_filters = settings['n_filters'] * 4
    X_weight, h_weight, bias = parameters
    previous_h, previous_c = memory
    # TODO normalization

    # The original used `previous_h is 0`, which relies on CPython's
    # small-int caching and emits a SyntaxWarning on Python >= 3.8.
    # Test the sentinel by type and value instead.
    if isinstance(previous_h, int) and previous_h == 0:
        array = _gru_convolution(X, n_filters, X_weight)
    else:
        array = (_gru_convolution(X, n_filters, X_weight)
                 + _gru_convolution(previous_h, n_filters, h_weight))
    array = layers.broadcast_plus(array, bias)

    # Split the pre-activation into four gates along the channel axis.
    group = layers.slice(X=array, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])  # input gate
    f = layers.sigmoid(group[1])  # forget gate
    o = layers.sigmoid(group[2])  # output gate
    g = layers.sigmoid(group[3])  # candidate (sigmoid, unlike _rnn's tanh)

    next_c = f * previous_c + i * g
    next_h = o * layers.tanh(next_c)
    return next_h, next_c