Example #1
def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x/as_floatx(kp_x+kd_x)
    re = kd_e/as_floatx(kp_e+kd_e)
    scale = (1./as_floatx(kp_x*kp_e + kp_x*kd_e + kd_x*kp_e))
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))
    x_past = x_past_var*rx
    e_past = e_past_var*re
    w_grad = scale * (xs.T.dot(e_past+es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
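All of these examples lean on a handful of helpers whose definitions are not shown: create_shared_variable, add_update and as_floatx. The stand-in shims below are a minimal sketch of what they are taken to do here (wrap an initial array, a scalar plus shape, or an initializer function as a Theano shared variable; register a state update to be applied when the compiled function runs; cast to Theano's float dtype). They are hypothetical, for reading purposes only; the real helpers may differ.

import numpy as np
import theano
import theano.tensor as tt

_UPDATES = []  # (shared_variable, new_expression) pairs collected while building a graph

def as_floatx(x):
    # Cast a scalar or array to Theano's configured float dtype.
    return np.asarray(x, dtype=theano.config.floatX)

def create_shared_variable(value, shape=None, name=None):
    # Accept an ndarray, a scalar (broadcast over `shape`), or a callable initializer.
    if callable(value):
        value = value(shape)
    elif np.isscalar(value) and shape is not None:
        value = np.zeros(shape) + value
    return theano.shared(np.asarray(value, dtype=theano.config.floatX), name=name)

def add_update(shared_var, new_value):
    # Record an update to be applied when the compiled function is called.
    _UPDATES.append((shared_var, new_value))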
Example #2
 def __init__(self,
              w,
              b=0,
              normalize_minibatch=False,
              scale=False,
              use_bias=True):
     """
     :param w: Initial weight value.  Can be:
         - A numpy array, in which case a shared variable is instantiated from this data.
          - A symbolic variable that is either a shared variable or descended from a shared variable.
           This is used when there are shared parameters.
     :param b: Can be:
         - A numpy vector representing the initial bias on the hidden layer, where len(b) = w.shape[1]
          - A scalar, which just initializes the full vector to this value
      :param normalize_minibatch: Set to True to normalize over the minibatch.  This has been shown to improve optimization.
      :param scale: Set to True to include a scale term (per output).  Generally this only makes sense if
         normalize_minibatch is True.
     :param use_bias: Use a bias term?  Generally, the answer is "True", a bias term helps.
     """
     self.w = create_shared_variable(w, name='w')
     self.b = create_shared_variable(
         b,
         shape=w.shape[1] if w.ndim == 2 else
         (w.shape[0], w.shape[2]) if w.ndim == 3 else bad_value(w.shape),
         name='b')
     self.log_scale = create_shared_variable(
         0 if scale else None, shape=w.shape[1],
         name='log_scale') if scale else None
     self.normalizer = \
         batch_normalize if normalize_minibatch is True else \
         None if normalize_minibatch is False else \
         normalize_minibatch
     self._use_bias = use_bias
Example #3
def matrix_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes, epsilon=1e-7):
    """
    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :param epsilon: A small constant (unused in this implementation)
    :return: An (n_in, n_out) approximate weight gradient
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr*rx
    er_decayed = er*re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None]*er_decayed[:, None, :]
    dws = (spikes*(v2-v1))/(rx*re-1)
    new_xr = xr_decayed + xs/(kp_x+kd_x)
    new_er = er_decayed + es/(kp_e+kd_e)

    add_update(v1, tt.switch(spikes, new_xr[:, :, None]*new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
Example #4
def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x / as_floatx(kp_x + kd_x)
    re = kd_e / as_floatx(kp_e + kd_e)
    scale = (1. / as_floatx(kp_x * kp_e + kp_x * kd_e + kd_x * kp_e))
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))
    x_past = x_past_var * rx
    e_past = e_past_var * re
    w_grad = scale * (xs.T.dot(e_past + es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
Example #5
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike trains.

    (This still runs painfully slowly!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs]+ xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs]+ es[e_spike_ixs]/as_floatx(kd_e)))
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
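geoseries_sum is not defined in these listings. From its call sites (t_start=1 with positive step counts here, and t_start=t_last, t_end=0 with non-positive step counts in later examples) it is taken to be the closed-form, elementwise sum of a geometric series r**t for t running from t_start to t_end. A plain-NumPy sketch of that assumed behaviour (the real function operates on symbolic tensors):

import numpy as np

def geoseries_sum(r, t_end, t_start):
    # sum_{t=t_start}^{t_end} r**t == (r**t_start - r**(t_end + 1)) / (1 - r), elementwise.
    return (r ** t_start - r ** (t_end + 1)) / (1. - r)

# e.g. r=0.5, t_start=1, t_end=3: 0.5 + 0.25 + 0.125 = 0.875
assert np.isclose(geoseries_sum(0.5, t_end=3, t_start=1), 0.875)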
Example #6
 def __call__(self, x):
      # x should have a minibatch (first) dimension of size 1
      assert x.tag.test_value.shape[0] == 1, "This method only works for minibatches of size 1, but you used a minibatch of size: %s" % (x.tag.test_value.shape[0])
     running_mean = create_shared_variable(np.zeros(x.tag.test_value.shape[1:]))
     running_mean_sq = create_shared_variable(np.zeros(x.tag.test_value.shape[1:]))
     new_running_mean = running_mean * self.decay_constant + x[0] * (1-self.decay_constant).astype(theano.config.floatX)
     new_running_mean_sq = running_mean_sq * self.decay_constant + (x[0]**2) * (1-self.decay_constant).astype(theano.config.floatX)
     add_update(running_mean, new_running_mean)
     add_update(running_mean_sq, new_running_mean_sq)
     running_std = tt.sqrt((new_running_mean_sq - new_running_mean**2))
     return (x - running_mean)/(running_std+1e-7)
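For readability, the same arithmetic in plain NumPy: exponential running estimates of the mean and second moment, with each sample normalized by the previous mean and the freshly updated standard deviation, as in the symbolic graph above. This is an illustration of the arithmetic, not the class itself:

import numpy as np

def running_normalize(stream, decay=0.99, eps=1e-7):
    mean = np.zeros(np.shape(stream[0]))
    mean_sq = np.zeros(np.shape(stream[0]))
    out = []
    for x in stream:
        new_mean = decay * mean + (1 - decay) * x
        new_mean_sq = decay * mean_sq + (1 - decay) * x ** 2
        std = np.sqrt(new_mean_sq - new_mean ** 2)
        out.append((x - mean) / (std + eps))  # old mean, updated std, mirroring the graph above
        mean, mean_sq = new_mean, new_mean_sq
    return out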
Example #7
 def __init__(self, kp, kd, shapes):
     """
     :param kp:
     :param kd:
     :param shapes: A tuple that specifies (minibatch_size, n_in, n_out)
     """
     self.kp = kp
     self.kd = kd
     self.r = kd/as_floatx(kp+kd)
     self.scale = (1./as_floatx(kp**2 + 2*kp*kd))
     self.x_past = create_shared_variable(np.zeros((shapes[0], shapes[1])))
     self.e_past = create_shared_variable(np.zeros((shapes[0], shapes[2])))
Example #8
 def __init__(self, w, b, force_shared_parameters = True, border_mode = 'valid', filter_flip = True):
     """
     w is the kernel, an ndarray of shape (n_output_maps, n_input_maps, w_size_y, w_size_x)
     b is the bias, an ndarray of shape (n_output_maps, )
      force_shared_parameters: Set to true if you want to make the parameters shared variables.  If False, the
          parameters will be constants (which allows for certain optimizations)
      :param border_mode: {'valid', 'full', 'half', int, (int1, int2)}.  Affects how borders are handled;
          default is 'valid'.  See theano.tensor.nnet.conv2d docstring for details.
     """
     self.w = create_shared_variable(w) if force_shared_parameters else tt.constant(w)
     self.b = create_shared_variable(b) if force_shared_parameters else tt.constant(b)
     self.border_mode = border_mode
     self.filter_flip = filter_flip
Example #9
    def __init__(self, w, b, force_shared_parameters = True, border_mode = 'valid', filter_flip = True):
        """
        w is the kernel, an ndarray of shape (n_output_maps, n_input_maps, w_size_y, w_size_x)
        b is the bias, an ndarray of shape (n_output_maps, ).  Can also be "False" meaning, don't use biases

        force_shared_parameters: Set to true if you want to make the parameters shared variables.  If False, the
            parameters will be constants (which allows for certain optimizations)
        :param border_mode: {'valid', 'full', 'half', int, (int1, int2)}.  Affects how borders are handled;
            default is 'valid'.  See theano.tensor.nnet.conv2d docstring for details.
        """
        self.w = create_shared_variable(w) if force_shared_parameters else tt.constant(w)
        self.b = False if b is False else create_shared_variable(b) if force_shared_parameters else tt.constant(b)
        self.border_mode = border_mode
        self.filter_flip = filter_flip
Example #10
    def __init__(self, w, b, nonlinearity, encdec, encdec_back, grad_calc='xx', minibatch_size=1):
        self.n_in, self.n_out = w.shape
        self.w = create_shared_variable(w)
        self.b = create_shared_variable(b)

        assert isinstance(encdec, IEncoderDecoder)
        assert isinstance(encdec_back, IEncoderDecoder)
        self.encdec = encdec
        self.encdec_back = encdec_back
        self.nonlinearity = nonlinearity
        self.minibatch_size = minibatch_size
        self.grad_calc = grad_calc
        self.fwd_op_count = create_shared_variable(0, name='fwd_op_count')
        self.back_op_count = create_shared_variable(0, name='back_op_count')
        self.update_op_count = create_shared_variable(0, name='update_op_count')
Example #11
 def encode(self, x, shape=None):
     if shape is None:
         xp = create_shared_variable(np.zeros((0, )*x.ndim), name='xp')
         delta = ifelse(xp.size>0, x-xp, x)
     else:
         xp = create_shared_variable(np.zeros(shape), name='xp{}'.format(shape))
         delta = x - xp
     add_update(xp, x)
     y = self.kp*x + self.kd*delta
     if self.quantization is None:
         return y
     elif self.quantization=='herd':
         return herd(y, shape=shape)
     else:
         raise Exception('No quantizer: {}'.format(self.quantization))
Example #12
 def encode(self, x, shape=None):
     if shape is None:
         xp = create_shared_variable(np.zeros((0, ) * x.ndim), name='xp')
         delta = ifelse(xp.size > 0, x - xp, x)
     else:
         xp = create_shared_variable(np.zeros(shape),
                                     name='xp{}'.format(shape))
         delta = x - xp
     add_update(xp, x)
     y = self.kp * x + self.kd * delta
     if self.quantization is None:
         return y
     elif self.quantization == 'herd':
         return herd(y, shape=shape)
     else:
         raise Exception('No quantizer: {}'.format(self.quantization))
Example #13
    def get_sampling_fcn(self, initial_vis, n_steps):

        initial_vis = \
            create_shared_variable(initial_vis) if isinstance(initial_vis, np.ndarray) else \
            initial_vis if isinstance(initial_vis, SharedVariable) else \
            create_shared_variable(initial_vis.tag.test_value)

        @symbolic_multi
        def sample():
            vis = initial_vis
            for i in xrange(n_steps):
                hid = self.propup(vis)
                vis = self.propdown(hid)
            add_update(initial_vis, vis)
            return vis, hid
        return sample
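For orientation, a generic NumPy sketch of the block-Gibbs loop this function builds symbolically, assuming a binary RBM with sigmoid units (the class's own propup/propdown are not shown in this listing, so their exact behaviour is an assumption here):

import numpy as np

def sigmoid(a):
    return 1. / (1. + np.exp(-a))

def gibbs_sample(vis, w, b_vis, b_hid, n_steps, rng=np.random):
    # Alternate hidden/visible sampling, starting from the given visible state (n_steps >= 1).
    for _ in range(n_steps):
        hid = (sigmoid(vis.dot(w) + b_hid) > rng.rand(vis.shape[0], w.shape[1])).astype(float)    # "propup"
        vis = (sigmoid(hid.dot(w.T) + b_vis) > rng.rand(vis.shape[0], w.shape[0])).astype(float)  # "propdown"
    return vis, hid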
Example #14
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)) + 1)
    te_last = create_shared_variable(np.zeros((n_samples, n_out)) + 1)
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))

    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] * \
            rx**(tx_last[:, :, None]-t_last) \
            * re**(te_last[:, None, :]-t_last) \
            * geoseries_sum(rx*re, t_end=t_last, t_start=1)
    e_spikes = tt.neq(es, 0)
    dws = (x_spikes[:, :, None] + e_spikes[:, None, :] - x_spikes[:, :, None] *
           e_spikes[:, None, :]) * dw_potentials  # (n_samples, n_in, n_out)

    add_update(
        x_last,
        tt.switch(x_spikes, x_last * rx**tx_last + xs / as_floatx(kd_x),
                  x_last))
    add_update(
        e_last,
        tt.switch(e_spikes, e_last * re**te_last + es / as_floatx(kd_e),
                  e_last))
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last + 1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws.sum(axis=0)
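The past/future/matrix weight-gradient calculators in these listings are understood (this is an interpretation, not something stated in the snippets) to reproduce, more cheaply, the dense reference computation sketched below: decode both spike trains into low-pass traces at every step and accumulate the outer product of the traces. A plain-NumPy sketch over full spike sequences:

import numpy as np

def reference_weight_grad(xs_seq, es_seq, kp_x, kd_x, kp_e, kd_e):
    # xs_seq: (n_steps, n_samples, n_in), es_seq: (n_steps, n_samples, n_out)
    n_samples, n_in = xs_seq.shape[1:]
    n_out = es_seq.shape[2]
    xr = np.zeros((n_samples, n_in))
    er = np.zeros((n_samples, n_out))
    w_grad = np.zeros((n_in, n_out))
    for xs, es in zip(xs_seq, es_seq):
        xr = xr * kd_x / (kp_x + kd_x) + xs / (kp_x + kd_x)  # decode x spikes (see the decode() examples)
        er = er * kd_e / (kp_e + kd_e) + es / (kp_e + kd_e)  # decode e spikes
        w_grad += np.einsum('si,so->io', xr, er)             # sum of outer products over samples and time
    return w_grad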
Example #15
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e,
                                         shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(
        rx * re, t_start=t_last, t_end=0
    )  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (
        xr[:, :, None] * er[:, None, :] * spikes
    ) * sum_to_last  # PROBLEM HERE!!!! Can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr * rx + xs / (kp_x + kd_x))
    add_update(er, er * re + es / (kp_e + kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last - 1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last - 1))

    return dw_es.sum(axis=0)
Example #16
 def decode(self, y, shape=None):
     xp = shared_like(
         y, name='xp') if shape is None else create_shared_variable(
             np.zeros(shape), name='xp{}'.format(shape))
     div = (self.kp + self.kd)
     x = (y + self.kd * xp) / div
     add_update(xp, x)
     return x
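Taken together with the encode() methods in Examples #11/#12, this decoder inverts the proportional-derivative code exactly when no quantization is applied: the encoder emits y_t = kp*x_t + kd*(x_t - x_{t-1}) and the decoder reconstructs x_t = (y_t + kd*xhat_{t-1}) / (kp + kd). A self-contained NumPy check of that round trip (an illustration, not the classes themselves):

import numpy as np

def pd_roundtrip(xs, kp=0.01, kd=1.0):
    x_prev = np.zeros(np.shape(xs[0]))
    xhat = np.zeros(np.shape(xs[0]))
    recovered = []
    for x in xs:
        y = kp * x + kd * (x - x_prev)      # encode (no quantization)
        xhat = (y + kd * xhat) / (kp + kd)  # decode
        recovered.append(xhat)
        x_prev = x
    return np.array(recovered)

xs = np.random.RandomState(0).randn(50, 3)
assert np.allclose(pd_roundtrip(xs), xs)  # exact reconstruction without quantization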
Example #17
def herd(x, shape=None):
    phi = shared_like(x,
                      name='phi') if shape is None else create_shared_variable(
                          np.zeros(shape), name='phi{}'.format(shape))
    phi_ = phi + x
    s = tt.round(phi_)
    add_update(phi, phi_ - s)
    return s
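A NumPy illustration of what the herding quantizer above does: it rounds each input while carrying the rounding residual forward, so the running sum of the emitted integers never drifts more than 0.5 from the running sum of the inputs.

import numpy as np

def herd_np(xs):
    phi = np.zeros(np.shape(xs[0]))
    out = []
    for x in xs:
        phi = phi + x
        s = np.round(phi)   # emit the nearest integer...
        phi = phi - s       # ...and carry the residual to the next step
        out.append(s)
    return np.array(out)

xs = np.random.RandomState(1).rand(100, 4)
ss = herd_np(xs)
assert np.all(np.abs(np.cumsum(xs, axis=0) - np.cumsum(ss, axis=0)) <= 0.5)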
Example #18
 def __init__(self, w, b = 0, b_rev = None, use_bias = True):
     """
     :param w: Initial weight value.  Can be:
         - A numpy array, in which case a shared variable is instantiated from this data.
          - A symbolic variable that is either a shared variable or descended from a shared variable.
           This is used when there are shared parameters.
     :param b: Can be:
         - A numpy vector representing the initial bias on the hidden layer, where len(b) = w.shape[1]
          - A scalar, which just initializes the full vector to this value
     :param b_rev: Can be:
         - A numpy vector representing the initial bias on the visible layer, where len(b) = w.shape[0]
          - A scalar, which just initializes the full vector to this value
         - None, in which case b_rev is not created (for instance in an MLP).
     """
     self.w = create_shared_variable(w, name = 'w')
     self.b = create_shared_variable(b, shape = w.shape[1], name = 'b') if use_bias else None
     self.b_rev = create_shared_variable(b_rev, shape = w.shape[0], name = 'b_rev') if use_bias else None
     self._use_bias = use_bias
Example #19
    def __init__(self,
                 n_input,
                 n_hidden,
                 initializer_fcn,
                 input_layer_type='softmax',
                 hidden_layer_type='tanh'):

        self.lstm = LSTMLayer.from_initializer(
            n_input=n_input,
            n_hidden=n_hidden,
            initializer_fcn=initializer_fcn,
            hidden_layer_type=hidden_layer_type)
        self.w_hz = create_shared_variable(initializer_fcn,
                                           (n_hidden, n_input))
        self.b_z = create_shared_variable(0, n_input)

        self.output_activation = mysoftmax if input_layer_type == 'softmax' else get_named_activation_function(
            input_layer_type)
Example #20
def matrix_weight_grad_calculator(xs,
                                  es,
                                  kp_x,
                                  kd_x,
                                  kp_e,
                                  kd_e,
                                  shapes,
                                  epsilon=1e-7):
    """
    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :param epsilon: A small constant (unused in this implementation)
    :return: An (n_in, n_out) approximate weight gradient
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr * rx
    er_decayed = er * re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None] * er_decayed[:, None, :]
    dws = (spikes * (v2 - v1)) / (rx * re - 1)
    new_xr = xr_decayed + xs / (kp_x + kd_x)
    new_er = er_decayed + es / (kp_e + kd_e)

    add_update(v1,
               tt.switch(spikes, new_xr[:, :, None] * new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
Example #21
def past_weight_grad_calculator2(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    This attempt never really got off the ground.  It doesn't work
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))




    # xr_new = xr*rx + xs/(kp_x+kd_x)
    # er_new = er*re + es/(kp_e+kd_e)

    arr = rx*re/(1-rx*re)

    xr_new = xr*arr + xs/(kp_x+kd_x)
    er_new = er*arr + es/(kp_e+kd_e)

    xsum = create_shared_variable(np.zeros((n_samples, n_in)))
    esum = create_shared_variable(np.zeros((n_samples, n_out)))

    xsum_new = xsum+xr_new
    esum_new = esum+er_new

    x_nospikes = tt.eq(xs, 0)
    e_nospikes = tt.eq(es, 0)

    dw = xs.T.dot(esum_new) + xsum_new.T.dot(es)

    add_update(xr, xr_new)
    add_update(er, er_new)
    add_update(xsum, xsum_new*x_nospikes)
    add_update(esum, esum_new*e_nospikes)

    return xs.T.dot(er) + xr.T.dot(es)
    # return xr.T.dot(er)
    # return dw
Example #22
 def __init__(self,
              ws,
              bs=None,
              comp_weight=1e-6,
              optimizer=None,
              layerwise_scales=False,
              parametrization='log',
              hidden_activations='relu',
              output_activation='softmax',
              rng=None):
     """
     Learns how to rescale the units to be an optimal rounding network.
     :param ws: A list of (n_in, n_out) weight matrices
      :param bs: A list of bias vectors (same length as ws)
     :param comp_weight: The weight (lambda in the paper) given to computation
     :param optimizer: The optimizer (an IGradientOptimizer object)
     :param layerwise_scales: Make scales layerwise (as opposed to unitwise)
     :param parametrization: What space to parametrize in ('log', 'direct', or 'softplus')
     :param hidden_activations: Hidden activation functions (as a string, eg 'relu')
     :param output_activation: Output activation function
     :param rng: Random number generator or seed.
     """
     if optimizer is None:
         optimizer = get_named_optimizer('sgd', 0.01)
     if bs is None:
         bs = [np.zeros(w.shape[1]) for w in ws]
     self.ws = [create_shared_variable(w) for w in ws]
     self.bs = [create_shared_variable(b) for b in bs]
     self.comp_weight = tt.constant(comp_weight, dtype=theano.config.floatX)
     self.optimizer = optimizer
     self.hidden_activations = hidden_activations
     self.output_activation = output_activation
     scale_dims = [()] * len(ws) if layerwise_scales else [
         ws[0].shape[0]
     ] + [w.shape[1] for w in ws[:-1]]
     self.k_params = \
         [create_shared_variable(np.ones(d)) for d in scale_dims] if parametrization=='direct' else \
         [create_shared_variable(np.zeros(d)) for d in scale_dims] if parametrization=='log' else \
         [create_shared_variable(np.zeros(d)+np.exp(1)-1) for d in scale_dims] if parametrization=='softplus' else \
         bad_value(parametrization)
     self.parametrization = parametrization
     self.rng = get_theano_rng(rng)
Example #23
 def __init__(self, w, b=0, b_rev=None, use_bias=True):
     """
     :param w: Initial weight value.  Can be:
         - A numpy array, in which case a shared variable is instantiated from this data.
          - A symbolic variable that is either a shared variable or descended from a shared variable.
           This is used when there are shared parameters.
     :param b: Can be:
         - A numpy vector representing the initial bias on the hidden layer, where len(b) = w.shape[1]
          - A scalar, which just initializes the full vector to this value
     :param b_rev: Can be:
         - A numpy vector representing the initial bias on the visible layer, where len(b) = w.shape[0]
          - A scalar, which just initializes the full vector to this value
         - None, in which case b_rev is not created (for instance in an MLP).
     """
     self.w = create_shared_variable(w, name='w')
     self.b = create_shared_variable(b, shape=w.shape[1],
                                     name='b') if use_bias else None
     self.b_rev = create_shared_variable(
         b_rev, shape=w.shape[0], name='b_rev') if use_bias else None
     self._use_bias = use_bias
Example #24
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(rx*re, t_start=t_last, t_end=0)  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (xr[:, :, None]*er[:, None, :]*spikes)*sum_to_last  # PROBLEM HERE!!!! Can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr*rx + xs/(kp_x+kd_x))
    add_update(er, er*re + es/(kp_e+kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last-1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last-1))

    return dw_es.sum(axis=0)
Example #25
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in))+1)
    te_last = create_shared_variable(np.zeros((n_samples, n_out))+1)
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))

    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] * \
            rx**(tx_last[:, :, None]-t_last) \
            * re**(te_last[:, None, :]-t_last) \
            * geoseries_sum(rx*re, t_end=t_last, t_start=1)
    e_spikes = tt.neq(es, 0)
    dws = (x_spikes[:, :, None]+e_spikes[:, None, :]-x_spikes[:, :, None]*e_spikes[:, None, :])*dw_potentials  # (n_samples, n_in, n_out)

    add_update(x_last, tt.switch(x_spikes, x_last*rx**tx_last + xs/as_floatx(kd_x), x_last))
    add_update(e_last, tt.switch(e_spikes, e_last*re**te_last + es/as_floatx(kd_e), e_last))
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last+1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws.sum(axis=0)
Example #26
 def encode(self, x, shape=None):
     running_mag = create_shared_variable(1.)
     add_update(running_mag, (1 - self.adaptation_rate) * running_mag +
                self.adaptation_rate * abs(x).mean())
     target_k_beta = self.k_beta_init * running_mag
     add_update(
         self.k_beta,
         self.k_beta + self.adaptation_rate * (target_k_beta - self.k_beta))
     return pd_encode(x,
                      kp=self.kp,
                      kd=self.kd,
                      quantization=self.quantization,
                      shape=shape)
Example #27
File: mlp.py Project: qyx268/plato
 def __init__(self, w, b = 0, normalize_minibatch = False, scale = False, use_bias = True):
     """
     :param w: Initial weight value.  Can be:
         - A numpy array, in which case a shared variable is instantiated from this data.
          - A symbolic variable that is either a shared variable or descended from a shared variable.
           This is used when there are shared parameters.
     :param b: Can be:
         - A numpy vector representing the initial bias on the hidden layer, where len(b) = w.shape[1]
          - A scalar, which just initializes the full vector to this value
      :param normalize_minibatch: Set to True to normalize over the minibatch.  This has been shown to improve optimization.
      :param scale: Set to True to include a scale term (per output).  Generally this only makes sense if
         normalize_minibatch is True.
     :param use_bias: Use a bias term?  Generally, the answer is "True", a bias term helps.
     """
     self.w = create_shared_variable(w, name = 'w')
     self.b = create_shared_variable(b, shape = w.shape[1] if w.ndim==2 else (w.shape[0], w.shape[2]) if w.ndim==3 else bad_value(w.shape), name = 'b')
     self.log_scale = create_shared_variable(0 if scale else None, shape = w.shape[1], name = 'log_scale') if scale else None
     self.normalizer = \
         batch_normalize if normalize_minibatch is True else \
         None if normalize_minibatch is False else \
         normalize_minibatch
     self._use_bias = use_bias
Example #28
    def __init__(self,
                 w,
                 b,
                 nonlinearity,
                 encdec,
                 encdec_back,
                 grad_calc='xx',
                 minibatch_size=1):
        self.n_in, self.n_out = w.shape
        self.w = create_shared_variable(w)
        self.b = create_shared_variable(b)

        assert isinstance(encdec, IEncoderDecoder)
        assert isinstance(encdec_back, IEncoderDecoder)
        self.encdec = encdec
        self.encdec_back = encdec_back
        self.nonlinearity = nonlinearity
        self.minibatch_size = minibatch_size
        self.grad_calc = grad_calc
        self.fwd_op_count = create_shared_variable(0, name='fwd_op_count')
        self.back_op_count = create_shared_variable(0, name='back_op_count')
        self.update_op_count = create_shared_variable(0,
                                                      name='update_op_count')
Example #29
        def train(wake_visible):

            wake_hidden = self.propup(wake_visible)
            persistent_state = sleep_hidden = create_shared_variable(np.zeros(wake_hidden.tag.test_value.shape),
                name = 'persistent_hidden_state') if persistent else wake_hidden
            for _ in xrange(n_gibbs):
                sleep_visible = self.propdown(sleep_hidden)
                sleep_hidden = self.propup(sleep_visible)
            wake_energy = self.energy(wake_visible)
            sleep_energy = self.energy(sleep_visible)
            cost = wake_energy - sleep_energy
            optimizer(cost = cost, parameters = self.parameters, constants = [wake_visible, sleep_visible])
            if persistent:
                add_update(persistent_state, sleep_hidden)
Example #30
 def get_sampling_fcn(self, initial_vis, n_steps):
     """
     :param initial_vis: An (n_samples, n_input_dims) array representing the initial visible samples
     :param n_steps: Number of steps to bounce on each call.
     :return: A function that returns an (n_samples, n_input_dims) tensor of samples.
     """
     initial_vis = create_shared_variable(initial_vis)
     initial_top_vis = self.propup(initial_vis, to_layer=-1)
      top_sampling_fcn = self.rbms[-1].get_sampling_fcn(initial_vis=initial_top_vis, n_steps=n_steps)
     @symbolic_simple
     def sample():
         top_sample, _ = top_sampling_fcn()
         bottom_sample = self.propdown(top_sample, stochastic = True, from_layer = -2)
         return bottom_sample
     return sample
Example #31
    def __init__(self, kp, kd, adaptation_rate = 0.0001, quantization = None):
        """

        :param kp: The proportional coefficient of the encoder.  A kp/kd ratio of around 0.01 might be a normal value.
        :param kd: The derivative coefficient of the encoder.
        :param adaptation_rate: Rate at which k_beta adapts to track the running magnitude of the input.
        :param quantization: None for no quantization, or 'herd' to herd the encoded signal.
        """

        self.k_alpha = kd/float(kp+kd)
        self.k_beta_init = 1/float(kp+kd)  # The scale
        self.k_beta=self.k_beta_init
        assert np.allclose(self.kp, kp)
        assert np.allclose(self.kd, kd)
        self.k_beta = create_shared_variable(self.k_beta_init)
        self.adaptation_rate = adaptation_rate
        self.quantization = quantization
Example #32
    def __init__(self, kp, kd, adaptation_rate=0.0001, quantization=None):
        """

        :param kp: The proportional coefficient of the encoder.  A kp/kd ratio of around 0.01 might be a normal value.
        :param kd: The derivative coefficient of the encoder.
        :param adaptation_rate: Rate at which k_beta adapts to track the running magnitude of the input.
        :param quantization: None for no quantization, or 'herd' to herd the encoded signal.
        """

        self.k_alpha = kd / float(kp + kd)
        self.k_beta_init = 1 / float(kp + kd)  # The scale
        self.k_beta = self.k_beta_init
        assert np.allclose(self.kp, kp)
        assert np.allclose(self.kd, kd)
        self.k_beta = create_shared_variable(self.k_beta_init)
        self.adaptation_rate = adaptation_rate
        self.quantization = quantization
Example #33
    def _update_param(self, param, gradient):
        # Initialize variables
        i = create_shared_variable(0.)
        m = theano.shared(param.get_value() * 0.)
        v = theano.shared(param.get_value() * 0.)

        # Recompute values
        i_t = i + 1.
        fix1 = 1. - (1. - self.beta_1)**i_t
        fix2 = 1. - (1. - self.beta_2)**i_t
        lr_t = self.alpha * (tt.sqrt(fix2) / fix1)
        m_t = (self.beta_1 * gradient) + ((1. - self.beta_1) * m)
        v_t = (self.beta_2 * tt.sqr(gradient)) + ((1. - self.beta_2) * v)
        g_t = m_t / (tt.sqrt(v_t) + self.eps)
        p_t = param - (lr_t * g_t)
        add_update(param, p_t)
        add_update(m, m_t)
        add_update(v, v_t)
        add_update(i, i_t)
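This is the Adam update expressed with Theano shared state. For readability, the same arithmetic in plain NumPy, keeping this code's convention that beta_1 and beta_2 are the update rates (i.e. one minus the usual Adam decay rates); the default hyperparameter values here are placeholders, and this is a restatement, not the optimizer class itself:

import numpy as np

def adam_step(param, grad, m, v, i, alpha=1e-3, beta_1=0.1, beta_2=0.001, eps=1e-8):
    i = i + 1.
    fix1 = 1. - (1. - beta_1) ** i          # bias correction for the first moment
    fix2 = 1. - (1. - beta_2) ** i          # bias correction for the second moment
    lr = alpha * np.sqrt(fix2) / fix1
    m = beta_1 * grad + (1. - beta_1) * m
    v = beta_2 * grad ** 2 + (1. - beta_2) * v
    param = param - lr * m / (np.sqrt(v) + eps)
    return param, m, v, i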
Example #34
    def _update_param(self, param, gradient):
        # Initialize variables
        i = create_shared_variable(0.)
        m = theano.shared(param.get_value() * 0.)
        v = theano.shared(param.get_value() * 0.)

        # Recompute values
        i_t = i + 1.
        fix1 = 1. - (1. - self.beta_1)**i_t
        fix2 = 1. - (1. - self.beta_2)**i_t
        lr_t = self.alpha * (tt.sqrt(fix2) / fix1)
        m_t = (self.beta_1 * gradient) + ((1. - self.beta_1) * m)
        v_t = (self.beta_2 * tt.sqr(gradient)) + ((1. - self.beta_2) * v)
        g_t = m_t / (tt.sqrt(v_t) + self.eps)
        p_t = param - (lr_t * g_t)
        add_update(param, p_t)
        add_update(m, m_t)
        add_update(v, v_t)
        add_update(i, i_t)
Example #35
File: mlp.py Project: qyx268/plato
 def __init__(self, w, b=0, stride = (1, 1)):
     self.w = create_shared_variable(w, name = 'w')
     self.b = create_shared_variable(b, name = 'b')
     self._stride = stride
Example #36
 def __init__(self, w, b=0, stride=(1, 1)):
     self.w = create_shared_variable(w, name='w')
     self.b = create_shared_variable(b, name='b')
     self._stride = stride
Example #37
 def decode(self, y, shape=None):
     xp = shared_like(y, name='xp') if shape is None else create_shared_variable(np.zeros(shape), name='xp{}'.format(shape))
     div = (self.kp+self.kd)
     x = (y+self.kd*xp)/div
     add_update(xp, x)
     return x
Example #38
 def __init__(self, shape, scale_shape = None):
     self.phi = create_shared_variable(np.zeros(shape))
     self.log_scales = create_shared_variable(0. if scale_shape is None else np.zeros(scale_shape))
Example #39
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike trains.

    (This still runs painfully slowly!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros(n_in) + 1)
    te_last = create_shared_variable(np.zeros(n_out) + 1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None],
                        te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(
        dws[x_spike_ixs, :], x_last[x_spike_ixs, None] * e_last *
        rx**(tx_last[x_spike_ixs, None] - t_last) *
        re**(te_last[None, :] - t_last) *
        geoseries_sum(re * rx, t_end=t_last, t_start=1))

    new_x_last = tt.set_subtensor(
        x_last[x_spike_ixs], x_last[x_spike_ixs] * rx**tx_last[x_spike_ixs] +
        xs[x_spike_ixs] / as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None],
                        te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(
        dws[:, e_spike_ixs], new_x_last[:, None] * e_last[e_spike_ixs] *
        rx**(new_tx_last[:, None] - t_last) *
        re**(te_last[None, e_spike_ixs] - t_last) *
        geoseries_sum(re * rx, t_end=t_last, t_start=1))

    add_update(x_last, new_x_last)
    add_update(
        e_last,
        tt.set_subtensor(
            e_last[e_spike_ixs],
            e_last[e_spike_ixs] * re**te_last[e_spike_ixs] +
            es[e_spike_ixs] / as_floatx(kd_e)))
    add_update(tx_last, new_tx_last + 1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws
Example #40
 def encode(self, x, shape=None):
     running_mag = create_shared_variable(1.)
     add_update(running_mag, (1-self.adaptation_rate)*running_mag + self.adaptation_rate*abs(x).mean())
     target_k_beta = self.k_beta_init*running_mag
     add_update(self.k_beta, self.k_beta + self.adaptation_rate*(target_k_beta - self.k_beta))
     return pd_encode(x, kp=self.kp, kd=self.kd, quantization=self.quantization, shape=shape)
Example #41
 def get_initial_state(self, h_init=None, c_init=None):
     if h_init is None:
         h_init = create_shared_variable(0, shape=self.n_hidden, name='h')
     if c_init is None:
         c_init = create_shared_variable(0, shape=self.n_hidden, name='c')
     return h_init, c_init
Example #42
 def __init__(self, shape, scale = 1):
     self.phi = create_shared_variable(np.zeros(shape))
     self.scale = scale
Example #43
    def get_generation_function(self,
                                maintain_state=True,
                                stochastic=True,
                                rng=None):
        """
        Return a symbolic function that generates a sequence (and updates its internal state).
        :param stochastic: True to sample a onehot-vector from the output.  False to simply reinsert the
            distribution vector.
        :param rng: A seed, numpy or theano random number generator
        :return: A symbolic function of the form:
            (outputs, updates) = generate(primer, n_steps)
        """
        h_init, c_init = self.lstm.get_initial_state()
        x_init = create_shared_variable(0, shape=self.lstm.n_inputs)
        rng = get_theano_rng(rng)

        @symbolic_multi
        def generate(primer, n_steps):
            """
            Generate a sequence of outputs, and update the internal state.

            primer: A sequence to prime on.  This will overwrite the OUTPUT at
                each time step.  Note: this means the first iteration will run
                off the last output from the previous call to generate.
            n_steps: Number of steps (after the primer) to run.
            return: A sequence of length n_steps.
            """
            n_primer_steps = primer.shape[0]
            n_total_steps = n_primer_steps + n_steps

            def do_step(i, x_, h_, c_):
                """
                i: The step number (int)
                x_: An input vector
                h_: A hiddens state vector
                c_: A memory cell vector
                """
                y_prob, h, c = self.step(x_, h_, c_)
                y_candidate = ifelse(
                    int(stochastic),
                    rng.multinomial(n=1, pvals=y_prob[None, :])[0].astype(
                        theano.config.floatX), y_prob)
                # y_candidate = ifelse(int(stochastic), rng.multinomial(n=1, pvals=y_prob.dimshuffle('x', 1))[0].astype(theano.config.floatX), y_prob)
                y = ifelse(
                    i < n_primer_steps, primer[i], y_candidate
                )  # Note: If you get error here, you just need to prime with something on first call.
                return y, h, c

            (x_gen, h_gen, c_gen), updates = theano.scan(
                do_step,
                sequences=[tt.arange(n_total_steps)],
                outputs_info=[x_init, h_init, c_init],
            )

            if maintain_state:
                updates += [(x_init, x_gen[-1]), (h_init, h_gen[-1]),
                            (c_init, c_gen[-1])]

            for var, val in updates.items():
                add_update(var, val)

            return x_gen[n_primer_steps:],

        return generate
Example #44
 def __init__(self, *args, **kwargs):
     ConvLayer.__init__(self, *args, **kwargs)
     self.bias_switch = create_shared_variable(1.)
Example #45
 def __init__(self, w, b_vis, b_hid, rng):
     self.rng = get_theano_rng(rng)
     self.w = create_shared_variable(w)
     self.b_vis = create_shared_variable(b_vis)
     self.b_hid = create_shared_variable(b_hid)
Example #46
 def __init__(self, shape):
     self.sum = create_shared_variable(np.zeros(shape))
Example #47
 def from_initializer(cls,
                      n_input,
                      n_hidden,
                      initializer_fcn,
                      hidden_layer_type='tanh'):
     """
     :param n_input: Number of inputs
     :param n_hidden: Number of hiddens
      :param hidden_layer_type: Hidden unit type (e.g. 'tanh')
     :param initializer_fcn: Function taking a shape and returning parameters.
     :return: An LSTMLayer
     """
     return LSTMLayer(
         w_xi=create_shared_variable(initializer_fcn,
                                     shape=(n_input, n_hidden)),
         w_xf=create_shared_variable(initializer_fcn,
                                     shape=(n_input, n_hidden)),
         w_xc=create_shared_variable(initializer_fcn,
                                     shape=(n_input, n_hidden)),
         w_xo=create_shared_variable(initializer_fcn,
                                     shape=(n_input, n_hidden)),
         w_hi=create_shared_variable(initializer_fcn,
                                     shape=(n_hidden, n_hidden)),
         w_hf=create_shared_variable(initializer_fcn,
                                     shape=(n_hidden, n_hidden)),
         w_hc=create_shared_variable(initializer_fcn,
                                     shape=(n_hidden, n_hidden)),
         w_ho=create_shared_variable(initializer_fcn,
                                     shape=(n_hidden, n_hidden)),
         w_co=create_shared_variable(initializer_fcn,
                                     shape=(n_hidden, n_hidden)),
         b_i=create_shared_variable(0, shape=n_hidden),
         b_f=create_shared_variable(0, shape=n_hidden),
         b_c=create_shared_variable(0, shape=n_hidden),
         b_o=create_shared_variable(0, shape=n_hidden),
         hidden_layer_type=hidden_layer_type)
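A hypothetical usage sketch, following the docstring's contract that initializer_fcn maps a shape to an array of initial parameter values; the sizes and the initializer below are made up for illustration:

import numpy as np

layer = LSTMLayer.from_initializer(
    n_input=20,
    n_hidden=50,
    initializer_fcn=lambda shape: (0.01 * np.random.randn(*shape)).astype('float32'),
    hidden_layer_type='tanh',
    )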
Example #48
def herd(x, shape = None):
    phi = shared_like(x, name='phi') if shape is None else create_shared_variable(np.zeros(shape), name='phi{}'.format(shape))
    phi_ = phi + x
    s = tt.round(phi_)
    add_update(phi, phi_ - s)
    return s