Example #1
def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike trains.

    This isn't actually implemented as an efficient update, but it produces the same result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x / as_floatx(kp_x + kd_x)   # per-step decay factor of the x trace
    re = kd_e / as_floatx(kp_e + kd_e)   # per-step decay factor of the e trace
    scale = (1. / as_floatx(kp_x * kp_e + kp_x * kd_e + kd_x * kp_e))
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))   # running trace of past x spikes
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))  # running trace of past e spikes
    x_past = x_past_var * rx
    e_past = e_past_var * re
    w_grad = scale * (xs.T.dot(e_past + es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
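For reference, here is a minimal NumPy sketch of the same stateful recurrence, one call per simulated time step. The class below is purely illustrative and not part of the original code; in the snippet above the equivalent state lives in the shared variables registered through create_shared_variable / add_update.

import numpy as np

class FutureWeightGradNumpy:
    """Illustrative NumPy version of the recurrence above; not the original implementation."""

    def __init__(self, n_samples, n_in, n_out, kp_x, kd_x, kp_e, kd_e):
        self.rx = kd_x / (kp_x + kd_x)
        self.re = kd_e / (kp_e + kd_e)
        self.scale = 1. / (kp_x*kp_e + kp_x*kd_e + kd_x*kp_e)
        self.x_past = np.zeros((n_samples, n_in))    # running trace of past x spikes
        self.e_past = np.zeros((n_samples, n_out))   # running trace of past e spikes

    def step(self, xs, es):
        x_past = self.x_past * self.rx               # decay the traces by one step
        e_past = self.e_past * self.re
        w_grad = self.scale * (xs.T @ (e_past + es) + x_past.T @ es)
        self.x_past = x_past + xs                    # same updates that add_update registers above
        self.e_past = e_past + es
        return w_grad

# Usage sketch with arbitrary sizes:
calc = FutureWeightGradNumpy(n_samples=5, n_in=3, n_out=2, kp_x=.01, kd_x=1., kp_e=.01, kd_e=1.)
w_grad = calc.step(np.random.randn(5, 3), np.random.randn(5, 2))   # -> (3, 2) array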
Example #2
def __init__(self, kp, kd, shapes):
    """
    :param kp: kp for the units
    :param kd: kd for the units
    :param shapes: A tuple that specifies (minibatch_size, n_in, n_out)
    """
    self.kp = kp
    self.kd = kd
    self.r = kd/as_floatx(kp+kd)
    self.scale = (1./as_floatx(kp**2 + 2*kp*kd))
    self.x_past = create_shared_variable(np.zeros((shapes[0], shapes[1])))
    self.e_past = create_shared_variable(np.zeros((shapes[0], shapes[2])))
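A quick plain-Python check, with arbitrary example values: for a single (kp, kd) pair, i.e. kp_x = kp_e = kp and kd_x = kd_e = kd, the scale used by the calculators above, 1/(kp_x*kp_e + kp_x*kd_e + kd_x*kp_e), reduces to the 1/(kp**2 + 2*kp*kd) precomputed in this __init__.

kp, kd = 0.01, 1.0                      # arbitrary example values
r = kd / (kp + kd)                      # same decay factor as self.r above
scale = 1. / (kp**2 + 2*kp*kd)          # same scale as self.scale above
assert abs(scale - 1. / (kp*kp + kp*kd + kd*kp)) < 1e-12
print(r, scale)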
Example #3
def matrix_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes, epsilon=1e-7):
    """
    :param xs:
    :param es:
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shapes:
    :param epsilon:
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr*rx
    er_decayed = er*re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])  # True wherever the input or the error unit spiked
    v2 = xr_decayed[:, :, None]*er_decayed[:, None, :]
    dws = (spikes*(v2-v1))/(rx*re-1)  # closed-form geometric sum of the product trace since the last spike
    new_xr = xr_decayed + xs/(kp_x+kd_x)
    new_er = er_decayed + es/(kp_e+kd_e)

    add_update(v1, tt.switch(spikes, new_xr[:, :, None]*new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)

    return dws.sum(axis=0)
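The dws line above relies on a closed-form geometric sum: reading v1 as the product of the two decaying traces stored at the last spike and v2 as that product after n further decay steps, (v2 - v1)/(rx*re - 1) equals the product trace summed over those n steps. A small NumPy check of that identity, with made-up numbers:

import numpy as np

q = 0.3 * 0.7          # rx*re for some illustrative rx, re
v1, n = 2.5, 6         # product trace at the last spike, and steps elapsed since then
v2 = v1 * q**n         # the same product after n decay steps
closed_form = (v2 - v1) / (q - 1)
brute_force = sum(v1 * q**k for k in range(n))
assert np.isclose(closed_form, brute_force)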
Example #4
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike trains.

    This isn't actually implemented as an efficient update, but it produces the same result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)) + 1)   # time steps since each x unit last spiked
    te_last = create_shared_variable(np.zeros((n_samples, n_out)) + 1)  # time steps since each e unit last spiked
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))        # x trace value at its last spike
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))       # e trace value at its last spike

    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] * \
            rx**(tx_last[:, :, None]-t_last) \
            * re**(te_last[:, None, :]-t_last) \
            * geoseries_sum(rx*re, t_end=t_last, t_start=1)
    e_spikes = tt.neq(es, 0)
    dws = (x_spikes[:, :, None] + e_spikes[:, None, :] - x_spikes[:, :, None] *
           e_spikes[:, None, :]) * dw_potentials  # (n_samples, n_in, n_out)

    add_update(
        x_last,
        tt.switch(x_spikes, x_last * rx**tx_last + xs / as_floatx(kd_x),
                  x_last))
    add_update(
        e_last,
        tt.switch(e_spikes, e_last * re**te_last + es / as_floatx(kd_e),
                  e_last))
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last + 1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws.sum(axis=0)
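geoseries_sum is not defined in these snippets. A plausible NumPy stand-in is sketched below, under the assumption that it returns the elementwise sum of ratio**t for t running from t_start to t_end; that signature and behaviour are inferred from how it is called here, not taken from the original helper library.

import numpy as np

def geoseries_sum_np(ratio, t_end, t_start):
    # Elementwise sum_{t=t_start}^{t_end} ratio**t in closed form (assumes ratio != 1).
    # t_end and t_start may be arrays of exponents, as in the calls above.
    t_end, t_start = np.asarray(t_end, dtype=float), np.asarray(t_start, dtype=float)
    return (ratio**(t_end + 1) - ratio**t_start) / (ratio - 1)

# Brute-force check for scalar arguments:
ratio, t_start, t_end = 0.25, 1, 5
assert np.isclose(geoseries_sum_np(ratio, t_end, t_start),
                  sum(ratio**t for t in range(t_start, t_end + 1)))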
Example #5
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.

    (This still runs FING SLOWLY!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shapes: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs] + xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs] + es[e_spike_ixs]/as_floatx(kd_e)))
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
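The tt.inc_subtensor calls above accumulate contributions only into the rows (and columns) of units that actually spiked, which is where the intended efficiency comes from. A NumPy analogue of that access pattern, with made-up sizes, purely to show the indexing:

import numpy as np

n_in, n_out = 4, 3
dws = np.zeros((n_in, n_out))
x_spike_ixs = np.array([0, 2])                  # indices of presynaptic units that spiked
contrib = np.ones((len(x_spike_ixs), n_out))    # stand-in for the per-spike contribution
np.add.at(dws, x_spike_ixs, contrib)            # like tt.inc_subtensor(dws[x_spike_ixs, :], contrib)
print(dws)                                      # only rows 0 and 2 were touched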
Example #6
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e,
                                         shapes):
    """
    Do an efficient update of the weights given the two spike trains.

    This isn't actually implemented as an efficient update, but it produces the same result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    sum_to_last = geoseries_sum(
        rx * re, t_start=t_last, t_end=0
    )  # Wasteful, since most of this is multiplied by zeros later, but for now it doesn't matter

    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    dw_es = (
        xr[:, :, None] * er[:, None, :] * spikes
    ) * sum_to_last  # PROBLEM HERE: this can be a very small number times a very large number
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr * rx + xs / (kp_x + kd_x))
    add_update(er, er * re + es / (kp_e + kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last - 1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last - 1))

    return dw_es.sum(axis=0)
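One way to see the kind of failure the "PROBLEM HERE" comment warns about, with made-up numbers rather than anything from the original code: after many silent steps the decayed product trace can underflow in float32 while a geometric-sum factor of the corresponding magnitude overflows, so their product comes out as 0 or NaN instead of a sensible finite value.

import numpy as np

q = np.float32(0.25)                      # e.g. rx*re
n = 200                                   # steps since either unit last spiked
trace = np.float32(1.0) * q**n            # decayed product trace: underflows to 0.0 in float32
factor = q**np.float32(-n)                # a factor as large as the geometric sum grows: overflows to inf
print(trace, factor, trace * factor)      # 0.0 inf nan
print(np.float64(0.25)**n * np.float64(0.25)**(-n))  # 1.0 in float64, the value exact arithmetic would give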
Example #7
def past_weight_grad_calculator2(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    This attempt never really got off the ground.  It doesn't work.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))

    # xr_new = xr*rx + xs/(kp_x+kd_x)
    # er_new = er*re + es/(kp_e+kd_e)

    arr = rx*re/(1-rx*re)

    xr_new = xr*arr + xs/(kp_x+kd_x)
    er_new = er*arr + es/(kp_e+kd_e)

    xsum = create_shared_variable(np.zeros((n_samples, n_in)))
    esum = create_shared_variable(np.zeros((n_samples, n_out)))

    xsum_new = xsum+xr_new
    esum_new = esum+er_new

    x_nospikes = tt.eq(xs, 0)
    e_nospikes = tt.eq(es, 0)

    dw = xs.T.dot(esum_new) + xsum_new.T.dot(es)

    add_update(xr, xr_new)
    add_update(er, er_new)
    add_update(xsum, xsum_new*x_nospikes)
    add_update(esum, esum_new*e_nospikes)

    return xs.T.dot(er) + xr.T.dot(es)
    # return xr.T.dot(er)
    # return dw