def future_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce
    the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    rx = kd_x / as_floatx(kp_x + kd_x)
    re = kd_e / as_floatx(kp_e + kd_e)
    scale = 1. / as_floatx(kp_x * kp_e + kp_x * kd_e + kd_x * kp_e)
    n_samples, n_in, n_out = shapes
    x_past_var = create_shared_variable(np.zeros((n_samples, n_in)))
    e_past_var = create_shared_variable(np.zeros((n_samples, n_out)))
    x_past = x_past_var * rx
    e_past = e_past_var * re
    w_grad = scale * (xs.T.dot(e_past + es) + x_past.T.dot(es))
    add_update(x_past_var, x_past + xs)
    add_update(e_past_var, e_past + es)
    return w_grad
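# The symbolic version above is stateful (shared variables are advanced once per
# call), which can obscure what is actually being computed.  Below is a plain-NumPy
# sketch of the same recurrence for a pre-collected spike sequence.  The function
# name, the (n_steps, n_samples, ...) layout and the explicit time loop are
# illustrative assumptions for this sketch, not part of the original code.
def future_weight_grad_numpy(x_seq, e_seq, kp_x, kd_x, kp_e, kd_e):
    """
    :param x_seq: An (n_steps, n_samples, n_in) spike train
    :param e_seq: An (n_steps, n_samples, n_out) error spike train
    :return: A list of (n_in, n_out) per-step gradients, one per time step.
    """
    rx = kd_x / float(kp_x + kd_x)
    re = kd_e / float(kp_e + kd_e)
    scale = 1. / (kp_x * kp_e + kp_x * kd_e + kd_x * kp_e)
    n_samples, n_in = x_seq.shape[1:]
    n_out = e_seq.shape[2]
    x_past = np.zeros((n_samples, n_in))
    e_past = np.zeros((n_samples, n_out))
    grads = []
    for xs, es in zip(x_seq, e_seq):
        x_decayed = x_past * rx   # decayed input trace from previous steps
        e_decayed = e_past * re   # decayed error trace from previous steps
        grads.append(scale * (xs.T.dot(e_decayed + es) + x_decayed.T.dot(es)))
        x_past = x_decayed + xs
        e_past = e_decayed + es
    return grads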
def __init__(self, kp, kd, shapes):
    """
    :param kp:
    :param kd:
    :param shapes: A tuple that specifies (minibatch_size, n_in, n_out)
    """
    self.kp = kp
    self.kd = kd
    self.r = kd / as_floatx(kp + kd)
    self.scale = 1. / as_floatx(kp**2 + 2 * kp * kd)
    self.x_past = create_shared_variable(np.zeros((shapes[0], shapes[1])))
    self.e_past = create_shared_variable(np.zeros((shapes[0], shapes[2])))
def matrix_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes, epsilon=1e-7):
    """
    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :param epsilon:
    :return: An (n_in, n_out) approximate weight gradient.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    v1 = create_shared_variable(np.zeros((n_samples, n_in, n_out)))  # trace product stored at the last spike
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))   # running input trace
    er = create_shared_variable(np.zeros((n_samples, n_out)))  # running error trace
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    xr_decayed = xr * rx
    er_decayed = er * re
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    v2 = xr_decayed[:, :, None] * er_decayed[:, None, :]
    # Geometric-series trick: the sum of trace products since the last spike
    # can be recovered from the difference of the stored and current products.
    dws = (spikes * (v2 - v1)) / (rx * re - 1)
    new_xr = xr_decayed + xs / (kp_x + kd_x)
    new_er = er_decayed + es / (kp_e + kd_e)
    add_update(v1, tt.switch(spikes, new_xr[:, :, None] * new_er[:, None, :], v1))
    add_update(xr, new_xr)
    add_update(er, new_er)
    return dws.sum(axis=0)
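# For reference, the quantity the sparse "past" calculators in this file are
# chasing is the dense per-step accumulation of the two reconstructed traces:
# x_bar_t = rx * x_bar_{t-1} + x_t / (kp_x + kd_x), and likewise for e_bar.
# The sketch below computes that sum directly in NumPy.  It is an illustrative
# reference only (the name and layout are assumptions); the sparse versions may
# differ from it by end-of-sequence truncation and off-by-one details at spike times.
def dense_past_weight_grad_numpy(x_seq, e_seq, kp_x, kd_x, kp_e, kd_e):
    """
    :param x_seq: An (n_steps, n_samples, n_in) spike train
    :param e_seq: An (n_steps, n_samples, n_out) error spike train
    :return: An (n_in, n_out) accumulated weight gradient.
    """
    rx = kd_x / float(kp_x + kd_x)
    re = kd_e / float(kp_e + kd_e)
    n_samples, n_in = x_seq.shape[1:]
    n_out = e_seq.shape[2]
    x_bar = np.zeros((n_samples, n_in))
    e_bar = np.zeros((n_samples, n_out))
    w_grad = np.zeros((n_in, n_out))
    for xs, es in zip(x_seq, e_seq):
        x_bar = x_bar * rx + xs / (kp_x + kd_x)  # decayed input trace
        e_bar = e_bar * re + es / (kp_e + kd_e)  # decayed error trace
        w_grad += x_bar.T.dot(e_bar)             # sum of outer products over samples
    return w_grad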
def past_weight_grad_calculator(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce
    the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: Make this actually use sparsity, one of these days.
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    tx_last = create_shared_variable(np.zeros((n_samples, n_in)) + 1)   # steps since last x spike
    te_last = create_shared_variable(np.zeros((n_samples, n_out)) + 1)  # steps since last e spike
    x_last = create_shared_variable(np.zeros((n_samples, n_in)))        # input trace at its last spike
    e_last = create_shared_variable(np.zeros((n_samples, n_out)))       # error trace at its last spike
    t_last = tt.minimum(tx_last[:, :, None], te_last[:, None, :])
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    # Accumulated product of the two decaying traces since the more recent of the two last spikes.
    dw_potentials = x_last[:, :, None] * e_last[:, None, :] \
        * rx ** (tx_last[:, :, None] - t_last) \
        * re ** (te_last[:, None, :] - t_last) \
        * geoseries_sum(rx * re, t_end=t_last, t_start=1)
    # Only commit the potential update where at least one of the two units spiked.
    dws = (x_spikes[:, :, None] + e_spikes[:, None, :] - x_spikes[:, :, None] * e_spikes[:, None, :]) * dw_potentials  # (n_samples, n_in, n_out)
    add_update(x_last, tt.switch(x_spikes, x_last * rx ** tx_last + xs / as_floatx(kd_x), x_last))
    add_update(e_last, tt.switch(e_spikes, e_last * re ** te_last + es / as_floatx(kd_e), e_last))  # decay the error trace by re over the elapsed steps
    add_update(tx_last, tt.switch(x_spikes, 1, tx_last + 1))
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws.sum(axis=0)
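# geoseries_sum is imported from elsewhere in this codebase.  From the way it is
# used above and below, it is assumed to return the geometric-series sum
# sum_{t=t_start}^{t_end} r**t, elementwise when t_start/t_end are arrays.  That
# semantics is an inference from usage, not a statement about the real helper;
# a minimal NumPy sketch under that assumption (suffixed to avoid a name clash):
def geoseries_sum_numpy(r, t_end, t_start):
    # Closed form of sum_{t=t_start}^{t_end} r**t, assuming r != 1.
    # t_end and t_start may be arrays; the sum is then taken elementwise.
    return (r ** t_start - r ** (t_end + 1)) / (1 - r)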
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike updates.

    (This still runs really slowly!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    tx_last = create_shared_variable(np.zeros(n_in) + 1)   # steps since last x spike
    te_last = create_shared_variable(np.zeros(n_out) + 1)  # steps since last e spike
    x_last = create_shared_variable(np.zeros(n_in))        # input trace at its last spike
    e_last = create_shared_variable(np.zeros(n_out))       # error trace at its last spike
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    # For rows where x spiked, add the trace product accumulated since the last spike of either unit.
    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None] * e_last
        * rx ** (tx_last[x_spike_ixs, None] - t_last)
        * re ** (te_last[None, :] - t_last)
        * geoseries_sum(re * rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs],
        x_last[x_spike_ixs] * rx ** tx_last[x_spike_ixs] + xs[x_spike_ixs] / as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    # For columns where e spiked, do the same, using the freshly updated x state.
    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None] * e_last[e_spike_ixs]
        * rx ** (new_tx_last[:, None] - t_last)
        * re ** (te_last[None, e_spike_ixs] - t_last)
        * geoseries_sum(re * rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs],
        e_last[e_spike_ixs] * re ** te_last[e_spike_ixs] + es[e_spike_ixs] / as_floatx(kd_e)))
    add_update(tx_last, new_tx_last + 1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws
def past_weight_grad_calculator_reloaded(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    Do an efficient update of the weights given the two spike-trains.

    This isn't actually implemented as an efficient update, but it will produce
    the identical result as if it were.

    :param xs: An (n_samples, n_in) array
    :param es: An (n_samples, n_out) array
    :param kp_x: kp for the x units
    :param kd_x: kd for the x units
    :param kp_e: kp for the e units
    :param kd_e: kd for the e units
    :param shapes: (minibatch_size, n_in, n_out)
    :return: An (n_in, n_out) approximate weight gradient.
    """
    # TODO: RESOLVE INSTABILITY ISSUE
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    tx_last = create_shared_variable(np.zeros((n_samples, n_in)))   # (negative) steps since last x spike
    te_last = create_shared_variable(np.zeros((n_samples, n_out)))  # (negative) steps since last e spike
    xr = create_shared_variable(np.zeros((n_samples, n_in)))        # running input trace
    er = create_shared_variable(np.zeros((n_samples, n_out)))       # running error trace
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    t_last = tt.maximum(tx_last[:, :, None], te_last[:, None, :])
    # Wasteful, since most of this is multiplied by zeros later, but it doesn't matter for now.
    sum_to_last = geoseries_sum(rx * re, t_start=t_last, t_end=0)
    spikes = tt.bitwise_or(x_spikes[:, :, None], e_spikes[:, None, :])
    # PROBLEM HERE!!!! Can be a very small number times a very large number.
    dw_es = (xr[:, :, None] * er[:, None, :] * spikes) * sum_to_last
    # dw_es = (xr[:, :, None]*(x_spikes[:, :, None]-x_spikes[:, :, None]*e_spikes[:, None, :]) * er[:, None, :] + xr[:, :, None] * (er*e_spikes)[:, None, :]) * sum_to_last
    add_update(xr, xr * rx + xs / (kp_x + kd_x))
    add_update(er, er * re + es / (kp_e + kd_e))
    add_update(tx_last, tt.switch(x_spikes, 0, tx_last - 1))
    add_update(te_last, tt.switch(e_spikes, 0, te_last - 1))
    return dw_es.sum(axis=0)
def past_weight_grad_calculator2(xs, es, kp_x, kd_x, kp_e, kd_e, shapes):
    """
    This attempt never really got off the ground.  It doesn't work.
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_samples, n_in, n_out = shapes
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    xr = create_shared_variable(np.zeros((n_samples, n_in)))
    er = create_shared_variable(np.zeros((n_samples, n_out)))
    # xr_new = xr*rx + xs/(kp_x+kd_x)
    # er_new = er*re + es/(kp_e+kd_e)
    arr = rx * re / (1 - rx * re)
    xr_new = xr * arr + xs / (kp_x + kd_x)
    er_new = er * arr + es / (kp_e + kd_e)
    xsum = create_shared_variable(np.zeros((n_samples, n_in)))
    esum = create_shared_variable(np.zeros((n_samples, n_out)))
    xsum_new = xsum + xr_new
    esum_new = esum + er_new
    x_nospikes = tt.eq(xs, 0)
    e_nospikes = tt.eq(es, 0)
    dw = xs.T.dot(esum_new) + xsum_new.T.dot(es)
    add_update(xr, xr_new)
    add_update(er, er_new)
    add_update(xsum, xsum_new * x_nospikes)
    add_update(esum, esum_new * e_nospikes)
    return xs.T.dot(er) + xr.T.dot(es)
    # return xr.T.dot(er)
    # return dw
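# Example use of the illustrative NumPy sketches above.  The sketches and the
# names here are demonstration-only assumptions; they are not part of the
# symbolic (Theano) code in this module.
def _demo_numpy_sketches():
    # Feed random signed spike trains through the two NumPy reference sketches.
    rng = np.random.RandomState(1234)
    x_seq = rng.choice([0., 0., 0., 1., -1.], size=(100, 4, 20))  # (n_steps, n_samples, n_in)
    e_seq = rng.choice([0., 0., 0., 1., -1.], size=(100, 4, 10))  # (n_steps, n_samples, n_out)
    per_step_grads = future_weight_grad_numpy(x_seq, e_seq, kp_x=.1, kd_x=1., kp_e=.1, kd_e=1.)
    dense_grad = dense_past_weight_grad_numpy(x_seq, e_seq, kp_x=.1, kd_x=1., kp_e=.1, kd_e=1.)
    print(sum(per_step_grads).shape, dense_grad.shape)  # -> (20, 10) (20, 10)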