def initanh(bounds, nk, popsize, angle_in):
    # Initialise a population of smooth tanh-shaped joint trajectories.
    min_b, max_b = cp.asarray(bounds).T
    diff = cp.abs(min_b - max_b)
    # Start angles are fixed; end angles are drawn uniformly within bounds.
    A = cp.ones((popsize, 1, 4)) * cp.array(angle_in)
    B = min_b + cp.random.rand(popsize, 1, 4) * diff
    ends = cp.concatenate((A, B), axis=1)
    rank = cp.arange(nk)
    travel = ends[:, 1] - ends[:, 0]
    # Random ramp centres u and widths sigma per individual and joint.
    u = cp.random.rand(popsize, 1, 4) * (nk - 1)
    sigma = (1 + cp.random.rand(popsize, 4) * (nk / 6 - 1)).reshape(popsize, 1, 4)
    # Smooth transition from start to end angle across the nk knot points.
    pop = travel.reshape(popsize, 1, 4) * 0.5 * (
        1 + cp.tanh((rank.reshape(1, nk, 1) - u.reshape(popsize, 1, 4)) / sigma)
    ).reshape(popsize, nk, 4)
    pop += ends[:, 0].reshape(popsize, 1, 4)
    return pop.clip(min_b, max_b)

def backward(self, dh_next, dc_next):
    Wx, Wh, b = self.params
    x, h_prev, c_prev, i, f, g, o, c_next = self.cache

    dt = dh_next * o
    dch = dt * (1 - cp.tanh(c_next) ** 2)
    dc = dch + dc_next
    dc_prev = dc * f

    df = dc * c_prev
    dg = dc * i
    di = dc * g
    do = dh_next * cp.tanh(c_next)

    di *= i * (1 - i)
    df *= f * (1 - f)
    do *= o * (1 - o)
    dg *= (1 - g ** 2)

    dA = cp.hstack((df, dg, di, do))
    dx = cp.dot(dA, Wx.T)
    dWx = cp.dot(x.T, dA)
    dh_prev = cp.dot(dA, Wh.T)
    dWh = cp.dot(h_prev.T, dA)
    db = dA.sum(axis=0)

    self.grads[0][...] = dWx
    self.grads[1][...] = dWh
    self.grads[2][...] = db
    return dx, dh_prev, dc_prev

def lra_e(self, x, target, beta, gamma, print_flag=False):
    h1 = cp.dot(x, self.W_f1)
    z1 = cp.tanh(h1)
    h2 = cp.dot(z1, self.W_f2)
    z2 = cp.tanh(h2)
    h3 = cp.dot(z2, self.W_f3)
    output = softmax(h3)

    e3 = -target / output
    if print_flag:
        print(e3)
    y2 = cp.tanh(h2 - beta * cp.dot(e3, self.B3))
    e2 = -2 * (y2 - z2)
    y1 = cp.tanh(h1 - beta * cp.dot(e2, self.B2))
    e1 = -2 * (y1 - z1)

    # Alternative deltas with explicit activation derivatives:
    # delta_Wf3 = cp.dot(z2.T, e3 * h3 * (1 - h3))
    # delta_Wf2 = cp.dot(z1.T, e2 * tanh_grad(h2))
    # delta_Wf1 = cp.dot(x.T, e1 * tanh_grad(h1))
    delta_Wf3 = cp.dot(z2.T, e3)
    delta_Wf2 = cp.dot(z1.T, e2)
    delta_Wf1 = cp.dot(x.T, e1)
    delta_B3 = -gamma * delta_Wf3.T
    delta_B2 = -gamma * delta_Wf2.T

    alpha = 0.05
    self.W_f1 -= alpha * delta_Wf1
    self.W_f2 -= alpha * delta_Wf2
    self.W_f3 -= alpha * delta_Wf3
    self.B3 -= alpha * delta_B3
    self.B2 -= alpha * delta_B2

def predict(self, x):
    h1 = cp.dot(x, self.W_f1)
    h1 = cp.tanh(h1)
    h2 = cp.dot(h1, self.W_f2)
    h2 = cp.tanh(h2)
    h3 = cp.dot(h2, self.W_f3)
    output = softmax(h3)
    return output

def sigmoid_kernel(X, Y, gamma=None, coef0=1):
    if gamma is None:
        gamma = 1.0 / X.shape[1]
    K = cp.dot(X, Y.T)
    K *= gamma
    K += coef0
    cp.tanh(K, K)  # compute tanh in place
    return K

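# Usage sketch for sigmoid_kernel above (shapes and values are illustrative
# assumptions, not from the original source): the kernel matrix satisfies
# K[i, j] = tanh(gamma * <X_i, Y_j> + coef0) with shape (n_X, n_Y).
import cupy as cp

X = cp.random.rand(5, 3)
Y = cp.random.rand(4, 3)
K = sigmoid_kernel(X, Y)  # gamma defaults to 1 / n_features = 1/3
assert K.shape == (5, 4)
assert bool(cp.allclose(K, cp.tanh(X @ Y.T / 3 + 1)))
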
def calc_gru_z(W_z_x, U_z_h):
    # z = cp.tanh((W_z_x + U_z_h) * half) * half + half
    z = W_z_x
    z += U_z_h
    z *= 0.5
    cp.tanh(z, z)
    z *= 0.5
    z += 0.5
    return z

def calc_gru_r(W_r_x, U_r_h):
    # r = cp.tanh((W_r_x + U_r_h) * half) * half + half
    r = W_r_x
    r += U_r_h
    r *= 0.5
    cp.tanh(r, r)
    r *= 0.5
    r += 0.5
    return r

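# calc_gru_z / calc_gru_r above compute the gate activation via the identity
# sigmoid(x) = 0.5 * tanh(x / 2) + 0.5, which sidesteps exp() overflow for
# large-magnitude negative inputs. A quick sanity check (test values are
# illustrative assumptions):
import cupy as cp

x = cp.linspace(-10, 10, 7)
gate = calc_gru_z(x.copy(), cp.zeros_like(x))  # note: W_z_x is mutated in place
assert bool(cp.allclose(gate, 1 / (1 + cp.exp(-x))))
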
def gradient(self, x, target, epoch):
    reg = 0.01
    h1 = cp.dot(x, self.W_f1) + self.b1
    h1_ = cp.tanh(h1)
    h2 = cp.dot(h1_, self.W_f2) + self.b2
    h2_ = cp.tanh(h2)
    h3 = cp.dot(h2_, self.W_f3) + self.b3
    h3_ = cp.tanh(h3)
    h4 = cp.dot(h3_, self.W_f4) + self.b4
    h4_ = cp.tanh(h4)
    h5 = cp.dot(h4_, self.W_f5) + self.b5
    output = softmax(h5)

    delta5 = (output - target) / batch_size
    self.delta_Wf5 = cp.dot(h4_.T, delta5) + reg * self.W_f5
    self.delta_b5 = cp.dot(cp.ones(batch_size), delta5) + reg * self.b5
    delta4 = tanh_grad(h4) * cp.dot(delta5, self.W_f5.T)
    self.delta_Wf4 = cp.dot(h3_.T, delta4) + reg * self.W_f4
    self.delta_b4 = cp.dot(cp.ones(batch_size), delta4) + reg * self.b4
    delta3 = tanh_grad(h3) * cp.dot(delta4, self.W_f4.T)
    self.delta_Wf3 = cp.dot(h2_.T, delta3) + reg * self.W_f3
    self.delta_b3 = cp.dot(cp.ones(batch_size), delta3) + reg * self.b3
    delta2 = tanh_grad(h2) * cp.dot(delta3, self.W_f3.T)
    self.delta_Wf2 = cp.dot(h1_.T, delta2) + reg * self.W_f2
    self.delta_b2 = cp.dot(cp.ones(batch_size), delta2) + reg * self.b2
    delta1 = tanh_grad(h1) * cp.dot(delta2, self.W_f2.T)
    self.delta_Wf1 = cp.dot(x.T, delta1) + reg * self.W_f1
    self.delta_b1 = cp.dot(cp.ones(batch_size), delta1) + reg * self.b1

    # eta = self.learning_rate(epoch)
    eta = 0.02
    # eta, self.h_W1 = self.rms_prop(self.delta_Wf1, self.h_W1)
    self.W_f1 -= eta * self.delta_Wf1
    # eta, self.h_W2 = self.rms_prop(self.delta_Wf2, self.h_W2)
    self.W_f2 -= eta * self.delta_Wf2
    # eta, self.h_W3 = self.rms_prop(self.delta_Wf3, self.h_W3)
    self.W_f3 -= eta * self.delta_Wf3
    # eta, self.h_W4 = self.rms_prop(self.delta_Wf4, self.h_W4)
    self.W_f4 -= eta * self.delta_Wf4
    # eta, self.h_W5 = self.rms_prop(self.delta_Wf5, self.h_W5)
    self.W_f5 -= eta * self.delta_Wf5
    # eta, self.h_b1 = self.rms_prop(self.delta_b1, self.h_b1)
    self.b1 -= eta * self.delta_b1
    # eta, self.h_b2 = self.rms_prop(self.delta_b2, self.h_b2)
    self.b2 -= eta * self.delta_b2
    # eta, self.h_b3 = self.rms_prop(self.delta_b3, self.h_b3)
    self.b3 -= eta * self.delta_b3
    # eta, self.h_b4 = self.rms_prop(self.delta_b4, self.h_b4)
    self.b4 -= eta * self.delta_b4
    # eta, self.h_b5 = self.rms_prop(self.delta_b5, self.h_b5)
    self.b5 -= eta * self.delta_b5

def forward(self, x, y_prev, c_prev):
    u = np.matmul(x, self.w) + np.matmul(y_prev, self.v) + self.b.reshape(4, 1, -1)
    a0 = sigmoid(u[0])  # forget gate
    a1 = sigmoid(u[1])  # input gate
    a2 = np.tanh(u[2])  # candidate cell state
    a3 = sigmoid(u[3])  # output gate
    self.gates = np.stack((a0, a1, a2, a3))
    self.c = a0 * c_prev + a1 * a2
    self.y = a3 * np.tanh(self.c)

def get_phase(num_of_vort, pos, x_pts, y_pts, grid_x, grid_y,
              grid_len_x, grid_len_y, component):
    """ Gets phase distribution of N dipoles."""
    # Phase initialisation (zeros, since theta_k is accumulated below):
    theta_tot = cp.zeros((x_pts, y_pts))

    # Scale pts:
    x_tilde = 2 * cp.pi * ((grid_x - grid_x.min()) / grid_len_x)
    y_tilde = 2 * cp.pi * ((grid_y - grid_y.min()) / grid_len_y)

    switch = component == '2'

    for i in range(num_of_vort // 2):
        theta_k = cp.zeros((x_pts, y_pts))

        # Alternate the +/- ordering every 24 dipoles:
        if i % 24 == 0 and i > 0:
            switch ^= True
        if switch:
            x_p, y_p = next(pos)
            x_m, y_m = next(pos)
        else:
            x_m, y_m = next(pos)
            x_p, y_p = next(pos)

        # Scaling vortex positions:
        x_m_tilde = 2 * cp.pi * ((x_m - grid_x.min()) / grid_len_x)
        y_m_tilde = 2 * cp.pi * ((y_m - grid_y.min()) / grid_len_y)
        x_p_tilde = 2 * cp.pi * ((x_p - grid_x.min()) / grid_len_x)
        y_p_tilde = 2 * cp.pi * ((y_p - grid_y.min()) / grid_len_y)

        # Aux variables
        Y_minus = y_tilde - y_m_tilde
        X_minus = x_tilde - x_m_tilde
        Y_plus = y_tilde - y_p_tilde
        X_plus = x_tilde - x_p_tilde

        heav_xp = cp.asarray(np.heaviside(cp.asnumpy(X_plus), 1.))
        heav_xm = cp.asarray(np.heaviside(cp.asnumpy(X_minus), 1.))

        for nn in cp.arange(-5, 6):
            theta_k += cp.arctan(cp.tanh((Y_minus + 2 * cp.pi * nn) / 2)
                                 * cp.tan((X_minus - cp.pi) / 2)) \
                       - cp.arctan(cp.tanh((Y_plus + 2 * cp.pi * nn) / 2)
                                   * cp.tan((X_plus - cp.pi) / 2)) \
                       + cp.pi * (heav_xp - heav_xm)

        theta_k -= y_tilde * (x_p_tilde - x_m_tilde) / (2 * cp.pi)
        theta_tot += theta_k

    return theta_tot

def time_fusion_lstm_grad_grad(c_prev, a, i, f, o, c, gc, gh,
                               ggc_prev, gga, ggi, ggf, ggo,
                               gc_prev, ga, gi, gf, go, gc_next, ggc, ggh):
    def _cupy_sigmoid(x):
        half = x.dtype.type(0.5)
        return cupy.tanh(x * half) * half + half

    def _grad_grad_sigmoid(x):
        return x * (1 - x) * (1 - 2 * x)

    def _grad_sigmoid(x):
        return x * (1 - x)

    def _grad_tanh(x):
        return 1 - x * x

    def _grad_grad_tanh(x, gx):
        return -2 * x * gx

    sig_o = _cupy_sigmoid(o)
    gsig_o = _grad_sigmoid(sig_o)
    ggsig_o = _grad_grad_sigmoid(sig_o)
    sig_i = _cupy_sigmoid(i)
    gsig_i = _grad_sigmoid(sig_i)
    ggsig_i = _grad_grad_sigmoid(sig_i)
    sig_f = _cupy_sigmoid(f)
    gsig_f = _grad_sigmoid(sig_f)
    ggsig_f = _grad_grad_sigmoid(sig_f)
    tanh_a = cupy.tanh(a)
    gtanh_a = _grad_tanh(tanh_a)
    ggtanh_a = _grad_grad_tanh(tanh_a, gtanh_a)
    tanh_c = cupy.tanh(c)
    gtanh_c = _grad_tanh(tanh_c)
    ggtanh_c = _grad_grad_tanh(tanh_c, gtanh_c)

    gc_bar = gh * sig_o * gtanh_c + gc

    gc_prev[:] = ggf * gc_bar * gsig_f
    ga[:] = (gga * sig_i * ggtanh_a + ggi * gtanh_a * gsig_i) * gc_bar
    gi[:] = (gga * gtanh_a * gsig_i + ggi * tanh_a * ggsig_i) * gc_bar
    gf[:] = (ggc_prev * (gh * sig_o * gtanh_c + gc) * gsig_f
             + ggf * gc_bar * c_prev * ggsig_f)

    ggc[:] = (ggc_prev * sig_f
              + gga * sig_i * gtanh_a
              + ggi * tanh_a * gsig_i
              + ggf * c_prev * gsig_f)

    dgc_do = gh * gsig_o * gtanh_c
    go[:] = ggc * dgc_do + ggo * gh * tanh_c * ggsig_o
    dgc_dc = gh * sig_o * ggtanh_c
    gc_next[:] = ggc * dgc_dc + ggo * gh * gtanh_c * gsig_o
    ggh[:] = ggc * sig_o * gtanh_c + ggo * tanh_c * gsig_o
    return gc_prev, ga, gi, gf, go, gc_next, ggc, ggh

def predict(self, x):
    h1 = cp.dot(x, self.W_f1) + self.b1
    h1 = cp.tanh(h1)
    h2 = cp.dot(h1, self.W_f2) + self.b2
    h2 = cp.tanh(h2)
    h3 = cp.dot(h2, self.W_f3) + self.b3
    h3 = cp.tanh(h3)
    h4 = cp.dot(h3, self.W_f4) + self.b4
    h4 = cp.tanh(h4)
    h5 = cp.dot(h4, self.W_f5) + self.b5
    output = softmax(h5)
    return output

def get_phase(num_of_vort, pos, grid_x, grid_y):
    """
    num_of_vort: number of vortices to imprint
    pos: iterable of positions to imprint the vortices
    grid_x: X-meshgrid
    grid_y: Y-meshgrid
    """
    # Constructing necessary grid parameters:
    x_pts, y_pts = len(grid_x[:, 0]), len(grid_y[0, :])
    dx, dy = grid_x[0, 1] - grid_x[0, 0], grid_y[1, 0] - grid_y[0, 0]
    grid_len_x, grid_len_y = x_pts * dx, y_pts * dy

    # Phase initialisation (zeros, since theta_k is accumulated below):
    theta_tot = cp.zeros((x_pts, y_pts))

    # Scale pts:
    x_tilde = 2 * cp.pi * ((grid_x - grid_x.min()) / grid_len_x)
    y_tilde = 2 * cp.pi * ((grid_y - grid_y.min()) / grid_len_y)

    for _ in range(num_of_vort // 2):
        theta_k = cp.zeros((x_pts, y_pts))

        x_m, y_m = next(pos)
        x_p, y_p = next(pos)

        # Scaling vortex positions:
        x_m_tilde = 2 * cp.pi * ((x_m - grid_x.min()) / grid_len_x)
        y_m_tilde = 2 * cp.pi * ((y_m - grid_y.min()) / grid_len_y)
        x_p_tilde = 2 * cp.pi * ((x_p - grid_x.min()) / grid_len_x)
        y_p_tilde = 2 * cp.pi * ((y_p - grid_y.min()) / grid_len_y)

        # Aux variables
        Y_minus = y_tilde - y_m_tilde
        X_minus = x_tilde - x_m_tilde
        Y_plus = y_tilde - y_p_tilde
        X_plus = x_tilde - x_p_tilde

        heav_xp = cp.asarray(np.heaviside(cp.asnumpy(X_plus), 1.))
        heav_xm = cp.asarray(np.heaviside(cp.asnumpy(X_minus), 1.))

        for nn in cp.arange(-5, 6):
            theta_k += cp.arctan(cp.tanh((Y_minus + 2 * cp.pi * nn) / 2)
                                 * cp.tan((X_minus - cp.pi) / 2)) \
                       - cp.arctan(cp.tanh((Y_plus + 2 * cp.pi * nn) / 2)
                                   * cp.tan((X_plus - cp.pi) / 2)) \
                       + cp.pi * (heav_xp - heav_xm)

        theta_k -= y_tilde * (x_p_tilde - x_m_tilde) / (2 * cp.pi)
        theta_tot += theta_k

    return theta_tot

def go_cupy():
    import cupy as cp

    a = cp.arange(100).reshape(10, 10)
    trace = 0.0
    for i in range(a.shape[0]):
        trace += cp.tanh(a[i, i])
    return a + trace

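# The loop in go_cupy launches one tanh kernel per diagonal element; an
# equivalent vectorized form (a sketch computing the same quantity) does the
# whole diagonal in a single kernel call:
def go_cupy_vectorized():
    import cupy as cp

    a = cp.arange(100).reshape(10, 10)
    trace = cp.tanh(cp.diagonal(a)).sum()  # one tanh over the diagonal, then reduce
    return a + trace
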
def test_elementwise_trinary(self):
    desc_a = cutensor.create_tensor_descriptor(self.a, ct.OP_SQRT)
    desc_b = cutensor.create_tensor_descriptor(self.b, ct.OP_TANH)
    desc_c = cutensor.create_tensor_descriptor(self.c, ct.OP_COS)
    d = cutensor.elementwise_trinary(self.alpha, self.a, desc_a, self.mode_a,
                                     self.beta, self.b, desc_b, self.mode_b,
                                     self.gamma, self.c, desc_c, self.mode_c,
                                     op_AB=ct.OP_ADD, op_ABC=ct.OP_MUL)
    testing.assert_allclose(
        (self.alpha * cupy.sqrt(self.a_transposed)
         + self.beta * cupy.tanh(self.b_transposed))
        * self.gamma * cupy.cos(self.c),
        d, rtol=1e-6, atol=1e-6)

def tanh_ac(z):
    # Use the GPU only for mid-sized arrays where the transfer cost pays off.
    if cupy_ready and (z.size > 400000) and (z.size <= gpu_array_max_size):
        z_gpu = cp.asarray(z)
        r_gpu = cp.tanh(z_gpu)
        return cp.asnumpy(r_gpu)
    else:
        return np.tanh(z)

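# tanh_ac reads module-level flags that are not shown in this listing; they
# are assumed to look something like the following (illustrative values, not
# from the original source), after which small inputs stay on the CPU path:
import numpy as np
try:
    import cupy as cp
    cupy_ready = True
except ImportError:
    cupy_ready = False
gpu_array_max_size = 10 ** 8  # fall back to NumPy above this size

z = np.linspace(-3, 3, 10)
r = tanh_ac(z)  # small input: CPU path
assert np.allclose(r, np.tanh(z))
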
def tanh(x: Array, /) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.tanh <numpy.tanh>`.

    See its docstring for more information.
    """
    if x.dtype not in _floating_dtypes:
        raise TypeError("Only floating-point dtypes are allowed in tanh")
    return Array._new(np.tanh(x._array))

def free_esn(self, wash, simulation_time):
    x = self.x
    r = self.r
    states_T = cp.zeros((self.N, simulation_time))
    for i in range(simulation_time):
        x = (1.0 - wash) * x + wash * cp.tanh(cp.matmul(self.M, x))
        states_T[:, i] = x[:, 0]
    self.x = x
    return states_T

def gradient(self, x, target):
    h1 = cp.dot(x, self.W_f1) + self.b1
    h1_ = cp.tanh(h1)
    h2 = cp.dot(h1_, self.W_f2) + self.b2
    h2_ = cp.tanh(h2)
    h3 = cp.dot(h2_, self.W_f3) + self.b3
    h3_ = cp.tanh(h3)
    h4 = cp.dot(h3_, self.W_f4) + self.b4
    # h4_ = cp.tanh(h4)
    # h5 = cp.dot(h4_, self.W_f5) + self.b5
    output = softmax(h4)

    delta4 = (output - target) / batch_size
    # delta_Wf5 = cp.dot(h4_.T, delta5)
    # delta_b5 = cp.dot(cp.ones(batch_size), delta5)
    # delta4 = tanh_grad(h4) * cp.dot(delta5, self.B5)
    delta_Wf4 = cp.dot(h3_.T, delta4)
    delta_b4 = cp.dot(cp.ones(batch_size), delta4)
    delta3 = tanh_grad(h3) * cp.dot(delta4, self.W_f4.T)
    delta_Wf3 = cp.dot(h2_.T, delta3)
    delta_b3 = cp.dot(cp.ones(batch_size), delta3)
    delta2 = tanh_grad(h2) * cp.dot(delta3, self.W_f3.T)
    delta_Wf2 = cp.dot(h1_.T, delta2)
    delta_b2 = cp.dot(cp.ones(batch_size), delta2)
    delta1 = tanh_grad(h1) * cp.dot(delta2, self.W_f2.T)
    delta_Wf1 = cp.dot(x.T, delta1)
    delta_b1 = cp.dot(cp.ones(batch_size), delta1)

    alpha1 = 0.02
    self.W_f1 -= alpha1 * delta_Wf1
    self.W_f2 -= alpha1 * delta_Wf2
    self.W_f3 -= alpha1 * delta_Wf3
    self.W_f4 -= alpha1 * delta_Wf4
    # self.W_f5 -= alpha1 * delta_Wf5
    self.b1 -= alpha1 * delta_b1
    self.b2 -= alpha1 * delta_b2
    self.b3 -= alpha1 * delta_b3
    self.b4 -= alpha1 * delta_b4

def update_reservoir(self, u, n, Y):
    # u is input at specific time
    # u has shape (N_u (3 for L63))
    # See page 16 eqtn 18 of Lukosevicius PracticalESN for feedback info.
    x_n_tilde = cp.tanh(cp.matmul(self.W, self.x[n])
                        + cp.array(sp.matmul(self.W_in,
                                             sp.hstack((sp.array([1]), u))))
                        + cp.array(sp.matmul(self.W_fb, Y)))
    self.x[n + 1] = (cp.multiply(1 - cp.array(self.alpha_matrix), cp.array(self.x[n]))
                     + cp.multiply(cp.array(self.alpha_matrix), x_n_tilde))

def free_run(self, dt, simulation_time):
    x = self.x
    r = self.r
    tspan = cp.arange(0, simulation_time, dt)
    states_T = cp.zeros((self.N, len(tspan)))
    for i, t in enumerate(tspan):
        x = (1.0 - dt) * x + cp.dot(self.M, r * dt)
        r = cp.tanh(x)  # (N, 1)
        states_T[:, i] = x[:, 0]
    return states_T

def gradient(self, x, target, epoch):
    h1 = cp.dot(x, self.W_f1) + self.b1
    h1_ = cp.tanh(h1)
    h2 = cp.dot(h1_, self.W_f2) + self.b2
    h2_ = cp.tanh(h2)
    h3 = cp.dot(h2_, self.W_f3) + self.b3
    h3_ = cp.tanh(h3)
    h4 = cp.dot(h3_, self.W_f4) + self.b4
    h4_ = cp.tanh(h4)
    h5 = cp.dot(h4_, self.W_f5) + self.b5
    output = softmax(h5)

    delta5 = (output - target) / batch_size
    self.delta_Wf5 = cp.dot(h4_.T, delta5)
    self.delta_b5 = cp.dot(cp.ones(batch_size), delta5)
    delta4 = tanh_grad(h4) * cp.dot(delta5, self.W_f5.T)
    self.delta_Wf4 = cp.dot(h3_.T, delta4)
    self.delta_b4 = cp.dot(cp.ones(batch_size), delta4)
    delta3 = tanh_grad(h3) * cp.dot(delta4, self.W_f4.T)
    self.delta_Wf3 = cp.dot(h2_.T, delta3)
    self.delta_b3 = cp.dot(cp.ones(batch_size), delta3)
    delta2 = tanh_grad(h2) * cp.dot(delta3, self.W_f3.T)
    self.delta_Wf2 = cp.dot(h1_.T, delta2)
    self.delta_b2 = cp.dot(cp.ones(batch_size), delta2)
    delta1 = tanh_grad(h1) * cp.dot(delta2, self.W_f2.T)
    self.delta_Wf1 = cp.dot(x.T, delta1)
    self.delta_b1 = cp.dot(cp.ones(batch_size), delta1)

    eta = 0.02
    self.W_f1 -= eta * self.delta_Wf1
    self.W_f2 -= eta * self.delta_Wf2
    self.W_f3 -= eta * self.delta_Wf3
    self.W_f4 -= eta * self.delta_Wf4
    self.W_f5 -= eta * self.delta_Wf5
    self.b1 -= eta * self.delta_b1
    self.b2 -= eta * self.delta_b2
    self.b3 -= eta * self.delta_b3
    self.b4 -= eta * self.delta_b4
    self.b5 -= eta * self.delta_b5

def tanh(inp) -> 'Tensor':
    _check_tensors(inp)
    engine = _get_engine(inp)

    output_array = engine.tanh(inp.data)
    return _create_tensor(
        inp,
        data=output_array,
        func=wrapped_partial(tanh_backward, inp=inp, out=output_array)
    )

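# The tanh op above saves the forward output so the backward pass can use
# d/dx tanh(x) = 1 - tanh(x)^2 without recomputing tanh. A minimal sketch of
# the referenced tanh_backward, assuming this gradient-hook signature and a
# `.grad` attribute on tensors (the original library's exact interface is
# not shown here):
def tanh_backward(grad_output, inp, out):
    engine = _get_engine(inp)
    # grad wrt input = upstream grad * (1 - tanh(x)^2), reusing `out`
    inp.grad = grad_output * (1 - engine.square(out))
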
def gradient(self, x, target, alpha):
    h1 = cp.dot(x, self.W_f1)
    h1_ = cp.tanh(h1)
    h2 = cp.dot(h1_, self.W_f2)
    h2_ = cp.tanh(h2)
    h3 = cp.dot(h2_, self.W_f3)
    output = softmax(h3)

    delta3 = (output - target) / batch_size
    delta_Wf3 = cp.dot(h2_.T, delta3)
    delta2 = tanh_grad(h2) * cp.dot(delta3, self.W_f3.T)
    delta_Wf2 = cp.dot(h1_.T, delta2)
    delta1 = tanh_grad(h1) * cp.dot(delta2, self.W_f2.T)
    delta_Wf1 = cp.dot(x.T, delta1)

    self.W_f1 -= alpha * delta_Wf1
    self.W_f2 -= alpha * delta_Wf2
    self.W_f3 -= alpha * delta_Wf3

def tanh(X):
    """Compute the hyperbolic tan function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
        The transformed data.
    """
    return np.tanh(X, out=X)

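# Usage note for the inplace tanh above: passing out=X overwrites the input
# buffer, avoiding a copy for large activation matrices. A small illustration
# (values are arbitrary):
import numpy as np

X = np.array([[0.0, 1.0], [-1.0, 2.0]])
X_new = tanh(X)
assert X_new is X  # transformed in place; the same buffer is returned
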
def tanh_derivative(self, x):
    # convert to 2d array of shape (features, 1)
    x = cp.expand_dims(x, axis=1)
    # compute matrix multiplication for data with hidden layer matrix into hidden_output
    wtx = cp.matmul(self.h_matrix, x)
    # compute link function on hidden_outputs
    wtx = self.tanh_link(wtx)
    return cp.ones(wtx.shape) - cp.square(cp.tanh(wtx))

def sample(memory, seed_ix, n):
    """
    sample a sequence of integers from the model
    h is memory state, seed_ix is seed letter for first time step
    """
    h, c = memory
    x = cp.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        # One LSTM step on the current one-hot input.
        wes = Wex @ x
        z = cp.vstack((h, wes))
        f = sigmoid(Wf @ z + bf)
        ins = sigmoid(Wi @ z + bi)
        c_ = cp.tanh(Wc @ z + bc)
        cs = cp.multiply(f, c) + cp.multiply(ins, c_)
        o = sigmoid(Wo @ z + bo)
        h = cp.multiply(o, cp.tanh(cs))
        c = cs  # carry the cell state forward to the next step
        y = Why @ h + by
        p = softmax(y)
        # Draw the next index from the output distribution.
        ix = cp.random.choice(a=range(vocab_size), size=1, p=p.ravel()).item()
        x = cp.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

def forward(self, x, h_prev, c_prev):
    Wx, Wh, b = self.params
    N, H = h_prev.shape

    A = cp.dot(x, Wx) + cp.dot(h_prev, Wh) + b

    # slice
    f = A[:, :H]
    g = A[:, H:2 * H]
    i = A[:, 2 * H:3 * H]
    o = A[:, 3 * H:]

    f = sigmoid(f)
    g = cp.tanh(g)
    i = sigmoid(i)
    o = sigmoid(o)

    c_next = f * c_prev + g * i
    h_next = o * cp.tanh(c_next)

    self.cache = (x, h_prev, c_prev, i, f, g, o, c_next)
    return h_next, c_next

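# A minimal smoke test for the LSTM forward pass above (its cache pairs with
# the backward pass earlier in this listing). The host class `_Cell`, the
# sigmoid helper, and the sizes are illustrative assumptions:
import cupy as cp

def sigmoid(x):
    return 1 / (1 + cp.exp(-x))

class _Cell:
    def __init__(self, Wx, Wh, b):
        self.params = (Wx, Wh, b)
        self.cache = None

_Cell.forward = forward  # reuse the forward function defined above

N, D, H = 2, 3, 4
cell = _Cell(cp.random.randn(D, 4 * H), cp.random.randn(H, 4 * H), cp.zeros(4 * H))
h, c = cell.forward(cp.random.randn(N, D), cp.zeros((N, H)), cp.zeros((N, H)))
assert h.shape == (N, H) and c.shape == (N, H)
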
def get_phase(num_of_vort, pos, x_pts, y_pts, grid_x, grid_y,
              grid_len_x, grid_len_y):
    # Phase initialisation (zeros, since theta_k is accumulated below):
    theta_tot = cp.zeros((x_pts, y_pts))

    # Scale pts:
    x_tilde = 2 * cp.pi * ((grid_x - grid_x.min()) / grid_len_x)
    y_tilde = 2 * cp.pi * ((grid_y - grid_y.min()) / grid_len_y)

    for _ in range(num_of_vort // 2):
        theta_k = cp.zeros((x_pts, y_pts))

        x_m, y_m = next(pos)
        x_p, y_p = next(pos)

        # Scaling vortex positions:
        x_m_tilde = 2 * cp.pi * ((x_m - grid_x.min()) / grid_len_x)
        y_m_tilde = 2 * cp.pi * ((y_m - grid_y.min()) / grid_len_y)
        x_p_tilde = 2 * cp.pi * ((x_p - grid_x.min()) / grid_len_x)
        y_p_tilde = 2 * cp.pi * ((y_p - grid_y.min()) / grid_len_y)

        # Aux variables
        Y_minus = y_tilde - y_m_tilde
        X_minus = x_tilde - x_m_tilde
        Y_plus = y_tilde - y_p_tilde
        X_plus = x_tilde - x_p_tilde

        heav_xp = cp.asarray(np.heaviside(cp.asnumpy(X_plus), 1.))
        heav_xm = cp.asarray(np.heaviside(cp.asnumpy(X_minus), 1.))

        for nn in cp.arange(-5, 6):
            theta_k += cp.arctan(cp.tanh((Y_minus + 2 * cp.pi * nn) / 2)
                                 * cp.tan((X_minus - cp.pi) / 2)) \
                       - cp.arctan(cp.tanh((Y_plus + 2 * cp.pi * nn) / 2)
                                   * cp.tan((X_plus - cp.pi) / 2)) \
                       + cp.pi * (heav_xp - heav_xm)

        theta_k -= y_tilde * (x_p_tilde - x_m_tilde) / (2 * cp.pi)
        theta_tot += theta_k

    return theta_tot

def forward():
    for i in range(l):
        a = cache['a' + str(i)]
        w = parameters['w' + str(i + 1)]
        b = parameters['b' + str(i + 1)]
        z = cp.dot(w, a) + b
        if i != l - 1:
            a = cp.tanh(z)
        else:
            a = softmax(z)
        cache['a' + str(i + 1)] = a
        cache['z' + str(i + 1)] = z
    return a

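# forward above reads the module-level globals l, parameters, and cache;
# their layout is inferred from those reads. A sketch of a setup that makes
# it runnable (layer sizes, the column-major batch, and the softmax helper
# are illustrative assumptions):
import cupy as cp

l = 2
parameters = {
    'w1': cp.random.randn(4, 3) * 0.1, 'b1': cp.zeros((4, 1)),
    'w2': cp.random.randn(2, 4) * 0.1, 'b2': cp.zeros((2, 1)),
}
cache = {'a0': cp.random.randn(3, 5)}  # a batch of 5 column-vector inputs

def softmax(z):
    e = cp.exp(z - z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True)

probs = forward()
assert probs.shape == (2, 5)
assert bool(cp.allclose(probs.sum(axis=0), 1))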