def sample_text(seed_text="The", unroll_steps_after_seed=30, temperature=0.7): seed_text_onehot = text_loader.to_one_hot( text_loader.text_to_indices(seed_text)) h = ad.Variable(np.zeros((1, hidden_size)), name="h") for seed_step in range(len(seed_text)): # iterate through the seed text x = ad.Variable(seed_text_onehot[:, seed_step, :], name="x") h = ad.Tanh(h @ w + x @ u + b_h) logits = h @ v + b_o def sample_char( logits): # sample a character from the multinomial distribution next_char_onehot = np.random.multinomial( n=1, pvals=ad.Softmax(logits / temperature)()[0]) next_char = text_loader.ind_to_char[np.argmax(next_char_onehot)] next_char_onehot = np.expand_dims(next_char_onehot, axis=0) return next_char_onehot, next_char for _ in range(unroll_steps_after_seed ): # autoregressively generate new characters next_char_onehot, next_char = sample_char(logits) seed_text += next_char x = ad.Variable(next_char_onehot, name="x") h = ad.Tanh(h @ w + x @ u + b_h) logits = h @ v + b_o return seed_text
def output_layer(self, S_Old, X):
    S = S_Old
    # gate activations; note this variant uses Tanh (not sigmoid) gates
    val_z = ad.MatMul(X, self._Uz) + ad.MatMul(S, self._Wz) + self._bz
    Z = ad.Tanh(val_z)  # carry gate
    val_g = ad.MatMul(X, self._Ug) + ad.MatMul(S, self._Wg) + self._bg
    G = ad.Tanh(val_g)  # mixing gate
    val_r = ad.MatMul(X, self._Ur) + ad.MatMul(S, self._Wr) + self._br
    R = ad.Tanh(val_r)  # reset gate
    val_h = ad.MatMul(X, self._Uh) + ad.MatMul(S * R, self._Wh) + self._bh
    H = ad.Tanh(val_h)  # candidate state
    # blend the candidate state with the carried-over old state
    S_New = ((ad.Variable(np.ones_like(G.eval())) - G) * H) + (Z * S)
    return S_New
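# A minimal NumPy-only sketch of the gated update that output_layer builds as
# an ad graph, with hypothetical small shapes. The update
#     S_new = (1 - G) * H + Z * S
# blends a candidate state H with the carried-over old state S.
def _gated_update_demo():
    import numpy as np

    rng = np.random.default_rng(0)
    n_in, n_hid = 4, 3
    X = rng.standard_normal((1, n_in))    # input batch
    S = rng.standard_normal((1, n_hid))   # previous state (S_Old)
    U = {k: rng.standard_normal((n_in, n_hid)) for k in "zgrh"}
    W = {k: rng.standard_normal((n_hid, n_hid)) for k in "zgrh"}
    b = {k: np.zeros((1, n_hid)) for k in "zgrh"}

    Z = np.tanh(X @ U["z"] + S @ W["z"] + b["z"])        # carry gate
    G = np.tanh(X @ U["g"] + S @ W["g"] + b["g"])        # mixing gate
    R = np.tanh(X @ U["r"] + S @ W["r"] + b["r"])        # reset gate
    H = np.tanh(X @ U["h"] + (S * R) @ W["h"] + b["h"])  # candidate state
    return (1 - G) * H + Z * S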
def output(self, X):
    S = ad.Tanh(ad.MatMul(X, self._W) + self._B)  # input-to-state projection
    S_list = [self.layer1.output_layer(S, X)]
    for i in range(self.number_of_layers):
        # each stacked layer consumes the previous layer's state and the input
        S_list.append(self.layers[i].output_layer(S_list[i], X))
    S_final = S_list[-1]
    val = ad.MatMul(S_final, self._Wf) + self._Bf  # final projection to logits
    return val
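# Shape walkthrough of output(), under assumed sizes (batch B, input width
# n_in, hidden width n_hid, vocabulary size n_out):
#   X:         (B, n_in)
#   S:         (B, n_hid)  after the input projection Tanh(X @ W + B)
#   S_list[i]: (B, n_hid)  each stacked layer maps state -> state, reusing X
#   val:       (B, n_out)  logits from the final projection S_final @ Wf + Bf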
def test_tanh(self):
    my_graph = ad.Tanh(self.my_w0)
    tf_graph = tf.tanh(self.tf_w0)
    wrt_vars = [self.my_w0]
    tf_vars = [self.tf_w0]
    utils.custom_test(self, my_graph, wrt_vars, tf_graph, tf_vars)
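# A sketch of the kind of check utils.custom_test presumably performs: evaluate
# the ad graph and its gradient, and compare both against TensorFlow. The exact
# utility lives in the repo's test helpers; the shapes, the "ij->" sum, and the
# TF2-style GradientTape here are assumptions made for illustration.
def _tanh_check_demo():
    import numpy as np
    import tensorflow as tf

    w0 = np.random.randn(2, 3)
    my_w0 = ad.Variable(w0, name="w0")
    tf_w0 = tf.Variable(w0)

    my_val = ad.Tanh(my_w0)()  # forward pass in ad
    my_grad = ad.grad(ad.Einsum("ij->", ad.Tanh(my_w0)), [my_w0])[0]()

    with tf.GradientTape() as tape:
        tf_val = tf.tanh(tf_w0)
    tf_grad = tape.gradient(tf_val, tf_w0)  # gradient of the implicit sum

    np.testing.assert_allclose(my_val, tf_val.numpy(), rtol=1e-5)
    np.testing.assert_allclose(my_grad, tf_grad.numpy(), rtol=1e-5)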
for step in range(10000):
    x_batch_onehot = text_loader.to_one_hot(
        text_loader.next_batch(batch_size, seq_len=unroll_steps))
    h = ad.Variable(np.zeros((1, hidden_size)), name="h")
    costs = []

    for unroll_step in range(unroll_steps - 1):
        x = ad.Variable(x_batch_onehot[:, unroll_step, :], name="x")
        h = ad.Tanh(h @ w + x @ u + b_h)
        logits = h @ v + b_o

        # the target is the next character in the sequence
        y = ad.Variable(x_batch_onehot[:, unroll_step + 1, :])
        cost = ad.Einsum("i->", ad.SoftmaxCEWithLogits(labels=y, logits=logits))
        costs.append(cost)

    total_cost = ad.Add(*costs) / unroll_steps
    param_grads = ad.grad(total_cost, params)
    new_params = optimizer([i() for i in params], [i() for i in param_grads])
    optimizer.apply_new_weights(params, new_params)

    if step % 20 == 0:
        text = "step: {}, cost: {:.2f} \n------------------------------ \n {} \n------------------------------"
        print(text.format(step, float(total_cost()), sample_text()))
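# The loop above assumes an optimizer that is called with the current weight
# values and gradient values and returns new weight values, which
# apply_new_weights then writes back into the ad.Variable nodes. A minimal
# SGD matching that interface might look like this (the .value attribute on
# ad.Variable is an assumption; the repo's optimizers may differ):
class _SketchSGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def __call__(self, weights, grads):
        # one gradient-descent step per parameter array
        return [w - self.lr * g for w, g in zip(weights, grads)]

    def apply_new_weights(self, params, new_weights):
        for param, w in zip(params, new_weights):
            param.value = w  # assumption about how ad.Variable stores its array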