def compute_gradient(fflayer):
    """
    args:
        yhat   - shape should be (batch, seq, size)
        target - shape should be (batch, seq)
    """
    # Works for a regular LSTM.
    # seq is a count of how many times the diff was done.
    #print("fflayer:", fflayer)
    yhat = fflayer.yhat
    target = fflayer.target
    checkdatadim(yhat, 3)
    checkdatadim(target, 2)
    batch, seq, size = yhat.shape
    target_one_hot = np.zeros((batch, seq, size))
    for batnum in range(batch):
        for i in range(seq):
            target_one_hot[batnum][i] = input_one_hot(target[batnum][i], size)
    dy = yhat.copy()
    dy = dy - target_one_hot
    if fflayer.layer.backpassdebug:
        print("*****************************************************************")
        print("BackPass:Debug:Name:", fflayer.layer.name)
        print("*****************************************************************")
        print("yhat:", yhat)
        print("label:", target)
        print("labeltransformed:", target_one_hot)
        print("gradient:", dy)
        print("*****************************************************************")
    # By convention, the gradient that has to be passed back to the next layer
    # in the reverse pass is saved to this field.
    fflayer.grad = dy
    # Return whatever has to be applied; this layer has nothing to apply, so return None.
    return None
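The heart of compute_gradient is the standard softmax-plus-cross-entropy identity: the gradient with respect to the pre-softmax activations is simply yhat minus the one-hot encoded target. Below is a minimal, self-contained sketch of that step on a toy batch; the inline one_hot helper is only a stand-in for the input_one_hot used above.

import numpy as np

def one_hot(index, size):
    # stand-in for input_one_hot: 1.0 at the target index, 0 elsewhere
    v = np.zeros(size)
    v[int(index)] = 1.0
    return v

# yhat: (batch=1, seq=2, size=4) softmax outputs; target: (batch=1, seq=2) class indices
yhat = np.array([[[0.1, 0.2, 0.3, 0.4],
                  [0.7, 0.1, 0.1, 0.1]]])
target = np.array([[3, 0]])

batch, seq, size = yhat.shape
target_one_hot = np.zeros((batch, seq, size))
for b in range(batch):
    for i in range(seq):
        target_one_hot[b][i] = one_hot(target[b][i], size)

dy = yhat - target_one_hot
print(dy)   # [[[ 0.1  0.2  0.3 -0.6]  [-0.3  0.1  0.1  0.1]]]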
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    return pred

def LOSS(X, target):
    pred = RNN(X, out_weights, out_biases)
    return cross_entropy_loss(pred.reshape([1, 1, vocab_size]), np.array([[target]]))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])], vocab_size)
                       for i in range(offset, offset + n_input)]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
    print("symbols_in_keys:", symbols_in_keys)
    target = dictionary[str(train_data[offset + n_input])]
    """with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell = LSTMCell(n_hidden, debug=True)
        result, state = dynamic_rnn(cell, symbols_in_keys)
        (c, h) = state.c, state.h
        print("final:", repr(result), state, h.shape)
    # last Feed Forward layer, to transform the result to the shape of the target
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
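LOSS above hands the reshaped (batch, seq, size) predictions and the (batch, seq) integer targets to cross_entropy_loss, which returns the softmaxed probabilities together with the per-position losses. Here is a minimal numpy sketch of that behaviour, assuming exactly this shape convention (the repository's own implementation may differ in detail); it reproduces the 0.145 / 17.02 losses printed in Example-3 further below.

import numpy as np

def softmax(z):
    # numerically stable softmax over the last axis
    e = np.exp(z - np.max(z, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def cross_entropy_loss(yhat, target):
    # yhat: (batch, seq, size) raw scores; target: (batch, seq) class indices
    softmaxed = softmax(yhat)
    batch, seq, _ = yhat.shape
    picked = softmaxed[np.arange(batch)[:, None], np.arange(seq)[None, :], target]
    return softmaxed, -np.log(picked)

sm, l = cross_entropy_loss(np.array([[[1., 3., 5., 7.], [1., -9., 4., 8.]]]),
                           np.array([[3, 1]]))
print(l)   # approximately [[ 0.145  17.019]]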
    cell = MultiRNNCell([cell1, cell2])
    result, state = dynamic_rnn(cell, symbols_in_keys)
    # Dense in this case should be out of the WeightsInitializer scope because we are passing constants.
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    return out_l(state[-1].h)

def LOSS(X, target):
    pred = RNN(X, out_weights, out_biases)
    return cross_entropy_loss(pred.reshape([1, 1, vocab_size]), np.array([[target]]))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])], vocab_size)
                       for i in range(offset, offset + n_input)]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
    print("symbols_in_keys:", symbols_in_keys)
    target = dictionary[str(train_data[offset + n_input])]
    """with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell = LSTMCell(n_hidden, debug=True)
        result, state = dynamic_rnn(cell, symbols_in_keys)
        (c, h) = state.c, state.h
        print("final:", repr(result), state, h.shape)
    # last Feed Forward layer, to transform the result to the shape of the target
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    pred = out_l(h)
    print("pred:", pred)"""
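The listing above is only the tail of the RNN(...) function that LOSS calls; cell1 and cell2 are built earlier, outside this excerpt. One plausible way the whole function could be assembled from the pieces already shown is sketched below; the placement of the cell construction inside the WeightsInitializer scope is an assumption, not the repository's confirmed code.

def RNN(x, out_weights, out_biases):
    # two stacked LSTM layers; weights initialized to the constant 0.1 as in the
    # commented-out single-cell version above (cell1/cell2 placement assumed)
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell1 = LSTMCell(n_hidden, debug=False)
        cell2 = LSTMCell(n_hidden, debug=False)
    cell = MultiRNNCell([cell1, cell2])
    result, state = dynamic_rnn(cell, x)
    # Dense stays outside the WeightsInitializer scope because constant weights are passed in
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    # state[-1].h is the final hidden state of the top (last) layer of the stack
    return out_l(state[-1].h)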
with tf.Session() as session:
    session.run(init)
    step = 0
    #offset = rnd.randint(0, n_input + 1)
    offset = 2
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0
    print("offset:", offset)
    while step < training_iters:
        if offset > (len(train_data) - end_offset):
            offset = rnd.randint(0, n_input + 1)
        print("offset:", offset)
        symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])], vocab_size)
                           for i in range(offset, offset + n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
        symbols_out_onehot = input_one_hot(dictionary[str(train_data[offset + n_input])], vocab_size)
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])
        tfbi_output, tfbi_state, tfcc_output, tfpreds, tfgrads_and_vars_tf_style, _, acc, loss = session.run(
            [bi_output, bi_state, cc_output, preds, grads_and_vars_tf_style, train_tf_style, accuracy, cost],
            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        print("tfbi_output:", tfbi_output)
        print("tfbi_state:", tfbi_state)
        print("cc_output:", tfcc_output)
        print("tfpreds:", tfpreds)
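The graph nodes fed above (x, y, cost, accuracy, and so on) are defined outside this excerpt. Purely as an illustration of the shape convention implied by the feed_dict ([batch, n_input, vocab_size] inputs, [batch, vocab_size] one-hot labels), here is a tiny self-contained TF 1.x sketch with a stand-in model in place of the bidirectional RNN; none of the names below claim to match the actual graph.

import numpy as np
import tensorflow as tf   # TF 1.x API, matching the tf.Session() usage above

n_input, vocab_size = 3, 5   # toy sizes for illustration only
x = tf.placeholder(tf.float32, [None, n_input, vocab_size])
y = tf.placeholder(tf.float32, [None, vocab_size])

# stand-in for the RNN: average over time steps, then a dense layer to vocab_size logits
logits = tf.layers.dense(tf.reduce_mean(x, axis=1), vocab_size)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1)), tf.float32))

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    xb = np.random.rand(1, n_input, vocab_size).astype(np.float32)
    yb = np.eye(vocab_size, dtype=np.float32)[[2]]   # one-hot label, like symbols_out_onehot
    print(session.run([cost, accuracy], feed_dict={x: xb, y: yb}))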
sm = softmax(x)
print("softmax:", sm)
jacobian = _softmax_grad(sm[0])
print("jacobian:", jacobian)
jacobian = _softmax_grad(sm[1])
print(jacobian)

"""
Example-2 Softmax and loss
"""
x = np.array([[1, 3, 5, 7], [1, -9, 4, 8]])
y = np.array([3, 1])
sm = softmax(x)
# prints out 0.145
print(loss(sm[0], input_one_hot(y[0], 4)))
# prints out 17.01
print(loss(sm[1], input_one_hot(y[1], 4)))

"""
Example-3 Softmax and cross_entropy_loss
"""
x = np.array([[[1, 3, 5, 7], [1, -9, 4, 8]]])
y = np.array([[3, 1]])
# prints array([[ 0.14507794, 17.01904505]])
softmaxed, loss = cross_entropy_loss(x, y)
print("loss:", loss)

"""
Example-4 Combined Gradient of Loss with respect to x
"""
batch, seq, size = x.shape
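Example-1 above calls _softmax_grad, the Jacobian of the softmax for a single probability vector. Below is a minimal sketch under the usual definition J[i][j] = s[i] * (delta_ij - s[j]), i.e. diag(s) - outer(s, s); it is shown only to make the example self-contained, and the repository's implementation may differ in form.

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def _softmax_grad(s):
    # Jacobian of softmax for one probability vector s:
    # J[i, j] = s[i] * (delta_ij - s[j]) = diag(s) - outer(s, s)
    s = np.asarray(s).reshape(-1)
    return np.diag(s) - np.outer(s, s)

x = np.array([[1., 3., 5., 7.], [1., -9., 4., 8.]])
sm = softmax(x)
print(_softmax_grad(sm[0]))   # 4x4 Jacobian for the first row of sm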