def forward_pass(self, data_sqeuence):
    """
    Feed a sequence through the RNN unit and compute next-word probabilities.

    :param data_sqeuence: the sequence for which the next words are predicted.
        Each element is mapped to a vocabulary index with
        ``self.get_word_index``.
    :return: a two-element list ``[O, S]``:
        1. matrix O - one row per time step (``len(data_sqeuence)`` rows).
           Row t is a probability vector of vocabulary size predicting
           word t+1 of the sequence.
        2. matrix S - the hidden state values, ``len(data_sqeuence) + 1``
           rows. Row t is the hidden state at time t; the extra last row is
           the all-zero initial state S(-1).
    """
    # Length of the sequence.
    T = len(data_sqeuence)

    # All hidden states are kept because they are needed for the gradient
    # calculations. The extra last row is never written, so it stays zero and
    # serves as S(-1): at t == 0 the lookup S[t - 1] resolves to S[-1].
    S = np.zeros(shape=(T + 1, self.hidden_layer_size))
    O = np.zeros(shape=(T, self.vocabulary_size))

    # For each time step t, predict word t+1 of the sequence.
    for t, word in enumerate(data_sqeuence):
        word_index = self.get_word_index(word)
        S[t] = np.tanh(S[t - 1].dot(self.W) + self.U[:, word_index])

        # NumPy has no softmax function, so it is computed here. Subtracting
        # the maximum logit leaves the result unchanged and keeps exp() from
        # overflowing.
        logits = S[t].dot(self.V)
        exp_logits = np.exp(logits - np.max(logits))
        O[t] = exp_logits / exp_logits.sum()

    return [O, S]
def feed_forward(self, input):
    """
    Run the RNN forward over a sequence of word indices.

    For each step t the recurrence is
    ``s[t] = tanh(U[:, input[t]] + W . s[t-1])`` and
    ``o[t] = softmax(V . s[t])``.

    :param input: sequence of indices; each one selects a column of
        ``self.U``.
    :return: a two-element list ``[o, s]``. ``o`` has one row of
        ``self.vocab_size`` probabilities per step. ``s`` has
        ``len(input) + 1`` rows of hidden states; the extra last row is the
        all-zero initial state.
    """
    T = len(input)

    # The last row of s is never written, so it stays zero and acts as the
    # initial state: at t == 0 the lookup s[t - 1] resolves to s[-1].
    s = np.zeros((T + 1, self.hidden_units))
    o = np.zeros((T, self.vocab_size))

    for t in range(T):
        s[t] = np.tanh(self.U[:, input[t]] + self.W.dot(s[t - 1]))

        # NumPy has no softmax function, so it is computed here. Subtracting
        # the maximum logit leaves the result unchanged and keeps exp() from
        # overflowing.
        logits = self.V.dot(s[t])
        exp_logits = np.exp(logits - np.max(logits))
        o[t] = exp_logits / exp_logits.sum()

    return [o, s]
def rnn_cell_forward(xt, a_prev, parameters):
    """
    Compute a single forward step of an RNN cell.

    :param xt: input at the current step; multiplied on the left by ``Wax``.
    :param a_prev: previous hidden state; multiplied on the left by ``Waa``.
    :param parameters: dict holding the weights ``"Wax"``, ``"Waa"``,
        ``"Wya"`` and the biases ``"ba"``, ``"by"``.
    :return: tuple ``(a_next, yt_pred, cache)`` where ``a_next`` is the new
        hidden state, ``yt_pred`` is the softmax of ``Wya . a_next + by``
        taken over axis 0, and ``cache`` is
        ``(a_next, a_prev, xt, parameters)``.
    """
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)

    # NumPy has no softmax function, so it is computed here. The output units
    # come from the rows of Wya, so normalisation runs over axis 0.
    # Subtracting the per-column maximum keeps exp() from overflowing.
    logits = np.dot(Wya, a_next) + by
    exp_logits = np.exp(logits - np.max(logits, axis=0, keepdims=True))
    yt_pred = exp_logits / np.sum(exp_logits, axis=0, keepdims=True)

    cache = (a_next, a_prev, xt, parameters)
    return a_next, yt_pred, cache
def fCE(X, Y, w):
    """
    Return the mean cross-entropy cost of the two-layer network on (X, Y).

    The forward pass is:
        z(1) = W(1)x + b(1)
        h(1) = relu(z(1))
        z(2) = W(2)h(1) + b(2)
        yhat = g(x) = softmax(z(2))

    :param X: inputs, passed to ``predictor`` together with ``W1`` and ``b1``.
    :param Y: labels with the same shape as the network output; entries equal
        to 1 select the predictions that are scored (presumably one-hot
        labels - confirm against the caller).
    :param w: packed parameters, split by ``unpack`` into ``W1, b1, W2, b2``.
    :return: the negative mean log-probability of the selected entries.
    """
    W1, b1, W2, b2 = unpack(w)

    z1 = predictor(W1, X, b1)
    h1 = reLU(z1)
    z2 = predictor(W2, h1, b2)

    # NumPy has no softmax function. The log-softmax is computed directly so
    # the cost never evaluates log(0); it equals log(softmax(z2)).
    # NOTE(review): normalisation runs over axis 0. This assumes predictor
    # returns W @ X + b with one column per example, as the formulas in the
    # docstring state. Confirm against predictor; use axis=1 if examples are
    # stored as rows.
    shifted = z2 - np.max(z2, axis=0, keepdims=True)
    log_yhat = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))

    cost = -np.mean(log_yhat[Y == 1])
    return cost
def forward_propagation(self, x):
    """
    Run the RNN forward over a sequence of word indices.

    For each step t:
        S[t] = tanh(U[:, x[t]] + W . S[t-1])
        O[t] = softmax(V . S[t])

    :param x: sequence of indices; each one selects a column of ``self.U``.
    :return: a two-element list ``[O, S]``. ``O`` has one row of
        ``self.word_dim`` probabilities per step. ``S`` has ``len(x) + 1``
        rows of hidden states; the extra last row is the all-zero initial
        state.
    """
    timesteps = len(x)

    # np.zeros takes the shape as one tuple; a second positional argument
    # would be read as the dtype. The last row of S is never written, so it
    # stays zero and acts as the initial state: at t == 0, S[t - 1] is S[-1].
    S = np.zeros((timesteps + 1, self.hidden_dim))
    O = np.zeros((timesteps, self.word_dim))

    for t in range(timesteps):
        S[t] = np.tanh(self.U[:, x[t]] + np.dot(self.W, S[t - 1]))

        # The output at step t is computed from the state at step t, so that
        # it depends on x[t]. NumPy has no softmax function, so it is computed
        # here; subtracting the maximum logit keeps exp() from overflowing.
        logits = np.dot(self.V, S[t])
        exp_logits = np.exp(logits - np.max(logits))
        O[t] = exp_logits / exp_logits.sum()

    return [O, S]
def issue_queries(self, query_samples):
    """
    Run inference for each query sample and report the result through ``lg``.

    For every sample, the features are fetched from ``self.qsl``, the
    ``"output"`` tensor is computed with ``self.sess``, softmax is applied,
    and the float16 bytes are handed to ``lg.QuerySamplesComplete``.

    :param query_samples: sequence of samples, each exposing ``index`` (used
        to fetch features) and ``id`` (echoed back in the response).
    """
    for sample in query_samples:
        data = self.qsl.get_features(sample.index)

        print("Processing sample id {:d} with shape = {:}".format(
            sample.index, data.shape))

        # Follow the PyTorch implementation.
        # The ONNX file has five outputs, but we only care about the one named "output".
        before_softmax = self.sess.run(["output"], {"input": data[np.newaxis, ...]})[0]

        # NumPy has no softmax function, so it is computed here. Subtracting
        # the maximum along the axis keeps exp() from overflowing.
        # NOTE(review): axis=0 is kept from the original call. The input gets
        # a leading axis via np.newaxis, so if the output keeps that axis,
        # axis 0 is the batch axis rather than the class axis. Confirm
        # against the model's output layout.
        shifted = before_softmax - np.max(before_softmax, axis=0, keepdims=True)
        exp_shifted = np.exp(shifted)
        probabilities = (
            exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)
        ).astype(np.float16)

        # response_array owns the bytes whose address and length are passed on,
        # so it must stay alive until QuerySamplesComplete has been called.
        response_array = array.array("B", probabilities.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(sample.id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def max_probability(outputs):
    """
    Return the largest softmax probability along the last axis.

    :param outputs: array-like of raw scores; softmax is taken over the last
        axis.
    :return: the maximum probability for each slice along the last axis
        (that axis is reduced away).
    """
    scores = np.asarray(outputs)

    # NumPy has no softmax function, so it is computed here. Subtracting the
    # per-slice maximum leaves the result unchanged and keeps exp() from
    # overflowing.
    exp_scores = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
    probabilities = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

    return np.max(probabilities, axis=-1)