# Replace all words not in our vocabulary with the unknown token
for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent]

print("\nExample sentence: '%s'" % sentences[0])
print("\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0])

# Create the training data: X is each sentence without its last word,
# y is the same sentence shifted left by one (the next-word targets)
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])

np.random.seed(10)
model = RNN(vocabulary_size)

# Forward-propagate one training example and inspect the output probabilities
o, s = model.forward_propagation(X_train[10])
print(o.shape)
print(o)

# Predict the highest-probability word at each step of the same example
predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)
print("-------------------------------------------")

'''
Cross Entropy loss is
L(y, o) = -\cfrac{1}{N}\sum_{n \in N} y_n \log o_n
'''
# A model making random (uniform) predictions should have a loss close to log(vocabulary_size)
E_loss = np.log(vocabulary_size)
print('Expected loss for random predictions is {}'.format(E_loss))
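# A quick sanity check of the expected-loss figure above (a minimal sketch, not part of
# the model; `example_vocab_size` is an assumed stand-in value): if the model predicts a
# uniform distribution over C words, the cross-entropy for the correct word is
# -log(1/C) = log(C), which matches E_loss computed above.
example_vocab_size = 8000                                     # assumed example value
uniform_probs = np.full(example_vocab_size, 1.0 / example_vocab_size)
loss_for_true_word = -np.log(uniform_probs[0])                # -log(1/C)
print(loss_for_true_word, np.log(example_vocab_size))         # both ~8.987 for C = 8000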