# vocab size
V = len(word2idx)
print("Vocab size:", V)

# We also treat beginning of sentence and end of sentence as bigrams:
#   START -> first word
#   last word -> END
start_idx = word2idx['START']
end_idx = word2idx['END']

# a matrix where:
#   row = last word
#   col = current word
#   value at [row, col] = p(current word | last word)
bigram_probs = get_bigram_probs(sentences, V, start_idx, end_idx, smoothing=0.1)

# Train a shallow neural network model: V -> D -> V.
D = 100
# Scale random init by 1/sqrt(fan_in) so pre-activations stay O(1).
W1 = np.random.randn(V, D) / np.sqrt(V)
W2 = np.random.randn(D, V) / np.sqrt(D)

losses = []
epochs = 1
lr = 1e-2


def softmax(a):
    """Row-wise softmax of a 2-D array `a` (assumes shape N x K -- TODO confirm).

    Subtracts the per-row max before exponentiating: softmax is invariant
    to a constant shift within each row, and this guarantees every row's
    exponentials include a 1, so the row sum can never underflow to zero.
    (The original subtracted the single global max, which lets rows far
    below it underflow to all zeros and divide by zero.)
    """
    a = a - a.max(axis=1, keepdims=True)
    exp_a = np.exp(a)
    return exp_a / exp_a.sum(axis=1, keepdims=True)
V = len(word2idx)
print('Vocab size:', V)

# Weight option 1: randomly initialized V x V matrix
# (bias is deliberately omitted in this experiment).
W = np.random.randn(V, V) / np.sqrt(V)

# We also treat beginning of sentence and end of sentence as bigrams:
#   START -> first word
#   last word -> END
start_idx = word2idx['START']
end_idx = word2idx['END']

# Weight option 2: a Markov bigram language model. Log-probabilities
# serve as logits, so sofmax(X.dot(W_bigram)) recovers the bigram
# distribution exactly.
bigram_probs = get_bigram_probs(sentences, V, start_idx, end_idx, smoothing=1)
W_bigram = np.log(bigram_probs)


# NOTE: name kept as-is ("sofmax" is a typo for "softmax") so existing
# callers keep working.
def sofmax(a):
    """Row-wise softmax of a 2-D array `a`.

    Subtracting the per-row max before exponentiating prevents overflow
    in np.exp for large logits; the result is mathematically unchanged
    because softmax is shift-invariant within each row.
    """
    a = a - a.max(axis=1, keepdims=True)
    expA = np.exp(a)
    return expA / expA.sum(axis=1, keepdims=True)


# NOTE: name kept as-is ("forword" is a typo for "forward") so existing
# callers keep working.
def forword(X, W):
    """Single-layer forward pass: row-wise softmax of X.dot(W)."""
    return sofmax(X.dot(W))


def predict(P_Y_given_X):
    """Return the index of the most probable class for each row."""
    return np.argmax(P_Y_given_X, axis=1)


def cross_entropy(T, pY):
    """Mean cross-entropy between targets T and predicted probabilities pY.

    Assumes T is one-hot (or a probability distribution) with the same
    shape as pY -- TODO confirm against the training loop.
    """
    return -np.mean(T * np.log(pY))
# vocab size
V = len(word2index)
print('Vocab size:', V)

# We also treat beginning of sentence and end of sentence as bigrams:
#   START -> first word
#   last word -> END
start_index = word2index['START']
end_index = word2index['END']

# a matrix where:
#   row = last word
#   col = current word
#   value at [row, col] = p(current word | last word)
bigram_probs = get_bigram_probs(sentences, V, start_index, end_index, smoothing=0.1)

# Train a shallow neural network model: V -> D -> V.
D = 100
# Scale random init by 1/sqrt(fan_in) so pre-activations stay O(1).
W1 = np.random.randn(V, D) / np.sqrt(V)
W2 = np.random.randn(D, V) / np.sqrt(D)

losses = []
epochs = 1
lr = 1e-2


def softmax(a):
    """Row-wise softmax of a 2-D array `a` (assumes shape N x K -- TODO confirm).

    Subtracts the per-row max before exponentiating: softmax is invariant
    to a constant shift within each row, and this guarantees every row's
    exponentials include a 1, so the row sum can never underflow to zero.
    (The original subtracted the single global max, which lets rows far
    below it underflow to all zeros and divide by zero.)
    """
    a = a - a.max(axis=1, keepdims=True)
    exp_a = np.exp(a)
    return exp_a / exp_a.sum(axis=1, keepdims=True)