def model(X, Y, word_to_vec_map, learning_rate=0.01, num_iterations=400):
    """
    Train a softmax classifier on averaged word vectors with plain NumPy SGD.

    Every sentence is reduced to a single vector by ``sentence_to_avg`` and a
    one-layer softmax model is fitted one training example at a time.

    Arguments:
    X -- training sentences (strings), indexed per example; m entries.
    Y -- integer class labels with ``Y.shape[0] == m``. They are one-hot
         encoded with C = n_y = 5, so they are presumably expected in 0..4
         (the earlier docstring said 0-7, which does not match n_y).
    word_to_vec_map -- dict mapping a word to its 50-dimensional vector.
    learning_rate -- step size of the gradient-descent update.
    num_iterations -- number of passes over the training set.

    Returns:
    pred -- result of ``emo_utils.predict`` on X with the final W and b
            (shape (m, 1) according to the original docstring).
    W -- weight matrix of the softmax layer, shape (n_y, n_h).
    b -- bias of the softmax layer, shape (n_y,).
    """
    np.random.seed(1)

    m = Y.shape[0]  # number of training examples
    n_y = 5         # number of classes
    n_h = 50        # dimension of the word vectors

    # Xavier initialisation of the parameters.
    W = np.random.randn(n_y, n_h) / np.sqrt(n_h)
    b = np.zeros((n_y,))

    # One-hot encode the labels.
    Y_oh = emo_utils.convert_to_one_hot(Y, C=n_y)

    for t in range(num_iterations):
        for i in range(m):
            # Average word vector of the i-th training sentence.
            avg = sentence_to_avg(X[i], word_to_vec_map)

            # Forward pass.
            z = np.dot(W, avg) + b
            a = emo_utils.softmax(z)

            # Cross-entropy loss of the i-th example.
            cost = -np.sum(Y_oh[i] * np.log(a))

            # Gradients of the loss w.r.t. z, W and b.
            dz = a - Y_oh[i]
            dW = np.dot(dz.reshape(n_y, 1), avg.reshape(1, n_h))
            db = dz

            # Stochastic gradient-descent step.
            W = W - learning_rate * dW
            b = b - learning_rate * db

        if t % 100 == 0:
            # `cost` is the loss of the last example seen in this epoch.
            print("第{t}轮,损失为{cost}".format(t=t, cost=cost))
            # Progress evaluation only; the returned prediction is computed
            # after training so that it matches the returned parameters.
            emo_utils.predict(X, Y, W, b, word_to_vec_map)

    # Evaluate with the final parameters. Previously `pred` was only set on
    # logging epochs, so it lagged behind W/b and was undefined when
    # num_iterations <= 0.
    pred = emo_utils.predict(X, Y, W, b, word_to_vec_map)

    return pred, W, b
def main():
    """
    Run the emoji-classifier demo end to end.

    Steps: load the train/test CSV files and the GloVe vectors, print a few
    samples and vocabulary lookups, train ``model`` on the training set,
    call ``predict`` on the training set, the test set and six hand-written
    sentences, then print a confusion table and show the confusion-matrix
    plot. All paths are relative to the current working directory.
    """
    X_train, Y_train = read_csv('../data/train_emoji.csv')
    X_test, Y_test = read_csv('../data/tesss.csv')

    # Show up to the first ten training samples; slicing keeps this safe for
    # data sets with fewer than ten rows.
    for sentence, label in zip(X_train[:10], Y_train[:10]):
        print(sentence, label_to_emoji(label))

    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('../data/glove.6B.50d.txt')

    # Sanity check of both vocabulary mappings.
    word = "cucumber"
    index = 289846
    print("")
    print("the index of", word, "in the vocabulary is", word_to_index[word])
    print("the", str(index) + "th word in the vocabulary is", index_to_word[index])

    # Only the trained parameters are needed below.
    _, W, b = model(X_train, Y_train, word_to_vec_map)

    print("Training set:")
    predict(X_train, Y_train, W, b, word_to_vec_map)
    print('Test set:')
    pred_test = predict(X_test, Y_test, W, b, word_to_vec_map)

    X_my_sentences = np.array(
        ["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "not feeling happy"])
    Y_my_labels = np.array([[0], [0], [2], [1], [4], [3]])

    print('--------- PRINT PREDICTIONS ----------')
    pred = predict(X_my_sentences, Y_my_labels, W, b, word_to_vec_map)
    print_predictions(X_my_sentences, pred)

    print(Y_test.shape)
    # Header row: the emoji of classes 0..4, each preceded by one space.
    print(' ' + ' '.join(label_to_emoji(label) for label in range(5)))
    # Flatten with -1 instead of a hard-coded 56 so that any test-set size
    # works.
    print(pd.crosstab(Y_test, pred_test.reshape(-1), rownames=['Actual'], colnames=['Predicted'], margins=True))
    plot_confusion_matrix(Y_test, pred_test)
    plt.show()
def model(X, Y, word_to_vec_map, learning_rate=0.01, num_iterations=400):
    """
    Fit a single softmax layer on sentence-average word vectors (NumPy SGD).

    Arguments:
    X -- training sentences (strings), indexed per example; m entries.
    Y -- integer class labels with ``Y.shape[0] == m``; one-hot encoded with
         C = 5, so presumably in 0..4 (the earlier docstring said 0-7, which
         does not match n_y = 5).
    word_to_vec_map -- dict mapping a word to its 50-dimensional vector.
    learning_rate -- step size of the per-example parameter update.
    num_iterations -- number of passes over the training set.

    Returns:
    pred -- result of ``emo_utils.predict`` on X with the final W and b
            (shape (m, 1) according to the original docstring).
    W -- weights, shape (n_y, n_h).
    b -- bias, shape (n_y,).
    """
    np.random.seed(1)

    m = Y.shape[0]  # number of training examples
    n_y = 5         # number of classes
    n_h = 50        # dimension of the word vectors

    W = np.random.randn(n_y, n_h) / np.sqrt(n_h)
    b = np.zeros((n_y,))

    Y_oh = emo_utils.convert_to_one_hot(Y, C=n_y)

    for i in range(num_iterations):
        for j in range(m):
            # Forward pass on the averaged word vector of sentence j.
            avg = sentence_to_avg(X[j], word_to_vec_map)
            z = np.dot(W, avg) + b
            a = emo_utils.softmax(z)

            # Cross-entropy loss; the dot product of two 1-D vectors is
            # already a scalar, so no extra np.sum is needed.
            loss = -np.dot(Y_oh[j], np.log(a))

            # Backward pass.
            dz = a - Y_oh[j]
            dW = np.dot(dz.reshape(n_y, 1), avg.reshape(1, n_h))
            db = dz

            # Parameter update.
            W = W - learning_rate * dW
            b = b - learning_rate * db

        if i % 100 == 0:
            # `loss` belongs to the last example processed in this epoch.
            print("第{t}轮,损失为{cost}".format(t=i, cost=loss))
            # Intermediate evaluation, kept for progress output only.
            emo_utils.predict(X, Y, W, b, word_to_vec_map)

    # Predict with the final parameters: the value computed on logging
    # epochs would be out of date relative to the returned W/b, and would
    # not exist at all when num_iterations <= 0.
    pred = emo_utils.predict(X, Y, W, b, word_to_vec_map)

    return pred, W, b
def model(X, Y, word_to_vec_map, learning_rate=0.01, num_iterations=400):
    """
    Model to train a softmax classifier on averaged word vectors in numpy.

    Arguments:
    X -- input data, numpy array of sentences as strings, indexed per example
    Y -- labels, numpy array of integers with Y.shape[0] == m; one-hot encoded
         with C = 5, so presumably between 0 and 4 (the earlier docstring said
         0 and 7, which does not match n_y = 5)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    learning_rate -- learning_rate for the stochastic gradient descent algorithm
    num_iterations -- number of iterations

    Returns:
    pred -- predictions returned by predict() for X using the final W and b
            (shape (m, 1) according to the original docstring)
    W -- weight matrix of the softmax layer, of shape (n_y, n_h)
    b -- bias of the softmax layer, of shape (n_y,)
    """
    np.random.seed(1)

    m = Y.shape[0]  # number of training examples
    n_y = 5         # number of classes
    n_h = 50        # dimensions of the GloVe vectors

    # Initialize parameters using Xavier initialization
    W = np.random.randn(n_y, n_h) / np.sqrt(n_h)
    b = np.zeros((n_y,))

    # Convert Y to Y_onehot with n_y classes
    Y_oh = convert_to_one_hot(Y, C=n_y)

    # Optimization loop
    for t in range(num_iterations):  # Loop over the number of iterations
        for i in range(m):           # Loop over the training examples
            # Average the word vectors of the words from the i'th training example
            avg = sentence_to_avg(X[i], word_to_vec_map)

            # Forward propagate the avg through the softmax layer
            z = np.dot(W, avg) + b
            a = softmax(z)

            # Cross-entropy cost from the i'th one-hot label and the softmax output
            cost = -np.sum(Y_oh[i] * np.log(a))

            # Compute gradients
            dz = a - Y_oh[i]
            dW = np.dot(dz.reshape(n_y, 1), avg.reshape(1, n_h))
            db = dz

            # Update parameters with Stochastic Gradient Descent
            W = W - learning_rate * dW
            b = b - learning_rate * db

        if t % 100 == 0:
            # `cost` is the cost of the last example of this epoch
            print("Epoch: " + str(t) + " --- cost = " + str(cost))
            # Progress evaluation only; the returned value is computed below
            predict(X, Y, W, b, word_to_vec_map)

    # Predict with the final parameters. Assigning pred only on logging epochs
    # left it out of date with the returned W/b and undefined for
    # num_iterations <= 0.
    pred = predict(X, Y, W, b, word_to_vec_map)

    return pred, W, b
# Show which container type holds the training sentences.
print(type(X_train))

# Small hand-written sample: 20 labels and 20 matching sentences.
# NOTE(review): X is not used in the statements visible here.
Y = np.asarray([
    5, 0, 0, 5, 4, 4, 4, 6, 6, 4,
    1, 1, 5, 6, 6, 3, 6, 3, 4, 4,
])
print(Y.shape)

X = np.asarray([
    'I am going to the bar tonight',
    'I love you',
    'miss you my dear',
    'Lets go party and drinks',
    'Congrats on the new job',
    'Congratulations',
    'I am so happy for you',
    'Why are you feeling bad',
    'What is wrong with you',
    'You totally deserve this prize',
    'Let us go play football',
    'Are you down for football this afternoon',
    'Work hard play harder',
    'It is suprising how people can be dumb sometimes',
    'I am very disappointed',
    'It is the best day in my life',
    'I think I will end up alone',
    'My life is so boring',
    'Good job',
    'Great so awesome',
])

# Train on the real training set, then run predict on both splits.
pred, W, b = model(X_train, Y_train, word_to_vec_map)

print("=====训练集====")
pred_train = emo_utils.predict(X_train, Y_train, W, b, word_to_vec_map)

print("=====测试集====")
pred_test = emo_utils.predict(X_test, Y_test, W, b, word_to_vec_map)

# Try the trained parameters on a few custom sentences.
X_my_sentences = np.array([
    "i adore you",
    "i love you",
    "funny lol",
    "lets play with a ball",
    "food is ready",
    "you are not happy",
])
Y_my_labels = np.array([[0], [0], [2], [1], [4], [3]])

pred = emo_utils.predict(X_my_sentences, Y_my_labels, W, b, word_to_vec_map)
emo_utils.print_predictions(X_my_sentences, pred)

# Header row with the emoji of classes 0..4, tab separated.
print(" \t {0} \t {1} \t {2} \t {3} \t {4}".format(
    *(emo_utils.label_to_emoji(label) for label in range(5))
))

import pandas as pd