Beispiel #1
0
def get_data():
    """Load the emoji training set and the GloVe lookup tables.

    Reads ``data/train_emoji.csv`` with ``read_csv`` and
    ``../word_vectors/data/glove.6B.50d.txt`` with ``read_glove_vecs``.
    Both paths are relative to the current working directory.

    Only the training split is returned, so the test split, the one-hot
    label encodings and the maximum sentence length are not computed here;
    callers that need them derive them from the returned values.

    Returns:
        tuple: ``(word_to_index, index_to_word, word_to_vec_map, X_train,
        Y_train)`` exactly as produced by ``read_glove_vecs`` and
        ``read_csv``.
    """
    X_train, Y_train = read_csv('data/train_emoji.csv')
    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs(
        '../word_vectors/data/glove.6B.50d.txt')
    return word_to_index, index_to_word, word_to_vec_map, X_train, Y_train
def main():
    """Train the ``Emojify_V2`` model and report how it does on the test set.

    Steps performed:
      1. Load the train/test CSV files and the GloVe lookup tables.
      2. Build, summarise and compile the model.
      3. Fit on the index-encoded training sentences (50 epochs, batch 32).
      4. Evaluate on the test set and print the accuracy.
      5. Print every test sentence whose predicted label differs from the
         expected one.
      6. Print the prediction for one hand-written sentence.

    All file paths are relative to the current working directory.
    """
    X_train, Y_train = read_csv('../data/train_emoji.csv')
    X_test, Y_test = read_csv('../data/tesss.csv')

    # Fixed input length = the largest word count of any training sentence.
    # Measuring the sentence with the most *characters* instead could
    # under-estimate this and leave some sentence longer than the input.
    maxLen = max(len(sentence.split()) for sentence in X_train)

    word_to_index, _index_to_word, word_to_vec_map = read_glove_vecs('../data/glove.6B.50d.txt')

    model = Emojify_V2((maxLen,), word_to_vec_map, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
    Y_train_oh = convert_to_one_hot(Y_train, C=5)
    model.fit(X_train_indices, Y_train_oh, epochs=50, batch_size=32, shuffle=True)

    X_test_indices = sentences_to_indices(X_test, word_to_index, max_len=maxLen)
    Y_test_oh = convert_to_one_hot(Y_test, C=5)
    _loss, acc = model.evaluate(X_test_indices, Y_test_oh)
    print()
    print("Test accuracy = ", acc)

    # Show the mislabelled examples. The already-encoded test sentences are
    # reused rather than being encoded a second time.
    pred = model.predict(X_test_indices)
    for sentence, expected, scores in zip(X_test, Y_test, pred):
        num = np.argmax(scores)
        if num != expected:
            print('Expected emoji:' + label_to_emoji(expected) + ' prediction: ' + sentence
                  + label_to_emoji(num).strip())

    # Change the sentence below to see your prediction. Make sure all the words are in the Glove embeddings.
    x_test = np.array(['not feeling happy'])
    x_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
    print(x_test[0] + ' ' + label_to_emoji(np.argmax(model.predict(x_test_indices))))
Beispiel #3
0
def main():
    """Train the baseline emoji classifier and print its diagnostics.

    Steps performed:
      1. Load the train/test CSV files and preview the first training rows.
      2. Load the GloVe lookup tables and print a sample word/index lookup.
      3. Train via ``model(X_train, Y_train, word_to_vec_map)`` to obtain the
         parameters ``W`` and ``b``.
      4. Run ``predict`` on the training set, the test set and a handful of
         hand-written sentences.
      5. Print a cross-tabulation of actual vs. predicted test labels and
         show the confusion-matrix plot.

    All file paths are relative to the current working directory.
    """
    X_train, Y_train = read_csv('../data/train_emoji.csv')
    X_test, Y_test = read_csv('../data/tesss.csv')

    # Preview at most the first ten training examples; slicing keeps this
    # safe when the training set has fewer than ten rows.
    for sentence, label in zip(X_train[:10], Y_train[:10]):
        print(sentence, label_to_emoji(label))

    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('../data/glove.6B.50d.txt')

    # Sanity check of the two vocabulary maps.
    # NOTE(review): 289846 is a hard-coded index and presumably only valid
    # for this particular GloVe vocabulary -- confirm before swapping files.
    word = "cucumber"
    vocab_index = 289846
    print("")
    print("the index of", word, "in the vocabulary is", word_to_index[word])
    print("the", str(vocab_index) + "th word in the vocabulary is", index_to_word[vocab_index])

    # Only the learned parameters are needed; the training-time predictions
    # returned as the first element are not used.
    _, W, b = model(X_train, Y_train, word_to_vec_map)

    print("Training set:")
    predict(X_train, Y_train, W, b, word_to_vec_map)
    print('Test set:')
    pred_test = predict(X_test, Y_test, W, b, word_to_vec_map)

    X_my_sentences = np.array(
        ["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "not feeling happy"])
    Y_my_labels = np.array([[0], [0], [2], [1], [4], [3]])
    print('--------- PRINT PREDICTIONS ----------')
    pred = predict(X_my_sentences, Y_my_labels, W, b, word_to_vec_map)
    print_predictions(X_my_sentences, pred)

    print(Y_test.shape)
    print('           ' + label_to_emoji(0) + '    ' + label_to_emoji(1) + '    ' + label_to_emoji(
        2) + '    ' + label_to_emoji(3) + '   ' + label_to_emoji(4))
    # Flatten with -1 so the table works for any test-set size, not only a
    # test set of exactly 56 rows.
    print(pd.crosstab(Y_test, pred_test.reshape(-1), rownames=['Actual'], colnames=['Predicted'], margins=True))
    plot_confusion_matrix(Y_test, pred_test)
    plt.show()
if __name__ == '__main__':
    # Script entry point: builds the Emojify_V2 model, trains it on the
    # emoji training set and encodes the test set.
    #
    # NOTE(review): the working directory is switched to a hard-coded
    # absolute Windows path; every relative 'data/...' path below resolves
    # against it, so the script only runs on a machine with this layout.
    os.chdir(
        r"E:\深度学习\【中英】【吴恩达课后编程作业】Course 5 - 序列模型 - 第二周作业 - 词向量的运算与Emoji生成器")
    # Load the GloVe lookup tables used by the embedding layer and the
    # sentence encoder.
    word_to_index, index_to_word, word_to_vec_map = emo_utils.read_glove_vecs(
        'data/glove.6B.50d.txt')
    # Sanity check: build the embedding layer on its own and print a single
    # weight from it.
    embedding_layer = pretrained_embedding_layer(word_to_vec_map,
                                                 word_to_index)
    print("weights[0][1][3] =", embedding_layer.get_weights()[0][1][3])
    # Fixed input length handed to both the model and sentences_to_indices.
    # NOTE(review): hard-coded rather than derived from the data --
    # presumably the longest sentence in the data set has 10 words; confirm.
    max_Len = 10
    model = Emojify_V2((max_Len, ), word_to_vec_map, word_to_index)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Load the train/test splits.
    X_train, Y_train = emo_utils.read_csv('data/train_emoji.csv')
    X_test, Y_test = emo_utils.read_csv('data/test.csv')
    # Encode the training sentences and one-hot the labels (C=5), then fit.
    X_train_indices = sentences_to_indices(X_train, word_to_index, max_Len)
    Y_train_oh = emo_utils.convert_to_one_hot(Y_train, C=5)
    model.fit(X_train_indices,
              Y_train_oh,
              epochs=200,
              batch_size=32,
              shuffle=True)

    # NOTE(review): C is not read anywhere in the lines visible here; it
    # matches the literal C=5 passed to convert_to_one_hot, so it is
    # presumably the number of label classes -- confirm against later code.
    C = 5

    # Encode the test split the same way as the training split.
    # NOTE(review): X_test_indices and Y_test_oh are not consumed within the
    # lines visible here; the evaluation step presumably follows.
    X_test_indices = sentences_to_indices(X_test,
                                          word_to_index,
                                          max_len=max_Len)
    Y_test_oh = emo_utils.convert_to_one_hot(Y_test, C=5)