def build_keras_input_amended():
    filename_data, filename_w = './tmp/amended_indexed_data.p', './tmp/amended_Weight.p'

    if os.path.isfile(filename_data) and os.path.isfile(filename_w):
        data = load_pickle(filename_data)
        W = load_pickle(filename_w)
        print('Load OK.')
        return (data, W)

    # load data from pickle
    (x_train, y_train_valence, y_train_labels,
     x_test, y_test_valence, y_test_labels,
     x_valid, y_valid_valence, y_valid_labels,
     x_train_polarity, y_train_polarity,
     x_test_polarity, y_test_polarity,
     x_valid_polarity, y_valid_polarity) = load_sst(path='./resources/stanfordSentimentTreebank/')

    vocab = get_vocab(x_train)
    # word_vecs = load_embeddings('google_news', '/home/hs/Data/Word_Embeddings/google_news.bin')
    # word_vecs = load_embeddings('glove')

    # load amended word vectors
    word_vecs = load_embeddings('amended_word2vec')

    word_vecs = add_unknown_words(word_vecs, vocab)
    W, word_idx_map = build_embedding_matrix(word_vecs, vocab)

    x_train_idx_data = make_idx_data(x_train, word_idx_map)
    x_test_idx_data = make_idx_data(x_test, word_idx_map)
    x_valid_idx_data = make_idx_data(x_valid, word_idx_map)
    x_train_polarity_idx_data = make_idx_data(x_train_polarity, word_idx_map)
    x_test_polarity_idx_data = make_idx_data(x_test_polarity, word_idx_map)
    x_valid_polarity_idx_data = make_idx_data(x_valid_polarity, word_idx_map)

    data = (x_train_idx_data, y_train_valence, y_train_labels,
            x_test_idx_data, y_test_valence, y_test_labels,
            x_valid_idx_data, y_valid_valence, y_valid_labels,
            x_train_polarity_idx_data, y_train_polarity,
            x_test_polarity_idx_data, y_test_polarity,
            x_valid_polarity_idx_data, y_valid_polarity)

    dump_picle(data, filename_data)
    dump_picle(W, filename_w)
    return (data, W)
Esempio n. 2
0
def build_keras_input_amended():
    filename_data, filename_w = './tmp/amended_indexed_data.p', './tmp/amended_Weight.p'

    if os.path.isfile(filename_data) and os.path.isfile(filename_w):
        data = load_pickle(filename_data)
        W = load_pickle(filename_w)
        print('Load OK.')
        return (data, W)

    # load data from pickle
    (x_train, y_train_valence, y_train_labels,
     x_test, y_test_valence, y_test_labels,
     x_valid, y_valid_valence, y_valid_labels,
     x_train_polarity, y_train_polarity,
     x_test_polarity, y_test_polarity,
     x_valid_polarity, y_valid_polarity) = load_sst(path='./resources/stanfordSentimentTreebank/')

    vocab = get_vocab(x_train)
    # word_vecs = load_embeddings('google_news', '/home/hs/Data/Word_Embeddings/google_news.bin')
    # word_vecs = load_embeddings('glove')

    # load amended word vectors
    word_vecs = load_embeddings('amended_word2vec')

    word_vecs = add_unknown_words(word_vecs, vocab)
    W, word_idx_map = build_embedding_matrix(word_vecs, vocab)

    x_train_idx_data = make_idx_data(x_train, word_idx_map)
    x_test_idx_data = make_idx_data(x_test, word_idx_map)
    x_valid_idx_data = make_idx_data(x_valid, word_idx_map)
    x_train_polarity_idx_data = make_idx_data(x_train_polarity, word_idx_map)
    x_test_polarity_idx_data = make_idx_data(x_test_polarity, word_idx_map)
    x_valid_polarity_idx_data = make_idx_data(x_valid_polarity, word_idx_map)

    data = (x_train_idx_data, y_train_valence, y_train_labels,
            x_test_idx_data, y_test_valence, y_test_labels,
            x_valid_idx_data, y_valid_valence, y_valid_labels,
            x_train_polarity_idx_data, y_train_polarity,
            x_test_polarity_idx_data, y_test_polarity,
            x_valid_polarity_idx_data, y_valid_polarity)

    dump_picle(data, filename_data)
    dump_picle(W, filename_w)
    return (data, W)
def keras_nn_input(word_vectors_model, amending):
    if word_vectors_model == 'word2vec':
        if amending == True:
            filename_data, filename_w = './tmp/amended_w2v_indexed_data.p', './tmp/amended_w2v_Weight.p'
        elif amending == False:
            filename_data, filename_w = './tmp/w2v_indexed_data.p', './tmp/w2v_Weight.p'
        else:
            raise Exception('Wrong!')
    elif word_vectors_model == 'GloVe':
        if amending == True:
            filename_data, filename_w = './tmp/amended_GloVe_indexed_data.p', './tmp/amended_GloVe_Weight.p'
        elif amending == False:
            filename_data, filename_w = './tmp/GloVe_indexed_data.p', './tmp/GloVe_Weight.p'
        else:
            raise Exception('Wrong!')
    else:
        raise Exception('Wrong parameter!')

    if os.path.isfile(filename_data) and os.path.isfile(filename_w):
        data = load_pickle(filename_data)
        W = load_pickle(filename_w)
        print('Load OK, parameters: word_vectors_model = %s, amending = %s'%(word_vectors_model, amending))
        return (data, W)

    # load data from pickle
    (x_train, y_train_valence, y_train_labels,
     x_test, y_test_valence, y_test_labels,
     x_valid, y_valid_valence, y_valid_labels,
     x_train_polarity, y_train_polarity,
     x_test_polarity, y_test_polarity,
     x_valid_polarity, y_valid_polarity) = load_sst(path='./resources/stanfordSentimentTreebank/')

    vocab = get_vocab(x_train)

    if word_vectors_model == 'word2vec':
        if amending == True:
            word_vecs = load_embeddings('amended_word2vec')
        elif amending == False:
            word_vecs = load_embeddings('google_news', '/home/hs/Data/Word_Embeddings/google_news.bin')
        else:
            raise Exception('Wrong!')
    elif word_vectors_model == 'GloVe':
        if amending == True:
            word_vecs = load_embeddings('amended_glove')
        elif amending == False:
            word_vecs = load_embeddings('glove')
        else:
            raise Exception('Wrong!')
    else:
        raise Exception('Wrong parameter!')

    word_vecs = add_unknown_words(word_vecs, vocab)
    W, word_idx_map = build_embedding_matrix(word_vecs, vocab)

    x_train_idx_data = make_idx_data(x_train, word_idx_map)
    x_test_idx_data = make_idx_data(x_test, word_idx_map)
    x_valid_idx_data = make_idx_data(x_valid, word_idx_map)
    x_train_polarity_idx_data = make_idx_data(x_train_polarity, word_idx_map)
    x_test_polarity_idx_data = make_idx_data(x_test_polarity, word_idx_map)
    x_valid_polarity_idx_data = make_idx_data(x_valid_polarity, word_idx_map)

    data = (x_train_idx_data, y_train_valence, y_train_labels,
            x_test_idx_data, y_test_valence, y_test_labels,
            x_valid_idx_data, y_valid_valence, y_valid_labels,
            x_train_polarity_idx_data, y_train_polarity,
            x_test_polarity_idx_data, y_test_polarity,
            x_valid_polarity_idx_data, y_valid_polarity)

    dump_picle(data, filename_data)
    dump_picle(W, filename_w)
    print('Load OK, parameters: word_vectors_model = %s, amending = %s'%(word_vectors_model, amending))
    return (data, W)
Esempio n. 4
0
def keras_nn_input(word_vectors_model, amending):
    if word_vectors_model == 'word2vec':
        if amending == True:
            filename_data, filename_w = './tmp/amended_w2v_indexed_data.p', './tmp/amended_w2v_Weight.p'
        elif amending == False:
            filename_data, filename_w = './tmp/w2v_indexed_data.p', './tmp/w2v_Weight.p'
        else:
            raise Exception('Wrong!')
    elif word_vectors_model == 'GloVe':
        if amending == True:
            filename_data, filename_w = './tmp/amended_GloVe_indexed_data.p', './tmp/amended_GloVe_Weight.p'
        elif amending == False:
            filename_data, filename_w = './tmp/GloVe_indexed_data.p', './tmp/GloVe_Weight.p'
        else:
            raise Exception('Wrong!')

    elif word_vectors_model == 'retrofitted_GloVe':
        filename_data, filename_w = './tmp/retrofitted_GloVe_indexed_data.p', './tmp/retrofitted_GloVe_Weight.p'
    elif word_vectors_model == 'retrofitted_word2vec':
        filename_data, filename_w = './tmp/retrofitted_word2vec_indexed_data.p', './tmp/retrofitted_word2vec_Weight.p'

    else:
        raise Exception('Wrong parameter!')

    if os.path.isfile(filename_data) and os.path.isfile(filename_w):
        data = load_pickle(filename_data)
        W = load_pickle(filename_w)
        print('Load OK, parameters: word_vectors_model = %s, amending = %s'%(word_vectors_model, amending))
        return (data, W)

    # load data from pickle
    (x_train, y_train_valence, y_train_labels,
     x_test, y_test_valence, y_test_labels,
     x_valid, y_valid_valence, y_valid_labels,
     x_train_polarity, y_train_polarity,
     x_test_polarity, y_test_polarity,
     x_valid_polarity, y_valid_polarity) = load_sst(path='./resources/stanfordSentimentTreebank/')

    vocab = get_vocab(x_train)

    if word_vectors_model == 'word2vec':
        if amending == True:
            word_vecs = load_embeddings('amended_word2vec')
        elif amending == False:
            word_vecs = load_embeddings('google_news', '/home/hs/Data/Word_Embeddings/google_news.bin')
        else:
            raise Exception('Wrong!')
    elif word_vectors_model == 'GloVe':
        if amending == True:
            word_vecs = load_embeddings('amended_glove')
        elif amending == False:
            word_vecs = load_embeddings('glove')
        else:
            raise Exception('Wrong!')
    elif word_vectors_model == 'retrofitted_GloVe':
        word_vecs = load_embeddings('zh_tw', 'D:\Word_Embeddings\English\glove.6B\GloVe_out_vec_file.txt')
        # convert gensim model to dict type
        w2v = dict()
        for key in word_vecs.vocab.keys():
            w2v[key] = word_vecs[key]
        word_vecs = w2v

    elif word_vectors_model == 'retrofitted_word2vec':
        word_vecs = load_embeddings('zh_tw', 'D:\Word_Embeddings\English\word2vec_out_vec_file.txt')
        # convert gensim model to dict type
        w2v = dict()
        for key in word_vecs.vocab.keys():
            w2v[key] = word_vecs[key]
        word_vecs = w2v

    else:
        raise Exception('Wrong parameter!')

    word_vecs = add_unknown_words(word_vecs, vocab)
    W, word_idx_map = build_embedding_matrix(word_vecs, vocab)

    x_train_idx_data = make_idx_data(x_train, word_idx_map)
    x_test_idx_data = make_idx_data(x_test, word_idx_map)
    x_valid_idx_data = make_idx_data(x_valid, word_idx_map)
    x_train_polarity_idx_data = make_idx_data(x_train_polarity, word_idx_map)
    x_test_polarity_idx_data = make_idx_data(x_test_polarity, word_idx_map)
    x_valid_polarity_idx_data = make_idx_data(x_valid_polarity, word_idx_map)

    data = (x_train_idx_data, y_train_valence, y_train_labels,
            x_test_idx_data, y_test_valence, y_test_labels,
            x_valid_idx_data, y_valid_valence, y_valid_labels,
            x_train_polarity_idx_data, y_train_polarity,
            x_test_polarity_idx_data, y_test_polarity,
            x_valid_polarity_idx_data, y_valid_polarity)

    dump_picle(data, filename_data)
    dump_picle(W, filename_w)
    print('Load OK, parameters: word_vectors_model = %s, amending = %s'%(word_vectors_model, amending))
    return (data, W)