# --- Example 1: cross-validation of a one-conv-layer word-embedding CNN ---
# NOTE(review): `config` and `DataUtil` must be defined/imported before this
# fragment runs — they are not visible in this chunk.
data_util = DataUtil()
train_data, test_data = data_util.load_train_test_data(config)
label_to_index, index_to_label = data_util.get_label_index()

# FIX: DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed
# in 1.0; `.values` returns the same ndarray on every pandas version.
train_x = train_data['SENTENCE'].values
train_y = train_data['LABEL_INDEX'].values
test_x = test_data['SENTENCE'].values
test_y = test_data['LABEL_INDEX'].values

from deep_learning.cnn.wordEmbedding_cnn.example.one_conv_layer_wordEmbedding_cnn import WordEmbeddingCNNWithOneConv

input_length = 14        # max number of tokens per sentence fed to the CNN
word_embedding_dim = 50  # dimensionality of the pre-trained word vectors
WordEmbeddingCNNWithOneConv.cross_validation(
    train_data=(train_x, train_y),
    test_data=(test_x, test_y),
    need_validation=True,
    include_train_data=True,
    vocabulary_including_test_set=False,
    cv=3,
    feature_type='word',
    num_labels=24,
    input_length=input_length,
    # sweep over the number of convolution filters
    num_filter_list=[10, 30, 50, 80, 100, 110, 150, 200, 300, 500, 1000],
    verbose=0,
    embedding_weight_trainable=False,
    word2vec_model_file_path=data_util.transform_word2vec_model_name('%dd_weibo_100w' % word_embedding_dim),
)
# ---- Esempio n. 2 (Example 2) — scraped-snippet separator ----
# --- Example 2: stance-detection run of the one-conv-layer CNN ---
config = {
    'verbose': 1,
}

from version_2.data_processing.data_util import DataUtil

data_util = DataUtil()
train_data, test_data = data_util.load_train_test_data(config)
label_to_index, index_to_label = data_util.get_label_index()

# FIX: Series.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same ndarray on every pandas version.
train_x = train_data['TEXT'].values
train_y = train_data['STANCE_INDEX'].values
test_x = test_data['TEXT'].values
test_y = test_data['STANCE_INDEX'].values

from deep_learning.cnn.wordEmbedding_cnn.example.one_conv_layer_wordEmbedding_cnn import WordEmbeddingCNNWithOneConv

input_length = 120       # max number of tokens per text fed to the CNN
word_embedding_dim = 50  # dimensionality of the pre-trained word vectors
WordEmbeddingCNNWithOneConv.cross_validation(
    train_data=(train_x, train_y),
    test_data=(test_x, test_y),
    feature_type='word',
    input_length=input_length,
    num_filter_list=[10],
    verbose=1,
    # a pre-trained 50-dim word2vec model; path is environment-specific
    word2vec_model_file_path='/home/jdwang/PycharmProjects/corprocessor/word2vec/vector/50dim/vector1000000_50dim.gem'
)
# ---- Esempio n. 3 (Example 3) — scraped-snippet separator ----
from deep_learning.cnn.wordEmbedding_cnn.example.one_conv_layer_wordEmbedding_cnn import WordEmbeddingCNNWithOneConv

# Hyper-parameters for the single-conv-layer word-embedding CNN.
input_length = 14
word_embedding_dim = 50

# Pickle file that receives the conv middle-layer output (one per filter count).
middle_output_path = 'result/conv_middle_output_{}filters.pkl'.format(num_filter_list[0])
# Resolve the pre-trained word2vec model name for the chosen dimensionality.
w2v_model_path = data_util.transform_word2vec_model_name('{}d_weibo_100w'.format(word_embedding_dim))

WordEmbeddingCNNWithOneConv.cross_validation(
    train_data=(train_X, train_y),
    test_data=(test_X, test_y),
    need_validation=True,
    include_train_data=True,
    vocabulary_including_test_set=True,
    cv=3,
    feature_type=feature_type,
    num_labels=24,
    input_length=input_length,
    num_filter_list=num_filter_list,
    verbose=config['verbose'],
    embedding_weight_trainable=False,
    # capture the CNN middle-layer activations ...
    get_cnn_middle_layer_output=True,
    # ... and save them to this file
    middle_layer_output_file=middle_output_path,
    word2vec_model_file_path=w2v_model_path
)

if config['verbose'] > 0:
    print('-' * 20)
# endregion -------------- cross validation ---------------
# ---- Esempio n. 4 (Example 4) — scraped-snippet separator ----
    print('-' * 20)
    print('cross validation')

# Single-conv-layer CNN hyper-parameters: sequence length and embedding size.
seq_len = 14
embed_dim = 50

WordEmbeddingCNNWithOneConv.cross_validation(
    train_data=(train_X, train_y),
    test_data=(test_X, test_y),
    need_validation=True,
    include_train_data=True,
    vocabulary_including_test_set=True,
    cv=3,
    feature_type=feature_type,
    num_labels=24,
    input_length=seq_len,
    num_filter_list=num_filter_list,
    verbose=config['verbose'],
    embedding_weight_trainable=False,
    # capture the CNN middle-layer activations ...
    get_cnn_middle_layer_output=True,
    # ... and write them to this pickle file
    middle_layer_output_file='result/conv_middle_output_%dfilters.pkl' % num_filter_list[0],
    word2vec_model_file_path=data_util.transform_word2vec_model_name('%dd_weibo_100w' % embed_dim))

if config['verbose'] > 0:
    print('-' * 20)
# endregion -------------- cross validation ---------------
# ---- Esempio n. 5 (Example 5) — scraped-snippet separator ----
    'verbose':1,
}

# --- Example 5: stance-detection run of the one-conv-layer CNN ---
# NOTE(review): this fragment relies on `config` being defined in the
# (truncated) lines above — confirm before running standalone.
from version_2.data_processing.data_util import DataUtil

data_util = DataUtil()
train_data, test_data = data_util.load_train_test_data(config)
label_to_index, index_to_label = data_util.get_label_index()

# FIX: Series.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same ndarray on every pandas version.
train_x = train_data['TEXT'].values
train_y = train_data['STANCE_INDEX'].values
test_x = test_data['TEXT'].values
test_y = test_data['STANCE_INDEX'].values

from deep_learning.cnn.wordEmbedding_cnn.example.one_conv_layer_wordEmbedding_cnn import WordEmbeddingCNNWithOneConv

input_length = 120       # max number of tokens per text fed to the CNN
word_embedding_dim = 50  # dimensionality of the pre-trained word vectors
WordEmbeddingCNNWithOneConv.cross_validation(
    train_data=(train_x, train_y),
    test_data=(test_x, test_y),
    feature_type='word',
    input_length=input_length,
    num_filter_list=[10],
    verbose=1,
    # a pre-trained 50-dim word2vec model; path is environment-specific
    word2vec_model_file_path='/home/jdwang/PycharmProjects/corprocessor/word2vec/vector/50dim/vector1000000_50dim.gem'
)