label_to_index, index_to_label = data_util.get_label_index()
# ****************************************************************
# ------------- region end : 1. load train and test data -------------
# ****************************************************************

# ****************************************************************
# +++++++++++++ region start : 2. convert data format and encode features +++++++++++++
# ****************************************************************
logging.debug('=' * 20)
logging.debug('2. 转换数据的格式并特征编码')

from deep_learning.cnn.wordEmbedding_cnn.multichannel_onehot_cnn_model import MultiChannelOnehotBowCNN

# Build the two feature encoders used by this classifier:
# one for word-level input and one for segment-level input.
word_feature_encoder, seg_feature_encoder = MultiChannelOnehotBowCNN.get_feature_encoder(
    word_input_length=word_input_length,
    seg_input_length=seg_input_length,
)

# NOTE(review): DataFrame/Series.as_matrix() was deprecated in pandas 0.23
# and removed in 1.0; .values is the drop-in, backward-compatible replacement.
train_y = train_data['LABEL_INDEX'].values
test_y = test_data['LABEL_INDEX'].values

# Deterministic 3-fold split of the training data (fixed seed).
cv_data = data_util.get_k_fold_data(
    k=3,
    data=train_data,
    rand_seed=3,
)

# Encode every CV fold (plus the held-out test set) with each encoder.
all_cv_word_data = transform_cv_data(
    word_feature_encoder,
    cv_data,
    (test_data[u'SENTENCE'].values, test_y),
    **config
)
all_cv_seg_data = transform_cv_data(
    seg_feature_encoder,
    cv_data,
    (test_data[u'SENTENCE'].values, test_y),
    **config
)
# NOTE(review): this re-builds the same 3-fold split created earlier with
# identical arguments (k=3, rand_seed=3). Presumably deterministic given the
# fixed seed; kept for parity with the original flow — TODO confirm and
# deduplicate if get_k_fold_data is pure.
cv_data = data_util.get_k_fold_data(
    k=3,
    data=train_data,
    rand_seed=3,
)

# Run k-fold cross-validation of the multichannel one-hot BoW CNN,
# writing per-fold details to the result file below.
# .values replaces the removed pandas as_matrix() (gone since pandas 1.0).
MultiChannelOnehotBowCNN.cross_validation(
    cv_data,
    (test_data[u'SENTENCE'].values, test_y),
    'result/multi_cnn_bow_v2.3Sa_cv_detail.txt',
    rand_seed=rand_seed,
    nb_epoch=nb_epoch,
    num_labels=24,
    verbose=verbose,
    remove_stopword=remove_stopword,
    layer1=layer1,
    l1_conv_filter_type=l1_conv_filter_type,
    layer2=layer2,
    l2_conv_filter_type=l2_conv_filter_type,
    k=k,
    hidden1=hidden1,
    hidden2=hidden2,
    word2vec_to_solve_oov=word2vec_to_solve_oov,
    word2vec_model_file_path=config['word2vec_model_file_path'],
    use_layer=use_layer,
    lr=lr,
)

# Report total wall-clock runtime (start_time is set earlier in the script).
end_time = timeit.default_timer()
print('end! Running time:%ds!' % (end_time - start_time))