def get_model(feature_encoder, num_filter, region_size, num_labels, **kwargs):
    """Build a single-conv-layer OnehotBowCNN for the given filter count and region size."""
    onehot_cnn = OnehotBowCNN(
        rand_seed=1377,
        verbose=kwargs.get('verbose', 0),
        feature_encoder=feature_encoder,
        # optimizers='adadelta',
        optimizers='sgd',
        num_labels=num_labels,
        l1_conv_filter_type=[
            [num_filter, region_size, -1, 'valid', (-1, 1), 0., 'none', 'none'],
        ],
        l2_conv_filter_type=[],
        full_connected_layer_units=[[0, 0, 'relu', 'batch_normalization']],
        embedding_dropout_rate=0.,
        nb_epoch=30,
        nb_batch=32,
        earlyStoping_patience=30,
        lr=1e-2,
    )
    return onehot_cnn
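# A minimal usage sketch for get_model(); the encoder settings and the
# hyper-parameter values below are illustrative assumptions, not the
# original experiment configuration.
def demo_get_model():
    feature_encoder = OnehotBowCNN.get_feature_encoder(
        input_length=50,  # assumed padding length
        feature_type='word',
        verbose=0,
    )
    model = get_model(feature_encoder,
                      num_filter=100,
                      region_size=3,
                      num_labels=24,
                      verbose=0)
    model.print_model_descibe()
    return model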
def cross_validation(
        train_data=None,
        test_data=None,
        cv_data=None,
        input_length=None,
        feature_type='word',
        num_filter_list=None,
        region_size_list=None,
        word2vec_to_solve_oov=False,
        word2vec_model_file_path=None,
        verbose=0,
):
    from data_processing_util.cross_validation_util import transform_cv_data, get_k_fold_data, get_val_score

    # 1. Get the cross-validation data
    if cv_data is None:
        assert train_data is not None, 'At least one of cv_data and train_data must be provided!'
        cv_data = get_k_fold_data(
            k=3,
            train_data=train_data,
            test_data=test_data,
            include_train_data=True,
        )

    # 2. Transform the data with the feature encoder
    feature_encoder = OnehotBowCNN.get_feature_encoder(
        input_length=input_length,
        verbose=verbose,
        feature_type=feature_type,
        word2vec_to_solve_oov=word2vec_to_solve_oov,
        word2vec_model_file_path=word2vec_model_file_path,
    )
    cv_data = transform_cv_data(feature_encoder, cv_data, verbose=0)

    # 3. Cross-validate over the hyper-parameter grid
    for num_filter in num_filter_list:
        for region_size in region_size_list:
            print('=' * 40)
            print('num_filter and region_size are %d, %d.' % (num_filter, region_size))
            get_val_score(OnehotCNNWithOneConv,
                          cv_data=cv_data,
                          verbose=verbose,
                          region_size=region_size,
                          num_filter=num_filter,
                          num_labels=24)
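# A hedged driver for cross_validation(); the toy (texts, labels) tuples are
# hypothetical placeholders for the project's real train/test splits, and the
# grid values are illustrative only.
if __name__ == '__main__':
    train_data = (['a b c d', 'b c d e', 'c d e f'], [0, 1, 0])  # hypothetical format: (texts, labels)
    test_data = (['a c e'], [1])
    cross_validation(
        train_data=train_data,
        test_data=test_data,
        input_length=10,
        feature_type='word',
        num_filter_list=[50, 100],
        region_size_list=[2, 3],
        verbose=0,
    )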
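# The training snippet below assumes script-level state from the surrounding
# experiment; a hedged setup sketch (all values are illustrative assumptions):
rand_seed = 1337
layer1 = 100                   # assumed number of filters in the first conv layer
sentence_padding_length = 50   # assumed padded sentence length
feature_encoder = OnehotBowCNN.get_feature_encoder(
    input_length=sentence_padding_length,
    feature_type='word',
)
bow_length = feature_encoder.vocabulary_size
index_to_label = list(range(24))  # hypothetical label index list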
print('bow length:%d' % bow_length)
print('=' * 80)
onehot_cnn = OnehotBowCNN(
    rand_seed=rand_seed,
    verbose=0,
    feature_encoder=feature_encoder,
    # optimizers='adadelta',
    optimizers='sgd',
    input_length=sentence_padding_length,
    input_dim=feature_encoder.vocabulary_size,
    num_labels=len(index_to_label),
    l1_conv_filter_type=[
        [layer1, 2, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization'],
        # [layer1, 3, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization'],
        # [1000, 4, -1, 'valid', (2, 1), 0., 'relu', 'batch_normalization'],
    ],
    l2_conv_filter_type=[
        # [layer2, 2, -1, 'valid', (2, 1), 0.25, 'relu', 'none'],
    ],
    full_connected_layer_units=[
        # (hidden1, 0.5, 'relu', 'batch_normalization'),
        # (hidden2, 0.5, 'relu', 'none'),
    ],
    embedding_dropout_rate=0.,
    nb_epoch=30,
    nb_batch=5,
    earlyStoping_patience=20,
    lr=1e-2,
)
onehot_cnn.print_model_descibe()
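# As in the config-driven blocks below, the constructed model would next be
# fitted and scored; a hedged continuation, assuming the usual
# (train_X_feature, train_y) / (test_X_feature, test_y) splits are in scope:
onehot_cnn.fit((train_X_feature, train_y),
               (test_X_feature, test_y))
onehot_cnn.accuracy((train_X_feature, train_y), transform_input=False)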
# +++++++++++++ region start : 3. Build the one-hot encoding model +++++++++++++
# ****************************************************************
logging.debug('=' * 20)
logging.debug('3. Build the one-hot encoding model')

onehot_cnn = OnehotBowCNN(
    rand_seed=1337,
    verbose=config['verbose'],
    feature_encoder=feature_encoder,
    # optimizers='adadelta',
    optimizers=config['optimizers'],
    input_length=sentence_padding_length,
    num_labels=len(index_to_label),
    conv1_filter_type=config['l1_conv_filter_type'],
    conv2_filter_type=config['l2_conv_filter_type'],
    full_connected_layer_units=config['full_connected_layer_units'],
    output_dropout_rate=config['output_dropout_rate'],
    nb_epoch=config['nb_epoch'],
    nb_batch=config['nb_batch'],
    earlyStoping_patience=config['earlyStoping_patience'],
    lr=config['lr'],
)
onehot_cnn.print_model_descibe()
onehot_cnn.fit((train_X_feature, train_y),
               (test_X_feature, test_y))
onehot_cnn.accuracy((train_X_feature, train_y), transform_input=False)
logging.debug('=' * 20)
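# A hedged example of the config dict read above and below; the keys match the
# lookups in these blocks, while the values are illustrative assumptions
# mirroring the hard-coded settings elsewhere in this file.
config = {
    'rand_seed': 1337,
    'verbose': 1,
    'optimizers': 'sgd',
    'l1_conv_filter_type': [[100, 2, -1, 'bow', (2, 1), 0., 'relu', 'batch_normalization']],
    'l2_conv_filter_type': [],
    'full_connected_layer_units': [],
    'output_dropout_rate': 0.,
    'nb_epoch': 30,
    'nb_batch': 32,
    'earlyStoping_patience': 20,
    'lr': 1e-2,
}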
# ****************************************************************
logging.debug('=' * 20)
logging.debug('3. Build the one-hot encoding model')

import numpy as np
from deep_learning.cnn.wordEmbedding_cnn.onehot_cnn_model import OnehotBowCNN

onehot_cnn = OnehotBowCNN(
    rand_seed=config['rand_seed'],
    verbose=config['verbose'],
    feature_encoder=feature_encoder,
    # optimizers='adadelta',
    optimizers=config['optimizers'],
    input_length=sentence_padding_length,
    num_labels=len(index_to_label),
    conv1_filter_type=config['l1_conv_filter_type'],
    conv2_filter_type=config['l2_conv_filter_type'],
    full_connected_layer_units=config['full_connected_layer_units'],
    output_dropout_rate=config['output_dropout_rate'],
    nb_epoch=config['nb_epoch'],
    nb_batch=config['nb_batch'],
    earlyStoping_patience=config['earlyStoping_patience'],
    lr=config['lr'],
)
# np.random.seed(0)
print(np.random.randint(0, 100))  # sanity check on the RNG state (debug)
# quit()
onehot_cnn.print_model_descibe()
onehot_cnn.fit((train_X_feature, train_y),
               (test_X_feature, test_y))
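# A hedged post-training continuation matching the earlier block: score the
# fitted model, here on the held-out split (assumed to be available as
# (test_X_feature, test_y)).
onehot_cnn.accuracy((test_X_feature, test_y), transform_input=False)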