verbose=1, need_segmented=True, full_mode=full_mode, remove_stopword=True, replace_number=True, lowercase=True, zhs2zht=True, remove_url=True, padding_mode='center', add_unkown_word=True, feature_type=feature_type, ) train_X_feature = feature_encoder.fit_transform(train_data=train_data['SENTENCE'].as_matrix()) feature_encoder.print_model_descibe() feature_encoder.print_sentence_length_detail() # train_y = train_data['LABEL_INDEX'].as_matrix() test_all_X_feature = feature_encoder.transform(test_data['SENTENCE'].as_matrix()) test_all_y = test_data['LABEL_INDEX'].as_matrix() print(train_X_feature.shape) print(test_all_X_feature.shape) logging.debug('=' * 20) # **************************************************************** # ------------- region end : 2. 转换数据的格式并特征编码 ------------- # ****************************************************************
remove_stopword=True, replace_number=True, lowercase=True, zhs2zht=True, remove_url=True, padding_mode='center', add_unkown_word=True, mask_zero=True) train_X_features = feature_encoder.fit_transform(train_data=train_X) test_X_features = feature_encoder.transform(test_X) feature_encoder.print_sentence_length_detail print(feature_encoder.vocabulary_size) # print ','.join(sorted(feature_encoder.vocabulary)) # quit() feature_encoder.print_model_descibe() # -------------- code start : 结束 ------------- if verbose > 2: logging.debug('-' * 20) print('-' * 20) # -------------- region end : 2. 转换数据格式,以可以进行分类 --------------- for seed in config['rand_seed']: # -------------- region start : 3. 初始化CNN模型并训练 ------------- if verbose > 2: logging.debug('-' * 20) print('-' * 20) logging.debug('3. 初始化CNN模型并训练') print('3. 初始化CNN模型并训练') # -------------- code start : 开始 -------------