def get_all_data(batch_size, sentence_len, word2idx, label2idx, fold_num,
                 data_dir="./data/"):
    """Load the develop, test and train splits and encode them into batches.

    The develop and test splits are encoded as-is. The train split is handed
    to ``DataUtils.k_fold`` and the resulting folds are encoded one by one.

    Args:
        batch_size: Forwarded unchanged to ``DataUtils``.
        sentence_len: Forwarded unchanged to ``DataUtils``.
        word2idx: Forwarded unchanged to ``DataUtils``.
        label2idx: Forwarded unchanged to ``DataUtils``.
        fold_num: Passed to ``DataUtils.k_fold`` and used as the number of
            folds to encode. When it equals 1, only the train part of fold 0
            is encoded and the per-fold develop lists are returned empty.
        data_dir: Directory passed to ``DataUtils.get_train_data`` for every
            split. Defaults to ``"./data/"``, the location that used to be
            hard-coded here.

    Returns:
        A 12-tuple, in this order:

        * ``k_train_idx_x_batches_list``, ``k_train_y_batches_list``,
          ``k_train_word_len_batches_list`` -- one entry per encoded fold,
          built from the train part of that fold;
        * ``k_develop_idx_x_batches_list``, ``k_develop_y_batches_list``,
          ``k_develop_word_len_batches_list`` -- one entry per encoded fold,
          built from the held-out part of that fold (empty lists when
          ``fold_num == 1``);
        * ``develop_idx_x_batches``, ``develop_y_batches``,
          ``develop_word_len_batches`` -- the encoded develop split;
        * ``test_idx_x_batches``, ``test_y_batches``,
          ``test_word_len_batches`` -- the encoded test split.

        NOTE(review): the element names suggest index-encoded inputs, labels
        and per-sentence lengths; the exact types are whatever
        ``DataUtils.encoder_data2idx_batch`` returns -- confirm there.
    """
    utils = DataUtils(batch_size=batch_size, sentence_len=sentence_len,
                      word2idx=word2idx, label2idx=label2idx)

    def _encode_and_collect(sentences, labels, x_list, y_list, len_list):
        # Encode one (sentences, labels) pair and append each of the three
        # results to its matching accumulator list.
        idx_x_batches, y_batches, word_len_batches = \
            utils.encoder_data2idx_batch(sentences, labels)
        x_list.append(idx_x_batches)
        y_list.append(y_batches)
        len_list.append(word_len_batches)

    # Development set.
    develop_sentences, develop_labels = utils.get_train_data(
        data_dir, mode='develop_')
    develop_idx_x_batches, develop_y_batches, develop_word_len_batches = \
        utils.encoder_data2idx_batch(develop_sentences, develop_labels)

    # Test set.
    test_sentences, test_labels = utils.get_train_data(data_dir, mode='test_')
    test_idx_x_batches, test_y_batches, test_word_len_batches = \
        utils.encoder_data2idx_batch(test_sentences, test_labels)

    # Training set, split into folds.
    train_sentences, train_labels = utils.get_train_data(
        data_dir, mode='train_')
    k_fold_x_train, k_fold_y_train, k_fold_x_test, k_fold_y_test = \
        DataUtils.k_fold(train_sentences, train_labels, fold_num)

    # The "k_" prefix marks data that was split off from the training set.
    k_train_idx_x_batches_list = []
    k_train_y_batches_list = []
    k_train_word_len_batches_list = []
    k_develop_idx_x_batches_list = []
    k_develop_y_batches_list = []
    k_develop_word_len_batches_list = []

    if fold_num != 1:
        for fold_idx in range(fold_num):
            _encode_and_collect(
                k_fold_x_train[fold_idx], k_fold_y_train[fold_idx],
                k_train_idx_x_batches_list,
                k_train_y_batches_list,
                k_train_word_len_batches_list)
            _encode_and_collect(
                k_fold_x_test[fold_idx], k_fold_y_test[fold_idx],
                k_develop_idx_x_batches_list,
                k_develop_y_batches_list,
                k_develop_word_len_batches_list)
    else:
        # Single fold: only the train part is encoded; the per-fold develop
        # lists are deliberately left empty, as in the original code.
        _encode_and_collect(
            k_fold_x_train[0], k_fold_y_train[0],
            k_train_idx_x_batches_list,
            k_train_y_batches_list,
            k_train_word_len_batches_list)

    return (
        k_train_idx_x_batches_list,
        k_train_y_batches_list,
        k_train_word_len_batches_list,
        k_develop_idx_x_batches_list,
        k_develop_y_batches_list,
        k_develop_word_len_batches_list,
        develop_idx_x_batches,
        develop_y_batches,
        develop_word_len_batches,
        test_idx_x_batches,
        test_y_batches,
        test_word_len_batches,
    )