def dump_data_for_nn():
    """Build the train/validation arrays, shuffle them, and pickle them to disk.

    Reads the accusation dictionary and the word dictionary/embedding through
    ``generator``, converts ``constant.DATA_TRAIN`` and ``constant.DATA_VALID``
    with ``generator.read_data_in_imprisonment_format``, shuffles each split
    with its own random permutation (features and labels share the same
    permutation so rows stay aligned), and writes four pickle files under
    ``./dump_data/nn/``.

    Note that the output files carry a ``.txt`` extension but contain binary
    pickle data, so they must be opened in ``'rb'`` mode when read back.

    Returns:
        None. The results are the files written to disk and progress
        messages printed to stdout.
    """
    # Local import so this function does not depend on the module's import block.
    import os

    # Only the forward dictionaries are needed here; the reverse mappings
    # returned alongside them are discarded.
    accu_dict, _ = generator.read_accu()
    word_dict, embedding, _ = generator.get_dictionary_and_embedding()

    print("reading data from training set...")
    train_data_x, train_data_y = generator.read_data_in_imprisonment_format(
        constant.DATA_TRAIN, embedding, word_dict, accu_dict)
    valid_data_x, valid_data_y = generator.read_data_in_imprisonment_format(
        constant.DATA_VALID, embedding, word_dict, accu_dict)
    print("reading complete!")

    # Shuffle the data randomly. The same permutation is applied to x and y
    # of a split so every sample keeps its label.
    permutation_for_train = np.random.permutation(train_data_x.shape[0])
    train_data_x = train_data_x[permutation_for_train, :]
    train_data_y = train_data_y[permutation_for_train]

    permutation_for_valid = np.random.permutation(valid_data_y.shape[0])
    valid_data_x = valid_data_x[permutation_for_valid, :]
    # Index labels along the first axis only, exactly as for the training
    # labels: `[perm, :]` raises IndexError when the labels are 1-D, whereas
    # `[perm]` reorders rows for both 1-D and 2-D label arrays.
    valid_data_y = valid_data_y[permutation_for_valid]

    # Make sure the target directory exists; open(..., 'wb') does not create
    # missing parent directories.
    os.makedirs('./dump_data/nn/', exist_ok=True)

    dump_targets = (
        ('./dump_data/nn/dump_train_x.txt', train_data_x),
        ('./dump_data/nn/dump_train_y_label.txt', train_data_y),
        ('./dump_data/nn/dump_valid_x.txt', valid_data_x),
        ('./dump_data/nn/dump_valid_y_label.txt', valid_data_y),
    )
    for dump_path, data in dump_targets:
        with open(dump_path, 'wb') as f:
            pickle.dump(data, f)
    print("dump complete!")
train_data_x = pickle.load(f) with open('./dump_data/nn/dump_train_y_label.txt', 'rb') as f: train_data_y = pickle.load(f) with open('./dump_data/nn/dump_valid_x.txt', 'rb') as f: valid_data_x = pickle.load(f) with open('./dump_data/nn/dump_valid_y_label.txt', 'rb') as f: valid_data_y = pickle.load(f) except: print( "No dump file read original file! Please wait... " "If u want to accelerate this process, please see read_me -> transform_data_to_feature_and_dump" ) accu_dict, reverse_accu_dict = generator.read_accu() word_dict, embedding, reverse_dictionary = generator.get_dictionary_and_embedding( ) train_data_x, train_data_y = generator.read_data_in_accu_format( constant.DATA_TRAIN, embedding, word_dict, accu_dict, one_hot=True) valid_data_x, valid_data_y = generator.read_data_in_accu_format( constant.DATA_VALID, embedding, word_dict, accu_dict, one_hot=True) print("reading complete!") # just test generate_accu_batch x, y = generator.generate_batch(training_batch_size, train_data_x, train_data_y) print(x.shape)