def train_teacher(dataset, nb_teachers, teacher_id):
    """
    Train one teacher (teacher_id) among an ensemble of nb_teachers models.

    The teacher is trained on its own partition of the training set, a
    checkpoint is written under FLAGS.train_dir, and the accuracy measured
    on the test set is printed.

    :param dataset: string naming the dataset ('svhn', 'cifar10' or 'mnist')
    :param nb_teachers: total number of teachers in the ensemble
    :param teacher_id: id of the teacher being trained
    :return: True if everything went well, False if the dataset name is not
             recognized
    :raises AssertionError: if a working directory helper or the training
             routine returns a falsy value
    """
    # If working directories do not exist, create them. The calls are made
    # outside of an `assert` statement so they still run under `python -O`
    # (which strips asserts); AssertionError is kept for compatibility.
    for directory in (FLAGS.data_dir, FLAGS.train_dir):
        if not input.create_dir_if_needed(directory):
            raise AssertionError(
                'create_dir_if_needed failed for: ' + str(directory))

    # Load the dataset
    if dataset == 'svhn':
        loaded = input.ld_svhn(extended=True)
    elif dataset == 'cifar10':
        loaded = input.ld_cifar10()
    elif dataset == 'mnist':
        loaded = input.ld_mnist()
    else:
        print("Check value of dataset flag")
        return False
    train_data, train_labels, test_data, test_labels = loaded

    # Optionally restrict the training pool to its first
    # FLAGS.teacher_data_share samples before partitioning
    if FLAGS.teacher_data_share:
        train_data = train_data[:FLAGS.teacher_data_share]
        train_labels = train_labels[:FLAGS.teacher_data_share]

    # Retrieve subset of data for this teacher
    data, labels = input.partition_dataset(
        train_data, train_labels, nb_teachers, teacher_id)
    print("Length of training data: " + str(len(labels)))

    # Define teacher checkpoint filename and full path
    suffix = '_deep.ckpt' if FLAGS.deeper else '.ckpt'
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + suffix
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

    # Perform teacher training (must run even when asserts are disabled)
    if not deep_cnn.train(data, labels, ckpt_path):
        raise AssertionError('Teacher training failed for: ' + ckpt_path)

    # Append final step value to checkpoint for evaluation
    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Retrieve teacher probability estimates on the test data
    teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

    # Compute teacher accuracy on the test set
    accuracy = metrics.accuracy(teacher_preds, test_labels)
    print('Precision of teacher after training: ' + str(accuracy))

    return True
def train_teacher(dataset, nb_teachers, teacher_id):
    """
    Train one teacher (teacher_id) among an ensemble of nb_teachers models.

    The teacher is trained on its own partition of the training set, a
    checkpoint is written under FLAGS.train_dir, and the accuracy measured
    on the test set is printed.

    :param dataset: string naming the dataset ('svhn', 'cifar10' or 'mnist')
    :param nb_teachers: total number of teachers in the ensemble
    :param teacher_id: id of the teacher being trained
    :return: True if everything went well, False if the dataset name is not
             recognized
    :raises AssertionError: if a working directory helper or the training
             routine returns a falsy value
    """
    # If working directories do not exist, create them. The calls are made
    # outside of an `assert` statement so they still run under `python -O`
    # (which strips asserts); AssertionError is kept for compatibility.
    for directory in (FLAGS.data_dir, FLAGS.train_dir):
        if not input.create_dir_if_needed(directory):
            raise AssertionError(
                'create_dir_if_needed failed for: ' + str(directory))

    # Load the dataset
    if dataset == 'svhn':
        loaded = input.ld_svhn(extended=True)
    elif dataset == 'cifar10':
        loaded = input.ld_cifar10()
    elif dataset == 'mnist':
        loaded = input.ld_mnist()
    else:
        print("Check value of dataset flag")
        return False
    train_data, train_labels, test_data, test_labels = loaded

    # Retrieve subset of data for this teacher
    data, labels = input.partition_dataset(
        train_data, train_labels, nb_teachers, teacher_id)
    print("Length of training data: " + str(len(labels)))

    # Define teacher checkpoint filename and full path
    suffix = '_deep.ckpt' if FLAGS.deeper else '.ckpt'
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + suffix
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

    # Perform teacher training (must run even when asserts are disabled)
    if not deep_cnn.train(data, labels, ckpt_path):
        raise AssertionError('Teacher training failed for: ' + ckpt_path)

    # Append final step value to checkpoint for evaluation
    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Retrieve teacher probability estimates on the test data
    teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)

    # Compute teacher accuracy on the test set
    accuracy = metrics.accuracy(teacher_preds, test_labels)
    print('Precision of teacher after training: ' + str(accuracy))

    return True
def train_student(dataset, nb_teachers):
    """
    Train a student using predictions made by an ensemble of teachers.

    The student and teacher models are trained using the same neural network
    architecture. The student is trained on the labeled data returned by
    prepare_student_data and evaluated on the held-out part of that data;
    the resulting accuracy is printed.

    :param dataset: string corresponding to mnist, cifar10, or svhn
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :return: True if student training went well, False if the student data
             could not be prepared (e.g. unrecognized dataset name)
    :raises AssertionError: if the training directory helper or the training
             routine returns a falsy value
    """
    # Called outside of an `assert` so it still runs under `python -O`;
    # AssertionError is kept so the failure type is unchanged.
    if not input.create_dir_if_needed(FLAGS.train_dir):
        raise AssertionError(
            'create_dir_if_needed failed for: ' + str(FLAGS.train_dir))

    # Call helper function to prepare student data using teacher predictions
    stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True)

    # prepare_student_data returns False (after printing a message) when the
    # dataset name is unknown; propagate that instead of failing to unpack
    if stdnt_dataset is False:
        return False

    # Unpack the student dataset
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

    # Prepare checkpoint filename and path
    suffix = '_student_deeper.ckpt' if FLAGS.deeper else '_student.ckpt'
    ckpt_path = (FLAGS.train_dir + '/' + str(dataset) + '_'
                 + str(nb_teachers) + suffix)

    # Start student training (must run even when asserts are disabled)
    if not deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path):
        raise AssertionError('Student training failed for: ' + ckpt_path)

    # Compute final checkpoint name for student (with max number of steps)
    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Compute student label predictions on remaining chunk of test set
    student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final)

    # Compute student accuracy on the held-out test data
    accuracy = metrics.accuracy(student_preds, stdnt_test_labels)
    print('Precision of student after training: ' + str(accuracy))

    return True
def train_student(dataset, nb_teachers):
    """
    Train a student using predictions made by an ensemble of teachers.

    The student and teacher models are trained using the same neural network
    architecture. The student is trained on the labeled data returned by
    prepare_student_data and evaluated on the held-out part of that data;
    the resulting accuracy is printed.

    :param dataset: string corresponding to mnist, cifar10, or svhn
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :return: True if student training went well, False if the student data
             could not be prepared (e.g. unrecognized dataset name)
    :raises AssertionError: if the training directory helper or the training
             routine returns a falsy value
    """
    # Called outside of an `assert` so it still runs under `python -O`;
    # AssertionError is kept so the failure type is unchanged.
    if not input.create_dir_if_needed(FLAGS.train_dir):
        raise AssertionError(
            'create_dir_if_needed failed for: ' + str(FLAGS.train_dir))

    # Call helper function to prepare student data using teacher predictions
    stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True)

    # prepare_student_data returns False (after printing a message) when the
    # dataset name is unknown; propagate that instead of failing to unpack
    if stdnt_dataset is False:
        return False

    # Unpack the student dataset
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

    # Prepare checkpoint filename and path
    suffix = '_student_deeper.ckpt' if FLAGS.deeper else '_student.ckpt'
    ckpt_path = (FLAGS.train_dir + '/' + str(dataset) + '_'
                 + str(nb_teachers) + suffix)

    # Start student training (must run even when asserts are disabled)
    if not deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path):
        raise AssertionError('Student training failed for: ' + ckpt_path)

    # Compute final checkpoint name for student (with max number of steps)
    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Compute student label predictions on remaining chunk of test set
    student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final)

    # Compute student accuracy on the held-out test data
    accuracy = metrics.accuracy(student_preds, stdnt_test_labels)
    print('Precision of student after training: ' + str(accuracy))

    return True
def prepare_student_data(dataset, nb_teachers, save=False):
    """
    Prepare training and test data for the student model.

    Takes a dataset name and the size of the teacher ensemble and prepares
    training data for the student model, according to the stdnt_share and
    lap_scale flags. The first FLAGS.stdnt_share test samples are labeled by
    aggregating the teacher ensemble's predictions; the remaining test
    samples are kept, with their true labels, for evaluating the student.

    :param dataset: string corresponding to mnist, cifar10, or svhn
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :param save: if set to True, will dump student training labels predicted
                 by the ensemble of teachers (with Laplacian noise) as npy
                 files. It also dumps the clean votes for each class
                 (without noise) and the labels assigned by teachers
    :return: (stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels)
             to be used for student training and testing, or False if the
             dataset name is not recognized
    :raises AssertionError: if the training directory helper returns a falsy
             value, or if FLAGS.stdnt_share leaves no test data over
    """
    # Called outside of an `assert` so it still runs under `python -O`;
    # AssertionError is kept so the failure type is unchanged.
    if not input.create_dir_if_needed(FLAGS.train_dir):
        raise AssertionError(
            'create_dir_if_needed failed for: ' + str(FLAGS.train_dir))

    # Load the dataset
    if dataset == 'svhn':
        test_data, test_labels = input.ld_svhn(test_only=True)
    elif dataset == 'cifar10':
        test_data, test_labels = input.ld_cifar10(test_only=True)
    elif dataset == 'mnist':
        test_data, test_labels = input.ld_mnist(test_only=True)
    else:
        print("Check value of dataset flag")
        return False

    # Make sure there is data leftover to be used as a test set. Checked
    # explicitly because an `assert` would be skipped under `python -O`.
    if not (FLAGS.stdnt_share < len(test_data)):
        raise AssertionError(
            'stdnt_share (' + str(FLAGS.stdnt_share) + ') must be smaller '
            'than the number of test samples (' + str(len(test_data)) + ')')

    def _dump_path(tag):
        # Build the path of the .npy dump identified by `tag` in data_dir
        return (FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers)
                + tag + '_lap_' + str(FLAGS.lap_scale) + '.npy')

    # Prepare [unlabeled] student training data (subset of test set)
    stdnt_data = test_data[:FLAGS.stdnt_share]

    # Compute teacher predictions for student training data
    teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data)

    # Aggregate teacher predictions to get student training labels
    if save:
        # Request clean votes and clean labels as well
        stdnt_labels, clean_votes, labels_for_dump = aggregation.noisy_max(
            teachers_preds, FLAGS.lap_scale, return_clean_votes=True)

        # Dump clean_votes array
        with tf.gfile.Open(_dump_path('_student_clean_votes'),
                           mode='w') as file_obj:
            np.save(file_obj, clean_votes)

        # Dump labels_for_dump array
        with tf.gfile.Open(_dump_path('_teachers_labels'),
                           mode='w') as file_obj:
            np.save(file_obj, labels_for_dump)
    else:
        stdnt_labels = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale)

    # Print accuracy of aggregated labels against the true test labels
    ac_ag_labels = metrics.accuracy(stdnt_labels,
                                    test_labels[:FLAGS.stdnt_share])
    print("Accuracy of the aggregated labels: " + str(ac_ag_labels))

    # Store unused part of test set for use as a test set after student
    # training
    stdnt_test_data = test_data[FLAGS.stdnt_share:]
    stdnt_test_labels = test_labels[FLAGS.stdnt_share:]

    if save:
        # Dump student noisy labels array (labels from noisy aggregation)
        with tf.gfile.Open(_dump_path('_student_labels'),
                           mode='w') as file_obj:
            np.save(file_obj, stdnt_labels)

    return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels
def prepare_student_data(dataset, nb_teachers, save=False):
    """
    Prepare training and test data for the student model.

    Takes a dataset name and the size of the teacher ensemble and prepares
    training data for the student model, according to the stdnt_share and
    lap_scale flags. The first FLAGS.stdnt_share test samples are labeled by
    aggregating the teacher ensemble's predictions; the remaining test
    samples are kept, with their true labels, for evaluating the student.

    :param dataset: string corresponding to mnist, cifar10, or svhn
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :param save: if set to True, will dump student training labels predicted
                 by the ensemble of teachers (with Laplacian noise) as npy
                 files. It also dumps the clean votes for each class
                 (without noise) and the labels assigned by teachers
    :return: (stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels)
             to be used for student training and testing, or False if the
             dataset name is not recognized
    :raises AssertionError: if the training directory helper returns a falsy
             value, or if FLAGS.stdnt_share leaves no test data over
    """
    # Called outside of an `assert` so it still runs under `python -O`;
    # AssertionError is kept so the failure type is unchanged.
    if not input.create_dir_if_needed(FLAGS.train_dir):
        raise AssertionError(
            'create_dir_if_needed failed for: ' + str(FLAGS.train_dir))

    # Load the dataset
    if dataset == 'svhn':
        test_data, test_labels = input.ld_svhn(test_only=True)
    elif dataset == 'cifar10':
        test_data, test_labels = input.ld_cifar10(test_only=True)
    elif dataset == 'mnist':
        test_data, test_labels = input.ld_mnist(test_only=True)
    else:
        print("Check value of dataset flag")
        return False

    # Make sure there is data leftover to be used as a test set. Checked
    # explicitly because an `assert` would be skipped under `python -O`.
    if not (FLAGS.stdnt_share < len(test_data)):
        raise AssertionError(
            'stdnt_share (' + str(FLAGS.stdnt_share) + ') must be smaller '
            'than the number of test samples (' + str(len(test_data)) + ')')

    def _dump_path(tag):
        # Build the path of the .npy dump identified by `tag` in data_dir
        return (FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers)
                + tag + '_lap_' + str(FLAGS.lap_scale) + '.npy')

    # Prepare [unlabeled] student training data (subset of test set):
    # the first FLAGS.stdnt_share samples of the test data
    stdnt_data = test_data[:FLAGS.stdnt_share]

    # Compute teacher predictions for student training data.
    # NOTE(review): per the original author's note this is a 3-d array
    # indexed by (teacher id, sample id, per-class probability) -- confirm
    # against ensemble_preds.
    teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data)

    # Aggregate teacher predictions to get student training labels
    if save:
        # Request clean votes and clean labels as well
        stdnt_labels, clean_votes, labels_for_dump = aggregation.noisy_max(
            teachers_preds, FLAGS.lap_scale, return_clean_votes=True)

        # Dump clean_votes array
        with tf.gfile.Open(_dump_path('_student_clean_votes'),
                           mode='w') as file_obj:
            np.save(file_obj, clean_votes)

        # Dump labels_for_dump array (saved in NumPy .npy format)
        with tf.gfile.Open(_dump_path('_teachers_labels'),
                           mode='w') as file_obj:
            np.save(file_obj, labels_for_dump)
    else:
        stdnt_labels = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale)

    # Print accuracy of aggregated labels against the true test labels
    ac_ag_labels = metrics.accuracy(stdnt_labels,
                                    test_labels[:FLAGS.stdnt_share])
    print("Accuracy of the aggregated labels: " + str(ac_ag_labels))

    # Store unused part of test set for use as a test set after student
    # training
    stdnt_test_data = test_data[FLAGS.stdnt_share:]
    stdnt_test_labels = test_labels[FLAGS.stdnt_share:]

    if save:
        # Dump student noisy labels array (labels from noisy aggregation)
        with tf.gfile.Open(_dump_path('_student_labels'),
                           mode='w') as file_obj:
            np.save(file_obj, stdnt_labels)

    return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels