def train_teacher(dataset, nb_teachers, teacher_id): """ This function trains a teacher (teacher id) among an ensemble of nb_teachers models for the dataset specified. :param dataset: string corresponding to dataset (svhn, cifar10) :param nb_teachers: total number of teachers in the ensemble :param teacher_id: id of the teacher being trained :return: True if everything went well """ # If working directories do not exist, create them assert input.create_dir_if_needed(FLAGS.data_dir) assert input.create_dir_if_needed(FLAGS.train_dir) # Load the dataset if dataset == 'svhn': train_data, train_labels, test_data, test_labels = input.ld_svhn( extended=True) elif dataset == 'cifar10': train_data, train_labels, test_data, test_labels = input.ld_cifar10() elif dataset == 'mnist': train_data, train_labels, test_data, test_labels = input.ld_mnist() else: print("Check value of dataset flag") return False if FLAGS.teacher_data_share: train_data = train_data[:FLAGS.teacher_data_share] train_labels = train_labels[:FLAGS.teacher_data_share] # Retrieve subset of data for this teacher data, labels = input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id) print("Length of training data: " + str(len(labels))) # Define teacher checkpoint filename and full path if FLAGS.deeper: filename = str(nb_teachers) + '_teachers_' + str( teacher_id) + '_deep.ckpt' else: filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt' ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename # Perform teacher training assert deep_cnn.train(data, labels, ckpt_path) # Append final step value to checkpoint for evaluation ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) # Retrieve teacher probability estimates on the test data teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final) # Compute teacher accuracy precision = metrics.accuracy(teacher_preds, test_labels) print('Precision of teacher after training: ' + str(precision)) return True
def train_teacher(dataset, nb_teachers, teacher_id): """ This function trains a teacher (teacher id) among an ensemble of nb_teachers models for the dataset specified. :param dataset: string corresponding to dataset (svhn, cifar10) :param nb_teachers: total number of teachers in the ensemble :param teacher_id: id of the teacher being trained :return: True if everything went well """ # If working directories do not exist, create them assert input.create_dir_if_needed(FLAGS.data_dir) assert input.create_dir_if_needed(FLAGS.train_dir) # Load the dataset if dataset == 'svhn': train_data,train_labels,test_data,test_labels = input.ld_svhn(extended=True) elif dataset == 'cifar10': train_data, train_labels, test_data, test_labels = input.ld_cifar10() elif dataset == 'mnist': train_data, train_labels, test_data, test_labels = input.ld_mnist() else: print("Check value of dataset flag") return False # Retrieve subset of data for this teacher data, labels = input.partition_dataset(train_data, train_labels, nb_teachers, teacher_id) print("Length of training data: " + str(len(labels))) # Define teacher checkpoint filename and full path if FLAGS.deeper: filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt' else: filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt' ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename # Perform teacher training assert deep_cnn.train(data, labels, ckpt_path) # Append final step value to checkpoint for evaluation ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) # Retrieve teacher probability estimates on the test data teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final) # Compute teacher accuracy precision = metrics.accuracy(teacher_preds, test_labels) print('Precision of teacher after training: ' + str(precision)) return True
def prepare_student_data(dataset, nb_teachers, save=False): """ Takes a dataset name and the size of the teacher ensemble and prepares training data for the student model, according to parameters indicated in flags above. :param dataset: string corresponding to mnist, cifar10, or svhn :param nb_teachers: number of teachers (in the ensemble) to learn from :param save: if set to True, will dump student training labels predicted by the ensemble of teachers (with Laplacian noise) as npy files. It also dumps the clean votes for each class (without noise) and the labels assigned by teachers :return: pairs of (data, labels) to be used for student training and testing """ assert input.create_dir_if_needed(FLAGS.train_dir) # Load the dataset if dataset == 'svhn': test_data, test_labels = input.ld_svhn(test_only=True) elif dataset == 'cifar10': test_data, test_labels = input.ld_cifar10(test_only=True) elif dataset == 'mnist': test_data, test_labels = input.ld_mnist(test_only=True) else: print("Check value of dataset flag") return False # Make sure there is data leftover to be used as a test set assert FLAGS.stdnt_share < len(test_data) # Prepare [unlabeled] student training data (subset of test set) stdnt_data = test_data[:FLAGS.stdnt_share] # Compute teacher predictions for student training data teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data) # Aggregate teacher predictions to get student training labels if not save: stdnt_labels = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale) else: # Request clean votes and clean labels as well stdnt_labels, clean_votes, labels_for_dump = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale, return_clean_votes=True) #NOLINT(long-line) # Prepare filepath for numpy dump of clean votes filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_clean_votes_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) # Prepare filepath for numpy dump of clean labels filepath_labels = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_teachers_labels_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) # Dump clean_votes array with tf.gfile.Open(filepath, mode='w') as file_obj: np.save(file_obj, clean_votes) # Dump labels_for_dump array with tf.gfile.Open(filepath_labels, mode='w') as file_obj: np.save(file_obj, labels_for_dump) # Print accuracy of aggregated labels ac_ag_labels = metrics.accuracy(stdnt_labels, test_labels[:FLAGS.stdnt_share]) print("Accuracy of the aggregated labels: " + str(ac_ag_labels)) # Store unused part of test set for use as a test set after student training stdnt_test_data = test_data[FLAGS.stdnt_share:] stdnt_test_labels = test_labels[FLAGS.stdnt_share:] if save: # Prepare filepath for numpy dump of labels produced by noisy aggregation filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_labels_lap_' + str(FLAGS.lap_scale) + '.npy' #NOLINT(long-line) # Dump student noisy labels array with tf.gfile.Open(filepath, mode='w') as file_obj: np.save(file_obj, stdnt_labels) return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels
def prepare_student_data(dataset, nb_teachers, save=False): """ Takes a dataset name and the size of the teacher ensemble and prepares training data for the student model, according to parameters indicated in flags above. :param dataset: string corresponding to mnist, cifar10, or svhn#与mnist、cifar10或svhn相对应的字符串 :param nb_teachers: number of teachers (in the ensemble) to learn from :param save: if set to True, will dump student training labels predicted by the ensemble of teachers (with Laplacian noise) as npy files. It also dumps the clean votes for each class (without noise) and the labels assigned by teachers#如果设置为True,则将教师(拉普拉斯噪声)预测的学生培 #训标签转储为npy文件,并将每个类(无噪音)的干净选票和老师指定的标签 :return: pairs of (data, labels) to be used for student training and testing """ assert input.create_dir_if_needed(FLAGS.train_dir) # Load the dataset if dataset == 'svhn': test_data, test_labels = input.ld_svhn(test_only=True) elif dataset == 'cifar10': test_data, test_labels = input.ld_cifar10(test_only=True) elif dataset == 'mnist': test_data, test_labels = input.ld_mnist(test_only=True) else: print("Check value of dataset flag") return False # Make sure there is data leftover to be used as a test set请确保将剩余的数据用作测试集 assert FLAGS.stdnt_share < len(test_data) # Prepare [unlabeled] student training data (subset of test set)准备[未标记]学生训练数据(测试集的子集) stdnt_data = test_data[:FLAGS.stdnt_share]#测试数据的学生共享(最后一个索引) # Compute teacher predictions for student training data 计算学生培训数据的教师预测 对我们的训练数据进行预测并存储在结果数组中 teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data)##3d数组(教师id、样本id、每个类的概率) # Aggregate teacher predictions to get student training labels 集合教师预测得到学生培训标签 if not save: stdnt_labels = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale) else: # Request clean votes and clean labels as well要求干净的选票和干净的标签 stdnt_labels, clean_votes, labels_for_dump = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale, return_clean_votes=True) #NOLINT(long-line) # Prepare filepath for numpy dump of clean votes 为干净选票的numpy转储准备文件路径 filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_clean_votes_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) # Prepare filepath for numpy dump of clean labels filepath_labels = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_teachers_labels_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) # Dump clean_votes array with tf.gfile.Open(filepath, mode='w') as file_obj: np.save(file_obj, clean_votes) # Dump labels_for_dump array with tf.gfile.Open(filepath_labels, mode='w') as file_obj: np.save(file_obj, labels_for_dump) #将数组保存为NumPy # Print accuracy of aggregated labels 打印汇总标签的精度 ac_ag_labels = metrics.accuracy(stdnt_labels, test_labels[:FLAGS.stdnt_share]) print("Accuracy of the aggregated labels: " + str(ac_ag_labels)) # Store unused part of test set for use as a test set after student training 在学生培训之后,存储未使用的测试集的一部分作为测试集 stdnt_test_data = test_data[FLAGS.stdnt_share:] stdnt_test_labels = test_labels[FLAGS.stdnt_share:] if save: # Prepare filepath for numpy dump of labels produced by noisy aggregation噪声聚合生成的标签的numpy转储准备filepath filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_labels_lap_' + str(FLAGS.lap_scale) + '.npy' #NOLINT(long-line) # Dump student noisy labels array 转储学生嘈杂的标签阵列 with tf.gfile.Open(filepath, mode='w') as file_obj: np.save(file_obj, stdnt_labels) return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels