예제 #1
0
def extract_feature(train_img, test_img, path=None):
    """
    Compute the features used by the kNN for the train and test images.

    The extractor is selected by ``config.extract_feature``:

    * ``'feature'``: run both image sets through ``network.pred`` with
      ``return_feature=True``, using the student checkpoint stored under
      ``<config.save_model>/<config.dataset>/knn_num_neighbor_<nb_teachers>``.
    * ``'hog'``: load HOG features from pickle files under
      ``config.hog_path``; any missing file is first generated with
      ``utils.save_hog``. The train features are spread over 10 files.
    * ``'pca'``: delegate to ``utils.pca(test_img, train_img)``.

    :param train_img: training images (must support ``len`` and slicing
        for the ``'hog'`` mode)
    :param test_img: test images
    :param path: unused; kept so existing callers keep working
    :return: ``(train_feature, test_feature)`` for ``'feature'`` and
        ``'hog'``; whatever ``utils.pca`` returns for ``'pca'``
    :raises ValueError: if ``config.extract_feature`` is none of the
        modes listed above
    """
    mode = config.extract_feature

    if mode == 'feature':
        dir_path = os.path.join(
            config.save_model, config.dataset,
            'knn_num_neighbor_' + str(config.nb_teachers))
        filename = os.path.join(
            dir_path,
            str(config.nb_teachers) + '_stdnt_resnet.checkpoint.pth.tar')
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        print('shape of extract feature', train_feature.shape)
        return train_feature, test_feature

    if mode == 'hog':
        # A single pickle holding every HOG feature is usually too large,
        # so the train features are split over a fixed number of files.
        num_pieces = 10
        # Ceiling division: every sample lands in exactly one piece.
        each_length = (len(train_img) + num_pieces - 1) // num_pieces
        train_data = None
        for idx in range(num_pieces):
            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval{} : {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)

            # NOTE: pickle.load executes arbitrary code from the file; the
            # cache directory must only contain files written by this code.
            with open(train_hog_path, 'rb') as f:
                piece = pickle.load(f)
            if train_data is None:
                train_data = piece
            else:
                train_data = np.vstack((train_data, piece))
            print('load hog feature shape', train_data.shape)

        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)
        return train_data, test_data

    if mode == 'pca':
        return utils.pca(test_img, train_img)

    # Previously the function fell off the end and returned None, which
    # only surfaced later as an unpacking error in the caller.
    raise ValueError(
        'Unsupported config.extract_feature: {!r}'.format(mode))
예제 #2
0
def extract_feature(train_img, test_img, path=None):
    """
    Compute the features used by the kNN for the train and test images.

    The extractor is selected by ``config.extract_feature``:

    * ``'feature'``: run both image sets through ``network.pred`` with
      ``return_feature=True`` using a saved student checkpoint.
    * ``'hog'``: load HOG features from pickle files under
      ``config.hog_path``; any missing file is first generated with
      ``utils.save_hog``. The train features are spread over 10 files.
    * ``'pca'``: delegate to ``utils.pca(test_img, train_img)``.

    For ``'hog'`` and ``'pca'`` every image is first converted with
    ``np.asarray``.

    :param train_img: iterable of training images
    :param test_img: iterable of test images
    :param path: optional checkpoint file of the student model from the
        last iteration, used by the ``'feature'`` mode. When ``None`` the
        built-in example checkpoint path is used.
    :return: ``(train_feature, test_feature)`` for ``'feature'`` and
        ``'hog'``; whatever ``utils.pca`` returns for ``'pca'``
    :raises ValueError: if ``config.extract_feature`` is none of the
        modes listed above
    """
    mode = config.extract_feature

    if mode == 'feature':
        # The feature extractor is the student model saved in the last
        # iteration. The fallback below is only an example checkpoint
        # location; pass ``path`` to point at the real one.
        if path is not None:
            filename = path
        else:
            filename = 'save_model/svhn/knn_num_neighbor_800/800_stdnt_.checkpoint.pth.tar'
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        return train_feature, test_feature

    train_img = [np.asarray(data) for data in train_img]
    test_img = [np.asarray(data) for data in test_img]

    if mode == 'hog':
        # A single pickle holding every HOG feature is usually too large
        # (e.g. for a big private dataset), so the train features are
        # split over a fixed number of files.
        num_pieces = 10
        # Ceiling division: every sample lands in exactly one piece.
        each_length = (len(train_img) + num_pieces - 1) // num_pieces
        train_data = None
        for idx in range(num_pieces):
            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval{} : {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)

            # NOTE: pickle.load executes arbitrary code from the file; the
            # cache directory must only contain files written by this code.
            with open(train_hog_path, 'rb') as f:
                piece = pickle.load(f)
            if train_data is None:
                train_data = piece
            else:
                train_data = np.vstack((train_data, piece))
            print('load hog feature shape', train_data.shape)

        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)

        return train_data, test_data

    if mode == 'pca':
        return utils.pca(test_img, train_img)

    # Previously the function fell off the end and returned None, which
    # only surfaced later as an unpacking error in the caller.
    raise ValueError(
        'Unsupported config.extract_feature: {!r}'.format(mode))
예제 #3
0
def train_student(nb_teachers):
    """
    Train a student model on data prepared from the teachers' predictions.

    The student dataset is obtained from ``prepare_student_data``, the model
    is trained with ``network.train_each_teacher`` for
    ``config.student_epoch`` epochs, and the result is evaluated on the
    student test split with ``hamming_accuracy``.

    The checkpoint is written to
    ``<config.save_model>/<config.dataset>/pate_num_teacher_<config.nb_teachers>``.
    Note that the directory name is derived from ``config.nb_teachers``,
    not from the ``nb_teachers`` argument.

    :param nb_teachers: number of teachers (in the ensemble) to learn from;
        forwarded to ``prepare_student_data``
    :return: True once training and evaluation have completed
    :raises RuntimeError: if ``config.resnet`` is not enabled, because no
        checkpoint location is defined for that configuration
    """
    # Prepare the student data using the teacher predictions, then unpack it.
    stdnt_dataset = prepare_student_data(nb_teachers, save=True)
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

    # Only the ResNet configuration defines where the student checkpoint
    # lives. Without this guard the code below failed with an
    # UnboundLocalError on ``filename``.
    if not config.resnet:
        raise RuntimeError(
            'train_student has no checkpoint path when config.resnet is '
            'disabled')

    dir_path = os.path.join(config.save_model, config.dataset,
                            'pate_num_teacher_' + str(config.nb_teachers))
    utils.mkdir_if_missing(dir_path)
    # The file name (including its spelling) is kept as-is so checkpoints
    # written by earlier runs are still found.
    filename = os.path.join(dir_path, '_stndent_resnet.checkpoint.pth.tar')

    print('stdnt_label used for train', stdnt_labels.shape)
    network.train_each_teacher(config.student_epoch, stdnt_data, stdnt_labels,
                               stdnt_test_data, stdnt_test_labels, filename)

    # Evaluate the freshly trained student on its held-out test split.
    final_preds = network.pred(stdnt_test_data, filename)
    precision = hamming_accuracy(final_preds, stdnt_test_labels, torch=False)
    print('Precision of student after training: ' + str(precision))

    return True
예제 #4
0
def ensemble_preds(nb_teachers, stdnt_data):
    """
    Query each teacher for one prediction per student sample.

    Teacher checkpoints are read from
    ``<config.save_model>/pate_<config.nb_teachers>``; the checkpoint file
    names are likewise built from ``config.nb_teachers`` and ``config.arch``.

    :param nb_teachers: number of teachers to query; this sizes the result
        and bounds the loop, so teachers ``0 .. nb_teachers - 1`` are used
    :param stdnt_data: unlabeled student data; must support ``len``
    :return: int32 array of shape ``(len(stdnt_data), nb_teachers)``
    """
    # One row per teacher while collecting; transposed on return.
    result = np.zeros((nb_teachers, len(stdnt_data)), dtype=np.float32)
    dir_path = os.path.join(config.save_model,
                            'pate_' + str(config.nb_teachers))

    # Iterate over the same count that sized ``result``. Looping over
    # config.nb_teachers instead either overran the array or left rows of
    # zeros whenever the two values differed.
    for teacher_id in range(nb_teachers):
        filename = os.path.join(
            dir_path,
            str(config.nb_teachers) + '_teachers_' + str(teacher_id) +
            config.arch + '.checkpoint.pth.tar')
        result[teacher_id] = network.pred(stdnt_data, filename)
        print("Computed Teacher " + str(teacher_id) + " softmax predictions")

    return np.asarray(result, dtype=np.int32).transpose()
예제 #5
0
def ensemble_preds(nb_teachers, stdnt_data):
    """
    Query each teacher for its predictions on the given data.

    All predictions are returned in a single array, which can then be
    aggregated into one prediction per input.

    The teacher checkpoint directory depends on ``config.dataset``:
    ``pate_num_teacher_<config.nb_teachers>`` for ``'celeba'`` and
    ``pate_market<config.nb_teachers>`` for ``'market'``, both under
    ``config.save_model``. The directory is created if it is missing.

    :param nb_teachers: number of teachers (in the ensemble) to query
    :param stdnt_data: unlabeled student training data; must support ``len``
    :return: float32 array of shape
        ``(nb_teachers, len(stdnt_data), config.nb_labels)`` holding the
        output of ``network.pred`` for every teacher
    :raises ValueError: if ``config.dataset`` is neither ``'celeba'`` nor
        ``'market'``
    """
    # The checkpoint directory does not depend on the teacher id, so it is
    # resolved once up front. Unsupported datasets previously crashed with
    # an UnboundLocalError on ``dir_path``.
    if config.dataset == 'celeba':
        dir_name = 'pate_num_teacher_' + str(config.nb_teachers)
    elif config.dataset == 'market':
        dir_name = 'pate_' + config.dataset + str(config.nb_teachers)
    else:
        raise ValueError(
            'ensemble_preds does not support dataset {!r}'.format(
                config.dataset))
    dir_path = os.path.join(config.save_model, dir_name)
    utils.mkdir_if_missing(dir_path)

    # Array that will hold the predictions of every teacher.
    result = np.zeros((nb_teachers, len(stdnt_data), config.nb_labels),
                      dtype=np.float32)

    for teacher_id in range(nb_teachers):
        # Path of the checkpoint file for the teacher with this id.
        filename = os.path.join(
            dir_path,
            str(config.nb_teachers) + '_teachers_' + str(teacher_id) +
            config.arch + '.checkpoint.pth.tar')
        result[teacher_id] = network.pred(stdnt_data, filename)

        # This can take a while with many teachers, so report progress.
        print("Computed Teacher " + str(teacher_id) + " softmax predictions")

    return result