예제 #1
0
    def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
        """Grid-search the number of neighbors for a kNN classifier.

        Each candidate k is scored on the validation split; scores are
        persisted to ``knn_grid_search.json`` in *output_dir*, and any
        combination already present there is skipped (resume support).

        Returns the PersistentDefaultDict holding all scores.
        """
        # k candidates: 1, then 2**x + 1 for x = 1..10 (3, 5, 9, ..., 1025)
        neighbor_counts = [1]
        for exponent in range(1, 11):
            neighbor_counts.append(2 ** exponent + 1)

        # persistent result store
        results = PersistentDefaultDict(output_dir.joinpath('knn_grid_search.json'))

        # train/validation splits
        train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
        val_data, val_labels = read_dataset(datasets_dir, f'{dataset_name}_val')

        for k in neighbor_counts:
            knn_params_str = f'n_neighbors.{k}'
            print(f"[+] {knn_params_str}")

            # skip combinations that were already scored in a previous run
            existing = results.as_dict().get(dataset_name, {})
            if knn_params_str in existing:
                continue

            # fit on train, score on validation
            classifier = KNNClassifier(k, n_jobs)
            classifier.fit(train_data, train_labels)
            results[dataset_name, knn_params_str] = classifier.score(val_data, val_labels)

        return results
예제 #2
0
 def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs, n_neighbors):
     """Train a kNN classifier on the train split and persist its test score."""
     results = PersistentDefaultDict(output_dir.joinpath('knn_test.json'))
     # name encodes dataset and hyperparameter for the result store
     classifier_name = f'classifier_{dataset_name}_knn_n_neighbors.{n_neighbors}'
     # fit on the full training split
     train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
     model = KNNClassifier(n_neighbors, n_jobs)
     model.fit(train_data, train_labels)
     # evaluate on the held-out test split and record the accuracy
     test_data, test_labels = read_dataset(datasets_dir, f'{dataset_name}_test')
     results[classifier_name] = model.score(test_data, test_labels)
예제 #3
0
def test_classifiers():
    """Train final kNN classifiers on 100k-sample subsets and score them on test.

    Uses the best n_neighbors found per dataset by the grid search; scores
    are persisted to ``final_knn.json``.
    """
    # best k per dataset (iteration order doubles as the evaluation order)
    n_neighbors_config = {
        "lsun_raw_color": 1,
        "lsun_color_log_scaled_normalized": 65,
        "celebA_raw_color": 33,
        "celebA_color_log_scaled_normalized": 129,
    }

    results = PersistentDefaultDict(RESULTS_DIR.joinpath('final_knn.json'))

    for dataset_name, best_k in n_neighbors_config.items():
        classifier_name = f"knn_{dataset_name}.100000"
        print(f"\n{classifier_name.upper()}")
        # 100k-sample training subset; full test split
        train_data, train_labels = subset_dataset(DATASETS_DIR,
                                                  f'{dataset_name}_train',
                                                  100_000)
        test_data, test_labels = read_dataset(
            DATASETS_DIR.joinpath(f'{dataset_name}_test'))
        # fit and score
        model = KNNClassifier(best_k, N_JOBS)
        model.fit(train_data, train_labels)
        results[classifier_name] = model.score(test_data, test_labels)
예제 #4
0
 def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs,
                      pca_target_variance, C):
     """Fit an eigenfaces (PCA + SVM) classifier, save it, then score it on test."""
     # name encodes dataset and both hyperparameters
     classifier_name = (f'classifier_{dataset_name}_eigenfaces'
                        f'_v.{pca_target_variance}_c.{C}')
     # full training split for the SVM; a 10k subset just to fit the PCA
     train_data, train_labels = read_dataset(datasets_dir,
                                             f'{dataset_name}_train')
     pca_fit_data, _ = read_dataset(datasets_dir,
                                    f'{dataset_name}_train',
                                    subset_to_size=10000)
     # two-stage fit: PCA basis first, then the SVM in PCA space
     model = PCAClassifier(pca_target_variance=pca_target_variance,
                           svm_params={'C': C})
     model.fit_pca(pca_fit_data)
     model.fit(train_data, train_labels)
     model.save(output_dir.joinpath(f'{classifier_name}.pickle'))
     # evaluate the persisted classifier on the test split
     PCAClassifier.test_classifier(classifier_name, dataset_name,
                                   datasets_dir, output_dir, n_jobs)
예제 #5
0
    def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
        """Grid-search PCA target variance and SVM C for the eigenfaces classifier.

        Every (variance, C) combination is scored on the validation split and
        persisted to ``eigenfaces_grid_search.json``; combinations already
        present are skipped, so the search can be resumed.

        Returns the PersistentDefaultDict holding all scores.

        Fix: the original re-read all three datasets from disk on EVERY grid
        combination. They are loop-invariant, so they are now loaded at most
        once — and, as before, not at all when every combination is cached.
        """
        # hyperparameter grid
        pca_target_variances = [0.25, 0.5, 0.75, 0.95]
        svm_grid = [{'C': [0.0001, 0.001, 0.01, 0.1]}]

        # init results
        results = PersistentDefaultDict(
            output_dir.joinpath('eigenfaces_grid_search.json'))

        # lazy one-time dataset loading (see docstring)
        data_loaded = False

        for pca_target_variance in pca_target_variances:
            # enumerate svm params
            for svm_params in PCAClassifier.generate_params(svm_grid):
                svm_params_str = "_".join(
                    [f'{k}.{v}' for k, v in svm_params.items()])
                params_str = f'pca_target_variance.{pca_target_variance}_{svm_params_str}'
                print(f"[+] {params_str}")

                # skip if result already exists
                if dataset_name in results.as_dict() and \
                   params_str in results.as_dict()[dataset_name]:
                    continue

                # load data on first use only
                if not data_loaded:
                    train_data, train_labels = read_dataset(
                        datasets_dir, f'{dataset_name}_train')
                    # smaller subset just for fitting the PCA basis
                    train_data_pca, _ = read_dataset(datasets_dir,
                                                     f'{dataset_name}_train',
                                                     subset_to_size=10000)
                    val_data, val_labels = read_dataset(datasets_dir,
                                                        f'{dataset_name}_val')
                    data_loaded = True

                # train and test classifier
                pca = PCAClassifier(pca_target_variance, svm_params)
                pca.fit_pca(train_data_pca)
                pca.fit(train_data, train_labels)
                score = pca.score(val_data, val_labels)

                # store result
                results[dataset_name, params_str] = score

        return results
예제 #6
0
 def test_classifier(classifier_name, dataset_name, datasets_dir,
                     output_dir, n_jobs):
     """Score a saved eigenfaces classifier on the test split and persist the result."""
     results = PersistentDefaultDict(
         output_dir.joinpath('eigenfaces_test.json'))
     # restore the pickled classifier
     model = PCAClassifier.load(
         output_dir.joinpath(f'{classifier_name}.pickle'))
     # evaluate on the held-out test split
     test_data, test_labels = read_dataset(datasets_dir,
                                           f'{dataset_name}_test')
     results[classifier_name] = model.score(test_data, test_labels)
예제 #7
0
    def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
        """Grid-search wavelet levels and sigma for the PRNU classifier.

        Scores each (levels, sigma) pair on the validation split; results are
        persisted to ``prnu_grid_search.json`` and existing entries skipped.

        Returns the PersistentDefaultDict holding all scores.
        """
        results = PersistentDefaultDict(
            output_dir.joinpath('prnu_grid_search.json'))

        # PRNU extraction works on raw uint8 images, hence flatten=False + cast
        train_data, train_labels = read_dataset(datasets_dir,
                                                f'{dataset_name}_train',
                                                flatten=False)
        val_data, val_labels = read_dataset(datasets_dir,
                                            f'{dataset_name}_val',
                                            flatten=False)
        train_data = train_data.astype(np.dtype('uint8'))
        val_data = val_data.astype(np.dtype('uint8'))

        # NOTE(review): np.arange accumulates float noise, so sigma keys can
        # look like 'sigma.0.35000000000000003'. The skip check uses the same
        # formatting, so keys stay self-consistent; rounding them now would
        # break resume against existing result files — leaving as-is.
        for levels in range(1, 5):
            for sigma in np.arange(0.05, 1, 0.05):
                prnu_params_str = f'levels.{levels}_sigma.{sigma}'
                print(f"[+] {prnu_params_str}")

                # skip pairs already scored in a previous run
                stored = results.as_dict().get(dataset_name, {})
                if prnu_params_str in stored:
                    continue

                # fit on train, score on validation
                model = PRNUClassifier(levels, sigma)
                model.fit(train_data, train_labels)
                results[dataset_name, prnu_params_str] = model.score(val_data,
                                                                     val_labels)

        return results
예제 #8
0
 def test_classifier(classifier_name, dataset_name, datasets_dir,
                     output_dir, n_jobs):
     """Score a saved PRNU classifier on the test split and persist the result."""
     print(f"\n{classifier_name.upper()}")
     results = PersistentDefaultDict(output_dir.joinpath('prnu_test.json'))
     # PRNU operates on raw uint8 image arrays (no flattening)
     test_data, test_labels = read_dataset(datasets_dir,
                                           f'{dataset_name}_test',
                                           flatten=False)
     test_data = test_data.astype(np.uint8)
     # restore the pickled classifier
     model = PRNUClassifier.load(
         output_dir.joinpath(f'{classifier_name}.pickle'))
     # evaluate and record the score
     results[classifier_name] = model.score(test_data, test_labels)
예제 #9
0
 def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs,
                      levels, sigma):
     """Fit a PRNU classifier on the train split, save it, then score it on test."""
     # name encodes dataset and both hyperparameters
     classifier_name = f'classifier_{dataset_name}_prnu_levels.{levels}_sigma.{sigma}'
     print(f"\n{classifier_name.upper()}")
     # PRNU operates on raw uint8 image arrays (no flattening)
     train_data, train_labels = read_dataset(datasets_dir,
                                             f'{dataset_name}_train',
                                             flatten=False)
     train_data = train_data.astype(np.uint8)
     # fit and persist the classifier
     model = PRNUClassifier(levels, sigma)
     model.fit(train_data, train_labels)
     model.save(output_dir.joinpath(f'{classifier_name}.pickle'))
     # evaluate the persisted classifier on the test split
     PRNUClassifier.test_classifier(classifier_name, dataset_name,
                                    datasets_dir, output_dir, n_jobs)
예제 #10
0
def test_classifiers():
    """Score the final eigenfaces classifiers (trained on 100k samples) on test.

    Loads each pickled classifier from CLASSIFIER_DIR and persists its
    test-split accuracy to ``final_eigenfaces.json``.
    """
    dataset_names = [
        "lsun_raw_color",
        "lsun_color_log_scaled_normalized",
        "celebA_raw_color",
        "celebA_color_log_scaled_normalized",
    ]

    results = PersistentDefaultDict(RESULTS_DIR.joinpath('final_eigenfaces.json'))

    for dataset_name in dataset_names:
        classifier_name = f"eigenfaces_{dataset_name}.100000"
        print(f"\n{classifier_name.upper()}")
        # restore the pickled classifier, then evaluate on the test split
        model = PCAClassifier.load(
            CLASSIFIER_DIR.joinpath(f'{classifier_name}.pickle'))
        test_data, test_labels = read_dataset(
            DATASETS_DIR.joinpath(f'{dataset_name}_test'))
        results[classifier_name] = model.score(test_data, test_labels)
예제 #11
0
def test_classifiers():
    """Score the final PRNU classifiers (trained on 100k samples) on test.

    Loads each pickled classifier from CLASSIFIER_DIR and persists its
    test-split accuracy to ``final_prnu.json``.
    """
    dataset_names = ["lsun_raw_prnu_color", "celebA_raw_prnu_color"]

    results = PersistentDefaultDict(RESULTS_DIR.joinpath(f'final_prnu.json'))

    for dataset_name in dataset_names:
        classifier_name = f'prnu_{dataset_name}.100000'
        print(f"\n{classifier_name.upper()}")
        # PRNU operates on raw uint8 image arrays (no flattening)
        test_data, test_labels = read_dataset(
            DATASETS_DIR.joinpath(f'{dataset_name}_test'), flatten=False)
        test_data = test_data.astype(np.uint8)
        # restore the pickled classifier and evaluate
        model = PRNUClassifier.load(
            CLASSIFIER_DIR.joinpath(f'{classifier_name}.pickle'))
        results[classifier_name] = model.score(test_data, test_labels)