def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
    # hyperparameter grid
    knn_grid = [1] + [(2**x) + 1 for x in range(1, 11)]

    # init results
    results = PersistentDefaultDict(output_dir.joinpath('knn_grid_search.json'))

    # load data
    train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
    val_data, val_labels = read_dataset(datasets_dir, f'{dataset_name}_val')

    for n_neighbors in knn_grid:
        knn_params_str = f'n_neighbors.{n_neighbors}'
        print(f"[+] {knn_params_str}")

        # skip if result already exists
        if dataset_name in results.as_dict() and \
           knn_params_str in results.as_dict()[dataset_name]:
            continue

        # train and test classifier
        knn = KNNClassifier(n_neighbors, n_jobs)
        knn.fit(train_data, train_labels)
        score = knn.score(val_data, val_labels)

        # store result
        results[dataset_name, knn_params_str] = score

    return results
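# `PersistentDefaultDict` is a repo-local helper whose definition is not part
# of this excerpt. Below is a minimal sketch consistent with how it is used
# here -- a tuple key like (dataset_name, params_str) addresses a nested dict,
# and every write is flushed to the backing JSON file so interrupted runs can
# be resumed. The repository's actual implementation may differ.
import json
from pathlib import Path

class PersistentDefaultDict:
    def __init__(self, path):
        self.path = Path(path)
        # resume from an existing results file if present
        self.data = json.loads(self.path.read_text()) if self.path.exists() else {}

    def as_dict(self):
        return self.data

    def __setitem__(self, key, value):
        if isinstance(key, tuple):
            # tuple key -> nested dict: results[group, subkey] = value
            group, subkey = key
            self.data.setdefault(group, {})[subkey] = value
        else:
            self.data[key] = value
        # persist after every write
        self.path.write_text(json.dumps(self.data, indent=2))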
def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs, n_neighbors):
    results = PersistentDefaultDict(output_dir.joinpath('knn_test.json'))

    # classifier name
    classifier_name = f'classifier_{dataset_name}_knn_n_neighbors.{n_neighbors}'

    # load data
    train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
    test_data, test_labels = read_dataset(datasets_dir, f'{dataset_name}_test')

    # train classifier
    knn = KNNClassifier(n_neighbors, n_jobs)
    knn.fit(train_data, train_labels)

    # test classifier
    score = knn.score(test_data, test_labels)
    results[classifier_name] = score
def test_classifiers():
    n_neighbors_config = {
        "lsun_raw_color": 1,
        "lsun_color_log_scaled_normalized": 65,
        "celebA_raw_color": 33,
        "celebA_color_log_scaled_normalized": 129
    }
    dataset_names = [
        "lsun_raw_color",
        "lsun_color_log_scaled_normalized",
        "celebA_raw_color",
        "celebA_color_log_scaled_normalized"
    ]

    results = PersistentDefaultDict(RESULTS_DIR.joinpath('final_knn.json'))

    for dataset_name in dataset_names:
        classifier_name = f"knn_{dataset_name}.100000"
        print(f"\n{classifier_name.upper()}")

        # load data
        train_data, train_labels = subset_dataset(DATASETS_DIR, f'{dataset_name}_train', 100_000)
        test_data, test_labels = read_dataset(DATASETS_DIR.joinpath(f'{dataset_name}_test'))

        # train
        knn = KNNClassifier(n_neighbors_config[dataset_name], N_JOBS)
        knn.fit(train_data, train_labels)

        # score
        score = knn.score(test_data, test_labels)
        results[classifier_name] = score
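# `KNNClassifier` is likewise repo-local. Judging from the constructor
# arguments used above (n_neighbors, n_jobs) and the fit/score interface, a
# thin wrapper around scikit-learn's KNeighborsClassifier would behave the
# same way; this sketch is an assumption, not the repository's implementation.
from sklearn.neighbors import KNeighborsClassifier

class KNNClassifier:
    def __init__(self, n_neighbors, n_jobs):
        self.model = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs)

    def fit(self, data, labels):
        self.model.fit(data, labels)

    def score(self, data, labels):
        # mean accuracy, as in sklearn's estimator API
        return self.model.score(data, labels)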
def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs, pca_target_variance, C):
    # classifier name
    classifier_name = f'classifier_{dataset_name}_eigenfaces_v.{pca_target_variance}_c.{C}'

    # load data
    train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
    train_data_pca, _ = read_dataset(datasets_dir, f'{dataset_name}_train', subset_to_size=10000)

    # train
    pca = PCAClassifier(pca_target_variance=pca_target_variance, svm_params={'C': C})
    pca.fit_pca(train_data_pca)
    pca.fit(train_data, train_labels)
    pca.save(output_dir.joinpath(f'{classifier_name}.pickle'))

    # test (test_classifier is the module-level function defined below)
    test_classifier(classifier_name, dataset_name, datasets_dir, output_dir, n_jobs)
def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
    # hyperparameter grid
    pca_target_variances = [0.25, 0.5, 0.75, 0.95]
    svm_grid = [{'C': [0.0001, 0.001, 0.01, 0.1]}]

    # init results
    results = PersistentDefaultDict(output_dir.joinpath('eigenfaces_grid_search.json'))

    for pca_target_variance in pca_target_variances:
        # enumerate svm params
        for svm_params in PCAClassifier.generate_params(svm_grid):
            svm_params_str = "_".join([f'{k}.{v}' for k, v in svm_params.items()])
            params_str = f'pca_target_variance.{pca_target_variance}_{svm_params_str}'
            print(f"[+] {params_str}")

            # skip if result already exists
            if dataset_name in results.as_dict() and \
               params_str in results.as_dict()[dataset_name]:
                continue

            # load data
            train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train')
            train_data_pca, _ = read_dataset(datasets_dir, f'{dataset_name}_train', subset_to_size=10000)
            val_data, val_labels = read_dataset(datasets_dir, f'{dataset_name}_val')

            # train and test classifier
            pca = PCAClassifier(pca_target_variance, svm_params)
            pca.fit_pca(train_data_pca)
            pca.fit(train_data, train_labels)
            score = pca.score(val_data, val_labels)

            # store result
            results[dataset_name, params_str] = score

    return results
def test_classifier(classifier_name, dataset_name, datasets_dir, output_dir, n_jobs):
    results = PersistentDefaultDict(output_dir.joinpath('eigenfaces_test.json'))

    # load data
    test_data, test_labels = read_dataset(datasets_dir, f'{dataset_name}_test')

    # load classifier
    pca = PCAClassifier.load(output_dir.joinpath(f'{classifier_name}.pickle'))

    # score
    score = pca.score(test_data, test_labels)
    results[classifier_name] = score
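# `PCAClassifier` (the "eigenfaces" baseline) is also repo-local. A sketch
# matching the calls above: fit_pca() fits a PCA keeping enough components to
# explain `pca_target_variance` of the variance, fit() trains a linear SVM on
# the projected data, generate_params() enumerates an sklearn-style grid, and
# save()/load() pickle the whole object. This assumes sklearn underneath; the
# actual class may differ.
import pickle
from sklearn.decomposition import PCA
from sklearn.model_selection import ParameterGrid
from sklearn.svm import LinearSVC

class PCAClassifier:
    def __init__(self, pca_target_variance, svm_params):
        # a float in (0, 1) makes PCA keep the smallest number of components
        # whose cumulative explained variance exceeds that fraction
        self.pca = PCA(n_components=pca_target_variance)
        self.svm = LinearSVC(**svm_params)

    @staticmethod
    def generate_params(grid):
        # enumerate all parameter combinations, sklearn-style
        return ParameterGrid(grid)

    def fit_pca(self, data):
        self.pca.fit(data)

    def fit(self, data, labels):
        self.svm.fit(self.pca.transform(data), labels)

    def score(self, data, labels):
        return self.svm.score(self.pca.transform(data), labels)

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, path):
        with open(path, 'rb') as f:
            return pickle.load(f)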
# imports used by the PRNU functions below
from itertools import product

import numpy as np

def grid_search(dataset_name, datasets_dir, output_dir, n_jobs):
    # init results
    results = PersistentDefaultDict(output_dir.joinpath('prnu_grid_search.json'))

    # load data
    train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train', flatten=False)
    val_data, val_labels = read_dataset(datasets_dir, f'{dataset_name}_val', flatten=False)
    train_data = train_data.astype(np.dtype('uint8'))
    val_data = val_data.astype(np.dtype('uint8'))

    # hyperparameter grid
    levels_range = range(1, 5)
    sigma_range = np.arange(0.05, 1, 0.05)

    for levels, sigma in product(levels_range, sigma_range):
        # classifier name
        prnu_params_str = f'levels.{levels}_sigma.{sigma}'
        print(f"[+] {prnu_params_str}")

        # skip if result already exists
        if dataset_name in results.as_dict() and \
           prnu_params_str in results.as_dict()[dataset_name]:
            continue

        # train and test classifier
        prnu = PRNUClassifier(levels, sigma)
        prnu.fit(train_data, train_labels)
        score = prnu.score(val_data, val_labels)

        # store result
        results[dataset_name, prnu_params_str] = score

    return results
def test_classifier(classifier_name, dataset_name, datasets_dir, output_dir, n_jobs):
    print(f"\n{classifier_name.upper()}")
    results = PersistentDefaultDict(output_dir.joinpath('prnu_test.json'))

    # load data
    test_data, test_labels = read_dataset(datasets_dir, f'{dataset_name}_test', flatten=False)
    test_data = test_data.astype(np.dtype('uint8'))

    # load classifier
    prnu = PRNUClassifier.load(output_dir.joinpath(classifier_name + '.pickle'))

    # score
    score = prnu.score(test_data, test_labels)
    results[classifier_name] = score
def train_classifier(dataset_name, datasets_dir, output_dir, n_jobs, levels, sigma):
    # classifier name
    classifier_name = f'classifier_{dataset_name}_prnu_levels.{levels}_sigma.{sigma}'
    print(f"\n{classifier_name.upper()}")

    # load data
    train_data, train_labels = read_dataset(datasets_dir, f'{dataset_name}_train', flatten=False)
    train_data = train_data.astype(np.dtype('uint8'))

    # train
    prnu = PRNUClassifier(levels, sigma)
    prnu.fit(train_data, train_labels)
    prnu.save(output_dir.joinpath(f'{classifier_name}.pickle'))

    # test (test_classifier is the module-level function defined above)
    test_classifier(classifier_name, dataset_name, datasets_dir, output_dir, n_jobs)
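# `PRNUClassifier` is repo-local as well. PRNU-style pipelines typically
# extract a noise residual per image with a wavelet denoising filter (here
# parameterized by `levels` and `sigma`), average the residuals of each class
# into a "fingerprint" during fit(), and classify by highest correlation at
# test time. The sketch below follows that recipe with PyWavelets; the
# repository's filter is likely more elaborate.
import pickle

import numpy as np
import pywt

class PRNUClassifier:
    def __init__(self, levels, sigma):
        self.levels = levels
        self.sigma = sigma
        self.fingerprints = {}

    def _residual(self, image):
        # noise residual = image minus its wavelet-denoised version
        gray = image.mean(axis=-1) if image.ndim == 3 else image
        gray = gray.astype(np.float64)
        coeffs = pywt.wavedec2(gray, 'db4', level=self.levels)
        # soft-threshold the detail coefficients, keep the approximation
        denoised = [coeffs[0]] + [
            tuple(pywt.threshold(d, self.sigma, mode='soft') for d in detail)
            for detail in coeffs[1:]
        ]
        rec = pywt.waverec2(denoised, 'db4')[:gray.shape[0], :gray.shape[1]]
        return gray - rec

    def fit(self, data, labels):
        labels = np.asarray(labels)
        residuals = np.stack([self._residual(img) for img in data])
        # per-class fingerprint = mean noise residual
        for label in np.unique(labels):
            self.fingerprints[label] = residuals[labels == label].mean(axis=0)

    def predict(self, data):
        labels, prints = zip(*self.fingerprints.items())
        preds = []
        for img in data:
            res = self._residual(img)
            # normalized correlation against each class fingerprint
            corrs = [np.corrcoef(res.ravel(), fp.ravel())[0, 1] for fp in prints]
            preds.append(labels[int(np.argmax(corrs))])
        return np.array(preds)

    def score(self, data, labels):
        return float(np.mean(self.predict(data) == np.asarray(labels)))

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, path):
        with open(path, 'rb') as f:
            return pickle.load(f)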
def test_classifiers(): dataset_names = [ "lsun_raw_color", "lsun_color_log_scaled_normalized", "celebA_raw_color", "celebA_color_log_scaled_normalized" ] results = PersistentDefaultDict(RESULTS_DIR.joinpath(f'final_eigenfaces.json')) for dataset_name in dataset_names: classifier_name = f"eigenfaces_{dataset_name}.100000" print(f"\n{classifier_name.upper()}") # load data test_data, test_labels = read_dataset(DATASETS_DIR.joinpath(f'{dataset_name}_test')) # load classifier pca = PCAClassifier.load(CLASSIFIER_DIR.joinpath(f'{classifier_name}.pickle')) # score score = pca.score(test_data, test_labels) results[classifier_name] = score
def test_classifiers(): dataset_names = ["lsun_raw_prnu_color", "celebA_raw_prnu_color"] results = PersistentDefaultDict(RESULTS_DIR.joinpath(f'final_prnu.json')) for dataset_name in dataset_names: # classifier name classifier_name = f'prnu_{dataset_name}.100000' print(f"\n{classifier_name.upper()}") # load data test_data, test_labels = read_dataset( DATASETS_DIR.joinpath(f'{dataset_name}_test'), flatten=False) test_data = test_data.astype(np.dtype('uint8')) # load classifier prnu = PRNUClassifier.load( CLASSIFIER_DIR.joinpath(f'{classifier_name}.pickle')) # score score = prnu.score(test_data, test_labels) results[classifier_name] = score