    # Tail of a prediction helper defined above this chunk.
    # Returns (hit: bool, predicted label, flattened per-class probabilities).
    # NOTE(review): presumably prediction_prob is a (1, n_classes) array — confirm against the caller.
    return predicted_class == test_label, predicted_class, np.ravel(prediction_prob)


""" MAIN SCRIPT"""
if __name__ == '__main__':

    # Global pipeline settings used by the helpers below.
    """ SETTINGS """
    settings.n_jobs = 1
    settings.codebook_size = 32
    settings.dense_sampling_density = 16
    settings.pca_reduction = 64

    start = time.time()

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining dense features...')
    try:
        # Fast path: reuse previously pickled descriptors/labels/indices.
        D, L, I = io.load_object('train_dense_descriptors', ignore=True), \
                  io.load_object('train_dense_labels', ignore=True), \
                  io.load_object('train_dense_indices', ignore=True)
    except IOError:
        # Cache miss: compute dense features in parallel and pickle them for next runs.
        D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames, train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=settings.n_jobs)
        io.save_object(D, 'train_dense_descriptors', ignore=True)
        io.save_object(L, 'train_dense_labels', ignore=True)
        io.save_object(I, 'train_dense_indices', ignore=True)
def train():
    """Cross-validate an intersection-kernel SVM on Fisher vectors of CNN features.

    Extracts VGG16 ``block5_conv2`` features once (cached on disk), then for
    every combination of PCA reduction factor (``pca_reduction``) and GMM
    codebook size (``codebook_size``) builds Fisher vectors, standardizes
    them, and runs a randomized hyperparameter search for an SVM with a
    precomputed histogram-intersection kernel.  All per-combination results
    and the overall best parameters are pickled under ``./ignore/``.

    Relies on module-level ``pca_reduction``, ``codebook_size``,
    ``params_distribution`` and ``n_iter`` being defined by the enclosing
    script.
    """
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    """ SETTINGS """
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    io.log('Obtaining dense CNN features...')
    start_feature = time.time()
    try:
        # Fast path: reuse previously pickled features.
        D, L, I = io.load_object('train_CNN_descriptors', ignore=True), \
                  io.load_object('train_CNN_labels', ignore=True), \
                  io.load_object('train_CNN_indices', ignore=True)
    except IOError:
        # Load the pretrained VGG model.
        base_model = VGG16(weights='imagenet')
        # Visualize topology in an image.
        plot(base_model, to_file='modelVGG16.png', show_shapes=True,
             show_layer_names=True)
        # Crop the model up to the 'block5_conv2' layer and use its
        # activations as dense local descriptors.
        model = Model(input=base_model.input,
                      output=base_model.get_layer('block5_conv2').output)
        D, L, I = feature_extraction.parallel_CNN_features(
            train_images_filenames, train_labels, model,
            num_samples_class=-1, n_jobs=settings.n_jobs)
        io.save_object(D, 'train_CNN_descriptors', ignore=True)
        io.save_object(L, 'train_CNN_labels', ignore=True)
        io.save_object(I, 'train_CNN_indices', ignore=True)
    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    # Parameter sweep for PCA reduction factor.
    for dim_red in pca_reduction:
        io.log('Applying PCA ... ')
        start_pca = time.time()
        # dim_red is a fraction of the original descriptor dimensionality.
        settings.pca_reduction = D.shape[1] * dim_red
        pca, D_pca = feature_extraction.pca(D)
        pca_time = time.time() - start_pca
        io.log('Elapsed time: {:.2f} s'.format(pca_time))

        # Parameter sweep for codebook size.
        for k in codebook_size:
            io.log('Creating GMM model (k = {})'.format(k))
            start_gmm = time.time()
            settings.codebook_size = k
            gmm = bovw.create_gmm(
                D_pca,
                'gmm_{}_pca_{}_CNNfeature'.format(k, settings.pca_reduction))
            gmm_time = time.time() - start_gmm
            io.log('Elapsed time: {:.2f} s'.format(gmm_time))

            io.log('Getting Fisher vectors from training set...')
            start_fisher = time.time()
            fisher, labels = bovw.fisher_vectors(D_pca, L, I, gmm,
                                                 normalization='l2')
            fisher_time = time.time() - start_fisher
            io.log('Elapsed time: {:.2f} s'.format(fisher_time))

            io.log('Scaling features...')
            start_scaler = time.time()
            std_scaler = StandardScaler().fit(fisher)
            vis_words = std_scaler.transform(fisher)
            scaler_time = time.time() - start_scaler
            io.log('Elapsed time: {:.2f} s'.format(scaler_time))

            io.log('Optimizing SVM hyperparameters...')
            start_crossvalidation = time.time()
            svm = SVC(kernel='precomputed')
            random_search = RandomizedSearchCV(svm, params_distribution,
                                               n_iter=n_iter,
                                               scoring='accuracy',
                                               n_jobs=settings.n_jobs,
                                               refit=False, cv=3, verbose=1)
            # Precompute Gram matrix (required by kernel='precomputed').
            gram = kernels.intersection_kernel(vis_words, vis_words)
            random_search.fit(gram, labels)
            crossvalidation_time = time.time() - start_crossvalidation
            io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

            # Convert MaskedArrays to ndarrays to avoid unpickling bugs.
            results = random_search.cv_results_
            results['param_C'] = results['param_C'].data

            # Append all parameter-scores combinations.
            # BUGFIX: key on BOTH swept parameters — keying on k alone made
            # later dim_red iterations overwrite earlier ones for equal k.
            cv_results.update({
                (k, dim_red): {
                    'cv_results': results,
                    'feature_time': feature_time,
                    'pca_time': pca_time,
                    'gmm_time': gmm_time,
                    'fisher_time': fisher_time,
                    'scaler_time': scaler_time,
                    'crossvalidation_time': crossvalidation_time,
                    'total_time': feature_time + pca_time + gmm_time +
                                  fisher_time + scaler_time +
                                  crossvalidation_time
                }
            })
            # Checkpoint after every combination.
            io.save_object(cv_results, 'intersection_svm_CNNfeatures',
                           ignore=True)

            # Keep the parameters which yielded the best accuracy so far.
            if random_search.best_score_ > best_accuracy:
                best_accuracy = random_search.best_score_
                best_params = random_search.best_params_
                best_params.update({'k': k, 'pca': dim_red})

            io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(best_params, 'best_params_intersection_svm_CNNfeatures',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, dim_red={}, C={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['pca'], best_params['C'],
        best_accuracy))
def train():
    """Optimize an RBF-kernel SVM over bag-of-visual-words SIFT features.

    Computes (or loads cached) SIFT descriptors for the training set, then
    for each codebook size in 2^7..2^15 builds a visual-word histogram
    representation (L1-normalized), standardizes it, and runs a randomized
    search over C and gamma.  Cross-validation results are checkpointed to
    a pickle after every codebook size; the best parameters are reported at
    the end.

    Relies on the module-level ``N_JOBS`` constant.
    """
    start = time.time()

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining sift features...')
    try:
        # Fast path: reuse previously pickled descriptors.
        D, L, I = io.load_object('train_sift_descriptors', ignore=True), \
                  io.load_object('train_sift_labels', ignore=True), \
                  io.load_object('train_sift_indices', ignore=True)
    except IOError:
        D, L, I, _ = feature_extraction.parallel_sift(train_images_filenames,
                                                      train_labels,
                                                      num_samples_class=-1,
                                                      n_jobs=N_JOBS)
        io.save_object(D, 'train_sift_descriptors', ignore=True)
        io.save_object(L, 'train_sift_labels', ignore=True)
        io.save_object(I, 'train_sift_indices', ignore=True)
    print('Time spend: {:.2f} s'.format(time.time() - start))

    # Start hyperparameters optimization
    print('\nSTARTING HYPERPARAMETER OPTIMIZATION FOR RBF SVM')
    codebook_k_values = [2 ** i for i in range(7, 16)]
    params_distribution = {
        'C': np.logspace(-4, 1, 10 ** 3),
        'gamma': np.logspace(-3, 1, 10 ** 3)
    }
    n_iter = 100
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # Hoisted out of the loop: the dtype conversion is loop-invariant
    # (it was previously re-applied on every codebook size).
    D = D.astype(np.uint32)

    # Iterate codebook values
    for k in codebook_k_values:
        temp = time.time()
        print('Creating codebook with {} visual words'.format(k))
        codebook = bovw.create_codebook(D, codebook_name='codebook_{}'.format(k))
        print('Time spend: {:.2f} s'.format(time.time() - temp))

        temp = time.time()
        print('Getting visual words from training set...')
        vis_words, labels = bovw.visual_words(D, L, I, codebook,
                                              normalization='l1')
        print('Time spend: {:.2f} s'.format(time.time() - temp))

        temp = time.time()
        print('Scaling features...')
        std_scaler = StandardScaler().fit(vis_words)
        vis_words = std_scaler.transform(vis_words)
        print('Time spend: {:.2f} s'.format(time.time() - temp))

        temp = time.time()
        print('Optimizing SVM hyperparameters...')
        svm = SVC(kernel='rbf')
        random_search = RandomizedSearchCV(svm, params_distribution,
                                           n_iter=n_iter,
                                           scoring='accuracy',
                                           n_jobs=N_JOBS, refit=False,
                                           verbose=1, cv=4)
        random_search.fit(vis_words, labels)
        print('Time spend: {:.2f} s'.format(time.time() - temp))

        # Convert MaskedArrays to ndarrays to avoid unpickling bugs
        results = random_search.cv_results_
        results['param_C'] = results['param_C'].data
        results['param_gamma'] = results['param_gamma'].data

        # Appending all parameter-scores combinations
        cv_results.update({k: results})
        # Checkpoint after every codebook size.
        io.save_object(cv_results, 'rbf_svm_optimization_norml1')

        # Obtaining the parameters which yielded the best accuracy
        if random_search.best_score_ > best_accuracy:
            best_accuracy = random_search.best_score_
            best_params = random_search.best_params_
            best_params.update({'k': k})

        print('-------------------------------\n')

    print('\nBEST PARAMS')
    print('k={}, C={} , gamma={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['gamma'],
        best_accuracy))

    print('Saving all cross-validation values...')
    io.save_object(cv_results, 'rbf_svm_optimization_norml1')
    print('Done')
def train():
    """Sweep dense-SIFT sampling, PCA dimension and GMM size for a Fisher-vector pipeline.

    For every combination of dense sampling density, PCA target dimension and
    GMM codebook size: extract (or load cached) dense descriptors, reduce
    them with PCA, fit a GMM, build L2-normalized Fisher vectors, standardize
    them, and run a randomized C search for an SVM with a precomputed
    histogram-intersection kernel.  Every combination's results are
    checkpointed; the overall best parameters are pickled and logged.

    Relies on module-level ``dense_sampling_density``, ``pca_reduction``,
    ``codebook_size``, ``params_distribution`` and ``n_iter``.
    """
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    """ SETTINGS """
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    # Parameter sweep for dense SIFT
    for ds in dense_sampling_density:
        io.log('Obtaining dense features with sampling parameter {}...'.format(
            ds))
        start_sift = time.time()
        # Helpers read the density from the shared settings module.
        settings.dense_sampling_density = ds
        try:
            # Fast path: reuse pickled features for this sampling density.
            D, L, I = io.load_object('train_dense_descriptors_{}'.format(settings.dense_sampling_density), ignore=True), \
                      io.load_object('train_dense_labels_{}'.format(settings.dense_sampling_density), ignore=True), \
                      io.load_object('train_dense_indices_{}'.format(settings.dense_sampling_density), ignore=True)
        except IOError:
            D, L, I, _ = feature_extraction.parallel_dense(
                train_images_filenames, train_labels, num_samples_class=-1,
                n_jobs=settings.n_jobs)
            io.save_object(D, 'train_dense_descriptors_{}'.format(
                settings.dense_sampling_density), ignore=True)
            io.save_object(L, 'train_dense_labels_{}'.format(
                settings.dense_sampling_density), ignore=True)
            io.save_object(I, 'train_dense_indices_{}'.format(
                settings.dense_sampling_density), ignore=True)
        sift_time = time.time() - start_sift
        io.log('Elapsed time: {:.2f} s'.format(sift_time))

        # Parameter sweep for PCA
        for dim_red in pca_reduction:
            io.log('Applying PCA (dim = {})...'.format(dim_red))
            start_pca = time.time()
            # feature_extraction.pca reads the target dim from settings.
            settings.pca_reduction = dim_red
            pca, D_pca = feature_extraction.pca(D)
            pca_time = time.time() - start_pca
            io.log('Elapsed time: {:.2f} s'.format(pca_time))

            # Parameter sweep for codebook size
            for k in codebook_size:
                io.log('Creating GMM model (k = {})'.format(k))
                start_gmm = time.time()
                settings.codebook_size = k
                gmm = bovw.create_gmm(
                    D_pca, 'gmm_{}_dense_{}_pca_{}'.format(k, ds, dim_red))
                gmm_time = time.time() - start_gmm
                io.log('Elapsed time: {:.2f} s'.format(gmm_time))

                io.log('Getting Fisher vectors from training set...')
                start_fisher = time.time()
                fisher, labels = bovw.fisher_vectors(D_pca, L, I, gmm,
                                                     normalization='l2')
                fisher_time = time.time() - start_fisher
                io.log('Elapsed time: {:.2f} s'.format(fisher_time))

                io.log('Scaling features...')
                start_scaler = time.time()
                std_scaler = StandardScaler().fit(fisher)
                vis_words = std_scaler.transform(fisher)
                scaler_time = time.time() - start_scaler
                io.log('Elapsed time: {:.2f} s'.format(scaler_time))

                io.log('Optimizing SVM hyperparameters...')
                start_crossvalidation = time.time()
                svm = SVC(kernel='precomputed')
                random_search = RandomizedSearchCV(svm, params_distribution,
                                                   n_iter=n_iter,
                                                   scoring='accuracy',
                                                   n_jobs=settings.n_jobs,
                                                   refit=False, cv=3,
                                                   verbose=1)
                # Precompute Gram matrix (required by kernel='precomputed')
                gram = kernels.intersection_kernel(vis_words, vis_words)
                random_search.fit(gram, labels)
                crossvalidation_time = time.time() - start_crossvalidation
                io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

                # Convert MaskedArrays to ndarrays to avoid unpickling bugs
                results = random_search.cv_results_
                results['param_C'] = results['param_C'].data

                # Appending all parameter-scores combinations
                # (keyed on the full parameter tuple, so nothing overwrites).
                cv_results.update({
                    (k, dim_red, ds): {
                        'cv_results': results,
                        'sift_time': sift_time,
                        'pca_time': pca_time,
                        'gmm_time': gmm_time,
                        'fisher_time': fisher_time,
                        'scaler_time': scaler_time,
                        'crossvalidation_time': crossvalidation_time,
                        'total_time': sift_time + pca_time + gmm_time +
                                      fisher_time + scaler_time +
                                      crossvalidation_time
                    }
                })
                # Checkpoint after every combination.
                io.save_object(
                    cv_results,
                    'intersection_svm_optimization_fisher_vectors_l2',
                    ignore=True)

                # Obtaining the parameters which yielded the best accuracy
                if random_search.best_score_ > best_accuracy:
                    best_accuracy = random_search.best_score_
                    best_params = random_search.best_params_
                    best_params.update({'k': k, 'pca': dim_red, 'ds': ds})

                io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(
        best_params,
        'best_params_intersection_svm_optimization_fisher_vectors_l2',
        ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_optimization_fisher_vectors_l2.pickle'
    )
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results,
                   'intersection_svm_optimization_fisher_vectors_l2',
                   ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_optimization_fisher_vectors_l2.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, C={}, dim_red={}, dense_grid={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['pca'],
        best_params['ds'], best_accuracy))