import numpy as np


def fisher_vectors(X, y, descriptors_indices, codebook, normalization=None, spatial_pyramid=False):
    from libraries.yael.yael import ynumpy

    # Compute a Fisher vector for each image (an image can have multiple descriptors)
    X = np.float32(X)
    fv = np.array([
        ynumpy.fisher(codebook, X[descriptors_indices == i], include=['mu', 'sigma'])
        for i in range(0, descriptors_indices.max() + 1)
    ])

    # TODO: Spatial Pyramid Option

    # Normalization
    if normalization == 'l1':
        fisher_vect = fv / np.sum(np.abs(fv), axis=1, keepdims=True)
    elif normalization == 'l2':
        # Normalize each Fisher vector by its own L2 norm (per row, hence axis=1)
        fisher_vect = fv / np.linalg.norm(fv, axis=1, keepdims=True)
    elif normalization == 'power':
        fisher_vect = np.multiply(np.sign(fv), np.sqrt(np.absolute(fv)))
    else:
        fisher_vect = fv

    # One label per image: take the label of its first descriptor
    labels = [
        y[descriptors_indices == i][0]
        for i in range(0, descriptors_indices.max() + 1)
    ]

    return fisher_vect, np.array(labels)
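# Minimal usage sketch (illustrative only, not part of the original pipeline):
# `local_descs`, `desc_labels` and `image_ids` are hypothetical arrays standing in
# for the per-descriptor data built elsewhere in this project; the codebook is a
# yael GMM, as used throughout the repository.
def _fisher_vectors_usage_example(local_descs, desc_labels, image_ids):
    from libraries.yael.yael import ynumpy
    # Learn a small GMM codebook (k = 32 components) on the stacked descriptors
    codebook = ynumpy.gmm_learn(np.float32(local_descs), 32)
    # One power-normalized Fisher vector per image, with its class label
    fv, fv_labels = fisher_vectors(local_descs, desc_labels, image_ids,
                                   codebook, normalization='power')
    return fv, fv_labels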
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    base_model = VGG16(weights='imagenet')

    # Crop the model up to a certain layer
    model = Model(input=base_model.input,
                  output=base_model.get_layer('block5_conv2').output)

    # Read the training set
    train_images_filenames = cPickle.load(
        open('./dataset/train_images_filenames.dat', 'r'))
    test_images_filenames = cPickle.load(
        open('./dataset/test_images_filenames.dat', 'r'))
    train_labels = cPickle.load(open('./dataset/train_labels.dat', 'r'))
    test_labels = cPickle.load(open('./dataset/test_labels.dat', 'r'))
    io.log('\nLoaded {} train images.'.format(len(train_images_filenames)))
    io.log('\nLoaded {} test images.'.format(len(test_images_filenames)))

    # Read and process training images
    print 'Getting features from training images'
    start_feature = time.time()

    Train_descriptors = []
    Train_label_per_descriptor = []

    for i in range(len(train_images_filenames)):
        img = image.load_img(train_images_filenames[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # Get the features from the image
        features = model.predict(x)
        features = features[0, :, :, :]
        descriptor = features.reshape(
            features.shape[0] * features.shape[1], features.shape[2])

        Train_descriptors.append(descriptor)
        Train_label_per_descriptor.append(train_labels[i])

    # Put all descriptors in a numpy array to compute PCA and GMM.
    # CNN activations are real-valued, so store them as float32 (uint8 would truncate them).
    size_descriptors = Train_descriptors[0].shape[1]
    Desc = np.zeros(
        (np.sum([len(p) for p in Train_descriptors]), size_descriptors),
        dtype=np.float32)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        Desc[startingpoint:startingpoint + len(Train_descriptors[i])] = Train_descriptors[i]
        startingpoint += len(Train_descriptors[i])

    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    for dim_red in pca_reduction:
        io.log('Applying PCA...')
        start_pca = time.time()
        reduction = np.int(dim_red * Desc.shape[1])
        pca = decomposition.PCA(n_components=reduction)
        pca.fit(Desc)
        Desc_pca = np.float32(pca.transform(Desc))
        pca_time = time.time() - start_pca
        io.log('Elapsed time: {:.2f} s'.format(pca_time))

        for k in codebook_size:
            io.log('Creating GMM model (k = {})'.format(k))
            start_gmm = time.time()
            gmm = ynumpy.gmm_learn(np.float32(Desc_pca), k)
            io.save_object(gmm, 'gmm_NN_pca_{}_k_{}'.format(reduction, k))
            gmm_time = time.time() - start_gmm
            io.log('Elapsed time: {:.2f} s'.format(gmm_time))

            io.log('Getting Fisher vectors from training set...')
            start_fisher = time.time()
            fisher = np.zeros((len(Train_descriptors), k * reduction * 2), dtype=np.float32)
            for i in xrange(len(Train_descriptors)):
                descriptor = Train_descriptors[i]
                descriptor = np.float32(pca.transform(descriptor))
                fisher[i, :] = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
                # L2 normalization - reshape to avoid a deprecation warning (result is unchanged)
                fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1, -1), norm='l2')
            fisher_time = time.time() - start_fisher
            io.log('Elapsed time: {:.2f} s'.format(fisher_time))

            io.log('Scaling features...')
            start_scaler = time.time()
            stdSlr = StandardScaler().fit(fisher)
            D_scaled = stdSlr.transform(fisher)
            scaler_time = time.time() - start_scaler
            io.log('Elapsed time: {:.2f} s'.format(scaler_time))

            io.log('Optimizing SVM hyperparameters...')
            start_crossvalidation = time.time()
            svm = SVC(kernel='precomputed')
            random_search = RandomizedSearchCV(
                svm,
                params_distribution,
                n_iter=n_iter,
                scoring='accuracy',
                refit=False,
                cv=3,
                verbose=1)
            # Precompute the Gram matrix with the intersection kernel
            gram = kernels.intersection_kernel(D_scaled, D_scaled)
            random_search.fit(gram, train_labels)
            crossvalidation_time = time.time() - start_crossvalidation
            io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

            # Convert MaskedArrays to ndarrays to avoid unpickling bugs
            results = random_search.cv_results_
            results['param_C'] = results['param_C'].data

            # Append all parameter-score combinations, keyed by (PCA reduction, codebook size)
            # so that runs with different dim_red values do not overwrite each other
            cv_results.update({
                (dim_red, k): {
                    'cv_results': results,
                    'feature_time': feature_time,
                    'pca_time': pca_time,
                    'gmm_time': gmm_time,
                    'fisher_time': fisher_time,
                    'scaler_time': scaler_time,
                    'crossvalidation_time': crossvalidation_time,
                    'total_time': feature_time + pca_time + gmm_time + fisher_time + scaler_time + crossvalidation_time
                }
            })
            io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)

            # Keep the parameters which yielded the best accuracy
            if random_search.best_score_ > best_accuracy:
                best_accuracy = random_search.best_score_
                best_params = random_search.best_params_
                best_params.update({'k': k, 'pca': dim_red})

            io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(best_params, 'best_params_intersection_svm_CNNfeatures', ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, dim_red={}, C={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['pca'], best_params['C'], best_accuracy))
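# `kernels.intersection_kernel` is defined elsewhere in the project; a minimal
# sketch of a histogram intersection kernel, assuming it maps two feature
# matrices of shapes (n, d) and (m, d) to an (n, m) Gram matrix of
# sum-of-minimums similarities, could look like this:
def _intersection_kernel_sketch(A, B):
    gram = np.zeros((A.shape[0], B.shape[0]), dtype=np.float32)
    for i in range(A.shape[0]):
        # Histogram intersection: sum over dimensions of the element-wise minimum
        gram[i, :] = np.minimum(A[i, :], B).sum(axis=1)
    return gram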
pca = decomposition.PCA(n_components=pca_reduction)
pca.fit(Desc)
Desc = np.float32(pca.transform(Desc))

print('Computing gmm with ' + str(k) + ' centroids')
gmm = ynumpy.gmm_learn(np.float32(Desc), k)
io.save_object(gmm, 'gmm_NN_pca256')

# Compute the Fisher vectors of the training images
print('Computing fisher vectors')
fisher = np.zeros((len(Train_descriptors), k * pca_reduction * 2), dtype=np.float32)
for i in xrange(len(Train_descriptors)):
    descriptor = Train_descriptors[i]
    descriptor = np.float32(pca.transform(descriptor))
    fisher[i, :] = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
    # L2 normalization - reshape to avoid a deprecation warning (result is unchanged)
    fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1, -1), norm='l2')

# Train an SVM classifier
stdSlr = StandardScaler().fit(fisher)
D_scaled = stdSlr.transform(fisher)
print 'Training the SVM classifier...'
clf = svm.SVC(kernel=kernels.intersection_kernel, C=C, probability=True).fit(D_scaled, train_labels)
# clf = io.load_object('clf_NN_pca256')
# io.save_object(clf, 'clf_NN_pca256')
# clf = io.load_object('clf_NN', ignore=False)

# Get all the test data and predict their labels
fisher_test = np.zeros((len(test_images_filenames), k * pca_reduction * 2), dtype=np.float32)
for i in range(len(Train_descriptors)):
    D[startingpoint:startingpoint + len(Train_descriptors[i])] = Train_descriptors[i]
    startingpoint += len(Train_descriptors[i])

k = 32

print 'Computing gmm with ' + str(k) + ' centroids'
init = time.time()
gmm = ynumpy.gmm_learn(np.float32(D), k)
end = time.time()
print 'Done in ' + str(end - init) + ' secs.'

init = time.time()
fisher = np.zeros((len(Train_descriptors), k * 128 * 2), dtype=np.float32)
for i in xrange(len(Train_descriptors)):
    fisher[i, :] = ynumpy.fisher(gmm, Train_descriptors[i], include=['mu', 'sigma'])
end = time.time()
print 'Done in ' + str(end - init) + ' secs.'

# Train a linear SVM classifier
stdSlr = StandardScaler().fit(fisher)
D_scaled = stdSlr.transform(fisher)
print 'Training the SVM classifier...'
clf = svm.SVC(kernel='linear', C=1).fit(D_scaled, train_labels)
print 'Done!'

# Get all the test data and predict their labels
fisher_test = np.zeros((len(test_images_filenames), k * 128 * 2), dtype=np.float32)
for i in range(len(test_images_filenames)):
# pca = decomposition.PCA(n_components=pca_reduction)
# pca.fit(Desc)
# Desc = np.float32(pca.transform(Desc))

print('Computing gmm with ' + str(k) + ' centroids')
gmm = ynumpy.gmm_learn(np.float32(Desc), k)
io.save_object(gmm, 'gmm_NN_agg_features_max')

# Compute the Fisher vectors of the training images
print('Computing fisher vectors')
fisher = np.zeros((len(Train_descriptors), k * 1 * 2), dtype=np.float32)
for i in xrange(len(Train_descriptors)):
    descriptor = Train_descriptors[i]
    # descriptor = np.float32(pca.transform(descriptor))
    aux = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
    fisher[i, :] = np.reshape(aux, [1, aux.shape[0]])
    # L2 normalization - reshape to avoid a deprecation warning (result is unchanged)
    fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1, -1), norm='l2')

# Train an SVM classifier
stdSlr = StandardScaler().fit(fisher)
D_scaled = stdSlr.transform(fisher)
print 'Training the SVM classifier...'
clf = svm.SVC(kernel=kernels.intersection_kernel, C=C, probability=True).fit(D_scaled, train_labels)
io.save_object(clf, 'clf_NN_pca256')
# clf = io.load_object('clf_NN', ignore=False)

# Get all the test data and predict their labels
fisher_test = np.zeros((len(test_images_filenames), k * 1 * 2), dtype=np.float32)