Example #1
def create_gmm(D, codebook_name=None):
    from libraries.yael.yael import ynumpy

    k = settings.codebook_size
    if codebook_name is not None:
        # Try to load a previously trained codebook
        try:
            gmm = io.load_object(codebook_name)
        except (IOError, EOFError):
            gmm = ynumpy.gmm_learn(np.float32(D), k)
            # Store the model with the provided name
            io.save_object(gmm, codebook_name)
    else:
        gmm = ynumpy.gmm_learn(np.float32(D), k)

    return gmm
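
A hedged usage sketch for the helper above, assuming settings and io are the project's own configuration and persistence modules; the descriptors array is a hypothetical stack of SIFT descriptors (one 128-D row per keypoint):

import numpy as np

# Hypothetical descriptor matrix: 10,000 local SIFT descriptors, 128-D each.
descriptors = np.random.rand(10000, 128).astype(np.float32)

settings.codebook_size = 64  # number of Gaussian components in the GMM
gmm = create_gmm(descriptors, codebook_name='gmm_64_sift')  # trains, or loads the cached model if present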
Example #2
def create_codebook(X, codebook_name=None, k_means_init='random'):
    k = settings.codebook_size
    batch_size = 20 * k if X.shape[0] > 20 * k else X.shape[0] // 10
    codebook = cluster.MiniBatchKMeans(n_clusters=k,
                                       verbose=False,
                                       batch_size=batch_size,
                                       compute_labels=False,
                                       reassignment_ratio=10**-4,
                                       init=k_means_init)

    if codebook_name is not None:
        # Try to load a previously trained codebook
        try:
            codebook = io.load_object(codebook_name)
        except (IOError, EOFError):
            codebook.fit(X)
            # Store the model with the provided name
            io.save_object(codebook, codebook_name)
    else:
        codebook.fit(X)

    return codebook
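
A similar hedged sketch: once the MiniBatchKMeans codebook has been fitted (or loaded from cache), descriptors are quantized into visual-word indices with its standard predict method. The descriptors array below is hypothetical:

import numpy as np

descriptors = np.random.rand(50000, 128).astype(np.float32)  # hypothetical local descriptors

settings.codebook_size = 512
codebook = create_codebook(descriptors, codebook_name='kmeans_512_sift')
assignments = codebook.predict(descriptors)  # nearest visual word for every descriptor
histogram = np.bincount(assignments, minlength=settings.codebook_size)  # bag-of-visual-words counts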
Example #3
def train_pyramid_svm(X,
                      y,
                      C=1,
                      standardize=True,
                      dim_reduction=None,
                      save_scaler=False,
                      save_pca=False,
                      model_name=None):

    # Standardize the data before classification if necessary
    std_scaler = None
    if standardize:
        std_scaler = preprocessing.StandardScaler()
        std_scaler.fit(X)
        X_std = std_scaler.transform(X)
    else:
        X_std = X

    clf = svm.SVC(kernel=kernels.pyramid_kernel, C=C, probability=True)

    if model_name is not None:
        # Instance of SVM classifier
        # Try to load a previously trained model
        try:
            clf = io.load_object(model_name)
        except (IOError, EOFError):
            clf.fit(X_std, y)
            # Store the model with the provided name
            io.save_object(clf, model_name)
    else:
        clf.fit(X_std, y)

    if save_scaler:
        io.save_object(std_scaler, save_scaler)

    return clf, std_scaler, None
Example #4
def train_poly_svm(X,
                   y,
                   C=1,
                   degree=3,
                   gamma='auto',
                   coef0=0.0,
                   standardize=True,
                   dim_reduction=None,
                   save_scaler=False,
                   save_pca=False,
                   model_name=None):
    # PCA for dimensionality reduction if necessary
    pca = None
    if dim_reduction is not None and dim_reduction > 0:
        pca = decomposition.PCA(n_components=dim_reduction)
        pca.fit(X)
        X = pca.transform(X)

    # Standardize the data before classification if necessary
    std_scaler = None
    if standardize:
        std_scaler = preprocessing.StandardScaler()
        std_scaler.fit(X)
        X_std = std_scaler.transform(X)
    else:
        X_std = X

    # Instance of SVM classifier
    clf = svm.SVC(kernel='poly',
                  C=C,
                  degree=degree,
                  gamma=gamma,
                  coef0=coef0,
                  probability=True)

    if model_name is not None:
        # Try to load a previously trained model
        try:
            clf = io.load_object(model_name)
        except (IOError, EOFError):
            clf.fit(X_std, y)
            # Store the model with the provided name
            io.save_object(clf, model_name)
    else:
        clf.fit(X_std, y)

    if save_scaler:
        io.save_object(std_scaler, save_scaler)

    if save_pca:
        io.save_object(pca, save_pca)

    return clf, std_scaler, pca
Example #5
def train_linear_svm(X,
                     y,
                     C=1,
                     standardize=True,
                     dim_reduction=23,
                     save_scaler=False,
                     save_pca=False,
                     model_name=None,
                     liblinear=False):
    # PCA for dimensionality reduction if necessary
    pca = None
    if dim_reduction is not None and dim_reduction > 0:
        pca = decomposition.PCA(n_components=dim_reduction)
        pca.fit(X)
        X = pca.transform(X)

    # Standardize the data before classification if necessary
    std_scaler = None
    if standardize:
        std_scaler = preprocessing.StandardScaler()
        std_scaler.fit(X)
        X_std = std_scaler.transform(X)
    else:
        X_std = X

    # Instance of SVM classifier
    clf = svm.LinearSVC(C=C, max_iter=5000,
                        tol=1e-4) if liblinear else svm.SVC(
                            kernel='linear', C=C, probability=True)

    if model_name is not None:
        # Try to load a previously trained model
        try:
            clf = io.load_object(model_name)
        except (IOError, EOFError):
            clf.fit(X_std, y)
            # Store the model with the provided name
            io.save_object(clf, model_name)
    else:
        clf.fit(X_std, y)

    if save_scaler:
        io.save_object(std_scaler, save_scaler)

    if save_pca:
        io.save_object(pca, save_pca)

    return clf, std_scaler, pca
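
A hedged end-to-end sketch of how the trainers above are meant to be used: the returned PCA and scaler have to be applied to the test features in the same order as during training (PCA first, then standardization) before predicting. X_train, y_train and X_test are hypothetical feature matrices:

clf, std_scaler, pca = train_linear_svm(X_train, y_train, C=1, dim_reduction=23,
                                        model_name='linear_svm_demo')

X_test_t = X_test
if pca is not None:
    X_test_t = pca.transform(X_test_t)         # same projection as in training
if std_scaler is not None:
    X_test_t = std_scaler.transform(X_test_t)  # same standardization as in training
predictions = clf.predict(X_test_t)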
Example #6
                                           return_counts=True)
                predicted_class = values[np.argmax(counts)]
                if predicted_class == test_labels[i]:
                    num_correct += 1
            print('Time spent: {:.2f} s'.format(time.time() - temp))
            temp = time.time()

            # Compute results
            Accuracy.append((num_correct * 100.0 / len(test_images_filenames)))
            Time.append((temp - start))
            if sweep_mode == 'cost':
                Cost.append(p1)
            elif sweep_mode == 'params':
                if kernel == 'poly':
                    D.append(p1)
                    R.append(p2)
                elif kernel == 'rbf':
                    Gamma.append(p1)
                elif kernel == 'sigmoid':
                    Gamma.append(p1)
                    R.append(p2)

    # Save the results
    results = []
    if sweep_mode == 'cost':
        results = [Cost, Accuracy, Time]
    elif sweep_mode == 'params':
        results = [Gamma, D, R, Accuracy, Time]

    io.save_object(results, 'resultsSVM_{}_{}'.format(kernel, sweep_mode))
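
The saved object is a plain list of parallel lists, so reading it back for plotting is direct. A hedged sketch for a 'cost' sweep with the rbf kernel, assuming io.load_object is the counterpart of the io.save_object call above:

import matplotlib.pyplot as plt

cost, accuracy, elapsed = io.load_object('resultsSVM_rbf_cost')  # [Cost, Accuracy, Time]
plt.semilogx(cost, accuracy, marker='o')
plt.xlabel('C (cost)')
plt.ylabel('Accuracy (%)')
plt.title('RBF SVM cost sweep')
plt.savefig('resultsSVM_rbf_cost.png')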
Example #7
    # Feature extraction with surf, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, lin_svm,
                                         std_scaler, None)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result if i[0] is not False]
    expected = [i[2] for i in result if i[0] is not False]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    conf = metrics.confusion_matrix(expected,
                                    predicted,
                                    labels=lin_svm.classes_)
    # Plot normalized confusion matrix
    plotConfusionMatrix(conf, classes=lin_svm.classes_, normalize=True)

    io.save_object(conf, 'final_surf_30_cm')

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))
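
parallel_testing is a project helper that is not shown in these snippets. Judging only from how its return value is unpacked above (a correctness flag, the predicted label, the expected label), a plausible minimal sketch could look like the following, where extract_features stands in for the per-image SURF/BoVW encoding step and is hypothetical:

def parallel_testing(test_image, test_label, clf, std_scaler, pca):
    # Hypothetical per-image pipeline: encode, project, scale, classify.
    x = extract_features(test_image).reshape(1, -1)
    if pca is not None:
        x = pca.transform(x)
    if std_scaler is not None:
        x = std_scaler.transform(x)
    prediction = clf.predict(x)[0]
    return prediction == test_label, prediction, test_label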
Example #8
    start = time.time()
    if calculate_results == 1:
        # Read the training set
        train_images_filenames, train_labels = io.load_training_set()
        print('Loaded {} train images.'.format(len(train_images_filenames)))

        # Feature extraction with sift
        print('Obtaining dense sift features...')
        try:
            D, L, I = io.load_object('train_dense_descriptors', ignore=True), \
                      io.load_object('train_dense_labels', ignore=True), \
                      io.load_object('train_dense_indices', ignore=True)
        except IOError:
            D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames, train_labels, num_samples_class=-1,
                                                        n_jobs=N_JOBS)
            io.save_object(D, 'train_dense_descriptors', ignore=True)
            io.save_object(L, 'train_dense_labels', ignore=True)
            io.save_object(I, 'train_dense_indices', ignore=True)

        print('Elapsed time: {:.2f} s'.format(time.time() - start))
        temp = time.time()

        print('Creating codebook with {} visual words'.format(K))
        codebook = bovw.create_codebook(D, codebook_name='dense_codebook')
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))
        temp = time.time()

        print('Getting visual words from training set...')
        vis_words, labels = bovw.visual_words(D, L, I, codebook)
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))
        temp = time.time()
예제 #9
0
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # load VGG model
    base_model = VGG16(weights='imagenet')

    # crop the model up to a certain layer
    model = Model(input=base_model.input,
                  output=base_model.get_layer('block5_conv2').output)

    # aggregating features with max-pooling
    # inputs = Input(shape=[14, 14, 512])
    # x = MaxPooling2D((2, 2), strides=(2, 2), name='max_pooling_layer')(inputs)
    # model_agg = Model(inputs, x, name='agg_features')

    # get train and test images
    train_images_filenames = cPickle.load(
        open('./dataset/train_images_filenames.dat', 'r'))
    train_labels = cPickle.load(open('./dataset/train_labels.dat', 'r'))
    io.log('\nLoaded {} train images.'.format(len(train_images_filenames)))

    # read and process training images
    print('Getting features from training images')
    Train_descriptors = []
    Train_label_per_descriptor = []

    for i in range(len(train_images_filenames)):

        img = image.load_img(train_images_filenames[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # get the features from images
        features_ = model.predict(x)
        features = features_[0, :, :, :]
        descriptor = features.reshape(features.shape[0] * features.shape[1],
                                      features.shape[2])
        # aggregate features
        # max value (can be different filters)
        # descriptor_agg = descriptor.max(axis=1)
        # sum value (of all layers)
        # descriptor_agg = np.sum(descriptor, axis=1)
        # max value of just one filter
        energy = descriptor.max(axis=0)
        descriptor_agg = descriptor[:, np.argmax(energy)]

        descriptor_agg = np.reshape(descriptor_agg,
                                    [descriptor_agg.shape[0], 1])

        Train_descriptors.append(descriptor_agg)
        Train_label_per_descriptor.append(train_labels[i])

    # Put all descriptors in a numpy array to compute PCA and GMM
    size_descriptors = Train_descriptors[0].shape[1]
    #size_descriptors=1
    Desc = np.zeros(
        (np.sum([len(p) for p in Train_descriptors]), size_descriptors),
        dtype=np.uint8)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        Desc[startingpoint:startingpoint +
             len(Train_descriptors[i])] = Train_descriptors[i]
        startingpoint += len(Train_descriptors[i])

    for k in codebook_size:

        print('Computing gmm with ' + str(k) + ' centroids')
        gmm = ynumpy.gmm_learn(np.float32(Desc), k)
        # io.save_object(gmm, 'gmm_NN_agg_features_max')

        # Compute the fisher vectors of the training images
        print('Computing fisher vectors')
        fisher = np.zeros((len(Train_descriptors), k * 1 * 2),
                          dtype=np.float32)

        for i in xrange(len(Train_descriptors)):
            descriptor = Train_descriptors[i]
            # descriptor = np.float32(pca.transform(descriptor))
            aux = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
            fisher[i, :] = np.reshape(aux, [1, aux.shape[0]])
            #fisher[i,:]=aux
            # L2 normalization - reshape to avoid deprecation warning, checked that the result is the same
            fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1, -1),
                                                   norm='l2')

        # CV in SVM training
        io.log('Scaling features...')
        std_scaler = StandardScaler().fit(fisher)
        vis_words = std_scaler.transform(fisher)

        io.log('Optimizing SVM hyperparameters...')
        svm = SVC(kernel='precomputed')
        random_search = RandomizedSearchCV(svm,
                                           params_distribution,
                                           n_iter=n_iter,
                                           scoring='accuracy',
                                           n_jobs=1,
                                           refit=False,
                                           cv=3,
                                           verbose=1)
        # Precompute Gram matrix
        gram = kernels.intersection_kernel(vis_words, vis_words)
        random_search.fit(gram, train_labels)

        # Convert MaskedArrays to ndarrays to avoid unpickling bugs
        results = random_search.cv_results_
        results['param_C'] = results['param_C'].data

        # Appending all parameter-scores combinations
        cv_results.update({k: {'cv_results': results}})
        io.save_object(cv_results,
                       'intersection_svm_CNNfeatures_aggregate_energy',
                       ignore=True)

        # Obtaining the parameters which yielded the best accuracy
        if random_search.best_score_ > best_accuracy:
            best_accuracy = random_search.best_score_
            best_params = random_search.best_params_
            best_params.update({'k': k})

        io.log('-------------------------------\n')
    io.log('\nSaving best parameters...')
    io.save_object(best_params,
                   'best_params_intersection_svm_CNNfeatures_aggregate_energy',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures_aggregate_energy.pickle'
    )
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results,
                   'intersection_svm_CNNfeatures_aggregate_energy',
                   ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures_aggregate_energy.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, C={} --> accuracy: {:.3f}'.format(best_params['k'],
                                                    best_params['C'],
                                                    best_accuracy))
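
Several examples pass a precomputed Gram matrix built with kernels.intersection_kernel, whose implementation is not included here. The histogram intersection kernel is K(a, b) = sum_i min(a_i, b_i); a hedged reference sketch follows, together with the convention required by scikit-learn's kernel='precomputed' (rows index the samples to classify, columns the training samples):

import numpy as np

def intersection_kernel_sketch(A, B):
    # Histogram intersection kernel between the rows of A and the rows of B.
    K = np.zeros((A.shape[0], B.shape[0]), dtype=np.float64)
    for i, a in enumerate(A):
        K[i] = np.minimum(a, B).sum(axis=1)
    return K

# gram_train = intersection_kernel_sketch(vis_words_train, vis_words_train)
# gram_test  = intersection_kernel_sketch(vis_words_test, vis_words_train)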
Example #10
    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining sift features...')
    try:
        D, L, I, Kp_pos = io.load_object('train_dense_descriptors', ignore=True), \
                  io.load_object('train_dense_labels', ignore=True), \
                  io.load_object('train_dense_indices', ignore=True), \
                  io.load_object('train_dense_keypoints', ignore=True)
    except IOError:
        print('Cached dense features not found. Recomputing...')
        D, L, I, Kp = feature_extraction.parallel_dense(train_images_filenames, train_labels, num_samples_class=-1,
                                                   n_jobs=N_JOBS)
        io.save_object(D, 'train_dense_descriptors', ignore=True)
        io.save_object(L, 'train_dense_labels', ignore=True)
        io.save_object(I, 'train_dense_indices', ignore=True)
        Kp_pos = np.array([Kp[i].pt for i in range(0, len(Kp))], dtype=np.float64)
        io.save_object(Kp_pos, 'train_dense_keypoints', ignore=True)

    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Creating codebook with {} visual words'.format(K))
    codebook = bovw.create_codebook(D, codebook_name='default_codebook')
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Getting visual words from training set...')
    vis_words, labels = bovw.visual_words(D, L, I, codebook, spatial_pyramid=True)
Example #11
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining sift features...')
    try:
        D, L, I, Kp_pos = io.load_object('train_sift_descriptors', ignore=True), \
                  io.load_object('train_sift_labels', ignore=True), \
                  io.load_object('train_sift_indices', ignore=True), \
                  io.load_object('train_sift_keypoints', ignore=True)
    except IOError:
        D, L, I, Kp = feature_extraction.parallel_sift(train_images_filenames,
                                                       train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=N_JOBS)
        io.save_object(D, 'train_sift_descriptors', ignore=True)
        io.save_object(L, 'train_sift_labels', ignore=True)
        io.save_object(I, 'train_sift_indices', ignore=True)
        Kp_pos = np.array([Kp[i].pt for i in range(0, len(Kp))],
                          dtype=np.float64)
        io.save_object(Kp_pos, 'train_sift_keypoints', ignore=True)

    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Creating codebook with {} visual words'.format(K))
    codebook = bovw.create_codebook(D, codebook_name='default_codebook')
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Getting visual words from training set...')
Example #12
def train():
    start = time.time()

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining sift features...')
    try:
        D, L, I = io.load_object('train_sift_descriptors', ignore=True), \
                  io.load_object('train_sift_labels', ignore=True), \
                  io.load_object('train_sift_indices', ignore=True)
    except IOError:
        D, L, I, _ = feature_extraction.parallel_sift(train_images_filenames,
                                                      train_labels,
                                                      num_samples_class=-1,
                                                      n_jobs=N_JOBS)
        io.save_object(D, 'train_sift_descriptors', ignore=True)
        io.save_object(L, 'train_sift_labels', ignore=True)
        io.save_object(I, 'train_sift_indices', ignore=True)
    print('Time spent: {:.2f} s'.format(time.time() - start))

    # Start hyperparameters optimization
    print('\nSTARTING HYPERPARAMETER OPTIMIZATION FOR RBF SVM')
    codebook_k_values = [2**i for i in range(7, 16)]
    params_distribution = {
        'C': np.logspace(-4, 1, 10**3),
        'gamma': np.logspace(-3, 1, 10**3)
    }
    n_iter = 100
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # Iterate codebook values
    for k in codebook_k_values:
        temp = time.time()
        print('Creating codebook with {} visual words'.format(k))
        D = D.astype(np.uint32)
        codebook = bovw.create_codebook(D,
                                        codebook_name='codebook_{}'.format(k))
        print('Time spent: {:.2f} s'.format(time.time() - temp))
        temp = time.time()

        print('Getting visual words from training set...')
        vis_words, labels = bovw.visual_words(D,
                                              L,
                                              I,
                                              codebook,
                                              normalization='l1')
        print('Time spent: {:.2f} s'.format(time.time() - temp))
        temp = time.time()

        print('Scaling features...')
        std_scaler = StandardScaler().fit(vis_words)
        vis_words = std_scaler.transform(vis_words)
        print('Time spent: {:.2f} s'.format(time.time() - temp))
        temp = time.time()

        print('Optimizing SVM hyperparameters...')
        svm = SVC(kernel='rbf')
        random_search = RandomizedSearchCV(svm,
                                           params_distribution,
                                           n_iter=n_iter,
                                           scoring='accuracy',
                                           n_jobs=N_JOBS,
                                           refit=False,
                                           verbose=1,
                                           cv=4)
        random_search.fit(vis_words, labels)
        print('Time spent: {:.2f} s'.format(time.time() - temp))

        # Convert MaskedArrays to ndarrays to avoid unpickling bugs
        results = random_search.cv_results_
        results['param_C'] = results['param_C'].data
        results['param_gamma'] = results['param_gamma'].data

        # Appending all parameter-scores combinations
        cv_results.update({k: results})
        io.save_object(cv_results, 'rbf_svm_optimization_norml1')

        # Obtaining the parameters which yielded the best accuracy
        if random_search.best_score_ > best_accuracy:
            best_accuracy = random_search.best_score_
            best_params = random_search.best_params_
            best_params.update({'k': k})

        print('-------------------------------\n')

    print('\nBEST PARAMS')
    print('k={}, C={} , gamma={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['gamma'],
        best_accuracy))

    print('Saving all cross-validation values...')
    io.save_object(cv_results, 'rbf_svm_optimization_norml1')
    print('Done')
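
Because the search runs with refit=False, the chosen hyperparameters have to be recovered from the stored cv_results_ dictionaries. A hedged sketch for inspecting the saved object, assuming io.load_object mirrors io.save_object:

import numpy as np

cv_results = io.load_object('rbf_svm_optimization_norml1')
for k, res in sorted(cv_results.items()):
    best = np.argmax(res['mean_test_score'])
    print('k={}: C={:.4g}, gamma={:.4g} --> accuracy {:.3f}'.format(
        k, res['param_C'][best], res['param_gamma'][best],
        res['mean_test_score'][best]))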
Example #13
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    base_model = VGG16(weights='imagenet')

    # crop the model up to a certain layer
    model = Model(input=base_model.input,
                  output=base_model.get_layer('fc2').output)

    # Read the training set
    train_images_filenames = cPickle.load(
        open('./dataset/train_images_filenames.dat', 'r'))
    test_images_filenames = cPickle.load(
        open('./dataset/test_images_filenames.dat', 'r'))
    train_labels = cPickle.load(open('./dataset/train_labels.dat', 'r'))
    test_labels = cPickle.load(open('./dataset/test_labels.dat', 'r'))
    io.log('\nLoaded {} train images.'.format(len(train_images_filenames)))
    io.log('\nLoaded {} test images.'.format(len(test_images_filenames)))

    # read and process training images
    print('Getting features from training images')
    start_feature = time.time()

    first = 1
    for i in range(len(train_images_filenames)):
        img = image.load_img(train_images_filenames[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # get the features from images
        features = model.predict(x)
        features = features[0, :]
        if first == 1:
            Desc = features
            first = 0
        else:
            Desc = np.vstack((Desc, features))

    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    io.log('Scaling features...')
    start_scaler = time.time()
    stdSlr = StandardScaler().fit(Desc)
    D_scaled = stdSlr.transform(Desc)
    scaler_time = time.time() - start_scaler
    io.log('Elapsed time: {:.2f} s'.format(scaler_time))

    io.log('Optimizing SVM hyperparameters...')
    start_crossvalidation = time.time()
    svm = SVC(kernel='precomputed', probability=True)
    random_search = RandomizedSearchCV(svm,
                                       params_distribution,
                                       n_iter=n_iter,
                                       scoring='accuracy',
                                       refit=False,
                                       cv=3,
                                       verbose=1)

    # Precompute Gram matrix
    gram = kernels.intersection_kernel(D_scaled, D_scaled)
    random_search.fit(gram, train_labels)
    crossvalidation_time = time.time() - start_crossvalidation
    io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

    # Convert MaskedArrays to ndarrays to avoid unpickling bugs
    results = random_search.cv_results_
    results['param_C'] = results['param_C'].data

    # Appending all parameter-scores combinations
    cv_results.update({
        'cv_results': results,
        'feature_time': feature_time,
        'scaler_time': scaler_time,
        'crossvalidation_time': crossvalidation_time,
        'total_time': feature_time + scaler_time + crossvalidation_time
    })
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    print('Best accuracy ' + str(random_search.best_score_))
    # Obtaining the parameters which yielded the best accuracy
    if random_search.best_score_ > best_accuracy:
        best_accuracy = random_search.best_score_
        best_params = random_search.best_params_

    io.log('-------------------------------\n')
    io.log('\nSaving best parameters...')
    io.save_object(best_params,
                   'best_params_intersection_svm_CNNfeatures',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('C={} --> accuracy: {:.3f}'.format(best_params['C'], best_accuracy))
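
RandomizedSearchCV is called with refit=False, so no final model exists after the search. A hedged follow-up sketch that fits the selected C on the training Gram matrix and classifies test images with the same precomputed-kernel convention; D_test_scaled is a hypothetical matrix of test fc2 features transformed with the same stdSlr:

best_svm = SVC(kernel='precomputed', C=best_params['C'], probability=True)
best_svm.fit(gram, train_labels)

# Rows of the test Gram matrix index test images, columns index training images.
gram_test = kernels.intersection_kernel(D_test_scaled, D_scaled)
predicted = best_svm.predict(gram_test)
accuracy = 100.0 * sum(p == t for p, t in zip(predicted, test_labels)) / len(test_labels)
io.log('Test accuracy: {:.2f} %'.format(accuracy))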
Example #14
    # Put all descriptors in a numpy array to compute PCA and GMM
    size_descriptors = Train_descriptors[0].shape[1]
    Desc = np.zeros((np.sum([len(p) for p in Train_descriptors]), size_descriptors), dtype=np.uint8)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        Desc[startingpoint:startingpoint + len(Train_descriptors[i])] = Train_descriptors[i]
        startingpoint += len(Train_descriptors[i])

    print('Computing PCA')
    pca = decomposition.PCA(n_components=pca_reduction)
    pca.fit(Desc)
    Desc = np.float32(pca.transform(Desc))

    print('Computing gmm with ' + str(k) + ' centroids')
    gmm = ynumpy.gmm_learn(np.float32(Desc), k)
    io.save_object(gmm, 'gmm_NN_pca256')


    # Compute the fisher vectors of the training images
    print('Computing fisher vectors')
    fisher = np.zeros((len(Train_descriptors), k * pca_reduction * 2), dtype=np.float32)
    for i in xrange(len(Train_descriptors)):
        descriptor = Train_descriptors[i]
        descriptor = np.float32(pca.transform(descriptor))
        fisher[i, :] = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
        # L2 normalization - reshape to avoid deprecation warning, checked that the result is the same
        fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1,-1), norm='l2')


    # Train an SVM classifier
    stdSlr = StandardScaler().fit(fisher)
Example #15
    print('Loaded {} test images.'.format(len(test_images_filenames)))

    # Feature extraction with sift, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, lin_svm,
                                         std_scaler, None)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result]
    expected = [i[2] for i in result]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    conf = metrics.confusion_matrix(expected,
                                    predicted,
                                    labels=lin_svm.classes_)
    # Plot normalized confusion matrix
    #plot_confusion_matrix(conf, classes=lin_svm.classes_, normalize=True)

    io.save_object(conf, 'final_noprob_sift_all_cm')

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))
Example #16
    for i in range(len(train_images_filenames)):
        img = image.load_img(train_images_filenames[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # get the features from images
        features = model.predict(x)
        features = features[0, :]
        if first == 1:
            Desc = features
            first = 0
        else:
            Desc = np.vstack((Desc, features))

    io.save_object(Desc, 'train_descriptors')

    # Train a linear SVM classifier
    stdSlr = StandardScaler().fit(Desc)
    D_scaled = stdSlr.transform(Desc)
    print('Training the SVM classifier...')
    clf = svm.SVC(kernel=kernels.intersection_kernel, C=C,
                  probability=True).fit(D_scaled, train_labels)
    io.save_object(clf, 'clf_T3')
    #clf = io.load_object('clf_T3_pca256',ignore=False)

    # get all the test data and predict their labels
    features_test = np.zeros(
        (len(test_images_filenames), model.output_shape[1]), dtype=np.float32)
    for i in range(len(test_images_filenames)):
        img = image.load_img(test_images_filenames[i], target_size=(224, 224))
Example #17
                correct_class = [i[0] for i in result]
                predicted = [i[1] for i in result]
                expected = [i[2] for i in result]

                # Compute accuracy
                num_correct = np.count_nonzero(correct_class)
                accuracy = num_correct * 100.0 / len(test_images_filenames)

                # Show results and timing
                print('\nACCURACY: {:.2f}'.format(accuracy))
                print('TOTAL TIME: {:.2f} s'.format(time.time() - start))
                print('------------------------------')

                # Store it in object
                results.append(
                    [fe_name, num_samples, dim_red_option, accuracy])

                # Confusion matrix
                conf = metrics.confusion_matrix(expected,
                                                predicted,
                                                labels=svm.classes_)
                # Plot normalized confusion matrix
                # plot_confusion_matrix(conf, classes=svm.classes_, normalize=True)

                io.save_object(
                    conf, 'conf_matrix_svm_{}_{}s_{}c'.format(
                        fe_name, num_samples if num_samples > -1 else 'all',
                        dim_red_option
                        if dim_red_option is not None else 'all'))
Example #18
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining sift features...')
    try:
        D, L, I, Kp_pos = io.load_object('train_sift_descriptors', ignore=True), \
                  io.load_object('train_sift_labels', ignore=True), \
                  io.load_object('train_sift_indices', ignore=True), \
                  io.load_object('train_sift_keypoints', ignore=True)
    except IOError:
        D, L, I, Kp = feature_extraction.parallel_sift(train_images_filenames,
                                                       train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=N_JOBS)
        io.save_object(D, 'train_sift_descriptors', ignore=True)
        io.save_object(L, 'train_sift_labels', ignore=True)
        io.save_object(I, 'train_sift_indices', ignore=True)
        Kp_pos = np.array([Kp[i].pt for i in range(0, len(Kp))],
                          dtype=np.float64)
        io.save_object(Kp_pos, 'train_sift_keypoints', ignore=True)

    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Creating codebook with {} visual words'.format(K))
    codebook = bovw.create_codebook(D, codebook_name='default_codebook')
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Getting visual words from training set...')
Example #19
    size_descriptors = Train_descriptors[0].shape[1]
    #size_descriptors=1
    Desc = np.zeros((np.sum([len(p) for p in Train_descriptors]), size_descriptors), dtype=np.uint8)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        Desc[startingpoint:startingpoint + len(Train_descriptors[i])] = Train_descriptors[i]
        startingpoint += len(Train_descriptors[i])

    print('Computing PCA')
    # pca = decomposition.PCA(n_components=pca_reduction)
    # pca.fit(Desc)
    # Desc = np.float32(pca.transform(Desc))

    print('Computing gmm with ' + str(k) + ' centroids')
    gmm = ynumpy.gmm_learn(np.float32(Desc), k)
    io.save_object(gmm, 'gmm_NN_agg_features_max')


    # Compute the fisher vectors of the training images
    print('Computing fisher vectors')
    fisher = np.zeros((len(Train_descriptors), k * 1 * 2), dtype=np.float32)
    for i in xrange(len(Train_descriptors)):
        descriptor = Train_descriptors[i]
        # descriptor = np.float32(pca.transform(descriptor))
        aux = ynumpy.fisher(gmm, descriptor, include=['mu', 'sigma'])
        fisher[i, :] = np.reshape(aux, [1, aux.shape[0]])
        # L2 normalization - reshape to avoid deprecation warning, checked that the result is the same
        fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(1, -1), norm='l2')


    # Train an SVM classifier
Example #20
    # Feature extraction with sift, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, lin_svm,
                                         std_scaler, pca)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result]
    expected = [i[2] for i in result]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    conf = metrics.confusion_matrix(expected,
                                    predicted,
                                    labels=lin_svm.classes_)
    # Plot normalized confusion matrix
    plot_confusion_matrix(conf, classes=lin_svm.classes_, normalize=True)

    io.save_object(conf, 'final_sift_all_cmPCA')

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))
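
plot_confusion_matrix is another project helper that is only called in these snippets. A minimal hedged sketch of a row-normalized confusion-matrix plot with matplotlib, consistent with how it is invoked above:

import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_matrix_sketch(conf, classes, normalize=True):
    conf = conf.astype(np.float64)
    if normalize:
        conf /= conf.sum(axis=1, keepdims=True)  # normalize each true-class row
    plt.imshow(conf, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.tight_layout()
    plt.show()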
Example #21
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}
    """ SETTINGS """
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    # Parameter sweep for dense SIFT
    for ds in dense_sampling_density:

        io.log('Obtaining dense features with sampling parameter {}...'.format(
            ds))
        start_sift = time.time()
        settings.dense_sampling_density = ds
        try:
            D, L, I = io.load_object('train_dense_descriptors_{}'.format(settings.dense_sampling_density), ignore=True), \
                      io.load_object('train_dense_labels_{}'.format(settings.dense_sampling_density), ignore=True), \
                      io.load_object('train_dense_indices_{}'.format(settings.dense_sampling_density), ignore=True)
        except IOError:
            D, L, I, _ = feature_extraction.parallel_dense(
                train_images_filenames,
                train_labels,
                num_samples_class=-1,
                n_jobs=settings.n_jobs)
            io.save_object(D,
                           'train_dense_descriptors_{}'.format(
                               settings.dense_sampling_density),
                           ignore=True)
            io.save_object(L,
                           'train_dense_labels_{}'.format(
                               settings.dense_sampling_density),
                           ignore=True)
            io.save_object(I,
                           'train_dense_indices_{}'.format(
                               settings.dense_sampling_density),
                           ignore=True)
        sift_time = time.time() - start_sift
        io.log('Elapsed time: {:.2f} s'.format(sift_time))

        # Parameter sweep for PCA
        for dim_red in pca_reduction:

            io.log('Applying PCA (dim = {})...'.format(dim_red))
            start_pca = time.time()
            settings.pca_reduction = dim_red
            pca, D_pca = feature_extraction.pca(D)
            pca_time = time.time() - start_pca
            io.log('Elapsed time: {:.2f} s'.format(pca_time))

            # Parameter sweep for codebook size
            for k in codebook_size:

                io.log('Creating GMM model (k = {})'.format(k))
                start_gmm = time.time()
                settings.codebook_size = k
                gmm = bovw.create_gmm(
                    D_pca, 'gmm_{}_dense_{}_pca_{}'.format(k, ds, dim_red))
                gmm_time = time.time() - start_gmm
                io.log('Elapsed time: {:.2f} s'.format(gmm_time))

                io.log('Getting Fisher vectors from training set...')
                start_fisher = time.time()
                fisher, labels = bovw.fisher_vectors(D_pca,
                                                     L,
                                                     I,
                                                     gmm,
                                                     normalization='l2')
                fisher_time = time.time() - start_fisher
                io.log('Elapsed time: {:.2f} s'.format(fisher_time))

                io.log('Scaling features...')
                start_scaler = time.time()
                std_scaler = StandardScaler().fit(fisher)
                vis_words = std_scaler.transform(fisher)
                scaler_time = time.time() - start_scaler
                io.log('Elapsed time: {:.2f} s'.format(scaler_time))

                io.log('Optimizing SVM hyperparameters...')
                start_crossvalidation = time.time()
                svm = SVC(kernel='precomputed')
                random_search = RandomizedSearchCV(svm,
                                                   params_distribution,
                                                   n_iter=n_iter,
                                                   scoring='accuracy',
                                                   n_jobs=settings.n_jobs,
                                                   refit=False,
                                                   cv=3,
                                                   verbose=1)
                # Precompute Gram matrix
                gram = kernels.intersection_kernel(vis_words, vis_words)
                random_search.fit(gram, labels)
                crossvalidation_time = time.time() - start_crossvalidation
                io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

                # Convert MaskedArrays to ndarrays to avoid unpickling bugs
                results = random_search.cv_results_
                results['param_C'] = results['param_C'].data

                # Appending all parameter-scores combinations
                cv_results.update({
                    (k, dim_red, ds): {
                        'cv_results': results,
                        'sift_time': sift_time,
                        'pca_time': pca_time,
                        'gmm_time': gmm_time,
                        'fisher_time': fisher_time,
                        'scaler_time': scaler_time,
                        'crossvalidation_time': crossvalidation_time,
                        'total_time': sift_time + pca_time + gmm_time +
                                      fisher_time + scaler_time +
                                      crossvalidation_time
                    }
                })
                io.save_object(
                    cv_results,
                    'intersection_svm_optimization_fisher_vectors_l2',
                    ignore=True)

                # Obtaining the parameters which yielded the best accuracy
                if random_search.best_score_ > best_accuracy:
                    best_accuracy = random_search.best_score_
                    best_params = random_search.best_params_
                    best_params.update({'k': k, 'pca': dim_red, 'ds': ds})

                io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(
        best_params,
        'best_params_intersection_svm_optimization_fisher_vectors_l2',
        ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_optimization_fisher_vectors_l2.pickle'
    )
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results,
                   'intersection_svm_optimization_fisher_vectors_l2',
                   ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_optimization_fisher_vectors_l2.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, C={}, dim_red={}, dense_grid={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['pca'],
        best_params['ds'], best_accuracy))
Example #22
print('Loss: {:.2f} \t Accuracy: {:.2f} %'.format(result[0], result[1] * 100))

print('\n--------------------------------')
print('EVALUATING PERFORMANCE ON VALIDATION SET (STORED WEIGHTS)')
print('--------------------------------\n')
new_model = load_model('./weights/{}.hdf5'.format(results_name))
result = new_model.evaluate_generator(validation_generator, val_samples=test_samples)
print('Loss: {:.2f} \t Accuracy: {:.2f} %'.format(result[0], result[1] * 100))


print('\n--------------------------------')
print('STORING LOSS AND ACCURACY PLOTS')
print('--------------------------------\n')

# Store history
io.save_object(history.history, results_name, ignore=True)

# Plot
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.ylim((0, 1))
plt.legend(['train', 'validation'], loc='lower right')
plt.savefig('./results/{}_accuracy.jpg'.format(results_name))
plt.close()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Categorical cross-entropy (loss)')
Example #23
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}
    """ SETTINGS """
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))
    k = 64

    io.log('Obtaining dense CNN features...')
    start_feature = time.time()
    try:
        D, L, I = io.load_object('train_CNN_descriptors', ignore=True), \
                  io.load_object('train_CNN_labels', ignore=True), \
                  io.load_object('train_CNN_indices', ignore=True)
    except IOError:
        # load VGG model
        base_model = VGG16(weights='imagenet')
        # io.save_object(base_model, 'base_model', ignore=True)

        # visualize topology in an image
        plot(base_model,
             to_file='modelVGG16.png',
             show_shapes=True,
             show_layer_names=True)

        # crop the model up to a certain layer
        model = Model(input=base_model.input,
                      output=base_model.get_layer('block5_conv2').output)
        D, L, I = feature_extraction.parallel_CNN_features(
            train_images_filenames,
            train_labels,
            model,
            num_samples_class=-1,
            n_jobs=settings.n_jobs)
        io.save_object(D, 'train_CNN_descriptors', ignore=True)
        io.save_object(L, 'train_CNN_labels', ignore=True)
        io.save_object(I, 'train_CNN_indices', ignore=True)
    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    for dim_red in pca_reduction:
        io.log('Applying PCA ... ')
        start_pca = time.time()
        settings.pca_reduction = D.shape[1] * dim_red
        pca, D_pca = feature_extraction.pca(D)
        pca_time = time.time() - start_pca
        io.log('Elapsed time: {:.2f} s'.format(pca_time))
        for k in codebook_size:
            io.log('Creating GMM model (k = {})'.format(k))
            start_gmm = time.time()
            settings.codebook_size = k
            gmm = bovw.create_gmm(
                D_pca,
                'gmm_{}_pca_{}_CNNfeature'.format(k, settings.pca_reduction))
            gmm_time = time.time() - start_gmm
            io.log('Elapsed time: {:.2f} s'.format(gmm_time))

            io.log('Getting Fisher vectors from training set...')
            start_fisher = time.time()
            fisher, labels = bovw.fisher_vectors(D_pca,
                                                 L,
                                                 I,
                                                 gmm,
                                                 normalization='l2')
            fisher_time = time.time() - start_fisher
            io.log('Elapsed time: {:.2f} s'.format(fisher_time))

            io.log('Scaling features...')
            start_scaler = time.time()
            std_scaler = StandardScaler().fit(fisher)
            vis_words = std_scaler.transform(fisher)
            scaler_time = time.time() - start_scaler
            io.log('Elapsed time: {:.2f} s'.format(scaler_time))

            io.log('Optimizing SVM hyperparameters...')
            start_crossvalidation = time.time()
            svm = SVC(kernel='precomputed')
            random_search = RandomizedSearchCV(svm,
                                               params_distribution,
                                               n_iter=n_iter,
                                               scoring='accuracy',
                                               n_jobs=settings.n_jobs,
                                               refit=False,
                                               cv=3,
                                               verbose=1)
            # Precompute Gram matrix
            gram = kernels.intersection_kernel(vis_words, vis_words)
            random_search.fit(gram, labels)
            crossvalidation_time = time.time() - start_crossvalidation
            io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

            # Convert MaskedArrays to ndarrays to avoid unpickling bugs
            results = random_search.cv_results_
            results['param_C'] = results['param_C'].data

            # Appending all parameter-scores combinations
            cv_results.update({
                k: {
                    'cv_results': results,
                    'feature_time': feature_time,
                    'pca_time': pca_time,
                    'gmm_time': gmm_time,
                    'fisher_time': fisher_time,
                    'scaler_time': scaler_time,
                    'crossvalidation_time': crossvalidation_time,
                    'total_time': feature_time + pca_time + gmm_time +
                                  fisher_time + scaler_time + crossvalidation_time
                }
            })
            io.save_object(cv_results,
                           'intersection_svm_CNNfeatures',
                           ignore=True)

            # Obtaining the parameters which yielded the best accuracy
            if random_search.best_score_ > best_accuracy:
                best_accuracy = random_search.best_score_
                best_params = random_search.best_params_
                best_params.update({'k': k, 'pca': dim_red})

            io.log('-------------------------------\n')
    io.log('\nSaving best parameters...')
    io.save_object(best_params,
                   'best_params_intersection_svm_CNNfeatures',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, dim_red={}, C={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['pca'], best_params['C'], best_accuracy))
Example #24
    # Feature extraction with surf, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, lin_svm,
                                         std_scaler, pca)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result]
    expected = [i[2] for i in result]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    conf = metrics.confusion_matrix(expected,
                                    predicted,
                                    labels=lin_svm.classes_)
    # Plot normalized confusion matrix
    #plot_confusion_matrix(conf, classes=lin_svm.classes_, normalize=True)

    io.save_object(conf, 'final_surf_all_cm')

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))
Example #25
def train():
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    base_model = VGG16(weights='imagenet')

    # crop the model up to a certain layer
    model = Model(input=base_model.input,
                  output=base_model.get_layer('block5_conv2').output)

    # Read the training set
    train_images_filenames = cPickle.load(
        open('./dataset/train_images_filenames.dat', 'r'))
    test_images_filenames = cPickle.load(
        open('./dataset/test_images_filenames.dat', 'r'))
    train_labels = cPickle.load(open('./dataset/train_labels.dat', 'r'))
    test_labels = cPickle.load(open('./dataset/test_labels.dat', 'r'))
    io.log('\nLoaded {} train images.'.format(len(train_images_filenames)))
    io.log('\nLoaded {} test images.'.format(len(test_images_filenames)))

    # read and process training images
    print('Getting features from training images')
    start_feature = time.time()
    Train_descriptors = []
    Train_label_per_descriptor = []

    for i in range(len(train_images_filenames)):
        img = image.load_img(train_images_filenames[i], target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # get the features from images
        features = model.predict(x)
        features = features[0, :, :, :]
        descriptor = features.reshape(features.shape[0] * features.shape[1],
                                      features.shape[2])

        Train_descriptors.append(descriptor)
        Train_label_per_descriptor.append(train_labels[i])

    # Put all descriptors in a numpy array to compute PCA and GMM
    size_descriptors = Train_descriptors[0].shape[1]
    Desc = np.zeros(
        (np.sum([len(p) for p in Train_descriptors]), size_descriptors),
        dtype=np.uint8)
    startingpoint = 0
    for i in range(len(Train_descriptors)):
        Desc[startingpoint:startingpoint +
             len(Train_descriptors[i])] = Train_descriptors[i]
        startingpoint += len(Train_descriptors[i])
    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    for dim_red in pca_reduction:
        io.log('Applying PCA ... ')
        start_pca = time.time()
        reduction = np.int(dim_red * Desc.shape[1])
        pca = decomposition.PCA(n_components=reduction)
        pca.fit(Desc)
        Desc_pca = np.float32(pca.transform(Desc))
        pca_time = time.time() - start_pca
        io.log('Elapsed time: {:.2f} s'.format(pca_time))
        for k in codebook_size:
            io.log('Creating GMM model (k = {})'.format(k))
            start_gmm = time.time()
            gmm = ynumpy.gmm_learn(np.float32(Desc_pca), k)
            io.save_object(gmm, 'gmm_NN_pca_{}_k_{}'.format(reduction, k))
            gmm_time = time.time() - start_gmm
            io.log('Elapsed time: {:.2f} s'.format(gmm_time))

            io.log('Getting Fisher vectors from training set...')
            start_fisher = time.time()
            fisher = np.zeros((len(Train_descriptors), k * reduction * 2),
                              dtype=np.float32)
            for i in xrange(len(Train_descriptors)):
                descriptor = Train_descriptors[i]
                descriptor = np.float32(pca.transform(descriptor))
                fisher[i, :] = ynumpy.fisher(gmm,
                                             descriptor,
                                             include=['mu', 'sigma'])
                # L2 normalization - reshape to avoid deprecation warning, checked that the result is the same
                fisher[i, :] = preprocessing.normalize(fisher[i, :].reshape(
                    1, -1),
                                                       norm='l2')

            fisher_time = time.time() - start_fisher
            io.log('Elapsed time: {:.2f} s'.format(fisher_time))

            io.log('Scaling features...')
            start_scaler = time.time()
            stdSlr = StandardScaler().fit(fisher)
            D_scaled = stdSlr.transform(fisher)
            scaler_time = time.time() - start_scaler
            io.log('Elapsed time: {:.2f} s'.format(scaler_time))

            io.log('Optimizing SVM hyperparameters...')
            start_crossvalidation = time.time()
            svm = SVC(kernel='precomputed')
            random_search = RandomizedSearchCV(svm,
                                               params_distribution,
                                               n_iter=n_iter,
                                               scoring='accuracy',
                                               refit=False,
                                               cv=3,
                                               verbose=1)
            # Precompute Gram matrix
            gram = kernels.intersection_kernel(D_scaled, D_scaled)
            random_search.fit(gram, train_labels)
            crossvalidation_time = time.time() - start_crossvalidation
            io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

            # Convert MaskedArrays to ndarrays to avoid unpickling bugs
            results = random_search.cv_results_
            results['param_C'] = results['param_C'].data

            # Appending all parameter-scores combinations
            cv_results.update({
                k: {
                    'cv_results': results,
                    'feature_time': feature_time,
                    'pca_time': pca_time,
                    'gmm_time': gmm_time,
                    'fisher_time': fisher_time,
                    'scaler_time': scaler_time,
                    'crossvalidation_time': crossvalidation_time,
                    'total_time': feature_time + pca_time + gmm_time +
                                  fisher_time + scaler_time + crossvalidation_time
                }
            })
            io.save_object(cv_results,
                           'intersection_svm_CNNfeatures',
                           ignore=True)

            # Obtaining the parameters which yielded the best accuracy
            if random_search.best_score_ > best_accuracy:
                best_accuracy = random_search.best_score_
                best_params = random_search.best_params_
                best_params.update({'k': k, 'pca': dim_red})

            io.log('-------------------------------\n')
    io.log('\nSaving best parameters...')
    io.save_object(best_params,
                   'best_params_intersection_svm_CNNfeatures',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, dim_red={}, C={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['pca'], best_params['C'], best_accuracy))
Example #26
    svm = io.load_object(SESSION1['model'])
    std_scaler = io.load_object(SESSION1['scaler'])
    pca = io.load_object(SESSION1['pca'])

    # Feature extraction with sift, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=SESSION1['n_jobs'], backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, svm,
                                         std_scaler, pca)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result]
    expected = [i[2] for i in result]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    # Plot and save normalized confusion matrix
    conf = metrics.confusion_matrix(expected, predicted, labels=svm.classes_)
    plot_confusion_matrix(conf, classes=svm.classes_, normalize=True)
    io.save_object(conf, SESSION1['conf_matrix'])

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))
Example #27
    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift
    print('Obtaining dense features...')
    try:
        D, L, I = io.load_object('train_dense_descriptors', ignore=True), \
                  io.load_object('train_dense_labels', ignore=True), \
                  io.load_object('train_dense_indices', ignore=True)
    except IOError:
        D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames, train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=settings.n_jobs)
        io.save_object(D, 'train_dense_descriptors', ignore=True)
        io.save_object(L, 'train_dense_labels', ignore=True)
        io.save_object(I, 'train_dense_indices', ignore=True)
    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Applying PCA...')
    pca, D = feature_extraction.pca(D)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Creating GMM model with {} Gaussians'.format(settings.codebook_size))
    gmm = bovw.create_gmm(D, codebook_name='gmm_{}_dense_pca_{}'.format(settings.codebook_size, settings.pca_reduction))
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()
Example #28
    # Feature extraction with sift, prediction with SVM and aggregation to obtain final class
    print('Predicting test data...')
    result = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
        joblib.delayed(parallel_testing)(test_image, test_label, lin_svm,
                                         std_scaler, None)
        for test_image, test_label in zip(test_images_filenames, test_labels))

    correct_class = [i[0] for i in result]
    predicted = [i[1] for i in result]
    expected = [i[2] for i in result]

    num_correct = np.count_nonzero(correct_class)
    print('Time spent: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Compute accuracy
    accuracy = num_correct * 100.0 / len(test_images_filenames)

    conf = metrics.confusion_matrix(expected,
                                    predicted,
                                    labels=lin_svm.classes_)
    # Plot normalized confusion matrix
    plot_confusion_matrix(conf, classes=lin_svm.classes_, normalize=True)

    io.save_object(conf, 'final_sift_30_pca23_cm')

    # Show results and timing
    print('\nACCURACY: {:.2f}'.format(accuracy))
    print('\nTOTAL TIME: {:.2f} s'.format(time.time() - start))