Esempio n. 1
0
        D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames, train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=settings.n_jobs)
        io.save_object(D, 'train_dense_descriptors', ignore=True)
        io.save_object(L, 'train_dense_labels', ignore=True)
        io.save_object(I, 'train_dense_indices', ignore=True)
    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Applying PCA...')
    pca, D = feature_extraction.pca(D)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Creating GMM model with {} Gaussians'.format(settings.codebook_size))
    gmm = bovw.create_gmm(D, codebook_name='gmm_{}_dense_pca_{}'.format(settings.codebook_size, settings.pca_reduction))
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Getting Fisher vectors from training set...')
    fisher, labels = bovw.fisher_vectors(D, L, I, gmm)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Train Linear SVM classifier
    print('Training the SVM classifier...')
    lin_svm, std_scaler, _ = classification.train_linear_svm(fisher, train_labels, C=1, dim_reduction=None)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Read the test set
def train():
    """Cross-validate an intersection-kernel SVM on Fisher vectors of CNN features.

    Pipeline, as performed below:

    1. Load the training set and obtain CNN descriptors, either from the
       cached ``train_CNN_*`` objects or by running VGG16 cropped at layer
       ``block5_conv2`` over the training images (the result is then cached).
    2. For every entry of the module-level ``pca_reduction`` and
       ``codebook_size`` sequences: apply PCA, fit a GMM, compute
       L2-normalized Fisher vectors, standardize them, and run a randomized
       search over ``params_distribution`` on a precomputed
       intersection-kernel Gram matrix.
    3. Persist every cross-validation result (after each configuration and
       once more at the end) and the best parameter set found.

    Side effects: sets ``settings.n_jobs``, ``settings.pca_reduction`` and
    ``settings.codebook_size``; writes objects through ``io.save_object``;
    may write ``modelVGG16.png``.

    Returns:
        None.
    """
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # Settings
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    io.log('Obtaining dense CNN features...')
    start_feature = time.time()
    try:
        # Reuse cached descriptors when all three objects are available.
        D = io.load_object('train_CNN_descriptors', ignore=True)
        L = io.load_object('train_CNN_labels', ignore=True)
        I = io.load_object('train_CNN_indices', ignore=True)
    except IOError:
        # No usable cache: extract the features with a pretrained VGG16.
        base_model = VGG16(weights='imagenet')

        # Visualize the topology in an image
        plot(base_model,
             to_file='modelVGG16.png',
             show_shapes=True,
             show_layer_names=True)

        # Crop the model up to a certain layer
        model = Model(input=base_model.input,
                      output=base_model.get_layer('block5_conv2').output)
        D, L, I = feature_extraction.parallel_CNN_features(
            train_images_filenames,
            train_labels,
            model,
            num_samples_class=-1,
            n_jobs=settings.n_jobs)
        io.save_object(D, 'train_CNN_descriptors', ignore=True)
        io.save_object(L, 'train_CNN_labels', ignore=True)
        io.save_object(I, 'train_CNN_indices', ignore=True)
    feature_time = time.time() - start_feature
    io.log('Elapsed time: {:.2f} s'.format(feature_time))

    for dim_red in pca_reduction:
        io.log('Applying PCA ... ')
        start_pca = time.time()
        # NOTE(review): this is a float whenever dim_red is a float -- confirm
        # that feature_extraction.pca and the GMM cache name expect that.
        settings.pca_reduction = D.shape[1] * dim_red
        pca, D_pca = feature_extraction.pca(D)
        pca_time = time.time() - start_pca
        io.log('Elapsed time: {:.2f} s'.format(pca_time))

        for k in codebook_size:
            io.log('Creating GMM model (k = {})'.format(k))
            start_gmm = time.time()
            settings.codebook_size = k
            gmm = bovw.create_gmm(
                D_pca,
                'gmm_{}_pca_{}_CNNfeature'.format(k, settings.pca_reduction))
            gmm_time = time.time() - start_gmm
            io.log('Elapsed time: {:.2f} s'.format(gmm_time))

            io.log('Getting Fisher vectors from training set...')
            start_fisher = time.time()
            fisher, labels = bovw.fisher_vectors(D_pca,
                                                 L,
                                                 I,
                                                 gmm,
                                                 normalization='l2')
            fisher_time = time.time() - start_fisher
            io.log('Elapsed time: {:.2f} s'.format(fisher_time))

            io.log('Scaling features...')
            start_scaler = time.time()
            std_scaler = StandardScaler().fit(fisher)
            vis_words = std_scaler.transform(fisher)
            scaler_time = time.time() - start_scaler
            io.log('Elapsed time: {:.2f} s'.format(scaler_time))

            io.log('Optimizing SVM hyperparameters...')
            start_crossvalidation = time.time()
            svm = SVC(kernel='precomputed')
            random_search = RandomizedSearchCV(svm,
                                               params_distribution,
                                               n_iter=n_iter,
                                               scoring='accuracy',
                                               n_jobs=settings.n_jobs,
                                               refit=False,
                                               cv=3,
                                               verbose=1)
            # Precompute Gram matrix
            gram = kernels.intersection_kernel(vis_words, vis_words)
            random_search.fit(gram, labels)
            crossvalidation_time = time.time() - start_crossvalidation
            io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

            # Convert MaskedArrays to ndarrays to avoid unpickling bugs
            results = random_search.cv_results_
            results['param_C'] = results['param_C'].data

            # Store all parameter-score combinations for this configuration.
            # NOTE(review): the key is k alone, so when pca_reduction holds
            # more than one value the entries of earlier PCA settings are
            # overwritten. Left unchanged because the key is part of the
            # persisted format; confirm with the consumers of this pickle.
            total_time = (feature_time + pca_time + gmm_time + fisher_time +
                          scaler_time + crossvalidation_time)
            cv_results[k] = {
                'cv_results': results,
                'feature_time': feature_time,
                'pca_time': pca_time,
                'gmm_time': gmm_time,
                'fisher_time': fisher_time,
                'scaler_time': scaler_time,
                'crossvalidation_time': crossvalidation_time,
                'total_time': total_time,
            }
            # Checkpoint after every configuration so partial sweeps survive.
            io.save_object(cv_results,
                           'intersection_svm_CNNfeatures',
                           ignore=True)

            # Keep the parameters which yielded the best accuracy so far.
            if random_search.best_score_ > best_accuracy:
                best_accuracy = random_search.best_score_
                # Copy so the search object's own dict is not mutated.
                best_params = dict(random_search.best_params_)
                best_params.update({'k': k, 'pca': dim_red})

            io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(best_params,
                   'best_params_intersection_svm_CNNfeatures',
                   ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results, 'intersection_svm_CNNfeatures', ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_CNNfeatures.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    if not best_params:
        # Empty sweep, or no configuration scored above the initial 0:
        # there is nothing to index, so report that instead of raising.
        io.log('No parameter combination scored above 0; nothing to report.')
        return
    io.log('k={}, dim_red={}, C={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['pca'], best_params['C'], best_accuracy))
Esempio n. 3
0
    try:
        D, L, I = io.load_object('train_sift_descriptors', ignore=True), \
                  io.load_object('train_sift_labels', ignore=True), \
                  io.load_object('train_sift_indices', ignore=True)
    except IOError:
        D, L, I, _ = feature_extraction.parallel_sift(train_images_filenames, train_labels,
                                                      num_samples_class=-1,
                                                      n_jobs=settings.n_jobs)
        io.save_object(D, 'train_sift_descriptors', ignore=True)
        io.save_object(L, 'train_sift_labels', ignore=True)
        io.save_object(I, 'train_sift_indices', ignore=True)
    print('Elapsed time: {:.2f} s'.format(time.time() - start))
    temp = time.time()

    print('Creating GMM model with {} Gaussians'.format(settings.codebook_size))
    gmm = bovw.create_gmm(D, codebook_name='gmm_{}'.format(settings.codebook_size))
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    print('Getting Fisher vectors from training set...')
    fisher, labels = bovw.fisher_vectors(D, L, I, gmm)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Train Linear SVM classifier
    print('Training the SVM classifier...')
    lin_svm, std_scaler, _ = classification.train_linear_svm(fisher, train_labels, C=1, dim_reduction=None)
    print('Elapsed time: {:.2f} s'.format(time.time() - temp))
    temp = time.time()

    # Read the test set
Esempio n. 4
0
def train():
    """Cross-validate an intersection-kernel SVM on Fisher vectors of dense SIFT.

    Runs a three-level sweep over the module-level sequences
    ``dense_sampling_density``, ``pca_reduction`` and ``codebook_size``.
    For each combination it obtains dense descriptors (cached per sampling
    density), applies PCA, fits a GMM, computes L2-normalized Fisher vectors,
    standardizes them, and runs a randomized search over
    ``params_distribution`` on a precomputed intersection-kernel Gram matrix.

    All cross-validation results are persisted after every configuration and
    once more at the end, together with the best parameter set found.

    Side effects: sets ``settings.n_jobs``, ``settings.dense_sampling_density``,
    ``settings.pca_reduction`` and ``settings.codebook_size``; writes objects
    through ``io.save_object``.

    Returns:
        None.
    """
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # Settings
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    # Parameter sweep for dense SIFT
    for ds in dense_sampling_density:

        io.log('Obtaining dense features with sampling parameter {}...'.format(
            ds))
        start_sift = time.time()
        settings.dense_sampling_density = ds

        # Cache object names depend on the sampling density in use.
        descriptors_name = 'train_dense_descriptors_{}'.format(
            settings.dense_sampling_density)
        labels_name = 'train_dense_labels_{}'.format(
            settings.dense_sampling_density)
        indices_name = 'train_dense_indices_{}'.format(
            settings.dense_sampling_density)
        try:
            # Reuse cached descriptors when all three objects are available.
            D = io.load_object(descriptors_name, ignore=True)
            L = io.load_object(labels_name, ignore=True)
            I = io.load_object(indices_name, ignore=True)
        except IOError:
            # No usable cache: extract the descriptors and store them.
            D, L, I, _ = feature_extraction.parallel_dense(
                train_images_filenames,
                train_labels,
                num_samples_class=-1,
                n_jobs=settings.n_jobs)
            io.save_object(D, descriptors_name, ignore=True)
            io.save_object(L, labels_name, ignore=True)
            io.save_object(I, indices_name, ignore=True)
        sift_time = time.time() - start_sift
        io.log('Elapsed time: {:.2f} s'.format(sift_time))

        # Parameter sweep for PCA
        for dim_red in pca_reduction:

            io.log('Applying PCA (dim = {})...'.format(dim_red))
            start_pca = time.time()
            settings.pca_reduction = dim_red
            pca, D_pca = feature_extraction.pca(D)
            pca_time = time.time() - start_pca
            io.log('Elapsed time: {:.2f} s'.format(pca_time))

            # Parameter sweep for codebook size
            for k in codebook_size:

                io.log('Creating GMM model (k = {})'.format(k))
                start_gmm = time.time()
                settings.codebook_size = k
                gmm = bovw.create_gmm(
                    D_pca, 'gmm_{}_dense_{}_pca_{}'.format(k, ds, dim_red))
                gmm_time = time.time() - start_gmm
                io.log('Elapsed time: {:.2f} s'.format(gmm_time))

                io.log('Getting Fisher vectors from training set...')
                start_fisher = time.time()
                fisher, labels = bovw.fisher_vectors(D_pca,
                                                     L,
                                                     I,
                                                     gmm,
                                                     normalization='l2')
                fisher_time = time.time() - start_fisher
                io.log('Elapsed time: {:.2f} s'.format(fisher_time))

                io.log('Scaling features...')
                start_scaler = time.time()
                std_scaler = StandardScaler().fit(fisher)
                vis_words = std_scaler.transform(fisher)
                scaler_time = time.time() - start_scaler
                io.log('Elapsed time: {:.2f} s'.format(scaler_time))

                io.log('Optimizing SVM hyperparameters...')
                start_crossvalidation = time.time()
                svm = SVC(kernel='precomputed')
                random_search = RandomizedSearchCV(svm,
                                                   params_distribution,
                                                   n_iter=n_iter,
                                                   scoring='accuracy',
                                                   n_jobs=settings.n_jobs,
                                                   refit=False,
                                                   cv=3,
                                                   verbose=1)
                # Precompute Gram matrix
                gram = kernels.intersection_kernel(vis_words, vis_words)
                random_search.fit(gram, labels)
                crossvalidation_time = time.time() - start_crossvalidation
                io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

                # Convert MaskedArrays to ndarrays to avoid unpickling bugs
                results = random_search.cv_results_
                results['param_C'] = results['param_C'].data

                # Store all parameter-score combinations, keyed by the full
                # configuration so no sweep entry overwrites another.
                total_time = (sift_time + pca_time + gmm_time + fisher_time +
                              scaler_time + crossvalidation_time)
                cv_results[(k, dim_red, ds)] = {
                    'cv_results': results,
                    'sift_time': sift_time,
                    'pca_time': pca_time,
                    'gmm_time': gmm_time,
                    'fisher_time': fisher_time,
                    'scaler_time': scaler_time,
                    'crossvalidation_time': crossvalidation_time,
                    'total_time': total_time,
                }
                # Checkpoint after every configuration so partial sweeps
                # survive an interruption.
                io.save_object(
                    cv_results,
                    'intersection_svm_optimization_fisher_vectors_l2',
                    ignore=True)

                # Keep the parameters which yielded the best accuracy so far.
                if random_search.best_score_ > best_accuracy:
                    best_accuracy = random_search.best_score_
                    # Copy so the search object's own dict is not mutated.
                    best_params = dict(random_search.best_params_)
                    best_params.update({'k': k, 'pca': dim_red, 'ds': ds})

                io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(
        best_params,
        'best_params_intersection_svm_optimization_fisher_vectors_l2',
        ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_optimization_fisher_vectors_l2.pickle'
    )
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results,
                   'intersection_svm_optimization_fisher_vectors_l2',
                   ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_optimization_fisher_vectors_l2.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    if not best_params:
        # Empty sweep, or no configuration scored above the initial 0:
        # there is nothing to index, so report that instead of raising.
        io.log('No parameter combination scored above 0; nothing to report.')
        return
    io.log('k={}, C={}, dim_red={}, dense_grid={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['pca'],
        best_params['ds'], best_accuracy))
            train_images_filenames,
            train_labels,
            num_samples_class=-1,
            model=model,
            n_jobs=settings.n_jobs)
        io.save_object(D, 'train_CNN_descriptors', ignore=True)
        io.save_object(L, 'train_CNN_labels', ignore=True)
        io.save_object(I, 'train_CNN_indices', ignore=True)

    # get the features from images

    settings.pca_reduction = D.shape[1] / 2
    pca, D_pca = feature_extraction.pca(D)

    k = settings.codebook_size
    gmm = bovw.create_gmm(
        D_pca, 'gmm_{}_pca_{}_CNNfeature'.format(k, settings.pca_reduction))
    fisher, labels = bovw.fisher_vectors(D_pca,
                                         L,
                                         I,
                                         gmm,
                                         normalization='l2',
                                         spatial_pyramid=False)
    # std_scaler = StandardScaler().fit(fisher)
    # vis_words = std_scaler.transform(fisher)

    print('Training the SVM classifier...')
    lin_svm, std_scaler, _ = classification.train_intersection_svm(
        fisher, train_labels, C=0.0268101613883, dim_reduction=None)

    # Read the test set
    test_images_filenames, test_labels = io.load_test_set()