# Example no. 1 (score: 0)
def outofsample_extensions(method='linear-regression'):
    """Evaluate out-of-sample extensions of an Isomap embedding on MNIST.

    Fits Isomap on a 5000-sample training subset, then extends the embedding
    to unseen data either with a linear-regression baseline or with a linear
    SEF model, and reports SVM test accuracy in the projected space.

    Parameters
    ----------
    method : str
        One of 'linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d'.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Load the data and init seeds for reproducibility
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    # Learn a new space using Isomap (fit only on the training subsample)
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data[:n_train_samples, :]))

    if method == 'linear-regression':
        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data[:n_train_samples, :]), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                           proj.predict(test_data), test_labels)
    elif method in ('c-ISOMAP-10d', 'c-ISOMAP-20d'):
        # Use the SEF to provide out-of-sample extensions; the two variants
        # differ only in the output dimensionality.
        dims = 10 if method == 'c-ISOMAP-10d' else 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        proj.fit(data=train_data[:n_train_samples, :], target_data=train_data_isomap, target='copy',
                 epochs=50, batch_size=128, verbose=True, learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                           proj.transform(test_data), test_labels)
    else:
        # Fail fast with a clear message instead of a NameError on `acc` below.
        raise ValueError("Unknown method: %s" % method)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def unsupervised_approximation(method='pca'):
    """Compare PCA with its SEF-based approximation ('s-pca') on MNIST.

    Fits the chosen projection on a 5000-sample training subset and reports
    SVM test accuracy in the projected space.

    Parameters
    ----------
    method : str
        Either 'pca' (baseline 10-d PCA) or 's-pca' (SEF approximating a
        50-d PCA target with 10 output dimensions).

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Load the data and init seeds for reproducibility
    train_data, train_labels, test_data, test_labels = load_mnist(dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train_samples = 5000

    if method == 'pca':
        # Learn a baseline pca projection
        proj = PCA(n_components=10)
        proj.fit(train_data[:n_train_samples, :])
    elif method == 's-pca':
        # Learn a high dimensional projection to serve as the copy target
        proj_to_copy = PCA(n_components=50)
        proj_to_copy.fit(train_data[:n_train_samples, :])
        target_data = np.float32(proj_to_copy.transform(train_data[:n_train_samples, :]))

        # Approximate it using the SEF and 10 dimensions
        proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        proj.cuda()
        proj.fit(data=train_data[:n_train_samples, :], target_data=target_data, target='copy',
                 epochs=50, batch_size=128, verbose=True, learning_rate=0.001, regularizer_weight=1)
    else:
        # Fail fast with a clear message instead of a NameError on `proj` below.
        raise ValueError("Unknown method: %s" % method)

    # Evaluate the method
    acc = evaluate_svm(proj.transform(train_data[:n_train_samples, :]), train_labels[:n_train_samples],
                       proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def unsupervised_approximation(method=None, dataset=None):
    """Compare PCA with its SEF-based approximation ('s-pca') on a dataset.

    Parameters
    ----------
    method : str
        Either 'pca' (baseline 10-d PCA) or 's-pca' (SEF approximating a
        50-d PCA target with 10 output dimensions).
    dataset : str
        Dataset identifier understood by `dataset_loader`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Init seeds for reproducibility
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    if method == 'pca':
        # Learn a baseline pca projection
        proj = PCA(n_components=10)
        proj.fit(train_data)
    elif method == 's-pca':
        # Learn a high dimensional projection to serve as the copy target
        proj_to_copy = PCA(n_components=50)
        proj_to_copy.fit(train_data)
        target_data = np.float32(proj_to_copy.transform(train_data))

        # Approximate it using the SEF and 10 dimensions
        proj = LinearSEF(train_data.shape[1], output_dimensionality=10)
        proj.cuda()
        proj.fit(data=train_data, target_data=target_data, target='copy',
                 epochs=50, batch_size=1024, verbose=False, learning_rate=0.001, regularizer_weight=1)
    else:
        # Fail fast with a clear message instead of a NameError on `proj` below.
        raise ValueError("Unknown method: %s" % method)

    # Evaluate the method
    acc = evaluate_svm(proj.transform(train_data), train_labels, proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def svm_approximation(method=None, dataset=None):
    """Compare SVM/NCC baselines with SEF-based SVM similarity approximation.

    Parameters
    ----------
    method : str
        One of 'svm', 'ncc', 'S-SVM-A-10d' (embedding dimensionality equal to
        the number of classes) or 'S-SVM-A-20d' (twice the number of classes).
    dataset : str
        Dataset identifier understood by `dataset_loader`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # `sklearn.grid_search` was removed in scikit-learn 0.20; use the
    # modern location (consistent with the other svm_approximation variant).
    from sklearn.model_selection import GridSearchCV

    # Init seeds for reproducibility
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(
        dataset_path, dataset, seed=1)

    # Encode labels as consecutive integers
    lab = LabelEncoder()
    train_labels = lab.fit_transform(train_labels)
    test_labels = lab.transform(test_labels)

    if method == 'svm':
        acc = evaluate_svm(train_data, train_labels, test_data, test_labels)
    elif method == 'ncc':
        acc = evaluate_ncc(train_data, train_labels, test_data, test_labels)
    elif method in ('S-SVM-A-10d', 'S-SVM-A-20d'):
        # Learn an SVM (linear kernel, C tuned by 3-fold cross-validation)
        parameters = {
            'kernel': ['linear'],
            'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
        }
        model = GridSearchCV(svm.SVC(
            max_iter=10000, decision_function_shape='ovo'),
                             parameters,
                             n_jobs=-1,
                             cv=3)
        model.fit(train_data, train_labels)

        # Precompute the SVM-based similarity matrix used as the SEF target
        # params = {'model': model, 'n_labels': np.unique(train_labels).shape[0], 'scaler': None}
        Gt = generate_svm_similarity_matrix(train_data, train_labels,
                                            len(np.unique(train_labels)),
                                            model, None)
        params = {'Gt': Gt}

        # Learn a similarity embedding; the two variants differ only in
        # the output dimensionality (1x vs 2x the number of classes).
        n_classes = len(np.unique(train_labels))
        dims = n_classes if method == 'S-SVM-A-10d' else 2 * n_classes

        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        proj.fit(data=train_data,
                 target_data=train_data,
                 target_labels=train_labels,
                 target=sim_target_svm_precomputed,
                 target_params=params,
                 epochs=100,
                 learning_rate=0.001,
                 batch_size=256,
                 verbose=True,
                 regularizer_weight=0.001)

        acc = evaluate_ncc(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)
    else:
        # Fail fast with a clear message instead of a NameError on `acc` below.
        raise ValueError("Unknown method: %s" % method)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def supervised_reduction(method=None, dataset=None):
    """Compare LDA with SEF-based supervised dimensionality reduction.

    Parameters
    ----------
    method : str
        One of 'lda' (baseline), 's-lda' (SEF with n_classes-1 dimensions)
        or 's-lda-2x' (SEF with 2*(n_classes-1) dimensions — SEF output
        dimensionality is not limited like LDA's).
    dataset : str
        Dataset identifier understood by `dataset_loader`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Init seeds for reproducibility
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(
        dataset_path, dataset, seed=1)

    # Standardize features (scaler fitted on training data only)
    scaler = StandardScaler()
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)

    # 'yale' needs a much weaker regularizer to converge well
    if dataset == 'yale':
        regularizer_weight = 0.0001
    else:
        regularizer_weight = 1

    n_classes = len(np.unique(train_labels))

    if method == 'lda':
        proj = LinearDiscriminantAnalysis(n_components=n_classes - 1)
        proj.fit(train_data, train_labels)
    elif method in ('s-lda', 's-lda-2x'):
        # The two SEF variants differ only in the output dimensionality.
        multiplier = 1 if method == 's-lda' else 2
        proj = LinearSEF(train_data.shape[1],
                         output_dimensionality=multiplier * (n_classes - 1))
        proj.cuda()
        proj.fit(data=train_data,
                 target_labels=train_labels,
                 epochs=100,
                 target='supervised',
                 batch_size=256,
                 regularizer_weight=regularizer_weight,
                 learning_rate=0.001,
                 verbose=False)
    else:
        # Fail fast with a clear message instead of a NameError on `proj` below.
        raise ValueError("Unknown method: %s" % method)

    acc = evaluate_svm(proj.transform(train_data), train_labels,
                       proj.transform(test_data), test_labels)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def outofsample_extensions(method=None, dataset=None):
    """Evaluate kernel out-of-sample extensions of an Isomap embedding.

    Fits Isomap on the training data, then extends the embedding to unseen
    data either with a kernel-ridge-regression baseline or with a kernel
    SEF model, and reports SVM test accuracy in the projected space.

    Parameters
    ----------
    method : str
        One of 'kernel-regression', 'cK-ISOMAP-10d', 'cK-ISOMAP-20d'.
    dataset : str
        Dataset identifier understood by `dataset_loader`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Init seeds for reproducibility
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(
        dataset_path, dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    # Bandwidth for the RBF kernel, set from the mean pairwise distance
    sigma = mean_data_distance(np.float32(train_data))

    if method == 'kernel-regression':
        # Use kernel regression to provide baseline out-of-sample extensions
        proj = KernelRidge(kernel='rbf', gamma=(1.0 / sigma**2))
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    elif method in ('cK-ISOMAP-10d', 'cK-ISOMAP-20d'):
        # Use the SEF to provide out-of-sample extensions; the two variants
        # differ only in the output dimensionality.
        dims = 10 if method == 'cK-ISOMAP-10d' else 20
        proj = KernelSEF(train_data,
                         train_data.shape[1],
                         output_dimensionality=dims)
        proj.cuda()
        proj.fit(data=train_data,
                 target_data=train_data_isomap,
                 target='copy',
                 epochs=100,
                 batch_size=256,
                 verbose=True,
                 learning_rate=0.00001,
                 regularizer_weight=0.001)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)
    else:
        # Fail fast with a clear message instead of a NameError on `acc` below.
        raise ValueError("Unknown method: %s" % method)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
def outofsample_extensions(method=None, dataset=None):
    """Evaluate linear out-of-sample extensions of an Isomap embedding.

    Fits Isomap on the training data, then extends the embedding to unseen
    data either with a linear-regression baseline (on standardized data) or
    with a linear SEF model, and reports SVM test accuracy.

    Parameters
    ----------
    method : str
        One of 'linear-regression', 'c-ISOMAP-10d', 'c-ISOMAP-20d'.
    dataset : str
        Dataset identifier understood by `dataset_loader`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Init seeds for reproducibility
    np.random.seed(1)
    sklearn.utils.check_random_state(1)

    dataset_path = 'data'
    train_data, train_labels, test_data, test_labels = dataset_loader(dataset_path, dataset, seed=1)

    # Learn a new space using Isomap
    isomap = Isomap(n_components=10, n_neighbors=20)
    train_data_isomap = np.float32(isomap.fit_transform(train_data))

    if method == 'linear-regression':
        # Standardize only for the regression baseline (Isomap was fitted
        # on the raw data above).
        from sklearn.preprocessing import StandardScaler
        std = StandardScaler()
        train_data = std.fit_transform(train_data)
        test_data = std.transform(test_data)

        # Use linear regression to provide baseline out-of-sample extensions
        proj = LinearRegression()
        proj.fit(np.float64(train_data), np.float64(train_data_isomap))
        acc = evaluate_svm(proj.predict(train_data), train_labels,
                           proj.predict(test_data), test_labels)
    elif method in ('c-ISOMAP-10d', 'c-ISOMAP-20d'):
        # Use the SEF to provide out-of-sample extensions; the two variants
        # differ only in the output dimensionality.
        dims = 10 if method == 'c-ISOMAP-10d' else 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()
        proj.fit(data=train_data, target_data=train_data_isomap, target='copy',
                 epochs=50, batch_size=1024, verbose=False, learning_rate=0.001, regularizer_weight=1)
        acc = evaluate_svm(proj.transform(train_data), train_labels,
                           proj.transform(test_data), test_labels)
    else:
        # Fail fast with a clear message instead of a NameError on `acc` below.
        raise ValueError("Unknown method: %s" % method)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")
# Example no. 8 (score: 0)
def svm_approximation(method=None):
    """Compare SVM/NCC baselines with SEF-based SVM approximation on MNIST.

    Uses a 5000-sample training subset. For the SEF variants, an SVM is
    tuned by cross-validation, its similarity matrix is precomputed, and a
    linear SEF is trained to reproduce those similarities.

    Parameters
    ----------
    method : str
        One of 'svm', 'ncc', 'S-SVM-A-10d', 'S-SVM-A-20d'.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.
    """
    # Load the data and init seeds for reproducibility
    train_data, train_labels, test_data, test_labels = load_mnist(
        dataset_path='data')
    np.random.seed(1)
    sklearn.utils.check_random_state(1)
    n_train = 5000

    if method == 'svm':
        acc = evaluate_svm(train_data[:n_train, :], train_labels[:n_train],
                           test_data, test_labels)
    elif method == 'ncc':
        acc = evaluate_ncc(train_data[:n_train, :], train_labels[:n_train],
                           test_data, test_labels)
    elif method in ('S-SVM-A-10d', 'S-SVM-A-20d'):
        # Learn an SVM (linear kernel, C tuned by 3-fold cross-validation)
        parameters = {
            'kernel': ['linear'],
            'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]
        }
        model = GridSearchCV(svm.SVC(max_iter=10000,
                                     decision_function_shape='ovo'),
                             parameters,
                             n_jobs=-1,
                             cv=3)
        model.fit(train_data[:n_train], train_labels[:n_train])

        # Learn a similarity embedding; the two variants differ only in
        # the output dimensionality.
        dims = 10 if method == 'S-SVM-A-10d' else 20
        proj = LinearSEF(train_data.shape[1], output_dimensionality=dims)
        proj.cuda()

        # Precompute the similarity matrix
        Gt = generate_svm_similarity_matrix(train_data[:n_train],
                                            train_labels[:n_train],
                                            len(np.unique(train_labels)),
                                            model, None)
        params = {'Gt': Gt}

        # otherwise, we can simply set target='svm' and use the following target
        # params = {'model': model, 'n_labels': np.unique(train_labels).shape[0], 'scaler': scaler}

        proj.fit(data=train_data[:n_train, :],
                 target_data=train_data[:n_train, :],
                 target_labels=train_labels[:n_train],
                 target=sim_target_svm_precomputed,
                 target_params=params,
                 epochs=50,
                 learning_rate=0.001,
                 batch_size=128,
                 verbose=True,
                 regularizer_weight=0.001)

        acc = evaluate_ncc(proj.transform(train_data[:n_train, :]),
                           train_labels[:n_train], proj.transform(test_data),
                           test_labels)
    else:
        # Fail fast with a clear message instead of a NameError on `acc` below.
        raise ValueError("Unknown method: %s" % method)

    print("Method: ", method, " Test accuracy: ", 100 * acc, " %")