Esempio n. 1
0
def train_data(data, labels, plot=False, C=3, W=32, H=32):
    """Train an SVM on Saak features extracted from image data.

    Args:
        data: array of images, reshaped here to (-1, C, W, H).
        labels: class labels aligned with ``data``.
        plot: forwarded to ``entropy_test`` to enable optional plotting.
        C, W, H: channel count and spatial size of each image.

    Returns:
        Tuple ``(clf, filters, means, final_feat_dim, idx, pca)``: the
        fitted classifier plus everything needed to transform test data
        the same way (Saak filters/means, selected-feature indices, PCA).
    """
    data = data.reshape(-1, C, W, H)
    PrintHelper.print('Incoming data shape is %s' % str(data.shape))
    filters, means, outputs = saak.multi_stage_saak_trans(data,
                                                          energy_thresh=0.97)
    # Use floor division: under Python 3, `/` would make final_feat_dim a
    # float even though it is a (purely integral) dimensionality.
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    # This is the dimensionality of each datapoint.
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim

    # Remove some of the features with an entropy test.
    selected_feat, idx = entropy_test(final_feat, labels, plot)

    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=248)

    clf = svm_classifier(reduced_feat, labels)
    pred = clf.predict(reduced_feat)

    # Training accuracy only — evaluated on the same data the SVM was fit on.
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))

    return clf, filters, means, final_feat_dim, idx, pca
Esempio n. 2
0
def train_data(data, labels):
    """Train a KNN classifier on Saak features of 3x32x32 image data.

    Args:
        data: array of images, reshaped here to (-1, 3, 32, 32).
        labels: class labels aligned with ``data``.

    Returns:
        Tuple ``(clf, filters, means, final_feat_dim, idx, pca)``: the
        fitted classifier plus everything needed to transform test data
        the same way (Saak filters/means, selected-feature indices, PCA).
    """
    data = data.reshape(-1, 3, 32, 32)
    filters, means, outputs = saak.multi_stage_saak_trans(data,
                                                          energy_thresh=0.97)
    # Use floor division: under Python 3, `/` would make final_feat_dim a
    # float even though it is a (purely integral) dimensionality.
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    # This is the dimensionality of each datapoint.
    print('final feature dimension is {}'.format(final_feat_dim))
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim

    # Remove some of the features with an f-test, then reduce with PCA.
    selected_feat, idx = f_test(final_feat, labels, thresh=0.75)
    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=248)

    clf = knn_classifier(reduced_feat, labels, 20)
    pred = clf.predict(reduced_feat)

    # Training accuracy only — evaluated on the same data the KNN was fit on.
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))

    return clf, filters, means, final_feat_dim, idx, pca
Esempio n. 3
0
 def create_test_dataset():
     """Build Saak features for the test split.

     Uses ``test_loader``, ``means``, and ``filters`` from the enclosing
     scope and returns ``(features, labels)`` for the test set.
     """
     # None means "use every test image"; set a number (e.g. 500) to subsample.
     num_images_test = None
     images, targets = create_numpy_dataset(num_images_test, test_loader)
     transformed = saak.test_multi_stage_saak_trans(images, means, filters)
     features = saak.get_final_feature(transformed)
     return features, targets
Esempio n. 4
0
def main():
    """Train and evaluate an SVM on Saak features of the SVHN dataset.

    Downloads SVHN, extracts multi-stage Saak features, selects features
    with an f-test, reduces dimensionality with PCA, fits an SVM, and
    prints training and testing accuracy.
    """
    batch_size = 1
    test_batch_size = 1
    kwargs = {}
    # Avoid "too many open files" errors when DataLoader workers share data.
    torch.multiprocessing.set_sharing_strategy('file_system')

    # NOTE: alternative loaders (MNIST, Fashion-MNIST, custom HDF5 datasets
    # produced by data/create_hdf5.py) previously lived here; recover them
    # from version history if needed.

    # Built-in SVHN loader
    svhn_train = datasets.SVHN(root='./data/svhn',
                               split='train',
                               transform=transforms.ToTensor(),
                               download=True)
    svhn_test = datasets.SVHN(root='./data/svhn',
                              split='test',
                              transform=transforms.ToTensor(),
                              download=True)
    train_loader = data_utils.DataLoader(svhn_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         **kwargs)

    test_loader = data_utils.DataLoader(svhn_test,
                                        batch_size=test_batch_size,
                                        shuffle=False,
                                        **kwargs)

    # None means "use every training image"; set a number to subsample.
    NUM_IMAGES = None
    data, labels = create_numpy_dataset(NUM_IMAGES, train_loader)
    filters, means, outputs = saak.multi_stage_saak_trans(data,
                                                          energy_thresh=0.97)
    # Floor division keeps the dimensionality an int under Python 3.
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    print('final feature dimension is {}'.format(final_feat_dim))
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim
    selected_feat, idx = f_test(final_feat, labels)
    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=64)
    clf = svm_classifier(reduced_feat, labels)
    pred = clf.predict(reduced_feat)
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))

    print('\n-----------------start testing-------------\n')

    test_data, test_labels = create_numpy_dataset(None, test_loader)
    test_outputs = saak.test_multi_stage_saak_trans(test_data, means, filters)
    test_final_feat = saak.get_final_feature(test_outputs)
    assert test_final_feat.shape[1] == final_feat_dim

    # Apply the same feature selection and PCA that were fit on training data.
    test_selected_feat = test_final_feat[:, idx]
    test_reduced_feat = pca.transform(test_selected_feat)
    print('testing reduced feat shape {}'.format(test_reduced_feat.shape))
    test_pred = clf.predict(test_reduced_feat)
    test_acc = sklearn.metrics.accuracy_score(test_labels, test_pred)
    print('testing acc is {}'.format(test_acc))
Esempio n. 5
0
 def create_test_dataset():
     """Build Saak features for the test split.

     Uses ``test_loader``, ``means``, and ``filters`` from the enclosing
     scope and returns ``(features, labels)`` for the test set.
     """
     images, targets = create_numpy_dataset(None, test_loader)
     transformed = saak.test_multi_stage_saak_trans(images, means, filters)
     features = saak.get_final_feature(transformed)
     return features, targets
Esempio n. 6
0
                                         shuffle=True,
                                         **kwargs)

    test_loader = data_utils.DataLoader(svhn_test,
                                        batch_size=test_batch_size,
                                        shuffle=False,
                                        **kwargs)

    K = 10
    NUM_VIS = 20
    NUM_IMAGES = None
    num_images = NUM_IMAGES

    data = saak.create_numpy_dataset(num_images, train_loader)

    filters, means, outputs = saak.multi_stage_saak_trans(data,
                                                          energy_thresh=0.97)
    final_feat_dim = sum([
        ((output.shape[1] - 1) / 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    ])
    print('final feature dimension is {}'.format(final_feat_dim))
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim
    print(final_feat.shape)

    k_mean_clustering(data=data,
                      feature=final_feat,
                      K=K,
                      num_centroids_to_visualize=NUM_VIS)