Example #1
        def test(x):
            mu, sigma = preprocess.muSigma(x)

            self.assertAlmostEqual(1.23902738264240, x[1][2])

            self.assertEqual(5, len(mu))
            self.assertEqual(5, len(sigma))

            self.assertAlmostEqual(2.87969736221038, mu[0])
            self.assertAlmostEqual(2.04868506865762, sigma[0])
            self.assertAlmostEqual(-0.99025024303433, (x[0][0] - mu[0]) / sigma[0])

            self.assertAlmostEqual(1.97861578296198, mu[2])
            self.assertAlmostEqual(2.33076030134340, sigma[2])
            self.assertAlmostEqual(-0.31731637092553, (x[1][2] - mu[2]) / sigma[2])

            y = preprocess.normalize(x, mu, sigma)

            m, n = y.shape
            self.assertEqual(4, m)
            self.assertEqual(5, n)

            self.assertAlmostEqual(-0.99025024303433, y[0][0])
            self.assertAlmostEqual(-0.31731637092553, y[1][2])

            u = preprocess.sigmoid(y)
            self.assertAlmostEqual(0.27086265279957, u[0][0])
            self.assertAlmostEqual(0.42132990768430, u[1][2])
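The preprocess module under test is not shown. A minimal sketch consistent with the assertions above (column-wise mean and standard deviation, z-score normalization, elementwise sigmoid) could look like the following; the names muSigma, normalize and sigmoid come from the test, everything else is assumed.

import numpy

def muSigma(x):
    # Column-wise mean and (population) standard deviation of a 2-D array.
    x = numpy.asarray(x, dtype=float)
    return x.mean(axis=0), x.std(axis=0)

def normalize(x, mu, sigma):
    # Z-score every column: (value - column mean) / column standard deviation.
    return (numpy.asarray(x, dtype=float) - mu) / sigma

def sigmoid(y):
    # Elementwise logistic function, squashing values into (0, 1).
    return 1.0 / (1.0 + numpy.exp(-y))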
Example #2
def main():
	ip, op, metadata = preprocess.pre_process_stage1(sys.argv[1])
	ip, op = shuffle_order(ip, op)
	normalized_ip, normalized_op = preprocess.normalize(ip, op, metadata)

	# One-hot-encoded copy of the inputs for the kNN classifier.
	knn_ip, knn_op = preprocess.normalize(ip, op, metadata, hot_encode=True)
	#neural_spec = [int(spec.strip()) for spec in sys.argv[2].split(",")]
	neural_spec = [4, 5]
	neural_spec.append(len(normalized_op[0]))
	neural_spec.insert(0, len(normalized_ip[0]))
	learning_rate, momentum_rate = 0.10, 0.02  # 0.001, 0.001

	knn_accs, comp_accs, mean_iter, mse = [0]*10, [0]*10, 0, 0

	if '--neural' in sys.argv:
		comp_accs = k_fold_validation_neural_net(normalized_ip, normalized_op, neural_spec, learning_rate, momentum_rate)
	elif '--dtree' in sys.argv:
		comp_accs = k_fold_validation_dtree(ip, op, metadata)

	k = int(sys.argv[2])

	tic = timeit.default_timer()
	knn_accs = k_fold_validation_knn(knn_ip, numpy.array(op), k, metadata)
	toc = timeit.default_timer()

	print("\n\n")
	print("Time Taken : %f" % (toc - tic))
	print("Dataset Size : %d" % (len(ip)))
	print("Number of features : %d" % len(ip[0]))

	print("\nFold\t\t\tkNN\t\t\tDecision Tree/Neural Network")
	for fold in range(0, 10):
		print("%d \t\t\t %.2f \t\t\t %.2f" % (fold+1, knn_accs[fold], comp_accs[fold]))

	comp_mu, comp_ci = statistics.calc_confidence_interval(comp_accs)
	knn_mu, knn_ci = statistics.calc_confidence_interval(knn_accs)

	t_mu, t_ci = statistics.paired_t_test(comp_accs, knn_accs)

	print("\nConfidence interval for kNN classifier : %.3f   +/-   %.3f"%(knn_mu, knn_ci))
	print("Confidence interval for decison tree/neural network : %.3f   +/-   %.3f"%(comp_mu, comp_ci))


	print("Result of Paired T-Test : %.3f   +/-   %.3f"%(t_mu, t_ci))

	if 0 > t_mu - t_ci and 0<t_mu+t_ci:
		print("The two algorithms are statistically similar")

	else:
		print("The difference in the performance of the two algorithms is statistically significant")
Example #3
    def normalize(self, data):
        x, y = data
        z = preprocess.sigmoid(preprocess.normalize(x, self.mu, self.sigma))
        return numpy.array(z, dtype=numpy.float32), y
Example #4
    def do_normalize(self, train_pos_col, train_neg_col, test_pos_col, test_neg_col):
        train_pos_col_norm, train_neg_col_norm, test_pos_col_norm, test_neg_col_norm, max_value, min_value = \
            preprocess.normalize(train_pos_col, train_neg_col, test_pos_col, test_neg_col)

        return train_pos_col_norm, train_neg_col_norm, test_pos_col_norm, test_neg_col_norm, max_value, min_value
Example #5
def query_terms(id, lang, terms, repo=None, txn=None):
    terms = p.normalize(lang, terms)
    return ctx.invidx[id][lang].query(terms, repo=repo, txn=txn)
Example #6
            self.chains.setdefault(s1, [])
            self.chains[s1].append(s2)

    def generate(self, bos='(BOS)'):
        tokens = []

        node = random.choice(self.chains[bos])
        while node != '(EOS)':
            tokens.append(node)

            node = random.choice(self.chains[node])

        return ' '.join(tokens)


if __name__ == '__main__':
    import sys
    from tokenizer import tokenize
    from preprocess import normalize

    markov_chain = MarkovChain()

    for line in sys.stdin:
        title = normalize(line.strip())
        tokens = tokenize(title)
        
        markov_chain.train(tokens)

    for i in range(100):
        print(markov_chain.generate())
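The MarkovChain class is only partially visible here (the tail of its training loop plus generate). A self-contained sketch consistent with that fragment, with the (BOS)/(EOS) padding assumed, is:

import random

class MarkovChain:
    # Minimal first-order chain over tokens; train() is assumed to pad each
    # token list with (BOS)/(EOS) markers so generate() above can walk it.
    def __init__(self):
        self.chains = {}

    def train(self, tokens, bos='(BOS)', eos='(EOS)'):
        states = [bos] + list(tokens) + [eos]
        for s1, s2 in zip(states, states[1:]):
            self.chains.setdefault(s1, [])
            self.chains[s1].append(s2)

    def generate(self, bos='(BOS)'):
        tokens = []
        node = random.choice(self.chains[bos])
        while node != '(EOS)':
            tokens.append(node)
            node = random.choice(self.chains[node])
        return ' '.join(tokens)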
Example #7
def train_cifar10(datapath, dataset_name,
                  learning_rate=0.2, n_epochs=10000,
                  nkerns=[20, 50], batch_size=10000):
    """ This function is used to train cifar10 dataset for object recognition."""
    rng = numpy.random.RandomState(23455)                        # generate random number seed
    mrng = RandomStreams()
    num_channels = 3                                             # for RGB 3-channel image inputs
    layer0_rows = 32                                             # image height 
    layer0_cols = 32                                             # image width
    layer_pixels = layer0_rows * layer0_cols                     # number of pixels in a layer: 1024
    column_width = layer_pixels * num_channels                   # column_width = 3072
    layer0_sub_rows = layer0_rows / 2                            # layer0_sub_rows = 16
    layer0_sub_cols = layer0_cols / 2                            # layer0_sub_cols = 16
    kernel0_size = 5                                             # filter size of first layer kernels
    pool0_size = 2                                               # pool size of the first layer
    layer1_rows = (layer0_rows - kernel0_size + 1) / pool0_size           # layer1_rows = 14
    layer1_cols = (layer0_cols - kernel0_size + 1) / pool0_size           # layer1_cols = 14
    layer1_sub_rows = (layer0_sub_rows - kernel0_size + 1) / pool0_size   # layer1_sub_rows = 6
    layer1_sub_cols = (layer0_sub_cols - kernel0_size + 1) / pool0_size   # layer1_sub_cols = 6
    kernel1_size = 5
    pool1_size = 1                                                        # no pooling for the second layer
    layer2_rows = (layer1_rows - kernel1_size + 1) / pool1_size           # layer2_rows = 10
    layer2_cols = (layer1_cols - kernel1_size + 1) / pool1_size           # layer2_cols = 10
    hidden_nodes = 128
    hidden_extra_nodes = 500
    penalty_coeff = 0.0
    num_batches = 50000 / batch_size

    # read in data
    data_list  = numpy.empty(shape=[0, column_width])                     # for each set of training data,
                                                                          # column width is fixed.
    label_list = numpy.empty(shape=[0,])                                # for each set of training labels,
                                                                          # row height is fixed.
    for i in range(len(dataset_name)):
        temp_data = unpickle(datapath+dataset_name[i])
        temp_x    = temp_data['data']
        temp_y    = numpy.array(temp_data['labels'])                      # y labels are python lists, convert
                                                                          # to numpy.ndarray
        normalized_x = normalize(temp_x)                                  # normalize data, rescale to 0 - 1
        
        data_list = numpy.append(data_list, normalized_x, axis=0)
        label_list= numpy.append(label_list, temp_y, axis=0)              # loop over the whole training set

    del temp_data, temp_x, temp_y, normalized_x
    shared_x, shared_y = share_data(data_list, label_list)

    validate_set = unpickle('../data/cifar10/test_batch')
    validate_x = validate_set['data']
    validate_y = validate_set['labels']
    normalized_valx = normalize(validate_x)                              # normalize the validation set.
    evalset_x, evalset_y = share_data(normalized_valx, validate_y)
    del validate_set, validate_x, validate_y, normalized_valx

    # get variable names for data and labels
    x = T.matrix('x')
    y = T.ivector('y')
    state = T.iscalar('state')                                          # state variable represents train(0) and test(1) 
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # initialize layer0 parameters 
    layer0_fan_in = num_channels * layer0_rows * layer0_cols  # same as numpy.prod(filter_shape[1:])
    layer0_fan_out= nkerns[0] * kernel0_size * kernel0_size / (pool0_size * pool0_size)
    W_bound0 = numpy.sqrt(6. / (layer0_fan_in + layer0_fan_out))
    layer0_W = rng.uniform(low=-W_bound0, high=W_bound0, size=(nkerns[0], num_channels, kernel0_size, kernel0_size)) 
    
    # initialize layer1 parameters
    layer1_fan_in = num_channels * layer1_rows * layer1_cols  # same as numpy.prod(filter_shape[1:])
    layer1_fan_out= nkerns[1] * kernel1_size * kernel1_size / (pool1_size * pool1_size)
    W_bound1 = numpy.sqrt(6. / (layer1_fan_in + layer1_fan_out))
    layer1_W = rng.uniform(low=-W_bound1, high=W_bound1, size=(nkerns[1], nkerns[0], kernel1_size, kernel1_size)) 
    
    layer0_input = x.reshape((batch_size, num_channels, layer0_rows, layer0_cols))
    layer0_input_sub = downsample.max_pool_2d(input=layer0_input, ds=(2,2), ignore_border=True)

    layer0 = MyNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, num_channels, layer0_rows, layer0_cols),        # image_shape = (500, 3, 32, 32)
        filter_shape=(nkerns[0], num_channels, kernel0_size, kernel0_size),      # filter_shape= (20, 3, 5, 5)
        poolsize=(pool0_size, pool0_size),
        params_W=layer0_W,
    )                                      # construct the first layer
    layer0_sub = MyNetConvPoolLayer(
        rng,
        input=layer0_input_sub,
        image_shape=(batch_size, num_channels, layer0_sub_rows, layer0_sub_cols),# image_shape = (500, 3, 16, 16)
        filter_shape=(nkerns[0], num_channels, kernel0_size, kernel0_size),      # filter_shape= (20, 3, 5, 5)
        poolsize=(pool0_size, pool0_size),
        params_W=layer0_W
    )

    layer1 = MyNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer1_rows, layer1_cols),           # image_shape = (500, 20, 14, 14)
        filter_shape=(nkerns[1], nkerns[0], kernel1_size, kernel1_size),         # filter_shape= (50, 20, 5, 5)
        poolsize=(pool1_size, pool1_size),
        params_W=layer1_W
    )                                                                            # output size = (500, 50, 10, 10)
    layer1_sub = MyNetConvPoolLayer(
        rng,
        input=layer0_sub.output,
        image_shape=(batch_size, nkerns[0], layer1_sub_rows, layer1_sub_cols),   # image_shape = (500, 20, 6, 6)
        filter_shape=(nkerns[1], nkerns[0], kernel1_size, kernel1_size),         # filter_shape= (50, 20, 5, 5)
        poolsize=(pool1_size, pool1_size),
        params_W=layer1_W
    )                                                                            # output size = (500, 50, 2, 2)

    layer2_input = T.concatenate(
        [layer1.output.flatten(2), layer1_sub.output.flatten(2)], axis=1
    )
   
    layer2 = HiddenLayer(
        mrng,
        rng,
        input=layer2_input,
        n_in=nkerns[1]*((layer1_rows+1-kernel1_size)*(layer1_cols+1-kernel1_size)+(layer1_sub_rows+1-kernel1_size)*(layer1_sub_cols+1-kernel1_size)), 
        n_out=hidden_nodes,
        state=state,
        activation=T.tanh
    )


    layer3 = LogisticRegression(input=layer2.output, n_in=hidden_nodes, n_out=10)

    total_cost = layer3.negative_log_likelihood(y) + penalty_coeff * layer2.W.norm(2)

    params = layer3.params + layer2.params + layer1_sub.params + layer1.params + layer0_sub.params + layer0.params
    grad_l3     = T.grad(total_cost, layer3.params)
    grad_l2     = T.grad(total_cost, layer2.params)
    grad_l1_sub = T.grad(total_cost, layer1_sub.params)
    grad_l1     = T.grad(total_cost, layer1.params)
    grad_l0_sub	= T.grad(total_cost, layer0_sub.params)
    grad_l0     = T.grad(total_cost, layer0.params)

    updates = [
        (layer3.params[0]    , layer3.params[0]     - learning_rate * grad_l3[0]),
        (layer3.params[1]    , layer3.params[1]     - learning_rate * grad_l3[1]),
        (layer2.params[0]    , layer2.params[0]     - learning_rate * grad_l2[0]),
        (layer2.params[1]    , layer2.params[1]     - learning_rate * grad_l2[1]),
        (layer1_sub.params[0], layer1_sub.params[0] - learning_rate * (grad_l1_sub[0] + grad_l1[0])),
        (layer1_sub.params[1], layer1_sub.params[1] - learning_rate * (grad_l1_sub[1] + grad_l1[1])),
        (layer1.params[0]    , layer1.params[0]     - learning_rate * (grad_l1_sub[0] + grad_l1[0])),
        (layer1.params[1]    , layer1.params[1]     - learning_rate * (grad_l1_sub[1] + grad_l1[1])),
        (layer0_sub.params[0], layer0_sub.params[0] - learning_rate * (grad_l0_sub[0] + grad_l0[0])),
        (layer0_sub.params[1], layer0_sub.params[1] - learning_rate * (grad_l0_sub[1] + grad_l0[1])),
        (layer0.params[0]    , layer0.params[0]     - learning_rate * (grad_l0_sub[0] + grad_l0[0])),
        (layer0.params[1]    , layer0.params[1]     - learning_rate * (grad_l0_sub[1] + grad_l0[1]))
    ]

    training_index = T.iscalar()
    validate_index = T.iscalar()

    train_model = theano.function(
        [training_index],
        [total_cost, layer3.errors(y)],
        updates=updates,
        givens={
            x : shared_x[training_index * batch_size : (training_index+1) * batch_size],
            y : shared_y[training_index * batch_size : (training_index+1) * batch_size],
            state: numpy.cast['int32'](0)
        }
    )

    test_model = theano.function(
        [],
        layer3.errors(y),
        givens={
            x: evalset_x[0: batch_size],
            y: evalset_y[0: batch_size],
            state: numpy.cast['int32'](1)
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    patience = 10000
    epoch = 0
    done_looping = False

    # save parameters every 1000 iterations.
    param_files = ['p0', 'p1', 'p2', 'p3', 'p4',
                   'p5', 'p6', 'p7', 'p8', 'p9']
    while(epoch < n_epochs) and (not done_looping):
        batch_index = randint(0, num_batches-1)                                # randomly generate the batch number to be trained.
        cost_ij, error = train_model(batch_index)
        epoch = epoch + 1
        print "number of iterations:   ", epoch 
        print "selected training batch:", batch_index
        print "current cost:           ", cost_ij
        print "validate error:         ", error

        # call validation accuracy
        if (epoch % 10 == 0):
            error_test = test_model()
            print "      "
            print "validate error of test_batch:", error_test
            print "      "
Example #8
import os
import load
import preprocess

normalize = False

if not os.path.isfile('train.npy'):
    load.import_test_train()
    train_data, ans = load.load_train()
    test_data = load.load_test()
    normalize = True
if normalize or not os.path.isfile('train_normalized.npy'):
    preprocess.normalize(train_data, test_data)
Example #9
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras import optimizers

from sklearn.model_selection import train_test_split

import preprocess

X, y = preprocess.read_data('../../smiles')
X = preprocess.normalize(X)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
X_train, X_valid, X_test, y_train, y_valid, y_test = preprocess.split_data(
    X, y)

# from keras.utils import to_categorical
# y_train = to_categorical(y_train)
# y_test = to_categorical(y_test)
# y_valid = to_categorical(y_valid)

print(y_train.shape)

input_shape = X_train.shape[1:]


def createModel():
    model = Sequential()

    # The snippet is truncated at this point; an assumed first layer and return
    # are added only so the function parses (not taken from the original source).
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    return model
Example #10
    x = pd.read_feather("sc_data/snRNA_AD_brain.feather").iloc[:, :10]
    y = np.array(x.columns.str.split('.').tolist())[:, 1].astype(float)
    x = x.values.T

    # preprocessing scRNA-seq read counts matrix
    adata = sc.AnnData(x)
    adata.obs['Group'] = y

    adata = read_dataset(adata,
                     transpose=False,
                     test_split=False,
                     copy=True)

    adata = normalize(adata,
                      size_factors=True,
                      normalize_input=True,
                      logtrans_input=True)

    input_size = adata.n_vars

    print(adata.X.shape)
    print(y.shape)

    x_sd = adata.X.std(0)
    x_sd_median = np.median(x_sd)
    print("median of gene sd: %.5f" % x_sd_median)


    if args.update_interval == 0:  # one epoch
        args.update_interval = int(adata.X.shape[0]/args.batch_size)
    print(args)
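read_dataset and normalize follow the interface of DCA-style scRNA-seq preprocessing; the helpers themselves are not shown. A sketch of what such a normalize typically does (library-size factors, log1p transform, gene-wise scaling), written directly against adata.X with numpy and with all details assumed, is:

import numpy as np

def normalize(adata, size_factors=True, normalize_input=True, logtrans_input=True):
    # Assumed sketch: per-cell size factors, log-transform, then gene-wise
    # z-scaling of the count matrix stored in adata.X.
    counts = np.asarray(adata.X, dtype=np.float64)
    if size_factors:
        n_counts = counts.sum(axis=1)
        sf = n_counts / np.median(n_counts)
    else:
        sf = np.ones(counts.shape[0])
    adata.obs['size_factors'] = sf
    counts = counts / sf[:, None]
    if logtrans_input:
        counts = np.log1p(counts)
    if normalize_input:
        counts = (counts - counts.mean(axis=0)) / (counts.std(axis=0) + 1e-8)
    adata.X = counts
    return adata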
Example #11
def replaceWithPhoto(np_row):
    image_location = np_row[0].decode('UTF-8').strip()
    image = imread(cwd + '/data/' + image_location).astype(np.float32)
    image = preprocess_image(image)
    image = normalize(image)
    return np.array([image, np_row[1]])
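preprocess_image and normalize are not shown in this snippet. For image inputs, normalize usually just rescales pixel intensities; a minimal assumed version:

import numpy as np

def normalize(image):
    # Assumed: rescale 8-bit pixel values into [-0.5, 0.5].
    return image / 255.0 - 0.5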
Example #12
    def make_columns():
        """
        Builds the feature_columns required by the estimator to link the Dataset and the model_fn
        :return:
        """
        columns_dict = {}

        columns_dict['gci'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['ta'] = (
            seq_fc.sequence_numeric_column(
                'ta', normalizer_fn=lambda x: normalize(x, 'ta', stats_dict)
            )
        )
        columns_dict['rsrp'] = (
            seq_fc.sequence_numeric_column(
                'rsrp', normalizer_fn=lambda x: normalize(
                    x, 'rsrp', stats_dict)))
        columns_dict['gci0'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci0',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp0'] = (
            seq_fc.sequence_numeric_column(
                'rsrp0', normalizer_fn=lambda x: normalize(
                    x, 'rsrp0', stats_dict)))
        columns_dict['gci1'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci1',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp1'] = (
            seq_fc.sequence_numeric_column(
                'rsrp1', normalizer_fn=lambda x: normalize(
                    x, 'rsrp1', stats_dict)))
        columns_dict['gci2'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci2',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp2'] = (
            seq_fc.sequence_numeric_column(
                'rsrp2', normalizer_fn=lambda x: normalize(
                    x, 'rsrp2', stats_dict)))
        columns_dict['dt'] = (
            seq_fc.sequence_numeric_column(
                'dt', normalizer_fn=lambda x: normalize(x, 'dt', stats_dict)
            )
        )
        return columns_dict
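The normalize called inside each normalizer_fn is a project helper. A plausible sketch, assuming stats_dict maps a feature name to its precomputed mean and standard deviation, is:

def normalize(x, key, stats_dict):
    # Z-score a numeric feature column using precomputed statistics.
    # Assumed layout: stats_dict = {feature_name: {'mean': m, 'std': s}}.
    mean = stats_dict[key]['mean']
    std = stats_dict[key]['std']
    return (x - mean) / std

Because normalizer_fn receives a tensor, plain arithmetic like this stays graph-compatible through TensorFlow's operator overloading.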
Example #13
def preprocess_and_save_data(cifar10_dataset_folder_path, output_path,
                             rm_class, aug_enable, reshape_enable):
    """
    Preprocess Training and Validation Data
    """
    n_batches = 5

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    features = []
    labels = []
    for batch_i in range(1, n_batches + 1):
        curr_features, curr_labels = load_cfar10_batch(
            cifar10_dataset_folder_path, batch_i)

        if len(features) == 0:
            features = curr_features
            labels = curr_labels
        else:
            features = np.concatenate((features, curr_features))
            labels = np.concatenate((labels, curr_labels))

    # Preprocess training & validation data
    if aug_enable:
        features_ud = preprc.vertical_flip(features)
        features_lr = preprc.horizontal_flip(features)
        features_rot90 = preprc.rot90(features)
        features_rot270 = preprc.rot270(features)
        features = np.concatenate((features, features_ud, features_lr,
                                   features_rot90, features_rot270))
        labels = np.concatenate((labels, labels, labels, labels, labels))

    if reshape_enable:
        features = preprc.reshape_image(features, (64, 64, 3))

    features, _, _ = preprc.normalize(features, mean=mean, std=std)
    labels = preprc.one_hot_encode(labels)

    print("[Training data] Removing No.{} Class...".format(rm_class))
    print("\t[Before] feature shape: ", np.shape(features))
    print("\t[Before] label shape: ", np.shape(labels))
    count = 0
    remove_class = []
    for i in range(len(features)):
        if labels[i, rm_class] == 1:
            count = count + 1
            remove_class.append(i)
    print("\tCount: {}".format(count))
    features = np.delete(features, remove_class, axis=0)
    labels = np.delete(labels, remove_class, axis=0)

    print("\t[After] feature shape: ", np.shape(features))
    print("\t[After] label shape: ", np.shape(labels))

    # Save training data
    pickle.dump((features, labels),
                open(
                    os.path.join(output_path,
                                 'preprocess_train_{}.p'.format(rm_class)),
                    'wb'),
                protocol=4)

    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    # load the test data
    test_features = batch['data'].reshape(
        (len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    test_labels = batch['labels']

    if reshape_enable:
        test_features = preprc.reshape_image(test_features, (64, 64, 3))

    # Preprocess training & validation data
    test_features, _, _ = preprc.normalize(test_features, mean=mean, std=std)
    test_labels = preprc.one_hot_encode(test_labels)

    # Save original test data
    pickle.dump((np.array(test_features), np.array(test_labels)),
                open(os.path.join(output_path, 'test.p'), 'wb'))

    print("[Testing data] Removing No.{} Class...".format(rm_class))
    print("\t[Before] feature shape: ", np.shape(test_features))
    print("\t[Before] label shape: ", np.shape(test_labels))
    count = 0
    remove_class = []
    for i in range(len(test_features)):
        if test_labels[i, rm_class] == 1:
            count = count + 1
            remove_class.append(i)
    print("\tCount: {}".format(count))
    test_features = np.delete(test_features, remove_class, axis=0)
    test_labels = np.delete(test_labels, remove_class, axis=0)

    print("\t[After] feature shape: ", np.shape(test_features))
    print("\t[After] label shape: ", np.shape(test_labels))

    # Save test data
    pickle.dump((np.array(test_features), np.array(test_labels)),
                open(
                    os.path.join(output_path,
                                 'preprocess_test_{}.p'.format(rm_class)),
                    'wb'),
                protocol=4)
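preprc.normalize is called with precomputed mean and std and unpacked into three values. Its exact behaviour is not shown; one sketch consistent with that call signature (standardize the images and return the statistics that were used) is:

import numpy as np

def normalize(features, mean=None, std=None):
    # Assumed: scale pixels to [0, 1], then standardize per channel with the
    # supplied statistics (computing them from the data when not supplied).
    features = features.astype(np.float32) / 255.0
    if mean is None:
        mean = features.mean(axis=(0, 1, 2))
    if std is None:
        std = features.std(axis=(0, 1, 2))
    return (features - mean) / std, mean, std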
Example #14
def preprocess_and_save_single_class_data(cifar10_dataset_folder_path,
                                          output_path, aug_enable,
                                          reshape_enable):
    """
    Preprocess Training and Validation Data
    """
    n_batches = 5

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    features = []
    labels = []
    for batch_i in range(1, n_batches + 1):
        curr_features, curr_labels = load_cfar10_batch(
            cifar10_dataset_folder_path, batch_i)

        if len(features) == 0:
            features = curr_features
            labels = curr_labels
        else:
            features = np.concatenate((features, curr_features))
            labels = np.concatenate((labels, curr_labels))

    # Preprocess training & validation data
    if aug_enable:
        features_ud = preprc.vertical_flip(features)
        features_lr = preprc.horizontal_flip(features)
        features_rot90 = preprc.rot90(features)
        features_rot270 = preprc.rot270(features)
        features = np.concatenate((features, features_ud, features_lr,
                                   features_rot90, features_rot270))
        labels = np.concatenate((labels, labels, labels, labels, labels))

    if reshape_enable:
        features = preprc.reshape_image(features, (64, 64, 3))

    features, _, _ = preprc.normalize(features, mean=mean, std=std)
    labels = preprc.one_hot_encode(labels)

    for reserved_class in range(10):

        print(
            "[Training data] Extracting No.{} Class...".format(reserved_class))

        curr_features = features[labels[:, reserved_class] == 1]
        curr_lables = labels[labels[:, reserved_class] == 1]

        print("\t[Class {}] feature shape: ".format(reserved_class),
              np.shape(curr_features))

        # Save training data
        pickle.dump(
            (curr_features, curr_lables),
            open(
                os.path.join(output_path,
                             'pr_train_class_{}.p'.format(reserved_class)),
                'wb'))

    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    # load the test data
    test_features = batch['data'].reshape(
        (len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    test_labels = batch['labels']

    if reshape_enable:
        test_features = preprc.reshape_image(test_features, (64, 64, 3))

    # Preprocess training & validation data
    test_features, _, _ = preprc.normalize(test_features, mean=mean, std=std)
    test_labels = preprc.one_hot_encode(test_labels)

    # Save original test data
    pickle.dump((np.array(test_features), np.array(test_labels)),
                open(os.path.join(output_path, 'test.p'), 'wb'))

    for reserved_class in range(10):

        print(
            "[Testing data] Extracting No.{} Class...".format(reserved_class))

        curr_features = test_features[test_labels[:, reserved_class] == 1]
        curr_lables = test_labels[test_labels[:, reserved_class] == 1]

        print("\t[After] feature shape: ", np.shape(curr_features))

        # Save test data
        pickle.dump(
            (np.array(curr_features), np.array(curr_lables)),
            open(
                os.path.join(output_path,
                             'pr_test_class_{}.p'.format(reserved_class)),
                'wb'))
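preprc.one_hot_encode is the other preprocessing helper this example relies on; the slicing labels[:, reserved_class] == 1 above only works if it produces one row per sample with a single 1 in the class column. A sketch under that assumption:

import numpy as np

def one_hot_encode(labels, n_classes=10):
    # Assumed: map integer CIFAR-10 labels to one-hot rows.
    labels = np.asarray(labels, dtype=int)
    one_hot = np.zeros((len(labels), n_classes), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot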
Example #15
def test_model_pointnet_sample_version():
    data = []
    label = []
    for i in range(2):
        f = h5py.File(
            '/home/pal/data/ModelNet40PointNetSampleVersion/modelnet40_ply_hdf5_2048/ply_data_test{}.h5'
            .format(i))
        data.append(f['data'][:])
        label.append(f['label'][:])
        print data[i].shape
        print label[i].shape

    data = np.concatenate(data, axis=0)
    label = np.concatenate(label, axis=0)
    label = label[:, 0]
    print data.shape, label.shape

    _, batch_names = read_category_file('data/ModelNet40/CategoryIDs')
    provided_names = read_pointnet_sample_category_file(
        '/home/pal/data/ModelNet40PointNetSampleVersion/modelnet40_ply_hdf5_2048/shape_names.txt'
    )
    index_map = map_provided_label_to_batch_label(batch_names, provided_names)

    rectify_label = np.empty_like(label)
    for index, l in enumerate(label):
        rectify_label[index] = index_map[l]
    label = rectify_label

    model_path = '/home/pal/model/1024_leaky_relu/epoch499.ckpt'
    net = Network(3, 40, True, 1024)
    input = tf.placeholder(dtype=tf.float32,
                           shape=[None, None, 3, 1],
                           name='point_cloud')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='is_training')

    net.inference(input, 'cpu', is_training, leaky_relu)
    score_layer = net.ops['cpu_fc3']

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.log_device_placement = False
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    batch_size = 30
    iter_num = int(math.ceil(data.shape[0] / float(batch_size)))

    correct_num = 0
    all_labels = []
    all_preds = []
    for batch_index in xrange(iter_num):
        begin_index = batch_size * batch_index
        end_index = min((batch_index + 1) * batch_size, data.shape[0])
        batch_label = label[begin_index:end_index]
        batch_data = data[begin_index:end_index]
        batch_data = normalize(batch_data)
        batch_data = exchange_dims_zy(batch_data)
        batch_data = np.expand_dims(batch_data, axis=3)

        scores = sess.run(score_layer,
                          feed_dict={
                              input: batch_data,
                              is_training: False
                          })
        preds = np.argmax(scores, axis=1)
        all_labels.append(batch_label)
        all_preds.append(preds)
        correct_num += np.sum(preds == batch_label)
        # for i in xrange(data.shape[0]):
        #     if preds[i]==label[i]:
        #         continue
        #
        #     with open('misclassified/{}_{}_{}_{:.3}_{:.3}.txt'.format(
        #             names[label[i]],names[preds[i]],
        #             error_num,
        #             scores[i,preds[i]],scores[i,label[i]]),'w') as f:
        #         for pt in data[i,:,:,0]:
        #             f.write('{} {} {}\n'.format(pt[0],pt[1],pt[2]))
        #
        #     error_num+=1
        # print batch_names[batch_label[0]]
        # with open('test.txt','w') as f:
        #     for pt in batch_data[0,:,:,0]:
        #         f.write('{} {} {}\n'.format(pt[0],pt[1],pt[2]))

    print 'accuracy {}'.format(correct_num / float(data.shape[0]))

    cnf_matrix = confusion_matrix(np.concatenate(all_labels, axis=0),
                                  np.concatenate(all_preds, axis=0),
                                  labels=range(40))
    plt.figure()
    plot_confusion_matrix(cnf_matrix, batch_names)
    plt.show()
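normalize and exchange_dims_zy are preprocessing helpers from the same project and are not shown. For point clouds, normalize typically centers each cloud and scales it into the unit sphere, and exchange_dims_zy swaps the y and z axes; a sketch under those assumptions:

import numpy as np

def normalize(batch_data):
    # Assumed: center each cloud at the origin and scale it to the unit sphere.
    # batch_data has shape [batch, num_points, 3].
    centered = batch_data - batch_data.mean(axis=1, keepdims=True)
    scale = np.linalg.norm(centered, axis=2).max(axis=1)
    return centered / scale[:, None, None]

def exchange_dims_zy(batch_data):
    # Assumed: swap the y and z coordinates to match the training convention.
    return batch_data[:, :, [0, 2, 1]]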