def test(x):
    """Check ``preprocess.muSigma``, ``normalize`` and ``sigmoid`` on ``x``.

    The reference values below are compared with ``assertAlmostEqual``; the
    normalised output is additionally asserted to have shape ``(4, 5)``.

    NOTE(review): ``self`` is not a parameter here, so this helper presumably
    lives inside a ``TestCase`` method and closes over its ``self`` - confirm
    against the enclosing code.
    """
    means, stds = preprocess.muSigma(x)

    # Sanity check on the raw input before looking at the statistics.
    self.assertAlmostEqual(1.23902738264240, x[1][2])
    self.assertEqual(5, len(means))
    self.assertEqual(5, len(stds))

    # (row, column, expected mean, expected sigma, expected z-score)
    stat_cases = (
        (0, 0, 2.87969736221038, 2.04868506865762, -0.99025024303433),
        (1, 2, 1.97861578296198, 2.33076030134340, -0.31731637092553),
    )
    for row, col, want_mean, want_std, want_z in stat_cases:
        self.assertAlmostEqual(want_mean, means[col])
        self.assertAlmostEqual(want_std, stds[col])
        self.assertAlmostEqual(want_z, (x[row][col] - means[col]) / stds[col])

    normalized = preprocess.normalize(x, means, stds)
    n_rows, n_cols = normalized.shape
    self.assertEqual(4, n_rows)
    self.assertEqual(5, n_cols)
    for row, col, want in ((0, 0, -0.99025024303433),
                           (1, 2, -0.31731637092553)):
        self.assertAlmostEqual(want, normalized[row][col])

    squashed = preprocess.sigmoid(normalized)
    for row, col, want in ((0, 0, 0.27086265279957),
                           (1, 2, 0.42132990768430)):
        self.assertAlmostEqual(want, squashed[row][col])
def main():
    """Compare kNN against a decision tree or neural network over 10 folds.

    Command-line arguments (read from ``sys.argv``):
        argv[1]    passed to ``preprocess.pre_process_stage1`` to load the data.
        argv[2]    ``k`` for the kNN classifier (parsed with ``int``).
        --neural   evaluate the neural network as the comparison classifier.
        --dtree    evaluate the decision tree as the comparison classifier
                   (only checked when ``--neural`` is absent).

    When neither flag is given the comparison accuracies stay at zero.
    Prints the kNN timing, per-fold accuracies, both confidence intervals and
    the paired t-test verdict.

    NOTE(review): ``statistics`` must be the project module providing
    ``calc_confidence_interval`` / ``paired_t_test``, not the standard library
    module of the same name.
    """
    n_folds = 10

    ip, op, metadata = preprocess.pre_process_stage1(sys.argv[1])
    ip, op = shuffle_order(ip, op)
    normalized_ip, normalized_op = preprocess.normalize(ip, op, metadata)
    # Only the one-hot encoded inputs are needed for kNN; its targets are the
    # raw ``op`` values passed further down.
    knn_ip, _ = preprocess.normalize(ip, op, metadata, hot_encode=True)

    # Hidden layers are fixed at 4 and 5 units; input and output widths are
    # taken from the normalised data.
    neural_spec = [4, 5]
    neural_spec.append(len(normalized_op[0]))
    neural_spec.insert(0, len(normalized_ip[0]))
    learning_rate, momentum_rate = 0.10, 0.02

    comp_accs = [0] * n_folds
    if '--neural' in sys.argv:
        comp_accs = k_fold_validation_neural_net(
            normalized_ip, normalized_op, neural_spec,
            learning_rate, momentum_rate)
    elif '--dtree' in sys.argv:
        comp_accs = k_fold_validation_dtree(ip, op, metadata)

    k = int(sys.argv[2])
    tic = timeit.default_timer()
    knn_accs = k_fold_validation_knn(knn_ip, numpy.array(op), k, metadata)
    toc = timeit.default_timer()

    print("\n\n")
    print("Time Taken : %f" % (toc - tic))
    print("Dataset Size : %d" % (len(ip)))
    print("Number of features : %d" % len(ip[0]))
    print("\nFold\t\t\tkNN\t\t\tDecision Tree/Neural Network")
    for fold in range(n_folds):
        print("%d \t\t\t %.2f \t\t\t %.2f"
              % (fold + 1, knn_accs[fold], comp_accs[fold]))

    comp_mu, comp_ci = statistics.calc_confidence_interval(comp_accs)
    knn_mu, knn_ci = statistics.calc_confidence_interval(knn_accs)
    t_mu, t_ci = statistics.paired_t_test(comp_accs, knn_accs)
    print("\nConfidence interval for kNN classifier : %.3f +/- %.3f"
          % (knn_mu, knn_ci))
    print("Confidence interval for decison tree/neural network : %.3f +/- %.3f"
          % (comp_mu, comp_ci))
    print("Result of Paired T-Test : %.3f +/- %.3f" % (t_mu, t_ci))

    # The classifiers count as similar when the interval strictly contains 0.
    if t_mu - t_ci < 0 < t_mu + t_ci:
        print("The two algorithms are statistically similar")
    else:
        print("The difference in the performance of the two algorithms is statistically significant")
def normalize(self, data):
    """Standardise the inputs of ``data`` and squash them with a sigmoid.

    Args:
        data: A pair ``(inputs, targets)``.

    Returns:
        ``(inputs, targets)`` where the inputs have been passed through
        ``preprocess.normalize`` with ``self.mu`` / ``self.sigma``, then
        through ``preprocess.sigmoid``, and converted to a ``float32`` array.
        The targets are returned unchanged.
    """
    inputs, targets = data
    standardized = preprocess.normalize(inputs, self.mu, self.sigma)
    squashed = preprocess.sigmoid(standardized)
    return numpy.array(squashed, dtype=numpy.float32), targets
def do_normalize(self, train_pos_col, train_neg_col, test_pos_col, test_neg_col):
    """Normalise the four columns together via ``preprocess.normalize``.

    Returns:
        A 6-tuple: the normalised train-positive, train-negative,
        test-positive and test-negative columns, followed by the two extra
        values ``preprocess.normalize`` reports (``max_value``, ``min_value``).
    """
    result = preprocess.normalize(
        train_pos_col, train_neg_col, test_pos_col, test_neg_col)
    # Unpack explicitly so a result of the wrong length still fails here.
    (train_pos_norm, train_neg_norm,
     test_pos_norm, test_neg_norm,
     max_value, min_value) = result
    return (train_pos_norm, train_neg_norm,
            test_pos_norm, test_neg_norm,
            max_value, min_value)
def query_terms(id, lang, terms, repo=None, txn=None):
    """Query the inverted index ``ctx.invidx[id][lang]`` with normalised terms.

    Args:
        id: Key selecting the index group in ``ctx.invidx``.
        lang: Language key; also passed to ``p.normalize``.
        terms: Raw terms, normalised with ``p.normalize(lang, terms)`` first.
        repo: Forwarded unchanged to the index ``query`` call.
        txn: Forwarded unchanged to the index ``query`` call.

    Returns:
        Whatever the index's ``query`` method returns.
    """
    normalized_terms = p.normalize(lang, terms)
    index = ctx.invidx[id][lang]
    return index.query(normalized_terms, repo=repo, txn=txn)
self.chains.setdefault(s1, []) self.chains[s1].append(s2) def generate(self, bos='(BOS)'): tokens = [] node = random.choice(self.chains[bos]) while node != '(EOS)': tokens.append(node) node = random.choice(self.chains[node]) return ' '.join(tokens) if __name__ == '__main__': import sys from tokenizer import tokenize from preprocess import normalize markov_chain = MarkovChain() for line in sys.stdin: title = normalize(line.strip()) tokens = tokenize(title) markov_chain.train(tokens) for i in range(100): print(markov_chain.generate())
def train_cifar10(datapath, dataset_name, learning_rate=0.2, n_epochs=10000, nkerns=[20, 50], batch_size=10000): """ This function is used to train cifar10 dataset for object recognition.""" rng = numpy.random.RandomState(23455) # generate random number seed mrng = RandomStreams() num_channels = 3 # for RGB 3-channel image inputs layer0_rows = 32 # image height layer0_cols = 32 # image width layer_pixels = layer0_rows * layer0_cols # number of pixels in a layer: 1024 column_width = layer_pixels * num_channels # column_width = 3072 layer0_sub_rows = layer0_rows / 2 # layer0_sub_rows = 16 layer0_sub_cols = layer0_cols / 2 # layer0_sub_cols = 16 kernel0_size = 5 # filter size of first layer kernels pool0_size = 2 # pool size of the first layer layer1_rows = (layer0_rows - kernel0_size + 1) / pool0_size # layer1_rows = 14 layer1_cols = (layer0_cols - kernel0_size + 1) / pool0_size # layer1_cols = 14 layer1_sub_rows = (layer0_sub_rows - kernel0_size + 1) / pool0_size # layer1_sub_rows = 6 layer1_sub_cols = (layer0_sub_cols - kernel0_size + 1) / pool0_size # layer1_sub_cols = 6 kernel1_size = 5 pool1_size = 1 # no pooling for the first layer layer2_rows = (layer1_rows - kernel1_size + 1) / pool1_size # layer2_rows = 5 layer2_cols = (layer1_cols - kernel1_size + 1) / pool1_size # layer2_cols = 5 hidden_nodes = 128 hidden_extra_nodes = 500 penalty_coeff = 0.0 num_batches = 50000 / batch_size # read in data data_list = numpy.empty(shape=[0, column_width]) # for each set of training data, # column width is fixed. label_list = numpy.empty(shape=[0,]) # for each set of training labels, # row height is fixed. 
for i in range(len(dataset_name)): temp_data = unpickle(datapath+dataset_name[i]) temp_x = temp_data['data'] temp_y = numpy.array(temp_data['labels']) # y labels are python lists, convert # to numpy.ndarray normalized_x = normalize(temp_x) # normalize data, rescale to 0 - 1 data_list = numpy.append(data_list, normalized_x, axis=0) label_list= numpy.append(label_list, temp_y, axis=0) # loop over the whole training set del temp_data, temp_x, temp_y, normalized_x shared_x, shared_y = share_data(data_list, label_list) validate_set = unpickle('../data/cifar10/test_batch') validate_x = validate_set['data'] validate_y = validate_set['labels'] normalized_valx = normalize(validate_x) # normalize the validation set. evalset_x, evalset_y = share_data(normalized_valx, validate_y) del validate_set, validate_x, validate_y, normalized_valx # get variable names for data and labels x = T.matrix('x') y = T.ivector('y') state = T.iscalar('state') # state variable represents train(0) and test(1) ###################### # BUILD ACTUAL MODEL # ###################### print '... buliding the model' # initialize layer0 parameters layer0_fan_in = num_channels * layer0_rows * layer0_cols # same as numpy.prod(filter_shape[1:]) layer0_fan_out= nkerns[0] * kernel0_size * kernel0_size / (pool0_size * pool0_size) W_bound0 = numpy.sqrt(6. / (layer0_fan_in + layer0_fan_out)) layer0_W = rng.uniform(low=-W_bound0, high=W_bound0, size=(nkerns[0], num_channels, kernel0_size, kernel0_size)) # initialize layer1 parameters layer1_fan_in = num_channels * layer1_rows * layer1_cols # same as numpy.prod(filter_shape[1:]) layer1_fan_out= nkerns[1] * kernel1_size * kernel1_size / (pool1_size * pool1_size) W_bound1 = numpy.sqrt(6. 
/ (layer1_fan_in + layer1_fan_out)) layer1_W = rng.uniform(low=-W_bound1, high=W_bound1, size=(nkerns[1], nkerns[0], kernel1_size, kernel1_size)) layer0_input = x.reshape((batch_size, num_channels, layer0_rows, layer0_cols)) layer0_input_sub = downsample.max_pool_2d(input=layer0_input, ds=(2,2), ignore_border=True) layer0 = MyNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, num_channels, layer0_rows, layer0_cols), # image_shape = (500, 3, 32, 32) filter_shape=(nkerns[0], num_channels, kernel0_size, kernel0_size), # filter_shape= (20, 3, 5, 5) poolsize=(pool0_size, pool0_size), params_W=layer0_W, ) # construct the first layer layer0_sub = MyNetConvPoolLayer( rng, input=layer0_input_sub, image_shape=(batch_size, num_channels, layer0_sub_rows, layer0_sub_cols),# image_shape = (500, 3, 16, 16) filter_shape=(nkerns[0], num_channels, kernel0_size, kernel0_size), # filter_shape= (20, 3, 5, 5) poolsize=(pool0_size, pool0_size), params_W=layer0_W ) layer1 = MyNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], layer1_rows, layer1_cols), # image_shape = (500, 20, 14, 14) filter_shape=(nkerns[1], nkerns[0], kernel1_size, kernel1_size), # filter_shape= (50, 20, 5, 5) poolsize=(pool1_size, pool1_size), params_W=layer1_W ) # output size = (500, 50, 10, 10) layer1_sub = MyNetConvPoolLayer( rng, input=layer0_sub.output, image_shape=(batch_size, nkerns[0], layer1_sub_rows, layer1_sub_cols), # image_shape = (500, 20, 6, 6) filter_shape=(nkerns[1], nkerns[0], kernel1_size, kernel1_size), # filter_shape= (50, 20, 5, 5) poolsize=(pool1_size, pool1_size), params_W=layer1_W ) # output size = (500, 50, 2, 2) layer2_input = T.concatenate( [layer1.output.flatten(2), layer1_sub.output.flatten(2)], axis=1 ) layer2 = HiddenLayer( mrng, rng, input=layer2_input, n_in=nkerns[1]*((layer1_rows+1-kernel1_size)*(layer1_cols+1-kernel1_size)+(layer1_sub_rows+1-kernel1_size)*(layer1_sub_cols+1-kernel1_size)), n_out=hidden_nodes, state=state, 
activation=T.tanh ) layer3 = LogisticRegression(input=layer2.output, n_in=hidden_nodes, n_out=10) total_cost = layer3.negative_log_likelihood(y) + penalty_coeff * layer2.W.norm(2) params = layer3.params + layer2.params + layer1_sub.params + layer1.params + layer0_sub.params + layer0.params grad_l3 = T.grad(total_cost, layer3.params) grad_l2 = T.grad(total_cost, layer2.params) grad_l1_sub = T.grad(total_cost, layer1_sub.params) grad_l1 = T.grad(total_cost, layer1.params) grad_l0_sub = T.grad(total_cost, layer0_sub.params) grad_l0 = T.grad(total_cost, layer0.params) updates = [ (layer3.params[0] , layer3.params[0] - learning_rate * grad_l3[0]), (layer3.params[1] , layer3.params[1] - learning_rate * grad_l3[1]), (layer2.params[0] , layer2.params[0] - learning_rate * grad_l2[0]), (layer2.params[1] , layer2.params[1] - learning_rate * grad_l2[1]), (layer1_sub.params[0], layer1_sub.params[0] - learning_rate * (grad_l1_sub[0] + grad_l1[0])), (layer1_sub.params[1], layer1_sub.params[1] - learning_rate * (grad_l1_sub[1] + grad_l1[1])), (layer1.params[0] , layer1.params[0] - learning_rate * (grad_l1_sub[0] + grad_l1[0])), (layer1.params[1] , layer1.params[1] - learning_rate * (grad_l1_sub[1] + grad_l1[1])), (layer0_sub.params[0], layer0_sub.params[0] - learning_rate * (grad_l0_sub[0] + grad_l0[0])), (layer0_sub.params[1], layer0_sub.params[1] - learning_rate * (grad_l0_sub[1] + grad_l0[1])), (layer0.params[0] , layer0.params[0] - learning_rate * (grad_l0_sub[0] + grad_l0[0])), (layer0.params[1] , layer0.params[1] - learning_rate * (grad_l0_sub[1] + grad_l0[1])) ] training_index = T.iscalar() validate_index = T.iscalar() train_model = theano.function( [training_index], [total_cost, layer3.errors(y)], updates=updates, givens={ x : shared_x[training_index * batch_size : (training_index+1) * batch_size], y : shared_y[training_index * batch_size : (training_index+1) * batch_size], state: numpy.cast['int32'](0) } ) test_model = theano.function( [], layer3.errors(y), givens={ x: 
evalset_x[0: batch_size], y: evalset_y[0: batch_size], state: numpy.cast['int32'](1) } ) ############### # TRAIN MODEL # ############### print '... training' patience = 10000 epoch = 0 done_looping = False # save parameters every 1000 iterations. param_files = ['p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9'] while(epoch < n_epochs) and (not done_looping): batch_index = randint(0, num_batches-1) # randomly generate the batch number to be trained. cost_ij, error = train_model(batch_index) epoch = epoch + 1 print "number of iterations: ", epoch print "selected training batch:", batch_index print "current cost: ", cost_ij print "validate error: ", error # call validation accuracy if (epoch % 10 == 0): error_test = test_model(1) print " " print "validate error of test_batch:", error_test print " "
import os
import load
import preprocess

# Set when the raw data has just been (re-)imported, which forces the
# normalization step below to run again.
normalize = False
if not os.path.isfile('train.npy'):
    load.import_test_train()
    normalize = True

if normalize or not os.path.isfile('train_normalized.npy'):
    # Load right where the data is needed: this branch can also be reached
    # when 'train.npy' already exists (only the normalized file is missing),
    # in which case the import branch above has not run.
    train_data, ans = load.load_train()
    test_data = load.load_test()
    preprocess.normalize(train_data, test_data)
import numpy as np import keras from keras.models import Sequential from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten from keras import optimizers from sklearn.model_selection import train_test_split import preprocess X, y = preprocess.read_data('../../smiles') X = preprocess.normalize(X) X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1) X_train, X_valid, X_test, y_train, y_valid, y_test = preprocess.split_data( X, y) # from keras.utils import to_categorical # y_train = to_categorical(y_train) # y_test = to_categorical(y_test) # y_valid = to_categorical(y_valid) print(y_train.shape) input_shape = X_train.shape[1:] def createModel(): model = Sequential() model.add(
y = np.array(data_mat['Y'])""" x=pd.read_feather("sc_data/snRNA_AD_brain.feather").iloc[:,:10] y=np.array(x.columns.str.split('.').tolist())[:,1].astype(np.float) x=x.values.T # preprocessing scRNA-seq read counts matrix adata = sc.AnnData(x) adata.obs['Group'] = y adata = read_dataset(adata, transpose=False, test_split=False, copy=True) adata = normalize(adata, size_factors=True, normalize_input=True, logtrans_input=True) input_size = adata.n_vars print(adata.X.shape) print(y.shape) x_sd = adata.X.std(0) x_sd_median = np.median(x_sd) print("median of gene sd: %.5f" % x_sd_median) if args.update_interval == 0: # one epoch args.update_interval = int(adata.X.shape[0]/args.batch_size) print(args)
def replaceWithPhoto(np_row):
    """Replace the image path in ``np_row`` with the loaded, normalised image.

    Args:
        np_row: Indexable row whose first element is a bytes path relative to
            ``<cwd>/data/`` and whose second element is passed through as-is.

    Returns:
        ``np.array([image, np_row[1]])`` where ``image`` was read with
        ``imread``, cast to ``float32`` and passed through
        ``preprocess_image`` and then ``normalize``.
    """
    relative_path = np_row[0].decode('UTF-8').strip()
    full_path = cwd + '/data/' + relative_path
    pixels = imread(full_path).astype(np.float32)
    pixels = normalize(preprocess_image(pixels))
    return np.array([pixels, np_row[1]])
def make_columns():
    """Build the feature columns that link the Dataset to the model_fn.

    The ``gci*`` keys become indicator columns over a sequence categorical
    column backed by ``vocab_file`` (default value ``"0"``). All other keys
    become sequence numeric columns whose ``normalizer_fn`` calls
    ``normalize(x, key, stats_dict)``.

    :return: dict mapping each feature key to its feature column, in the
        order ``gci, ta, rsrp, gci0, rsrp0, gci1, rsrp1, gci2, rsrp2, dt``.
    """
    def categorical(key):
        return fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                key, vocab_file, default_value="0"
            )
        )

    def numeric(key):
        # ``key`` is bound per call of ``numeric``, so every lambda
        # normalises with the statistics of its own feature.
        return seq_fc.sequence_numeric_column(
            key, normalizer_fn=lambda x: normalize(x, key, stats_dict)
        )

    layout = (
        ('gci', categorical),
        ('ta', numeric),
        ('rsrp', numeric),
        ('gci0', categorical),
        ('rsrp0', numeric),
        ('gci1', categorical),
        ('rsrp1', numeric),
        ('gci2', categorical),
        ('rsrp2', numeric),
        ('dt', numeric),
    )

    columns_dict = {}
    for key, build in layout:
        columns_dict[key] = build(key)
    return columns_dict
def _drop_class(features, labels, rm_class, header):
    """Return ``(features, labels)`` without the samples of class ``rm_class``.

    A sample is removed when column ``rm_class`` of its one-hot label is 1.
    Prints ``header``, the shapes before and after removal, and the number of
    removed samples.
    """
    print(header)
    print("\t[Before] feature shape: ", np.shape(features))
    print("\t[Before] label shape: ", np.shape(labels))

    remove_idx = np.flatnonzero(np.asarray(labels)[:, rm_class] == 1)
    print("\tCount: {}".format(len(remove_idx)))

    features = np.delete(features, remove_idx, axis=0)
    labels = np.delete(labels, remove_idx, axis=0)
    print("\t[After] feature shape: ", np.shape(features))
    print("\t[After] label shape: ", np.shape(labels))
    return features, labels


def preprocess_and_save_data(cifar10_dataset_folder_path, output_path,
                             rm_class, aug_enable, reshape_enable):
    """Preprocess CIFAR-10 and pickle it with one class removed.

    Files written into ``output_path`` (created when missing):
        preprocess_train_<rm_class>.p   training data without ``rm_class``
        test.p                          full preprocessed test data
        preprocess_test_<rm_class>.p    test data without ``rm_class``

    Args:
        cifar10_dataset_folder_path: Folder holding the five training batches
            (read via ``load_cfar10_batch``) and ``test_batch``.
        output_path: Destination folder for the pickles.
        rm_class: Index of the class whose samples are removed.
        aug_enable: When equal to ``True``, the training set is extended with
            vertical/horizontal flips and 90/270 degree rotations.
        reshape_enable: When equal to ``True``, images are passed through
            ``preprc.reshape_image`` with target ``(64, 64, 3)``.

    Note:
        ``mean`` and ``std`` passed to ``preprc.normalize`` are not parameters;
        they are read from the enclosing module scope.
    """
    n_batches = 5

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Collect all batches first and concatenate once.
    feature_batches = []
    label_batches = []
    for batch_i in range(1, n_batches + 1):
        curr_features, curr_labels = load_cfar10_batch(
            cifar10_dataset_folder_path, batch_i)
        feature_batches.append(curr_features)
        label_batches.append(curr_labels)
    features = np.concatenate(feature_batches)
    labels = np.concatenate(label_batches)

    # Preprocess training & validation data.
    # The explicit ``== True`` is kept so that truthy non-boolean values
    # (e.g. non-empty strings) still do not enable the option.
    if aug_enable == True:
        features_ud = preprc.vertical_flip(features)
        features_lr = preprc.horizontal_flip(features)
        features_rot90 = preprc.rot90(features)
        features_rot270 = preprc.rot270(features)
        features = np.concatenate((features, features_ud, features_lr,
                                   features_rot90, features_rot270))
        labels = np.concatenate((labels, labels, labels, labels, labels))

    if reshape_enable == True:
        features = preprc.reshape_image(features, (64, 64, 3))

    features, _, _ = preprc.normalize(features, mean=mean, std=std)
    labels = preprc.one_hot_encode(labels)

    features, labels = _drop_class(
        features, labels, rm_class,
        "[Training data] Removing No.{} Class...".format(rm_class))

    # Save training data
    train_file = os.path.join(output_path,
                              'preprocess_train_{}.p'.format(rm_class))
    with open(train_file, 'wb') as out:
        pickle.dump((features, labels), out, protocol=4)

    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    # Load the test data: (N, 3072) rows -> (N, 32, 32, 3) images.
    test_features = batch['data'].reshape(
        (len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    test_labels = batch['labels']

    if reshape_enable == True:
        test_features = preprc.reshape_image(test_features, (64, 64, 3))

    test_features, _, _ = preprc.normalize(test_features, mean=mean, std=std)
    test_labels = preprc.one_hot_encode(test_labels)

    # Save original test data (default pickle protocol, as before)
    with open(os.path.join(output_path, 'test.p'), 'wb') as out:
        pickle.dump((np.array(test_features), np.array(test_labels)), out)

    test_features, test_labels = _drop_class(
        test_features, test_labels, rm_class,
        "[Testing data] Removing No.{} Class...".format(rm_class))

    # Save test data
    test_file = os.path.join(output_path,
                             'preprocess_test_{}.p'.format(rm_class))
    with open(test_file, 'wb') as out:
        pickle.dump((np.array(test_features), np.array(test_labels)), out,
                    protocol=4)
def preprocess_and_save_single_class_data(cifar10_dataset_folder_path,
                                          output_path, aug_enable,
                                          reshape_enable):
    """Preprocess CIFAR-10 and pickle every class into its own file.

    Files written into ``output_path`` (created when missing):
        pr_train_class_<c>.p   training samples of class ``c`` (0..9)
        test.p                 full preprocessed test data
        pr_test_class_<c>.p    test samples of class ``c`` (0..9)

    Args:
        cifar10_dataset_folder_path: Folder holding the five training batches
            (read via ``load_cfar10_batch``) and ``test_batch``.
        output_path: Destination folder for the pickles.
        aug_enable: When equal to ``True``, the training set is extended with
            vertical/horizontal flips and 90/270 degree rotations.
        reshape_enable: When equal to ``True``, images are passed through
            ``preprc.reshape_image`` with target ``(64, 64, 3)``.

    Note:
        ``mean`` and ``std`` passed to ``preprc.normalize`` are not parameters;
        they are read from the enclosing module scope.
    """
    n_batches = 5
    n_classes = 10

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Collect all batches first and concatenate once.
    feature_batches = []
    label_batches = []
    for batch_i in range(1, n_batches + 1):
        curr_features, curr_labels = load_cfar10_batch(
            cifar10_dataset_folder_path, batch_i)
        feature_batches.append(curr_features)
        label_batches.append(curr_labels)
    features = np.concatenate(feature_batches)
    labels = np.concatenate(label_batches)

    # Preprocess training & validation data.
    # The explicit ``== True`` is kept so that truthy non-boolean values
    # (e.g. non-empty strings) still do not enable the option.
    if aug_enable == True:
        features_ud = preprc.vertical_flip(features)
        features_lr = preprc.horizontal_flip(features)
        features_rot90 = preprc.rot90(features)
        features_rot270 = preprc.rot270(features)
        features = np.concatenate((features, features_ud, features_lr,
                                   features_rot90, features_rot270))
        labels = np.concatenate((labels, labels, labels, labels, labels))

    if reshape_enable == True:
        features = preprc.reshape_image(features, (64, 64, 3))

    features, _, _ = preprc.normalize(features, mean=mean, std=std)
    labels = preprc.one_hot_encode(labels)

    for reserved_class in range(n_classes):
        print(
            "[Training data] Extracting No.{} Class...".format(reserved_class))
        # Rows whose one-hot label selects the current class.
        selected = labels[:, reserved_class] == 1
        curr_features = features[selected]
        curr_labels = labels[selected]
        print("\t[Class {}] feature shape: ".format(reserved_class),
              np.shape(curr_features))

        # Save training data
        train_file = os.path.join(
            output_path, 'pr_train_class_{}.p'.format(reserved_class))
        with open(train_file, 'wb') as out:
            pickle.dump((curr_features, curr_labels), out)

    with open(cifar10_dataset_folder_path + '/test_batch', mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    # Load the test data: (N, 3072) rows -> (N, 32, 32, 3) images.
    test_features = batch['data'].reshape(
        (len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    test_labels = batch['labels']

    if reshape_enable == True:
        test_features = preprc.reshape_image(test_features, (64, 64, 3))

    test_features, _, _ = preprc.normalize(test_features, mean=mean, std=std)
    test_labels = preprc.one_hot_encode(test_labels)

    # Save original test data
    with open(os.path.join(output_path, 'test.p'), 'wb') as out:
        pickle.dump((np.array(test_features), np.array(test_labels)), out)

    for reserved_class in range(n_classes):
        print(
            "[Testing data] Extracting No.{} Class...".format(reserved_class))
        selected = test_labels[:, reserved_class] == 1
        curr_features = test_features[selected]
        curr_labels = test_labels[selected]
        print("\t[After] feature shape: ", np.shape(curr_features))

        # Save test data
        test_file = os.path.join(
            output_path, 'pr_test_class_{}.p'.format(reserved_class))
        with open(test_file, 'wb') as out:
            pickle.dump((np.array(curr_features), np.array(curr_labels)), out)
def test_model_pointnet_sample_version(): data = [] label = [] for i in range(2): f = h5py.File( '/home/pal/data/ModelNet40PointNetSampleVersion/modelnet40_ply_hdf5_2048/ply_data_test{}.h5' .format(i)) data.append(f['data'][:]) label.append(f['label'][:]) print data[i].shape print label[i].shape data = np.concatenate(data, axis=0) label = np.concatenate(label, axis=0) label = label[:, 0] print data.shape, label.shape _, batch_names = read_category_file('data/ModelNet40/CategoryIDs') provided_names = read_pointnet_sample_category_file( '/home/pal/data/ModelNet40PointNetSampleVersion/modelnet40_ply_hdf5_2048/shape_names.txt' ) index_map = map_provided_label_to_batch_label(batch_names, provided_names) rectify_label = np.empty_like(label) for index, l in enumerate(label): rectify_label[index] = index_map[l] label = rectify_label model_path = '/home/pal/model/1024_leaky_relu/epoch499.ckpt' net = Network(3, 40, True, 1024) input = tf.placeholder(dtype=tf.float32, shape=[None, None, 3, 1], name='point_cloud') is_training = tf.placeholder(dtype=tf.bool, shape=[], name='is_training') net.inference(input, 'cpu', is_training, leaky_relu) score_layer = net.ops['cpu_fc3'] config = tf.ConfigProto() config.allow_soft_placement = True config.log_device_placement = False sess = tf.Session(config=config) saver = tf.train.Saver() saver.restore(sess, model_path) batch_size = 30 iter_num = int(math.ceil(data.shape[0] / float(batch_size))) correct_num = 0 all_labels = [] all_preds = [] for batch_index in xrange(iter_num): begin_index = batch_size * batch_index end_index = min((batch_index + 1) * batch_size, data.shape[0]) batch_label = label[begin_index:end_index] batch_data = data[begin_index:end_index] batch_data = normalize(batch_data) batch_data = exchange_dims_zy(batch_data) batch_data = np.expand_dims(batch_data, axis=3) scores = sess.run(score_layer, feed_dict={ input: batch_data, is_training: False }) preds = np.argmax(scores, axis=1) all_labels.append(batch_label) 
all_preds.append(preds) correct_num += np.sum(preds == batch_label) # for i in xrange(data.shape[0]): # if preds[i]==label[i]: # continue # # with open('misclassified/{}_{}_{}_{:.3}_{:.3}.txt'.format( # names[label[i]],names[preds[i]], # error_num, # scores[i,preds[i]],scores[i,label[i]]),'w') as f: # for pt in data[i,:,:,0]: # f.write('{} {} {}\n'.format(pt[0],pt[1],pt[2])) # # error_num+=1 # print batch_names[batch_label[0]] # with open('test.txt','w') as f: # for pt in batch_data[0,:,:,0]: # f.write('{} {} {}\n'.format(pt[0],pt[1],pt[2])) print 'accuracy {}'.format(correct_num / float(data.shape[0])) cnf_matrix = confusion_matrix(np.concatenate(all_labels, axis=0), np.concatenate(all_preds, axis=0), labels=range(40)) plt.figure() plot_confusion_matrix(cnf_matrix, batch_names) plt.show()