def norm_SVHN(self):
    # np.set_printoptions(threshold=np.NAN, precision=8)
    train = load('./SVHN/train_32x32.mat')
    test = load('./SVHN/test_32x32.mat')
    valid = load('./SVHN/extra_32x32.mat')
    train_samples = train['X']
    train_labels = train['y']
    test_samples = test['X']
    test_labels = test['y']
    valid_samples = valid['X']
    valid_labels = valid['y']
    print('train_samples shape', train_samples.shape)
    train_x, train_y = reformat(train_samples, train_labels)
    test_x, test_y = reformat(test_samples, test_labels)
    valid_x, valid_y = reformat(valid_samples, valid_labels)
    train_x = normalize(train_x)
    test_x = normalize(test_x)
    valid_x = normalize(valid_x)
    print(test_x.shape, test_y.shape)
    print(train_x.shape, train_y.shape)
    print(valid_x.shape, valid_y.shape)
    num_labels = 10
    image_size = 32
    print('in norm_SVHN')
    train_x, train_y = self.get_minibatch(train_x, train_y, 55000)
    test_x, test_y = self.get_minibatch(test_x, test_y, 5000)
    valid_x, valid_y = self.get_minibatch(valid_x, valid_y, 5000)
    data = (train_x, train_y, test_x, test_y, valid_x, valid_y)
    return data
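# The SVHN loaders in this file call `reformat` and `normalize` without
# defining them. A minimal sketch of what such helpers could look like,
# assuming the usual (32, 32, 3, N) -> (N, 32, 32, 3) transpose, one-hot
# labels, and [-1, 1] scaling; this is an illustrative assumption, not the
# original implementation.
import numpy as np

def reformat(samples, labels, num_labels=10):
    # Move the sample axis first: (32, 32, 3, N) -> (N, 32, 32, 3).
    samples = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32)
    # SVHN stores digit 0 as label 10; map it back to 0, then one-hot encode.
    labels = labels.flatten() % 10
    one_hot = np.eye(num_labels, dtype=np.float32)[labels]
    return samples, one_hot

def normalize(samples):
    # Scale pixel values from [0, 255] to [-1, 1].
    return samples / 128.0 - 1.0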
def import_dataset(self):
    train = load('train_32x32.mat')
    test = load('test_32x32.mat')
    train_data = train['X']
    train_labels = train['y']
    test_data = test['X']
    test_labels = test['y']
    # Move the sample axis first, convert to grayscale, and scale to [-1, 1].
    train_data = np.transpose(train_data, [3, 0, 1, 2])
    train_data = utils.rgb2gray(train_data)
    train_data = utils.normalize(train_data, -1, 1)
    # Flatten each image into a single feature vector.
    train_shape = (train_data.shape[0], train_data.shape[1] * train_data.shape[2])
    train_data = np.reshape(train_data, train_shape)
    train_labels = utils.one_hot_coding(train_labels)
    test_data = np.transpose(test_data, [3, 0, 1, 2])
    test_data = utils.rgb2gray(test_data)
    test_data = utils.normalize(test_data, -1, 1)
    test_shape = (test_data.shape[0], test_data.shape[1] * test_data.shape[2])
    test_data = np.reshape(test_data, test_shape)
    test_labels = utils.one_hot_coding(test_labels)
    self.im_size = train_data.shape[1]
    # Create datasets from the above tensors
    self.train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    self.test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
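# A usage sketch for the tf.data pipelines built in `import_dataset` above,
# assuming TensorFlow 1.x (the API style used throughout these snippets);
# the function name and batch size are illustrative.
import tensorflow as tf

def make_train_iterator(train_dataset, batch_size=128):
    # Shuffle, batch, and repeat the dataset, then expose a TF 1.x
    # initializable iterator for use inside a tf.Session.
    dataset = train_dataset.shuffle(buffer_size=10000).batch(batch_size).repeat()
    return dataset.make_initializable_iterator()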
def __init__(self, path):
    self.path = path
    from scipy.io import loadmat as load
    train = load(os.path.join(self.path, 'train_32x32.mat'))
    self.train_samples, self.train_labels = self.reformat(train['X'], train['y'])
    test = load(os.path.join(self.path, 'test_32x32.mat'))
    self.test_samples, self.test_labels = self.reformat(test['X'], test['y'])
def read_data_sets(dtype=dtypes.float32, seed=None, num_classes=54):
    train_data = load('../../预处理/前导提取/Syn_Header_Datasets/train_data.mat')
    train_phased_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/train_phased_data.mat')
    test_data = load('../../预处理/前导提取/Syn_Header_Datasets/test_data.mat')
    validation_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/validation_data.mat')
    test_phased_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/test_phased_data.mat')
    # Read the data and corresponding labels of the training set
    train_x_data = train_data['train_x_data']
    train_y_labels = train_data['train_y_labels']
    # Read the phased training data and corresponding labels
    train_phased_x_data = train_phased_data['train_phased_x_data']
    train_phased_y_labels = train_phased_data['train_phased_y_labels']
    # Read the data and corresponding labels of the test set
    test_x_data = test_data['test_x_data']
    test_y_labels = test_data['test_y_labels']
    validation_x_data = validation_data['validation_x_data']
    validation_y_labels = validation_data['validation_y_labels']
    test_x_phased_data = test_phased_data['test_x_data']
    test_y_phased_labels = test_phased_data['test_y_labels']
    train_images = train_x_data
    train_labels = dense_to_one_hot(train_y_labels, num_classes)
    train_phased_images = train_phased_x_data
    train_phased_labels = dense_to_one_hot(train_phased_y_labels, num_classes)
    validation_images = validation_x_data
    validation_labels = dense_to_one_hot(validation_y_labels, num_classes)
    test_images = test_x_data
    test_labels = dense_to_one_hot(test_y_labels, num_classes)
    test_phased_images = test_x_phased_data
    test_phased_labels = dense_to_one_hot(test_y_phased_labels, num_classes)
    options = dict(dtype=dtype, seed=seed)
    train = DataSet(train_images, train_labels, **options)
    phased_train = DataSet(train_phased_images, train_phased_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)
    phased_test = DataSet(test_phased_images, test_phased_labels, **options)
    Datasets = collections.namedtuple(
        'Datasets', ['train', 'validation', 'test', 'phased_test'])
    # Note: the phased training set is the one exposed as `train`; the
    # unphased `train` DataSet built above is not returned.
    return Datasets(train=phased_train, validation=validation, test=test,
                    phased_test=phased_test)
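# `dense_to_one_hot` is referenced above but not defined in this snippet.
# A minimal sketch under the assumption of zero-based integer labels (the
# actual label convention in the .mat files is not shown here):
import numpy as np

def dense_to_one_hot(labels_dense, num_classes):
    # Convert a vector of class indices into a one-hot matrix.
    labels_dense = np.asarray(labels_dense).reshape(-1).astype(int)
    labels_one_hot = np.zeros((labels_dense.shape[0], num_classes),
                              dtype=np.float32)
    labels_one_hot[np.arange(labels_dense.shape[0]), labels_dense] = 1.0
    return labels_one_hot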
def get_svhn():
    traindata = load('../data/SVHN/svhn/train_32x32.mat')
    testdata = load('../data/SVHN/svhn/test_32x32.mat')
    train_samples = traindata['X']
    train_labels = traindata['y']
    test_samples = testdata['X']
    test_labels = testdata['y']
    x_train, y_train = reformat(train_samples, train_labels)
    x_test, y_test = reformat(test_samples, test_labels)
    return np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test)
def _extract_images(self, filename, split):
    """Extract the images into a numpy array.

    Args:
        filename: The path to a USPS images file.
        split: Either "train" or "test"; selects which slice of the file to use.

    Returns:
        A numpy array of shape [number_of_images, height, width, channels]
        with float values between 0 and 1, and a numpy array of shape
        [number_of_labels] with the integer labels.
    """
    print('Extracting images from: ', filename)
    # Reorder the axes so that the class axis comes first.
    data = np.rollaxis(np.rollaxis(load(filename)['data'], 1), -1)
    if split == "train":
        data = data[:, :1000, :]
    elif split == "test":
        data = data[:, 1000:, :]
    # Class i holds digit (i + 1) % 10, i.e. the tenth class is digit 0.
    label = np.concatenate(
        [np.ones(data.shape[1]) * ((1 + i) % 10) for i in range(10)])
    data = np.concatenate(data).reshape((-1, 16, 16, 1)).transpose(
        (0, 2, 1, 3))
    return data.astype(float) / 255, label.astype(int)
def __init__(self, transform=None):
    self.transform = transform
    testdata = load('./dataset/svhn/test_32x32.mat')
    # format: (32, 32, 3, 26032)
    self.test_samples = testdata['X']
    # format: (26032, 1)
    self.test_labels = testdata['y']
def __init__(self, transform=None):
    self.transform = transform
    traindata = load('./dataset/svhn/train_32x32.mat')
    # format: (32, 32, 3, 73257)
    self.train_samples = traindata['X']
    # format: (73257, 1)
    self.train_labels = traindata['y']
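# The two Dataset-style __init__ methods above store SVHN as (32, 32, 3, N)
# arrays plus an optional transform. A sketch of the companion
# __getitem__/__len__ methods such a class would need, assuming a
# PyTorch-style Dataset; these methods are not part of the original source.
def __getitem__(self, index):
    # Slice one image out of the (H, W, C, N) array and fetch its label.
    img = self.train_samples[:, :, :, index]
    label = int(self.train_labels[index]) % 10  # SVHN stores digit 0 as 10
    if self.transform is not None:
        img = self.transform(img)
    return img, label

def __len__(self):
    return self.train_samples.shape[3]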
def script4():
    # all attributes are categorical
    data = load('restaurant_1.mat')
    # [c] is an m x 1 vector of ints giving the class label of each
    # of the m samples
    c = array(data['c']).astype(int)
    # [nc] is the number of classes, i.e. c(i) belongs to {1, ..., nc}
    # for i = 1, ..., m
    nc = array(data['nc']).astype(int)[0]
    # [x] is an m x n matrix of ints giving the n attributes of each
    # of the m training samples
    x = array(data['x']).astype(int)
    # [nx] is a 1 x n vector giving the range of each of the n attributes
    nx = array(data['nx']).astype(int)
    nx = reshape(nx, (1, nx.size))
    # [y] is a k x n matrix of ints giving the n attributes of each
    # of the k testing samples
    y = array(data['y']).astype(int)
    d = array(data['d']).astype(int)
    tr = tree_train(c, nc, x, nx)
    print(tr)
    b = tree_classify(y, tr)
    your_output = b
    correct_output = d
    return your_output, correct_output
def load_data():
    data = load('./data/data.mat')
    data_samples = data['image']
    data_labels = data['label']
    n_train_samples, _train_labels = reformat(data_samples, data_labels, 4)
    print(n_train_samples.shape, _train_labels.shape)
    _train_samples = normalize(n_train_samples)
    print(_train_samples.shape, _train_labels.shape)
    X_train, X_test, y_train, y_test = train_test_split(
        _train_samples, _train_labels, test_size=0.20, random_state=42)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    print("display label distribution in training set")
    # Count how many training samples fall into each of the 4 classes.
    temp = np.argmax(y_train, axis=1)
    result = [0] * 4
    for i in temp:
        if i == 0:
            result[0] += 1
        elif i == 1:
            result[1] += 1
        elif i == 2:
            result[2] += 1
        else:
            result[3] += 1
    print(result)
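# The counting loop in `load_data` can be expressed in one line; an
# equivalent alternative using np.bincount, shown for comparison (same
# result for the 4 one-hot classes):
result = np.bincount(np.argmax(y_train, axis=1), minlength=4).tolist()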
def read_data_sets(dtype=dtypes.float32, seed=None, num_classes=54):
    train_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/train_data.mat')
    validation_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/validation_data.mat')
    test_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/test_data.mat')
    # Read the data and corresponding labels of the training set
    train_x_data = train_data['train_x_data']
    train_y_labels = train_data['train_y_labels']
    train_SNR = np.reshape(train_data['SNR'], [-1])
    # Read the data and corresponding labels of the validation set
    validation_x_data = validation_data['validation_x_data']
    validation_y_labels = validation_data['validation_y_labels']
    validation_SNR = np.reshape(validation_data['SNR'], [-1])
    # Read the data and corresponding labels of the test set
    test_x_data = test_data['test_x_data']
    test_y_labels = test_data['test_y_labels']
    test_SNR = np.reshape(test_data['SNR'], [-1])
    # test_x_data = test_x_data[test_SNR == 30]
    # test_y_labels = test_y_labels[test_SNR == 30]
    train_images = train_x_data
    train_labels = dense_to_one_hot(train_y_labels, num_classes)
    validation_images = validation_x_data
    validation_labels = dense_to_one_hot(validation_y_labels, num_classes)
    test_images = test_x_data
    test_labels = dense_to_one_hot(test_y_labels, num_classes)
    options = dict(dtype=dtype, seed=seed)
    train = DataSet(train_images, train_labels, train_SNR, **options)
    validation = DataSet(validation_images, validation_labels, validation_SNR,
                         **options)
    test = DataSet(test_images, test_labels, test_SNR, **options)
    Datasets = collections.namedtuple('Datasets',
                                      ['train', 'validation', 'test'])
    return Datasets(train=train, validation=validation, test=test)
def train_shuffle_batch():
    data = load(r'F:\WH\Test\single_8_14_x.mat')
    xyl = data["input_train"]
    rgb = data["output_train"]
    xyl_test = data["input_test"]
    rgb_test = data["output_test"]
    xyls, rgbs = get_batch(xyl, rgb, conf.batch_size)  # shuffled batch
    predict = predictor(xyls)
    # Compare the prediction against the batch targets (rgbs), not the
    # full target array.
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(rgbs - predict), 1))
    tf.summary.scalar('loss', loss)
    opt = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    saver = tf.train.Saver()
    start_time = time.time()
    if not os.path.exists(conf.data_path + "/checkpoint"):
        os.makedirs(conf.data_path + "/checkpoint")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        if conf.model_path == "":
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, conf.model_path)
        merged_summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter('./logs/t_1', sess.graph)
        # Create a coordinator to manage the input threads; starting the
        # QueueRunners begins filling the input queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for epoch in range(2000000):
            _, m = sess.run([opt, loss])
            if (epoch + 1) % 100 == 0:
                summary_str = sess.run(merged_summary_op)
                summary_writer.add_summary(summary_str, epoch + 1)
                print("Iterate [%d]: time: %4.4f, loss: %.8f" %
                      (epoch + 1, time.time() - start_time, m))
            if (epoch + 1) % 100000 == 0:
                save_path = saver.save(
                    sess, conf.data_path + "/checkpoint/" +
                    "model_%d.ckpt" % (epoch + 1))
        coord.request_stop()
        coord.join(threads)
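# `get_batch` is not defined in this snippet. Given the Coordinator and
# QueueRunner setup in `train_shuffle_batch`, it most likely wraps the TF 1.x
# input-queue ops; a minimal sketch under that assumption (capacity values
# are illustrative):
import tensorflow as tf

def get_batch(xyl, rgb, batch_size):
    # Build an input queue from the in-memory arrays and draw shuffled batches.
    xyl_t, rgb_t = tf.train.slice_input_producer(
        [tf.constant(xyl, tf.float32), tf.constant(rgb, tf.float32)],
        shuffle=True)
    return tf.train.shuffle_batch([xyl_t, rgb_t], batch_size=batch_size,
                                  capacity=10000, min_after_dequeue=1000)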
def input_data():
    train_dataset = load(svhndata_path + 'train_32x32.mat',
                         variable_names='X').get('X')
    train_labels = load(svhndata_path + 'train_32x32.mat',
                        variable_names='y').get('y')
    test_dataset = load(svhndata_path + 'test_32x32.mat',
                        variable_names='X').get('X')
    test_labels = load(svhndata_path + 'test_32x32.mat',
                       variable_names='y').get('y')
    valid_dataset = load(svhndata_path + 'extra_32x32.mat',
                         variable_names='X').get('X')
    valid_labels = load(svhndata_path + 'extra_32x32.mat',
                        variable_names='y').get('y')
    n_labels = 10
    valid_index1 = []
    valid_index2 = []
    train_index1 = []
    train_index2 = []
    random.seed()
    # Per class: the first 400 train samples and the first 200 extra samples
    # go to the validation set; the remainder go to the training set.
    for i in np.arange(n_labels):
        valid_index1.extend(np.where(train_labels[:, 0] == i)[0][:400].tolist())
        train_index1.extend(np.where(train_labels[:, 0] == i)[0][400:].tolist())
        valid_index2.extend(np.where(valid_labels[:, 0] == i)[0][:200].tolist())
        train_index2.extend(np.where(valid_labels[:, 0] == i)[0][200:].tolist())
    random.shuffle(valid_index1)
    random.shuffle(train_index1)
    random.shuffle(valid_index2)
    random.shuffle(train_index2)
    # Build both new splits before overwriting the source arrays.
    new_valid_dataset = np.concatenate(
        (valid_dataset[:, :, :, valid_index2],
         train_dataset[:, :, :, valid_index1]), axis=3).transpose((3, 0, 1, 2))
    new_valid_labels = np.concatenate(
        (valid_labels[valid_index2, :], train_labels[valid_index1, :]),
        axis=0)[:, 0]
    new_train_dataset = np.concatenate(
        (valid_dataset[:, :, :, train_index2],
         train_dataset[:, :, :, train_index1]), axis=3).transpose((3, 0, 1, 2))
    new_train_labels = np.concatenate(
        (valid_labels[train_index2, :], train_labels[train_index1, :]),
        axis=0)[:, 0]
    train_dataset, train_labels = new_train_dataset, new_train_labels
    valid_dataset, valid_labels = new_valid_dataset, new_valid_labels
    test_dataset = test_dataset.transpose((3, 0, 1, 2))
    test_labels = test_labels[:, 0]
    print(train_dataset.shape, train_labels.shape)
    print(test_dataset.shape, test_labels.shape)
    print(valid_dataset.shape, valid_labels.shape)
    return (train_dataset, train_labels, valid_dataset, valid_labels,
            test_dataset, test_labels)
def loaddata(self):
    # path = "../data/train_32x32.mat"
    # The .mat file contains both the training data and its labels.
    train = load(self.path)
    self.images = train['X']
    self.labels = train['y']
    if self.images is None:
        print('Images not found.')
    else:
        print('Images found successfully!')
    if self.labels is None:
        print('Labels not found.')
    else:
        print('Labels found successfully!')
    # print('Train data shape', self.images.shape)
    # print('Train labels shape', self.labels.shape)
    # Whether the keys are upper- or lower-case ('X', 'y') depends on the
    # dataset's documentation.
    return self.images, self.labels
def script6():
    data = load('seed_data.mat')
    c = array(data['c']).astype(float).tolist()
    nc = array(data['nc']).astype(float).tolist()
    x = array(data['x']).astype(float).tolist()
    nx = len(x[0])
    nc = int(nc[0][0])
    # Flatten the m x 1 class-label column into a plain list.
    tempC = [row[0] for row in c]
    a = naivebayes_train(tempC, x, nc)
    b = naivebayes_classify(a, x[1])
    print(b)
def script4():
    data = load('restaurant.mat')
    c = array(data['c']).astype(int)
    nc = array(data['nc']).astype(int)[0]
    x = array(data['x']).astype(int)
    nx = array(data['nx']).astype(int)
    nx = reshape(nx, [1, nx.size])
    y = array(data['y']).astype(int)
    d = array(data['d']).astype(int)
    tr = tree_train(c, nc, x, nx)
    b = tree_classify(y, tr)
    your_output = b
    correct_output = d
    return your_output, correct_output
def load_data_traces(datadir, stations, channels):
    '''
    Load data traces for the given stations and channels.
    '''
    trc_name_divider = '-'
    data_format = 'mseed'

    ref_channels = []
    for cha in channels:
        if cha == 'Z':
            ref_channels.append('u')
        elif cha == 'T':
            ref_channels.append('r')
        elif cha == 'R':
            ref_channels.append('a')
        else:
            raise Exception('No data for this channel!')

    # load recorded data
    data_trcs = []
    # (r)ight transverse, (a)way radial, vertical (u)p
    for ref_channel in ref_channels:
        for station in stations:
            trace_name = trc_name_divider.join(
                ('reference', station.network, station.station, ref_channel))
            tracepath = datadir + trace_name + '.' + data_format
            try:
                with open(tracepath):
                    dt = io.load(tracepath, data_format)[0]
                    # convert [nm] to [m]
                    dt.set_ydata(dt.ydata * m)
                    dt.station = station.station
                    dt.network = station.network
                    dt.location = '0'
                    # convert to BEAT seismic Dataset
                    data_trcs.append(
                        heart.SeismicDataset.from_pyrocko_trace(dt))
            except IOError:
                logger.warn('Unable to open file: ' + trace_name)
    return data_trcs
def package_data(masked_data, nii_files, regressor_files):
    nii_subjs = list(map(lambda f: f.split('/')[-1][:-len('.nii')], nii_files))
    reg_subjs = list(
        map(lambda f: f.split('/')[-1][:-len('_regs_results.mat')],
            regressor_files))
    data = []
    for i, s in enumerate(nii_subjs):
        try:
            # Find the regressor file whose subject ID matches this NIfTI file.
            j = np.ravel(
                np.where(np.array(list(map(lambda x: x == s, reg_subjs)))))[0]
            print(f'matching {s} with {regressor_files[j]}')
            regmat = load(regressor_files[j])
            data.append({
                'data': masked_data[i],
                'regmat': regmat['all_regressors'],
                'mvpa': regmat['results']
            })
        except IndexError:
            print(f'missing data for {s}')
    return data
def script4():
    data = load('restaurant.mat')
    # class labels for the m samples
    c = array(data['c']).astype(int).tolist()
    # number of classes; in the restaurant data set it is 2
    nc = array(data['nc']).astype(int)[0].tolist()
    # main data
    x = array(data['x']).astype(int).tolist()
    # range of each attribute
    nx = array(data['nx']).astype(int)
    nx = reshape(nx, [1, nx.size]).tolist()
    nx = nx[0]
    global wang
    wang = nc
    y = array(data['y']).astype(int).tolist()
    d = array(data['d']).astype(int)
    nc = nc[0]
    # Flatten the m x 1 class-label column into a plain list.
    tempC = [myc[0] for myc in c]
    tr = tree_train(tempC, nc, x, nx)
    b = tree_classify(y, tr)
    print(b)
    print(d)
    plt.xticks(y_pos, x)
    plt.ylabel('Count')
    plt.title(name + ' Label Distribution')
    plt.show()


def inspect(dataset, labels, i):
    # Display an image to check the data visually.
    if dataset.shape[3] == 1:
        shape = dataset.shape
        dataset = dataset.reshape(shape[0], shape[1], shape[2])
    print(labels[i])
    plt.imshow(dataset[i])
    plt.show()


train = load('../data/train_32x32.mat')
test = load('../data/test_32x32.mat')
# extra = load('../data/extra_32x32.mat')

# print('Train Samples Shape:', train['X'].shape)
# print('Train Labels Shape:', train['y'].shape)
# print('Test Samples Shape:', test['X'].shape)
# print('Test Labels Shape:', test['y'].shape)
# print('Extra Samples Shape:', extra['X'].shape)
# print('Extra Labels Shape:', extra['y'].shape)

train_samples = train['X']
train_labels = train['y']
test_samples = test['X']
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import glob
import subprocess

import config
import numpy as np
from scipy.io import loadmat as load
from PIL import Image

mat = load(config.thetas_path)
theta1 = np.asmatrix(mat['Theta1'].transpose())
theta2 = np.asmatrix(mat['Theta2'].transpose())
num_labels = np.size(theta2, 1)


def size(matrix):
    return [np.size(matrix, 0), np.size(matrix, 1)]


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def imread(imgpath):
    img = Image.open(imgpath)
    img = img.convert('L', palette=Image.ADAPTIVE, colors=2)
    return np.array(img) > 128


def split_to_chars(imgpath, saveto):
    try:
        try:
            # Remove leftover character bitmaps from a previous run; wrap
            # map() in list() so it is evaluated eagerly on Python 3.
            list(map(os.remove, glob.glob(os.path.join(saveto, '*.bmp'))))
# Inspect the distribution of each label
def distribution(labels, name):
    pass


# Display image
def inspect(samples, labels, i):
    print(labels[i])
    plt.imshow(samples[i])
    plt.show()


train_data = load('data/train_32x32.mat')
test_data = load('data/test_32x32.mat')
extra_data = load('data/extra_32x32.mat')

print("Train data samples shape", train_data['X'].shape)
print("Train data labels shape", train_data['y'].shape)

train_samples = train_data['X']
train_labels = train_data['y']
test_samples = test_data['X']
test_labels = test_data['y']

_train_samples, _train_labels = reformat(train_samples, train_labels)
_test_samples, _test_labels = reformat(test_samples, test_labels)

num_labels = 10
from __future__ import print_function
from scipy.io import loadmat as load

train = load('data/train_32x32.mat')
print(type(train))
print(train['__version__'])
print(train['__header__'])
print(train['X'])
print(train['X'].shape)  # (32, 32, 3, 73257) (width, height, channel, count)
print(train['y'].shape)  # (73257, 1) (count, data)
print(train['y'][1])
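# Note that SVHN stores the digit 0 with label 10, so train['y'] ranges over
# 1..10. A common follow-up step (an addition for illustration, not part of
# the original script):
import numpy as np
labels = train['y'].flatten() % 10  # map 10 -> 0, keep 1..9 unchanged
print(np.unique(labels))            # [0 1 2 3 4 5 6 7 8 9]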