Example 1
    def norm_SVHN(self):
        #np.set_printoptions(threshold=np.NAN,precision=8)
        train = load('./SVHN/train_32x32.mat')
        test = load('./SVHN/test_32x32.mat')
        valid = load('./SVHN/extra_32x32.mat')
        train_samples = train['X']
        train_labels = train['y']
        test_samples = test['X']
        test_labels = test['y']
        valid_samples = valid['X']
        valid_labels = valid['y']
        print('train samples shape:', train_samples.shape)
        train_x, train_y = reformat(train_samples, train_labels)
        test_x, test_y = reformat(test_samples, test_labels)
        valid_x ,valid_y = reformat(valid_samples,valid_labels)
        train_x = normalize(train_x)
        test_x = normalize(test_x)
        valid_x = normalize(valid_x)

        print(test_x.shape ,test_y.shape)
        print(train_x.shape, train_y.shape)
        print(valid_x.shape,valid_y.shape)

        num_labels = 10
        image_size = 32
        print('in normSVHN')
        train_x, train_y = self.get_minibatch(train_x, train_y, 55000)
        test_x, test_y = self.get_minibatch(test_x, test_y, 5000)
        valid_x, valid_y = self.get_minibatch(valid_x, valid_y, 5000)
        data = (train_x, train_y, test_x, test_y, valid_x, valid_y)
        return data
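Example 1 never shows its reformat and normalize helpers. A minimal sketch of what they typically do for SVHN data (an assumption; the original helpers may differ in detail):

import numpy as np

def reformat(samples, labels, num_labels=10):
    # SVHN .mat files store images as (32, 32, 3, N); move N to the front.
    samples = np.transpose(samples, (3, 0, 1, 2)).astype(np.float32)
    # SVHN labels run 1..10, with 10 standing for digit 0; remap and one-hot.
    labels = np.asarray(labels).flatten() % 10
    one_hot = np.eye(num_labels, dtype=np.float32)[labels]
    return samples, one_hot

def normalize(samples):
    # Scale pixel values from [0, 255] to roughly [-1, 1].
    return (samples - 128.0) / 128.0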
Example 2
    def import_dataset(self):
        train = load('train_32x32.mat')
        test = load('test_32x32.mat')
        train_data = train['X']
        train_labels = train['y']
        test_data = test['X']
        test_labels = test['y']

        train_data = np.transpose(train_data, [3,0,1,2])
        train_data = utils.rgb2gray(train_data)
        train_data = utils.normalize(train_data,-1,1)
        train_shape = (train_data.shape[0], train_data.shape[1]*train_data.shape[2])
        train_data = np.reshape(train_data, train_shape)
        train_labels = utils.one_hot_coding(train_labels)

        test_data = np.transpose(test_data,[3,0,1,2])
        test_data = utils.rgb2gray(test_data)
        test_data = utils.normalize(test_data,-1,1)
        test_shape = (test_data.shape[0], test_data.shape[1]*test_data.shape[2])
        test_data = np.reshape(test_data, test_shape)
        test_labels = utils.one_hot_coding(test_labels)
        self.im_size = train_data.shape[1]

        #Create datasets from the above tensors
        self.train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
        self.test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
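The utils module used above is not included. Plausible stand-ins for the three helpers (assumptions, kept deliberately simple):

import numpy as np

def rgb2gray(images):
    # (N, H, W, 3) -> (N, H, W) using the standard luminance weights.
    return np.dot(images[..., :3], [0.299, 0.587, 0.114])

def normalize(images, low, high):
    # Linearly rescale pixel values from [0, 255] into [low, high].
    return low + (images / 255.0) * (high - low)

def one_hot_coding(labels, num_classes=10):
    labels = np.asarray(labels).flatten() % 10  # SVHN stores digit 0 as label 10
    return np.eye(num_classes)[labels]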
Example 3
 def __init__(self, path):
     self.path = path
     from scipy.io import loadmat as load
     train = load(os.path.join(self.path, 'train_32x32.mat'))
     self.train_samples, self.train_labels = self.reformat(train['X'], train['y'])
     test = load(os.path.join(self.path, 'test_32x32.mat'))
     self.test_samples, self.test_labels = self.reformat(test['X'], test['y'])
Example 4
def read_data_sets(dtype=dtypes.float32, seed=None, num_classes=54):
    train_data = load('../../预处理/前导提取/Syn_Header_Datasets/train_data.mat')
    train_phased_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/train_phased_data.mat')
    test_data = load('../../预处理/前导提取/Syn_Header_Datasets/test_data.mat')

    validation_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/validation_data.mat')
    test_phased_data = load(
        '../../预处理/前导提取/Syn_Header_Datasets/test_phased_data.mat')

    train_x_data = train_data['train_x_data']
    train_y_labels = train_data['train_y_labels']
    # read the training data and corresponding labels

    train_phased_x_data = train_phased_data['train_phased_x_data']
    train_phased_y_labels = train_phased_data['train_phased_y_labels']
    # read the phased training data and corresponding labels

    test_x_data = test_data['test_x_data']
    test_y_labels = test_data['test_y_labels']
    # read the test data and corresponding labels

    validation_x_data = validation_data['validation_x_data']
    validation_y_labels = validation_data['validation_y_labels']

    test_x_phased_data = test_phased_data['test_x_data']
    test_y_phased_labels = test_phased_data['test_y_labels']

    train_images = train_x_data
    train_labels = dense_to_one_hot(train_y_labels, num_classes)

    train_phased_images = train_phased_x_data
    train_phased_labels = dense_to_one_hot(train_phased_y_labels, num_classes)

    validation_images = validation_x_data
    validation_labels = dense_to_one_hot(validation_y_labels, num_classes)

    test_images = test_x_data
    test_labels = dense_to_one_hot(test_y_labels, num_classes)

    test_phased_images = test_x_phased_data
    test_phased_labels = dense_to_one_hot(test_y_phased_labels, num_classes)

    options = dict(dtype=dtype, seed=seed)
    train = DataSet(train_images, train_labels, **options)
    phased_train = DataSet(train_phased_images, train_phased_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)
    phased_test = DataSet(test_phased_images, test_phased_labels, **options)
    Datasets = collections.namedtuple(
        'Datasets', ['train', 'validation', 'test', 'phased_test'])
    # note that the phased training set is returned in the 'train' slot;
    # the plain `train` DataSet above is otherwise unused
    return Datasets(train=phased_train,
                    validation=validation,
                    test=test,
                    phased_test=phased_test)
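dense_to_one_hot is not defined in the snippet; a sketch modeled on the classic TensorFlow MNIST helper of the same name (an assumption: labels are 0-based integers):

import numpy as np

def dense_to_one_hot(labels_dense, num_classes):
    # Convert a column of integer class labels into one-hot row vectors.
    labels_dense = np.asarray(labels_dense).reshape(-1).astype(int)
    labels_one_hot = np.zeros((labels_dense.shape[0], num_classes))
    labels_one_hot[np.arange(labels_dense.shape[0]), labels_dense] = 1
    return labels_one_hot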
Example 5
def get_svhn():
    
    traindata = load('../data/SVHN/svhn/train_32x32.mat')
    testdata = load('../data/SVHN/svhn/test_32x32.mat')
    
    train_samples = traindata['X']
    train_labels = traindata['y']
    test_samples = testdata['X']
    test_labels = testdata['y']
    x_train, y_train = reformat(train_samples, train_labels)
    x_test, y_test = reformat(test_samples, test_labels)
    return np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test)
Example 6
    def _extract_images(self, filename, split):
        """Extract the images into a numpy array.

        Args:
            filename: The path to a usps images file.
            split: Either "train" or "test"; selects which part of the file to use.

        Returns:
            A numpy array of shape [number_of_images, height, width, channels],
            with float values between 0 and 1.
            A numpy array of shape [number_of_labels].
        """
        print('Extracting images from: ', filename)

        data = np.rollaxis(np.rollaxis(load(filename)['data'], 1), -1)

        if split == "train":
            data = data[:, :1000, :]
        elif split == "test":
            data = data[:, 1000:, :]

        label = np.concatenate(
            [np.ones(data.shape[1]) * ((1 + i) % 10) for i in range(10)])
        data = np.concatenate(data).reshape((-1, 16, 16, 1)).transpose(
            (0, 2, 1, 3))
        return data.astype(float) / 255, label.astype(int)
Example 7
 def __init__(self, transform=None):
     self.transform = transform
     testdata = load('./dataset/svhn/test_32x32.mat')
     # format:(32, 32, 3, 26032)
     self.test_samples = testdata['X']
     # format:(26032, 1)
     self.test_labels = testdata['y']
Example 8
 def __init__(self, transform=None):
     self.transform = transform
     traindata = load('./dataset/svhn/train_32x32.mat')
     # format:(32, 32, 3, 73257)
     self.train_samples = traindata['X']
     # format:(73257, 1)
     self.train_labels = traindata['y']
Example 9
def script4():
    # all attributes are categorical
    data = load('restaurant_1.mat')
    # [c] is an mx1 vector of ints giving the class label for each of the m samples
    c = array(data['c']).astype(int)
    # [nc] is the number of classes, i.e. c(i) belongs to {1,...,nc} for i = 1,...,m
    nc = array(data['nc']).astype(int)[0]
    # [x] is an mxn matrix of ints giving the n attributes for each of the m training samples
    x = array(data['x']).astype(int)
    # [nx] is a 1xn vector giving the range of each of the n attributes
    nx = array(data['nx']).astype(int)
    nx = reshape(nx, (1, nx.size))

    # [y] is a kxn matrix of ints giving the n attributes for each of the k testing samples
    y = array(data['y']).astype(int)
    d = array(data['d']).astype(int)

    tr = tree_train(c, nc, x, nx)
    print(tr)
    b = tree_classify(y, tr)

    your_output = b
    correct_output = d

    return your_output, correct_output
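tree_train and tree_classify are external helpers that the restaurant scripts only call. As a rough illustration of the interface they appear to implement, here is an ID3-style sketch (an assumption throughout: labels and attribute values are 1-based, as the comments above suggest):

import numpy as np

def _entropy(labels, nc):
    # Shannon entropy of integer labels drawn from 1..nc.
    counts = np.bincount(labels, minlength=nc + 1)[1:]
    p = counts[counts > 0] / labels.size
    return float(-np.sum(p * np.log2(p)))

def tree_train(c, nc, x, nx):
    # c: (m,1) labels in 1..nc; x: (m,n) categorical attributes with values
    # 1..nx[0][j]. Returns a nested dict consumed by tree_classify below.
    c = np.asarray(c).reshape(-1).astype(int)
    x = np.asarray(x).astype(int)
    nx = np.asarray(nx).reshape(-1).astype(int)
    nc = int(np.ravel(nc)[0])
    majority = int(np.bincount(c).argmax())
    base = _entropy(c, nc)
    if base == 0.0:
        return {'leaf': majority}
    # information gain of splitting on each attribute
    gains = []
    for j in range(x.shape[1]):
        rem = 0.0
        for v in range(1, nx[j] + 1):
            mask = x[:, j] == v
            if mask.any():
                rem += mask.mean() * _entropy(c[mask], nc)
        gains.append(base - rem)
    best = int(np.argmax(gains))
    if gains[best] <= 1e-12:  # no attribute helps: stop at a leaf
        return {'leaf': majority}
    node = {'attr': best, 'default': majority, 'children': {}}
    for v in range(1, nx[best] + 1):
        mask = x[:, best] == v
        if mask.any():
            node['children'][v] = tree_train(c[mask], nc, x[mask], nx)
    return node

def tree_classify(y, tr):
    # Walk the tree for each test row, falling back to the majority label
    # of a node when a value never appeared in training.
    out = []
    for row in np.asarray(y).astype(int):
        node = tr
        while 'leaf' not in node:
            node = node['children'].get(int(row[node['attr']]),
                                        {'leaf': node['default']})
        out.append(node['leaf'])
    return out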
Example 10
def load_data():
    data = load('./data/data.mat')

    data_samples = data['image']
    data_labels = data['label']

    n_train_samples, _train_labels = reformat(data_samples, data_labels, 4)
    print(n_train_samples.shape, _train_labels.shape)

    _train_samples = normalize(n_train_samples)
    print(_train_samples.shape, _train_labels.shape)

    X_train, X_test, y_train, y_test = train_test_split(_train_samples, _train_labels, test_size=0.20, random_state=42)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

    print("display label distribution in training set")
    temp = np.argmax(y_train, axis=1)
    result = [0] * 4
    for i in temp:
        if i == 0:
            result[0] += 1
        elif i == 1:
            result[1] += 1
        elif i == 2:
            result[2] += 1
        else:
            result[3] += 1
    print(result)
Example 11
def read_data_sets(dtype=dtypes.float32, seed=None, num_classes=54):
    train_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/train_data.mat')
    validation_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/validation_data.mat')
    test_data = load(
        '../../预处理/前导提取/Dual_Channel_Syn_Header_Datasets/test_data.mat')

    # read the training data and corresponding labels
    train_x_data = train_data['train_x_data']
    train_y_labels = train_data['train_y_labels']
    train_SNR = np.reshape(train_data['SNR'], [-1])

    # read the validation data and corresponding labels
    validation_x_data = validation_data['validation_x_data']
    validation_y_labels = validation_data['validation_y_labels']
    validation_SNR = np.reshape(validation_data['SNR'], [-1])

    # read the test data and corresponding labels
    test_x_data = test_data['test_x_data']
    test_y_labels = test_data['test_y_labels']
    test_SNR = np.reshape(test_data['SNR'], [-1])

    #    test_x_data = test_x_data[test_SNR == 30]
    #    test_y_labels = test_y_labels[test_SNR == 30]

    train_images = train_x_data
    train_labels = dense_to_one_hot(train_y_labels, num_classes)

    validation_images = validation_x_data
    validation_labels = dense_to_one_hot(validation_y_labels, num_classes)

    test_images = test_x_data
    test_labels = dense_to_one_hot(test_y_labels, num_classes)

    options = dict(dtype=dtype, seed=seed)
    train = DataSet(train_images, train_labels, train_SNR, **options)
    validation = DataSet(validation_images, validation_labels, validation_SNR,
                         **options)
    test = DataSet(test_images, test_labels, test_SNR, **options)

    Datasets = collections.namedtuple('Datasets',
                                      ['train', 'validation', 'test'])
    return Datasets(train=train, validation=validation, test=test)
Example 12
def train_shuffle_batch():
    mat_data = load(r'F:\WH\Test\single_8_14_x.mat')

    xyl = mat_data["input_train"]
    rgb = mat_data["output_train"]
    xyl_test = mat_data["input_test"]
    rgb_test = mat_data["output_test"]

    xyls, rgbs = get_batch(xyl, rgb, conf.batch_size)  #batch

    predict = predictor(xyls)
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(rgb - predict), 1))
    tf.summary.scalar('loss', loss)

    opt = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    saver = tf.train.Saver()

    #counter = 0
    start_time = time.time()
    if not os.path.exists(conf.data_path + "/checkpoint"):
        os.makedirs(conf.data_path + "/checkpoint")

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        if conf.model_path == "":
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, conf.model_path)
        merged_summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter('./logs/t_1', sess.graph)

        coord = tf.train.Coordinator()  # create a coordinator to manage the threads
        # start the QueueRunners; at this point the filename queue is populated
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(2000000):
            _, m = sess.run([opt, loss])

            if (epoch + 1) % 100 == 0:
                summary_str = sess.run(merged_summary_op)
                summary_writer.add_summary(summary_str, epoch + 1)
                print("Iterate [%d]: time: %4.4f, loss: %.8f" %
                      (epoch + 1, time.time() - start_time, m))
            if (epoch + 1) % 100000 == 0:
                save_path = saver.save(
                    sess, conf.data_path + "/checkpoint/" + "model_%d.ckpt" %
                    (epoch + 1))

        coord.request_stop()
        coord.join(threads)
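get_batch is not shown in Example 12. Given the Coordinator/QueueRunner plumbing above, a plausible TF1 queue-based stand-in (an assumption, not the original helper):

import tensorflow as tf

def get_batch(x, y, batch_size):
    # Slice the in-memory arrays into an input queue, then draw shuffled
    # batches from it; start_queue_runners() in the session feeds these queues.
    x_slice, y_slice = tf.train.slice_input_producer([x, y], shuffle=True)
    return tf.train.shuffle_batch(
        [x_slice, y_slice], batch_size=batch_size,
        capacity=batch_size * 8, min_after_dequeue=batch_size * 2)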
Example 13
def input_data():
    train_dataset = load(svhndata_path + 'train_32x32.mat',
                         variable_names='X').get('X')
    train_labels = load(svhndata_path + 'train_32x32.mat',
                        variable_names='y').get('y')
    test_dataset = load(svhndata_path + 'test_32x32.mat',
                        variable_names='X').get('X')
    test_labels = load(svhndata_path + 'test_32x32.mat',
                       variable_names='y').get('y')
    valid_dataset = load(svhndata_path + 'extra_32x32.mat',
                         variable_names='X').get('X')
    valid_labels = load(svhndata_path + 'extra_32x32.mat',
                        variable_names='y').get('y')

    n_labels = 10
    valid_index1 = []
    valid_index2 = []
    train_index1 = []
    train_index2 = []

    random.seed()

    for i in np.arange(n_labels):
        # first 400 of each digit from the train set go to validation;
        # the rest stay in training
        valid_index1.extend(
            np.where(train_labels[:, 0] == i)[0][:400].tolist())
        train_index1.extend(
            np.where(train_labels[:, 0] == i)[0][400:].tolist())
        # first 200 of each digit from the extra set go to validation;
        # the rest go to training
        valid_index2.extend(
            np.where(valid_labels[:, 0] == i)[0][:200].tolist())
        train_index2.extend(
            np.where(valid_labels[:, 0] == i)[0][200:].tolist())

    random.shuffle(valid_index1)
    random.shuffle(train_index1)
    random.shuffle(valid_index2)
    random.shuffle(train_index2)

    # build the validation split into temporaries first, so the source
    # arrays are not overwritten before the training split is assembled
    new_valid_dataset = np.concatenate(
        (valid_dataset[:, :, :, valid_index2],
         train_dataset[:, :, :, valid_index1]),
        axis=3).transpose((3, 0, 1, 2))
    new_valid_labels = np.concatenate(
        (valid_labels[valid_index2, :], train_labels[valid_index1, :]),
        axis=0)[:, 0]
    train_dataset = np.concatenate(
        (valid_dataset[:, :, :, train_index2],
         train_dataset[:, :, :, train_index1]),
        axis=3).transpose((3, 0, 1, 2))
    train_labels = np.concatenate(
        (valid_labels[train_index2, :], train_labels[train_index1, :]),
        axis=0)[:, 0]
    valid_dataset, valid_labels = new_valid_dataset, new_valid_labels
    test_dataset = test_dataset.transpose((3, 0, 1, 2))
    test_labels = test_labels[:, 0]

    print(train_dataset.shape, train_labels.shape)
    print(test_dataset.shape, test_labels.shape)
    print(valid_dataset.shape, valid_labels.shape)

    return train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels
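One caveat worth flagging: raw SVHN labels run 1..10, with 10 standing for the digit 0, so comparing train_labels[:, 0] against 0..9 as above assumes the labels were remapped beforehand. A hedged one-liner for that remap (not part of the original snippet):

train_labels = train_labels % 10  # map SVHN's label 10 (digit 0) to 0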
Example 14
 def loaddata(self):
     # path = "../data/train_32x32.mat"
     train = load(self.path)  # the .mat file contains the training data and the matching labels
     self.images = train['X']
     self.labels = train['y']
     if self.images is None:
         print('Images not found.')
     else:
         print('Images loaded successfully!')
     if self.labels is None:
         print('Labels not found.')
     else:
         print('Labels loaded successfully!')
     # print('Train data shape', self.images.shape)
     # print('Train labels shape', self.labels.shape)
     # whether the keys are upper- or lower-case 'X'/'y' depends on the dataset's documentation
     return self.images, self.labels
Example 15
def script6():

    data = load('seed_data.mat')
    c = array(data['c']).astype(float).tolist()
    nc = array(data['nc']).astype(float).tolist()
    x = array(data['x']).astype(float).tolist()
    nx = len(x[0])
    nc = nc[0][0]
    nc = int(nc)

    tempC = [row[0] for row in c]  # flatten the mx1 label column

    a = naivebayes_train(tempC, x,nc)
    b = naivebayes_classify(a, x[1])

    print(b)
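naivebayes_train and naivebayes_classify are not part of the snippet. Since seed_data.mat holds float features, a Gaussian naive Bayes sketch matching the call signatures (an assumption: classes are numbered 1..nc and every class occurs in the training data):

import numpy as np

def naivebayes_train(c, x, nc):
    # Fit per-class priors and per-feature Gaussian parameters.
    c = np.asarray(c, dtype=int)
    x = np.asarray(x, dtype=float)
    model = []
    for k in range(1, nc + 1):
        xk = x[c == k]
        model.append((len(xk) / len(x),         # class prior
                      xk.mean(axis=0),          # per-feature mean
                      xk.var(axis=0) + 1e-9))   # per-feature variance
    return model

def naivebayes_classify(model, sample):
    # Return the 1-based class with the highest log posterior.
    sample = np.asarray(sample, dtype=float)
    scores = [np.log(prior) - 0.5 * np.sum(
                  np.log(2 * np.pi * var) + (sample - mean) ** 2 / var)
              for prior, mean, var in model]
    return int(np.argmax(scores)) + 1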
Example 16
def script4():

    data = load('restaurant.mat')
    c = array(data['c']).astype(int)
    nc = array(data['nc']).astype(int)[0]
    x = array(data['x']).astype(int)
    nx = array(data['nx']).astype(int)
    nx = reshape(nx, [1, nx.size])
    y = array(data['y']).astype(int)
    d = array(data['d']).astype(int)

    tr = tree_train(c, nc, x, nx)
    b = tree_classify(y, tr)

    your_output = b
    correct_output = d

    return your_output, correct_output
Example 17
def load_data_traces(datadir, stations, channels):
    '''
    Load data traces for the given stations and channels.
    '''
    trc_name_divider = '-'
    data_format = 'mseed'

    ref_channels = []
    for cha in channels:
        if cha == 'Z':
            ref_channels.append('u')
        elif cha == 'T':
            ref_channels.append('r')
        elif cha == 'R':
            ref_channels.append('a')
        else:
            raise Exception('No data for this channel!')

    # load recorded data
    data_trcs = []

    # (r)ight transverse, (a)way radial, vertical (u)p
    for ref_channel in ref_channels:
        for station in stations:
            trace_name = trc_name_divider.join(
                ('reference', station.network, station.station, ref_channel))

            tracepath = datadir + trace_name + '.' + data_format

            try:
                with open(tracepath):
                    dt = io.load(tracepath, data_format)[0]
                    # [nm] convert to m
                    dt.set_ydata(dt.ydata * m)
                    dt.station = station.station
                    dt.network = station.network
                    dt.location = '0'
                    # convert to BEAT seismic Dataset
                    data_trcs.append(
                        heart.SeismicDataset.from_pyrocko_trace(dt))
            except IOError:
                logger.warning('Unable to open file: ' + trace_name)

    return data_trcs
Example 18
def package_data(masked_data, nii_files, regressor_files):
    nii_subjs = list(map(lambda f: f.split('/')[-1][:-len('.nii')], nii_files))
    reg_subjs = list(
        map(lambda f: f.split('/')[-1][:-len('_regs_results.mat')],
            regressor_files))

    data = []
    for i, s in enumerate(nii_subjs):
        try:
            j = np.ravel(
                np.where(np.array(list(map(lambda x: x == s, reg_subjs)))))[0]
            print(f'matching {s} with {regressor_files[j]}')
            regmat = load(regressor_files[j])
            data.append({
                'data': masked_data[i],
                'regmat': regmat['all_regressors'],
                'mvpa': regmat['results']
            })
        except Exception:  # no matching regressor file, or keys missing from the .mat
            print(f'missing data for {s}')
    return data
Example 19
def script4():
    data = load('restaurant.mat')
    c = array(data['c']).astype(int).tolist()  # class label for each of the m samples
    nc = array(data['nc']).astype(int)[0].tolist()  # number of classes; 2 in the restaurant data set
    x = array(data['x']).astype(int).tolist()  # main data
    nx = array(data['nx']).astype(int)  # range of each attribute
    nx = reshape(nx, [1, nx.size]).tolist()
    nx = nx[0]
    global wang
    wang = nc

    y = array(data['y']).astype(int).tolist()
    d = array(data['d']).astype(int)
    nc = nc[0]

    tempC = [myc[0] for myc in c]  # flatten the mx1 label column

    tr = tree_train(tempC, nc, x, nx)
    b = tree_classify(y, tr)
    print(b)
    print(d)
Example 20
	plt.xticks(y_pos, x)
	plt.ylabel('Count')
	plt.title(name + ' Label Distribution')
	plt.show()

def inspect(dataset, labels, i):
	# display the image to check it
	if dataset.shape[3] == 1:
		shape = dataset.shape
		dataset = dataset.reshape(shape[0], shape[1], shape[2])
	print(labels[i])
	plt.imshow(dataset[i])
	plt.show()


train = load('../data/train_32x32.mat')
test = load('../data/test_32x32.mat')
# extra = load('../data/extra_32x32.mat')

# print('Train Samples Shape:', train['X'].shape)
# print('Train  Labels Shape:', train['y'].shape)

# print('Train Samples Shape:', test['X'].shape)
# print('Train  Labels Shape:', test['y'].shape)

# print('Train Samples Shape:', extra['X'].shape)
# print('Train  Labels Shape:', extra['y'].shape)

train_samples = train['X']
train_labels = train['y']
test_samples = test['X']
Example 21
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import glob
import subprocess
import config
import numpy as np
from scipy.io import loadmat as load
from PIL import Image

mat = load(config.thetas_path)
theta1 = np.asmatrix(mat['Theta1'].transpose())
theta2 = np.asmatrix(mat['Theta2'].transpose())
num_labels = np.size(theta2, 1)

def size(matrix):
    return [np.size(matrix, 0), np.size(matrix, 1)]

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def imread(imgpath):
    img = Image.open(imgpath)
    img = img.convert('L', palette=Image.ADAPTIVE, colors=2)
    return np.array(img) > 128

def split_to_chars(imgpath, saveto):
    try:
        try:
            # clear leftover character bitmaps; note that map() is lazy in
            # Python 3, so an explicit loop actually deletes the files
            for old in glob.glob(os.path.join(saveto, '*.bmp')):
                os.remove(old)

Example 22
# inspect the distribution of each label
def distribution(labels, name):
    pass


# Display image
def inspect(samples, labels, i):
    print(labels[i])

    plt.imshow(samples[i])
    plt.show()


train_data = load('data/train_32x32.mat')
test_data = load('data/test_32x32.mat')
extra_data = load('data/extra_32x32.mat')

print("Train data samples shape", train_data['X'].shape)
print("Train data Labels shape", train_data['y'].shape)

train_samples = train_data['X']
train_labels = train_data['y']
test_samples = test_data['X']
test_labels = test_data['y']

_train_samples, _train_labels = reformat(train_samples, train_labels)
_test_samples, _test_labels = reformat(test_samples, test_labels)

num_labels = 10
Example 23
from __future__ import print_function
from scipy.io import loadmat as load

train = load('data/train_32x32.mat')
print(type(train))

print(train['__version__'])
print(train['__header__'])
print(train['X'])
print(train['X'].shape)  # (32, 32, 3, 73257) (width, height, channel, count)

print(train['y'].shape)  # (73257, 1) (count, label)

print(train['y'][1])