Example #1
    def test_howto_cyclic2(self):
        def my_network(*args):
            return args[0]

        reader = tftables.open_file(filename=self.test_filename, batch_size=10)

        # Non-cyclic access
        # -----------------

        array_batch_placeholder = reader.get_batch(path=self.test_array_path,
                                                   cyclic=False,
                                                   ordered=False)
        array_batch_float = tf.to_float(array_batch_placeholder)

        loader = reader.get_fifoloader(queue_size=10,
                                       inputs=[array_batch_float],
                                       threads=1)

        array_batch_cpu = loader.dequeue()
        result = my_network(array_batch_cpu)

        with tf.Session() as sess:
            loader.start(sess)

            # This context manager suppresses the exception.
            with loader.catch_termination():
                # Keep iterating until the exception breaks the loop
                while True:
                    sess.run(result)

            loader.stop(sess)
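
        # catch_termination above plays the same role as the explicit
        # try/except around tf.errors.OutOfRangeError shown in the next
        # example.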
Example #2
    def test_howto_cyclic1(self):
        def my_network(*args):
            return args[0]

        reader = tftables.open_file(filename=self.test_filename, batch_size=10)

        # Non-cyclic access
        # -----------------

        array_batch_placeholder = reader.get_batch(path=self.test_array_path,
                                                   cyclic=False,
                                                   ordered=False)
        array_batch_float = tf.to_float(array_batch_placeholder)

        loader = reader.get_fifoloader(queue_size=10,
                                       inputs=[array_batch_float],
                                       threads=1)

        array_batch_cpu = loader.dequeue()
        result = my_network(array_batch_cpu)

        with tf.Session() as sess:
            loader.start(sess)

            try:
                # Keep iterating until the exception breaks the loop
                while True:
                    sess.run(result)
            # Now silently catch the exception.
            except tf.errors.OutOfRangeError:
                pass

            loader.stop(sess)
Example #3
 def set_up(path, array, batchsize, get_tensors):
     # Test helper: opens a reader/loader pair on `path` and precomputes the
     # batches the loader is expected to return. (self, N, N_threads,
     # get_batches and lcm are defined in the enclosing scope.)
     blocksize = batchsize*2 + 1
     reader = tftables.open_file(self.test_filename, batchsize)
     # Number of passes needed for whole blocks to tile the array exactly.
     cycles = lcm(len(array), blocksize)//len(array)
     batch = reader.get_batch(path, block_size=blocksize, ordered=False)
     batches = get_batches(array, batchsize)*cycles*N_threads
     loader = reader.get_fifoloader(N, get_tensors(batch), threads=N_threads)
     return reader, loader, batches, batch
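
 # A hypothetical call site for the helper above (names are illustrative):
 #   reader, loader, batches, batch = set_up(self.test_array_path,
 #                                           self.test_array, 10,
 #                                           lambda b: [tf.to_float(b)])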
Example #4
    def test_shared_reader(self):
        batch_size = 8
        reader = tftables.open_file(self.test_filename, batch_size)

        array_batch = reader.get_batch(self.test_array_path, cyclic=False)
        table_batch = reader.get_batch(self.test_table_path, cyclic=False)

        array_batches = get_batches(self.test_array,
                                    batch_size,
                                    trim_remainder=True)
        table_batches = get_batches(self.test_table_ary,
                                    batch_size,
                                    trim_remainder=True)
        total_batches = min(len(array_batches), len(table_batches))

        loader = reader.get_fifoloader(
            10, [array_batch, table_batch['col_A'], table_batch['col_B']],
            threads=4)

        deq = loader.dequeue()
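        # dequeue() returns the tensors in the same order as the `inputs`
        # list above: array, col_A, col_B.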
        array_result = []
        table_result = []

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())

            loader.start(sess)

            with loader.catch_termination():
                while True:
                    tbl = np.zeros_like(self.test_table_ary[:batch_size])
                    ary, tbl['col_A'], tbl['col_B'] = sess.run(deq)
                    array_result.append(ary)
                    table_result.append(tbl)

            assert_items_equal(self,
                               array_result,
                               array_batches[:total_batches],
                               key=None,
                               epsilon=0)

            assert_items_equal(self,
                               table_result,
                               table_batches[:total_batches],
                               key=None,
                               epsilon=0)

            loader.stop(sess)

        reader.close()
Example #5
def build_dataset_with_hdf5_table(file_name):
    inputs = list()
    names = list()
    shapes = list()
    types = list()


    reader = tftables.open_file(filename=file_name, batch_size=BATCH_SIZE)
    table_dict = reader.get_batch(
        path="/data",
        cyclic=True,
        ordered=True)

    # stft
    inputs.append(table_dict[STFTS_COL])
    names.append("stft")
    shapes.append(tf.TensorShape([None, MEL_STFT_FIRST_DIMENSION, 2050]))
    types.append(tf.float32)

    # mels
    inputs.append(table_dict[MELS_COL])
    names.append("mel")
    shapes.append(tf.TensorShape([None, MEL_STFT_FIRST_DIMENSION, 160]))
    types.append(tf.float32)

    # texts
    inputs.append(tf.to_int32(table_dict[TEXTS_COL]))
    names.append("text")
    shapes.append(tf.TensorShape([None, table_dict[TEXTS_COL].shape[1]]))
    types.append(tf.int32)

    # text_lens
    inputs.append(tf.to_int32(table_dict[TEXT_LENS_COL]))
    names.append("text_length")
    shapes.append(tf.TensorShape([None]))
    types.append(tf.int32)

    # speech_lens
    inputs.append(tf.to_int32(table_dict[SPEECH_LENS_COL]))
    names.append("speech_length")
    shapes.append(tf.TensorShape([None]))
    types.append(tf.int32)

    print("inputs: %s" % str(inputs))

    loader = reader.get_fifoloader(queue_size=BUFFER_SIZE, inputs=inputs, threads=1)
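    # Ordered access was requested above (ordered=True), so only a single
    # loader thread is used; multiple threads would not preserve on-disk order.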
 
    return loader, reader, names, shapes, types
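
# A hypothetical consumer of the function above ('data.h5' is illustrative;
# loader.begin starts and stops the background reading threads):
#   loader, reader, names, shapes, types = build_dataset_with_hdf5_table('data.h5')
#   batch = loader.dequeue()  # tensors come back in the order of `inputs`
#   with tf.Session() as sess:
#       with loader.begin(sess):
#           stft, mel, text, text_len, speech_len = sess.run(batch)
#   reader.close()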
Example #6
def build_dataset_with_hdf5(file_name):
    inputs = list()
    names = list()
    shapes = list()
    types = list()
    placeholders = list()

    with tf.device('/cpu:0'):
        reader = tftables.open_file(filename=file_name, batch_size=BATCH_SIZE)
        stfts_array_batch_placeholder = reader.get_batch(
            path="/stfts",
            ordered=True)
        # inputs.append(tf.to_float(stfts_array_batch_placeholder))
        inputs.append(stfts_array_batch_placeholder)
        placeholders.append(stfts_array_batch_placeholder)
        names.append("stft")
        shapes.append(tf.TensorShape([None, 180, 2050]))
        types.append(tf.float32)

        mels_array_batch_placeholder = reader.get_batch(
            path="/mels",
            ordered=True)
        # inputs.append(tf.to_float(mels_array_batch_placeholder))
        inputs.append(mels_array_batch_placeholder)
        placeholders.append(mels_array_batch_placeholder)
        names.append("mel")
        shapes.append(tf.TensorShape([None, 180, 160]))
        types.append(tf.float32)

        # texts
        texts_array_batch_placeholder = reader.get_batch(
            path="/texts",
            ordered=True)
        inputs.append(tf.to_int32(texts_array_batch_placeholder))
        placeholders.append(texts_array_batch_placeholder)
        names.append("text")
        shapes.append(tf.TensorShape([None, 164]))
        types.append(tf.int32)

        # text_lens
        text_lens_array_batch_placeholder = reader.get_batch(
            path="/text_lens",
            ordered=True)
        inputs.append(tf.to_int32(text_lens_array_batch_placeholder))
        placeholders.append(text_lens_array_batch_placeholder)
        names.append("text_length")
        shapes.append(tf.TensorShape([None]))
        types.append(tf.int32)

        # speech_lens
        speech_lens_array_batch_placeholder = reader.get_batch(
            path="/speech_lens",
            ordered=True)
        inputs.append(tf.to_int32(speech_lens_array_batch_placeholder))
        placeholders.append(speech_lens_array_batch_placeholder)
        names.append("speech_length")
        shapes.append(tf.TensorShape([None]))
        types.append(tf.int32)

        print("Placeholders: %s" % str(placeholders))
        print("inputs: %s" % str(inputs))

        loader = reader.get_fifoloader(queue_size=BUFFER_SIZE, inputs=inputs, threads=1)

    return loader, reader, names, shapes, types
Example #7
class CSVPngReader(object):  # formerly CSVSegReader

    def __init__(self, filenames, base_folder='.', image_size=(5000,12,1), num_threads=4,
                 capacity=5000, min_after_dequeue=1000, num_epochs=None):
        """
        CSVSegReader is a class that reads csv files containing paths to input image and segmentation image and outputs
        batchs of correspoding image inputs and segmentation inputs.
         The inputs to the class are:

            filenames: a list of csv files filename
            num_epochs: the number of epochs - how many times to go over the data
            image_size: a tuple containing the image size in Y and X dimensions
            num_threads: number of threads for prefetch
            capacity: capacity of the shuffle queue, the larger capacity results in better mixing
            min_after_dequeue: the minimum example in the queue after a dequeue op. ensures good mixing
        """
        self.reader = tf.TextLineReader(skip_header_lines=0)
        self.input_queue = tf.train.string_input_producer(filenames, num_epochs=num_epochs)
        self.image_size = image_size
        self.batch_size = None
        self.num_threads = num_threads
        self.capacity = capacity
        self.min_after_dequeue = min_after_dequeue
        self.base_folder = base_folder

    def _get_image(self):
        _, records = self.reader.read(self.input_queue)
        print('records',records)
        # print('self.input_queue',self.input_queue)
        # print('records',records)

        file_names = tf.decode_csv(records, [tf.constant([],tf.string), tf.constant([],tf.float32)], field_delim=None, name=None)
        # file_names = tf.decode_csv(records, [tf.constant([],tf.string), tf.constant([],tf.string)], field_delim=None, name=None)

        # im_raw = tf.read_file(self.base_folder+file_names[0])
        # seg_raw =tf.read_file(self.base_folder+file_names[1])

        ecg_raw = tf.read_file(self.base_folder+file_names[0])
        k_raw = file_names[1]
        print('CSVPngReader   ecg_raw.get_shape().as_list()',ecg_raw.get_shape().as_list())

        ecg_image = tf.reshape(tf.cast(tf.image.decode_png(
                        ecg_raw, channels=1, dtype=tf.uint16),
                        tf.float32, ), self.image_size, name='input_image')
        potassium = k_raw
        # potassium = tf.reshape(
        #                 tf.cast(tf.image.decode_png(
        #                                             k_raw,
        #                                             channels=1, dtype=tf.uint8),
        #                 tf.float32,), self.image_size, name='input_seg')
        print('!!before!!ecg_image.get_shape().as_list()', ecg_image.get_shape().as_list())
        # ecg_image = (ecg_image-10000)/1000
        # ecg_image = (ecg_image[:,6:,:]-10000)/1000
        print('!!after!!ecg_image.get_shape().as_list()', ecg_image.get_shape().as_list())
        return ecg_image, potassium

###################################################################################
###################################################################################
###################################################################################

    from __future__ import print_function
    from math import ceil
    import pandas
    import numpy as np
    import h5py
    import cv2
    # import matplotlib.pyplot as plt
    import tftables
    import tensorflow as tf
    from random import shuffle

    seed = 2018
    np.random.seed(seed)

    shuffle_data = True  # shuffle the addresses before saving
    hdf5_path = 'dataset.hdf5'  # address to where you want to save the hdf5 file
    AllData_path = 'ActivDataFilteredCliped/'
    # AllData_path = 'ActivDataFiltered/'
    # AllData_path = 'ActivData/'

    dataseton = 1
    if dataseton == 0:
        # dataframe = pandas.read_csv("train.csv", delim_whitespace=True, header=None)
        dataframe = pandas.read_csv("train.csv", header=None)
        print('dataframe: ', dataframe)
        train_addrs = AllData_path + dataframe[0]
        train_labels = dataframe[1]
        print('train_addrs:', train_addrs)
        print('len train_addrs:', len(train_addrs))
        print('train_labels:', train_labels)
        print('lentrain_labels:', len(train_labels))

        dataframe = pandas.read_csv("val.csv", header=None)
        val_addrs = AllData_path + dataframe[0]
        val_labels = dataframe[1]
        print('val_addrs:', val_addrs)
        print('len val_addrs:', len(val_addrs))
        print('val_labels:', val_labels)
        print('len val_labels:', len(val_labels))

        dataframe = pandas.read_csv("test.csv", header=None)
        test_addrs = AllData_path + dataframe[0]
        test_labels = dataframe[1]
        print('test_addrs:', test_addrs)
        print('len test_addrs:', len(test_addrs))
        print('test_labels:', test_labels)
        print('len test_labels:', len(test_labels))

        '''To store images,
        we should define an array for each of the train, validation and test sets
        with the shape (number of data, image_height, image_width, image_depth) in Tensorflow order, or
        (number of data, image_depth, image_height, image_width) in Theano order.
        For labels we also need an array for each of the train, validation and test sets
        with the shape (number of data).
        Finally, we calculate the pixel-wise mean of the train set
        and save it in an array with the shape (1, image_height, image_width, image_depth).
        Note that you should always specify the data type (dtype)
        when creating an array.
        '''
        ###############################################

        # Create a HDF5 file

        '''
        tables: in tables we can use create_earray, which creates an empty array
        (number of data = 0) that we can append data to later.
        For labels, it is more convenient here to use create_array,
        as it lets us write the labels at the moment we create the array.
        To set the dtype of an array, you can use tables dtypes such as tables.UInt8Atom() for uint8.
        The first attribute of the create_earray and create_array methods is the data group
        (we create the arrays in the root group), which lets you manage your data
        by creating different data groups.
        You can think of groups as something like folders in your HDF5 file.

        h5py: in h5py we create an array using create_dataset.
        Note that we must specify the exact size of the array when defining it.
        We can use create_dataset for labels as well and immediately put the labels in it.
        You can set the dtype of an array directly using numpy dtypes.'''
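
        # A minimal sketch of the PyTables alternative described above
        # (assumes `import tables`; this script uses h5py below instead):
        #   f = tables.open_file(hdf5_path, mode='w')
        #   earr = f.create_earray(f.root, 'train_img',
        #                          tables.UInt16Atom(), shape=(0, 5000, 12, 1))
        #   earr.append(img[None])  # append rows one at a time
        #   f.create_array(f.root, 'train_labels', train_labels)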

        ###############################################

        #####################################################

        # h5py

        train_shape = (len(train_addrs), 5000, 12, 1)
        val_shape = (len(val_addrs), 5000, 12, 1)
        test_shape = (len(test_addrs), 5000, 12, 1)

        print('train_shape:', train_shape)
        print('val_shape:', val_shape)
        print('test_shape:', test_shape)

        # open a hdf5 file and create earrays
        hdf5_file = h5py.File(hdf5_path, mode='w')

        hdf5_file.create_dataset("train_img", train_shape, np.uint16)
        hdf5_file.create_dataset("val_img", val_shape, np.uint16)
        hdf5_file.create_dataset("test_img", test_shape, np.uint16)

        hdf5_file.create_dataset("train_mean", train_shape[1:], np.float32)

        hdf5_file.create_dataset("train_labels", (len(train_addrs),), np.float32)
        hdf5_file["train_labels"][...] = train_labels
        hdf5_file.create_dataset("val_labels", (len(val_addrs),), np.float32)
        hdf5_file["val_labels"][...] = val_labels
        hdf5_file.create_dataset("test_labels", (len(test_addrs),), np.float32)
        hdf5_file["test_labels"][...] = test_labels

        print('hdf5_file:', hdf5_file)
        print('len  hdf5_file:', len(hdf5_file))

        # Now it's time to read the images one by one, apply any preprocessing, and save them.

        ###############################################

        # h5py

        # a numpy array to save the mean of the images
        mean = np.zeros(train_shape[1:], np.float32)

        # loop over train addresses
        for i in range(len(train_addrs)):
            # print progress every 100 images
            if i % 100 == 0 and i > 1:
                print('Train data: {}/{}'.format(i, len(train_addrs)))

            # read an image (the resize and BGR->RGB conversion steps are
            # left commented out below)
            addr = train_addrs[i]

            # print('train_addrs' , train_addrs)
            # print('addr' , addr)
            # img = addr
            img = cv2.imread(addr, cv2.IMREAD_UNCHANGED)
            # img = cv2.imread(addr, -1)
            # img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # np.reshape(img,(1,5000,12,1))
            # print(img, addr)
            img.resize(img.shape[0], img.shape[1], 1)
            # add any image pre-processing here
            # print('train' , i)
            # print('shape  img' , np.shape(img))

            # if the data order is Theano, axis orders should change
            # if data_order == 'th':
            #     img = np.rollaxis(img, 2)

            # save the image and calculate the mean so far
            hdf5_file["train_img"][i, ...] = img[None]
            mean += img / float(len(train_labels))

        # loop over validation addresses
        for i in range(len(val_addrs)):
            # print progress every 100 images
            if i % 100 == 0 and i > 1:
                print('Validation data: {}/{}'.format(i, len(val_addrs)))

            # read an image (the resize and BGR->RGB conversion steps are
            # left commented out below)
            addr = val_addrs[i]

            # print('addr', addr)
            # img = addr
            img = cv2.imread(addr, cv2.IMREAD_UNCHANGED)
            # img = cv2.imread(addr, -1)
            # img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img.resize((img.shape[0], img.shape[1], 1))
            # print('val' , i)

            # add any image pre-processing here

            # if the data order is Theano, axis orders should change
            # if data_order == 'th':
            #     img = np.rollaxis(img, 2)

            # save the image
            hdf5_file["val_img"][i, ...] = img[None]

        # loop over test addresses
        for i in range(len(test_addrs)):
            # print progress every 100 images
            if i % 100 == 0 and i > 1:
                print('Test data: {}/{}'.format(i, len(test_addrs)))

            # read an image (the resize and BGR->RGB conversion steps are
            # left commented out below)
            addr = test_addrs[i]

            # print('addr', addr)
            # img = addr
            img = cv2.imread(addr, cv2.IMREAD_UNCHANGED)
            # img = cv2.imread(addr, -1)
            # img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img.resize((img.shape[0], img.shape[1], 1))
            # print('test' , i)

            # add any image pre-processing here

            # if the data order is Theano, axis orders should change
            # if data_order == 'th':
            #     img = np.rollaxis(img, 2)

            # save the image
            hdf5_file["test_img"][i, ...] = img[None]

        # save the mean and close the hdf5 file
        hdf5_file["train_mean"][...] = mean
        hdf5_file.close()

    ###############################################

    '''
    Read the HDF5 file

    It's time to check whether the data was saved properly in the HDF5 file.
    To do so, we load the data in batches of an arbitrary size and plot the first image
    of the first 5 batches.
    We also check the label of each image.
    We define a variable, subtract_mean,
    which indicates whether we want to subtract the mean of the training set before showing the image.
    In tables we access each array by calling its name after its data group
    (like this: hdf5_file.group.arrayname).
    You can index it like a numpy array.
    However, in h5py we access an array using its name, like a dictionary key
    (hdf5_file["arrayname"]).
    In either case, you have access to the shape of the array through .shape, like a numpy array.
    '''
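
    # A minimal sketch of the two access styles described above
    # ('/group/arrayname' is a hypothetical path):
    #   tables: h5file.root.group.arrayname[0:10]
    #   h5py:   h5file["group/arrayname"][0:10]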

    # import h5py
    # import numpy as np

    # hdf5_path = 'Cat vs Dog/dataset.hdf5'
    subtract_mean = False

    # open the hdf5 file
    hdf5_file = h5py.File(hdf5_path, "r")

    # subtract the training mean
    if subtract_mean:
        mm = hdf5_file["train_mean"][0, ...]
        mm = mm[np.newaxis, ...]

    # Total number of samples
    data_num = hdf5_file["train_img"].shape[0]

    print('hdf5_file', hdf5_file)
    print('data_num', data_num)
    print('hdf5_file["train_img"]', hdf5_file["train_img"])

    # ################################################
    #
    # '''
    # Now we create a list of batch indices and shuffle it.
    # Now, we loop over batches and read all images in each batch at once.
    #
    # '''
    # batch_size = 10
    # # nb_class=2
    # #
    # # from random import shuffle
    # # from math import ceil
    # # import matplotlib.pyplot as plt
    #
    # # create list of batches to shuffle the data
    # batches_list = list(range(int(ceil(float(data_num) / batch_size))))
    # shuffle(batches_list)
    # print('batches_list:', batches_list)
    # print('batches_list.shape:', len(batches_list))
    #
    # # loop over batches
    # # for counter, value in enumerate(some_list):
    # #     print(counter, value)
    # # for counter, value in enumerate(some_list):
    # for n, i in enumerate(batches_list):
    #     i_s = i * batch_size  # index of the first image in this batch
    #     i_e = min([(i + 1) * batch_size, data_num])  # index of the last image in this batch
    #
    #     # read batch images and remove training mean
    #     images = hdf5_file["train_img"][i_s:i_e, ...]
    #     if subtract_mean:
    #         images -= mm
    #
    #     # read labels and convert to one hot encoding
    #     labels = hdf5_file["train_labels"][i_s:i_e]
    #     print('labels:', labels)
    #     print('n, i:', n, i)
    #     # print('labels:', labels)
    #     # labels_one_hot = np.zeros((batch_size, nb_class))
    #     # labels_one_hot[np.arange(batch_size), labels] = 1
    #
    #     print('n+1, len(batches_list):', n + 1, '/', len(batches_list))
    #     print('labels[0], labels_one_hot[0, :]:', labels[0])  # , labels_one_hot[0, :])
    #     print('images[0]:', images[0])
    #     print('len(images[0]):', images[0].shape)
    #
    #     im0 = images[0]#(images[0] - 10000) / 1000
    #     print('im0    :', im0)
    #     lb0 = labels[0]
    #     leads_im = im0[:, :, 0]
    #     print('leads_im    :', leads_im)
    #     print('mean im0    :', np.max(leads_im, axis=0))
    #     n_leads = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    #     n_subplots = 12
    #     pltfig1 = n
    #     plt.figure(pltfig1)  # , figsize=(20, 20))
    #     # plt.title("All filters ECG, origin leads 'upper'")
    #     n_columns = 2
    #     n_rows = 6
    #     for k in range(n_subplots):
    #         plt.subplot(n_rows, n_columns, k + 1)
    #         plt.title('ECG, Lead ' + str(n_leads[k]) + ', potassium=' + str(lb0))  # +'-'+train_addrs[i_s])
    #         plt.plot(leads_im[:, n_leads[k] - 1])  # , 'r')  # plotting t, a separately
    #         # plt.plot(leads_im[0:2500, n_leads[k] - 1])  # , 'r')  # plotting t, a separately
    #         # addr = train_addrs[i]
    #         # plt.colorbar(LeadsIm, orientation='horizontal')
    #     plt.savefig('Leads 1-12 ECG' + str(n) + ', potassium=' + str(lb0) + '.png')
    #
    #     # plt.imshow(im0[:,:,0])
    #     # plt.imshow(images[0][:,:,0])
    #     # plt.show()
    #
    #     if n == 5:  # break after 5 batches
    #         break
    #
    #         # hdf5_file.close()
    #
    #         ############################################
    #         #
    #         #
    #         #
    #         #
    #         #
    #         #
    #         #########################################

    # #######################################################
    '''
    If the dataset is an array instead of a table, then input_transform can
    be omitted if no pre-processing is required.
    If only a single pass through the dataset is desired, then
    you should pass cyclic=False to load_dataset.
    A slightly more involved example below shows how to access multiple
    datasets in one HDF5 file, as well as the full API.
    '''
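
    # A sketch of the load_dataset convenience API that the note above refers
    # to, following the tftables quick-start (paths, input_transform and
    # num_iterations are illustrative):
    #   loader = tftables.load_dataset(filename='path/to/h5_file',
    #                                  dataset_path='/internal/h5_path',
    #                                  input_transform=input_transform,
    #                                  batch_size=20)
    #   data_batch = loader.dequeue()
    #   result = my_network(data_batch)
    #   with tf.Session() as sess:
    #       with loader.begin(sess):
    #           for _ in range(num_iterations):
    #               sess.run(result)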
    # reader = tftables.open_file(filename='path/to/h5_file',
    #                             batch_size = 20)
    reader = tftables.open_file(filename='/home/yehu/Desktop/new/nonPHIData/dataset.hdf5',
                                batch_size=10)
    print('reader    :', reader)
    # Accessing a single array
    # Suppose you only want to read a single array from your HDF5 file.
    # Doing this is quite straightforward.
    # Start by getting a tensorflow placeholder for your batch from reader.

    array_batch_placeholder = reader.get_batch(
        path='/train_labels',  # ''/h5/path',  # This is the path to your array inside the HDF5 file.
        cyclic=True,  # In cyclic access, when the reader gets to the end of the
        # array, it will wrap back to the beginning and continue.
        ordered=False  # The reader will not require the rows of the array to be
        # returned in the same order as on disk.
    )
    print('array_batch_placeholder    :', array_batch_placeholder)

    # You can transform the batch however you like now.
    # For example, casting it to floats.
    array_batch_float = tf.to_float(array_batch_placeholder)

    # The data can now be fed into your network
    result = my_network(array_batch_float)

    with tf.Session() as sess:
        # The feed method provides a generator that returns
        # feed_dict's containing batches from your HDF5 file.
        for i, feed_dict in enumerate(reader.feed()):
            sess.run(result, feed_dict=feed_dict)
            if i >= N:
                break

    # Finally, the reader should be closed.
    reader.close()

    # Note that by default, the ordered argument to get_batch is set to True.
    # If you require the rows of the array to be returned in the same order as they are on disk,
    # then you should leave it as ordered=True.
    # However, this may result in a performance penalty.
    # In machine learning, rows of a dataset often represent independent
    # examples, or data points, so their ordering is not important.

    ###############################################################

    # #######################################################
    # #######################################################
    # #######################################################
    # # read images
    # X_train = hdf5_file["train_img"]
    # Y_train = hdf5_file["train_labels"]
    #
    # X_val = hdf5_file["val_img"]
    # Y_val = hdf5_file["val_labels"]
    #
    # X_test = hdf5_file["test_img"]
    # Y_test = hdf5_file["test_labels"]
    #
    # print('Split train: ',X_train[1], len(X_train))
    # print('Split valid: ',X_val[1], len(X_val))
    # print('Split holdout: ',X_test[1], len(X_test))
    # # hdf5_file.close()
    # ############################################

    ###################################################################################
###################################################################################
###################################################################################

    def get_batch(self, batch_size=1):

        self.batch_size = batch_size

        ecg_image, potassium = self._get_image()
        image_batch, k_batch = tf.train.shuffle_batch([ecg_image, potassium], batch_size=self.batch_size,
                                                        num_threads=self.num_threads,
                                                        capacity=self.capacity,
                                                        min_after_dequeue=self.min_after_dequeue)
        return image_batch, k_batch
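
        # A hypothetical usage of this reader (file names are illustrative).
        # string_input_producer pipelines need queue runners, and num_epochs
        # requires local-variable initialisation:
        #   reader = CSVPngReader(['train.csv'], base_folder='data/')
        #   image_batch, k_batch = reader.get_batch(batch_size=32)
        #   with tf.Session() as sess:
        #       sess.run(tf.local_variables_initializer())
        #       coord = tf.train.Coordinator()
        #       threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        #       images, ks = sess.run([image_batch, k_batch])
        #       coord.request_stop()
        #       coord.join(threads)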

        ######################################
        #
        # sess = tf.Session()
        #
        # TYPE = np.float64
        #
        # N = 1000000
        # # data = np.random.normal(0, 1, N).astype(TYPE)
        # Truncate data to make it harder
        # data = data[(data > -1) & (data < 5)]
        ##################################################
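Example #8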
#     # processes used to read the data from disk and store it in the queue.
#     with loader.begin(sess):
#         for _ in range(num_iterations):
#             sess.run(result)
# #######################################################
'''
If the dataset is an array instead of a table, then input_transform can
be omitted if no pre-processing is required.
If only a single pass through the dataset is desired, then
you should pass cyclic=False to load_dataset.
A slightly more involved example shows how to access multiple datasets
in one HDF5 file, as well as the full API.
'''
# reader = tftables.open_file(filename='path/to/h5_file',
#                             batch_size = 20)
reader = tftables.open_file(
    filename='/home/yehu/Desktop/new/nonPHIData/dataset.hdf5', batch_size=10)
print('reader    :', reader)
# Accessing a single array
# Suppose you only want to read a single array from your HDF5 file.
# Doing this is quite straightforward.
# Start by getting a tensorflow placeholder for your batch from reader.
# X_train = hdf5_file["train_img"]
# Y_train = hdf5_file["train_labels"]

array_batch_placeholder = reader.get_batch(
    path='/train_labels',  # ''/h5/path',  # This is the path to your array inside the HDF5 file.
    cyclic=True,  # In cyclic access, when the reader gets to the end of the
    # array, it will wrap back to the beginning and continue.
Example #9
    def test_howto(self):
        def my_network(*args):
            return args[0]

        N = 100

        reader = tftables.open_file(filename=self.test_filename, batch_size=10)

        # Accessing a single array
        # ========================

        array_batch_placeholder = reader.get_batch(
            path=self.test_array_path,  # This is the path to your array inside the HDF5 file.
            cyclic=True,    # In cyclic access, when the reader gets to the end of the
                            # array, it will wrap back to the beginning and continue.
            ordered=False   # The reader will not require the rows of the array to be
                            # returned in the same order as on disk.
        )

        # You can transform the batch however you like now.
        # For example, casting it to floats.
        array_batch_float = tf.to_float(array_batch_placeholder)

        # The data can now be fed into your network
        result = my_network(array_batch_float)

        with tf.Session() as sess:
            # The feed method provides a generator that returns
            # feed_dict's containing batches from your HDF5 file.
            for i, feed_dict in enumerate(reader.feed()):
                sess.run(result, feed_dict=feed_dict)
                if i >= N:
                    break

        # The reader would normally be closed here, but it is reused below.
        # reader.close()

        # Accessing a single table
        # ========================

        table_batch = reader.get_batch(path=self.test_mock_data_path,
                                       cyclic=True,
                                       ordered=False)

        label_batch = table_batch['label']
        data_batch = table_batch['data']

        # Using a FIFO queue
        # ==================

        # As before
        array_batch_placeholder = reader.get_batch(path=self.test_array_path,
                                                   cyclic=True,
                                                   ordered=False)
        array_batch_float = tf.to_float(array_batch_placeholder)

        # Now we create a FIFO Loader
        loader = reader.get_fifoloader(
            queue_size=10,  # The maximum number of elements that the
                            # internal Tensorflow queue should hold.
            inputs=[array_batch_float],  # A list of tensors that will be
                                         # stored in the queue.
            threads=1       # The number of threads used to stuff the
                            # queue. If ordered access to a dataset
                            # was requested, then only 1 thread
                            # should be used.
        )

        # Batches can now be dequeued from the loader for use in your network.
        array_batch_cpu = loader.dequeue()
        result = my_network(array_batch_cpu)

        with tf.Session() as sess:

            # The loader needs to be started with your Tensorflow session.
            loader.start(sess)

            for i in range(N):
                # You can now cleanly evaluate your network without a feed_dict.
                sess.run(result)

            # It also needs to be stopped for clean shutdown.
            loader.stop(sess)

        # The reader is reused below, so it is only closed at the end of the test.
        # reader.close()

        # Accessing multiple datasets
        # ===========================

        # Use get_batch to access the table.
        # Both datasets must be accessed in ordered mode.
        table_batch_dict = reader.get_batch(path=self.test_table_path,
                                            ordered=True)
        col_A_pl, col_B_pl = table_batch_dict['col_A'], table_batch_dict['col_B']

        # Now use get_batch again to access an array.
        # Both datasets must be accessed in ordered mode.
        labels_batch = reader.get_batch(self.test_array_path, ordered=True)
        truth_batch = tf.one_hot(labels_batch, 2, 1, 0)

        # The loader takes a list of tensors to be stored in the queue.
        # When accessing in ordered mode, threads should be set to 1.
        loader = reader.get_fifoloader(
            queue_size=10, inputs=[truth_batch, col_A_pl, col_B_pl], threads=1)

        # Batches are taken out of the queue using a dequeue operation.
        # Tensors are returned in the order they were given when creating the loader.
        truth_cpu, col_A_cpu, col_B_cpu = loader.dequeue()

        # The dequeued data can then be used in your network.
        result = my_network(truth_cpu, col_A_cpu, col_B_cpu)

        with tf.Session() as sess:
            with loader.begin(sess):
                for _ in range(N):
                    sess.run(result)

        reader.close()
Example #10
 def test_uint64(self):
     reader = tftables.open_file(self.test_filename, 10)
     with self.assertRaises(ValueError):
         batch = reader.get_batch("/test_uint64")
     reader.close()
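
     # Note: get_batch raises ValueError here, presumably because uint64 has
     # no supported mapping to a TensorFlow (1.x) tensor dtype.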