Example #1
def read_and_decode(filename_queue, label_type, shape):

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.cast(image, tf.float32)

    image = (image - 127.5) * (1. / 128.0)
    image.set_shape([shape * shape * 3])
    image = tf.reshape(image, [shape, shape, 3])
    label = tf.decode_raw(features['label_raw'], tf.float32)

    if label_type == 'cls':
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        label.set_shape([2])
    elif label_type == 'bbx':
        label.set_shape([4])
    elif label_type == 'pts':
        label.set_shape([10])

    return image, label
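A minimal usage sketch for the reader above, assuming a TF 1.x queue-runner input pipeline; the file name, shape and batching parameters are placeholders rather than values from the original project:

import tensorflow as tf

filename_queue = tf.train.string_input_producer(['train_cls.tfrecords'])  # placeholder file name
image, label = read_and_decode(filename_queue, label_type='cls', shape=24)  # placeholder shape
# Batch the per-example tensors; capacity/min_after_dequeue are illustrative only.
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label], batch_size=64, capacity=2000, min_after_dequeue=1000)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    images, labels = sess.run([image_batch, label_batch])
    coord.request_stop()
    coord.join(threads)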
Example #2
def _binary_parse_function_example(serialized_example_protocol):
    '''
    DESCRIPTION:
        This function will deserialize, decompress and then transform
        the image and label in the appropriate shape based on the (new) merged
        structure of the dataset.
    '''
    #Parsing the example from the binary format
    features={
        'image':    tf.FixedLenFeature((),tf.string),
        'label':    tf.FixedLenFeature((),tf.string)
    }
    parsed_feature=tf.parse_single_example(serialized_example_protocol,
                                            features)

    #Now setting the appropriate transformation (decoding and reshape)
    height=514
    width=513
    depth=40
    #Decoding the image from binary
    image=tf.decode_raw(parsed_feature['image'],tf.float32)#BEWARE of dtype
    image.set_shape([depth*height*width])
    #Now reshape in the usual way since reshape automatically reads in C order
    image=tf.reshape(image,[height,width,depth])

    #Now decoding the label
    target_len=6
    label=tf.decode_raw(parsed_feature['label'],tf.float32)
    label.set_shape([target_len])
    #Reshaping appropriately
    label=tf.reshape(label,[target_len,])

    #Returning the example tuple
    return image,label
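The parser above takes a single serialized example and returns an (image, label) pair, so it can be mapped over a tf.data pipeline; a hedged sketch, where the file name and the ZLIB compression type are assumptions (the docstring only mentions decompression):

import tensorflow as tf

dataset = tf.data.TFRecordDataset('merged_dataset.tfrecords', compression_type='ZLIB')  # placeholders
dataset = dataset.map(_binary_parse_function_example)
dataset = dataset.batch(8).prefetch(1)
image_batch, label_batch = dataset.make_one_shot_iterator().get_next()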
Example #3
def read_and_decode(filename_queue):

    # input: filename queue
    # output: image, label pair

    # set up a TF record reader
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # list the features we want to extract, i.e., the image and the label
    features = tf.parse_single_example(
        serialized_example,
        features={
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })

    # Decode the training image
    # Convert from a scalar string tensor (whose single string holds
    # 256*256 int64 values) to an int64 tensor
    image = tf.decode_raw(features['img_raw'], tf.int64)
    image.set_shape([65536])
    image_re = tf.reshape(image, (256,256))

    # Scale input pixels down by a factor of 1024
    image_re = tf.cast(image_re, tf.float32) * (1. / 1024)

    # decode the label image, an image with all 0's except 1's where the left
    # ventricle exists
    label = tf.decode_raw(features['label_raw'], tf.uint8)
    label.set_shape([65536])
    label_re = tf.reshape(label, [256,256])

    return image_re, label_re
Example #4
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string),
        }
    )

    # must be read back as uint8 here
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    segmentation = tf.decode_raw(features['mask_raw'], tf.uint8)

    image.set_shape([224*224*3])
    segmentation.set_shape([224*224*1])

    image = tf.reshape(image,[224,224,3])
    segmentation = tf.reshape(segmentation,[224,224])

    rgb = tf.cast(image, tf.float32)
    rgb = rgb * (1./255)

    mask = tf.cast(segmentation, tf.float32)
    mask = (mask / 255.) * 20
    mask = tf.cast(mask, tf.int64)
    
    return rgb, mask
Example #5
  def deserialize(examples_serialized):
    """Called by Dataset.map() to convert batches of records to tensors."""
    features = tf.parse_single_example(examples_serialized, feature_map)
    users = tf.reshape(tf.decode_raw(
        features[movielens.USER_COLUMN], tf.int32), (batch_size,))
    items = tf.reshape(tf.decode_raw(
        features[movielens.ITEM_COLUMN], tf.uint16), (batch_size,))

    if params["use_tpu"] or params["use_xla_for_gpu"]:
      items = tf.cast(items, tf.int32)  # TPU and XLA disallow uint16 infeed.

    if not training:
      dupe_mask = tf.reshape(tf.cast(tf.decode_raw(
          features[rconst.DUPLICATE_MASK], tf.int8), tf.bool), (batch_size,))
      return {
          movielens.USER_COLUMN: users,
          movielens.ITEM_COLUMN: items,
          rconst.DUPLICATE_MASK: dupe_mask,
      }

    labels = tf.reshape(tf.cast(tf.decode_raw(
        features["labels"], tf.int8), tf.bool), (batch_size,))

    return {
        movielens.USER_COLUMN: users,
        movielens.ITEM_COLUMN: items,
    }, labels
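Per its docstring, deserialize is meant to be passed to Dataset.map() inside the enclosing input function; a rough sketch of that call, with the dataset construction and file name as placeholders (feature_map, batch_size, params and training come from the surrounding closure):

import tensorflow as tf

dataset = tf.data.TFRecordDataset('ncf_train_shard.tfrecords')  # placeholder file name
dataset = dataset.map(deserialize, num_parallel_calls=16)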
Example #6
def read_single_example_and_decode(filename_queue):

    tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)

    reader = tf.TFRecordReader(options=tfrecord_options)

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized=serialized_example,
        features={
            'img_name': tf.FixedLenFeature([], tf.string),
            'img_height': tf.FixedLenFeature([], tf.int64),
            'img_width': tf.FixedLenFeature([], tf.int64),
            'img': tf.FixedLenFeature([], tf.string),
            'gtboxes_and_label': tf.FixedLenFeature([], tf.string),
            'num_objects': tf.FixedLenFeature([], tf.int64)
        }
    )
    img_name = features['img_name']
    img_height = tf.cast(features['img_height'], tf.int32)
    img_width = tf.cast(features['img_width'], tf.int32)
    img = tf.decode_raw(features['img'], tf.uint8)

    img = tf.reshape(img, shape=[img_height, img_width, 3])

    gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32)
    gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 9])

    num_objects = tf.cast(features['num_objects'], tf.int32)
    return img_name, img, gtboxes_and_label, num_objects
Example #7
    def build_next_batch_op(self):
        reader = tf.TFRecordReader()

        _, serialized_experience = reader.read(self.filename_queue)

        features = tf.parse_single_example(serialized_experience, features={
            'state': tf.FixedLenFeature([], tf.string),
            'action': tf.FixedLenFeature([2], tf.float32),
            'reward': tf.FixedLenFeature([], tf.float32),
            'next_state': tf.FixedLenFeature([], tf.string),
            'is_episode_finished': tf.FixedLenFeature([], tf.int64)})

        state = tf.decode_raw(features['state'], tf.uint8)
        state.set_shape([86*86*4])
        action = features['action']
        reward = features['reward']
        next_state = tf.decode_raw(features['next_state'], tf.uint8)
        next_state.set_shape([86*86*4])
        is_episode_finished = features['is_episode_finished']

        state = tf.reshape(state, [86, 86, 4])
        next_state = tf.reshape(next_state, [86, 86, 4])

        state_batch, action_batch, reward_batch, next_state_batch, is_episode_finished_batch = tf.train.shuffle_batch(
            [state, action, reward, next_state, is_episode_finished], batch_size=self.batch_size, capacity=100,
            min_after_dequeue=0)

        return state_batch, action_batch, reward_batch, next_state_batch, is_episode_finished_batch
Example #8
    def parse_sequence_example(self, record_string):

        features_dict = {
            'images_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64),
            'sequence_length': tf.FixedLenFeature([], tf.int64)
        }

        if ADD_GEOLOCATIONS:
            features_dict['geo'] = tf.FixedLenFeature([], tf.string)

        features = tf.parse_single_example(record_string, features_dict)
        images = tf.decode_raw(features['images_raw'], tf.float32)
        width = tf.cast(features['width'], tf.int32)
        height = tf.cast(features['height'], tf.int32)
        depth = tf.cast(features['depth'], tf.int32)
        label = tf.cast(features['label'], tf.int32)
        sequence_length = tf.cast(features['sequence_length'], tf.int32)
        images = tf.reshape(images, [sequence_length, height, width, depth])

        if ADD_GEOLOCATIONS:
            geo = tf.decode_raw(features['geo'], tf.float32)
            geo = tf.reshape(geo, [2, ])
            return images, label, geo
        else:
            return images, label
Example #9
    def read_and_decode(self, filename_queue):
        """
        A definition of how TF should read the file record.
        Slightly altered version from https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/how_tos/ \
                                      reading_data/fully_connected_reader.py

        :param filename_queue: The file name queue to be read.
        :type filename_queue: tf.QueueBase
        :return: The read file data including the image data and depth data.
        :rtype: (tf.Tensor, tf.Tensor)
        """
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image_raw': tf.FixedLenFeature([], tf.string),
                'depth_raw': tf.FixedLenFeature([], tf.string),
            })

        image = tf.decode_raw(features['image_raw'], tf.uint8)
        image = tf.reshape(image, [self.height, self.width, self.channels])
        image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

        depth = tf.decode_raw(features['depth_raw'], tf.float32)
        depth = tf.reshape(depth, [self.height, self.width, 1])

        return image, depth
Example #10
def make_readers(file_prefix):
    """
    Return states, actions, rewards and next_states tensors
    :param file_prefix: common prefix of the .states/.next_states/.actions/.rewards files
    :return:
    """
    FLOAT_SIZE = 4
    states_reader = tf.FixedLengthRecordReader(STATES_HISTORY * N_STATE * FLOAT_SIZE)
    next_states_reader = tf.FixedLengthRecordReader(STATES_HISTORY * N_STATE * FLOAT_SIZE)
    actions_reader = tf.FixedLengthRecordReader(1)
    rewards_reader = tf.FixedLengthRecordReader(FLOAT_SIZE)
    _, states = states_reader.read(tf.train.string_input_producer([file_prefix + ".states"]))
    _, next_states = next_states_reader.read(tf.train.string_input_producer([file_prefix + ".next_states"]))
    _, actions = actions_reader.read(tf.train.string_input_producer([file_prefix + ".actions"]))
    _, rewards = rewards_reader.read(tf.train.string_input_producer([file_prefix + ".rewards"]))

    states = tf.decode_raw(states, tf.float32, name="decode_states")
    states = tf.reshape(states, (STATES_HISTORY * N_STATE, ), name="reshape_states")
    next_states = tf.decode_raw(next_states, tf.float32, name="decode_next_states")
    next_states = tf.reshape(next_states, (STATES_HISTORY * N_STATE, ), name="reshape_next_states")
    actions = tf.decode_raw(actions, tf.int8, name="decode_actions")
    actions = tf.reshape(actions, (1, ), name="reshape_actions")
    actions = tf.to_int32(actions)
    rewards = tf.decode_raw(rewards, tf.float32, name="decode_rewards")
    rewards = tf.reshape(rewards, (1, ), name="reshape_qvals")
    return states, actions, rewards, next_states
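A hedged usage sketch for make_readers; the file prefix and the queue parameters below are placeholders:

import tensorflow as tf

states, actions, rewards, next_states = make_readers('data/replay')  # placeholder prefix
# Combine the per-record tensors into shuffled mini-batches (illustrative parameters).
batch = tf.train.shuffle_batch(
    [states, actions, rewards, next_states],
    batch_size=32, capacity=10000, min_after_dequeue=1000)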
Example #11
    def parser(self, record):
        keys_to_features = {
            'labels': tf.FixedLenFeature([], tf.string),
            'userIds': tf.VarLenFeature(tf.int64),
            'itemIds': tf.VarLenFeature(tf.int64),
            'user_profiles_indices': tf.FixedLenFeature([], tf.string),
            'user_profiles_values': tf.VarLenFeature(tf.int64),
            'user_profiles_weights': tf.VarLenFeature(tf.float32),
            'user_profiles_shape': tf.FixedLenFeature([2], tf.int64),
            'item_profiles_indices': tf.FixedLenFeature([], tf.string),
            'item_profiles_values': tf.VarLenFeature(tf.int64),
            'item_profiles_weights': tf.VarLenFeature(tf.float32),
            'item_profiles_shape': tf.FixedLenFeature([2], tf.int64)
        }
        parsed = tf.parse_single_example(record, keys_to_features)
        labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1])
        userIds = tf.sparse_tensor_to_dense(parsed['userIds'])
        itemIds = tf.sparse_tensor_to_dense(parsed['itemIds'])

        user_profiles_indices = tf.reshape(tf.decode_raw(parsed['user_profiles_indices'], tf.int64), [-1, 2])
        user_profiles_values = tf.sparse_tensor_to_dense(parsed['user_profiles_values'])
        user_profiles_weights = tf.sparse_tensor_to_dense(parsed['user_profiles_weights'])
        user_profiles_shape = parsed['user_profiles_shape']

        item_profiles_indices = tf.reshape(tf.decode_raw(parsed['item_profiles_indices'], tf.int64), [-1, 2])
        item_profiles_values = tf.sparse_tensor_to_dense(parsed['item_profiles_values'])
        item_profiles_weights = tf.sparse_tensor_to_dense(parsed['item_profiles_weights'])
        item_profiles_shape = parsed['item_profiles_shape']

        return labels, userIds, itemIds, \
               user_profiles_indices, user_profiles_values, user_profiles_weights, user_profiles_shape, \
               item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape
Example #12
def buildSpImageConverter(channelOrder, img_dtype):
    """
    Convert an imageIO byte-encoded image into an image tensor suitable as input to ConvNets.
    The name of the input must be a subset of those specified in `image.imageIO.imageSchema`.

    :param img_dtype: the type of data the underlying image bytes represent
    """
    with IsolatedSession() as issn:
        # Flat image data -> image dimensions
        # This has to conform to `imageIO.imageSchema`
        height = tf.placeholder(tf.int32, [], name="height")
        width = tf.placeholder(tf.int32, [], name="width")
        num_channels = tf.placeholder(tf.int32, [], name="nChannels")
        image_buffer = tf.placeholder(tf.string, [], name="data")

        # The image is packed into bytes with height as leading dimension
        # This is the default behavior of Python Image Library
        shape = tf.reshape(tf.stack([height, width, num_channels], axis=0),
                           shape=(3,), name='shape')
        if img_dtype == 'uint8':
            image_uint8 = tf.decode_raw(image_buffer, tf.uint8, name="decode_raw")
            image_float = tf.to_float(image_uint8)
        elif img_dtype == 'float32':
            image_float = tf.decode_raw(image_buffer, tf.float32, name="decode_raw")
        else:
            raise ValueError('''unsupported image data type "%s", currently only know how to
            handle uint8 and float32''' % img_dtype)
        image_reshaped = tf.reshape(image_float, shape, name="reshaped")
        image_reshaped = imageIO.fixColorChannelOrdering(channelOrder, image_reshaped)
        image_input = tf.expand_dims(image_reshaped, 0, name="image_input")
        gfn = issn.asGraphFunction([height, width, image_buffer, num_channels], [image_input])

    return gfn
Example #13
    def read_to_numpy(self, file_name, data_type=None):
        """
        Reads entire TFRecords file as NumPy.

        :param file_name: The TFRecords file name to read.
        :type file_name: str
        :param data_type: The type of data (train, validation, deploy, etc); used to determine whether labels are included.
        :type data_type: str
        :return: The images and labels NumPy
        :rtype: (np.ndarray, np.ndarray)
        """
        feature_types = self.attain_feature_types(data_type)
        images = []
        labels = []
        for tfrecord in tf.python_io.tf_record_iterator(file_name):
            with tf.Graph().as_default() as graph:  # Create a separate graph, as this runs slowly when everything is on one graph.
                features = tf.parse_single_example(tfrecord, features=feature_types)
                image_shape, label_shape = self.extract_shapes_from_tfrecords_features(features, data_type)
                flat_image = tf.decode_raw(features['image_raw'], tf.uint8)
                image_tensor = tf.reshape(flat_image, image_shape)
                image_tensor = tf.squeeze(image_tensor)
                if data_type != 'deploy':
                    flat_label = tf.decode_raw(features['label_raw'], tf.float32)
                    label_tensor = tf.reshape(flat_label, label_shape)
                    label_tensor = tf.squeeze(label_tensor)
                else:
                    label_tensor = tf.constant(-1.0, dtype=tf.float32, shape=[1, 1, 1])
                with tf.Session(graph=graph) as session:
                    initialize_op = tf.global_variables_initializer()
                    session.run(initialize_op)
                    image, label = session.run([image_tensor, label_tensor])
            images.append(image)
            labels.append(label)
        return np.stack(images), np.stack(labels)
Example #14
    def create_image_and_label_inputs_from_file_name_queue(self, file_name_queue, data_type=None):
        """
        Creates the inputs for the image and label for a given file name queue.

        :param file_name_queue: The file name queue to be used.
        :type file_name_queue: tf.Queue
        :param data_type: The type of data (train, validation, test, deploy, etc) to determine how to process.
        :type data_type: str
        :return: The image and label inputs.
        :rtype: (tf.Tensor, tf.Tensor)
        """
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(file_name_queue)
        feature_types = self.attain_feature_types(data_type)
        features = tf.parse_single_example(serialized_example, features=feature_types)

        image_shape, label_shape = self.extract_shapes_from_tfrecords_features(features, data_type)

        flat_image = tf.decode_raw(features['image_raw'], tf.uint8)
        image = tf.reshape(flat_image, image_shape)

        if data_type != 'deploy':
            flat_label = tf.decode_raw(features['label_raw'], tf.float32)
            label = tf.reshape(flat_label, label_shape)
        else:
            # Makes a fake label tensor for preprocessing to work on.
            label = tf.constant(-1.0, dtype=tf.float32, shape=[1, 1, 1])
        return image, label
Example #15
def read_decode_tfrecord_list(file_list, do_augment=False):
    '''Read TFRecord content'''
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_list)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'shape': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.float32),
        })

    shape = tf.decode_raw(features['shape'], tf.uint8)
    #print('Shape (shape) is:', shape.shape)
    image = tf.decode_raw(features['image'], tf.uint8)
    #print('Shape (image) is:', image.shape)
    label = tf.cast(features['label'], tf.float32)

    # TODO: Infer shape from the shape field of the TFRecord
    image.set_shape([256* 256* 3])
    image = tf.reshape(image, [256, 256, 3])

    image, label = process_features(image, label, do_augment)


    return image, label
Example #16
def batch_parse_tf_example(batch_size, example_batch):
    '''
    Args:
        example_batch: a batch of tf.Example
    Returns:
        A dict of batched tensors
    '''
    features = {
        'x': tf.FixedLenFeature([], tf.string),
        'pi': tf.FixedLenFeature([], tf.string),
        'outcome': tf.FixedLenFeature([], tf.float32),
    }
    parsed = tf.parse_example(example_batch, features)
    x = tf.decode_raw(parsed['x'], tf.uint8)
    x = tf.cast(x, tf.float32)
    x = tf.reshape(x, [batch_size, go.N, go.N,
                       features_lib.NEW_FEATURES_PLANES])
    pi = tf.decode_raw(parsed['pi'], tf.float32)
    pi = tf.reshape(pi, [batch_size, go.N * go.N + 1])
    outcome = parsed['outcome']
    outcome.set_shape([batch_size])
    return {
        'pos_tensor': x,
        'pi_tensor': pi,
        'value_tensor': outcome,
    }
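batch_parse_tf_example expects an already-batched tensor of serialized tf.Example protos, so a tf.data pipeline would batch first and parse second; a hedged sketch, where the file name and batch size are placeholders (go and features_lib come from the original project):

import tensorflow as tf

BATCH_SIZE = 16  # placeholder
dataset = tf.data.TFRecordDataset('selfplay.tfrecords')  # placeholder file name
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)  # fixed batch size needed for the reshapes
dataset = dataset.map(lambda batch: batch_parse_tf_example(BATCH_SIZE, batch))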
Example #17
def tfrecord_to_graph_ops(filenames, num_epochs):
    file_queue = tf.train.string_input_producer(
        filenames, name='file_queue', num_epochs=num_epochs
    )
    reader = tf.TFRecordReader(
        options=tf.python_io.TFRecordOptions(
            compression_type=tf.python_io.TFRecordCompressionType.GZIP
        )
    )
    _, tfrecord = reader.read(file_queue)

    tfrecord_features = tf.parse_single_example(
        tfrecord,
        features={
            'images': tf.FixedLenFeature([], tf.string),
            'labels': tf.FixedLenFeature([], tf.string),
        },
        name='data'
    )
    tfeat = tf.decode_raw(tfrecord_features['images'], tf.uint8)
    # note, 'NCHW' is only supported on GPUs, so use 'NHWC'...
    tfeat = tf.reshape(tfeat, [-1, 28, 28, 1])
    ttarg = tf.decode_raw(tfrecord_features['labels'], tf.uint8)
    ttarg = tf.one_hot(indices=ttarg, depth=10, on_value=1, off_value=0)
    return tfeat, ttarg
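A hedged sketch of evaluating the ops returned above; the file name is a placeholder, and note that string_input_producer with num_epochs creates local variables that must be initialized:

import tensorflow as tf

features_op, targets_op = tfrecord_to_graph_ops(['mnist_train.tfrecord.gz'], num_epochs=1)
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    features, targets = sess.run([features_op, targets_op])
    coord.request_stop()
    coord.join(threads)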
Example #18
def read_data(filename_queue, shape):
  """ reads data from tfrecord files.

  Args:
    filename_queue: A queue of strings with filenames
    shape: image shape

  Returns:
    image, mask: the image and mask tensors, each of shape (shape[0], shape[1], 1)
  """
  reader = tf.TFRecordReader()
  key, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
    serialized_example,
    features={
      'image':tf.FixedLenFeature([],tf.string),
      'mask':tf.FixedLenFeature([],tf.string)
    }) 
  image = tf.decode_raw(features['image'], tf.uint8)
  mask = tf.decode_raw(features['mask'], tf.uint8)
  image = tf.reshape(image, [shape[0], shape[1], 1])
  mask = tf.reshape(mask, [shape[0], shape[1], 1])
  image = tf.to_float(image)
  mask = tf.to_float(mask) 
  image_mean = tf.reduce_mean(image)
  image = image - image_mean
  #image = image / 255.0
  mask = mask / 255.0
  return image, mask
Example #19
def read_image(file_queue):
	reader = tf.TFRecordReader()
	# key, value = reader.read(file_queue)
	_, serialized_example = reader.read(file_queue)
	features = tf.parse_single_example(
		serialized_example,
		features={
			'label': tf.FixedLenFeature([], tf.string),
			'image_raw': tf.FixedLenFeature([], tf.string)
			})

	image = tf.decode_raw(features['image_raw'], tf.uint8)
	# print('image ' + str(image))
	image = tf.reshape(image, [INPUT_IMG_WIDE, INPUT_IMG_HEIGHT, INPUT_IMG_CHANNEL])
	# image = tf.image.convert_image_dtype(image, dtype=tf.float32)
	# image = tf.image.resize_images(image, (IMG_HEIGHT, IMG_WIDE))
	# image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

	label = tf.decode_raw(features['label'], tf.uint8)
	# label = tf.cast(label, tf.int64)
	label = tf.reshape(label, [OUTPUT_IMG_WIDE, OUTPUT_IMG_HEIGHT])
	# label = tf.decode_raw(features['image_raw'], tf.uint8)
	# print(label)
	# label = tf.reshape(label, shape=[1, 4])
	return image, label
Example #20
def read_raw_images(data_set):
    dirs = './data/'+data_set+'/'
    filename = list_binary_files(dirs)
    print(filename)
    filename_queue = tf.train.string_input_producer(filename)

    if data_set == 'train':
        image_bytes = FLAGS.height * FLAGS.width * FLAGS.depth
        record_bytes = image_bytes + 1
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)
        label = tf.cast(tf.slice(record_bytes, [0], [1]), tf.int32)
        depth_major = tf.reshape(tf.slice(record_bytes, [1], [image_bytes]),[FLAGS.depth, FLAGS.height, FLAGS.width])
        uint8image = tf.transpose(depth_major, [1, 2, 0])
        return label, uint8image
    elif data_set == 'test':
        image_bytes = FLAGS.height * FLAGS.width * FLAGS.depth
        record_bytes = image_bytes + 1
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)
        depth_major = tf.reshape(tf.slice(record_bytes, [0], [image_bytes]),
        [FLAGS.depth, FLAGS.height, FLAGS.width])
        uint8image = tf.transpose(depth_major, [1, 2, 0])
        return uint8image
Example #21
def get_batch():
    '''Makes batch queues from the training data.
    Returns:
      A Tuple of x (Tensor), y (Tensor).
      x and y have the shape [batch_size, maxlen].
    '''
    import tensorflow as tf

    # Load data
    X, Y = load_train_data()

    # Create Queues
    x, y = tf.train.slice_input_producer([tf.convert_to_tensor(X),
                                          tf.convert_to_tensor(Y)])

    x = tf.decode_raw(x, tf.int32)
    y = tf.decode_raw(y, tf.int32)

    x, y = tf.train.batch([x, y],
                          shapes=[(None,), (None,)],
                          num_threads=8,
                          batch_size=hp.batch_size,
                          capacity=hp.batch_size * 64,
                          allow_smaller_final_batch=False,
                          dynamic_pad=True)
    num_batch = len(X) // hp.batch_size

    return x, y, num_batch  # (N, None) int32, (N, None) int32, ()
Example #22
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })

    # Convert from a scalar string tensor (whose single string has
    # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
    # [mnist.IMAGE_PIXELS].
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)

    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)

    image_shape = tf.stack([height, width, 3])
    annotation_shape = tf.stack([height, width, 3])

    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)

    image_size_const = tf.constant(
        (IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)
    annotation_size_const = tf.constant(
        (IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)

    # Random transformations can be put here: right before you crop images
    # to predefined size. To get more information look at the stackoverflow
    # question linked above.
    resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
                                                           target_height=IMAGE_HEIGHT,
                                                           target_width=IMAGE_WIDTH)

    resized_annotation = tf.image.resize_image_with_crop_or_pad(image=annotation,
                                                                target_height=IMAGE_HEIGHT,
                                                                target_width=IMAGE_WIDTH)

    images, annotations = tf.train.shuffle_batch([resized_image, resized_annotation],
                                                 batch_size=2,
                                                 capacity=30,
                                                 num_threads=2,
                                                 min_after_dequeue=10)

    return images, annotations
Example #23
    def parser(self, record):
        keys_to_features = {
            'attention_news_indices': tf.FixedLenFeature([], tf.string),
            'attention_news_values': tf.VarLenFeature(tf.float32),
            'attention_news_shape': tf.FixedLenFeature([2], tf.int64),

            'attention_user_indices': tf.FixedLenFeature([], tf.string),
            'attention_user_values': tf.VarLenFeature(tf.int64),
            'attention_user_weights': tf.VarLenFeature(tf.float32),
            'attention_user_shape': tf.FixedLenFeature([2], tf.int64),

            'fm_feat_indices': tf.FixedLenFeature([], tf.string),
            'fm_feat_val': tf.VarLenFeature(tf.float32),
            'fm_feat_shape': tf.FixedLenFeature([2], tf.int64),

            'labels': tf.FixedLenFeature([], tf.string),

            'dnn_feat_indices': tf.FixedLenFeature([], tf.string),
            'dnn_feat_values': tf.VarLenFeature(tf.int64),
            'dnn_feat_weight': tf.VarLenFeature(tf.float32),
            'dnn_feat_shape': tf.FixedLenFeature([2], tf.int64),
        }
        parsed = tf.parse_single_example(record, keys_to_features)

        attention_news_indices = tf.reshape(tf.decode_raw(parsed['attention_news_indices'], \
                                                          tf.int64), [-1, 2])
        attention_news_values = tf.sparse_tensor_to_dense(parsed['attention_news_values'])
        attention_news_shape = parsed['attention_news_shape']

        attention_user_indices = tf.reshape(tf.decode_raw(parsed['attention_user_indices'], \
                                                          tf.int64), [-1, 2])
        attention_user_values = tf.sparse_tensor_to_dense(parsed['attention_user_values'])
        attention_user_weights = tf.sparse_tensor_to_dense(parsed['attention_user_weights'])
        attention_user_shape = parsed['attention_user_shape']

        fm_feat_indices = tf.reshape(tf.decode_raw(parsed['fm_feat_indices'], \
                                                   tf.int64), [-1, 2])
        fm_feat_val = tf.sparse_tensor_to_dense(parsed['fm_feat_val'])
        fm_feat_shape = parsed['fm_feat_shape']

        labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1])

        dnn_feat_indices = tf.reshape(tf.decode_raw(parsed['dnn_feat_indices'], \
                                                    tf.int64), [-1, 2])
        dnn_feat_values = tf.sparse_tensor_to_dense(parsed['dnn_feat_values'])
        dnn_feat_weight = tf.sparse_tensor_to_dense(parsed['dnn_feat_weight'])
        dnn_feat_shape = parsed['dnn_feat_shape']
        return (attention_news_indices, attention_news_values, attention_news_shape, \
                attention_user_indices, attention_user_values, attention_user_weights, \
                attention_user_shape, fm_feat_indices, fm_feat_val, \
                fm_feat_shape, labels, dnn_feat_indices, dnn_feat_values, \
                dnn_feat_weight, dnn_feat_shape)
Example #24
def read_tfrecord_and_decode_into_image_annotation_pair_tensors(tfrecord_filenames_queue):
    """Return image/annotation tensors that are created by reading tfrecord file.
    The function accepts a tfrecord filenames queue as an input, which can
    usually be created using tf.train.string_input_producer(), where the
    filenames are specified together with the desired number of epochs. This
    function takes the queue produced by the aforementioned
    tf.train.string_input_producer() and defines tensors converted from raw
    binary representations into reshaped image/annotation tensors.
    Parameters
    ----------
    tfrecord_filenames_queue : tfrecord filename queue
        String queue object from tf.train.string_input_producer()
    
    Returns
    -------
    image, annotation : tuple of tf.uint8 tensors (image, annotation)
        Tuple of image/annotation tensors
    """
    
    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(tfrecord_filenames_queue)

    features = tf.parse_single_example(
      serialized_example,
      features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'mask_raw': tf.FixedLenFeature([], tf.string)
        })

    
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)
    
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    
    image_shape = tf.stack([height, width, 3])
    
    # The last dimension was added because
    # tf.image.resize_image_with_crop_or_pad() accepts tensors
    # that have depth. We need resize and crop later.
    # TODO: See if it is necessary and probably remove third
    # dimension
    annotation_shape = tf.stack([height, width, 1])
    
    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)
    
    return image, annotation
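The docstring above points at tf.train.string_input_producer() for building the filenames queue; a minimal sketch of that wiring, with the file name and epoch count as placeholders:

import tensorflow as tf

tfrecord_filenames_queue = tf.train.string_input_producer(
    ['segmentation_train.tfrecords'], num_epochs=10)  # placeholders
image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(
    tfrecord_filenames_queue)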
Example #25
def parse_example_proto(example_serialized):
    """Parses an Example proto containing a training example of an image.

    The output of the build_image_data.py image preprocessing script is a dataset
    containing serialized Example protocol buffers. Each Example proto contains
    the following fields:

    Args:
    example_serialized: scalar Tensor tf.string containing a serialized
      Example protocol buffer.

    Returns:
    image: 3-D uint8 Tensor of shape [64, 64, 3] containing the decoded image.
    location: Tensor tf.int64 containing the decoded location data.
    label_one_hot: Tensor tf.float64 containing the decoded one-hot label.
    """
    # Dense features in Example proto.
    feature_map = {
            'height': tf.FixedLenFeature((), tf.int64),
            'width': tf.FixedLenFeature((), tf.int64),
            'channel': tf.FixedLenFeature((), tf.int64),
            'label': tf.FixedLenFeature((), tf.int64),
            'label_depth': tf.FixedLenFeature((), tf.int64),
            'label_one_hot_raw': tf.FixedLenFeature((), tf.string),
            'image_raw': tf.FixedLenFeature((), tf.string),
            'location_raw': tf.FixedLenFeature((), tf.string)}
    
    #sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    # Sparse features in Example proto.

    features = tf.parse_single_example(example_serialized, feature_map)
    
    image_raw = tf.decode_raw(features["image_raw"], tf.uint8)
    image = tf.reshape(image_raw, [64, 64, 3])
    label = tf.cast(features['label'], dtype=tf.int32)
    label_one_hot = tf.decode_raw(features['label_one_hot_raw'], tf.float64)
    location = tf.decode_raw(features['location_raw'], tf.int64)

    # Note that we impose an ordering of (y, x) just to make life difficult.
    #bbox = tf.concat(axis=0, values=[ymin, xmin, ymax, xmax])

    # Force the variable number of bounding boxes into the shape
    # [1, num_boxes, coords].
    #bbox = tf.expand_dims(bbox, 0)
    #bbox = tf.transpose(bbox, [0, 2, 1])

    return image, location, label_one_hot
Example #26
    def decode(self, batched_serialized_tensors, batch_size):
        """Decodes the input from batch of serialized tensors
           Formats and reshapes image
           Args:
            batched_serialized_tensors: tensor output from Batcher containing read in
                serialized tensors

          Returns:
            batched_decoded_tensors: dict of batches of decoded TFRecords of batch_size
        """

        #faster to decode tensors as a batch
        batched_decoded_tensors = tf.parse_example(batched_serialized_tensors[fields.InputDataFields.serialized],
                                                    self._keys_to_features)

        #Decode and cast tensors if needed
        for label in self._multi_task_labels:
            tensor = batched_decoded_tensors[label.name]
            #only strings need to be decoded
            if label.dtype == "string":
                if label.decodetype:
                    tensor = tf.decode_raw(tensor, TYPE_MAP[label.decodetype])
                else:
                    raise ValueError("string type must have a type to be decoded to.")
            if label.casttype:
                tensor = tf.cast(tensor, TYPE_MAP[label.casttype])

            if label.shape:
                tensor = tf.reshape(tensor, [batch_size,*label.shape])
                tensor.set_shape([batch_size, *label.shape])

            batched_decoded_tensors[label.name] = tensor

        #input is handled separately
        image_float = tf.cast(
                            tf.decode_raw(batched_decoded_tensors['input'],
                                          tf.uint8),
                            tf.float32)
        image_float = tf.reshape(image_float,[batch_size,
                                              self._image_height,
                                              self._image_width,
                                              self._channels])
        image_float.set_shape([batch_size,
                               self._image_height,
                               self._image_width,
                               self._channels])

        batched_decoded_tensors['input'] = image_float

        return batched_decoded_tensors
Example #27
def read_and_decode(filename, is_train):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _,serialized_example = reader.read(filename_queue)

    if is_train:
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               "hat_label": tf.FixedLenFeature([], tf.int64),
                                               "hair_label": tf.FixedLenFeature([], tf.int64),
                                               "gender_label": tf.FixedLenFeature([], tf.int64),
                                               "top_label": tf.FixedLenFeature([], tf.int64),
                                               "down_label": tf.FixedLenFeature([], tf.int64),
                                               "shoes_label": tf.FixedLenFeature([], tf.int64),
                                               "bag_label": tf.FixedLenFeature([], tf.int64),
                                               "img_raw": tf.FixedLenFeature([], tf.string),
                                           })
        img = tf.decode_raw(features['img_raw'], tf.uint8)
        img = tf.reshape(img, [128, 256, 3])
        #image = Image.frombytes('RGB', (224, 224), img[0])
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
        #print(type(img))
        #img = np.asarray(img, dtype=np.uint8)
        #print(type(img))
        #tl.visualize.frame(I=img, second=5, saveable=False, name='frame', fig_idx=12836)

        hat_label = tf.cast(features['hat_label'], tf.int32)
        hair_label = tf.cast(features['hair_label'], tf.int32)
        gender_label = tf.cast(features['gender_label'], tf.int32)
        top_label = tf.cast(features['top_label'], tf.int32)
        down_label = tf.cast(features['down_label'], tf.int32)
        shoes_label = tf.cast(features['shoes_label'], tf.int32)
        bag_label = tf.cast(features['bag_label'], tf.int32)
        labels = {"hat":hat_label, "hair":hair_label, "gender":gender_label,
                  "top":top_label, "down":down_label, "shoes":shoes_label,
                  "bag":bag_label}

        return img, labels
    else:
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               "img_raw": tf.FixedLenFeature([], tf.string),
                                           })
        img = tf.decode_raw(features['img_raw'], tf.uint8)
        img = tf.reshape(img, [128, 256, 3])
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
        #tl.visualize.frame(I=img, second=5, saveable=False, name='frame', fig_idx=12833)

        return img
Example #28
    def parse_function(planes, probs, winner):
        """
        Convert unpacked record batches to tensors for tensorflow training
        """
        planes = tf.decode_raw(planes, tf.uint8)
        probs = tf.decode_raw(probs, tf.float32)
        winner = tf.decode_raw(winner, tf.float32)

        planes = tf.to_float(planes)
        planes = tf.reshape(planes, (ChunkParser.BATCH_SIZE, 112, 8*8))

        probs = tf.reshape(probs, (ChunkParser.BATCH_SIZE, 1858))
        winner = tf.reshape(winner, (ChunkParser.BATCH_SIZE, 1))

        return (planes, probs, winner)
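A hedged sketch of applying parse_function with tf.data; ChunkParser.BATCH_SIZE belongs to the original project, while chunk_generator below is a hypothetical generator yielding one (planes, probs, winner) triple of packed byte strings per batch:

import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    chunk_generator,  # hypothetical generator of three packed byte strings per batch
    output_types=(tf.string, tf.string, tf.string))
dataset = dataset.map(parse_function)
planes, probs, winner = dataset.make_one_shot_iterator().get_next()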
Example #29
def read_and_decode(filename_queue):
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      # Defaults are not specified since both keys are required.
      features={
          'vector': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([], tf.int64),
      })

  # features = tf.parse_single_example(serialized_example, dense_keys=['vector', 'label'], dense_types=[tf.string, tf.int64])
  # Convert from a scalar string tensor (whose single string has
  # length tf_model.IMAGE_PIXELS) to a uint8 tensor with shape
  # [tf_model.IMAGE_PIXELS].
  image = tf.decode_raw(features['vector'], tf.float32)
  image.set_shape([FEATURE_DIMENSIONALITY])
  if FLAGS.transpose_input:
    image = tf.reshape(image, FEATURE_INPUT_SHAPE)
    image = tf.transpose(image, perm=[0,2,1])
    image = tf.reshape(image, [-1])

  # print("Image shape is %s" %(image.shape))
  # OPTIONAL: Could reshape into a 28x28 image and apply distortions
  # here.  Since we are not applying any distortions in this
  # example, and the next step expects the image to be flattened
  # into a vector, we don't bother.
  # Convert from [0, 255] -> [-0.5, 0.5] floats.
  # image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
  # Convert label from a scalar uint8 tensor to an int32 scalar.
  label = tf.cast(features['label'], tf.int32)
  return image, label
Example #30
def read_and_decode(filename_queue):
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      # Defaults are not specified since both keys are required.
      features={
          'image_raw': tf.FixedLenFeature([], tf.string),
          'label': tf.FixedLenFeature([], tf.int64),
      })

  # Convert from a scalar string tensor (whose single string has
  # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
  # [mnist.IMAGE_PIXELS].
  image = tf.decode_raw(features['image_raw'], tf.uint8)
  image.set_shape([mnist.IMAGE_PIXELS])

  # OPTIONAL: Could reshape into a 28x28 image and apply distortions
  # here.  Since we are not applying any distortions in this
  # example, and the next step expects the image to be flattened
  # into a vector, we don't bother.

  # Convert from [0, 255] -> [-0.5, 0.5] floats.
  image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

  # Convert label from a scalar uint8 tensor to an int32 scalar.
  label = tf.cast(features['label'], tf.int32)

  return image, label
Example #31
 def DecodeLabelAndImage(r):
   r = tf.decode_raw(r, tf.uint8)
   return tf.to_float(
       tf.transpose(tf.reshape(r[1:], [3, 32, 32]),
                    [1, 2, 0])) / 255.0, tf.to_int32(r[0])
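The decoder above reads a CIFAR-10-style record (one label byte followed by a 3x32x32 image in channel-major order); a hedged sketch of feeding it from a fixed-length record dataset, with the file name as a placeholder:

import tensorflow as tf

record_bytes = 1 + 3 * 32 * 32
dataset = tf.data.FixedLengthRecordDataset(['cifar10_train.bin'], record_bytes)  # placeholder file
dataset = dataset.map(DecodeLabelAndImage)
image, label = dataset.make_one_shot_iterator().get_next()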
Example #32
def extract_frame_level_features_per_tf_record(frame_file_path,
                                               maximum_iter=False,
                                               stop_at_iter=10):
    '''
    Extraction of Youtube tfrecords frame file features.
    
    Args:
    frame_file_path - path to each tf_record (note: developed with the assumption of storing on an s3 bucket and accessing with glob)
    
    maximum_iter - flag- if True, will limit number of videos extracted from each TF record
    stop_at_iter - number of videos to extract
    num_tf_records - number of records to extract - WARNING!!! this is VERY slow, if bigger than 1
    
    Assumes each video in the tfrecord has following features:
    'id' : bytes_list
    'labels' : int64_list
    'audio': float arr, each frame 128
    'rgb', float arr, each frame 1024
    
    returns:
    numpy arrays of frame ids, frame multi-labels, frame audio, frame rgb
    '''
    frame_ids = []
    frame_labels = []
    feat_rgb = []
    feat_audio = []
    # ATTENTION: only use one TF record for debugging.
    print(
        f'There are {sum(1 for _ in tf.python_io.tf_record_iterator(frame_file_path))} videos in this TF record.'
    )
    iter_ = 0
    for example in tf.python_io.tf_record_iterator(frame_file_path):
        if maximum_iter and iter_ == stop_at_iter:
            break
        tf_example = tf.train.Example.FromString(example)

        frame_ids.append(
            tf_example.features.feature['id'].bytes_list.value[0].decode(
                encoding='UTF-8'))
        frame_labels.append(
            tf_example.features.feature['labels'].int64_list.value)

        tf_seq_example = tf.train.SequenceExample.FromString(example)
        n_frames = len(
            tf_seq_example.feature_lists.feature_list['audio'].feature)

        rgb_frame = []
        audio_frame = []

        # iterate through frames
        sys.stdout.flush()
        for i in range(n_frames):
            sess = tf.InteractiveSession()
            sys.stdout.write('\r' + 'iterating video: ' + str(iter_) +
                             ' ,frames: ' + str(i) + '/' + str(n_frames))
            sys.stdout.flush()
            rgb_frame.append(
                tf.cast(
                    tf.decode_raw(
                        tf_seq_example.feature_lists.feature_list['rgb'].
                        feature[i].bytes_list.value[0], tf.uint8),
                    tf.float32).eval())
            audio_frame.append(
                tf.cast(
                    tf.decode_raw(
                        tf_seq_example.feature_lists.feature_list['audio'].
                        feature[i].bytes_list.value[0], tf.uint8),
                    tf.float32).eval())

            tf.reset_default_graph()
            sess.close()
        feat_rgb.append(rgb_frame)
        feat_audio.append(audio_frame)
        iter_ += 1

    return frame_ids, frame_labels, feat_rgb, feat_audio
Example #33
 def decode_label(label):
     label = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
     label = tf.reshape(label, [])  # label is a scalar
     return tf.to_int32(label)
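decode_label turns a single raw byte into an int32 scalar; a hedged sketch of pairing it with a fixed-length record dataset, where the file name and 8-byte header are assumptions modeled on the MNIST label file layout:

import tensorflow as tf

labels = tf.data.FixedLengthRecordDataset(
    'train-labels-idx1-ubyte', record_bytes=1, header_bytes=8)  # placeholders
labels = labels.map(decode_label)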
Example #34
 def _gt_boxes_decoder(keys_to_tensors):
   bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32)
   instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32)
   bboxes_shape = tf.stack([instances, 5])
   return tf.reshape(bboxes, bboxes_shape)
Example #35
    def video_parse_function(example_proto):
        """Parses and preprocesses the features from a video tfrecord."""

        features = {
            "video":
            tf.VarLenFeature(dtype=tf.string),
            "video_length":
            tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=10),
            "video_height":
            tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=128),
            "video_width":
            tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=128),
            "video_channels":
            tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=1),
        }

        parsed_features = tf.parse_single_example(example_proto, features)

        video = tf.sparse_tensor_to_dense(parsed_features["video"],
                                          default_value="")
        video = tf.cast(tf.decode_raw(video, tf.uint8), tf.float32)

        # Rescale video from [0, 255] to [-1, 1]
        video = tf.reshape(
            video,
            tf.stack([
                parsed_features["video_length"],
                parsed_features["video_height"],
                parsed_features["video_width"],
                parsed_features["video_channels"]
            ]))
        video = video * (2 / 255) - 1

        # Pick a random temporal crop of config.num_frames consecutive frames.
        start_index = tf.random_uniform(
            [],
            minval=0,
            maxval=parsed_features["video_length"] - config.num_frames + 1,
            dtype=tf.int64)
        video = video[start_index:start_index + config.num_frames, ...]

        # TODO(drewjaegle): do we need these config fields?
        # Otherwise, we need to ensure the tensor values match the config values.
        video.set_shape([
            config.num_frames, config.im_height, config.im_width,
            config.im_channels
        ])

        # Reshape to NCHW from NHWC
        video = tf.transpose(video, [0, 3, 1, 2])

        # TODO(drewjaegle): Allow resampling of images here

        # Split images to input (10 frames) and predict (10 frames)
        input_sequence = video[:config.input_seq_len, ...]
        predict_sequence = video[config.input_seq_len:, ...]

        # TODO(drewjaegle): Do any preprocessing needed (i.e. downsample to 64)
        return input_sequence, predict_sequence
Example #36
def read_TFRecord(data_dir, batch_size, shuffle, in_classes):
    # number of classes
    num_classes = in_classes
    # collect the TFRecord files
    data_files = tf.gfile.Glob(data_dir)
    # read the files
    filename_queue = tf.train.string_input_producer(data_files, shuffle=True)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # parse the read example
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label':
                                           tf.FixedLenFeature([], tf.int64),
                                           'img_raw':
                                           tf.FixedLenFeature([], tf.string),
                                           'img_width':
                                           tf.FixedLenFeature([], tf.int64),
                                           'img_height':
                                           tf.FixedLenFeature([], tf.int64),
                                       })  # extract the feature object containing image and label
    # tf.decode_raw parses the string into the image's pixel array
    # decode the image data: string -> uint8
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    height = tf.cast(features['img_height'], tf.int32)
    width = tf.cast(features['img_width'], tf.int32)
    label = tf.cast(features['label'], tf.int32)
    channel = 3
    image = tf.reshape(image, [height, width, channel])
    # reshape: vector -> 3-D matrix
    # image = tf.reshape(image, [height, width, channel])
    # resize/crop the image
    image = tf.image.resize_image_with_crop_or_pad(image, 100, 100)
    # image = tf.image.resize_images(image, [240,240], method=0)
    image = tf.image.per_image_standardization(image)
    # uint8 -> float32
    # image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    image = tf.cast(image, tf.float32)
    # assemble the batch
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * batch_size
    if shuffle:
        image_batch, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=64,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)
    else:
        image_batch, label_batch = tf.train.batch([image, label],
                                                  batch_size=batch_size,
                                                  num_threads=64,
                                                  capacity=capacity)

    ## ONE-HOT
    label_batch = tf.reshape(label_batch, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    label_batch = tf.sparse_to_dense(
        tf.concat(values=[indices, label_batch], axis=1),
        [batch_size, num_classes], 1.0, 0.0)
    print(image_batch)
    print(label_batch)

    #n_classes = 10
    #label_batch = tf.one_hot(label_batch, depth= n_classes)
    #label_batch = tf.cast(label_batch, dtype=tf.int32)
    #label_batch = tf.reshape(label_batch, [batch_size, n_classes])

    return image_batch, label_batch
Example #37
def read_and_decode(filename_queue=None, img_dims=[256,256,3], resize_to=[256,256], model_dims=[224,224,3], size_of_batch=32,\
                     labels=True, augmentations_dic=None, num_of_threads=1, shuffle=True):

    """
    Reads in tf records and decodes the features of the image 
    Input: filename_queue - A node in a TensorFlow Graph used for asynchronous computations
           img_dims - Dimensions of the tensor image stored as a tfrecord, example: [256, 256, 3] 
           model_dims - Dimensions of the tensor image that the model accepts, example: [224, 224, 3] 
           resize_to - Size to resize the tf record image to before training; if resize_to is the same as img_dims, no resizing will take place
           size_of_batch - Size of the batch that will be fed into the model, example: 32
           labels - Option for if the images stored in tfrecords have labels associated with them
           augmentations_dic - Dictionary of augmentations that an image can have for training and validation. Augmentations
           are chosen in the config
           num_of_threads - Number of threads that execute a training op that dequeues mini-batches from the queue
           shuffle - Boolean if batches fed into graph should be shuffled or not 
    Outputs: Tensor image, label of the image and filepath to the image. If labels is False only tensor image and filepath will be returned
    """
    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    
    if not labels:
        features = tf.parse_single_example(
          serialized_example,
        
          features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'file_path': tf.FixedLenFeature([], tf.string),
            })

        image = tf.decode_raw(features['image_raw'], tf.uint8)
        file_path = tf.cast(features['file_path'], tf.string)
        
        
        image = tf.reshape(image, img_dims)
        image = tf.cast(image, tf.float32)

        image = tf.image.resize_images(image, resize_to)

        image = tf.to_float(image)
        image = image/255

        if augmentations_dic and augmentations_dic['scale_jitter']:
            random_size = randint(256,512)
            image = tf.image.resize_images(image, [random_size, random_size])
        else:
            image = tf.image.resize_images(image,resize_to)

        if augmentations_dic and  augmentations_dic['rand_crop']:
            image = tf.random_crop(image, model_dims)

        else:
            image = tf.image.resize_image_with_crop_or_pad(image, model_dims[0],\
                                                         model_dims[1])

        if augmentations_dic and  augmentations_dic['rand_color']:
            random_color_ordering = randint(0,3)
            image = distort_color(image,random_color_ordering)

        if augmentations_dic and augmentations_dic['rand_flip_left_right']:
            image = tf.image.random_flip_left_right(image)

        if augmentations_dic and augmentations_dic['rand_flip_top_bottom']:
            image = tf.image.random_flip_up_down(image)

        if augmentations_dic and augmentations_dic['rand_rotate']:
            random_angle = randint(0,359)
            image = tf.contrib.image.rotate(image, random_angle)

        if shuffle:
      
            img, f = tf.train.shuffle_batch([image, file_path],
                                                         batch_size=size_of_batch,
                                                         capacity=1000 + 3 * size_of_batch,
                                                         min_after_dequeue=1000,
                                                         num_threads=num_of_threads)
        else:
            img, f = tf.train.batch([image, file_path],
                                                         batch_size=size_of_batch,
                                                          capacity=100000,
                                                          allow_smaller_final_batch=True,
                                                         num_threads=num_of_threads)

        
        return img, f

    else:
        features = tf.parse_single_example(
          serialized_example,
        
          features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'file_path': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
            })

        image = tf.decode_raw(features['image_raw'], tf.uint8)

        label = tf.cast(features['label'], tf.int32)

        file_path = tf.cast(features['file_path'], tf.string)
        
        
        image = tf.reshape(image, img_dims)
        image = tf.cast(image, tf.float32)

        image = tf.image.resize_images(image, resize_to)

        image = tf.to_float(image)
        image = image/255

        if augmentations_dic and augmentations_dic['scale_jitter']:
            random_size = randint(256,512)
            image = tf.image.resize_images(image, [random_size, random_size])
        else:
            image = tf.image.resize_images(image,resize_to)

        if augmentations_dic and augmentations_dic['rand_crop']:
            image = tf.random_crop(image, model_dims)

        else:
            image = tf.image.resize_image_with_crop_or_pad(image, model_dims[0],\
                                                         model_dims[1])

        if augmentations_dic and augmentations_dic['rand_color']:
            random_color_ordering = randint(0,3)
            image = distort_color(image,random_color_ordering)

        if augmentations_dic and augmentations_dic['rand_flip_left_right']:
            image = tf.image.random_flip_left_right(image)

        if augmentations_dic and augmentations_dic['rand_flip_top_bottom']:
            image = tf.image.random_flip_up_down(image)

        if augmentations_dic and augmentations_dic['rand_rotate']:
            random_angle = randint(0,359)
            image = tf.contrib.image.rotate(image, random_angle)

        if shuffle:
            img, l, f = tf.train.shuffle_batch([image, label, file_path],
                                                         batch_size=size_of_batch,
                                                         capacity=1000 + 3 * size_of_batch,
                                                         min_after_dequeue=1000,
                                                         num_threads=num_of_threads)
        else:
            img, l, f = tf.train.batch([image, label, file_path],
                                                         batch_size=size_of_batch,
                                                         capacity=100000,
                                                         allow_smaller_final_batch=True,
                                                         num_threads=num_of_threads)        
        return img, l, f
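A minimal usage sketch for the reader above follows. The function name (read_and_decode_tfrecord), the keyword-argument names, and the file path are assumptions based on the docstring and the branches above, not part of the original snippet; every augmentation key is looked up directly, so the dictionary should contain them all.

import tensorflow as tf

# Hypothetical driver for the reader above (function and argument names are assumed).
filename_queue = tf.train.string_input_producer(['train.tfrecords'])

augmentations = {
    'scale_jitter': False,
    'rand_crop': True,
    'rand_color': False,
    'rand_flip_left_right': True,
    'rand_flip_top_bottom': False,
    'rand_rotate': False,
}

img, lbl, path = read_and_decode_tfrecord(  # assumed name of the function defined above
    filename_queue,
    img_dims=[256, 256, 3],
    model_dims=[224, 224, 3],
    resize_to=[256, 256],
    size_of_batch=32,
    labels=True,
    augmentations_dic=augmentations,
    num_of_threads=4,
    shuffle=True)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    images, labels_batch, paths = sess.run([img, lbl, path])
    coord.request_stop()
    coord.join(threads)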
Example #38
0
        def _decode_and_augment_image(example_proto):
            keys_to_features = {
                'label': tf.FixedLenFeature([], tf.int64),
                'shape': tf.FixedLenFeature([], tf.string),
                'image': tf.FixedLenFeature([], tf.string),
            }
            tfrecord_features = tf.parse_single_example(
                example_proto, keys_to_features)

            image = tf.decode_raw(tfrecord_features['image'], tf.uint8)
            shape = tf.decode_raw(tfrecord_features['shape'], tf.int64)
            if input_type == ".jpeg":
                image = tf.reshape(image, target_size + [3])
            else:
                image = tf.reshape(image, target_size)
            label = tfrecord_features['label']

            if augment:
                image = tf.image.random_flip_left_right(image)
                image = tf.image.random_flip_up_down(image)
                degrees = tf.random_uniform((), minval=-180, maxval=180)
                image = tf.contrib.image.rotate(image, degrees)

                width_shift = tf.random_uniform((), minval=0, maxval=0.05)
                height_shift = tf.random_uniform((), minval=0, maxval=0.05)

                horizontal_pad = tf.cast(tf.ceil(width_shift * target_size[0]),
                                         tf.int32)
                vertical_pad = tf.cast(tf.ceil(height_shift * target_size[1]),
                                       tf.int32)

                padding = tf.stack([
                    horizontal_pad, horizontal_pad, vertical_pad, vertical_pad,
                    tf.constant(0),
                    tf.constant(0)
                ])
                padding = tf.reshape(padding, (3, 2))

                image = tf.pad(image, padding)
                image = tf.random_crop(image, target_size + [3])

                zoom = tf.random_uniform((), minval=-0.1, maxval=0.1)
                new_dim = tf.cast(tf.ceil((1 - zoom) * target_size[0]),
                                  dtype=tf.int32)

                image = tf.image.resize_image_with_crop_or_pad(
                    image, new_dim, new_dim)

                image = tf.image.resize_images(
                    image, target_size, method=tf.image.ResizeMethod.BILINEAR)

            if normalize:
                std = tf.constant(np.array(
                    [70.53946096, 51.71475228, 43.03428563]),
                                  dtype=tf.float32)
                std = tf.expand_dims(tf.expand_dims(std, axis=0), axis=0)

                mean = tf.constant(np.array(
                    [108.64628601, 75.86886597, 54.34005736]),
                                   dtype=tf.float32)
                mean = tf.expand_dims(tf.expand_dims(mean, axis=0), axis=0)

                image = (tf.cast(image, dtype=tf.float32) - mean) / std

            label = tf.reshape(label, [1])
            if input_type == ".jpeg":
                image = tf.reshape(image, target_size + [3])
            else:
                image = tf.reshape(image, target_size)

            return {'shape': shape, 'image': image}, label
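Since _decode_and_augment_image takes a serialized example proto, it is presumably meant to be mapped over a tf.data pipeline inside its enclosing input function. A rough sketch of such a pipeline is shown below; the record path, batch size, and the enclosing-scope variables (target_size, input_type, augment, normalize) are assumptions.

import tensorflow as tf

# Assumed stand-ins for variables that live in the enclosing scope of the original code.
target_size = [512, 512]
input_type = ".jpeg"
augment = True
normalize = True

# Hypothetical pipeline: parse, augment and batch the TFRecord examples.
dataset = (tf.data.TFRecordDataset('train.tfrecord')  # path is a placeholder
           .map(_decode_and_augment_image, num_parallel_calls=4)
           .shuffle(buffer_size=1000)
           .batch(32)
           .prefetch(1))

features, labels = dataset.make_one_shot_iterator().get_next()
# features['image'] has shape [32] + target_size (+ [3] for JPEG input); labels has shape [32, 1].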
Example #39
0
def read(bytes, dtype, shapex, shapey):
    inp = tf.decode_raw(bytes, dtype)
    retx = tf.reshape(inp[:np.prod(shapex)], shapex)
    rety = tf.reshape(inp[np.prod(shapex):], shapey)
    return (retx, rety)
batchsize = 2

batch = tf.train.shuffle_batch([ex],
                               batchsize,
                               capacity=batchsize * 10,
                               min_after_dequeue=batchsize * 5)

# Deserialize the batched data
example = tf.parse_example(batch, features=feature)

image = example['image']
label = example['label']

# Decode the byte data into uint8 values
image = tf.decode_raw(image, tf.uint8)
# A reshape is required, otherwise the result stays a flat vector
image = tf.reshape(image, [-1, 32, 32, 3])

with tf.Session() as sess:
    # thread coordinator
    coord = tf.train.Coordinator()

    sess.run(tf.local_variables_initializer())
    threads = tf.train.start_queue_runners(sess, coord)

    for i in range(1):
        image_bth, _ = sess.run([image, label])
        # import cv2
        # cv2.imshow("image", image_bth[0, ...])
        # cv2.waitKey(0)
decomp_feature = {
    'lsize_dim0': tf.FixedLenFeature([], tf.int64),
    'lsize_dim1': tf.FixedLenFeature([], tf.int64),
    'lsize_dim2': tf.FixedLenFeature([], tf.int64),
    'data_vol': tf.FixedLenFeature([], tf.string),
    'label_vol': tf.FixedLenFeature([], tf.string)
}

with tf.Session() as sess:
    queue = tf.train.string_input_producer(val_list,
                                           num_epochs=None,
                                           shuffle=False)
    reader = tf.TFRecordReader()
    fid, serialized_example = reader.read(queue)
    parser = tf.parse_single_example(serialized_example,
                                     features=decomp_feature)
    data_vol = tf.decode_raw(parser['data_vol'], tf.float32)
    label_vol = tf.decode_raw(parser['label_vol'], tf.float32)

    data_vol = tf.reshape(data_vol, raw_size)
    label_vol = tf.reshape(label_vol, raw_size)
    data_vol = tf.slice(data_vol, [0, 0, 0], volume_size)
    label_vol = tf.slice(label_vol, [0, 0, 1], label_size)

    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(len(val_list)):
        example, l = sess.run([data_vol, label_vol])
        np.save(os.path.join(saveDir, str(i) + '.npy'), example)
        np.save(os.path.join(saveDir_, str(i) + '.npy'), l)
Example #42
0
import tensorflow as tf

reader = tf.TFRecordReader()

filename_queue = tf.train.string_input_producer(
    ["../image_data/output.tfrecords"])

_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(serialized_example,
                                   features={
                                       'image_raw':
                                       tf.FixedLenFeature([], tf.string),
                                       'pixels':
                                       tf.FixedLenFeature([], tf.int64),
                                       'label':
                                       tf.FixedLenFeature([], tf.int64),
                                   })

image = tf.decode_raw(features['image_raw'], tf.uint8)
label = tf.cast(features['label'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

sess = tf.Session()

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for i in range(10):
    print(sess.run([image, label, pixels]))
Example #43
0
# import tensorflow
import tensorflow as tf
# total bytes per image
TOTAL_BYTES = 3073
# create a list of filenames
filenames = ["dataset/data_batch_%d.bin" % i for i in range(1, 6)]
# create a queue of filenames
filename_queue = tf.train.string_input_producer(filenames, shuffle=False)
# initialize a reader to read TOTAL_BYTES bytes
reader = tf.FixedLengthRecordReader(TOTAL_BYTES)
# read TOTAL_BYTES bytes from the files
key, value = reader.read(filename_queue)
# decode read bytes to perceivable datatype
vector_bytes = tf.decode_raw(value, tf.uint8)
#create a session object
sess = tf.InteractiveSession()
#start queue runners
tf.train.start_queue_runners()
#print uint8 value of data read
print("vector_bytes ", sess.run(vector_bytes))
#print the number of elements in list vector_bytes
print("number of elements in vector_bytes ", len(sess.run(vector_bytes)))
label = tf.cast(tf.slice(vector_bytes, [0], [1]), tf.int32)

depth_major = tf.reshape(tf.slice(vector_bytes, [1], [3072]), [3, 32, 32])
uint8image = tf.transpose(depth_major, [1, 2, 0])

#print(sess.run(uint8image))
A = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
print(A.get_shape())
Example #44
0
    def load_train_batch(self):
        """Load a batch of training instances.
        """
        opt = self.opt

        # Load the list of training files into queues
        #TODO
        if opt.train_lite:
            file_list = self.format_file_list(opt.dataset_dir,
                                              opt.filelist_dir, 'train_lite')
        else:
            file_list = self.format_file_list(opt.dataset_dir,
                                              opt.filelist_dir, 'train')
        image_paths_queue = tf.train.string_input_producer(
            file_list['image_file_list'], shuffle=False)
        cam_paths_queue = tf.train.string_input_producer(
            file_list['cam_file_list'], shuffle=False)

        # Load camera intrinsics
        cam_reader = tf.TextLineReader()
        _, raw_cam_contents = cam_reader.read(cam_paths_queue)
        rec_def = []
        for i in range(9):
            rec_def.append([1.])
        raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def)
        raw_cam_vec = tf.stack(raw_cam_vec)
        intrinsics = tf.reshape(raw_cam_vec, [3, 3])

        # Load images
        img_reader = tf.WholeFileReader()
        _, image_contents = img_reader.read(image_paths_queue)
        image_seq = tf.image.decode_jpeg(image_contents)
        tgt_image, src_image_stack = \
            self.unpack_image_sequence(
                image_seq, opt.img_height, opt.img_width, opt.num_source)

        #TODO Load Semantics
        #     See cityscape label defs in https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L62
        #     Also notice that deeplabv3+ uses `train_id` https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/datasets/build_cityscapes_data.py#L46
        #     Color maps are in https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/utils/get_dataset_colormap.py#L207
        if opt.sem_assist:
            sem_paths_queue = tf.train.string_input_producer(
                file_list['sem_image_file_list'], shuffle=False)
            sem_reader = tf.WholeFileReader()
            sem_keys, sem_contents = sem_reader.read(sem_paths_queue)

            if opt.load_from_raw:
                sem_seq = tf.reshape(
                    tf.decode_raw(sem_contents, tf.uint8),
                    [1, opt.img_height, (opt.num_source + 1) * opt.img_width])
            else:
                sem_seq = tf.py_func(read_npy_file, [sem_keys], [
                    tf.uint8,
                ])

        #TODO Load Instances: we use COCO
        #     Two channels: class and id level. For id level we only use the edge
        if opt.ins_assist:
            ins_paths_queue = tf.train.string_input_producer(
                file_list['ins_image_file_list'], shuffle=False)
            ins_reader = tf.WholeFileReader()
            ins_keys, ins_contents = ins_reader.read(ins_paths_queue)

            if opt.load_from_raw:
                ins_seq = tf.reshape(tf.decode_raw(ins_contents, tf.uint8), [
                    1, opt.img_height, (opt.num_source + 1) * opt.img_width, 2
                ])
            else:
                ins_seq = tf.py_func(read_npy_file, [ins_keys], [
                    tf.uint8,
                ])

        #TODO 1. SHUFFLE BATCH
        # Form training batches
        seed = random.randint(0, 2**31 - 1)
        min_after_dequeue = 2048
        capacity = min_after_dequeue + opt.num_threads * opt.batch_size

        if opt.sem_assist and opt.ins_assist:
            src_image_stack, tgt_image, intrinsics, sem_seq, ins_seq = tf.train.shuffle_batch(
                [
                    src_image_stack, tgt_image, intrinsics, sem_seq[0],
                    ins_seq[0]
                ], opt.batch_size, capacity, min_after_dequeue,
                opt.num_threads, seed)

        elif opt.sem_assist:
            src_image_stack, tgt_image, intrinsics, sem_seq = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics, sem_seq[0]],
                opt.batch_size, capacity, min_after_dequeue, opt.num_threads,
                seed)

        elif opt.ins_assist:
            src_image_stack, tgt_image, intrinsics, ins_seq = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics, ins_seq[0]],
                opt.batch_size, capacity, min_after_dequeue, opt.num_threads,
                seed)

        else:
            src_image_stack, tgt_image, intrinsics = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics], opt.batch_size,
                capacity, min_after_dequeue, opt.num_threads, seed)

        # semantic segmentation
        tgt_sem = None
        tgt_sem_map = None
        tgt_sem_mask = None
        tgt_sem_edge = None
        src_sem_stack = None
        src_sem_map_stack = None
        src_sem_mask_stack = None
        src_sem_edge_stack = None

        # ins0 ~ instance level, but still class segmentation
        tgt_ins0 = None
        tgt_ins0_map = None
        tgt_ins0_edge = None
        src_ins0_stack = None
        src_ins0_map_stack = None
        src_ins0_edge_stack = None

        # ins1 ~ instance level, but this is id segmentation
        tgt_ins1_edge = None
        src_ins1_edge_stack = None

        #TODO 2. TRANSFORMATION AND UNPACKING
        if opt.sem_assist:
            #TODO get one-hot encoded         sem_oh_seq (4,128,1248,19)X{0,1}
            sem_oh_seq = tf.cast(
                tf.one_hot(sem_seq, on_value=1, depth=opt.sem_num_class),
                tf.uint8)
            #TODO decouple   tgt_sem (4,128,1248,19)X{0,1}   src_sem_stack (4,128,1248,2*19)X{0,1}
            tgt_sem, src_sem_stack = self.unpack_sem_sequence_batch_atom(
                sem_oh_seq, opt.sem_num_class)

            #TODO get densemap     sem_map_seq (4,128,1248,1)X{0,1,...,18}
            sem_map_seq = tf.expand_dims(sem_seq, -1)
            #TODO decouple   tgt_sem_map (4,128,1248,1)X{0,1,...,18}   src_sem_map_stack (4,128,1248,2*1)X{0,1,...,18}
            tgt_sem_map, src_sem_map_stack = self.unpack_sem_sequence_batch_atom(
                sem_map_seq, 1)

            if opt.sem_mask_explore:
                #TODO get sem mask   sem_mask_seq (4,128,1248,c) here we assume c=1
                sem_mask_seq = self.get_sem_mask_batch(sem_seq)
                #TODO decouple   tgt_sem_mask (4,128,1248,c)   src_sem_mask_stack (4,128,1248,2*c)
                tgt_sem_mask, src_sem_mask_stack = self.unpack_sem_sequence_batch_atom(
                    sem_mask_seq, 1)

            if opt.sem_edge_explore:
                #TODO get sem edge   sem_edge_seq (4,128,1248,c) here we assume c=1
                sem_edge_seq = self.get_sem_edge_batch(sem_seq)
                #TODO decouple   tgt_sem_edge (4,128,1248,c)   src_sem_edge_stack (4,128,1248,2*c)
                tgt_sem_edge, src_sem_edge_stack = self.unpack_sem_sequence_batch_atom(
                    sem_edge_seq, 1)

        if opt.ins_assist:
            ins0_seq = ins_seq[:, :, :, 0]
            ins1_seq = ins_seq[:, :, :, 1]

            #TODO get one-hot  ins0_oh_seq (4,128,1248,81)X{0,1}
            ins0_oh_seq = tf.cast(
                tf.one_hot(ins0_seq, on_value=1, depth=opt.ins_num_class),
                tf.uint8)
            #ins1_oh_seq = tf.cast(tf.one_hot(ins1_seq, on_value=1, depth = 255), tf.uint8)

            #TODO decouple   tgt_ins0 (4,128,1248,81)X{0,1}   src_ins0_stack (4,128,1248,2*81)X{0,1}
            tgt_ins0, src_ins0_stack = self.unpack_sem_sequence_batch_atom(
                ins0_oh_seq, opt.ins_num_class)
            #tgt_ins1, src_ins1_stack = self.unpack_sem_sequence_batch_atom(ins1_oh_seq, opt.ins_num_class)

            #TODO get densemap  sem_ins0_seq (4,128,1248,1)X{0,1,...,80}
            ins0_map_seq = ins_seq[:, :, :, :1]
            ins1_map_seq = ins_seq[:, :, :, 1:]

            #TODO decouple  tgt_ins0_map (4,128,1248,1)X{0,1,...,80}  src_ins0_map_stack (4,128,1248,2*1)X{0,1,...,80}
            tgt_ins0_map, src_ins0_map_stack = self.unpack_sem_sequence_batch_atom(
                ins0_map_seq, 1)
            tgt_ins1_map, src_ins1_map_stack = self.unpack_sem_sequence_batch_atom(
                ins1_map_seq, 1)

            if opt.ins0_edge_explore:
                #TODO get edge   ins0_edge_seq (4,128,1248,c)  here we assume c=1
                ins0_edge_seq = self.get_sem_edge_batch(ins0_seq)
                #TODO decouple   tgt_ins0_edge (4,128,1248,c)  src_ins0_edge_stack (4,128,1248,2*c)
                tgt_ins0_edge, src_ins0_edge_stack = self.unpack_sem_sequence_batch_atom(
                    ins0_edge_seq, 1)

            if opt.ins1_edge_explore:
                #TODO get edge   ins1_edge_seq (4,128,1248,c) here we assume c=1
                ins1_edge_seq = self.get_sem_edge_batch(ins1_seq)
                #TODO decouple   tgt_ins1_edge (4,128,1248,c)   src_ins1_edge_stack (4,128,1248,2*c)
                tgt_ins1_edge, src_ins1_edge_stack = self.unpack_sem_sequence_batch_atom(
                    ins1_edge_seq, 1)

        #TODO 3. DATA AUGMENTATION
        image_all = tf.concat([tgt_image, src_image_stack], axis=3)
        image_all, intrinsics, aug_params = self.data_augmentation(
            image_all, intrinsics, opt.img_height,
            opt.img_width)  #TODO changed API

        if opt.sem_assist:
            ##TODO Do the same data augmentation for semantic segmentations
            tgt_sem, src_sem_stack = self.data_aug(tgt_sem, src_sem_stack,
                                                   aug_params, "bilinear")
            tgt_sem_map, src_sem_map_stack = self.data_aug(
                tgt_sem_map, src_sem_map_stack, aug_params, "neighbor")

            if self.opt.sem_mask_explore:
                tgt_sem_mask, src_sem_mask_stack = \
                    self.data_aug(tgt_sem_mask, src_sem_mask_stack, aug_params, "bilinear")

            if self.opt.sem_edge_explore:
                tgt_sem_edge, src_sem_edge_stack = \
                    self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this settings self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "neighbor")

        if opt.ins_assist:
            ##TODO Do the same data augmentation for instance segmentations
            tgt_ins0, src_ins0_stack = self.data_aug(tgt_ins0, src_ins0_stack,
                                                     aug_params, "bilinear")
            #tgt_ins1, src_ins1_stack = self.data_aug(tgt_ins1, src_ins1_stack, aug_params, "bilinear")

            tgt_ins0_map, src_ins0_map_stack = self.data_aug(
                tgt_ins0_map, src_ins0_map_stack, aug_params, "neighbor")
            #tgt_ins1_map, src_ins1_map_stack = self.data_aug(tgt_ins1_map, src_ins1_map_stack, aug_params, "neighbor")

            if self.opt.ins0_edge_explore:
                tgt_ins0_edge, src_ins0_edge_stack = \
                    self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this settings self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "neighbor")

            if self.opt.ins1_edge_explore:
                tgt_ins1_edge, src_ins1_edge_stack = \
                    self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this settings self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "neighbor")

        # 4. RETURN
        # image_channels=3*opt.seq_length
        tgt_image = image_all[:, :, :, :3]
        src_image_stack = image_all[:, :, :, 3:]  #3:image_channels]
        intrinsics = self.get_multi_scale_intrinsics(intrinsics,
                                                     opt.num_scales)

        # if opt.sem_assist and opt.ins_assist:
        return tgt_image, src_image_stack, intrinsics, \
                [tgt_sem, tgt_sem_map, tgt_sem_mask, tgt_sem_edge], \
                [src_sem_stack, src_sem_map_stack, src_sem_mask_stack, src_sem_edge_stack], \
                [tgt_ins0, tgt_ins0_map, tgt_ins0_edge, tgt_ins1_edge], \
                [src_ins0_stack, src_ins0_map_stack, src_ins0_edge_stack, src_ins1_edge_stack]
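The semantic and instance branches above repeatedly convert a dense label map into a one-hot channel stack with tf.one_hot before splitting it into target and source frames. A small self-contained illustration of that encoding step (toy values only, independent of the class methods used above):

import numpy as np
import tensorflow as tf

# Toy dense segmentation map: batch of 1, 2x3 pixels, class ids in {0, 1, 2}.
sem_seq = tf.constant(np.array([[[0, 1, 2],
                                 [2, 1, 0]]]), dtype=tf.int32)
num_classes = 3  # stands in for opt.sem_num_class

# Same pattern as the loader: one-hot encode to [batch, H, W, num_classes]
# and keep a dense single-channel map alongside it.
sem_oh = tf.cast(tf.one_hot(sem_seq, depth=num_classes, on_value=1), tf.uint8)
sem_map = tf.expand_dims(sem_seq, -1)

with tf.Session() as sess:
    oh_val, map_val = sess.run([sem_oh, sem_map])
    print(oh_val.shape)   # (1, 2, 3, 3)
    print(map_val.shape)  # (1, 2, 3, 1)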
Example #45
0
    num_reader_threads=
    1  # number of threads for prefetching SequenceExample protos.
)
serialized_sequence_example = input_queue.dequeue()
# serialized_sequence_example = tf.train.string_input_producer(["train.cat_caption"])   # doesn't work
context, sequence = tf.parse_single_sequence_example(
    serialized=serialized_sequence_example,
    context_features={
        "image/img_raw": tf.FixedLenFeature([], dtype=tf.string)
    },
    sequence_features={
        "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    })

img = tf.decode_raw(context["image/img_raw"], tf.uint8)
img = tf.reshape(img, [height, width, 3])
img = tf.image.convert_image_dtype(img, dtype=tf.float32)

try:
    # for TensorFlow 0.11
    img = tf.image.resize_images(img,
                                 size=(resize_height, resize_width),
                                 method=tf.image.ResizeMethod.BILINEAR)
except Exception:
    # for TensorFlow 0.10
    img = tf.image.resize_images(img,
                                 new_height=resize_height,
                                 new_width=resize_width,
                                 method=tf.image.ResizeMethod.BILINEAR)
# Crop to final dimensions.
Example #46
0
def read_and_decode(tfrecords_filename, batch_size):

    filename_queue = tf.train.string_input_producer(tfrecords_filename,
                                                    num_epochs=100)

    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'seq_len': tf.FixedLenFeature([], tf.int64),
            'read_length': tf.FixedLenFeature([], tf.string),
            'read_addr': tf.FixedLenFeature([], tf.string),
            'ip': tf.FixedLenFeature([], tf.string),
            'sp': tf.FixedLenFeature([], tf.string),
            'write_length': tf.FixedLenFeature([], tf.string),
            'write_addr': tf.FixedLenFeature([], tf.string),
            'instruction_id': tf.FixedLenFeature([], tf.string),
            'read2_length': tf.FixedLenFeature([], tf.string),
            'read2_addr': tf.FixedLenFeature([], tf.string),
            'bp': tf.FixedLenFeature([], tf.string)
        })

    label = tf.cast(features['label'], tf.int32)
    #seq_len = tf.cast(features['seq_len'], tf.int32)
    seq_shape = [SEQ_LEN, 1]

    list_vars = []

    for field in [
            'read_length', 'read_addr', 'ip', 'sp', 'write_length',
            'write_addr', 'read2_addr', 'read2_length', 'bp'
    ]:

        new_field = tf.reshape(tf.decode_raw(features[field], tf.float64),
                               seq_shape)
        relative_field = tf.cast(cos_relative_positions(new_field), tf.float32)
        tf.summary.histogram('cos_relative_positions_{}'.format(field),
                             relative_field)
        list_vars.append(relative_field)
        print(relative_field)

    print(tf.concat(list_vars, axis=-1))

    instruction_id = tf.cast(
        tf.reshape(tf.decode_raw(features['instruction_id'], tf.float64),
                   seq_shape), tf.int32)

    X, instruction_ids, Y = tf.train.shuffle_batch(
        [tf.concat(list_vars, axis=-1), instruction_id, label],
        batch_size=batch_size,
        capacity=1000,
        num_threads=16,
        min_after_dequeue=2)

    tf.summary.histogram('All X', X)
    return X, instruction_ids, Y
Example #47
0
def read_cifar10(filename_queue):
    """Reads and parses examples from CIFAR10 data files.

  Recommendation: if you want N-way read parallelism, call this function
  N times.  This will give you N independent Readers reading different
  files & positions within those files, which will give better mixing of
  examples.

  Args:
    filename_queue: A queue of strings with the filenames to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0..9.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
  """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue.  No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(tf.strided_slice(record_bytes, [0], [label_bytes]),
                           tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])
    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
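As the docstring notes, read_cifar10 returns one example per call, so it is normally combined with a filename queue and a batching op. A minimal, hedged usage sketch (file names, batch size and queue capacities are placeholders):

import tensorflow as tf

filenames = ['cifar-10-batches-bin/data_batch_%d.bin' % i for i in range(1, 6)]
filename_queue = tf.train.string_input_producer(filenames)

read_input = read_cifar10(filename_queue)
image = tf.cast(read_input.uint8image, tf.float32)
label = tf.reshape(read_input.label, [1])  # give the label a static shape for batching

images, label_batch = tf.train.shuffle_batch(
    [image, label],
    batch_size=128,
    capacity=10000,
    min_after_dequeue=5000,
    num_threads=4)
labels = tf.reshape(label_batch, [128])  # [batch_size] int32 labels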
Example #48
0
features = tf.parse_single_example(serialized_example,
	features = {
	'image': tf.FixedLenFeature([], tf.string),
	'label': tf.FixedLenFeature([], tf.int64),
	'height': tf.FixedLenFeature([], tf.int64),
	'weight': tf.FixedLenFeature([], tf.int64),
	'channels': tf.FixedLenFeature([], tf.int64),
	}
	)
image = features['image']
label = features['label']
height = features['height']
weight = features['weight']
channels = features['channels']

decode_image = tf.decode_raw(image, tf.uint8)  # decode the raw bytes into a tensor
# height, weight and channels are tensors here, so the shape has to be set with a dynamic reshape
decode_image = tf.reshape(decode_image, tf.stack([height, weight, channels]))
# image preprocessing function defined earlier
image_size = 299
distorted_image = preprocess_for_train(decode_image, image_size, image_size, None)

# assemble examples into an input batch queue
min_after_dequeue = 10000
batch_size = 100
capacity = min_after_dequeue + 3 * batch_size
image_batch, label_batch = tf.train.shuffle_batch(
	[distorted_image, label], batch_size=batch_size,
	capacity=capacity, min_after_dequeue=min_after_dequeue
	)

logit = inference(image_batch)
def get_batch(paths, options):
    """Returns a data split of the RECOLA dataset, which was saved in tfrecords format.
    Args:
        paths: list with paths to data files
        options: dict with data settings
    Returns:
        The raw audio examples and the corresponding arousal/valence
        labels.
    """
    batch_size = options['batch_size']
    frame_size = options['frame_size']
    num_channels = options['num_channels']
    num_classes = options['num_classes']
    crop_size = options['crop_size']
    # max_in_len = options['max_in_len']
    # max_out_len = options['max_out_len']
    time_window_len = options['time_window_len']

    shuffle = options.get('shuffle', False)
    horizontal_flip = options.get('horizontal_flip', False)
    random_crop = options.get('random_crop', False)
    # root_path = Path(dataset_dir) / split_name
    # paths = [str(x) for x in root_path.glob('*.tfrecords')]

    filename_queue = tf.train.string_input_producer(paths, shuffle=shuffle)

    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'video': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.string),
            'in_seq_len': tf.FixedLenFeature([], tf.int64),
            'out_seq_len': tf.FixedLenFeature([], tf.int64)
        }
    )

    video = tf.cast(tf.decode_raw(features['video'], tf.uint8), tf.float32)  # / 255.
    label = tf.cast(tf.decode_raw(features['label'], tf.uint8), tf.int32)
    in_seq_len = tf.cast(features['in_seq_len'], tf.int32)
    out_seq_len = tf.cast(features['out_seq_len'], tf.int32)

    # perform bucketing with input_length being the single filter (need to add out_length buckets)
    # Number of threads should always be one, in order to load samples sequentially.
    _seq_lens, [encoder_inputs, target_labels, encoder_inputs_lengths, target_labels_lengths] = \
        tf.contrib.training.bucket_by_sequence_length(in_seq_len,
                                                      [video, label, in_seq_len, out_seq_len], batch_size,
                                                      [20, 30, 50, 60, 88, 400],
                                                      num_threads=1, capacity=500, dynamic_pad=True,
                                                      allow_smaller_final_batch=True)
    # encoder_inputs, target_labels, encoder_inputs_lengths, target_labels_lengths = \
    #     tf.train.batch([video, label, in_seq_len, out_seq_len], batch_size,
    #                    num_threads=1, capacity=500, dynamic_pad=True,
    #                    allow_smaller_final_batch=True)

    encoder_inputs = tf.reshape(encoder_inputs, (batch_size,
                                                 tf.reduce_max(encoder_inputs_lengths),
                                                 frame_size,
                                                 frame_size,
                                                 num_channels))
    target_labels = tf.reshape(target_labels, (batch_size, -1))

    # create decoder_inputs
    # add <sos> token
    # decoder_inputs = tf.identity(target_labels)
    sos_slice = tf.constant(options['num_classes'] - 2, dtype=tf.int32, shape=[options['batch_size'], 1])
    decoder_inputs = tf.concat([sos_slice, target_labels], axis=1)
    decoder_inputs = tf.one_hot(decoder_inputs, num_classes)

    if crop_size is not None and random_crop:
        encoder_inputs = tf.random_crop(encoder_inputs, [batch_size,
                                                         tf.reduce_max(encoder_inputs_lengths),
                                                         crop_size,
                                                         crop_size,
                                                         num_channels])
    elif crop_size:
        start_xy = int((frame_size - crop_size) / 2)
        encoder_inputs = tf.slice(encoder_inputs,
                                  [0, 0, start_xy, start_xy, 0],
                                  [batch_size, tf.reduce_max(encoder_inputs_lengths),
                                   crop_size, crop_size, num_channels])

    encoder_inputs = tf.reshape(encoder_inputs, [batch_size, -1, crop_size, crop_size, 1])
    # random left right flip
    if horizontal_flip:
        sample = tf.random_uniform(shape=[], minval=0, maxval=1, dtype=tf.float32)
        option = tf.less(sample, 0.5)
        encoder_inputs = tf.cond(option,
                                 lambda: tf.map_fn(video_left_right_flip, encoder_inputs),
                                 lambda: tf.map_fn(tf.identity, encoder_inputs))
    encoder_inputs = normalize(encoder_inputs)

    # slice video into time_window_len consecutive frames with stride 1
    if time_window_len != 1:
        # pad encoder_inputs s.t. each frame is in the same number of slices
        ei_paddings = [[0, 0], [time_window_len-1, time_window_len-1], [0, 0], [0, 0], [0, 0]]
        padded_encoder_inputs = tf.pad(encoder_inputs, ei_paddings, 'CONSTANT', constant_values=0)
        encoder_inputs = slice_video(padded_encoder_inputs,
                                     dims=[batch_size, tf.reduce_max(encoder_inputs_lengths) + 2*(time_window_len - 1),
                                           crop_size, crop_size, num_channels],
                                     time_window=time_window_len)
        encoder_inputs = tf.reshape(encoder_inputs, [batch_size, -1, crop_size, crop_size, time_window_len])
        encoder_inputs_lengths = encoder_inputs_lengths + time_window_len - 1

    return encoder_inputs, target_labels, decoder_inputs, encoder_inputs_lengths, target_labels_lengths
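get_batch reads everything it needs from the options dict, so calling it amounts to assembling that dict with the keys accessed above. A hedged sketch (the paths and values are placeholders; most keys are indexed directly, so they must all be present):

options = {
    'batch_size': 8,
    'frame_size': 96,
    'num_channels': 1,
    'num_classes': 30,      # the <sos> token is encoded as num_classes - 2
    'crop_size': 88,
    'time_window_len': 5,
    'shuffle': True,
    'horizontal_flip': True,
    'random_crop': True,
}

paths = ['data/train_000.tfrecords', 'data/train_001.tfrecords']  # placeholder file list

(encoder_inputs, target_labels, decoder_inputs,
 encoder_inputs_lengths, target_labels_lengths) = get_batch(paths, options)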
Example #50
0
def crnn_fn(features, labels, mode, params):
    """
    :param features: dict {
                            'image'
                            'images_width'
                            'corpora'
                            }
    :param labels: labels. flattened (1D) array with encoded labels (one code per character)
    :param mode:
    :param params: dict {
                            'Params'
                        }
    :return:
    """

    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    # Load pre-trained cnn model
    if parameters.cnn_pretained_ckpt_path:
        exclude = ['deep_bidirectional_lstm']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(
            exclude=exclude)
        tf.train.init_from_checkpoint(
            parameters.cnn_pretained_ckpt_path,
            {v.name.split(':')[0]: v
             for v in variables_to_restore})

    if mode != tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['image'], (mode == tf.estimator.ModeKeys.TRAIN),
                    parameters.cnn_model,
                    summaries=False)

    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                features['corpus'],
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = parameters.width_down_sampling

    seq_len_inputs = tf.divide(
        features['image_width'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {'prob': logprob, 'raw_predictions': raw_pred}

    if not mode == tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes
        keys = [c for c in parameters.alphabet.encode('latin1')]
        values = parameters.alphabet_codes

        # Convert string label to code label
        with tf.name_scope('str2code_conversion'):
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(
                    keys, values, key_dtype=tf.int64, value_dtype=tf.int64),
                -1)
            splitted = tf.string_split(labels, delimiter='')
            values_int = tf.cast(
                tf.squeeze(tf.decode_raw(splitted.values, tf.uint8)), tf.int64)
            codes = table_str2int.lookup(values_int)
            codes = tf.cast(codes, tf.int32)
            sparse_code_target = tf.SparseTensor(splitted.indices, codes,
                                                 splitted.dense_shape)

        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0],
                    tf.int32),  #array of labels length
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error

        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=
                True,  # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()
        # # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        loss_ema = ema.average(loss_ctc)

        # Train op
        # --------
        if parameters.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(
                parameters.learning_rate,
                global_step,
                parameters.learning_rate_steps,
                parameters.learning_rate_decay,
                staircase=True)
        else:
            learning_rate = tf.constant(parameters.learning_rate)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5,
                epsilon=1e-07)  # at 1e-08 sometimes exploding gradient
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)

        if not parameters.train_cnn:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          'deep_bidirectional_lstm')
            print('Training LSTM only')
        else:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc,
                                    global_step=global_step,
                                    var_list=trainable)

        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=parameters.nb_logprob)

            # likelihood. In the future, rename it 'confidence' and take the softmax of log_probability
            predictions_dict['score'] = log_probability

            sequence_lengths_pred = [
                tf.bincount(tf.cast(sparse_code_pred[i].indices[:, 0],
                                    tf.int32),
                            minlength=tf.shape(predictions_dict['prob'])[1])
                for i in range(parameters.top_paths)
            ]

            pred_chars = [
                table_int2str.lookup(sparse_code_pred[i])
                for i in range(parameters.top_paths)
            ]

            list_preds = [
                get_words_from_chars(pred_chars[i].values,
                                     sequence_lengths=sequence_lengths_pred[i])
                for i in range(parameters.top_paths)
            ]

            predictions_dict['words'] = tf.stack(list_preds)

            tf.summary.text('predicted_words',
                            predictions_dict['words'][0][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred[0], tf.cast(sparse_code_target,
                                             dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to the decoding alphabet to compare predicted and ground-truth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'][0],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ')

    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions_dict,
                                      loss=loss_ctc,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops,
                                      export_outputs=export_outputs,
                                      scaffold=tf.train.Scaffold())
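crnn_fn follows the tf.estimator model-function signature, so it would typically be wired up through an Estimator whose params dict carries the Params object the function asserts on. A minimal sketch, assuming a Params class and a train_input_fn exist elsewhere in the project:

import tensorflow as tf

parameters = Params()  # placeholder; the real constructor arguments live elsewhere in the project

estimator = tf.estimator.Estimator(
    model_fn=crnn_fn,
    params={'Params': parameters},
    model_dir='./crnn_model',
    config=tf.estimator.RunConfig(save_checkpoints_steps=1000))

estimator.train(input_fn=train_input_fn, max_steps=100000)
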
def read_and_decode(filename_queue):  # input: a queue of filenames
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(  # parse the example
        serialized_example,
        # the feature keys must be listed explicitly
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),  # the image is stored as a string
            'label': tf.FixedLenFeature([], tf.int64),  # the label is stored as int64
        })
    # A BytesList feature must be decoded again: the 0-D string Tensor becomes a 1-D uint8 Tensor
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    mnist = input_data.read_data_sets('data/',
                                      dtype=tf.uint8,  # note: the encoding here is uint8
                                      reshape=False,
                                      # validation_size=FLAGS.validation_size
                                      )
    image.set_shape([784])  # 28 * 28 MNIST pixels
    # Tensor("input/DecodeRaw:0", shape=(784,), dtype=uint8)
    # after scaling, the image tensor is Tensor("input/sub:0", shape=(784,), dtype=float32)
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    # convert the label from uint8 to int32
    # the label tensor is Tensor("input/Cast_1:0", shape=(), dtype=int32)
    label = tf.cast(features['label'], tf.int32)

    return image, label

# Next, use tf.train.shuffle_batch to shuffle the examples produced above and form mini-batch tensors.
def inputs(train, batch_size, num_epochs):
    # Arguments:
    #   batch_size: number of examples per training batch
    #   num_epochs: number of passes over the data; 0/None means train indefinitely
    '''
    Returns: a tuple (images, labels)
     * images: float, shape [batch_size, mnist.IMAGE_PIXELS], range [-0.5, 0.5].
     * labels: int32, shape [batch_size], range [0, mnist.NUM_CLASSES)
     Note: the tf.train.QueueRunner must be started with tf.train.start_queue_runners()
    '''
    if not num_epochs: num_epochs = None
    # path to the data file, e.g. /tmp/data/train.tfrecords or /tmp/data/validation.tfrecords
    filename = os.path.join('/home/niangu/桌面/TensorFlow/test.tfrecords', )
    with tf.name_scope('input'):
        # tf.train.string_input_producer returns a QueueRunner holding a FIFOQueue
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)  # for large datasets, split into several files and pass the list of names

        image, label = read_and_decode(filename_queue)

        # shuffle the examples and pack them into batches of size batch_size
        # tf.train.shuffle_batch creates a RandomShuffleQueue and starts two threads
        images, sparse_labels = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, num_threads=2,
            capacity=1000 + 3 * batch_size,
            min_after_dequeue=1000)  # keep a reserve in the queue so each dequeue has enough data to shuffle

        return images, sparse_labels

# Finally, feed the generated batch tensors into the network and train.
def run_training():
    with tf.Graph().as_default():
        # input images and labels
        images, labels = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
        # build a graph that computes predictions from the inference model
        logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)

        loss = mnist.loss(logits, labels)  # define the loss function
        # Add to the Graph operations that train the model
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Initialize variables. Note: string_input_producer creates an internal epoch counter
        # in the tf.GraphKeys.LOCAL_VARIABLES collection, so local_variables_initializer() must be run as well.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess = tf.Session()
        sess.run(init_op)
        # Start input enqueue threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time

                # print progress every 100 steps
                if step % 100 == 0:
                    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
        finally:
            coord.request_stop()  # ask the other threads to stop

        coord.join(threads)
        sess.close()
Example #52
0
def read_data(filename_queue, is_train):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'name':
                                           tf.FixedLenFeature([], tf.string),
                                           'm':
                                           tf.FixedLenFeature([], tf.int64),
                                           'n':
                                           tf.FixedLenFeature([], tf.int64),
                                           'query':
                                           tf.FixedLenFeature([], tf.string),
                                           'align':
                                           tf.FixedLenFeature([], tf.string),
                                           'y':
                                           tf.FixedLenFeature([], tf.string),
                                           'mask':
                                           tf.FixedLenFeature([], tf.string),
                                           'gap':
                                           tf.VarLenFeature(tf.float32),
                                           'identity':
                                           tf.VarLenFeature(tf.float32),
                                           'identity_cons':
                                           tf.VarLenFeature(tf.float32),
                                           'ss_dssp':
                                           tf.FixedLenFeature([], tf.string),
                                           'asa_num':
                                           tf.VarLenFeature(tf.int64),
                                       })
    name = features["name"]
    m = tf.cast(features["m"], tf.int32)
    n = tf.cast(features["n"], tf.int32)
    align = tf.reshape(tf.decode_raw(features["align"], tf.uint8),
                       tf.stack([m, n]))
    query = tf.decode_raw(features["query"], tf.uint8)
    y = tf.reshape(tf.decode_raw(features["y"], tf.uint8), tf.stack([n, n]))
    mask = tf.reshape(tf.decode_raw(features["mask"], tf.uint8),
                      tf.stack([n, n]))
    gap = features["gap"].values
    identity = features["identity"].values
    identity_cons = features["identity_cons"].values
    ss_dssp = tf.decode_raw(features["ss_dssp"], tf.uint8)
    asa_num = tf.cast(features["asa_num"].values, tf.int32)
    gap = features["gap"].values
    identity = features["identity"].values
    identity_cons = features["identity_cons"].values

    #clip
    def clipping(align, query, ss_dssp, asa_num, y, mask):
        begin = tf.random_uniform([],
                                  maxval=tf.shape(align)[1] - n_clip,
                                  dtype=tf.int32)
        align = align[:, begin:begin + n_clip]
        query = query[begin:begin + n_clip]
        ss_dssp = ss_dssp[begin:begin + n_clip]
        asa_num = asa_num[begin:begin + n_clip]
        y = y[begin:begin + n_clip, begin:begin + n_clip]
        mask = mask[begin:begin + n_clip, begin:begin + n_clip]
        return align, query, ss_dssp, asa_num, y, mask

    align, query, ss_dssp, asa_num, y, mask = tf.cond(
        (n > n_clip) & (is_train),
        lambda: clipping(align, query, ss_dssp, asa_num, y, mask), lambda:
        (align, query, ss_dssp, asa_num, y, mask))

    #sampling
    def sampling(align, gap, identity, identity_cons):
        idx = tf.random_uniform([n_alignment], maxval=m, dtype=tf.int32)
        align = tf.gather_nd(align, tf.expand_dims(idx, 1))
        gap = tf.gather_nd(gap, tf.expand_dims(idx, 1))
        identity = tf.gather_nd(identity, tf.expand_dims(idx, 1))
        identity_cons = tf.gather_nd(identity_cons, tf.expand_dims(idx, 1))
        return align, gap, identity, identity_cons

    align, gap, identity, identity_cons = tf.cond(
        (m > n_alignment) & (is_train),
        lambda: sampling(align, gap, identity, identity_cons), lambda:
        (align, gap, identity, identity_cons))

    return name, align, query, y, mask, gap, identity, identity_cons, ss_dssp, asa_num
Example #53
0
    def get(self):
        """ Provides input data to the graph. """
        # calculate size of each record (this lists what is contained in the db and how many bytes are occupied)
        record_bytes = 0

        encoding_bytes = 4
        kp_xyz_entries = 3 * self.num_kp
        record_bytes += encoding_bytes * kp_xyz_entries

        encoding_bytes = 4
        kp_uv_entries = 2 * self.num_kp
        record_bytes += encoding_bytes * kp_uv_entries

        kp_vis_entries = self.num_kp
        record_bytes += encoding_bytes * kp_vis_entries

        image_bytes = self.image_size[0] * self.image_size[1] * 3
        record_bytes += image_bytes
        """ READ DATA ITEMS"""
        # Start reader
        reader = tf.FixedLengthRecordReader(header_bytes=0,
                                            record_bytes=record_bytes)
        _, value = reader.read(
            tf.train.string_input_producer([self.path_to_db]))

        # decode to floats
        bytes_read = 0
        data_dict = dict()
        record_bytes_float32 = tf.decode_raw(value, tf.float32)

        # 1. Read keypoint xyz
        keypoint_xyz21 = tf.reshape(
            tf.slice(record_bytes_float32, [bytes_read // 4],
                     [kp_xyz_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes * kp_xyz_entries
        keypoint_xyz21 /= 1000.0  # scale to meters
        keypoint_xyz21 = self.convert_kp(keypoint_xyz21)

        # calculate wrist coord
        if self.use_wrist_coord:
            wrist_xyz = keypoint_xyz21[16, :] + 2.0 * (keypoint_xyz21[0, :] -
                                                       keypoint_xyz21[16, :])
            keypoint_xyz21 = tf.concat(
                [tf.expand_dims(wrist_xyz, 0), keypoint_xyz21[1:, :]], 0)

        data_dict['keypoint_xyz21'] = keypoint_xyz21

        # 2. Read keypoint uv AND VIS
        keypoint_uv_vis21 = tf.reshape(
            tf.slice(record_bytes_float32, [bytes_read // 4],
                     [kp_uv_entries + kp_vis_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes * (kp_uv_entries + kp_vis_entries)
        keypoint_uv_vis21 = self.convert_kp(keypoint_uv_vis21)
        keypoint_uv21 = keypoint_uv_vis21[:, :2]
        keypoint_vis21 = tf.equal(keypoint_uv_vis21[:, 2], 1.0)

        # calculate wrist vis
        if self.use_wrist_coord:
            wrist_vis = tf.logical_or(keypoint_vis21[16], keypoint_vis21[0])
            keypoint_vis21 = tf.concat(
                [tf.expand_dims(wrist_vis, 0), keypoint_vis21[1:]], 0)

            wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] -
                                                     keypoint_uv21[16, :])
            keypoint_uv21 = tf.concat(
                [tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0)

        data_dict['keypoint_vis21'] = keypoint_vis21

        if self.coord_uv_noise:
            noise = tf.truncated_normal([42, 2],
                                        mean=0.0,
                                        stddev=self.coord_uv_noise_sigma)
            keypoint_uv21 += noise

        data_dict['keypoint_uv21'] = keypoint_uv21

        # decode to uint8
        record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

        # 4. Read image
        image = tf.reshape(
            tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
            [self.image_size[0], self.image_size[1], 3])
        image = tf.cast(image, tf.float32)
        bytes_read += image_bytes

        # subtract mean
        image = image / 255.0 - 0.5
        if self.hue_aug:
            image = tf.image.random_hue(image, self.hue_aug_max)
        data_dict['image'] = image
        """ CONSTANTS """
        # Camera intrinsics
        sx = 822.79041
        sy = 822.79041
        tx = 318.47345
        ty = 250.31296
        data_dict['cam_mat'] = tf.constant([[sx, 0.0, tx], [0.0, sy, ty],
                                            [0.0, 0.0, 1.0]])

        # Hand side: this dataset only contains left hands
        data_dict['hand_side'] = tf.one_hot(tf.constant(0, dtype=tf.int32),
                                            depth=2,
                                            on_value=1.0,
                                            off_value=0.0,
                                            dtype=tf.float32)

        assert bytes_read == record_bytes, "Doesn't add up."
        """ DEPENDENT DATA ITEMS: XYZ represenations. """
        # make coords relative to root joint
        kp_coord_xyz_root = keypoint_xyz21[0, :]  # this is the palm coord
        kp_coord_xyz21_rel = keypoint_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
        index_root_bone_length = tf.sqrt(
            tf.reduce_sum(
                tf.square(kp_coord_xyz21_rel[12, :] -
                          kp_coord_xyz21_rel[11, :])))
        data_dict['keypoint_scale'] = index_root_bone_length
        data_dict[
            'keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length  # normalized by length of 12->11

        # calculate local coordinates
        kp_coord_xyz21_local = bone_rel_trafo(
            data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
        data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

        # calculate viewpoint and coords in canonical coordinates
        kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(
            data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(
            kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
        data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
        data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)
        """ DEPENDENT DATA ITEMS: HAND CROP """
        if self.hand_crop:
            crop_center = keypoint_uv21[12, ::-1]

            # catch problem, when no valid kp available (happens almost never)
            crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)),
                                  lambda: crop_center,
                                  lambda: tf.constant([0.0, 0.0]))
            crop_center.set_shape([
                2,
            ])

            if self.crop_center_noise:
                noise = tf.truncated_normal(
                    [2], mean=0.0, stddev=self.crop_center_noise_sigma)
                crop_center += noise

            crop_scale_noise = tf.constant(1.0)
            if self.crop_scale_noise:
                crop_scale_noise = tf.squeeze(
                    tf.random_uniform([1], minval=1.0, maxval=1.2))

            if not self.use_wrist_coord:
                wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] -
                                                         keypoint_uv21[16, :])
                keypoint_uv21 = tf.concat(
                    [tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0)

            # select visible coords only
            kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
            kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
            kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

            # determine size of crop (measure spatial extent of hw coords first)
            min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
            max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0),
                                   self.image_size)

            # find out larger distance wrt the center of crop
            crop_size_best = 2 * tf.maximum(max_coord - crop_center,
                                            crop_center - min_coord)
            crop_size_best = tf.reduce_max(crop_size_best)
            crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0),
                                        500.0)

            # catch problem, when no valid kp available
            crop_size_best = tf.cond(
                tf.reduce_all(tf.is_finite(crop_size_best)),
                lambda: crop_size_best, lambda: tf.constant(200.0))
            crop_size_best.set_shape([])

            # calculate necessary scaling
            scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
            scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
            scale *= crop_scale_noise
            data_dict['crop_scale'] = scale

            if self.crop_offset_noise:
                noise = tf.truncated_normal(
                    [2], mean=0.0, stddev=self.crop_offset_noise_sigma)
                crop_center += noise

            # Crop image
            img_crop = crop_image_from_xy(tf.expand_dims(image, 0),
                                          crop_center, self.crop_size, scale)
            data_dict['image_crop'] = tf.squeeze(img_crop)

            # Modify uv21 coordinates
            crop_center_float = tf.cast(crop_center, tf.float32)
            keypoint_uv21_u = (
                data_dict['keypoint_uv21'][:, 0] -
                crop_center_float[1]) * scale + self.crop_size // 2
            keypoint_uv21_v = (
                data_dict['keypoint_uv21'][:, 1] -
                crop_center_float[0]) * scale + self.crop_size // 2
            keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
            data_dict['keypoint_uv21'] = keypoint_uv21

            # Modify camera intrinsics
            scale = tf.reshape(scale, [
                1,
            ])
            scale_matrix = tf.dynamic_stitch([
                [0], [1], [2], [3], [4], [5], [6], [7], [8]
            ], [scale, [0.0], [0.0], [0.0], scale, [0.0], [0.0], [0.0], [1.0]])
            scale_matrix = tf.reshape(scale_matrix, [3, 3])

            crop_center_float = tf.cast(crop_center, tf.float32)
            trans1 = crop_center_float[0] * scale - self.crop_size // 2
            trans2 = crop_center_float[1] * scale - self.crop_size // 2
            trans1 = tf.reshape(trans1, [
                1,
            ])
            trans2 = tf.reshape(trans2, [
                1,
            ])
            trans_matrix = tf.dynamic_stitch(
                [[0], [1], [2], [3], [4], [5], [6], [7], [8]],
                [[1.0], [0.0], -trans2, [0.0], [1.0], -trans1, [0.0], [0.0],
                 [1.0]])
            trans_matrix = tf.reshape(trans_matrix, [3, 3])

            data_dict['cam_mat'] = tf.matmul(
                trans_matrix, tf.matmul(scale_matrix, data_dict['cam_mat']))
        """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints"""
        # create scoremaps from the subset of 2D annotations
        keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]],
                                 -1)

        scoremap_size = self.image_size

        if self.hand_crop:
            scoremap_size = (self.crop_size, self.crop_size)

        scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                     scoremap_size,
                                                     self.sigma,
                                                     valid_vec=keypoint_vis21)

        if self.scoremap_dropout:
            scoremap = tf.nn.dropout(scoremap,
                                     self.scoremap_dropout_prob,
                                     noise_shape=[1, 1, 21])
            scoremap *= self.scoremap_dropout_prob

        data_dict['scoremap'] = scoremap

        if self.random_crop_to_size:
            tensor_stack = tf.concat([
                data_dict['image'],
                tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32),
                               -1),
                tf.cast(data_dict['hand_mask'], tf.float32)
            ], 2)
            s = tensor_stack.get_shape().as_list()
            tensor_stack_cropped = tf.random_crop(
                tensor_stack,
                [self.random_crop_size, self.random_crop_size, s[2]])
            data_dict = dict(
            )  # delete everything else because the random cropping makes the data invalid anyway
            data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

        names, tensors = zip(*data_dict.items())

        if self.shuffle:
            tensors = tf.train.shuffle_batch_join([tensors],
                                                  batch_size=self.batch_size,
                                                  capacity=100,
                                                  min_after_dequeue=50,
                                                  enqueue_many=False)
        else:
            tensors = tf.train.batch_join([tensors],
                                          batch_size=self.batch_size,
                                          capacity=100,
                                          enqueue_many=False)

        return dict(zip(names, tensors))
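The byte bookkeeping at the top of get() fixes the on-disk layout: per record, num_kp*3 float32 xyz values, num_kp*2 float32 uv values, num_kp float32 visibility flags, then H*W*3 raw uint8 pixels. A minimal writer sketch under that assumption (the helper name and the millimeter convention are inferred from the reader, not given in the example):

import numpy as np

def write_record(fid, kp_xyz_mm, kp_uv, kp_vis, image):
    # kp_xyz_mm: (num_kp, 3) float32 in millimeters (the reader divides by 1000)
    # kp_uv:     (num_kp, 2) float32 pixel coordinates
    # kp_vis:    (num_kp,)   float32 visibility flags (1.0 = visible)
    # image:     (H, W, 3)   uint8 pixels
    fid.write(kp_xyz_mm.astype(np.float32).tobytes())
    fid.write(kp_uv.astype(np.float32).tobytes())
    fid.write(kp_vis.astype(np.float32).tobytes())
    fid.write(image.astype(np.uint8).tobytes())

# e.g.: with open(path_to_db, 'wb') as fid: write_record(fid, xyz, uv, vis, img)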
Example #54
0
 def parse_tfrecord_tf(record):
     features = tf.parse_single_example(record, features={
         'shape': tf.FixedLenFeature([3], tf.int64),
         'data': tf.FixedLenFeature([], tf.string)})
     data = tf.decode_raw(features['data'], tf.uint8)
     return tf.reshape(data, features['shape'])
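The parse function above implies the writer side: a 'shape' int64 feature of length 3 and a 'data' bytes feature holding the raw uint8 buffer. A minimal writer sketch under that assumption (function name and path are illustrative):

import tensorflow as tf

def write_tfrecord_tf(arr, path):
    # arr: uint8 array of rank 3, stored shape-first so parse_tfrecord_tf can reshape it
    example = tf.train.Example(features=tf.train.Features(feature={
        'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=list(arr.shape))),
        'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[arr.tobytes()])),
    }))
    with tf.python_io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())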
Example #55
0
 def decode_image(image):
     # Normalize from [0, 255] to [0.0, 1.0]
     image = tf.decode_raw(image, tf.uint8)
     image = tf.cast(image, tf.float32)
     image = tf.reshape(image, [784])
     return image / 255.0
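decode_image takes the raw string feature itself, so it is typically applied after parsing. A minimal sketch, assuming 28x28 MNIST images stored under a hypothetical 'image' key:

def _parse_example(serialized):
    features = tf.parse_single_example(
        serialized, features={'image': tf.FixedLenFeature([], tf.string)})
    return decode_image(features['image'])  # float32 vector of length 784 in [0.0, 1.0]

# dataset = tf.data.TFRecordDataset(['mnist.tfrecords']).map(_parse_example)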
Example #56
0
# parse the serialized example into a dict of tensors
# (serialized_example is assumed to be the output of a TFRecordReader.read)
tfrecord = tf.parse_single_example(
    serialized_example,
    features={
        'image/height'     : tf.FixedLenFeature([], tf.int64 ),
        'image/width'      : tf.FixedLenFeature([], tf.int64 ),
        'image/colorspace' : tf.FixedLenFeature([], tf.string),
        'image/channels'   : tf.FixedLenFeature([], tf.int64 ),
        'image/class/label': tf.FixedLenFeature([], tf.int64 ),
        'image/class/text' : tf.FixedLenFeature([], tf.string),
        'image/format'     : tf.FixedLenFeature([], tf.string),
        'image/filename'   : tf.FixedLenFeature([], tf.string),
        'image/encoded'    : tf.FixedLenFeature([], tf.string)
    },
    name='features'
)

# image was saved as uint8, so we have to decode tf.string as uint8.
imageT  = tf.decode_raw(tfrecord['image/encoded'], tf.uint8)

# since exported as tf.int64 there is no need for tf.decode_raw
heightT = tfrecord['image/height']
widthT  = tfrecord['image/width' ]

# remember, it's all just ops, have to run to get result
with tf.Session() as sess:

    # init vars
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # init summary file writer
    sfw = tf.summary.FileWriter(os.getcwd(),graph=sess.graph)
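
    # The decode ops above only describe the graph; nothing is read until they
    # are run. A minimal sketch, assuming `tfrecord` came from a queue-based
    # reader (string_input_producer + TFRecordReader) as in the other examples,
    # so the queue runners must be started first.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    img, h, w = sess.run([imageT, heightT, widthT])
    print(img.shape, h, w)

    coord.request_stop()
    coord.join(threads)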
Example #57
0
def read_cifar10(data_dir, is_train, batch_size, shuffle):
    """Read CIFAR10
    
    Args:
        data_dir: the directory of CIFAR10
        is_train: boolean
        batch_size: number of images per batch
        shuffle: whether to shuffle the examples in the batching queue
    Returns:
        label: 2D tensor, [batch_size, 10], tf.int32, one-hot encoded
        image: 4D tensor, [batch_size, height, width, 3], tf.float32
    
    """
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width * img_height * img_depth

    with tf.name_scope('input'):

        if is_train:
            filenames = [
                os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                for ii in np.arange(1, 6)
            ]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        filename_queue = tf.train.string_input_producer(filenames)

        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)

        key, value = reader.read(filename_queue)

        record_bytes = tf.decode_raw(value, tf.uint8)

        label = tf.slice(record_bytes, [0], [label_bytes])
        label = tf.cast(label, tf.int32)

        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])
        image = tf.transpose(image_raw,
                             (1, 2, 0))  # convert from D/H/W to H/W/D
        image = tf.cast(image, tf.float32)

        image = tf.image.per_image_standardization(
            image)  # subtract the mean and divide by the adjusted standard deviation

        if shuffle:
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=64,
                capacity=20000,
                min_after_dequeue=3000)
        else:
            images, label_batch = tf.train.batch([image, label],
                                                 batch_size=batch_size,
                                                 num_threads=64,
                                                 capacity=2000)
        # ONE-HOT
        n_classes = 10
        label_batch = tf.one_hot(label_batch, depth=n_classes)
        label_batch = tf.cast(label_batch, dtype=tf.int32)
        label_batch = tf.reshape(label_batch, [batch_size, n_classes])

        return images, label_batch
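read_cifar10 returns queue-backed tensors, so a session has to start the queue runners before a batch can be fetched. A minimal usage sketch (the data_dir path is illustrative):

images, labels = read_cifar10('/tmp/cifar-10-batches-bin',
                              is_train=True, batch_size=64, shuffle=True)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img_batch, lbl_batch = sess.run([images, labels])  # (64, 32, 32, 3), (64, 10)
    coord.request_stop()
    coord.join(threads)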
Example #58
0
def mnist_tfrecord_input(data_dir,
                         training=True,
                         sequence_length=20,
                         img_size=None,
                         batch_size=1,
                         seed=None):
    """Create input tfrecord tensors and queues.

    TFRecord:
      TFRecord(s) are assumed to be placed at `data_dir`.
      Each sample contains raw uint8 image sequences with key `'img_i'`.
      Training and validation sets are pre-split and their corresponding
      record files have the suffix `_trn.tfrecords` or `_val.tfrecords`.

    Preprocessing:
      Crop each image to a square one with size `min(ORIGINAL_HEIGHT, ORIGINAL_WIDTH)`
      and resize (bicubic) to `(IMG_WIDTH, IMG_HEIGHT)`. Normalize pixel value from
      [0, 255] to [0, 1]

    Args:
      data_dir: directory holding TFRecord(s).
      training: whether to use training or validation data.
      sequence_length: length of the video sequence.
      img_size: the (height, width) of the processed image input; if None, use the original size.
      batch_size: size of data mini-batches.
      seed: random seed for `shuffle_batch` generator.
    Returns:
      list of tensors corresponding to images. The images
      tensor is 5D, batch x time x height x width x 1.
    Raises:
      RuntimeError: if no files found.
    """
    file_suffix = '*_trn.tfrecords' if training else '*_val.tfrecords'
    filenames = gfile.Glob(os.path.join(data_dir, file_suffix))
    if not filenames:
        raise RuntimeError('No data files found.')

    filename_queue = tf.train.string_input_producer(filenames, shuffle=True)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    image_seq = []

    for i in range(sequence_length):
        # extract image tensor
        image_name = 'img_{}'.format(i)
        features = tf.parse_single_example(
            serialized_example,
            features={image_name: tf.FixedLenFeature([], tf.string)}
        )
        image = tf.decode_raw(features[image_name], tf.uint8)
        image = tf.reshape(image, shape=[ORIGINAL_HEIGHT, ORIGINAL_WIDTH, COLOR_CHAN])

        # preprocessing
        crop_size = min(ORIGINAL_HEIGHT, ORIGINAL_WIDTH)
        image = tf.image.resize_image_with_crop_or_pad(image, crop_size, crop_size)
        image = tf.reshape(image, [1, crop_size, crop_size, COLOR_CHAN])
        if img_size is None:
            img_size = (ORIGINAL_HEIGHT, ORIGINAL_WIDTH)
        if img_size[0] != img_size[1]:
            raise ValueError('Unequal height and width unsupported')
        image = tf.image.resize_bicubic(image, img_size)
        image = tf.cast(image, tf.float32) / 255.0

        image_seq.append(image)

    image_seq = tf.concat(axis=0, values=image_seq)

    image_batch = tf.train.shuffle_batch(
        tensors=[image_seq],
        batch_size=batch_size,
        capacity=100 * batch_size,
        min_after_dequeue=50 * batch_size,
        num_threads=batch_size,
        seed=seed
    )
    return image_batch
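The docstring fixes the record format: one string feature per frame, keyed 'img_0' through 'img_{sequence_length-1}', each holding raw uint8 pixels. A minimal writer sketch under that assumption (the helper name and output path are illustrative):

import tensorflow as tf

def write_sequence(frames, path):
    # frames: iterable of uint8 arrays, each [ORIGINAL_HEIGHT, ORIGINAL_WIDTH, COLOR_CHAN]
    feature = {
        'img_{}'.format(i): tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[frame.tobytes()]))
        for i, frame in enumerate(frames)
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    with tf.python_io.TFRecordWriter(path) as writer:  # e.g. 'seq_00001_trn.tfrecords'
        writer.write(example.SerializeToString())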
Example #59
0
def read_cifar100(filename_queue, coarse_or_fine='fine'):
    """Reads and parses examples from CIFAR100 data files.

  Recommendation: if you want N-way read parallelism, call this function
  N times.  This will give you N independent Readers reading different
  files & positions within those files, which will give better mixing of
  examples.

  Args:
    filename_queue: A queue of strings with the filenames to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label, in the range 0..99 for fine
        labels or 0..19 for coarse labels.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
  """
    class CIFAR100Record(object):
        pass

    result = CIFAR100Record()

    coarse_label_bytes = 1
    fine_label_bytes = 1
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth

    record_bytes = coarse_label_bytes + fine_label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes,
                                        header_bytes=0,
                                        footer_bytes=0)
    result.key, value = reader.read(filename_queue)

    record_bytes = tf.decode_raw(value, tf.uint8)

    coarse_label = tf.cast(
        tf.strided_slice(record_bytes, [0], [coarse_label_bytes]), tf.int32)

    fine_label = tf.cast(
        tf.strided_slice(record_bytes, [coarse_label_bytes],
                         [coarse_label_bytes + fine_label_bytes]), tf.int32)

    if coarse_or_fine == 'fine':
        result.label = fine_label
    else:
        result.label = coarse_label

    depth_major = tf.reshape(
        tf.strided_slice(
            record_bytes, [coarse_label_bytes + fine_label_bytes],
            [coarse_label_bytes + fine_label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
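read_cifar100 decodes only a single record; batching is left to the caller, typically with the same queue-based pattern as the CIFAR-10 example above. A minimal sketch (the file path is illustrative):

filename_queue = tf.train.string_input_producer(['/tmp/cifar-100-binary/train.bin'])
record = read_cifar100(filename_queue, coarse_or_fine='fine')

image = tf.image.per_image_standardization(tf.cast(record.uint8image, tf.float32))
label = tf.reshape(record.label, [])  # scalar int32 label

images, labels = tf.train.shuffle_batch(
    [image, label], batch_size=128, capacity=5000, min_after_dequeue=1000)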
Example #60
0
def cifar10(path=pathcifar,
            activation="sigmoid",
            conv_channels=(16, 16, 16),
            linear_layers=32,
            batch_size=128,
            num_threads=4,
            min_queue_examples=1000,
            mode="train"):
    """Cifar10 classification with a convolutional network."""

    # Data.
    _open_cifar10(path)

    if activation == "sigmoid":
        activation_op = tf.sigmoid
    elif activation == "relu":
        activation_op = tf.nn.relu
    else:
        raise ValueError("{} activation not supported".format(activation))

    # Read images and labels from disk.
    if mode == "train":
        filenames = [
            os.path.join(path, CIFAR10_FOLDER, "data_batch_{}.bin".format(i))
            for i in range(1, 6)
        ]
    elif mode == "test":
        filenames = [os.path.join(path, "test_batch.bin")]
    else:
        raise ValueError("Mode {} not recognised".format(mode))

    depth = 3
    height = 32
    width = 32
    label_bytes = 1
    image_bytes = depth * height * width
    record_bytes = label_bytes + image_bytes
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, record = reader.read(tf.train.string_input_producer(filenames))
    record_bytes = tf.decode_raw(record, tf.uint8)

    label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)
    raw_image = tf.slice(record_bytes, [label_bytes], [image_bytes])
    image = tf.cast(tf.reshape(raw_image, [depth, height, width]), tf.float32)
    # height x width x depth.
    image = tf.transpose(image, [1, 2, 0])
    image = tf.div(image, 255)

    queue = tf.RandomShuffleQueue(
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples,
        dtypes=[tf.float32, tf.int32],
        shapes=[image.get_shape(), label.get_shape()])
    enqueue_ops = [queue.enqueue([image, label]) for _ in range(num_threads)]
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

    with tf.name_scope('Optimizee_loss'):

        def compute_loss():
            image_batch, label_batch = queue.dequeue_many(batch_size)
            label_batch = tf.reshape(label_batch, [batch_size])
            output = image_batch
            with tf.variable_scope('ConvMLP', reuse=tf.AUTO_REUSE):
                conv1_w = tf.get_variable(
                    "conv1_w",
                    shape=[5, 5, depth, conv_channels[0]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv1_b = tf.get_variable(
                    "conv1_b",
                    shape=[
                        conv_channels[0],
                    ],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv1_beta = tf.get_variable(
                    "conv1_beta",
                    shape=[1, 1, 1, conv_channels[0]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv2_w = tf.get_variable(
                    "conv2_w",
                    shape=[5, 5, conv_channels[0], conv_channels[1]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv2_b = tf.get_variable(
                    "conv2_b",
                    shape=[
                        conv_channels[1],
                    ],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv2_beta = tf.get_variable(
                    "conv2_beta",
                    shape=[1, 1, 1, conv_channels[1]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv3_w = tf.get_variable(
                    "conv3_w",
                    shape=[5, 5, conv_channels[1], conv_channels[2]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv3_b = tf.get_variable(
                    "conv3_b",
                    shape=[
                        conv_channels[2],
                    ],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                conv3_beta = tf.get_variable(
                    "conv3_beta",
                    shape=[1, 1, 1, conv_channels[2]],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                output = tf.nn.convolution(output,
                                           conv1_w,
                                           padding='SAME',
                                           strides=[1, 1])
                output = tf.nn.relu(tf.nn.bias_add(output, conv1_b))
                output = tf.nn.max_pool(output,
                                        ksize=[1, 2, 2, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='SAME')
                b_m_1, b_v_1 = tf.nn.moments(output, axes=[0, 1, 2])
                output = tf.nn.batch_normalization(output,
                                                   b_m_1,
                                                   b_v_1,
                                                   conv1_beta,
                                                   scale=None,
                                                   variance_epsilon=1e-8)
                output = tf.nn.convolution(output,
                                           conv2_w,
                                           padding='SAME',
                                           strides=[1, 1])
                output = tf.nn.relu(tf.nn.bias_add(output, conv2_b))
                output = tf.nn.max_pool(output,
                                        ksize=[1, 2, 2, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='SAME')
                b_m_2, b_v_2 = tf.nn.moments(output, [0, 1, 2])
                output = tf.nn.batch_normalization(output,
                                                   b_m_2,
                                                   b_v_2,
                                                   conv2_beta,
                                                   scale=None,
                                                   variance_epsilon=1e-8)
                output = tf.nn.convolution(output,
                                           conv3_w,
                                           padding='SAME',
                                           strides=[1, 1])
                output = tf.nn.relu(tf.nn.bias_add(output, conv3_b))
                output = tf.nn.max_pool(output,
                                        ksize=[1, 2, 2, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='SAME')
                b_m_3, b_v_3 = tf.nn.moments(output, [0, 1, 2])
                output = tf.nn.batch_normalization(output,
                                                   b_m_3,
                                                   b_v_3,
                                                   conv3_beta,
                                                   scale=None,
                                                   variance_epsilon=1e-8)
                output = tf.layers.flatten(output)
                W_in = tf.get_variable(
                    "W_in",
                    shape=[output.shape[1], linear_layers],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                b_in = tf.get_variable(
                    "b_in",
                    shape=[
                        linear_layers,
                    ],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                W_out = tf.get_variable(
                    "W_out",
                    shape=[linear_layers, 10],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
                b_out = tf.get_variable(
                    "b_out",
                    shape=[
                        10,
                    ],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))
            layer_out = activation_op(tf.add(tf.matmul(output, W_in), b_in))
            output = tf.add(tf.matmul(layer_out, W_out), b_out)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=output, labels=label_batch)
            return tf.reduce_mean(loss)

    with tf.name_scope('Convex_loss'):

        def convex_loss():
            with tf.variable_scope('conv_var', reuse=tf.AUTO_REUSE):
                v = tf.get_variable(
                    "v",
                    shape=[1, 10],
                    dtype=tf.float32,
                    initializer=tf.random_normal_initializer(stddev=0.01))

                # Non-trainable variables.
                target = tf.get_variable(
                    "target",
                    shape=[1, 10],
                    dtype=tf.float32,
                    initializer=tf.random_uniform_initializer(),
                    trainable=False)

            return tf.reduce_mean(
                tf.clip_by_value(tf.square(v - target), 0, 10))

    return collections.OrderedDict([('Opt_loss', compute_loss),
                                    ('Aux_loss', convex_loss)])
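The losses are returned as callables rather than tensors, so the caller decides when each sub-graph is built. A minimal usage sketch (the path and optimizer choice are illustrative; running the train op still requires the queue runners to be started):

problems = cifar10('/tmp/cifar10_data', mode='train', batch_size=128)
opt_loss = problems['Opt_loss']()   # builds the ConvMLP graph, returns a scalar loss
aux_loss = problems['Aux_loss']()   # small convex auxiliary loss

train_op = tf.train.AdamOptimizer(1e-3).minimize(opt_loss)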