def read_and_decode(filename_queue, label_type, shape):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.cast(image, tf.float32)
    image = (image - 127.5) * (1. / 128.0)
    image.set_shape([shape * shape * 3])
    image = tf.reshape(image, [shape, shape, 3])
    label = tf.decode_raw(features['label_raw'], tf.float32)
    if label_type == 'cls':
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        label.set_shape([2])
    elif label_type == 'bbx':
        label.set_shape([4])
    elif label_type == 'pts':
        label.set_shape([10])
    return image, label
def _binary_parse_function_example(serialized_example_protocol):
    '''
    DESCRIPTION:
        This function will deserialize, decompress and then transform the
        image and label into the appropriate shape based on the (new) merged
        structure of the dataset.
    '''
    # Parsing the example from the binary format
    features = {
        'image': tf.FixedLenFeature((), tf.string),
        'label': tf.FixedLenFeature((), tf.string)
    }
    parsed_feature = tf.parse_single_example(serialized_example_protocol, features)

    # Now setting the appropriate transformation (decoding and reshape)
    height = 514
    width = 513
    depth = 40

    # Decoding the image from binary
    image = tf.decode_raw(parsed_feature['image'], tf.float32)  # BEWARE of dtype
    image.set_shape([depth * height * width])
    # Now reshape in the usual way since reshape automatically reads in C-order
    image = tf.reshape(image, [height, width, depth])

    # Now decoding the label
    target_len = 6
    label = tf.decode_raw(parsed_feature['label'], tf.float32)
    label.set_shape([target_len])
    # Reshaping appropriately
    label = tf.reshape(label, [target_len, ])

    # Returning the example tuple finally
    return image, label
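# A minimal writer-side sketch (an assumption, not part of the original source): for the
# tf.decode_raw(..., tf.float32) calls above to round-trip correctly, the image and label
# arrays must have been serialized as raw float32 bytes. `write_example` and its arguments
# are hypothetical names used only for illustration; `writer` would be a
# tf.python_io.TFRecordWriter.
import numpy as np
import tensorflow as tf

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def write_example(writer, image_arr, label_arr):
    # image_arr: np.float32 array of shape (height, width, depth); label_arr: np.float32 of length 6
    example = tf.train.Example(features=tf.train.Features(feature={
        'image': _bytes_feature(image_arr.astype(np.float32).tobytes()),
        'label': _bytes_feature(label_arr.astype(np.float32).tobytes()),
    }))
    writer.write(example.SerializeToString())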
def read_and_decode(filename_queue):
    # input: filename
    # output: image, label pair

    # setup a TF record reader
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # list the features we want to extract, i.e., the image and the label
    features = tf.parse_single_example(
        serialized_example,
        features={
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })

    # Decode the training image
    # Convert from a scalar string tensor (whose single string has
    # length 256*256) to a float tensor
    image = tf.decode_raw(features['img_raw'], tf.int64)
    image.set_shape([65536])
    image_re = tf.reshape(image, (256, 256))
    # Scale input pixels by 1024
    image_re = tf.cast(image_re, tf.float32) * (1. / 1024)

    # decode the label image, an image with all 0's except 1's where the left
    # ventricle exists
    label = tf.decode_raw(features['label_raw'], tf.uint8)
    label.set_shape([65536])
    label_re = tf.reshape(label, [256, 256])

    return image_re, label_re
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string),
        }
    )

    # must be read back as uint8 here
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    segmentation = tf.decode_raw(features['mask_raw'], tf.uint8)

    image.set_shape([224 * 224 * 3])
    segmentation.set_shape([224 * 224 * 1])

    image = tf.reshape(image, [224, 224, 3])
    segmentation = tf.reshape(segmentation, [224, 224])

    # Scale RGB values to [0, 1].
    rgb = tf.cast(image, tf.float32)
    rgb = rgb * (1. / 255)

    mask = tf.cast(segmentation, tf.float32)
    mask = (mask / 255.) * 20
    mask = tf.cast(mask, tf.int64)
    return rgb, mask
def deserialize(examples_serialized):
    """Called by Dataset.map() to convert batches of records to tensors."""
    features = tf.parse_single_example(examples_serialized, feature_map)
    users = tf.reshape(tf.decode_raw(
        features[movielens.USER_COLUMN], tf.int32), (batch_size,))
    items = tf.reshape(tf.decode_raw(
        features[movielens.ITEM_COLUMN], tf.uint16), (batch_size,))

    if params["use_tpu"] or params["use_xla_for_gpu"]:
        items = tf.cast(items, tf.int32)  # TPU and XLA disallow uint16 infeed.

    if not training:
        dupe_mask = tf.reshape(tf.cast(tf.decode_raw(
            features[rconst.DUPLICATE_MASK], tf.int8), tf.bool), (batch_size,))
        return {
            movielens.USER_COLUMN: users,
            movielens.ITEM_COLUMN: items,
            rconst.DUPLICATE_MASK: dupe_mask,
        }

    labels = tf.reshape(tf.cast(tf.decode_raw(
        features["labels"], tf.int8), tf.bool), (batch_size,))
    return {
        movielens.USER_COLUMN: users,
        movielens.ITEM_COLUMN: items,
    }, labels
def read_single_example_and_decode(filename_queue):
    tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    reader = tf.TFRecordReader(options=tfrecord_options)
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized=serialized_example,
        features={
            'img_name': tf.FixedLenFeature([], tf.string),
            'img_height': tf.FixedLenFeature([], tf.int64),
            'img_width': tf.FixedLenFeature([], tf.int64),
            'img': tf.FixedLenFeature([], tf.string),
            'gtboxes_and_label': tf.FixedLenFeature([], tf.string),
            'num_objects': tf.FixedLenFeature([], tf.int64)
        }
    )
    img_name = features['img_name']
    img_height = tf.cast(features['img_height'], tf.int32)
    img_width = tf.cast(features['img_width'], tf.int32)
    img = tf.decode_raw(features['img'], tf.uint8)

    img = tf.reshape(img, shape=[img_height, img_width, 3])

    gtboxes_and_label = tf.decode_raw(features['gtboxes_and_label'], tf.int32)
    gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 9])

    num_objects = tf.cast(features['num_objects'], tf.int32)
    return img_name, img, gtboxes_and_label, num_objects
def build_next_batch_op(self):
    reader = tf.TFRecordReader()
    _, serialized_experience = reader.read(self.filename_queue)

    features = tf.parse_single_example(serialized_experience, features={
        'state': tf.FixedLenFeature([], tf.string),
        'action': tf.FixedLenFeature([2], tf.float32),
        'reward': tf.FixedLenFeature([], tf.float32),
        'next_state': tf.FixedLenFeature([], tf.string),
        'is_episode_finished': tf.FixedLenFeature([], tf.int64)})

    state = tf.decode_raw(features['state'], tf.uint8)
    state.set_shape([86 * 86 * 4])
    action = features['action']
    reward = features['reward']
    next_state = tf.decode_raw(features['next_state'], tf.uint8)
    next_state.set_shape([86 * 86 * 4])
    is_episode_finished = features['is_episode_finished']

    state = tf.reshape(state, [86, 86, 4])
    next_state = tf.reshape(next_state, [86, 86, 4])

    state_batch, action_batch, reward_batch, next_state_batch, is_episode_finished_batch = tf.train.shuffle_batch(
        [state, action, reward, next_state, is_episode_finished],
        batch_size=self.batch_size, capacity=100, min_after_dequeue=0)

    return state_batch, action_batch, reward_batch, next_state_batch, is_episode_finished_batch
def parse_sequence_example(self, record_string):
    features_dict = {
        'images_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'height': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
        'sequence_length': tf.FixedLenFeature([], tf.int64)
    }
    if ADD_GEOLOCATIONS:
        features_dict['geo'] = tf.FixedLenFeature([], tf.string)

    features = tf.parse_single_example(record_string, features_dict)

    images = tf.decode_raw(features['images_raw'], tf.float32)
    width = tf.cast(features['width'], tf.int32)
    height = tf.cast(features['height'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    label = tf.cast(features['label'], tf.int32)
    sequence_length = tf.cast(features['sequence_length'], tf.int32)
    images = tf.reshape(images, [sequence_length, height, width, depth])

    if ADD_GEOLOCATIONS:
        geo = tf.decode_raw(features['geo'], tf.float32)
        geo = tf.reshape(geo, [2, ])
        return images, label, geo
    else:
        return images, label
def read_and_decode(self, filename_queue):
    """
    A definition of how TF should read the file record.
    Slightly altered version from https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/how_tos/ \
    reading_data/fully_connected_reader.py

    :param filename_queue: The file name queue to be read.
    :type filename_queue: tf.QueueBase
    :return: The read file data including the image data and depth data.
    :rtype: (tf.Tensor, tf.Tensor)
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'depth_raw': tf.FixedLenFeature([], tf.string),
        })

    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [self.height, self.width, self.channels])
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    depth = tf.decode_raw(features['depth_raw'], tf.float32)
    depth = tf.reshape(depth, [self.height, self.width, 1])

    return image, depth
def make_readers(file_prefix):
    """
    Return states and qvals tensors
    :param file_prefix:
    :return:
    """
    FLOAT_SIZE = 4
    states_reader = tf.FixedLengthRecordReader(STATES_HISTORY * N_STATE * FLOAT_SIZE)
    next_states_reader = tf.FixedLengthRecordReader(STATES_HISTORY * N_STATE * FLOAT_SIZE)
    actions_reader = tf.FixedLengthRecordReader(1)
    rewards_reader = tf.FixedLengthRecordReader(FLOAT_SIZE)

    _, states = states_reader.read(tf.train.string_input_producer([file_prefix + ".states"]))
    _, next_states = next_states_reader.read(tf.train.string_input_producer([file_prefix + ".next_states"]))
    _, actions = actions_reader.read(tf.train.string_input_producer([file_prefix + ".actions"]))
    _, rewards = rewards_reader.read(tf.train.string_input_producer([file_prefix + ".rewards"]))

    states = tf.decode_raw(states, tf.float32, name="decode_states")
    states = tf.reshape(states, (STATES_HISTORY * N_STATE, ), name="reshape_states")
    next_states = tf.decode_raw(next_states, tf.float32, name="decode_next_states")
    next_states = tf.reshape(next_states, (STATES_HISTORY * N_STATE, ), name="reshape_next_states")
    actions = tf.decode_raw(actions, tf.int8, name="decode_actions")
    actions = tf.reshape(actions, (1, ), name="reshape_actions")
    actions = tf.to_int32(actions)
    rewards = tf.decode_raw(rewards, tf.float32, name="decode_rewards")
    rewards = tf.reshape(rewards, (1, ), name="reshape_qvals")

    return states, actions, rewards, next_states
def parser(self, record):
    keys_to_features = {
        'labels': tf.FixedLenFeature([], tf.string),
        'userIds': tf.VarLenFeature(tf.int64),
        'itemIds': tf.VarLenFeature(tf.int64),
        'user_profiles_indices': tf.FixedLenFeature([], tf.string),
        'user_profiles_values': tf.VarLenFeature(tf.int64),
        'user_profiles_weights': tf.VarLenFeature(tf.float32),
        'user_profiles_shape': tf.FixedLenFeature([2], tf.int64),
        'item_profiles_indices': tf.FixedLenFeature([], tf.string),
        'item_profiles_values': tf.VarLenFeature(tf.int64),
        'item_profiles_weights': tf.VarLenFeature(tf.float32),
        'item_profiles_shape': tf.FixedLenFeature([2], tf.int64)
    }
    parsed = tf.parse_single_example(record, keys_to_features)

    labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1])
    userIds = tf.sparse_tensor_to_dense(parsed['userIds'])
    itemIds = tf.sparse_tensor_to_dense(parsed['itemIds'])

    user_profiles_indices = tf.reshape(tf.decode_raw(parsed['user_profiles_indices'], tf.int64), [-1, 2])
    user_profiles_values = tf.sparse_tensor_to_dense(parsed['user_profiles_values'])
    user_profiles_weights = tf.sparse_tensor_to_dense(parsed['user_profiles_weights'])
    user_profiles_shape = parsed['user_profiles_shape']

    item_profiles_indices = tf.reshape(tf.decode_raw(parsed['item_profiles_indices'], tf.int64), [-1, 2])
    item_profiles_values = tf.sparse_tensor_to_dense(parsed['item_profiles_values'])
    item_profiles_weights = tf.sparse_tensor_to_dense(parsed['item_profiles_weights'])
    item_profiles_shape = parsed['item_profiles_shape']

    return labels, userIds, itemIds, \
        user_profiles_indices, user_profiles_values, user_profiles_weights, user_profiles_shape, \
        item_profiles_indices, item_profiles_values, item_profiles_weights, item_profiles_shape
def buildSpImageConverter(channelOrder, img_dtype):
    """
    Convert an imageIO byte encoded image into an image tensor suitable as input to ConvNets
    The name of the input must be a subset of those specified in `image.imageIO.imageSchema`.

    :param img_dtype: the type of data the underlying image bytes represent
    """
    with IsolatedSession() as issn:
        # Flat image data -> image dimensions
        # This has to conform to `imageIO.imageSchema`
        height = tf.placeholder(tf.int32, [], name="height")
        width = tf.placeholder(tf.int32, [], name="width")
        num_channels = tf.placeholder(tf.int32, [], name="nChannels")
        image_buffer = tf.placeholder(tf.string, [], name="data")

        # The image is packed into bytes with height as leading dimension
        # This is the default behavior of Python Image Library
        shape = tf.reshape(tf.stack([height, width, num_channels], axis=0),
                           shape=(3,), name='shape')
        if img_dtype == 'uint8':
            image_uint8 = tf.decode_raw(image_buffer, tf.uint8, name="decode_raw")
            image_float = tf.to_float(image_uint8)
        elif img_dtype == 'float32':
            image_float = tf.decode_raw(image_buffer, tf.float32, name="decode_raw")
        else:
            raise ValueError('''unsupported image data type "%s", currently only know how to
            handle uint8 and float32''' % img_dtype)
        image_reshaped = tf.reshape(image_float, shape, name="reshaped")
        image_reshaped = imageIO.fixColorChannelOrdering(channelOrder, image_reshaped)
        image_input = tf.expand_dims(image_reshaped, 0, name="image_input")
        gfn = issn.asGraphFunction([height, width, image_buffer, num_channels], [image_input])

    return gfn
def read_to_numpy(self, file_name, data_type=None):
    """
    Reads entire TFRecords file as NumPy.

    :param file_name: The TFRecords file name to read.
    :type file_name: str
    :param data_type: Data type of data. Used if that data type doesn't include things like labels.
    :type data_type: str
    :return: The images and labels NumPy
    :rtype: (np.ndarray, np.ndarray)
    """
    feature_types = self.attain_feature_types(data_type)
    images = []
    labels = []
    for tfrecord in tf.python_io.tf_record_iterator(file_name):
        with tf.Graph().as_default() as graph:  # Create a separate graph, as this runs slow when on one graph.
            features = tf.parse_single_example(tfrecord, features=feature_types)
            image_shape, label_shape = self.extract_shapes_from_tfrecords_features(features, data_type)
            flat_image = tf.decode_raw(features['image_raw'], tf.uint8)
            image_tensor = tf.reshape(flat_image, image_shape)
            image_tensor = tf.squeeze(image_tensor)
            if data_type != 'deploy':
                flat_label = tf.decode_raw(features['label_raw'], tf.float32)
                label_tensor = tf.reshape(flat_label, label_shape)
                label_tensor = tf.squeeze(label_tensor)
            else:
                label_tensor = tf.constant(-1.0, dtype=tf.float32, shape=[1, 1, 1])
            with tf.Session(graph=graph) as session:
                initialize_op = tf.global_variables_initializer()
                session.run(initialize_op)
                image, label = session.run([image_tensor, label_tensor])
                images.append(image)
                labels.append(label)
    return np.stack(images), np.stack(labels)
def create_image_and_label_inputs_from_file_name_queue(self, file_name_queue, data_type=None):
    """
    Creates the inputs for the image and label for a given file name queue.

    :param file_name_queue: The file name queue to be used.
    :type file_name_queue: tf.Queue
    :param data_type: The type of data (train, validation, test, deploy, etc) to determine how to process.
    :type data_type: str
    :return: The image and label inputs.
    :rtype: (tf.Tensor, tf.Tensor)
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_name_queue)
    feature_types = self.attain_feature_types(data_type)
    features = tf.parse_single_example(serialized_example, features=feature_types)
    image_shape, label_shape = self.extract_shapes_from_tfrecords_features(features, data_type)
    flat_image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(flat_image, image_shape)
    if data_type != 'deploy':
        flat_label = tf.decode_raw(features['label_raw'], tf.float32)
        label = tf.reshape(flat_label, label_shape)
    else:
        # Makes a fake label tensor for preprocessing to work on.
        label = tf.constant(-1.0, dtype=tf.float32, shape=[1, 1, 1])
    return image, label
def read_decode_tfrecord_list(file_list, do_augment=False):
    '''Read TFRecord content'''
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_list)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'shape': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.float32),
        })
    shape = tf.decode_raw(features['shape'], tf.uint8)
    # print('Shape (shape) is:', shape.shape)
    image = tf.decode_raw(features['image'], tf.uint8)
    # print('Shape (image) is:', image.shape)
    label = tf.cast(features['label'], tf.float32)
    # TODO: Infer from shape field from TFRecord
    image.set_shape([256 * 256 * 3])
    image = tf.reshape(image, [256, 256, 3])
    image, label = process_features(image, label, do_augment)
    return image, label
def batch_parse_tf_example(batch_size, example_batch):
    '''
    Args:
        example_batch: a batch of tf.Example
    Returns:
        A dict of batched tensors
    '''
    features = {
        'x': tf.FixedLenFeature([], tf.string),
        'pi': tf.FixedLenFeature([], tf.string),
        'outcome': tf.FixedLenFeature([], tf.float32),
    }
    parsed = tf.parse_example(example_batch, features)
    x = tf.decode_raw(parsed['x'], tf.uint8)
    x = tf.cast(x, tf.float32)
    x = tf.reshape(x, [batch_size, go.N, go.N, features_lib.NEW_FEATURES_PLANES])
    pi = tf.decode_raw(parsed['pi'], tf.float32)
    pi = tf.reshape(pi, [batch_size, go.N * go.N + 1])
    outcome = parsed['outcome']
    outcome.set_shape([batch_size])
    return {
        'pos_tensor': x,
        'pi_tensor': pi,
        'value_tensor': outcome,
    }
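# A hedged usage sketch (not from the original source): since batch_parse_tf_example takes a
# *batch* of serialized tf.Example protos, it would typically be mapped after Dataset.batch().
# `record_files` and `batch_size` are hypothetical placeholders.
import functools

dataset = tf.data.TFRecordDataset(record_files)
dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.map(functools.partial(batch_parse_tf_example, batch_size))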
def tfrecord_to_graph_ops(filenames, num_epochs):
    file_queue = tf.train.string_input_producer(
        filenames, name='file_queue', num_epochs=num_epochs
    )
    reader = tf.TFRecordReader(
        options=tf.python_io.TFRecordOptions(
            compression_type=tf.python_io.TFRecordCompressionType.GZIP
        )
    )
    _, tfrecord = reader.read(file_queue)

    tfrecord_features = tf.parse_single_example(
        tfrecord,
        features={
            'images': tf.FixedLenFeature([], tf.string),
            'labels': tf.FixedLenFeature([], tf.string),
        },
        name='data'
    )
    tfeat = tf.decode_raw(tfrecord_features['images'], tf.uint8)
    # note, 'NCHW' is only supported on GPUs, so use 'NHWC'...
    tfeat = tf.reshape(tfeat, [-1, 28, 28, 1])
    ttarg = tf.decode_raw(tfrecord_features['labels'], tf.uint8)
    ttarg = tf.one_hot(indices=ttarg, depth=10, on_value=1, off_value=0)
    return tfeat, ttarg
def read_data(filename_queue, shape):
    """ reads data from tfrecord files.

    Args:
        filename_queue: A queue of strings with filenames
        shape: image shape

    Returns:
        frames: the frame data in size (batch_size, image height, image width, frames)
    """
    reader = tf.TFRecordReader()
    key, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'mask': tf.FixedLenFeature([], tf.string)
        })
    image = tf.decode_raw(features['image'], tf.uint8)
    mask = tf.decode_raw(features['mask'], tf.uint8)

    image = tf.reshape(image, [shape[0], shape[1], 1])
    mask = tf.reshape(mask, [shape[0], shape[1], 1])

    image = tf.to_float(image)
    mask = tf.to_float(mask)

    image_mean = tf.reduce_mean(image)
    image = image - image_mean
    # image = image / 255.0
    mask = mask / 255.0
    return image, mask
def read_image(file_queue):
    reader = tf.TFRecordReader()
    # key, value = reader.read(file_queue)
    _, serialized_example = reader.read(file_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.string),
            'image_raw': tf.FixedLenFeature([], tf.string)
        })

    image = tf.decode_raw(features['image_raw'], tf.uint8)
    # print('image ' + str(image))
    image = tf.reshape(image, [INPUT_IMG_WIDE, INPUT_IMG_HEIGHT, INPUT_IMG_CHANNEL])
    # image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # image = tf.image.resize_images(image, (IMG_HEIGHT, IMG_WIDE))
    # image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    label = tf.decode_raw(features['label'], tf.uint8)
    # label = tf.cast(label, tf.int64)
    label = tf.reshape(label, [OUTPUT_IMG_WIDE, OUTPUT_IMG_HEIGHT])
    # label = tf.decode_raw(features['image_raw'], tf.uint8)
    # print(label)
    # label = tf.reshape(label, shape=[1, 4])
    return image, label
def read_raw_images(data_set):
    dirs = './data/' + data_set + '/'
    filename = list_binary_files(dirs)
    print(filename)
    filename_queue = tf.train.string_input_producer(filename)

    if data_set == 'train':
        image_bytes = FLAGS.height * FLAGS.width * FLAGS.depth
        record_bytes = image_bytes + 1
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)

        label = tf.cast(tf.slice(record_bytes, [0], [1]), tf.int32)
        depth_major = tf.reshape(tf.slice(record_bytes, [1], [image_bytes]),
                                 [FLAGS.depth, FLAGS.height, FLAGS.width])
        uint8image = tf.transpose(depth_major, [1, 2, 0])
        return label, uint8image
    elif data_set == 'test':
        image_bytes = FLAGS.height * FLAGS.width * FLAGS.depth
        record_bytes = image_bytes + 1
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)

        depth_major = tf.reshape(tf.slice(record_bytes, [0], [image_bytes]),
                                 [FLAGS.depth, FLAGS.height, FLAGS.width])
        uint8image = tf.transpose(depth_major, [1, 2, 0])
        return uint8image
def get_batch():
    '''Makes batch queues from the training data.
    Returns:
        A Tuple of x (Tensor), y (Tensor).
        x and y have the shape [batch_size, maxlen].
    '''
    import tensorflow as tf

    # Load data
    X, Y = load_train_data()

    # Create Queues
    x, y = tf.train.slice_input_producer([tf.convert_to_tensor(X),
                                          tf.convert_to_tensor(Y)])

    x = tf.decode_raw(x, tf.int32)
    y = tf.decode_raw(y, tf.int32)

    x, y = tf.train.batch([x, y],
                          shapes=[(None,), (None,)],
                          num_threads=8,
                          batch_size=hp.batch_size,
                          capacity=hp.batch_size * 64,
                          allow_smaller_final_batch=False,
                          dynamic_pad=True)
    num_batch = len(X) // hp.batch_size

    return x, y, num_batch  # (N, None) int32, (N, None) int32, ()
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })

    # Convert from a scalar string tensor (whose single string has
    # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
    # [mnist.IMAGE_PIXELS].
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)

    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)

    image_shape = tf.stack([height, width, 3])
    annotation_shape = tf.stack([height, width, 3])

    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)

    image_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)
    annotation_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)

    # Random transformations can be put here: right before you crop images
    # to predefined size. To get more information look at the stackoverflow
    # question linked above.
    resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
                                                           target_height=IMAGE_HEIGHT,
                                                           target_width=IMAGE_WIDTH)
    resized_annotation = tf.image.resize_image_with_crop_or_pad(image=annotation,
                                                                target_height=IMAGE_HEIGHT,
                                                                target_width=IMAGE_WIDTH)

    images, annotations = tf.train.shuffle_batch([resized_image, resized_annotation],
                                                 batch_size=2,
                                                 capacity=30,
                                                 num_threads=2,
                                                 min_after_dequeue=10)
    return images, annotations
def parser(self, record):
    keys_to_features = {
        'attention_news_indices': tf.FixedLenFeature([], tf.string),
        'attention_news_values': tf.VarLenFeature(tf.float32),
        'attention_news_shape': tf.FixedLenFeature([2], tf.int64),
        'attention_user_indices': tf.FixedLenFeature([], tf.string),
        'attention_user_values': tf.VarLenFeature(tf.int64),
        'attention_user_weights': tf.VarLenFeature(tf.float32),
        'attention_user_shape': tf.FixedLenFeature([2], tf.int64),
        'fm_feat_indices': tf.FixedLenFeature([], tf.string),
        'fm_feat_val': tf.VarLenFeature(tf.float32),
        'fm_feat_shape': tf.FixedLenFeature([2], tf.int64),
        'labels': tf.FixedLenFeature([], tf.string),
        'dnn_feat_indices': tf.FixedLenFeature([], tf.string),
        'dnn_feat_values': tf.VarLenFeature(tf.int64),
        'dnn_feat_weight': tf.VarLenFeature(tf.float32),
        'dnn_feat_shape': tf.FixedLenFeature([2], tf.int64),
    }
    parsed = tf.parse_single_example(record, keys_to_features)

    attention_news_indices = tf.reshape(
        tf.decode_raw(parsed['attention_news_indices'], tf.int64), [-1, 2])
    attention_news_values = tf.sparse_tensor_to_dense(parsed['attention_news_values'])
    attention_news_shape = parsed['attention_news_shape']

    attention_user_indices = tf.reshape(
        tf.decode_raw(parsed['attention_user_indices'], tf.int64), [-1, 2])
    attention_user_values = tf.sparse_tensor_to_dense(parsed['attention_user_values'])
    attention_user_weights = tf.sparse_tensor_to_dense(parsed['attention_user_weights'])
    attention_user_shape = parsed['attention_user_shape']

    fm_feat_indices = tf.reshape(
        tf.decode_raw(parsed['fm_feat_indices'], tf.int64), [-1, 2])
    fm_feat_val = tf.sparse_tensor_to_dense(parsed['fm_feat_val'])
    fm_feat_shape = parsed['fm_feat_shape']

    labels = tf.reshape(tf.decode_raw(parsed['labels'], tf.float32), [-1, 1])

    dnn_feat_indices = tf.reshape(
        tf.decode_raw(parsed['dnn_feat_indices'], tf.int64), [-1, 2])
    dnn_feat_values = tf.sparse_tensor_to_dense(parsed['dnn_feat_values'])
    dnn_feat_weight = tf.sparse_tensor_to_dense(parsed['dnn_feat_weight'])
    dnn_feat_shape = parsed['dnn_feat_shape']

    return (attention_news_indices, attention_news_values, attention_news_shape,
            attention_user_indices, attention_user_values, attention_user_weights,
            attention_user_shape, fm_feat_indices, fm_feat_val,
            fm_feat_shape, labels, dnn_feat_indices, dnn_feat_values,
            dnn_feat_weight, dnn_feat_shape)
def read_tfrecord_and_decode_into_image_annotation_pair_tensors(tfrecord_filenames_queue):
    """Return image/annotation tensors that are created by reading tfrecord file.

    The function accepts a tfrecord filenames queue as an input, which usually can be
    created using tf.train.string_input_producer() where the filename is specified with
    the desired number of epochs. This function takes the queue produced by the
    aforementioned tf.train.string_input_producer() and defines tensors converted from
    raw binary representations into reshaped image/annotation tensors.

    Parameters
    ----------
    tfrecord_filenames_queue : tfrecord filename queue
        String queue object from tf.train.string_input_producer()

    Returns
    -------
    image, annotation : tuple of tf.int32 (image, annotation)
        Tuple of image/annotation tensors
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(tfrecord_filenames_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })

    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)

    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)

    image_shape = tf.stack([height, width, 3])

    # The last dimension was added because
    # the tf.resize_image_with_crop_or_pad() accepts tensors
    # that have depth. We need resize and crop later.
    # TODO: See if it is necessary and probably remove third
    # dimension
    annotation_shape = tf.stack([height, width, 1])

    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)

    return image, annotation
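# A minimal usage sketch of the reader above, following the TF 1.x queue-runner pattern its
# docstring describes. This is an assumption for illustration only; 'train.tfrecords' and the
# epoch count are hypothetical placeholders.
filename_queue = tf.train.string_input_producer(['train.tfrecords'], num_epochs=1)
image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(filename_queue)

with tf.Session() as sess:
    # num_epochs creates a local variable, so initialize locals as well as globals.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, ann = sess.run([image, annotation])  # one decoded pair per run() call
    coord.request_stop()
    coord.join(threads)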
def parse_example_proto(example_serialized):
    """Parses an Example proto containing a training example of an image.

    The output of the build_image_data.py image preprocessing script is a dataset
    containing serialized Example protocol buffers. Each Example proto contains
    the following fields:

    Args:
        example_serialized: scalar Tensor tf.string containing a serialized
            Example protocol buffer.

    Returns:
        image_buffer: Tensor tf.string containing the contents of a JPEG file.
        label: Tensor tf.int32 containing the label.
        bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
            where each coordinate is [0, 1) and the coordinates are arranged as
            [ymin, xmin, ymax, xmax].
        text: Tensor tf.string containing the human-readable label.
    """
    # Dense features in Example proto.
    feature_map = {
        'height': tf.FixedLenFeature((), tf.int64),
        'width': tf.FixedLenFeature((), tf.int64),
        'channel': tf.FixedLenFeature((), tf.int64),
        'label': tf.FixedLenFeature((), tf.int64),
        'label_depth': tf.FixedLenFeature((), tf.int64),
        'label_one_hot_raw': tf.FixedLenFeature((), tf.string),
        'image_raw': tf.FixedLenFeature((), tf.string),
        'location_raw': tf.FixedLenFeature((), tf.string)}
    # sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    # Sparse features in Example proto.

    features = tf.parse_single_example(example_serialized, feature_map)

    image_raw = tf.decode_raw(features["image_raw"], tf.uint8)
    image = tf.reshape(image_raw, [64, 64, 3])
    label = tf.cast(features['label'], dtype=tf.int32)
    label_one_hot = tf.decode_raw(features['label_one_hot_raw'], tf.float64)
    location = tf.decode_raw(features['location_raw'], tf.int64)

    # Note that we impose an ordering of (y, x) just to make life difficult.
    # bbox = tf.concat(axis=0, values=[ymin, xmin, ymax, xmax])

    # Force the variable number of bounding boxes into the shape
    # [1, num_boxes, coords].
    # bbox = tf.expand_dims(bbox, 0)
    # bbox = tf.transpose(bbox, [0, 2, 1])

    return image, location, label_one_hot
def decode(self, batched_serialized_tensors, batch_size):
    """Decodes the input from a batch of serialized tensors.
       Formats and reshapes the image.

    Args:
        batched_serialized_tensors: tensor output from Batcher containing read-in serialized tensors

    Returns:
        batched_decoded_tensors: dict of batches of decoded TFRecords of batch_size
    """
    # faster to decode tensors as a batch
    batched_decoded_tensors = tf.parse_example(
        batched_serialized_tensors[fields.InputDataFields.serialized],
        self._keys_to_features)

    # Decode and cast tensors if needed
    for label in self._multi_task_labels:
        tensor = batched_decoded_tensors[label.name]
        # only strings need to be decoded
        if label.dtype == "string":
            if label.decodetype:
                tensor = tf.decode_raw(tensor, TYPE_MAP[label.decodetype])
            else:
                raise ValueError("string type must have a type to be decoded to.")
        if label.casttype:
            tensor = tf.cast(tensor, TYPE_MAP[label.casttype])
        if label.shape:
            tensor = tf.reshape(tensor, [batch_size, *label.shape])
            tensor.set_shape([batch_size, *label.shape])
        batched_decoded_tensors[label.name] = tensor

    # input is handled separately
    image_float = tf.cast(
        tf.decode_raw(batched_decoded_tensors['input'], tf.uint8), tf.float32)
    image_float = tf.reshape(image_float,
                             [batch_size, self._image_height, self._image_width, self._channels])
    image_float.set_shape([batch_size, self._image_height, self._image_width, self._channels])
    batched_decoded_tensors['input'] = image_float

    return batched_decoded_tensors
def read_and_decode(filename, is_train):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    if is_train == True:
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               "hat_label": tf.FixedLenFeature([], tf.int64),
                                               "hair_label": tf.FixedLenFeature([], tf.int64),
                                               "gender_label": tf.FixedLenFeature([], tf.int64),
                                               "top_label": tf.FixedLenFeature([], tf.int64),
                                               "down_label": tf.FixedLenFeature([], tf.int64),
                                               "shoes_label": tf.FixedLenFeature([], tf.int64),
                                               "bag_label": tf.FixedLenFeature([], tf.int64),
                                               "img_raw": tf.FixedLenFeature([], tf.string),
                                           })
        img = tf.decode_raw(features['img_raw'], tf.uint8)
        img = tf.reshape(img, [128, 256, 3])
        # image = Image.frombytes('RGB', (224, 224), img[0])
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
        # print(type(img))
        # img = np.asarray(img, dtype=np.uint8)
        # print(type(img))
        # tl.visualize.frame(I=img, second=5, saveable=False, name='frame', fig_idx=12836)
        hat_label = tf.cast(features['hat_label'], tf.int32)
        hair_label = tf.cast(features['hair_label'], tf.int32)
        gender_label = tf.cast(features['gender_label'], tf.int32)
        top_label = tf.cast(features['top_label'], tf.int32)
        down_label = tf.cast(features['down_label'], tf.int32)
        shoes_label = tf.cast(features['shoes_label'], tf.int32)
        bag_label = tf.cast(features['bag_label'], tf.int32)
        labels = {"hat": hat_label, "hair": hair_label, "gender": gender_label,
                  "top": top_label, "down": down_label, "shoes": shoes_label,
                  "bag": bag_label}
        return img, labels
    else:
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               "img_raw": tf.FixedLenFeature([], tf.string),
                                           })
        img = tf.decode_raw(features['img_raw'], tf.uint8)
        img = tf.reshape(img, [128, 256, 3])
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
        # tl.visualize.frame(I=img, second=5, saveable=False, name='frame', fig_idx=12833)
        return img
def parse_function(planes, probs, winner):
    """
    Convert unpacked record batches to tensors for tensorflow training
    """
    planes = tf.decode_raw(planes, tf.uint8)
    probs = tf.decode_raw(probs, tf.float32)
    winner = tf.decode_raw(winner, tf.float32)
    planes = tf.to_float(planes)
    planes = tf.reshape(planes, (ChunkParser.BATCH_SIZE, 112, 8 * 8))
    probs = tf.reshape(probs, (ChunkParser.BATCH_SIZE, 1858))
    winner = tf.reshape(winner, (ChunkParser.BATCH_SIZE, 1))
    return (planes, probs, winner)
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'vector': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    # features = tf.parse_single_example(serialized_example, dense_keys=['vector', 'label'], dense_types=[tf.string, tf.int64])

    # Convert from a scalar string tensor (whose single string has
    # length tf_model.IMAGE_PIXELS) to a uint8 tensor with shape
    # [tf_model.IMAGE_PIXELS].
    image = tf.decode_raw(features['vector'], tf.float32)
    image.set_shape([FEATURE_DIMENSIONALITY])

    if FLAGS.transpose_input:
        image = tf.reshape(image, FEATURE_INPUT_SHAPE)
        image = tf.transpose(image, perm=[0, 2, 1])
        image = tf.reshape(image, [-1])
    # print("Image shape is %s" % (image.shape))

    # OPTIONAL: Could reshape into a 28x28 image and apply distortions
    # here. Since we are not applying any distortions in this
    # example, and the next step expects the image to be flattened
    # into a vector, we don't bother.

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    # image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)

    return image, label
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        dense_keys=['image_raw', 'label'],
        # Defaults are not specified since both keys are required.
        dense_types=[tf.string, tf.int64])

    # Convert from a scalar string tensor (whose single string has
    # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
    # [mnist.IMAGE_PIXELS].
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([mnist.IMAGE_PIXELS])

    # OPTIONAL: Could reshape into a 28x28 image and apply distortions
    # here. Since we are not applying any distortions in this
    # example, and the next step expects the image to be flattened
    # into a vector, we don't bother.

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    # Convert label from a scalar uint8 tensor to an int32 scalar.
    label = tf.cast(features['label'], tf.int32)

    return image, label
def DecodeLabelAndImage(r):
    r = tf.decode_raw(r, tf.uint8)
    return tf.to_float(
        tf.transpose(tf.reshape(r[1:], [3, 32, 32]), [1, 2, 0])) / 255.0, tf.to_int32(r[0])
def extract_frame_level_features_per_tf_record(frame_file_path,
                                               maximum_iter=False,
                                               stop_at_iter=10):
    '''
    Extraction of Youtube tfrecords frame file features.

    Args:
        frame_file_path - path to each tf_record (note: developed with assumption of
            storing on s3 bucket and accessing with glob)
        maximum_iter - flag - if True, will limit number of videos extracted from each TF record
        stop_at_iter - number of videos to extract
        num_tf_records - number of records to extract - WARNING!!! this is VERY slow, if bigger than 1

    Assumes each video in the tfrecord has the following features:
        'id'    : bytes_list
        'labels': int64_list
        'audio' : float arr, each frame 128
        'rgb'   : float arr, each frame 1024

    Returns:
        numpy arrays of frame ids, frame multi-labels, frame audio, frame rgb
    '''
    frame_ids = []
    frame_labels = []
    feat_rgb = []
    feat_audio = []

    # ATTENTION: only use one TF record for debugging.
    print(
        f'There are {sum(1 for _ in tf.python_io.tf_record_iterator(frame_file_path))} videos in this TF record.'
    )
    iter_ = 0
    for example in tf.python_io.tf_record_iterator(frame_file_path):
        if maximum_iter and iter_ == stop_at_iter:
            break
        tf_example = tf.train.Example.FromString(example)
        frame_ids.append(
            tf_example.features.feature['id'].bytes_list.value[0].decode(
                encoding='UTF-8'))
        frame_labels.append(
            tf_example.features.feature['labels'].int64_list.value)

        tf_seq_example = tf.train.SequenceExample.FromString(example)
        n_frames = len(
            tf_seq_example.feature_lists.feature_list['audio'].feature)
        rgb_frame = []
        audio_frame = []

        # iterate through frames
        sys.stdout.flush()
        for i in range(n_frames):
            sess = tf.InteractiveSession()
            sys.stdout.write('\r' + 'iterating video: ' + str(iter_) +
                             ' ,frames: ' + str(i) + '/' + str(n_frames))
            sys.stdout.flush()
            rgb_frame.append(
                tf.cast(
                    tf.decode_raw(
                        tf_seq_example.feature_lists.feature_list['rgb'].
                        feature[i].bytes_list.value[0], tf.uint8),
                    tf.float32).eval())
            audio_frame.append(
                tf.cast(
                    tf.decode_raw(
                        tf_seq_example.feature_lists.feature_list['audio'].
                        feature[i].bytes_list.value[0], tf.uint8),
                    tf.float32).eval())
            tf.reset_default_graph()
            sess.close()

        feat_rgb.append(rgb_frame)
        feat_audio.append(audio_frame)
        iter_ += 1

    return frame_ids, frame_labels, feat_rgb, feat_audio
def decode_label(label):
    label = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
    label = tf.reshape(label, [])  # label is a scalar
    return tf.to_int32(label)
def _gt_boxes_decoder(keys_to_tensors):
    bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32)
    instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32)
    bboxes_shape = tf.stack([instances, 5])
    return tf.reshape(bboxes, bboxes_shape)
def video_parse_function(example_proto):
    """Parses and preprocesses the features from a video tfrecord."""
    features = {
        "video": tf.VarLenFeature(dtype=tf.string),
        "video_length": tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=10),
        "video_height": tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=128),
        "video_width": tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=128),
        "video_channels": tf.FixedLenFeature(dtype=tf.int64, shape=[], default_value=1),
    }
    parsed_features = tf.parse_single_example(example_proto, features)

    video = tf.sparse_tensor_to_dense(parsed_features["video"], default_value="")
    video = tf.cast(tf.decode_raw(video, tf.uint8), tf.float32)

    video = tf.reshape(
        video,
        tf.stack([
            parsed_features["video_length"],
            parsed_features["video_height"],
            parsed_features["video_width"],
            parsed_features["video_channels"]
        ]))
    # Rescale video from [0, 255] to [-1, 1]
    video = video * (2 / 255) - 1

    start_index = tf.random_uniform(
        [1],
        minval=0,
        maxval=parsed_features["video_length"] - config.num_frames + 1,
        dtype=tf.int64)
    video = tf.gather(video, start_index, config.num_frames)
    # start_index = 0
    # video = video[start_index:start_index + config.num_frames, ...]

    # TODO(drewjaegle): do we need these config fields?
    # Otherwise, we need to ensure the tensor values match the config values.
    video.set_shape([
        config.num_frames,
        config.im_height,
        config.im_width,
        config.im_channels
    ])

    # Reshape to NCHW from NHWC
    video = tf.transpose(video, [0, 3, 1, 2])

    # TODO(drewjaegle): Allow resampling of images here

    # Split images to input (10 frames) and predict (10 frames)
    input_sequence = video[:config.input_seq_len, ...]
    predict_sequence = video[config.input_seq_len:, ...]

    # TODO(drewjaegle): Do any preprocessing needed (i.e. downsample to 64)
    return input_sequence, predict_sequence
def read_TFRecord(data_dir, batch_size, shuffle, in_classes):
    # Number of classes
    num_classes = in_classes
    # Collect the TFRecord files
    data_files = tf.gfile.Glob(data_dir)

    # Read the files.
    filename_queue = tf.train.string_input_producer(data_files, shuffle=True)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Parse the example that was read.
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                           'img_width': tf.FixedLenFeature([], tf.int64),
                                           'img_height': tf.FixedLenFeature([], tf.int64),
                                       })

    # Take the feature objects containing the image and label.
    # tf.decode_raw parses the string into the image's pixel array.
    # Decode the image data: string -> uint8
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    height = tf.cast(features['img_height'], tf.int32)
    width = tf.cast(features['img_width'], tf.int32)
    label = tf.cast(features['label'], tf.int32)
    channel = 3
    image = tf.reshape(image, [height, width, channel])  # reshape: vector -> 3-D matrix
    # image = tf.reshape(image, [height, width, channel])

    # Image resizing
    image = tf.image.resize_image_with_crop_or_pad(image, 100, 100)
    # image = tf.image.resize_images(image, [240, 240], method=0)
    image = tf.image.per_image_standardization(image)  # uint8 -> float32
    # image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    image = tf.cast(image, tf.float32)

    # Assemble batches
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * batch_size
    if shuffle:
        image_batch, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=64,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)
    else:
        image_batch, label_batch = tf.train.batch([image, label],
                                                  batch_size=batch_size,
                                                  num_threads=64,
                                                  capacity=capacity)

    # ONE-HOT
    label_batch = tf.reshape(label_batch, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    label_batch = tf.sparse_to_dense(
        tf.concat(values=[indices, label_batch], axis=1),
        [batch_size, num_classes], 1.0, 0.0)

    print(image_batch)
    print(label_batch)

    # n_classes = 10
    # label_batch = tf.one_hot(label_batch, depth=n_classes)
    # label_batch = tf.cast(label_batch, dtype=tf.int32)
    # label_batch = tf.reshape(label_batch, [batch_size, n_classes])

    return image_batch, label_batch
def read_and_decode(filename_queue=None, img_dims=[256, 256, 3], resize_to=[256, 256],
                    model_dims=[224, 224, 3], size_of_batch=32,
                    labels=True, augmentations_dic=None, num_of_threads=1, shuffle=True):
    """
    Reads in tf records and decodes the features of the image

    Input:
    filename_queue - A node in a TensorFlow Graph used for asynchronous computations
    img_dims - Dimensions of the tensor image stored as a tfrecord, example: [256, 256, 3]
    model_dims - Dimensions of the tensor image that the model accepts, example: [224, 224, 3]
    resize_to - Size to resize tf record to before training; if resize_to is the same as img_dims no resizing will take place
    size_of_batch - Size of the batch that will be fed into the model, example: 32
    labels - Option for if the images stored in tfrecords have labels associated with them
    augmentations_dic - Dictionary of augmentations that an image can have for training and validation. Augmentations are chosen in the config
    num_of_threads - Number of threads that execute a training op that dequeues mini-batches from the queue
    shuffle - Boolean if batches fed into graph should be shuffled or not

    Outputs:
    Tensor image, label of the image and filepath to the image. If labels is False only tensor image and filepath will be returned
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    if not labels:
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image_raw': tf.FixedLenFeature([], tf.string),
                'file_path': tf.FixedLenFeature([], tf.string),
            })

        image = tf.decode_raw(features['image_raw'], tf.uint8)
        file_path = tf.cast(features['file_path'], tf.string)

        image = tf.reshape(image, img_dims)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_images(image, resize_to)
        image = tf.to_float(image)
        image = image / 255

        if augmentations_dic and augmentations_dic['scale_jitter']:
            random_size = randint(256, 512)
            image = tf.image.resize_images(image, [random_size, random_size])
        else:
            image = tf.image.resize_images(image, resize_to)

        if augmentations_dic and augmentations_dic['rand_crop']:
            image = tf.random_crop(image, model_dims)
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, model_dims[0],
                                                           model_dims[1])

        if augmentations_dic and augmentations_dic['rand_color']:
            random_color_ordering = randint(0, 3)
            image = distort_color(image, random_color_ordering)

        if augmentations_dic and augmentations_dic['rand_flip_left_right']:
            image = tf.image.random_flip_left_right(image)

        if augmentations_dic and augmentations_dic['rand_flip_top_bottom']:
            image = tf.image.random_flip_up_down(image)

        if augmentations_dic and augmentations_dic['rand_rotate']:
            random_angle = randint(0, 359)
            image = tf.contrib.image.rotate(image, random_angle)

        if shuffle:
            img, f = tf.train.shuffle_batch([image, file_path],
                                            batch_size=size_of_batch,
                                            capacity=1000 + 3 * size_of_batch,
                                            min_after_dequeue=1000,
                                            num_threads=num_of_threads)
        else:
            img, f = tf.train.batch([image, file_path],
                                    batch_size=size_of_batch,
                                    capacity=100000,
                                    allow_smaller_final_batch=True,
                                    num_threads=num_of_threads)
        return img, f

    else:
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image_raw': tf.FixedLenFeature([], tf.string),
                'file_path': tf.FixedLenFeature([], tf.string),
                'label': tf.FixedLenFeature([], tf.int64)
            })

        image = tf.decode_raw(features['image_raw'], tf.uint8)
        label = tf.cast(features['label'], tf.int32)
        file_path = tf.cast(features['file_path'], tf.string)

        image = tf.reshape(image, img_dims)
        image = tf.cast(image, tf.float32)
        image = tf.image.resize_images(image, resize_to)
        image = tf.to_float(image)
        image = image / 255

        if augmentations_dic and augmentations_dic['scale_jitter']:
            random_size = randint(256, 512)
            image = tf.image.resize_images(image, [random_size, random_size])
        else:
            image = tf.image.resize_images(image, resize_to)

        if augmentations_dic and augmentations_dic['rand_crop']:
            image = tf.random_crop(image, model_dims)
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, model_dims[0],
                                                           model_dims[1])

        if augmentations_dic and augmentations_dic['rand_color']:
            random_color_ordering = randint(0, 3)
            image = distort_color(image, random_color_ordering)

        if augmentations_dic and augmentations_dic['rand_flip_left_right']:
            image = tf.image.random_flip_left_right(image)

        if augmentations_dic and augmentations_dic['rand_flip_top_bottom']:
            image = tf.image.random_flip_up_down(image)

        if augmentations_dic and augmentations_dic['rand_rotate']:
            random_angle = randint(0, 359)
            image = tf.contrib.image.rotate(image, random_angle)

        if shuffle:
            img, l, f = tf.train.shuffle_batch([image, label, file_path],
                                               batch_size=size_of_batch,
                                               capacity=1000 + 3 * size_of_batch,
                                               min_after_dequeue=1000,
                                               num_threads=num_of_threads)
        else:
            img, l, f = tf.train.batch([image, label, file_path],
                                       batch_size=size_of_batch,
                                       capacity=100000,
                                       allow_smaller_final_batch=True,
                                       num_threads=num_of_threads)
        return img, l, f
def _decode_and_augment_image(example_proto):
    keys_to_features = {
        'label': tf.FixedLenFeature([], tf.int64),
        'shape': tf.FixedLenFeature([], tf.string),
        'image': tf.FixedLenFeature([], tf.string),
    }
    tfrecord_features = tf.parse_single_example(example_proto, keys_to_features)

    image = tf.decode_raw(tfrecord_features['image'], tf.uint8)
    shape = tf.decode_raw(tfrecord_features['shape'], tf.int64)
    if input_type == ".jpeg":
        image = tf.reshape(image, target_size + [3])
    else:
        image = tf.reshape(image, target_size)
    label = tfrecord_features['label']

    if augment:
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        degrees = tf.random_uniform((), minval=-180, maxval=180)
        image = tf.contrib.image.rotate(image, degrees)

        width_shift = tf.random_uniform((), minval=0, maxval=0.05)
        height_shift = tf.random_uniform((), minval=0, maxval=0.05)
        horizontal_pad = tf.cast(tf.ceil(width_shift * target_size[0]), tf.int32)
        vertical_pad = tf.cast(tf.ceil(height_shift * target_size[1]), tf.int32)
        padding = tf.stack([
            horizontal_pad, horizontal_pad,
            vertical_pad, vertical_pad,
            tf.constant(0), tf.constant(0)
        ])
        padding = tf.reshape(padding, (3, 2))
        image = tf.pad(image, padding)
        image = tf.random_crop(image, target_size + [3])

        zoom = tf.random_uniform((), minval=-0.1, maxval=0.1)
        new_dim = tf.cast(tf.ceil((1 - zoom) * target_size[0]), dtype=tf.int32)
        image = tf.image.resize_image_with_crop_or_pad(image, new_dim, new_dim)
        image = tf.image.resize_images(
            image, target_size, method=tf.image.ResizeMethod.BILINEAR)

    if normalize:
        std = tf.constant(np.array([70.53946096, 51.71475228, 43.03428563]), dtype=tf.float32)
        std = tf.expand_dims(tf.expand_dims(std, axis=0), axis=0)
        mean = tf.constant(np.array([108.64628601, 75.86886597, 54.34005736]), dtype=tf.float32)
        mean = tf.expand_dims(tf.expand_dims(mean, axis=0), axis=0)
        image = (tf.cast(image, dtype=tf.float32) - mean) / std

    label = tf.reshape(label, [1])
    if input_type == ".jpeg":
        image = tf.reshape(image, target_size + [3])
    else:
        image = tf.reshape(image, target_size)

    return {'shape': shape, 'image': image}, label
def read(bytes, dtype, shapex, shapey):
    inp = tf.decode_raw(bytes, dtype)
    retx = tf.reshape(inp[:np.prod(shapex)], shapex)
    rety = tf.reshape(inp[np.prod(shapex):], shapey)
    return (retx, rety)
batchsize = 2
batch = tf.train.shuffle_batch([ex], batchsize,
                               capacity=batchsize * 10,
                               min_after_dequeue=batchsize * 5)

# Deserialize the data
example = tf.parse_example(batch, features=feature)
image = example['image']
label = example['label']
# Decode the byte data into uint8
image = tf.decode_raw(image, tf.uint8)
# A reshape is needed, otherwise it stays a flat vector
image = tf.reshape(image, [-1, 32, 32, 3])

with tf.Session() as sess:
    # Thread coordinator
    coord = tf.train.Coordinator()
    sess.run(tf.local_variables_initializer())
    threads = tf.train.start_queue_runners(sess, coord)
    for i in range(1):
        image_bth, _ = sess.run([image, label])
        # import cv2
        # cv2.imshow("image", image_bth[0, ...])
        # cv2.waitKey(0)
decomp_feature = {
    'lsize_dim0': tf.FixedLenFeature([], tf.int64),
    'lsize_dim1': tf.FixedLenFeature([], tf.int64),
    'lsize_dim2': tf.FixedLenFeature([], tf.int64),
    'data_vol': tf.FixedLenFeature([], tf.string),
    'label_vol': tf.FixedLenFeature([], tf.string)
}

with tf.Session() as sess:
    queue = tf.train.string_input_producer(val_list, num_epochs=None, shuffle=False)
    reader = tf.TFRecordReader()
    fid, serialized_example = reader.read(queue)
    parser = tf.parse_single_example(serialized_example, features=decomp_feature)
    data_vol = tf.decode_raw(parser['data_vol'], tf.float32)
    label_vol = tf.decode_raw(parser['label_vol'], tf.float32)
    data_vol = tf.reshape(data_vol, raw_size)
    label_vol = tf.reshape(label_vol, raw_size)
    data_vol = tf.slice(data_vol, [0, 0, 0], volume_size)
    label_vol = tf.slice(label_vol, [0, 0, 1], label_size)

    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for i in range(len(val_list)):
        example, l = sess.run([data_vol, label_vol])
        np.save(os.path.join(saveDir, str(i) + '.npy'), example)
        np.save(os.path.join(saveDir_, str(i) + '.npy'), l)
import tensorflow as tf

reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(
    ["../image_data/output.tfrecords"])
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
                                   features={
                                       'image_raw': tf.FixedLenFeature([], tf.string),
                                       'pixels': tf.FixedLenFeature([], tf.int64),
                                       'label': tf.FixedLenFeature([], tf.int64),
                                   })

image = tf.decode_raw(features['image_raw'], tf.uint8)
label = tf.cast(features['label'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

sess = tf.Session()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for i in range(10):
    print(sess.run([image, label, pixels]))
# import tensorflow
import tensorflow as tf

# total bytes per image
TOTAL_BYTES = 3073

# create a list of filenames
filenames = ["dataset/data_batch_%d.bin" % i for i in range(1, 6)]

# create a queue of filenames
filename_queue = tf.train.string_input_producer(filenames, shuffle=False)

# initialize a reader to read TOTAL_BYTES bytes
reader = tf.FixedLengthRecordReader(TOTAL_BYTES)

# read TOTAL_BYTES bytes from the files
key, value = reader.read(filename_queue)

# decode read bytes to perceivable datatype
vector_bytes = tf.decode_raw(value, tf.uint8)

# create a session object
sess = tf.InteractiveSession()

# start queue runners
tf.train.start_queue_runners()

# print uint8 value of data read
print("vector_bytes ", sess.run(vector_bytes))

# print the number of elements in list vector_bytes
print("number of elements in vector_bytes ", len(sess.run(vector_bytes)))

label = tf.cast(tf.slice(vector_bytes, [0], [1]), tf.int32)
depth_major = tf.reshape(tf.slice(vector_bytes, [1], [3072]), [3, 32, 32])
uint8image = tf.transpose(depth_major, [1, 2, 0])
# print(sess.run(uint8image))

A = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
print(A.get_shape())
def load_train_batch(self):
    """Load a batch of training instances.
    """
    opt = self.opt

    # Load the list of training files into queues
    #TODO
    if opt.train_lite:
        file_list = self.format_file_list(opt.dataset_dir, opt.filelist_dir, 'train_lite')
    else:
        file_list = self.format_file_list(opt.dataset_dir, opt.filelist_dir, 'train')

    image_paths_queue = tf.train.string_input_producer(
        file_list['image_file_list'], shuffle=False)
    cam_paths_queue = tf.train.string_input_producer(
        file_list['cam_file_list'], shuffle=False)

    # Load camera intrinsics
    cam_reader = tf.TextLineReader()
    _, raw_cam_contents = cam_reader.read(cam_paths_queue)
    rec_def = []
    for i in range(9):
        rec_def.append([1.])
    raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def)
    raw_cam_vec = tf.stack(raw_cam_vec)
    intrinsics = tf.reshape(raw_cam_vec, [3, 3])

    # Load images
    img_reader = tf.WholeFileReader()
    _, image_contents = img_reader.read(image_paths_queue)
    image_seq = tf.image.decode_jpeg(image_contents)
    tgt_image, src_image_stack = \
        self.unpack_image_sequence(
            image_seq, opt.img_height, opt.img_width, opt.num_source)

    #TODO Load Semantics
    # See cityscape label defs in https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L62
    # Also notice that deeplabv3+ uses `train_id` https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/datasets/build_cityscapes_data.py#L46
    # Color maps are in https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/utils/get_dataset_colormap.py#L207
    if opt.sem_assist:
        sem_paths_queue = tf.train.string_input_producer(
            file_list['sem_image_file_list'], shuffle=False)
        sem_reader = tf.WholeFileReader()
        sem_keys, sem_contents = sem_reader.read(sem_paths_queue)
        if opt.load_from_raw:
            sem_seq = tf.reshape(
                tf.decode_raw(sem_contents, tf.uint8),
                [1, opt.img_height, (opt.num_source + 1) * opt.img_width])
        else:
            sem_seq = tf.py_func(read_npy_file, [sem_keys], [tf.uint8, ])

    #TODO Load Instances: we use COCO
    # Two channels: class and id level. For id level we only use the edge
    if opt.ins_assist:
        ins_paths_queue = tf.train.string_input_producer(
            file_list['ins_image_file_list'], shuffle=False)
        ins_reader = tf.WholeFileReader()
        ins_keys, ins_contents = ins_reader.read(ins_paths_queue)
        if opt.load_from_raw:
            ins_seq = tf.reshape(
                tf.decode_raw(ins_contents, tf.uint8),
                [1, opt.img_height, (opt.num_source + 1) * opt.img_width, 2])
        else:
            ins_seq = tf.py_func(read_npy_file, [ins_keys], [tf.uint8, ])

    #TODO 1. SHUFFLE BATCH
    # Form training batches
    seed = random.randint(0, 2 ** 31 - 1)
    min_after_dequeue = 2048
    capacity = min_after_dequeue + opt.num_threads * opt.batch_size

    if opt.sem_assist and opt.ins_assist:
        src_image_stack, tgt_image, intrinsics, sem_seq, ins_seq = tf.train.shuffle_batch(
            [src_image_stack, tgt_image, intrinsics, sem_seq[0], ins_seq[0]],
            opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed)
    elif opt.sem_assist:
        src_image_stack, tgt_image, intrinsics, sem_seq = tf.train.shuffle_batch(
            [src_image_stack, tgt_image, intrinsics, sem_seq[0]],
            opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed)
    elif opt.ins_assist:
        src_image_stack, tgt_image, intrinsics, ins_seq = tf.train.shuffle_batch(
            [src_image_stack, tgt_image, intrinsics, ins_seq[0]],
            opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed)
    else:
        src_image_stack, tgt_image, intrinsics = tf.train.shuffle_batch(
            [src_image_stack, tgt_image, intrinsics],
            opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed)

    # semantic segmentation
    tgt_sem = None
    tgt_sem_map = None
    tgt_sem_mask = None
    tgt_sem_edge = None
    src_sem_stack = None
    src_sem_map_stack = None
    src_sem_mask_stack = None
    src_sem_edge_stack = None

    # ins0 ~ instance level, but still class segmentation
    tgt_ins0 = None
    tgt_ins0_map = None
    tgt_ins0_edge = None
    src_ins0_stack = None
    src_ins0_map_stack = None
    src_ins0_edge_stack = None

    # ins1 ~ instance level, but this is id segmentation
    tgt_ins1_edge = None
    src_ins1_edge_stack = None

    #TODO 2. TRANSFORMATION AND UNPACKING
    if opt.sem_assist:
        #TODO get one-hot encoded sem_oh_seq (4,128,1248,19)X{0,1}
        sem_oh_seq = tf.cast(
            tf.one_hot(sem_seq, on_value=1, depth=opt.sem_num_class), tf.uint8)
        #TODO decouple tgt_sem (4,128,1248,19)X{0,1} src_sem_stack (4,128,1248,2*19)X{0,1}
        tgt_sem, src_sem_stack = self.unpack_sem_sequence_batch_atom(
            sem_oh_seq, opt.sem_num_class)

        #TODO get densemap sem_map_seq (4,128,1248,1)X{0,1,...,18}
        sem_map_seq = tf.expand_dims(sem_seq, -1)
        #TODO decouple tgt_sem_map (4,128,1248,1)X{0,1,...,18} src_sem_map_stack (4,128,1248,2*1)X{0,1,...,18}
        tgt_sem_map, src_sem_map_stack = self.unpack_sem_sequence_batch_atom(
            sem_map_seq, 1)

        if opt.sem_mask_explore:
            #TODO get sem mask sem_mask_seq (4,128,1248,c) here we assume c=1
            sem_mask_seq = self.get_sem_mask_batch(sem_seq)
            #TODO decouple tgt_sem_mask (4,128,1248,c) src_sem_mask_stack (4,128,1248,2*c)
            tgt_sem_mask, src_sem_mask_stack = self.unpack_sem_sequence_batch_atom(
                sem_mask_seq, 1)

        if opt.sem_edge_explore:
            #TODO get sem edge sem_edge_seq (4,128,1248,c) here we assume c=1
            sem_edge_seq = self.get_sem_edge_batch(sem_seq)
            #TODO decouple tgt_sem_edge (4,128,1248,c) src_sem_edge_stack (4,128,1248,2*c)
            tgt_sem_edge, src_sem_edge_stack = self.unpack_sem_sequence_batch_atom(
                sem_edge_seq, 1)

    if opt.ins_assist:
        ins0_seq = ins_seq[:, :, :, 0]
        ins1_seq = ins_seq[:, :, :, 1]

        #TODO get one-hot ins0_oh_seq (4,128,1248,81)X{0,1}
        ins0_oh_seq = tf.cast(
            tf.one_hot(ins0_seq, on_value=1, depth=opt.ins_num_class), tf.uint8)
        # ins1_oh_seq = tf.cast(tf.one_hot(ins1_seq, on_value=1, depth=255), tf.uint8)

        #TODO decouple tgt_ins0 (4,128,1248,81)X{0,1} src_ins0_stack (4,128,1248,2*81)X{0,1}
        tgt_ins0, src_ins0_stack = self.unpack_sem_sequence_batch_atom(
            ins0_oh_seq, opt.ins_num_class)
        # tgt_ins1, src_ins1_stack = self.unpack_sem_sequence_batch_atom(ins1_oh_seq, opt.ins_num_class)

        #TODO get densemap sem_ins0_seq (4,128,1248,1)X{0,1,...,80}
        ins0_map_seq = ins_seq[:, :, :, :1]
        ins1_map_seq = ins_seq[:, :, :, 1:]

        #TODO decouple tgt_ins0_map (4,128,1248,1)X{0,1,...,80} src_ins0_map_stack (4,128,1248,2*1)X{0,1,...,80}
(4,128,1248,1)X{0,1,...,80} src_ins0_map_stack (4,128,1248,2*1)X{0,1,...,80} tgt_ins0_map, src_ins0_map_stack = self.unpack_sem_sequence_batch_atom( ins0_map_seq, 1) tgt_ins1_map, src_ins1_map_stack = self.unpack_sem_sequence_batch_atom( ins1_map_seq, 1) if opt.ins0_edge_explore: #TODO get edge ins0_edge_seq (4,128,1248,c) here we assume c=1 ins0_edge_seq = self.get_sem_edge_batch(ins0_seq) #TODO decouple tgt_ins0_edge (4,128,1248,c) src_ins0_edge_stack (4,128,1248,2*c) tgt_ins0_edge, src_ins0_edge_stack = self.unpack_sem_sequence_batch_atom( ins0_edge_seq, 1) if opt.ins1_edge_explore: #TODO get edge ins1_edge_seq (4,128,1248,c) here we assume c=1 ins1_edge_seq = self.get_sem_edge_batch(ins1_seq) #TODO decouple tgt_ins1_edge (4,128,1248,c) src_ins1_edge_stack (4,128,1248,2*c) tgt_ins1_edge, src_ins1_edge_stack = self.unpack_sem_sequence_batch_atom( ins1_edge_seq, 1) #TODO 3. DATA AUGMENTATION image_all = tf.concat([tgt_image, src_image_stack], axis=3) image_all, intrinsics, aug_params = self.data_augmentation( image_all, intrinsics, opt.img_height, opt.img_width) #TODO changed API if opt.sem_assist: ##TODO Do the same data augmentation for semantic segmentations tgt_sem, src_sem_stack = self.data_aug(tgt_sem, src_sem_stack, aug_params, "bilinear") tgt_sem_map, src_sem_map_stack = self.data_aug( tgt_sem_map, src_sem_map_stack, aug_params, "neighbor") if self.opt.sem_mask_explore: tgt_sem_mask, src_sem_mask_stack = \ self.data_aug(tgt_sem_mask, src_sem_mask_stack, aug_params, "bilinear") if self.opt.sem_edge_explore: tgt_sem_edge, src_sem_edge_stack = \ self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "neighbor") if opt.ins_assist: ##TODO Do the same data augmentation for instance segmentations tgt_ins0, src_ins0_stack = self.data_aug(tgt_ins0, src_ins0_stack, aug_params, "bilinear") #tgt_ins1, src_ins1_stack = self.data_aug(tgt_ins1, src_ins1_stack, aug_params, "bilinear") tgt_ins0_map, src_ins0_map_stack = self.data_aug( tgt_ins0_map, src_ins0_map_stack, aug_params, "neighbor") #tgt_ins1_map, src_ins1_map_stack = self.data_aug(tgt_ins1_map, src_ins1_map_stack, aug_params, "neighbor") if self.opt.ins0_edge_explore: tgt_ins0_edge, src_ins0_edge_stack = \ self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "neighbor") if self.opt.ins1_edge_explore: tgt_ins1_edge, src_ins1_edge_stack = \ self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "neighbor") # 4. RETURN # image_channels=3*opt.seq_length tgt_image = image_all[:, :, :, :3] src_image_stack = image_all[:, :, :, 3:] #3:image_channels] intrinsics = self.get_multi_scale_intrinsics(intrinsics, opt.num_scales) # if opt.sem_assist and opt.ins_assist: return tgt_image, src_image_stack, intrinsics, \ [tgt_sem, tgt_sem_map, tgt_sem_mask, tgt_sem_edge], \ [src_sem_stack, src_sem_map_stack, src_sem_mask_stack, src_sem_edge_stack], \ [tgt_ins0, tgt_ins0_map, tgt_ins0_edge, tgt_ins1_edge], \ [src_ins0_stack, src_ins0_map_stack, src_ins0_edge_stack, src_ins1_edge_stack]
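# Note: load_train_batch above falls back to tf.py_func(read_npy_file, ...) when
# opt.load_from_raw is False, but the helper itself is not shown. A minimal sketch,
# assuming the .npy files hold uint8 maps laid out like the raw-decode branch
# (img_height x (num_source + 1) * img_width, with an extra channel axis for instances):
import numpy as np

def read_npy_file(path):
    # `path` arrives from tf.py_func as a bytes object, so decode it first.
    data = np.load(path.decode('utf-8'))
    return data.astype(np.uint8)

# Because tf.py_func drops static shape information, callers typically need to
# set_shape/reshape the returned tensor before it can be batched.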
    num_reader_threads=1  # number of threads for prefetching SequenceExample protos.
)
serialized_sequence_example = input_queue.dequeue()
# serialized_sequence_example = tf.train.string_input_producer(["train.cat_caption"])  # does not work here
context, sequence = tf.parse_single_sequence_example(
    serialized=serialized_sequence_example,
    context_features={
        "image/img_raw": tf.FixedLenFeature([], dtype=tf.string)
    },
    sequence_features={
        "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string),
        "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    })

img = tf.decode_raw(context["image/img_raw"], tf.uint8)
img = tf.reshape(img, [height, width, 3])
img = tf.image.convert_image_dtype(img, dtype=tf.float32)

try:  # for TensorFlow 0.11
    img = tf.image.resize_images(img,
                                 size=(resize_height, resize_width),
                                 method=tf.image.ResizeMethod.BILINEAR)
except Exception:  # for TensorFlow 0.10
    img = tf.image.resize_images(img,
                                 new_height=resize_height,
                                 new_width=resize_width,
                                 method=tf.image.ResizeMethod.BILINEAR)
# Crop to final dimensions.
def read_and_decode(tfrecords_filename, batch_size): filename_queue = tf.train.string_input_producer(tfrecords_filename, num_epochs=100) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example( serialized_example, # Defaults are not specified since both keys are required. features={ 'label': tf.FixedLenFeature([], tf.int64), 'seq_len': tf.FixedLenFeature([], tf.int64), 'read_length': tf.FixedLenFeature([], tf.string), 'read_addr': tf.FixedLenFeature([], tf.string), 'ip': tf.FixedLenFeature([], tf.string), 'sp': tf.FixedLenFeature([], tf.string), 'write_length': tf.FixedLenFeature([], tf.string), 'write_addr': tf.FixedLenFeature([], tf.string), 'instruction_id': tf.FixedLenFeature([], tf.string), 'read2_length': tf.FixedLenFeature([], tf.string), 'read2_addr': tf.FixedLenFeature([], tf.string), 'bp': tf.FixedLenFeature([], tf.string) }) label = tf.cast(features['label'], tf.int32) #seq_len = tf.cast(features['seq_len'], tf.int32) seq_shape = [SEQ_LEN, 1] list_vars = [] for field in [ 'read_length', 'read_addr', 'ip', 'sp', 'write_length', 'write_addr', 'read2_addr', 'read2_length', 'bp' ]: new_field = tf.reshape(tf.decode_raw(features[field], tf.float64), seq_shape) relative_field = tf.cast(cos_relative_positions(new_field), tf.float32) tf.summary.histogram('cos_relative_positions_{}'.format(field), relative_field) list_vars.append(relative_field) print(relative_field) print(tf.concat(list_vars, axis=-1)) instruction_id = tf.cast( tf.reshape(tf.decode_raw(features['instruction_id'], tf.float64), seq_shape), tf.int32) X, instruction_ids, Y = tf.train.shuffle_batch( [tf.concat(list_vars, axis=-1), instruction_id, label], batch_size=batch_size, capacity=1000, num_threads=16, min_after_dequeue=2) tf.summary.histogram('All X', X) return X, instruction_ids, Y
def read_cifar10(filename_queue): """Reads and parses examples from CIFAR10 data files. Recommendation: if you want N-way read parallelism, call this function N times. This will give you N independent Readers reading different files & positions within those files, which will give better mixing of examples. Args: filename_queue: A queue of strings with the filenames to read from. Returns: An object representing a single example, with the following fields: height: number of rows in the result (32) width: number of columns in the result (32) depth: number of color channels in the result (3) key: a scalar string Tensor describing the filename & record number for this example. label: an int32 Tensor with the label in the range 0..9. uint8image: a [height, width, depth] uint8 Tensor with the image data """ class CIFAR10Record(object): pass result = CIFAR10Record() # Dimensions of the images in the CIFAR-10 dataset. # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the # input format. label_bytes = 1 # 2 for CIFAR-100 result.height = 32 result.width = 32 result.depth = 3 image_bytes = result.height * result.width * result.depth # Every record consists of a label followed by the image, with a # fixed number of bytes for each. record_bytes = label_bytes + image_bytes # Read a record, getting filenames from the filename_queue. No # header or footer in the CIFAR-10 format, so we leave header_bytes # and footer_bytes at their default of 0. reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) result.key, value = reader.read(filename_queue) # Convert from a string to a vector of uint8 that is record_bytes long. record_bytes = tf.decode_raw(value, tf.uint8) # The first bytes represent the label, which we convert from uint8->int32. result.label = tf.cast(tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32) # The remaining bytes after the label represent the image, which we reshape # from [depth * height * width] to [depth, height, width]. depth_major = tf.reshape( tf.strided_slice(record_bytes, [label_bytes], [label_bytes + image_bytes]), [result.depth, result.height, result.width]) # Convert from [depth, height, width] to [height, width, depth]. result.uint8image = tf.transpose(depth_major, [1, 2, 0]) return result
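# A minimal usage sketch for the CIFAR10Record reader above, in the same
# queue-runner style as the rest of these snippets. The data_batch_*.bin
# filenames, directory, and batch sizes are assumptions, not part of the function:
import os

data_dir = './cifar-10-batches-bin'
filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
filename_queue = tf.train.string_input_producer(filenames)

example = read_cifar10(filename_queue)
float_image = tf.cast(example.uint8image, tf.float32)
label = tf.reshape(example.label, [])  # strided_slice leaves a [1]-shaped label

images, labels = tf.train.shuffle_batch(
    [float_image, label],
    batch_size=128,
    num_threads=4,
    capacity=20000,
    min_after_dequeue=4000)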
features = tf.parse_single_example(
    serialized_example,
    features={
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
        'height': tf.FixedLenFeature([], tf.int64),
        'weight': tf.FixedLenFeature([], tf.int64),
        'channels': tf.FixedLenFeature([], tf.int64),
    })

image = features['image']
label = features['label']
height = features['height']
weight = features['weight']  # the stored key is 'weight', but it holds the image width
channels = features['channels']

# Decode the raw bytes back into a uint8 tensor
decode_image = tf.decode_raw(image, tf.uint8)
# height/weight/channels are tensors, so reshape dynamically instead of set_shape
decode_image = tf.reshape(decode_image, tf.stack([height, weight, channels]))

# Image-preprocessing function defined earlier
image_size = 299
distorted_image = preprocess_for_train(decode_image, image_size, image_size, None)

# Assemble the input batch queue
min_after_dequeue = 10000
batch_size = 100
capacity = min_after_dequeue + 3 * batch_size
image_batch, label_batch = tf.train.shuffle_batch(
    [distorted_image, label],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue)

logit = inference(image_batch)
def get_batch(paths, options): """Returns a data split of the RECOLA dataset, which was saved in tfrecords format. Args: paths: list with paths to data files options: dict with data settings Returns: The raw audio examples and the corresponding arousal/valence labels. """ batch_size = options['batch_size'] frame_size = options['frame_size'] num_channels = options['num_channels'] num_classes = options['num_classes'] crop_size = options['crop_size'] # max_in_len = options['max_in_len'] # max_out_len = options['max_out_len'] time_window_len = options['time_window_len'] if options['shuffle']: shuffle = options['shuffle'] else: shuffle = False if options['horizontal_flip']: horizontal_flip = options['horizontal_flip'] else: horizontal_flip = False if options['random_crop']: random_crop = options['random_crop'] else: random_crop = False # root_path = Path(dataset_dir) / split_name # paths = [str(x) for x in root_path.glob('*.tfrecords')] filename_queue = tf.train.string_input_producer(paths, shuffle=shuffle) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example( serialized_example, features={ 'video': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.string), 'in_seq_len': tf.FixedLenFeature([], tf.int64), 'out_seq_len': tf.FixedLenFeature([], tf.int64) } ) video = tf.cast(tf.decode_raw(features['video'], tf.uint8), tf.float32) # / 255. label = tf.cast(tf.decode_raw(features['label'], tf.uint8), tf.int32) in_seq_len = tf.cast(features['in_seq_len'], tf.int32) out_seq_len = tf.cast(features['out_seq_len'], tf.int32) # perform bucketing with input_length being the single filter (need to add out_length buckets) # Number of threads should always be one, in order to load samples sequentially. 
_seq_lens, [encoder_inputs, target_labels, encoder_inputs_lengths, target_labels_lengths] = \ tf.contrib.training.bucket_by_sequence_length(in_seq_len, [video, label, in_seq_len, out_seq_len], batch_size, [20, 30, 50, 60, 88, 400], num_threads=1, capacity=500, dynamic_pad=True, allow_smaller_final_batch=True) # encoder_inputs, target_labels, encoder_inputs_lengths, target_labels_lengths = \ # tf.train.batch([video, label, in_seq_len, out_seq_len], batch_size, # num_threads=1, capacity=500, dynamic_pad=True, # allow_smaller_final_batch=True) encoder_inputs = tf.reshape(encoder_inputs, (batch_size, tf.reduce_max(encoder_inputs_lengths), frame_size, frame_size, num_channels)) target_labels = tf.reshape(target_labels, (batch_size, -1)) # create decoder_inputs # add <sos> token # decoder_inputs = tf.identity(target_labels) sos_slice = tf.constant(options['num_classes'] - 2, dtype=tf.int32, shape=[options['batch_size'], 1]) decoder_inputs = tf.concat([sos_slice, target_labels], axis=1) decoder_inputs = tf.one_hot(decoder_inputs, num_classes) if crop_size is not None and random_crop: encoder_inputs = tf.random_crop(encoder_inputs, [batch_size, tf.reduce_max(encoder_inputs_lengths), crop_size, crop_size, num_channels]) elif crop_size: start_xy = int((frame_size - crop_size) / 2) encoder_inputs = tf.slice(encoder_inputs, [0, 0, start_xy, start_xy, 0], [batch_size, tf.reduce_max(encoder_inputs_lengths), crop_size, crop_size, num_channels]) encoder_inputs = tf.reshape(encoder_inputs, [batch_size, -1, crop_size, crop_size, 1]) # random left right flip if horizontal_flip: sample = tf.random_uniform(shape=[], minval=0, maxval=1, dtype=tf.float32) option = tf.less(sample, 0.5) encoder_inputs = tf.cond(option, lambda: tf.map_fn(video_left_right_flip, encoder_inputs), lambda: tf.map_fn(tf.identity, encoder_inputs)) encoder_inputs = normalize(encoder_inputs) # slicw video to time_window_len consecutive frames with stride 1 if time_window_len != 1: # pad encoder_inputs s.t. each frame is in the same number of slices ei_paddings = [[0, 0], [time_window_len-1, time_window_len-1], [0, 0], [0, 0], [0, 0]] padded_encoder_inputs = tf.pad(encoder_inputs, ei_paddings, 'CONSTANT', constant_values=0) encoder_inputs = slice_video(padded_encoder_inputs, dims=[batch_size, tf.reduce_max(encoder_inputs_lengths) + 2*(time_window_len - 1), crop_size, crop_size, num_channels], time_window=time_window_len) encoder_inputs = tf.reshape(encoder_inputs, [batch_size, -1, crop_size, crop_size, time_window_len]) encoder_inputs_lengths = encoder_inputs_lengths + time_window_len - 1 return encoder_inputs, target_labels, decoder_inputs, encoder_inputs_lengths, target_labels_lengths
def crnn_fn(features, labels, mode, params): """ :param features: dict { 'image' 'images_width' 'corpora' } :param labels: labels. flattend (1D) array with encoded label (one code per character) :param mode: :param params: dict { 'Params' } :return: """ parameters = params.get('Params') assert isinstance(parameters, Params) # Load pre-trained cnn model if parameters.cnn_pretained_ckpt_path: exclude = ['deep_bidirectional_lstm'] variables_to_restore = tf.contrib.slim.get_variables_to_restore( exclude=exclude) tf.train.init_from_checkpoint( parameters.cnn_pretained_ckpt_path, {v.name.split(':')[0]: v for v in variables_to_restore}) if mode != tf.estimator.ModeKeys.TRAIN: parameters.keep_prob_dropout = 1.0 conv = deep_cnn(features['image'], (mode == tf.estimator.ModeKeys.TRAIN), parameters.cnn_model, summaries=False) logprob, raw_pred = deep_bidirectional_lstm(conv, features['corpus'], params=parameters, summaries=False) # Compute seq_len from image width n_pools = parameters.width_down_sampling seq_len_inputs = tf.divide( features['image_width'], n_pools, name='seq_len_input_op') - 1 predictions_dict = {'prob': logprob, 'raw_predictions': raw_pred} if not mode == tf.estimator.ModeKeys.PREDICT: # Alphabet and codes keys = [c for c in parameters.alphabet.encode('latin1')] values = parameters.alphabet_codes # Convert string label to code label with tf.name_scope('str2code_conversion'): table_str2int = tf.contrib.lookup.HashTable( tf.contrib.lookup.KeyValueTensorInitializer( keys, values, key_dtype=tf.int64, value_dtype=tf.int64), -1) splitted = tf.string_split(labels, delimiter='') values_int = tf.cast( tf.squeeze(tf.decode_raw(splitted.values, tf.uint8)), tf.int64) codes = table_str2int.lookup(values_int) codes = tf.cast(codes, tf.int32) sparse_code_target = tf.SparseTensor(splitted.indices, codes, splitted.dense_shape) seq_lengths_labels = tf.bincount( tf.cast(sparse_code_target.indices[:, 0], tf.int32), #array of labels length minlength=tf.shape(predictions_dict['prob'])[1]) # Loss # ---- # >>> Cannot have longer labels than predictions -> error with tf.control_dependencies([ tf.less_equal(sparse_code_target.dense_shape[1], tf.reduce_max(tf.cast(seq_len_inputs, tf.int64))) ]): loss_ctc = tf.nn.ctc_loss( labels=sparse_code_target, inputs=predictions_dict['prob'], sequence_length=tf.cast(seq_len_inputs, tf.int32), preprocess_collapse_repeated=False, ctc_merge_repeated=True, ignore_longer_outputs_than_inputs= True, # returns zero gradient in case it happens -> ema loss = NaN time_major=True) loss_ctc = tf.reduce_mean(loss_ctc) loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ') global_step = tf.train.get_or_create_global_step() # # Create an ExponentialMovingAverage object ema = tf.train.ExponentialMovingAverage(decay=0.99, num_updates=global_step, zero_debias=True) # Create the shadow variables, and add op to maintain moving averages maintain_averages_op = ema.apply([loss_ctc]) loss_ema = ema.average(loss_ctc) # Train op # -------- if parameters.learning_rate_decay: learning_rate = tf.train.exponential_decay( parameters.learning_rate, global_step, parameters.learning_rate_steps, parameters.learning_rate_decay, staircase=True) else: learning_rate = tf.constant(parameters.learning_rate) if parameters.optimizer == 'ada': optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif parameters.optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9) elif parameters.optimizer == 'adam': optimizer = tf.train.AdamOptimizer( learning_rate, beta1=0.5, 
epsilon=1e-07) # at 1e-08 sometimes exploding gradient elif parameters.optimizer == 'rms': optimizer = tf.train.RMSPropOptimizer(learning_rate) if not parameters.train_cnn: trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'deep_bidirectional_lstm') print('Training LSTM only') else: trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) opt_op = optimizer.minimize(loss_ctc, global_step=global_step, var_list=trainable) with tf.control_dependencies(update_ops + [opt_op]): train_op = tf.group(maintain_averages_op) # Summaries # --------- tf.summary.scalar('learning_rate', learning_rate) tf.summary.scalar('losses/ctc_loss', loss_ctc) else: loss_ctc, train_op = None, None if mode in [ tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.TRAIN ]: with tf.name_scope('code2str_conversion'): keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64) values = [c for c in parameters.alphabet_decoding] table_int2str = tf.contrib.lookup.HashTable( tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?') sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder( predictions_dict['prob'], sequence_length=tf.cast(seq_len_inputs, tf.int32), merge_repeated=False, beam_width=100, top_paths=parameters.nb_logprob) # likelihoood. For future rename it as confidence and take softmax of log_probability predictions_dict['score'] = log_probability sequence_lengths_pred = [ tf.bincount(tf.cast(sparse_code_pred[i].indices[:, 0], tf.int32), minlength=tf.shape(predictions_dict['prob'])[1]) for i in range(parameters.top_paths) ] pred_chars = [ table_int2str.lookup(sparse_code_pred[i]) for i in range(parameters.top_paths) ] list_preds = [ get_words_from_chars(pred_chars[i].values, sequence_lengths=sequence_lengths_pred[i]) for i in range(parameters.top_paths) ] predictions_dict['words'] = tf.stack(list_preds) tf.summary.text('predicted_words', predictions_dict['words'][0][:10]) # Evaluation ops # -------------- if mode == tf.estimator.ModeKeys.EVAL: with tf.name_scope('evaluation'): CER = tf.metrics.mean(tf.edit_distance( sparse_code_pred[0], tf.cast(sparse_code_target, dtype=tf.int64)), name='CER') # Convert label codes to decoding alphabet to compare predicted and groundtrouth words target_chars = table_int2str.lookup( tf.cast(sparse_code_target, tf.int64)) target_words = get_words_from_chars(target_chars.values, seq_lengths_labels) accuracy = tf.metrics.accuracy(target_words, predictions_dict['words'][0], name='accuracy') eval_metric_ops = { 'eval/accuracy': accuracy, 'eval/CER': CER, } CER = tf.Print(CER, [CER], message='-- CER : ') accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ') else: eval_metric_ops = None export_outputs = { 'predictions': tf.estimator.export.PredictOutput(predictions_dict) } return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions_dict, loss=loss_ctc, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs, scaffold=tf.train.Scaffold())
def read_and_decode(filename_queue):  # input: a queue of filenames
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(  # parse the example
        serialized_example,
        # the key names inside features must be spelled out
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),  # the image is stored as a string
            'label': tf.FixedLenFeature([], tf.int64),        # the label is an int64
        })
    # For a BytesList we have to decode again: turn the string 0-D tensor into a uint8 1-D tensor
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    mnist = input_data.read_data_sets('data/',
                                      dtype=tf.uint8,  # note the encoding here is uint8
                                      reshape=False,
                                      # validation_size=FLAGS.validation_size
                                      )
    image.set_shape([mnist.IMAGE_PIXELS])
    # Tensor("input/DecodeRaw:0", shape=(784,), dtype=uint8)
    # after scaling, the image tensor is: Tensor("input/sub:0", shape=(784,), dtype=float32)
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    # convert the label from uint8 to int32
    # the label tensor is Tensor("input/Cast_1:0", shape=(), dtype=int32)
    label = tf.cast(features['label'], tf.int32)
    return image, label

# Next, use tf.train.shuffle_batch to shuffle the samples produced above and collect a mini-batch of tensors
def inputs(train, batch_size, num_epochs):
    # Arguments:
    #   batch_size: how many samples per training batch
    #   num_epochs: how many passes over the data; 0/None means train forever
    '''
    Returns: a tuple (images, labels)
      * images: float, shape [batch_size, mnist.IMAGE_PIXELS], range [-0.5, 0.5].
      * labels: int32, shape [batch_size], range [0, mnist.NUM_CLASSES]
    Note that the tf.train.QueueRunner must be started with tf.train.start_queue_runners()
    '''
    if not num_epochs: num_epochs = None
    # File path, e.g. /tmp/data/train.tfrecords or /tmp/data/validation.records
    filename = os.path.join('/home/niangu/桌面/TensorFlow/test.tfrecords', )
    with tf.name_scope('input'):
        # tf.train.string_input_producer returns a QueueRunner that holds a FIFOQueue
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)  # with a large dataset, split it across several files and pass in the list of filenames
        image, label = read_and_decode(filename_queue)
        # Shuffle the examples and pack them into batches of batch_size
        # tf.train.shuffle_batch creates a RandomShuffleQueue and starts two threads
        images, sparse_labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=2,
            capacity=1000 + 3 * batch_size,
            min_after_dequeue=1000)  # keep part of the queue filled so there is always enough data for shuffling
    return images, sparse_labels

# Finally, feed the generated batch tensors to the network as input and train
def run_training():
    with tf.Graph().as_default():
        # input images and labels
        images, labels = inputs(train=True,
                                batch_size=FLAGS.batch_size,
                                num_epochs=FLAGS.num_epochs)
        # Build a graph that computes predictions from the inference model
        logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels)  # define the loss function
        # Add to the Graph operations that train the model
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Initialize variables. Note that string_input_producer internally creates an epoch counter,
        # which lives in the tf.GraphKeys.LOCAL_VARIABLES collection and must be initialized separately
        # with local_variables_initializer()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess = tf.Session()
        sess.run(init_op)
        # Start input enqueue threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            step = 0
            while not coord.should_stop():  # loop until the coordinator requests a stop
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time
                # print a result every 100 training steps
                if step % 100 == 0:
                    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
        finally:
            coord.request_stop()  # tell the other threads to shut down
        coord.join(threads)
        sess.close()
def read_data(filename_queue, is_train): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example(serialized_example, features={ 'name': tf.FixedLenFeature([], tf.string), 'm': tf.FixedLenFeature([], tf.int64), 'n': tf.FixedLenFeature([], tf.int64), 'query': tf.FixedLenFeature([], tf.string), 'align': tf.FixedLenFeature([], tf.string), 'y': tf.FixedLenFeature([], tf.string), 'mask': tf.FixedLenFeature([], tf.string), 'gap': tf.VarLenFeature(tf.float32), 'identity': tf.VarLenFeature(tf.float32), 'identity_cons': tf.VarLenFeature(tf.float32), 'ss_dssp': tf.FixedLenFeature([], tf.string), 'asa_num': tf.VarLenFeature(tf.int64), }) name = features["name"] m = tf.cast(features["m"], tf.int32) n = tf.cast(features["n"], tf.int32) align = tf.reshape(tf.decode_raw(features["align"], tf.uint8), tf.stack([m, n])) query = tf.decode_raw(features["query"], tf.uint8) y = tf.reshape(tf.decode_raw(features["y"], tf.uint8), tf.stack([n, n])) mask = tf.reshape(tf.decode_raw(features["mask"], tf.uint8), tf.stack([n, n])) gap = features["gap"].values identity = features["identity"].values identity_cons = features["identity_cons"].values ss_dssp = tf.decode_raw(features["ss_dssp"], tf.uint8) asa_num = tf.cast(features["asa_num"].values, tf.int32) gap = features["gap"].values identity = features["identity"].values identity_cons = features["identity_cons"].values #clip def clipping(align, query, ss_dssp, asa_num, y, mask): begin = tf.random_uniform([], maxval=tf.shape(align)[1] - n_clip, dtype=tf.int32) align = align[:, begin:begin + n_clip] query = query[begin:begin + n_clip] ss_dssp = ss_dssp[begin:begin + n_clip] asa_num = asa_num[begin:begin + n_clip] y = y[begin:begin + n_clip, begin:begin + n_clip] mask = mask[begin:begin + n_clip, begin:begin + n_clip] return align, query, ss_dssp, asa_num, y, mask align, query, ss_dssp, asa_num, y, mask = tf.cond( (n > n_clip) & (is_train), lambda: clipping(align, query, ss_dssp, asa_num, y, mask), lambda: (align, query, ss_dssp, asa_num, y, mask)) #sampling def sampling(align, gap, identity, identity_cons): idx = tf.random_uniform([n_alignment], maxval=m, dtype=tf.int32) align = tf.gather_nd(align, tf.expand_dims(idx, 1)) gap = tf.gather_nd(gap, tf.expand_dims(idx, 1)) identity = tf.gather_nd(identity, tf.expand_dims(idx, 1)) identity_cons = tf.gather_nd(identity_cons, tf.expand_dims(idx, 1)) return align, gap, identity, identity_cons align, gap, identity, identity_cons = tf.cond( (m > n_alignment) & (is_train), lambda: sampling(align, gap, identity, identity_cons), lambda: (align, gap, identity, identity_cons)) return name, align, query, y, mask, gap, identity, identity_cons, ss_dssp, asa_num
def get(self): """ Provides input data to the graph. """ # calculate size of each record (this lists what is contained in the db and how many bytes are occupied) record_bytes = 0 encoding_bytes = 4 kp_xyz_entries = 3 * self.num_kp record_bytes += encoding_bytes * kp_xyz_entries encoding_bytes = 4 kp_uv_entries = 2 * self.num_kp record_bytes += encoding_bytes * kp_uv_entries kp_vis_entries = self.num_kp record_bytes += encoding_bytes * kp_vis_entries image_bytes = self.image_size[0] * self.image_size[1] * 3 record_bytes += image_bytes """ READ DATA ITEMS""" # Start reader reader = tf.FixedLengthRecordReader(header_bytes=0, record_bytes=record_bytes) _, value = reader.read( tf.train.string_input_producer([self.path_to_db])) # decode to floats bytes_read = 0 data_dict = dict() record_bytes_float32 = tf.decode_raw(value, tf.float32) # 1. Read keypoint xyz keypoint_xyz21 = tf.reshape( tf.slice(record_bytes_float32, [bytes_read // 4], [kp_xyz_entries]), [self.num_kp, 3]) bytes_read += encoding_bytes * kp_xyz_entries keypoint_xyz21 /= 1000.0 # scale to meters keypoint_xyz21 = self.convert_kp(keypoint_xyz21) # calculate wrist coord if self.use_wrist_coord: wrist_xyz = keypoint_xyz21[16, :] + 2.0 * (keypoint_xyz21[0, :] - keypoint_xyz21[16, :]) keypoint_xyz21 = tf.concat( [tf.expand_dims(wrist_xyz, 0), keypoint_xyz21[1:, :]], 0) data_dict['keypoint_xyz21'] = keypoint_xyz21 # 2. Read keypoint uv AND VIS keypoint_uv_vis21 = tf.reshape( tf.slice(record_bytes_float32, [bytes_read // 4], [kp_uv_entries + kp_vis_entries]), [self.num_kp, 3]) bytes_read += encoding_bytes * (kp_uv_entries + kp_vis_entries) keypoint_uv_vis21 = self.convert_kp(keypoint_uv_vis21) keypoint_uv21 = keypoint_uv_vis21[:, :2] keypoint_vis21 = tf.equal(keypoint_uv_vis21[:, 2], 1.0) # calculate wrist vis if self.use_wrist_coord: wrist_vis = tf.logical_or(keypoint_vis21[16], keypoint_vis21[0]) keypoint_vis21 = tf.concat( [tf.expand_dims(wrist_vis, 0), keypoint_vis21[1:]], 0) wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] - keypoint_uv21[16, :]) keypoint_uv21 = tf.concat( [tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0) data_dict['keypoint_vis21'] = keypoint_vis21 if self.coord_uv_noise: noise = tf.truncated_normal([42, 2], mean=0.0, stddev=self.coord_uv_noise_sigma) keypoint_uv21 += noise data_dict['keypoint_uv21'] = keypoint_uv21 # decode to uint8 record_bytes_uint8 = tf.decode_raw(value, tf.uint8) # 4. Read image image = tf.reshape( tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]), [self.image_size[0], self.image_size[1], 3]) image = tf.cast(image, tf.float32) bytes_read += image_bytes # subtract mean image = image / 255.0 - 0.5 if self.hue_aug: image = tf.image.random_hue(image, self.hue_aug_max) data_dict['image'] = image """ CONSTANTS """ # Camera intrinsics sx = 822.79041 sy = 822.79041 tx = 318.47345 ty = 250.31296 data_dict['cam_mat'] = tf.constant([[sx, 0.0, tx], [0.0, sy, ty], [0.0, 0.0, 1.0]]) # Hand side: this dataset only contains left hands data_dict['hand_side'] = tf.one_hot(tf.constant(0, dtype=tf.int32), depth=2, on_value=1.0, off_value=0.0, dtype=tf.float32) assert bytes_read == record_bytes, "Doesnt add up." """ DEPENDENT DATA ITEMS: XYZ represenations. 
""" # make coords relative to root joint kp_coord_xyz_root = keypoint_xyz21[0, :] # this is the palm coord kp_coord_xyz21_rel = keypoint_xyz21 - kp_coord_xyz_root # relative coords in metric coords index_root_bone_length = tf.sqrt( tf.reduce_sum( tf.square(kp_coord_xyz21_rel[12, :] - kp_coord_xyz21_rel[11, :]))) data_dict['keypoint_scale'] = index_root_bone_length data_dict[ 'keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length # normalized by length of 12->11 # calculate local coordinates kp_coord_xyz21_local = bone_rel_trafo( data_dict['keypoint_xyz21_normed']) kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local) data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local # calculate viewpoint and coords in canonical coordinates kp_coord_xyz21_rel_can, rot_mat = canonical_trafo( data_dict['keypoint_xyz21_normed']) kp_coord_xyz21_rel_can, rot_mat = tf.squeeze( kp_coord_xyz21_rel_can), tf.squeeze(rot_mat) data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can data_dict['rot_mat'] = tf.matrix_inverse(rot_mat) """ DEPENDENT DATA ITEMS: HAND CROP """ if self.hand_crop: crop_center = keypoint_uv21[12, ::-1] # catch problem, when no valid kp available (happens almost never) crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)), lambda: crop_center, lambda: tf.constant([0.0, 0.0])) crop_center.set_shape([ 2, ]) if self.crop_center_noise: noise = tf.truncated_normal( [2], mean=0.0, stddev=self.crop_center_noise_sigma) crop_center += noise crop_scale_noise = tf.constant(1.0) if self.crop_scale_noise: crop_scale_noise = tf.squeeze( tf.random_uniform([1], minval=1.0, maxval=1.2)) if not self.use_wrist_coord: wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] - keypoint_uv21[16, :]) keypoint_uv21 = tf.concat( [tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0) # select visible coords only kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21) kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21) kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1) # determine size of crop (measure spatial extend of hw coords first) min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0) max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0), self.image_size) # find out larger distance wrt the center of crop crop_size_best = 2 * tf.maximum(max_coord - crop_center, crop_center - min_coord) crop_size_best = tf.reduce_max(crop_size_best) crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0), 500.0) # catch problem, when no valid kp available crop_size_best = tf.cond( tf.reduce_all(tf.is_finite(crop_size_best)), lambda: crop_size_best, lambda: tf.constant(200.0)) crop_size_best.set_shape([]) # calculate necessary scaling scale = tf.cast(self.crop_size, tf.float32) / crop_size_best scale = tf.minimum(tf.maximum(scale, 1.0), 10.0) scale *= crop_scale_noise data_dict['crop_scale'] = scale if self.crop_offset_noise: noise = tf.truncated_normal( [2], mean=0.0, stddev=self.crop_offset_noise_sigma) crop_center += noise # Crop image img_crop = crop_image_from_xy(tf.expand_dims(image, 0), crop_center, self.crop_size, scale) data_dict['image_crop'] = tf.squeeze(img_crop) # Modify uv21 coordinates crop_center_float = tf.cast(crop_center, tf.float32) keypoint_uv21_u = ( data_dict['keypoint_uv21'][:, 0] - crop_center_float[1]) * scale + self.crop_size // 2 keypoint_uv21_v = ( data_dict['keypoint_uv21'][:, 1] - crop_center_float[0]) * scale + self.crop_size // 2 keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1) data_dict['keypoint_uv21'] = 
keypoint_uv21 # Modify camera intrinsics scale = tf.reshape(scale, [ 1, ]) scale_matrix = tf.dynamic_stitch([ [0], [1], [2], [3], [4], [5], [6], [7], [8] ], [scale, [0.0], [0.0], [0.0], scale, [0.0], [0.0], [0.0], [1.0]]) scale_matrix = tf.reshape(scale_matrix, [3, 3]) crop_center_float = tf.cast(crop_center, tf.float32) trans1 = crop_center_float[0] * scale - self.crop_size // 2 trans2 = crop_center_float[1] * scale - self.crop_size // 2 trans1 = tf.reshape(trans1, [ 1, ]) trans2 = tf.reshape(trans2, [ 1, ]) trans_matrix = tf.dynamic_stitch( [[0], [1], [2], [3], [4], [5], [6], [7], [8]], [[1.0], [0.0], -trans2, [0.0], [1.0], -trans1, [0.0], [0.0], [1.0]]) trans_matrix = tf.reshape(trans_matrix, [3, 3]) data_dict['cam_mat'] = tf.matmul( trans_matrix, tf.matmul(scale_matrix, data_dict['cam_mat'])) """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints""" # create scoremaps from the subset of 2D annoataion keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]], -1) scoremap_size = self.image_size if self.hand_crop: scoremap_size = (self.crop_size, self.crop_size) scoremap = self.create_multiple_gaussian_map(keypoint_hw21, scoremap_size, self.sigma, valid_vec=keypoint_vis21) if self.scoremap_dropout: scoremap = tf.nn.dropout(scoremap, self.scoremap_dropout_prob, noise_shape=[1, 1, 21]) scoremap *= self.scoremap_dropout_prob data_dict['scoremap'] = scoremap if self.random_crop_to_size: tensor_stack = tf.concat([ data_dict['image'], tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32), -1), tf.cast(data_dict['hand_mask'], tf.float32) ], 2) s = tensor_stack.get_shape().as_list() tensor_stack_cropped = tf.random_crop( tensor_stack, [self.random_crop_size, self.random_crop_size, s[2]]) data_dict = dict( ) # delete everything else because the random cropping makes the data invalid anyway data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\ tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\ tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32) names, tensors = zip(*data_dict.items()) if self.shuffle: tensors = tf.train.shuffle_batch_join([tensors], batch_size=self.batch_size, capacity=100, min_after_dequeue=50, enqueue_many=False) else: tensors = tf.train.batch_join([tensors], batch_size=self.batch_size, capacity=100, enqueue_many=False) return dict(zip(names, tensors))
def parse_tfrecord_tf(record):
    features = tf.parse_single_example(record,
                                       features={
                                           'shape': tf.FixedLenFeature([3], tf.int64),
                                           'data': tf.FixedLenFeature([], tf.string)})
    data = tf.decode_raw(features['data'], tf.uint8)
    return tf.reshape(data, features['shape'])
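# parse_tfrecord_tf is written to be mapped over a tf.data pipeline. A minimal
# sketch; the filename is an assumption, and since each record carries its own
# 'shape' the examples are not batched here:
dataset = tf.data.TFRecordDataset('images.tfrecords')
dataset = dataset.map(parse_tfrecord_tf, num_parallel_calls=4)
next_image = dataset.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    img = sess.run(next_image)  # one decoded uint8 array per call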
def decode_image(image):
    # Normalize from [0, 255] to [0.0, 1.0]
    image = tf.decode_raw(image, tf.uint8)
    image = tf.cast(image, tf.float32)
    image = tf.reshape(image, [784])
    return image / 255.0
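# decode_image only handles the pixel bytes. A hedged sketch of a parse function
# that pairs it with a label; the 'image_raw'/'label' feature keys are assumptions
# and should match however the records were actually written:
def parse_example(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = decode_image(features['image_raw'])
    label = tf.cast(features['label'], tf.int32)
    return image, label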
features={
    'image/height'     : tf.FixedLenFeature([], tf.int64 ),
    'image/width'      : tf.FixedLenFeature([], tf.int64 ),
    'image/colorspace' : tf.FixedLenFeature([], tf.string),
    'image/channels'   : tf.FixedLenFeature([], tf.int64 ),
    'image/class/label': tf.FixedLenFeature([], tf.int64 ),
    'image/class/text' : tf.FixedLenFeature([], tf.string),
    'image/format'     : tf.FixedLenFeature([], tf.string),
    'image/filename'   : tf.FixedLenFeature([], tf.string),
    'image/encoded'    : tf.FixedLenFeature([], tf.string)
},
    name='features'
)

# image was saved as uint8, so we have to decode tf.string as uint8.
imageT = tf.decode_raw(tfrecord['image/encoded'], tf.uint8)

# since exported as tf.int64 there is no need for tf.decode_raw
heightT = tfrecord['image/height']
widthT  = tfrecord['image/width' ]

# remember, it's all just ops, have to run to get result
with tf.Session() as sess:
    # init vars
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # init summary file writer
    sfw = tf.summary.FileWriter(os.getcwd(), graph=sess.graph)
def read_cifar10(data_dir, is_train, batch_size, shuffle): """Read CIFAR10 Args: data_dir: the directory of CIFAR10 is_train: boolen batch_size: shuffle: Returns: label: 1D tensor, tf.int32 image: 4D tensor, [batch_size, height, width, 3], tf.float32 """ img_width = 32 img_height = 32 img_depth = 3 label_bytes = 1 image_bytes = img_width * img_height * img_depth with tf.name_scope('input'): if is_train: filenames = [ os.path.join(data_dir, 'data_batch_%d.bin' % ii) for ii in np.arange(1, 6) ] else: filenames = [os.path.join(data_dir, 'test_batch.bin')] filename_queue = tf.train.string_input_producer(filenames) reader = tf.FixedLengthRecordReader(label_bytes + image_bytes) key, value = reader.read(filename_queue) record_bytes = tf.decode_raw(value, tf.uint8) label = tf.slice(record_bytes, [0], [label_bytes]) label = tf.cast(label, tf.int32) image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes]) image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width]) image = tf.transpose(image_raw, (1, 2, 0)) # convert from D/H/W to H/W/D image = tf.cast(image, tf.float32) image = tf.image.per_image_standardization( image) #substract off the mean and divide by the variance if shuffle: images, label_batch = tf.train.shuffle_batch( [image, label], batch_size=batch_size, num_threads=64, capacity=20000, min_after_dequeue=3000) else: images, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=2000) # ONE-HOT n_classes = 10 label_batch = tf.one_hot(label_batch, depth=n_classes) label_batch = tf.cast(label_batch, dtype=tf.int32) label_batch = tf.reshape(label_batch, [batch_size, n_classes]) return images, label_batch
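# A minimal sketch of driving read_cifar10 above from a session. The directory
# path and batch size are assumptions; the queue runners must be started or the
# first sess.run() on the batch tensors will block forever:
images, labels = read_cifar10('./cifar-10-batches-bin',
                              is_train=True, batch_size=64, shuffle=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        img_batch, lbl_batch = sess.run([images, labels])  # (64, 32, 32, 3) and (64, 10)
    finally:
        coord.request_stop()
        coord.join(threads)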
def mnist_tfrecord_input(data_dir, training=True, sequence_length=20, img_size=None, batch_size=1, seed=None): """Create input tfrecord tensors and queues. TFRecord: TFRecord(s) are assumed to be placed at `data_dir`. Each sample contains raw uint8 image sequences with key `'img_i'`. Training and validation set are pre-splitted and their corresponding record file have suffix `_trn.tfrecords` or `_val_tfrecords`. Preprocessing: Crop each image to a square one with size `min(ORIGINAL_HEIGHT, ORIGINAL_WIDTH)` and resize (bicubic) to `(IMG_WIDTH, IMG_HEIGHT)`. Normalize pixel value from [0, 255] to [0, 1] Args: data_dir: directory holding TFRecord(s). training: whether to use training or validation data. sequence_length: length of the video sequence. img_size: the (hight, width) of processed img input, if None use original size. batch_size: size of data mimi-batches. seed: random seed for `shuffle_batch` generator. Returns: list of tensors corresponding to images. The images tensor is 5D, batch x time x height x width x 1. Raises: RuntimeError: if no files found. """ file_suffix = '*_trn.tfrecords' if training else '*_val.tfrecords' filenames = gfile.Glob(os.path.join(data_dir, file_suffix)) if not filenames: raise RuntimeError('No data files found.') filename_queue = tf.train.string_input_producer(filenames, shuffle=True) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) image_seq = [] for i in range(sequence_length): # extract image tensor image_name = 'img_{}'.format(i) features = tf.parse_single_example( serialized_example, features={image_name: tf.FixedLenFeature([], tf.string)} ) image = tf.decode_raw(features[image_name], tf.uint8) image = tf.reshape(image, shape=[ORIGINAL_HEIGHT, ORIGINAL_WIDTH, COLOR_CHAN]) # preprocessing crop_size = min(ORIGINAL_HEIGHT, ORIGINAL_WIDTH) image = tf.image.resize_image_with_crop_or_pad(image, crop_size, crop_size) image = tf.reshape(image, [1, crop_size, crop_size, COLOR_CHAN]) if img_size is None: img_size = (ORIGINAL_HEIGHT, ORIGINAL_WIDTH) if img_size[0] != img_size[1]: raise ValueError('Unequal height and width unsupported') image = tf.image.resize_bicubic(image, img_size) image = tf.cast(image, tf.float32) / 255.0 image_seq.append(image) image_seq = tf.concat(axis=0, values=image_seq) image_batch = tf.train.shuffle_batch( tensors=[image_seq], batch_size=batch_size, capacity=100 * batch_size, min_after_dequeue=50 * batch_size, num_threads=batch_size, seed=seed ) return image_batch
def read_cifar100(filename_queue, coarse_or_fine='fine'): """Reads and parses examples from CIFAR100 data files. Recommendation: if you want N-way read parallelism, call this function N times. This will give you N independent Readers reading different files & positions within those files, which will give better mixing of examples. Args: filename_queue: A queue of strings with the filenames to read from. Returns: An object representing a single example, with the following fields: height: number of rows in the result (32) width: number of columns in the result (32) depth: number of color channels in the result (3) key: a scalar string Tensor describing the filename & record number for this example. label: an int32 Tensor with the label in the range 0..9. uint8image: a [height, width, depth] uint8 Tensor with the image data """ class CIFAR100Record(object): pass result = CIFAR100Record() coarse_label_bytes = 1 fine_label_bytes = 1 result.height = 32 result.width = 32 result.depth = 3 image_bytes = result.height * result.width * result.depth record_bytes = coarse_label_bytes + fine_label_bytes + image_bytes reader = tf.FixedLengthRecordReader(record_bytes=record_bytes, header_bytes=0, footer_bytes=0) result.key, value = reader.read(filename_queue) record_bytes = tf.decode_raw(value, tf.uint8) coarse_label = tf.cast( tf.strided_slice(record_bytes, [0], [coarse_label_bytes]), tf.int32) fine_label = tf.cast( tf.strided_slice(record_bytes, [coarse_label_bytes], [coarse_label_bytes + fine_label_bytes]), tf.int32) if coarse_or_fine == 'fine': result.label = fine_label # else: result.label = coarse_label # depth_major = tf.reshape( tf.strided_slice( record_bytes, [coarse_label_bytes + fine_label_bytes], [coarse_label_bytes + fine_label_bytes + image_bytes]), [result.depth, result.height, result.width]) result.uint8image = tf.transpose(depth_major, [1, 2, 0]) return result
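# The same fixed-length CIFAR-100 layout can also be consumed without queue runners.
# A minimal tf.data sketch; the 'train.bin' filename is an assumption, and the byte
# offsets mirror the reader above (1 coarse-label byte, 1 fine-label byte, 32x32x3 image):
record_bytes = 1 + 1 + 32 * 32 * 3

def parse_cifar100_record(raw):
    data = tf.decode_raw(raw, tf.uint8)
    fine_label = tf.cast(data[1], tf.int32)            # byte 0 holds the coarse label
    image = tf.reshape(data[2:], [3, 32, 32])
    return tf.transpose(image, [1, 2, 0]), fine_label  # to [height, width, depth]

dataset = tf.data.FixedLengthRecordDataset(['train.bin'], record_bytes)
dataset = dataset.map(parse_cifar100_record).batch(32)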
def cifar10(path=pathcifar, activation="sigmoid", conv_channels=(16, 16, 16), linear_layers=32, batch_size=128, num_threads=4, min_queue_examples=1000, mode="train"): """Cifar10 classification with a convolutional network.""" # Data. _open_cifar10(path) if activation == "sigmoid": activation_op = tf.sigmoid elif activation == "relu": activation_op = tf.nn.relu else: raise ValueError("{} activation not supported".format(activation)) # Read images and labels from disk. if mode == "train": filenames = [ os.path.join(path, CIFAR10_FOLDER, "data_batch_{}.bin".format(i)) for i in range(1, 6) ] elif mode == "test": filenames = [os.path.join(path, "test_batch.bin")] else: raise ValueError("Mode {} not recognised".format(mode)) depth = 3 height = 32 width = 32 label_bytes = 1 image_bytes = depth * height * width record_bytes = label_bytes + image_bytes reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) _, record = reader.read(tf.train.string_input_producer(filenames)) record_bytes = tf.decode_raw(record, tf.uint8) label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32) raw_image = tf.slice(record_bytes, [label_bytes], [image_bytes]) image = tf.cast(tf.reshape(raw_image, [depth, height, width]), tf.float32) # height x width x depth. image = tf.transpose(image, [1, 2, 0]) image = tf.div(image, 255) queue = tf.RandomShuffleQueue( capacity=min_queue_examples + 3 * batch_size, min_after_dequeue=min_queue_examples, dtypes=[tf.float32, tf.int32], shapes=[image.get_shape(), label.get_shape()]) enqueue_ops = [queue.enqueue([image, label]) for _ in range(num_threads)] tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops)) with tf.name_scope('Optimizee_loss'): def compute_loss(): image_batch, label_batch = queue.dequeue_many(batch_size) label_batch = tf.reshape(label_batch, [batch_size]) output = image_batch with tf.variable_scope('ConvMLP', reuse=tf.AUTO_REUSE): conv1_w = tf.get_variable( "conv1_w", shape=[5, 5, depth, conv_channels[0]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv1_b = tf.get_variable( "conv1_b", shape=[ conv_channels[0], ], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv1_beta = tf.get_variable( "conv1_beta", shape=[1, 1, 1, conv_channels[0]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv2_w = tf.get_variable( "conv2_w", shape=[5, 5, conv_channels[0], conv_channels[1]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv2_b = tf.get_variable( "conv2_b", shape=[ conv_channels[1], ], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv2_beta = tf.get_variable( "conv2_beta", shape=[1, 1, 1, conv_channels[1]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv3_w = tf.get_variable( "conv3_w", shape=[5, 5, conv_channels[1], conv_channels[2]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv3_b = tf.get_variable( "conv3_b", shape=[ conv_channels[2], ], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) conv3_beta = tf.get_variable( "conv3_beta", shape=[1, 1, 1, conv_channels[2]], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) output = tf.nn.convolution(output, conv1_w, padding='SAME', strides=[1, 1]) output = tf.nn.relu(tf.nn.bias_add(output, conv1_b)) output = tf.nn.max_pool(output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') b_m_1, b_v_1 = tf.nn.moments(output, axes=[0, 1, 2]) output = 
tf.nn.batch_normalization(output, b_m_1, b_v_1, conv1_beta, scale=None, variance_epsilon=1e-8) output = tf.nn.convolution(output, conv2_w, padding='SAME', strides=[1, 1]) output = tf.nn.relu(tf.nn.bias_add(output, conv2_b)) output = tf.nn.max_pool(output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') b_m_2, b_v_2 = tf.nn.moments(output, [0, 1, 2]) output = tf.nn.batch_normalization(output, b_m_2, b_v_2, conv2_beta, scale=None, variance_epsilon=1e-8) output = tf.nn.convolution(output, conv3_w, padding='SAME', strides=[1, 1]) output = tf.nn.relu(tf.nn.bias_add(output, conv3_b)) output = tf.nn.max_pool(output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') b_m_3, b_v_3 = tf.nn.moments(output, [0, 1, 2]) output = tf.nn.batch_normalization(output, b_m_3, b_v_3, conv3_beta, scale=None, variance_epsilon=1e-8) output = tf.layers.flatten(output) W_in = tf.get_variable( "W_in", shape=[output.shape[1], linear_layers], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) b_in = tf.get_variable( "b_in", shape=[ linear_layers, ], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) W_out = tf.get_variable( "W_out", shape=[linear_layers, 10], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) b_out = tf.get_variable( "b_out", shape=[ 10, ], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) layer_out = activation_op(tf.add(tf.matmul(output, W_in), b_in)) output = tf.add(tf.matmul(layer_out, W_out), b_out) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=output, labels=label_batch) return tf.reduce_mean(loss) with tf.name_scope('Convex_loss'): def convex_loss(): with tf.variable_scope('conv_var', reuse=tf.AUTO_REUSE): v = tf.get_variable( "v", shape=[1, 10], dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01)) # Non-trainable variables. target = tf.get_variable( "target", shape=[1, 10], dtype=tf.float32, initializer=tf.random_uniform_initializer(), trainable=False) return tf.reduce_mean( tf.clip_by_value(tf.square(v - target), 0, 10)) return collections.OrderedDict([('Opt_loss', compute_loss), ('Aux_loss', convex_loss)])