def read_and_decode(filename_queue, batch_size):
    """Read one serialized opinion example per step and group into batches.

    Parameters
    ----------
    filename_queue :
        Queue of TFRecord file names (e.g. from
        ``tf.train.string_input_producer``) to pull records from.
    batch_size : int
        Number of parsed examples per emitted batch.

    Returns
    -------
    (words, chars)
        Pair of batched tensors with per-example shapes
        ``[6, Hp.w_maxlen]`` (int32 word ids) and ``[6, Hp.c_maxlen]``
        (uint8 character codes).
    """
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)

    parsed = tf.parse_single_example(
        serialized,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            # 'word_opinion' falls back to -1 padding when the key is absent;
            # the other two keys are required.
            'word_opinion': tf.FixedLenFeature(
                [Hp.w_maxlen * 6],
                dtype=tf.int64,
                default_value=[-1] * Hp.w_maxlen * 6),
            'char_opinion': tf.FixedLenFeature([], tf.string),
        })

    # 'height' is decoded for graph parity with the original pipeline but is
    # not returned to the caller.
    height = tf.cast(parsed['height'], tf.int32)

    word_opinion = tf.reshape(
        tf.cast(parsed['word_opinion'], tf.int32),
        tf.stack([6, Hp.w_maxlen]))
    char_opinion = tf.reshape(
        tf.decode_raw(parsed['char_opinion'], tf.uint8),
        tf.stack([6, Hp.c_maxlen]))

    words, chars = tf.train.batch(
        [word_opinion, char_opinion],
        batch_size=batch_size,
        capacity=3 * batch_size,
        num_threads=1)
    return (words, chars)
def read_from_tfrecords(tfFileDirName, varNames, sizeBatch, shape, shuffle=True, rs=888):
    """Build an input pipeline that reads named float32 tensors from a TFRecord file.

    example:
        read_from_tfrecords('./Data/digits.tfrecords', ['x', 'y'], 32, [[28, 28], [1]])

    return: list of tensors. (this function should be only used in
    tensorflow codes)

    Parameters
    ----------
    tfFileDirName : str
        Path of the TFRecord file to read.
    varNames : sequence of str
        Feature keys to extract; each is stored as a raw byte string.
    sizeBatch : int
        Batch size for the output tensors.
    shape : sequence
        Target shape for each variable, scalar or list per entry.
    shuffle : bool
        Use a shuffling batch queue when True, a FIFO batch otherwise.
    rs : int
        Random seed for the shuffle queue.
    """
    varNames = list(varNames)

    # Normalize `shape` so every entry becomes a list: multi-dimensional
    # entries stay as-is, scalars / single values become a one-element list.
    normalized = []
    for arr in (np.asarray(s, dtype=np.int32) for s in shape):
        if np.sum(np.shape(arr)) > 1:
            normalized.append(list(arr))
        else:
            normalized.append([int(arr)])
    shape = normalized
    print(shape)

    filename_queue = tf.train.string_input_producer([tfFileDirName])
    print(filename_queue)

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Every requested variable is stored as a single raw byte string.
    featureSpec = {name: tf.FixedLenFeature([], tf.string) for name in varNames}
    parsed = tf.parse_single_example(serialized_example, features=featureSpec)

    # Decode each byte string back to float32 and restore its shape.
    tensors = []
    for name, tgtShape in zip(varNames, shape):
        raw = tf.decode_raw(parsed[name], tf.float32)
        tensors.append(tf.reshape(raw, shape=list(tgtShape)))
    print(tensors)  # Trouble caused here

    if shuffle:
        batched = tf.train.shuffle_batch(
            tensors, sizeBatch,
            capacity=sizeBatch * 128,
            min_after_dequeue=sizeBatch * 32,
            name=None,
            seed=rs)
    else:
        batched = tf.train.batch(
            tensors, sizeBatch,
            capacity=sizeBatch * 128,
            name=None)
    print(batched)
    return batched