import tensorflow as tf  # TF 1.x queue-based input pipeline


def read_and_decode(filename_queue, batch_size):
    """Parse serialized Examples from `filename_queue` and return batched
    (word_opinion, char_opinion) tensors. `Hp` is assumed to be an external
    hyperparameter object providing `w_maxlen` and `c_maxlen`."""

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # 'height' and 'char_opinion' are required; 'word_opinion' falls
        # back to -1 padding when absent.
        features={
            'height':
            tf.FixedLenFeature([], tf.int64),
            'word_opinion':
            tf.FixedLenFeature([Hp.w_maxlen * 6],
                               dtype=tf.int64,
                               default_value=[-1] * Hp.w_maxlen * 6),
            'char_opinion':
            tf.FixedLenFeature([], tf.string)
        })

    char_opinion = tf.decode_raw(features['char_opinion'], tf.uint8)
    height = tf.cast(features['height'], tf.int32)
    word_opinion = tf.cast(features['word_opinion'], tf.int32)

    char_opinion = tf.reshape(char_opinion, tf.stack([6, Hp.c_maxlen]))
    word_opinion = tf.reshape(word_opinion, tf.stack([6, Hp.w_maxlen]))
    words, chars = tf.train.batch([word_opinion, char_opinion],
                                  batch_size=batch_size,
                                  capacity=3 * batch_size,
                                  num_threads=1)

    return (words, chars)
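
# A minimal driver sketch, not from the original source: the path
# 'opinions.tfrecords' and batch size 32 are assumptions, and the same
# external `Hp` config must be importable. In TF 1.x the batch tensors only
# yield values once queue runners have been started in the session.
filename_queue = tf.train.string_input_producer(['opinions.tfrecords'])
words, chars = read_and_decode(filename_queue, batch_size=32)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    w, c = sess.run([words, chars])  # w: (32, 6, Hp.w_maxlen), c: (32, 6, Hp.c_maxlen)
    coord.request_stop()
    coord.join(threads)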
Example 2

import numpy as np
import tensorflow as tf  # TF 1.x queue-based input pipeline


def read_from_tfrecords(tfFileDirName,
                        varNames,
                        sizeBatch,
                        shape,
                        shuffle=True,
                        rs=888):
    """
    example:
    read_from_tfrecords('./Data/digits.tfrecords',['x','y'],32,[[28,28],[1]])

    return: list of tensors. (this function should be only used in tensorflow codes)
    """
    varNames = list(varNames)
    # Normalize `shape` so every entry is a list of ints (scalars become [n]).
    tmp = [np.asarray(i, dtype=np.int32) for i in shape]
    shape = []
    for i in tmp:
        if np.sum(np.shape(i)) > 1:
            shape.append(list(i))
        else:
            shape.append([int(i)])
    print(shape)
    filename_queue = tf.train.string_input_producer([tfFileDirName])
    print(filename_queue)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # Each variable is stored as a raw byte string of float32 values.
    tmpFeatures = {}
    for ii in varNames:
        tmpFeatures[ii] = tf.FixedLenFeature([], tf.string)
    tmpFeatures = tf.parse_single_example(serialized_example,
                                          features=tmpFeatures)
    tmpVar = []
    for i in range(len(varNames)):
        ii = varNames[i]
        tmp = tf.decode_raw(tmpFeatures[ii], tf.float32)
        tmp = tf.reshape(tmp, shape=list(shape[i]))
        tmpVar.append(tmp)
    print(tmpVar)

    # Trouble caused here: the batching ops block until queue runners are
    # started in the session.
    if shuffle:
        tmpBatch = tf.train.shuffle_batch(tmpVar,
                                          sizeBatch,
                                          capacity=sizeBatch * 128,
                                          min_after_dequeue=sizeBatch * 32,
                                          name=None,
                                          seed=rs)
    else:
        tmpBatch = tf.train.batch(tmpVar,
                                  sizeBatch,
                                  capacity=sizeBatch * 128,
                                  name=None)

    print(tmpBatch)
    return tmpBatch
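
# An end-to-end sketch, not from the original source: it first writes a
# compatible TFRecord file (hypothetical path 'digits.tfrecords', random
# float32 data) and then reads it back. Records must hold raw float32 bytes,
# because the reader decodes with tf.decode_raw(..., tf.float32).
x = np.random.rand(100, 28, 28).astype(np.float32)
y = np.random.randint(0, 10, size=(100, 1)).astype(np.float32)

with tf.python_io.TFRecordWriter('digits.tfrecords') as writer:
    for xi, yi in zip(x, y):
        example = tf.train.Example(features=tf.train.Features(feature={
            'x': tf.train.Feature(bytes_list=tf.train.BytesList(value=[xi.tostring()])),
            'y': tf.train.Feature(bytes_list=tf.train.BytesList(value=[yi.tostring()])),
        }))
        writer.write(example.SerializeToString())

xb, yb = read_from_tfrecords('digits.tfrecords', ['x', 'y'], 32, [[28, 28], [1]])
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xv, yv = sess.run([xb, yb])  # xv: (32, 28, 28), yv: (32, 1)
    coord.request_stop()
    coord.join(threads)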