def build_inputs(self): if self.mode == "encode": encode_ids = tf.placeholder(tf.int64, (200, None), name="encode_ids") encode_mask = tf.placeholder(tf.int8, (200, None), name="encode_mask") encode_labels = tf.placeholder(tf.int64, (200, None), name="encode_labels") else: # Prefetch serialized tf.Example protos. input_queue_word = input_ops.prefetch_input_data( self.reader_word, FLAGS.input_file_pattern_word, shuffle=FLAGS.shuffle_input_data, capacity=FLAGS.input_queue_capacity, num_reader_threads=FLAGS.num_input_reader_threads) # Deserialize a batch. serialized = input_queue_word.dequeue_many(FLAGS.batch_size) encode_word = input_ops.parse_example_batch(serialized) # Prefetch serialized tf.Example protos. input_queue_POS = input_ops.prefetch_input_data( self.reader_POS, FLAGS.input_file_pattern_POS, shuffle=FLAGS.shuffle_input_data, capacity=FLAGS.input_queue_capacity, num_reader_threads=FLAGS.num_input_reader_threads) # Deserialize a batch. serialized = input_queue_POS.dequeue_many(FLAGS.batch_size) encode_POS = input_ops.parse_example_batch(serialized) encode_ids = encode_word.ids encode_mask = encode_word.mask encode_labels = tf.cast(encode_POS.ids, tf.int32) encode_mask_labels = encode_POS.mask # encode_ids = tf.Print(encode_ids,[encode_ids[1,:30]], summarize=30) # encode_labels = tf.Print(encode_labels,[encode_labels[1,:30]], summarize=30) # encode_mask = tf.Print(encode_mask,[encode_mask[1,:30]], summarize=10) # words_length = tf.reduce_sum(encode_mask[1,:30]) # words_length = tf.Print(words_length,[words_length]) # POSs_length = tf.reduce_sum(encode_mask_labels[1,:30]) # POSs_length = tf.Print(POSs_length,[POSs_length]) self.encode_ids = encode_ids self.encode_mask = encode_mask self.encode_labels = encode_labels
def build_inputs(self): if self.mode == "encode": encode_ids = tf.placeholder(tf.int64, (None, None), name="encode_ids") encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask") else: # Prefetch serialized tf.Example protos. input_queue = input_ops.prefetch_input_data( self.reader, FLAGS.input_file_pattern, shuffle=FLAGS.shuffle_input_data, capacity=FLAGS.input_queue_capacity, num_reader_threads=FLAGS.num_input_reader_threads) # Deserialize a batch. serialized = input_queue.dequeue_many(FLAGS.batch_size) encode = input_ops.parse_example_batch(serialized) encode_ids = encode.ids encode_mask = encode.mask self.encode_ids = encode_ids self.encode_mask = encode_mask
def build_inputs(self): """Builds the ops for reading input data. Outputs: self.encode_ids self.decode_pre_ids self.decode_post_ids self.encode_mask self.decode_pre_mask self.decode_post_mask """ if self.mode == "encode": # Word embeddings are fed from an external vocabulary which has possibly # been expanded (see vocabulary_expansion.py). encode_ids = None decode_pre_ids = None decode_post_ids = None encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask") decode_pre_mask = None decode_post_mask = None else: # Prefetch serialized tf.Example protos. input_queue = input_ops.prefetch_input_data( self.reader, self.config.input_file_pattern, shuffle=self.config.shuffle_input_data, capacity=self.config.input_queue_capacity, num_reader_threads=self.config.num_input_reader_threads) # Deserialize a batch. serialized = input_queue.dequeue_many(self.config.batch_size) encode, decode_pre, decode_post = input_ops.parse_example_batch( serialized) encode_ids = encode.ids decode_pre_ids = decode_pre.ids decode_post_ids = decode_post.ids encode_mask = encode.mask decode_pre_mask = decode_pre.mask decode_post_mask = decode_post.mask self.encode_ids = encode_ids self.decode_pre_ids = decode_pre_ids self.decode_post_ids = decode_post_ids self.encode_mask = encode_mask self.decode_pre_mask = decode_pre_mask self.decode_post_mask = decode_post_mask