def _read_input(filename_queue):
    """Reads a single record and converts it to a tensor.

    Each record consists of the 3x32x32 image with one byte for the label.

    Args:
      filename_queue: A queue of strings with the filenames to read from.

    Returns:
      image: a [32, 32, 3] float32 Tensor with the image data.
      label: an int32 Tensor with the label in the range 0..9.
    """
    label_bytes = 1
    height = 32
    width = 32
    depth = 3
    image_bytes = height * width * depth
    record_bytes = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, byte_data = reader.read(filename_queue)
    uint_data = tf.decode_raw(byte_data, tf.uint8)

    label = tf.cast(tf.strided_slice(uint_data, [0], [label_bytes]), tf.int32)
    label.set_shape([1])

    depth_major = tf.reshape(
        tf.strided_slice(uint_data, [label_bytes], [record_bytes]),
        [depth, height, width])
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

    return image, label
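# A minimal hedged sketch (not part of any snippet here) of the
# tf.strided_slice semantics these readers rely on: `begin` is inclusive,
# `end` is exclusive, and `strides` (default 1) can skip elements.
# Assumes TensorFlow 1.x.
import tensorflow as tf

record = tf.constant([7, 10, 20, 30, 40], dtype=tf.uint8)  # label byte + four image bytes
label = tf.strided_slice(record, [0], [1])             # -> [7]
pixels = tf.strided_slice(record, [1], [5])            # -> [10, 20, 30, 40]
every_other = tf.strided_slice(record, [1], [5], [2])  # -> [10, 30]

with tf.Session() as sess:
    print(sess.run([label, pixels, every_other]))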
def read_cifar10(filename_queue):
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()
    label_bytes = 1
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
        tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
def ptb_producer(raw_data, batch_size, num_steps, name=None):
    with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
        raw_data = tf.convert_to_tensor(raw_data, dtype=tf.int32,
                                        name="raw_data")

        data_len = tf.size(raw_data)
        batch_len = data_len // batch_size
        data = tf.reshape(raw_data[0: batch_len * batch_size],
                          [batch_size, batch_len])

        epoch_size = (batch_len - 1) // num_steps
        assertion = tf.assert_positive(
            epoch_size, message="batch size too large")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
        x = tf.strided_slice(data, [0, i * num_steps],
                             [batch_size, (i + 1) * num_steps])
        x.set_shape([batch_size, num_steps])
        y = tf.strided_slice(data, [0, i * num_steps + 1],
                             [batch_size, (i + 1) * num_steps + 1])
        y.set_shape([batch_size, num_steps])
        return x, y
def read_data(file_q):
    # Code from https://github.com/tensorflow/models/blob/master/tutorials/image/cifar10/cifar10_input.py
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue. No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(file_q)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
        tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    reshaped_image = tf.cast(result.uint8image, tf.float32)

    height = 24
    width = 24

    # Image processing for evaluation.
    # Crop the central [height, width] of the image.
    resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                           height, width)

    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_standardization(resized_image)

    # Set the shapes of tensors.
    float_image.set_shape([height, width, 3])
    result.label.set_shape([1])

    return float_image, result.label
def read_cifar10(filename_queue):
    """Reads and parses examples from CIFAR10 data files.

    Recommendation: if you want N-way read parallelism, call this function
    N times. This will give you N independent Readers reading different
    files & positions within those files, which will give better mixing of
    examples.

    Args:
      filename_queue: A queue of strings with the filenames to read from.

    Returns:
      An object representing a single example, with the following fields:
        height: number of rows in the result (32)
        width: number of columns in the result (32)
        depth: number of color channels in the result (3)
        key: a scalar string Tensor describing the filename & record number
          for this example.
        label: an int32 Tensor with the label in the range 0..9.
        uint8image: a [height, width, depth] uint8 Tensor with the image data
    """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue. No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
        tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
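# A hedged usage sketch for read_cifar10 above, under the TF1 queue-runner
# input pipeline it assumes. The file names are illustrative and must exist
# on disk for the queue to produce records.
filenames = ['data_batch_%d.bin' % i for i in range(1, 6)]
filename_queue = tf.train.string_input_producer(filenames)
example = read_cifar10(filename_queue)
image = tf.cast(example.uint8image, tf.float32)
images, labels = tf.train.shuffle_batch(
    [image, example.label], batch_size=128,
    capacity=2000, min_after_dequeue=1000)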
def __init__(self, **kwargs):
    """Interprets alternating entries along the feature axis as real and
    imaginary parts and combines them into a complex-valued output."""
    super(AlternatingRealToComplexLayer, self).__init__(**kwargs)

    input_placeholder = self.input_data.get_placeholder_as_batch_major()

    real_value = tf.strided_slice(input_placeholder, [0, 0, 0],
                                  tf.shape(input_placeholder), [1, 1, 2])
    imag_value = tf.strided_slice(input_placeholder, [0, 0, 1],
                                  tf.shape(input_placeholder), [1, 1, 2])
    self.output.placeholder = tf.complex(real_value, imag_value)
    self.output.size_placeholder = {
        0: self.input_data.size_placeholder[
            self.input_data.time_dim_axis_excluding_batch]}
def _build_clp_multiplication(self, clp_kernel):
    from TFUtil import safe_log
    input_placeholder = self.input_data.get_placeholder_as_batch_major()
    tf.assert_equal(tf.shape(clp_kernel)[1],
                    tf.shape(input_placeholder)[2] // 2)
    tf.assert_equal(tf.shape(clp_kernel)[2], self._nr_of_filters)

    # Even feature entries are the real parts, odd entries the imaginary parts.
    input_real = tf.strided_slice(input_placeholder, [0, 0, 0],
                                  tf.shape(input_placeholder), [1, 1, 2])
    input_imag = tf.strided_slice(input_placeholder, [0, 0, 1],
                                  tf.shape(input_placeholder), [1, 1, 2])
    kernel_real = self._clp_kernel[0, :, :]
    kernel_imag = self._clp_kernel[1, :, :]
    output_real = (tf.einsum('btf,fp->btp', input_real, kernel_real)
                   - tf.einsum('btf,fp->btp', input_imag, kernel_imag))
    output_imag = (tf.einsum('btf,fp->btp', input_imag, kernel_real)
                   + tf.einsum('btf,fp->btp', input_real, kernel_imag))
    output_uncompressed = tf.sqrt(tf.pow(output_real, 2)
                                  + tf.pow(output_imag, 2))
    output_compressed = safe_log(output_uncompressed)
    return output_compressed
def _test_stridedslice(ip_shape, begin, end, stride, dtype,
                       begin_mask=0, end_mask=0, new_axis_mask=0,
                       shrink_axis_mask=0, ellipsis_mask=0):
    """One iteration of a Stridedslice."""
    tf.reset_default_graph()
    in_data = tf.placeholder(dtype, ip_shape, name="in_data")
    tf.strided_slice(in_data, begin, end, stride,
                     begin_mask=begin_mask,
                     end_mask=end_mask,
                     new_axis_mask=new_axis_mask,
                     shrink_axis_mask=shrink_axis_mask,
                     ellipsis_mask=ellipsis_mask,
                     name="strided_slice")
    np_data = np.random.uniform(size=ip_shape).astype(dtype)
    compare_tf_with_tvm(np_data, 'in_data:0', 'strided_slice:0')
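# Hedged example invocations of _test_stridedslice; the shapes, bounds and
# masks below are illustrative, not taken from the original test suite.
_test_stridedslice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], 'float32')
_test_stridedslice((3, 4), [1, 0], [2, 4], [1, 1], 'float32',
                   shrink_axis_mask=1)
_test_stridedslice((3, 4, 5), [1, 0, 0], [3, 4, 5], [1, 1, 1], 'float32',
                   begin_mask=2)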
def read_cifar10(filename_queue):
    """Reads and parses examples from CIFAR10 data files.

    Recommendation: if you want N-way read parallelism, call this function
    N times. This will give you N independent Readers reading different
    files & positions within those files, which will give better mixing of
    examples.

    Args:
      filename_queue: A queue of strings with the filenames to read from.

    Returns:
      An object representing a single example, with the following fields:
        height: number of rows in the result (32)
        width: number of columns in the result (32)
        depth: number of color channels in the result (3)
        key: a scalar string Tensor describing the filename & record number
          for this example.
        label: an int32 Tensor with the label in the range 0..9.
        uint8image: a [height, width, depth] uint8 Tensor with the image data
    """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()
    label_bytes = 1
    result.height, result.width, result.depth = 32, 32, 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
        tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we
    # reshape from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
def AddCrossEntropy(batch_size, n):
    """Adds a cross entropy cost function."""
    cross_entropies = []

    def _Pass():
        return tf.constant(0, dtype=tf.float32, shape=[1])

    for beam_id in range(batch_size):
        beam_gold_slot = tf.reshape(
            tf.strided_slice(n['gold_slot'], [beam_id], [beam_id + 1]), [1])

        def _ComputeCrossEntropy():
            """Adds ops to compute cross entropy of the gold path in a beam."""
            # Requires a cast so that UnsortedSegmentSum, in the gradient,
            # is happy with the type of its input 'segment_ids', which
            # must be int32.
            idx = tf.cast(
                tf.reshape(
                    tf.where(tf.equal(n['beam_ids'], beam_id)), [-1]),
                tf.int32)
            beam_scores = tf.reshape(tf.gather(n['all_path_scores'], idx),
                                     [1, -1])
            num = tf.shape(idx)
            return tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.expand_dims(
                    tf.sparse_to_dense(beam_gold_slot, num, [1.], 0.), 0),
                logits=beam_scores)

        # The conditional here is needed to deal with the last few batches of
        # the corpus which can contain -1 in beam_gold_slot for empty batch
        # slots.
        cross_entropies.append(tf.cond(
            beam_gold_slot[0] >= 0, _ComputeCrossEntropy, _Pass))
    return {'cross_entropy': tf.div(tf.add_n(cross_entropies), batch_size)}
def process_encoding_input(target_data, vocab_to_int, batch_size):
    '''Remove the last word id from each batch and concat the <GO> to the
    beginning of each batch.'''
    ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    dec_input = tf.concat(
        [tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1)
    return dec_input
def _my_metric_op(predictions, labels):
    # For the case of binary classification, the 2nd column of "predictions"
    # denotes the model predictions.
    labels = tf.to_float(labels)
    predictions = tf.strided_slice(predictions, [0, 1], [-1, 2], end_mask=1)
    labels = math_ops.cast(labels, predictions.dtype)
    return tf.reduce_sum(tf.multiply(predictions, labels))
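# A small hedged sketch of the end_mask behavior _my_metric_op relies on:
# with end_mask=1 (bit 0 set), the end value for dimension 0 is ignored and
# the slice runs to the end of that dimension, selecting column 1 of every row.
import tensorflow as tf

preds = tf.constant([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])
col1 = tf.strided_slice(preds, [0, 1], [-1, 2], end_mask=1)
with tf.Session() as sess:
    print(sess.run(col1))  # [[0.1], [0.8], [0.6]]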
def objective(x):
    """Rosenbrock function. (Carl Edward Rasmussen, 2001-07-21).

    f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2

    Args:
      x: a Variable
    Returns:
      f: a tensor (objective value)
    """
    d = tf.size(x)
    s = tf.add(
        100 * tf.square(
            tf.subtract(tf.strided_slice(x, [1], [d]),
                        tf.square(tf.strided_slice(x, [0], [d - 1])))),
        tf.square(tf.subtract(1.0, tf.strided_slice(x, [0], [d - 1]))),
    )
    return tf.reduce_sum(s)
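# A hedged usage sketch for objective() above: minimize the Rosenbrock
# function with plain gradient descent. Starting point, learning rate and
# step count are illustrative.
x = tf.Variable([0.5, 0.5], dtype=tf.float32)
f = objective(x)
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(f)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        sess.run(train_op)
    print(sess.run([x, f]))  # moves toward the minimum at x = [1, 1], f = 0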
def process_decoder_input(data, vocab_to_int, batch_size):
    '''Prepend <GO> and remove the last character of each sequence.'''
    # Cut off the last character.
    ending = tf.strided_slice(data, [0, 0], [batch_size, -1], [1, 1])
    decoder_input = tf.concat(
        [tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1)
    return decoder_input
def gather(self, src, force_copy=False):
    """Fetches the data corresponding to ``src`` from the base array.

    Parameters
    ----------
    src : `.TensorSignal`
        Signal indicating the data to be read from base array
    force_copy : bool
        If True, always perform a gather, not a slice (this forces a copy).
        Note that setting ``force_copy=False`` does not guarantee that a copy
        won't be performed.

    Returns
    -------
    gathered : ``tf.Tensor``
        Tensor object corresponding to a dense subset of data from the base
        array
    """
    logger.debug("gather")
    logger.debug("src %s", src)
    logger.debug("indices %s", src.indices)
    logger.debug("src base %s", self.bases[src.key])

    var = self.bases[src.key]

    # we prefer to get the data via `strided_slice` or `identity` if
    # possible, as it is more efficient
    if force_copy or src.tf_slice is None:
        result = tf.gather(var, src.tf_indices)
        self.read_types["gather"] += 1
    elif (src.indices[0] == 0 and
          src.indices[-1] == var.get_shape()[0].value - 1 and
          len(src.indices) == var.get_shape()[0]):
        result = var
        self.read_types["identity"] += 1
    else:
        result = tf.strided_slice(var, *src.tf_slice)
        self.read_types["strided_slice"] += 1

    # reshape the data according to the shape set in `src`, if there is
    # one, otherwise keep the shape of the base array
    if result.get_shape() != src.full_shape:
        result = tf.reshape(result, src.tf_shape)

    # for some reason the shape inference doesn't work in some cases
    result.set_shape(src.full_shape)

    # whenever we read from an array we use this to mark it as "read"
    # (so that any future writes to the array will be scheduled after
    # the read)
    self.mark_gather(src)

    return result
def ptb_producer(raw_data, batch_size, num_steps, name=None):
    """Iterate on the raw PTB data.

    This chunks up raw_data into batches of examples and returns Tensors that
    are drawn from these batches.

    Args:
      raw_data: one of the raw data outputs from ptb_raw_data.
      batch_size: int, the batch size.
      num_steps: int, the number of unrolls.
      name: the name of this operation (optional).

    Returns:
      A pair of Tensors, each shaped [batch_size, num_steps]. The second
      element of the tuple is the same data time-shifted to the right by one.

    Raises:
      tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
    """
    with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
        raw_data = tf.convert_to_tensor(raw_data, name="raw_data",
                                        dtype=tf.int32)

        data_len = tf.size(raw_data)
        batch_len = data_len // batch_size
        data = tf.reshape(raw_data[0: batch_size * batch_len],
                          [batch_size, batch_len])

        epoch_size = (batch_len - 1) // num_steps
        assertion = tf.assert_positive(
            epoch_size,
            message="epoch_size == 0, decrease batch_size or num_steps")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
        x = tf.strided_slice(data, [0, i * num_steps],
                             [batch_size, (i + 1) * num_steps])
        x.set_shape([batch_size, num_steps])
        y = tf.strided_slice(data, [0, i * num_steps + 1],
                             [batch_size, (i + 1) * num_steps + 1])
        y.set_shape([batch_size, num_steps])
        return x, y
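# A hedged usage sketch for ptb_producer above, under the TF1 queue-runner
# model it assumes; the raw data is illustrative.
raw_data_demo = list(range(20))
x_demo, y_demo = ptb_producer(raw_data_demo, batch_size=2, num_steps=3)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess, coord=coord)
    print(sess.run([x_demo, y_demo]))  # y is x time-shifted one step right
    coord.request_stop()
    coord.join(threads)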
def get_out_data_from_opts(cls, name, sources, pool_size, n_out=None, **kwargs):
    input_data = get_concat_sources_data_template(sources)
    assert not input_data.sparse
    return Data(
        name="%s_output" % name,
        shape=[input_data.get_placeholder_as_batch_major().shape[1].value,
               input_data.get_placeholder_as_batch_major().shape[2].value],
        dtype=input_data.dtype,
        size_placeholder={0: tf.strided_slice(
            input_data.size_placeholder[
                input_data.time_dim_axis_excluding_batch],
            [0],
            tf.shape(input_data.size_placeholder[
                input_data.time_dim_axis_excluding_batch]),
            [pool_size])},
        sparse=False,
        batch_dim_axis=0,
        time_dim_axis=1)
def read_input(file):
    # start
    class Record(object):
        pass

    result = Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename queue passed in as
    # `file`. No header or footer in the CIFAR-10 format, so we leave
    # header_bytes and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(file)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
        tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we
    # reshape from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_bytes, [label_bytes],
                         [label_bytes + image_bytes]),
        [result.depth, result.height, result.width])

    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
def token2ids(token):
    with tf.name_scope("token2ids_preprocessor"):
        char_ids = tf.decode_raw(token, tf.uint8,
                                 name='decode_raw2get_char_ids')
        char_ids = tf.cast(char_ids, tf.int32, name='cast2int_token')
        char_ids = tf.strided_slice(char_ids, [0], [_max_word_length - 2],
                                    [1], name='slice2resized_token')
        ids_num = tf.shape(char_ids)[0]
        fill_ids_num = (_max_word_length - 2) - ids_num
        pads = tf.fill([fill_ids_num], _pad_id)
        bow_token_eow_pads = tf.concat([[_bow_id], char_ids, [_eow_id], pads],
                                       0, name='concat2bow_token_eow_pads')
        return bow_token_eow_pads
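# A hedged usage sketch for token2ids above. It assumes the module-level
# constants the function references; the values below are illustrative.
# Expected layout: [BOW, char ids..., EOW, PAD...] of length _max_word_length.
_max_word_length = 10
_bow_id, _eow_id, _pad_id = 256, 257, 258

ids = token2ids(tf.constant('cat'))
with tf.Session() as sess:
    print(sess.run(ids))  # [256, 99, 97, 116, 257, 258, 258, 258, 258, 258]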
def read_data(filename_queue):
    # Simple container for the parsed record fields.
    class ImageRecord(object):
        pass

    images_bytes = IMG_HEIGHT * IMG_WIDTH * IMG_CHANNELS
    # Compute how many bytes to read per image.
    record_bytes = images_bytes + LABEL_BYTES

    record = ImageRecord()
    record.height = IMG_HEIGHT
    record.width = IMG_WIDTH
    record.channels = IMG_CHANNELS

    # Read a record, getting filenames from filename_queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    record.key, value = reader.read(filename_queue)

    # Convert from a string to vector of uint8.
    record_data = tf.decode_raw(value, tf.uint8)

    record.label = tf.cast(
        tf.strided_slice(record_data, [0], [LABEL_BYTES]), tf.int32)

    # The remaining bytes after the label.
    # Reshape image from vector to 3D tensor.
    depth_major = tf.reshape(
        tf.strided_slice(record_data, [LABEL_BYTES], [record_bytes]),
        [record.channels, record.height, record.width])

    # Convert from [channels, height, width] to [height, width, channels].
    record.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return record
def ptb_producer(raw_data, batch_size, num_steps, name=None):
    """Iterates over the raw PTB data.

    Converts raw_data into batches of examples and returns Tensors drawn
    from these batches.

    Args:
      raw_data: one of the raw data outputs from ptb_raw_data.
      batch_size: int, the batch size.
      num_steps: int, the number of unrolls.
      name: the name of this operation (optional).

    Returns:
      A pair of Tensors, each shaped [batch_size, num_steps]. The second
      element of the tuple is the same data time-shifted by one step.

    Raises:
      tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
    """
    with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
        raw_data = tf.convert_to_tensor(raw_data, name="raw_data",
                                        dtype=tf.int32)

        data_len = tf.size(raw_data)
        batch_len = data_len // batch_size
        data = tf.reshape(raw_data[0: batch_size * batch_len],
                          [batch_size, batch_len])

        epoch_size = (batch_len - 1) // num_steps
        assertion = tf.assert_positive(
            epoch_size,
            message="epoch_size == 0, decrease batch_size or num_steps")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
        x = tf.strided_slice(data, [0, i * num_steps],
                             [batch_size, (i + 1) * num_steps])
        x.set_shape([batch_size, num_steps])
        y = tf.strided_slice(data, [0, i * num_steps + 1],
                             [batch_size, (i + 1) * num_steps + 1])
        y.set_shape([batch_size, num_steps])
        return x, y
def parser(self, value):
    """Parse a Cifar10 record from value.

    Output images are in [height, width, depth] layout.
    """
    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1
    image_bytes = HEIGHT * WIDTH * DEPTH
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_as_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from
    # uint8->int32.
    label = tf.cast(
        tf.strided_slice(record_as_bytes, [0], [label_bytes]), tf.int32)
    label.set_shape([1])

    # The remaining bytes after the label represent the image, which
    # we reshape from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_as_bytes, [label_bytes], [record_bytes]),
        [3, 32, 32])

    # Convert from [depth, height, width] to [height, width, depth].
    # This puts data in a compatible layout with TF image preprocessing APIs.
    image = tf.transpose(depth_major, [1, 2, 0])

    # Do custom preprocessing here.
    image = self.preprocess(image)

    return image, label
def _process_pipeline(params, record_pipeline, test=False):
    label = tf.cast(
        tf.strided_slice(record_pipeline, [0], [NUM_LABEL_BYTES]), tf.int32)
    label = tf.reshape(tf.one_hot(label, 10), [10])
    depth_major = tf.reshape(
        tf.strided_slice(record_pipeline, [NUM_LABEL_BYTES],
                         [NUM_LABEL_BYTES + NUM_IMAGE_BYTES]),
        [3, 32, 32])
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) / 255.0

    base_img = tf.image.resize_images(
        tf.image.resize_images(image, params['coarse_shape'][:2],
                               tf.image.ResizeMethod.AREA),
        params['fine_shape'][:2])
    target_img = tf.image.resize_images(image, params['fine_shape'][:2])
    real_diff = target_img - base_img

    batch_label, batch_base_img, batch_real_diff = tf.train.shuffle_batch(
        [label, base_img, real_diff],
        enqueue_many=False,
        batch_size=params['batch_size'],
        num_threads=params['preprocess_threads'],
        capacity=100 * params['batch_size'],
        min_after_dequeue=10 * params['batch_size'])

    noise = tf.random_normal(tf.shape(batch_base_img)[:-1],
                             stddev=params['noise'], name='noise')
    keep_prob = tf.constant(1.0) if test else tf.constant(0.5)

    return {'base_img': batch_base_img,
            'diff_real': batch_real_diff,
            'class_cond': batch_label,
            'noise': noise,
            'keep_prob': keep_prob}
def __init__(self, pool_size=1, **kwargs):
    """
    :param int pool_size: size of the pool to take median of (is also used as stride size)
    """
    super(BatchMedianPoolingLayer, self).__init__(**kwargs)

    input_placeholder = self.input_data.get_placeholder_as_batch_major()

    # Get the median over pooled batches:
    # - reshape the input for use with tf.nn.top_k (integer-divide the batch
    #   axis by pool_size so the reshape shape stays int32)
    reshaped_input = tf.reshape(
        tf.transpose(input_placeholder, [1, 2, 0]),
        shape=(tf.shape(input_placeholder)[1],
               tf.shape(input_placeholder)[2],
               tf.shape(input_placeholder)[0] // pool_size,
               pool_size))
    # - get the median of each pool
    median = tf.nn.top_k(
        reshaped_input,
        k=tf.cast(tf.ceil(tf.constant(pool_size, dtype=tf.float32) / 2),
                  dtype=tf.int32)).values[:, :, :, -1]
    median_batch_major = tf.transpose(median, [2, 0, 1])
    self.output.placeholder = median_batch_major
    self.output.size_placeholder = {
        self.output.time_dim_axis_excluding_batch: tf.strided_slice(
            self.input_data.size_placeholder[
                self.input_data.time_dim_axis_excluding_batch],
            [0],
            tf.shape(self.input_data.size_placeholder[
                self.input_data.time_dim_axis_excluding_batch]),
            [pool_size])}
def set_similarity(self, valid_examples=None, pca=True):
    if valid_examples is None:
        if pca:
            valid_examples = np.array(range(20))
        else:
            valid_examples = np.array(range(self.num_vocabulary))
    self.valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    self.norm = tf.sqrt(
        tf.reduce_sum(tf.square(self.g_embeddings), 1, keep_dims=True))
    self.normalized_embeddings = self.g_embeddings / self.norm
    # PCA
    if self.num_vocabulary >= 20 and pca:
        emb = tf.matmul(self.normalized_embeddings,
                        tf.transpose(self.normalized_embeddings))
        s, u, v = tf.svd(emb)
        u_r = tf.strided_slice(u, begin=[0, 0],
                               end=[20, self.num_vocabulary],
                               strides=[1, 1])
        self.normalized_embeddings = tf.matmul(u_r,
                                               self.normalized_embeddings)
    self.valid_embeddings = tf.nn.embedding_lookup(
        self.normalized_embeddings, self.valid_dataset)
    self.similarity = tf.matmul(self.valid_embeddings,
                                tf.transpose(self.normalized_embeddings))
def testPtbProducer(self):
    raw_data = [4, 3, 2, 1, 0, 5, 6, 1, 1, 1, 1, 0, 3, 4, 1]
    batch_size = 3
    num_steps = 2
    x, y = reader.ptb_producer(raw_data, batch_size, num_steps)
    with self.test_session() as session:
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(session, coord=coord)
        try:
            data_len = tf.size(raw_data)
            batch_len = data_len // batch_size
            data = tf.reshape(
                tf.slice(raw_data, [0], [batch_size * batch_len]),
                [batch_size, batch_len])
            epoch_size = (batch_len - 1) // num_steps
            epoch_size = tf.identity(epoch_size, name="epoch_size")
            # i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
            # x1 = tf.strided_slice(data, [0, 1 * num_steps],
            #                       [batch_size, (1 + 1) * num_steps])
            # x.set_shape([batch_size, num_steps])
            # y = tf.strided_slice(data, [0, i * num_steps + 1],
            #                      [batch_size, (i + 1) * num_steps + 1])
            # y.set_shape([batch_size, num_steps])
            print('data_len ' + str(session.run(data_len)))
            print('batch_len ' + str(session.run(batch_len)))
            print('data ' + str(session.run(data)))
            print('epoch_size ' + str(session.run(epoch_size)))
            xval, yval = session.run([x, y])
            self.assertAllEqual(xval, [[4, 3], [5, 6], [1, 0]])
            self.assertAllEqual(yval, [[3, 2], [6, 1], [0, 3]])
            xval, yval = session.run([x, y])
            self.assertAllEqual(xval, [[2, 1], [1, 1], [3, 4]])
            self.assertAllEqual(yval, [[1, 0], [1, 1], [4, 1]])
        finally:
            coord.request_stop()
            coord.join()
def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Crops the given image using the provided offsets and sizes.

    Note that the method doesn't assume we know the input image size but it
    does assume we know the input image rank.

    Args:
      image: an image of shape [height, width, channels].
      offset_height: a scalar tensor indicating the height offset.
      offset_width: a scalar tensor indicating the width offset.
      crop_height: the height of the cropped image.
      crop_width: the width of the cropped image.

    Returns:
      the cropped (and resized) image.

    Raises:
      InvalidArgumentError: if the rank is not 3 or if the image dimensions
        are less than the crop size.
    """
    original_shape = tf.shape(image)

    rank_assertion = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    with tf.control_dependencies([rank_assertion]):
        cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])

    size_assertion = tf.Assert(
        tf.logical_and(
            tf.greater_equal(original_shape[0], crop_height),
            tf.greater_equal(original_shape[1], crop_width)),
        ['Crop size greater than the image size.'])

    offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

    # Use tf.strided_slice instead of crop_to_bounding_box as it accepts
    # tensors to define the crop size.
    with tf.control_dependencies([size_assertion]):
        image = tf.strided_slice(image, offsets, offsets + cropped_shape,
                                 strides=tf.ones_like(offsets))
    return tf.reshape(image, cropped_shape)
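# A hedged usage sketch for _crop above: crop a 4x4 RGB image to its central
# 2x2 window. The image tensor is illustrative.
image_demo = tf.reshape(tf.range(4 * 4 * 3, dtype=tf.float32), [4, 4, 3])
cropped = _crop(image_demo, offset_height=1, offset_width=1,
                crop_height=2, crop_width=2)
with tf.Session() as sess:
    print(sess.run(cropped).shape)  # (2, 2, 3)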
def _log_prob(self, x):
    if self.input_output_cholesky:
        x_sqrt = x
    else:
        # Complexity: O(nbk**3)
        x_sqrt = tf.cholesky(x)

    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    ndims = tf.rank(x_sqrt)
    # sample_ndims = ndims - batch_ndims - event_ndims
    sample_ndims = ndims - tf.shape(batch_shape)[0] - 2
    sample_shape = tf.strided_slice(tf.shape(x_sqrt), [0], [sample_ndims])

    # We need to be able to pre-multiply each matrix by its corresponding
    # batch scale matrix. Since a Distribution Tensor supports multiple
    # samples per batch, this means we need to reshape the input matrix `x`
    # so that the first b dimensions are batch dimensions and the last two
    # are of shape [dimension, dimensions*number_of_samples]. Doing these
    # gymnastics allows us to do a batch_solve.
    #
    # After we're done with sqrt_solve (the batch operation) we need to undo
    # this reshaping so what we're left with is a Tensor partitionable by
    # sample, batch, event dimensions.

    # Complexity: O(nbk**2) since transpose must access every element.
    scale_sqrt_inv_x_sqrt = x_sqrt
    perm = tf.concat([tf.range(sample_ndims, ndims),
                      tf.range(0, sample_ndims)], 0)
    scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)
    shape = tf.concat(
        (batch_shape, (tf.cast(self.dimension, dtype=tf.int32), -1)), 0)
    scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

    # Complexity: O(nbM*k) where M is the complexity of the operator solving
    # a vector system. For LinearOperatorLowerTriangular, each solve is
    # O(k**2) so this step has complexity O(nbk^3).
    scale_sqrt_inv_x_sqrt = self.scale_operator.solve(scale_sqrt_inv_x_sqrt)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = tf.concat([batch_shape, event_shape, sample_shape], 0)
    scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
    perm = tf.concat([
        tf.range(ndims - sample_ndims, ndims),
        tf.range(0, ndims - sample_ndims)
    ], 0)
    scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)

    # Write V = SS', X = LL'. Then:
    # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
    #              = tr[inv(S) L L' inv(S)']
    #              = tr[(inv(S) L) (inv(S) L)']
    #              = sum_{ik} (inv(S) L)_{ik}**2
    # The second equality follows from the cyclic permutation property.
    # Complexity: O(nbk**2)
    trace_scale_inv_x = tf.reduce_sum(
        tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

    # Complexity: O(nbk)
    half_log_det_x = tf.reduce_sum(
        tf.log(tf.matrix_diag_part(x_sqrt)), axis=[-1])

    # Complexity: O(nbk**2)
    log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                0.5 * trace_scale_inv_x -
                self.log_normalization())

    # Set shape hints.
    # Try to merge what we know from the input then what we know from the
    # parameters of this distribution.
    if x.get_shape().ndims is not None:
        log_prob.set_shape(x.get_shape()[:-2])
    if (log_prob.get_shape().ndims is not None and
            self.batch_shape.ndims is not None and
            self.batch_shape.ndims > 0):
        log_prob.get_shape()[-self.batch_shape.ndims:].merge_with(
            self.batch_shape)

    return log_prob
def __init__(self, params_common, params_model, is_training):
    self.params_common = params_common
    self.params_model = params_model

    # *************** PLACEHOLDER & INPUT ***************
    # [batch_size, sequence_len]
    self.source_input = tf.placeholder(tf.int32, [None, None],
                                       name="source_input")
    self.target = tf.placeholder(tf.int32, [None, None], name="target")
    self.source_sequence_length = tf.placeholder(
        tf.int32, [None], name="source_sequence_length")
    # if reverse_target, length doesn't include <S> at the end
    self.target_sequence_length = tf.placeholder(
        tf.int32, [None], name="target_sequence_length")
    # Get the max target length.
    max_target_len = tf.reduce_max(self.target_sequence_length)
    # Get the dynamic batch size.
    batch_size = tf.shape(self.source_input)[0]

    if params_common["reverse_target"]:
        # target input:  <EOS> 4 3 2 <S> <PAD>, <EOS> 6 5 4 3 2
        target_input = self.target
        # target output: 3 2 1 <S> <PAD> <S>, 6 5 4 3 2 <S>
        # target seq len: 4, 6
        first_slices = tf.strided_slice(target_input, [0, 1],
                                        [batch_size, max_target_len], [1, 1])
        self.target_output = tf.concat([
            first_slices,
            tf.fill([batch_size, 1], params_common["start_id"])
        ], 1)
    else:
        # target output: 1 2 3 <EOS>
        self.target_output = self.target
        # target input: <S> 1 2 3
        after_slice = tf.strided_slice(self.target_output, [0, 0],
                                       [batch_size, -1], [1, 1])
        target_input = tf.concat([
            tf.fill([batch_size, 1], params_common["start_id"]), after_slice
        ], 1)

    # *************** GRAPH ****************
    if not is_training:
        params_model["keep_prob"] = 1.0

    # ------ RNN Encoder ------
    # TODO change to independent embedding
    with tf.variable_scope("encode", reuse=tf.AUTO_REUSE):
        enc_embeddings = tf.get_variable(
            "input_embedding",
            initializer=tf.random_uniform([
                params_common["source_vocab_size"],
                params_model["encoding_embedding_size"]
            ]))
        # (uni-rnn)
        # list of separated rnn cells
        # rnn_cell = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(params["rnn_size"]), params["keep_prob"])
        # stack n layers together
        # stacked_cells = tf.contrib.rnn.MultiRNNCell(l_dropped_out_rnn_cell)
        # Unroll the rnn_cell instance; the output is obtained by actively
        # feeding the input tokens.
        # _, encoder_state = tf.nn.dynamic_rnn(rnn_cell, embed, dtype=tf.float32)

        # (bi-rnn)
        input = tf.nn.embedding_lookup(enc_embeddings, self.source_input)
        encoder_states = []
        for _ in range(params_model["num_layers"]):
            with tf.variable_scope(None, default_name="stacked_bilstm"):
                fw_rnn_cell = tf.contrib.rnn.LSTMCell(
                    params_model["rnn_size"] / 2)
                fw_dropped_out_rnn_cell = tf.contrib.rnn.DropoutWrapper(
                    fw_rnn_cell, params_model["keep_prob"])
                bw_rnn_cell = tf.contrib.rnn.LSTMCell(
                    params_model["rnn_size"] / 2)
                bw_dropped_out_rnn_cell = tf.contrib.rnn.DropoutWrapper(
                    bw_rnn_cell, params_model["keep_prob"])
                # outputs = tuple(fw_out, bw_out)
                # state = (fw_state, bw_state), fw_state = (c, h)
                outputs, state = tf.nn.bidirectional_dynamic_rnn(
                    fw_dropped_out_rnn_cell, bw_dropped_out_rnn_cell, input,
                    self.source_sequence_length, dtype=tf.float32)
                # update
                input = tf.concat(outputs, 2)
                states = tf.concat([state[0], state[1]], axis=2)
                encoder_state = tf.nn.rnn_cell.LSTMStateTuple(
                    states[0], states[1])
                encoder_states.append(encoder_state)
        # encoder_states = tuple(encoder_states)  # if no attention
        # [batch_size, max_time, num_units]
        encoder_outputs = input

    # ------ RNN Decoder -------
    # reuse: shared rnn cells
    with tf.variable_scope("decode", reuse=tf.AUTO_REUSE):
        # Create an attention mechanism
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            params_model["rnn_size"],
            encoder_outputs,
            memory_sequence_length=self.source_sequence_length)

        dec_embeddings = tf.get_variable(
            "output_embedding",
            initializer=tf.random_uniform([
                params_common["target_vocab_size"],
                params_model["decoding_embedding_size"]
            ]))
        # dec_embeddings = tf.Variable(tf.random_uniform([params_common["target_vocab_size"],
        #                                                 params_model["decoding_embedding_size"]]),
        #                              name="output_embedding")
        dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, target_input)

        l_dec_rnn_cell = [
            tf.contrib.rnn.LSTMCell(params_model["rnn_size"])
            for i in range(params_model["num_layers"])
        ]
        dec_stacked_cells = tf.contrib.rnn.MultiRNNCell(l_dec_rnn_cell)
        dec_stacked_att_cells = tf.contrib.seq2seq.AttentionWrapper(
            dec_stacked_cells, attention_mechanism,
            attention_layer_size=params_model["rnn_size"])

        # --- Train phase ---
        dec_train_cells = tf.contrib.rnn.DropoutWrapper(
            dec_stacked_att_cells,
            output_keep_prob=params_model["keep_prob"])
        output_layer = tf.layers.Dense(params_common["target_vocab_size"])
        # dynamic_rnn can only produce outputs from the provided inputs;
        # (helper + decoder + dynamic_decode) lets us customize how outputs
        # are produced. The helper decides the decoder's input; here
        # dec_embed_input feeds the true-label tokens.
        helper = tf.contrib.seq2seq.TrainingHelper(
            dec_embed_input, self.target_sequence_length)
        # The core decoder: takes the helper's input, the rnn cells and the
        # output layer, and returns one step of RNN output.
        decoder_train = tf.contrib.seq2seq.BasicDecoder(
            dec_train_cells, helper,
            dec_train_cells.zero_state(dtype=tf.float32,
                                       batch_size=batch_size),
            output_layer)
        # Drive the core decoder with the outer loop that unrolls it.
        self.decoder_train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder_train, impute_finished=True,
            maximum_iterations=max_target_len)

        # --- Infer phase ---
        infer_start = params_common["end_id"] if params_common[
            "reverse_target"] else params_common["start_id"]
        infer_end = params_common["start_id"] if params_common[
            "reverse_target"] else params_common["end_id"]
        gd_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            dec_embeddings, tf.fill([batch_size], infer_start), infer_end)
        decoder_infer = tf.contrib.seq2seq.BasicDecoder(
            dec_stacked_att_cells, gd_helper,
            dec_train_cells.zero_state(dtype=tf.float32,
                                       batch_size=batch_size),
            output_layer)
        self.decoder_infer_outputs, _, self.infer_sequence_lengths = \
            tf.contrib.seq2seq.dynamic_decode(
                decoder_infer, impute_finished=True,
                maximum_iterations=max_target_len)

    # ------ FORWARD -------
    # TODO: with same value, why need identity?
    self.training_logits = tf.identity(
        self.decoder_train_outputs.rnn_output, name="logits")
    self.inference_sample_id = tf.identity(
        self.decoder_infer_outputs.sample_id, name="predictions")

    vars = tf.trainable_variables()
    Monitor.print_params(vars)
def convertTargetToInputDec(self, target):
    after_slice = tf.strided_slice(target, [0, 0],
                                   [self.params["win"], -1], [1, 1])
    after_concat = tf.concat(
        [tf.fill([self.params["win"], 1], 0.0), after_slice], 1)
    return after_concat
def get_horizen_minAreaRectangle(boxs, img_shape, with_label=True):
    rpn_proposals_boxes_convert = tf.py_func(forward_convert,
                                             inp=[boxs, with_label],
                                             Tout=tf.float32)
    if with_label:
        rpn_proposals_boxes_convert = tf.reshape(rpn_proposals_boxes_convert,
                                                 [-1, 9])
        boxes_shape = tf.shape(rpn_proposals_boxes_convert)
        x_list = tf.strided_slice(rpn_proposals_boxes_convert,
                                  begin=[0, 0],
                                  end=[boxes_shape[0], boxes_shape[1] - 1],
                                  strides=[1, 2])
        y_list = tf.strided_slice(rpn_proposals_boxes_convert,
                                  begin=[0, 1],
                                  end=[boxes_shape[0], boxes_shape[1] - 1],
                                  strides=[1, 2])
        label = tf.unstack(rpn_proposals_boxes_convert, axis=1)[-1]

        y_max = tf.reduce_max(y_list, axis=1)
        y_min = tf.reduce_min(y_list, axis=1)
        x_max = tf.reduce_max(x_list, axis=1)
        x_min = tf.reduce_min(x_list, axis=1)

        # The following code aims to keep the gt boxes inside the image:
        # img_h, img_w = img_shape[0], img_shape[1]
        # img_h = tf.cast(img_h, tf.float32)
        # img_w = tf.cast(img_w, tf.float32)
        # x_min = tf.maximum(x_min, 0)
        # y_min = tf.maximum(y_min, 0)
        # x_max = tf.minimum(x_max, img_w)
        # y_max = tf.minimum(y_max, img_h)

        return tf.transpose(
            tf.stack([x_min, y_min, x_max, y_max, label], axis=0))
    else:
        rpn_proposals_boxes_convert = tf.reshape(rpn_proposals_boxes_convert,
                                                 [-1, 8])
        boxes_shape = tf.shape(rpn_proposals_boxes_convert)
        x_list = tf.strided_slice(rpn_proposals_boxes_convert,
                                  begin=[0, 0],
                                  end=[boxes_shape[0], boxes_shape[1]],
                                  strides=[1, 2])
        y_list = tf.strided_slice(rpn_proposals_boxes_convert,
                                  begin=[0, 1],
                                  end=[boxes_shape[0], boxes_shape[1]],
                                  strides=[1, 2])

        y_max = tf.reduce_max(y_list, axis=1)
        y_min = tf.reduce_min(y_list, axis=1)
        x_max = tf.reduce_max(x_list, axis=1)
        x_min = tf.reduce_min(x_list, axis=1)

        # The following code aims to keep the gt boxes inside the image:
        # img_h, img_w = img_shape[0], img_shape[1]
        # img_h = tf.cast(img_h, tf.float32)
        # img_w = tf.cast(img_w, tf.float32)
        # x_min = tf.maximum(x_min, 0)
        # y_min = tf.maximum(y_min, 0)
        # x_max = tf.minimum(x_max, img_w)
        # y_max = tf.minimum(y_max, img_h)

        return tf.transpose(tf.stack([x_min, y_min, x_max, y_max], axis=0))
def __init__(self, size_layer, num_layers, embedded_size,
             from_dict_size, to_dict_size, learning_rate):
    '''
    :param size_layer: output size of each step
    :param num_layers: number of layers
    :param embedded_size: dimension of the word embeddings
    :param from_dict_size: source (question) vocabulary size
    :param to_dict_size: target (answer) vocabulary size
    :param learning_rate: learning rate
    '''
    def cells(size, reuse=False):
        return tf.nn.rnn_cell.LSTMCell(
            size, initializer=tf.orthogonal_initializer(), reuse=reuse)

    # define input
    self.X = tf.placeholder(tf.int32, [None, None])
    self.Y = tf.placeholder(tf.int32, [None, None])
    self.X_seq_len = tf.placeholder(tf.int32, [None])
    self.Y_seq_len = tf.placeholder(tf.int32, [None])
    batch_size = tf.shape(self.X)[0]

    # embeddings for encoder and decoder
    encoder_embeddings = tf.Variable(
        tf.random_uniform([from_dict_size, embedded_size], -1, 1))
    decoder_embeddings = tf.Variable(
        tf.random_uniform([to_dict_size, embedded_size], -1, 1))
    encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
    main = tf.strided_slice(self.X, [0, 0], [batch_size, -1], [1, 1])
    decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
    decoder_embedded = tf.nn.embedding_lookup(decoder_embeddings,
                                              decoder_input)

    def attention():
        # add Bahdanau attention to the cell
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            num_units=size_layer // 2, memory=encoder_embedded)
        return tf.contrib.seq2seq.AttentionWrapper(
            cell=cells(size_layer // 2),
            attention_mechanism=attention_mechanism,
            attention_layer_size=size_layer // 2)

    # bidirectional LSTM encoder
    for n in range(num_layers):
        (out_fw, out_bw), (state_fw, state_bw) = \
            tf.nn.bidirectional_dynamic_rnn(
                cell_fw=attention(),
                cell_bw=attention(),
                inputs=encoder_embedded,
                sequence_length=self.X_seq_len,
                dtype=tf.float32,
                scope='bidirectional_rnn_%d' % (n))
        encoder_embedded = tf.concat((out_fw, out_bw), 2)

    # hidden-state outputs
    bi_state_c = tf.concat((state_fw[0].c, state_bw[0].c), -1)
    bi_state_h = tf.concat((state_fw[0].h, state_bw[0].h), -1)
    bi_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(c=bi_state_c, h=bi_state_h)
    # build the initial state; the decoder is also a multi-layer rnn
    last_state = tuple([bi_lstm_state] * num_layers)

    # decoder
    with tf.variable_scope("decoder"):
        rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
            [cells(size_layer) for _ in range(num_layers)])
        outputs, _ = tf.nn.dynamic_rnn(rnn_cells_dec, decoder_embedded,
                                       initial_state=last_state,
                                       dtype=tf.float32)

    # dense output
    self.logits = tf.layers.dense(outputs, to_dict_size)
    masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len),
                             dtype=tf.float32)
    # define the loss
    self.cost = tf.contrib.seq2seq.sequence_loss(logits=self.logits,
                                                 targets=self.Y,
                                                 weights=masks)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.cost)
    y_t = tf.argmax(self.logits, axis=2)
    y_t = tf.cast(y_t, tf.int32)
    self.prediction = tf.boolean_mask(y_t, masks)
    mask_label = tf.boolean_mask(self.Y, masks)
    correct_pred = tf.equal(self.prediction, mask_label)
    correct_index = tf.cast(correct_pred, tf.float32)
    self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
(row_count, column_count))
print(data)
batch_count = int(math.ceil(row_count / batch_size))
step_count = int(math.ceil(column_count / step_size))
print(batch_count, step_count)

slices = tf.train.slice_input_producer([data], num_epochs=1, shuffle=False)
batch = tf.train.batch(slices, batch_size, allow_smaller_final_batch=True)

queue = tf.FIFOQueue(32, dtypes=[batch.dtype])
enqueue_ops = []
dependency = None
for step_index in range(step_count):
    step = tf.strided_slice(batch,
                            [0, step_index * step_size],
                            [tf.shape(batch)[0],
                             (step_index + 1) * step_size])
    # Chain the enqueues so each one runs after the previous, and collect
    # the enqueue ops (not the slice tensors) for the queue runner.
    if dependency is None:
        dependency = queue.enqueue(step)
    else:
        with tf.control_dependencies([dependency]):
            dependency = queue.enqueue(step)
    enqueue_ops.append(dependency)

queue_runner.add_queue_runner(
    queue_runner.QueueRunner(queue=queue,
                             enqueue_ops=[tf.group(*enqueue_ops)]))
step = queue.dequeue()
def processed_decoder_input(self):
    # remove last char
    main = tf.strided_slice(self.Y, [0, 0], [self.batch_size, -1], [1, 1])
    decoder_input = tf.concat(
        [tf.fill([self.batch_size, 1], self._y_go), main], 1)
    return decoder_input
def build_model(self):
    print('building model... ...')
    # ================================= 1. Define the model placeholders
    self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                         name='encoder_inputs')
    self.encoder_inputs_length = tf.placeholder(
        tf.int32, [None], name='encoder_inputs_length')
    self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
    self.keep_prob_placeholder = tf.placeholder(
        tf.float32, name='keep_prob_placeholder')
    self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                          name='decoder_targets')
    self.decoder_targets_length = tf.placeholder(
        tf.int32, [None], name='decoder_targets_length')
    self.max_target_sequence_length = tf.reduce_max(
        self.decoder_targets_length, name='max_target_len')
    self.mask = tf.sequence_mask(self.decoder_targets_length,
                                 self.max_target_sequence_length,
                                 dtype=tf.float32, name='masks')

    # ================================= 2. Define the encoder
    with tf.variable_scope('encoder'):
        # Create the LSTM cells: two layers plus dropout.
        encoder_cell = self._create_rnn_cell()
        # Build the embedding matrix; encoder and decoder share it.
        embedding = tf.get_variable('embedding',
                                    [self.vocab_size, self.embedding_size])
        encoder_inputs_embedded = tf.nn.embedding_lookup(
            embedding, self.encoder_inputs)
        # Use dynamic_rnn to encode the inputs into hidden vectors.
        # encoder_outputs is used for attention:
        #   batch_size * encoder_inputs_length * rnn_size
        # encoder_state is used to initialize the decoder:
        #   batch_size * rnn_size
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, encoder_inputs_embedded,
            sequence_length=self.encoder_inputs_length, dtype=tf.float32)

    # ================================= 3. Define the decoder
    with tf.variable_scope('decoder'):
        encoder_inputs_length = self.encoder_inputs_length
        # if self.beam_search:
        #     # With beam_search, tile_batch the encoder outputs, i.e. copy
        #     # them beam_size times.
        #     print("use beamsearch decoding..")
        #     encoder_outputs = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=self.beam_size)
        #     encoder_state = nest.map_structure(lambda s: tf.contrib.seq2seq.tile_batch(s, self.beam_size), encoder_state)
        #     encoder_inputs_length = tf.contrib.seq2seq.tile_batch(self.encoder_inputs_length, multiplier=self.beam_size)

        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            num_units=self.rnn_size, memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)
        # attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=self.rnn_size, memory=encoder_outputs, memory_sequence_length=encoder_inputs_length)

        # Define the decoder LSTM cell, then wrap it with attention.
        decoder_cell = self._create_rnn_cell()
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell=decoder_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=self.rnn_size,
            name='Attention_Wrapper')
        # With beam_search, batch_size = self.batch_size * self.beam_size,
        # because the inputs were already tiled above.
        # batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size
        batch_size = self.batch_size
        # Initialize the decoder state directly from the encoder's final
        # hidden state.
        decoder_initial_state = decoder_cell.zero_state(
            batch_size=batch_size,
            dtype=tf.float32).clone(cell_state=encoder_state)
        output_layer = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        if self.mode == 'train':
            # Define the decoder input: prepend <go> to the target, drop the
            # trailing <end>, then embed. decoder_inputs_embedded has shape
            # [batch_size, decoder_targets_length, embedding_size].
            ending = tf.strided_slice(self.decoder_targets, [0, 0],
                                      [self.batch_size, -1], [1, 1])
            decoder_input = tf.concat([
                tf.fill([self.batch_size, 1], self.word_to_idx['<go>']),
                ending
            ], 1)
            decoder_inputs_embedded = tf.nn.embedding_lookup(
                embedding, decoder_input)
            # For training use TrainingHelper + BasicDecoder; this combination
            # is standard, though a custom Helper class is also possible.
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                time_major=False, name='training_helper')
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell, helper=training_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)
            # Call dynamic_decode; decoder_outputs is a namedtuple with two
            # fields (rnn_outputs, sample_id):
            # rnn_output: [batch_size, decoder_targets_length, vocab_size],
            #   per-step, per-word scores, usable to compute the loss.
            # sample_id: [batch_size], tf.int32, the final decoded ids.
            decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=training_decoder, impute_finished=True,
                maximum_iterations=self.max_target_sequence_length)
            # Compute the loss and gradients from the outputs, and define the
            # AdamOptimizer and train_op that apply the updates.
            self.decoder_logits_train = tf.identity(
                decoder_outputs.rnn_output)
            self.decoder_predict_train = tf.argmax(
                self.decoder_logits_train, axis=-1,
                name='decoder_pred_train')
            # Compute the loss with sequence_loss, passing in the mask
            # defined earlier.
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=self.decoder_logits_train,
                targets=self.decoder_targets,
                weights=self.mask)

            # Training summary for the current batch_loss
            tf.summary.scalar('loss', self.loss)
            self.summary_op = tf.summary.merge_all()

            optimizer = tf.train.AdamOptimizer(self.learing_rate)
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            self.train_op = optimizer.apply_gradients(
                zip(clip_gradients, trainable_params))
        elif self.mode == 'decode':
            start_tokens = tf.ones(
                [self.batch_size, ], tf.int32) * self.word_to_idx['<go>']
            end_token = self.word_to_idx['<eos>']
            # For decoding the combination depends on whether beam_search is
            # used: if so, call BeamSearchDecoder directly (it implements its
            # own helper); if not, use GreedyEmbeddingHelper + BasicDecoder
            # for greedy decoding.
            if self.beam_search:
                inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell, embedding=embedding,
                    start_tokens=start_tokens, end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=self.beam_size, output_layer=output_layer)
            else:
                decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=embedding, start_tokens=start_tokens,
                    end_token=end_token)
                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell, helper=decoding_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)
            decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=inference_decoder, maximum_iterations=10)
            # Call dynamic_decode; decoder_outputs is a namedtuple.
            # Without beam_search it has two fields (rnn_outputs, sample_id):
            #   rnn_output: [batch_size, decoder_targets_length, vocab_size]
            #   sample_id: [batch_size, decoder_targets_length], tf.int32
            # With beam_search it has
            # (predicted_ids, beam_search_decoder_output):
            #   predicted_ids: [batch_size, decoder_targets_length, beam_size]
            #   beam_search_decoder_output: BeamSearchDecoderOutput instance,
            #     namedtuple(scores, predicted_ids, parent_ids)
            # So returning predicted_ids or sample_id gives the final result.
            if self.beam_search:
                self.decoder_predict_decode = decoder_outputs.predicted_ids
            else:
                self.decoder_predict_decode = tf.expand_dims(
                    decoder_outputs.sample_id, -1)

    # ================================= 4. Save the model
    self.saver = tf.train.Saver(tf.global_variables())
def preprocess_targets(targets, word2int, batch_size):
    left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
    right_side = tf.strided_slice(targets, [0, 0], [batch_size, -1], [1, 1])
    preprocessed_targets = tf.concat([left_side, right_side], 1)
    return preprocessed_targets
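# A short hedged sketch of the shift preprocess_targets (and the similar
# helpers above) performs: drop the last token of each row, prepend <SOS>.
# Vocabulary and batch are illustrative.
word2int_demo = {'<SOS>': 0}
targets_demo = tf.constant([[5, 6, 7], [8, 9, 10]])
dec_input = preprocess_targets(targets_demo, word2int_demo, batch_size=2)
with tf.Session() as sess:
    print(sess.run(dec_input))  # [[0, 5, 6], [0, 8, 9]]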
def ProvideData(self, batch_size):
    """Build CIFAR image and labels.

    Args:
      batch_size: Input batch size.

    Returns:
      images: Batches of images. [batch_size, crop_size, crop_size, 3]
      norm_images: Batches of images. [batch_size, crop_size, crop_size, 6]
      labels: Batches of labels. [batch_size, NUM_CLASSES]

    Raises:
      ValueError: when the specified dataset is not supported.
    """
    label_bytes = 1
    label_offset = 0
    image_bytes = IMAGE_SIZE * IMAGE_SIZE * NUM_CHANNELS
    record_bytes = label_bytes + label_offset + image_bytes

    file_names = tf.gfile.Glob(self.data_files)
    file_queue = tf.train.string_input_producer(file_names, shuffle=True)
    # Read examples from files in the filename queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, value = reader.read(file_queue)

    # Convert these examples to dense labels and processed images.
    record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
    label = tf.cast(
        tf.strided_slice(record, [label_offset],
                         [label_offset + label_bytes]),
        tf.int32)
    # Convert from string to [depth * height * width] to
    # [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record, [label_bytes],
                         [label_bytes + image_bytes]),
        [NUM_CHANNELS, IMAGE_SIZE, IMAGE_SIZE])
    # Convert from [depth, height, width] to [height, width, depth].
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

    if self.split_name == 'train':
        # Randomly crop a [height, width] section of the image.
        if FLAGS.random_crop:
            image = tf.random_crop(image,
                                   [FLAGS.crop_size, FLAGS.crop_size, 3])
        else:
            # Crop the central [FLAGS.crop_size, FLAGS.crop_size] of the
            # image.
            image = tf.image.resize_image_with_crop_or_pad(
                image, FLAGS.crop_size, FLAGS.crop_size)

        if FLAGS.data_augmentation:
            # Randomly flip the image horizontally.
            image = tf.image.random_flip_left_right(image)
            # Randomize the pixel values.
            # Most images = 0 if random_brightness applied, so test before
            # using.
            # image = tf.image.random_brightness(image, max_delta=63./255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

        if FLAGS.per_image_whitening:
            image = tf.image.per_image_standardization(image)
        else:
            image = image / 255.0

        example_queue = tf.RandomShuffleQueue(
            capacity=16 * batch_size,
            min_after_dequeue=8 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[FLAGS.crop_size, FLAGS.crop_size, NUM_CHANNELS], [1]])
        num_threads = 16
    else:
        image = tf.image.resize_image_with_crop_or_pad(
            image, FLAGS.crop_size, FLAGS.crop_size)
        if FLAGS.per_image_whitening:
            image = tf.image.per_image_standardization(image)
        else:
            image = image / 255.0

        example_queue = tf.FIFOQueue(
            3 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[FLAGS.crop_size, FLAGS.crop_size, NUM_CHANNELS], [1]])
        num_threads = 1

    example_enqueue_op = example_queue.enqueue([image, label])
    tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
        example_queue, [example_enqueue_op] * num_threads))

    # Read 'batch' labels + images from the example queue.
    images, labels = example_queue.dequeue_many(batch_size)
    labels = tf.one_hot(tf.squeeze(labels), self.config.number_of_classes)

    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    assert images.get_shape()[-1] == NUM_CHANNELS
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == self.config.number_of_classes

    return images, self.NormalizeData(images, 3), labels
def predict_weight(testDatas):
    testDatas = np.array(testDatas)
    x_vals_test = normalize_cols(testDatas)
    print('x_vals_test', x_vals_test)
    # Create graph session
    tf.reset_default_graph()
    sess = tf.Session()

    # Define variable functions (weights and bias)
    def init_weight(shape, st_dev):
        weight = tf.Variable(tf.random_normal(shape, stddev=st_dev))
        return weight

    def init_bias(shape, st_dev):
        bias = tf.Variable(tf.random_normal(shape, stddev=st_dev))
        return bias

    # Create a fully connected layer:
    def fully_connected(input_layer, weights, biases):
        layer = tf.add(tf.matmul(input_layer, weights), biases)
        return tf.nn.relu(layer)

    # Initialize placeholders
    x_data = tf.placeholder(shape=[None, 3], dtype=tf.float32)
    y_target = tf.placeholder(shape=[None, 2], dtype=tf.float32)

    weight_1 = init_weight(shape=[3, 10], st_dev=1.0)
    bias_1 = init_bias(shape=[10], st_dev=1.0)
    layer_1 = fully_connected(x_data, weight_1, bias_1)

    weight_2 = init_weight(shape=[10, 15], st_dev=1.0)
    bias_2 = init_bias(shape=[15], st_dev=1.0)
    layer_2 = fully_connected(layer_1, weight_2, bias_2)

    weight_3 = init_weight(shape=[15, 15], st_dev=1.0)
    bias_3 = init_bias(shape=[15], st_dev=1.0)
    layer_3 = fully_connected(layer_2, weight_3, bias_3)

    weight_4 = init_weight(shape=[15, 15], st_dev=1.0)
    bias_4 = init_bias(shape=[15], st_dev=1.0)
    layer_4 = fully_connected(layer_3, weight_4, bias_4)

    weight_5 = init_weight(shape=[15, 8], st_dev=1.0)
    bias_5 = init_bias(shape=[8], st_dev=1.0)
    layer_5 = fully_connected(layer_4, weight_5, bias_5)

    weight_6 = init_weight(shape=[8, 2], st_dev=1.0)
    bias_6 = init_bias(shape=[2], st_dev=1.0)
    final_output = tf.sigmoid(tf.add(tf.matmul(layer_5, weight_6), bias_6))

    batch_num = tf.shape(final_output)[0]
    # Column 0 holds the weight prediction, column 1 the waist prediction.
    predict_weight = tf.strided_slice(final_output, [0, 0], [batch_num, 1], [1, 1])
    y_target_weight = tf.strided_slice(y_target, [0, 0], [batch_num, 1], [1, 1])
    predict_waist = tf.strided_slice(final_output, [0, 1], [batch_num, 2], [1, 1])
    y_target_waist = tf.strided_slice(y_target, [0, 1], [batch_num, 2], [1, 1])
    weight_loss = tf.reduce_mean(tf.square(y_target_weight - predict_weight))
    waist_loss = tf.reduce_mean(tf.square(y_target_waist - predict_waist))
    # Declare loss function (MSE)
    loss = tf.add(waist_loss, weight_loss)
    # Compute the accuracy
    Temp1 = tf.abs(tf.subtract(predict_weight, y_target_weight))
    accuracy_weight = tf.reduce_mean(tf.cast(tf.less(Temp1, 0.12), tf.float32))
    Temp2 = tf.abs(tf.subtract(predict_waist, y_target_waist))
    accuracy_waist = tf.reduce_mean(tf.cast(tf.less(Temp2, 0.3), tf.float32))

    # Initialize variables
    init = tf.global_variables_initializer()
    sess.run(init)

    # Restore the trained network if a checkpoint exists
    checkpoint_dir = '/home/shen/Trying/Predict/up/Final_OneNerualNet_output_two/Net_save/'
    saver = tf.train.Saver()  # defaults to saving all variables
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    x = x_vals_test
    predict_weight_lost = sess.run(predict_weight, feed_dict={x_data: x})
    predict_weight_lost *= 10
    predict_waist_lost = sess.run(predict_waist, feed_dict={x_data: x})
    predict_waist_lost *= 10
    origin_weight = [testDatas[:, 0]]
    transpose_o_weight = np.transpose(origin_weight)
    predict_weight_old = transpose_o_weight - predict_weight_lost
    output_weight = predict_weight_old[0][0]
    origin_waist = [testDatas[:, 2]]
    transpose_o_waist = np.transpose(origin_waist)
    predict_waist_old = transpose_o_waist - predict_waist_lost
    output_waist = predict_waist_old[0][0]
    return (output_weight, output_waist)
def inference(self):
    output_layer = self.model.get_pooled_output()
    logging.info(output_layer)
    with tf.variable_scope("loss"):

        def apply_dropout_last_layer(output_layer):
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
            return output_layer

        def not_apply_dropout(output_layer):
            return output_layer

        output_layer = tf.cond(self.is_training,
                               lambda: apply_dropout_last_layer(output_layer),
                               lambda: not_apply_dropout(output_layer))
        # Pair up consecutive rows of the batch: even rows with odd rows.
        match_1 = tf.strided_slice(output_layer, [0], [train_batch_size], [2])
        match_2 = tf.strided_slice(output_layer, [1], [train_batch_size], [2])
        match = tf.concat([match_1, match_2], 1)
        self.logits = tf.layers.dense(match, self.num_labels, name='fc')
        self.y_pred_cls = tf.argmax(tf.nn.softmax(self.logits), 1, name="pred")
        logging.info(self.y_pred_cls)
        # One label per pair, so keep every second label.
        self.r_labels = tf.strided_slice(self.labels, [0], [train_batch_size], [2])
        print(self.r_labels)
        one_hot_labels = tf.one_hot(self.r_labels, depth=self.num_labels, dtype=tf.float32)
        # Class-weighted cross entropy; the unweighted version would be:
        # cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=one_hot_labels)
        # self.loss = tf.reduce_mean(cross_entropy, name="loss")
        log_probs = tf.nn.log_softmax(self.logits, axis=-1)
        per_example_loss = - (30 * one_hot_labels[:, 0] * log_probs[:, 0]) \
                           - (9 * one_hot_labels[:, 1] * log_probs[:, 1]) \
                           - (2 * one_hot_labels[:, 2] * log_probs[:, 2]) \
                           - (2 * one_hot_labels[:, 3] * log_probs[:, 3]) \
                           - (9 * one_hot_labels[:, 4] * log_probs[:, 4]) \
                           + 1e-10
        self.loss = tf.reduce_mean(per_example_loss)
        self.optim = optimization.create_optimizer(self.loss, learning_rate,
                                                   num_train_steps,
                                                   num_warmup_steps, False)

    with tf.name_scope("accuracy"):
        # Accuracy
        correct_pred = tf.equal(tf.argmax(one_hot_labels, 1), self.y_pred_cls)
        self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="acc")
        print(self.acc)
        self.cm = tf.contrib.metrics.confusion_matrix(
            tf.argmax(one_hot_labels, 1), self.y_pred_cls, num_classes=num_labels)
        print(self.cm)
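# A compact equivalent of the hand-written weighted loss above (a sketch, not
# the original author's code): scale the one-hot labels by a per-class weight
# vector and reduce, instead of writing one term per class column. The 1e-10
# epsilon of the original is omitted here.
import tensorflow as tf

logits = tf.random_normal([4, 5])                   # dummy batch of 4 examples
one_hot_labels = tf.one_hot([0, 1, 2, 4], depth=5)  # dummy labels
log_probs = tf.nn.log_softmax(logits, axis=-1)
class_weights = tf.constant([30.0, 9.0, 2.0, 2.0, 9.0])
per_example_loss = -tf.reduce_sum(class_weights * one_hot_labels * log_probs,
                                  axis=-1)
loss = tf.reduce_mean(per_example_loss)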
def processed_decoder_output(self): return tf.strided_slice( self.Y, [0, 1], [self.batch_size, tf.shape(self.Y)[1]], [1, 1]) # remove first char
def processed_decoder_input(self): return tf.strided_slice(self.Y, [0, 0], [self.batch_size, -1], [1, 1]) # remove last char
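# A tiny check (a sketch, TF 1.x assumed) of the two slices above:
# processed_decoder_input drops the last token of Y, processed_decoder_output
# drops the first, so the decoder sees input/target pairs shifted by one step.
import tensorflow as tf

Y = tf.constant([[1, 2, 3, 4]])
dec_in = tf.strided_slice(Y, [0, 0], [1, -1], [1, 1])               # [[1 2 3]]
dec_out = tf.strided_slice(Y, [0, 1], [1, tf.shape(Y)[1]], [1, 1])  # [[2 3 4]]
with tf.Session() as sess:
    print(sess.run([dec_in, dec_out]))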
def process_decoding_input(target_data, vocab_to_int, batch_size): ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1]) dec_input = tf.concat( [tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1) return dec_input
def stridedslice_test(g1): with g1.as_default(): g1_input = tf.placeholder(tf.float32, shape=(1, 1, 1, 10), name='0') tf.strided_slice(g1_input, [0, 0, 0, 0], [1, 1, 1, 5], [1, 1, 1, 1], shrink_axis_mask=2, name='stridedslice1')
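# What shrink_axis_mask=2 does above (a sketch, TF 1.x assumed): bit 1 of the
# mask is set, so axis 1 is sliced to a single element and squeezed away,
# turning the (1, 1, 1, 5) slice into a (1, 1, 5) result.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(1, 1, 1, 10))
y = tf.strided_slice(x, [0, 0, 0, 0], [1, 1, 1, 5], [1, 1, 1, 1],
                     shrink_axis_mask=2)
with tf.Session() as sess:
    out = sess.run(y, feed_dict={x: np.arange(10.0).reshape(1, 1, 1, 10)})
    print(out.shape)  # (1, 1, 5)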
def process_encoding_input(target_data, vocab_to_int, batch_size):
    '''Remove the last word id from each batch and concat the <GO>
    to the beginning of each batch'''
    ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    dec_input = tf.concat(
        [tf.fill([batch_size, 1], vocab_to_int['<GO>']), ending], 1)
    return dec_input
def loadAndGetGraph(
        self,
        num_steps,
        state_size,
        learningRate=1e-4,  # default of Adam is 1e-3
        numOfLastToCombine=1,
        stride=1,
        verbose=True):
    """numOfLastToCombine takes the last 3, 4, 5, ... up to 120 segments into
    account, even when stride thins them out.
    stride is useful for picking only a few items to take into account
    instead of everything."""
    assert 1 <= numOfLastToCombine <= self.segment_count
    assert stride >= 1
    # Even with a large stride, the very last output is always included.
    graph = tf.Graph()  # create new graph
    with graph.as_default():
        with tf.name_scope('data'):
            inputs = tf.placeholder(
                self.dtype,
                [self.batch_size, self.segment_count, self.segment_len],
                name='input_placeholder')
            targets = tf.placeholder(self.dtype,
                                     [self.batch_size, self.num_classes],
                                     name='labels_placeholder')
        init_state = tf.zeros([self.batch_size, state_size], dtype=self.dtype)
        # list where each item has dim 50 x 25
        rnn_inputs = tf.unpack(inputs, axis=1, name='rnn_inputs')
        cell = tf.nn.rnn_cell.BasicRNNCell(state_size)  # tanh is the default activation
        rnn_outputs, final_state = tf.nn.rnn(
            cell,
            rnn_inputs,
            initial_state=init_state,
            sequence_length=np.repeat(num_steps, self.batch_size)
        )  # each rnn_output from rnn_outputs has shape 50 x state_size
        with tf.variable_scope('rnn_outputs_multiplex'):
            rnn_outputs_packed = tf.pack(rnn_outputs, axis=1)  # 50 x 120 x state_size
            # aa = np.arange(10)
            # aa[-1:-(5+1):-2]  # five last with a step of two
            # Setting it with python slicing breaks the flow:
            # rnn_outputs_of_interest = rnn_outputs[-1:-(numOfLastToCombine + 1):-stride]
            reversed_rnn_outputs = tf.reverse_v2(rnn_outputs_packed, axis=[1])
            rnn_outputs_of_interest = tf.strided_slice(
                reversed_rnn_outputs, [0, 0, 0], [
                    int(reversed_rnn_outputs.get_shape()[0]),
                    numOfLastToCombine,
                    int(reversed_rnn_outputs.get_shape()[2])
                ], [1, stride, 1])
            rnn_outputs_of_interest_combined = tf.reshape(
                rnn_outputs_of_interest, (self.batch_size, -1))
            rnn_outputs_multiplex = tf.concat(
                concat_dim=1,
                values=[final_state, rnn_outputs_of_interest_combined])
        with tf.variable_scope('readout'):
            input_dim = state_size * int(rnn_outputs_of_interest.get_shape()[1])
            assert rnn_outputs_of_interest_combined.get_shape()[-1] == input_dim
            input_dim += state_size  # because we are adding the final state
            if verbose:
                if input_dim > 2000:
                    print("input dimensionality for readout layer is too large: {}".format(input_dim))
            output_dim = self.num_classes
            W = tf.Variable(tf.truncated_normal(
                [input_dim, output_dim],
                stddev=2. / (input_dim + output_dim)**0.5),
                name='readout_weights')
            b = tf.Variable(tf.zeros([output_dim]), name='readout_biases')
            logits = tf.matmul(rnn_outputs_multiplex, W) + b  # shape: (50, 10)
        with tf.name_scope('error'):
            error = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, targets))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(
                tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(targets, 1)),
                        dtype=self.dtype))
        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(
                learning_rate=learningRate).minimize(error)
        init = tf.global_variables_initializer()
    self.init = init
    self.error = error
    self.accuracy = accuracy
    self.inputs = inputs
    self.targets = targets
    self.train_step = train_step
    self.logits = logits
    self.lastRnnOut = rnn_outputs[-1]
    self.finalState = final_state
    self.initialState = init_state
    self.firstRnnOut = rnn_outputs[0]
    self.allRnnOuts = rnn_outputs
    return graph
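# A quick check in plain numpy (a sketch, hypothetical T=10) of which
# timesteps the reverse + strided_slice above selects: the last
# numOfLastToCombine outputs, thinned by `stride`, newest first.
import numpy as np

T, numOfLastToCombine, stride = 10, 5, 2
steps = np.arange(T)
picked = steps[::-1][:numOfLastToCombine:stride]
print(picked)  # [9 7 5] == steps[-1:-(numOfLastToCombine + 1):-stride]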
import numpy as np
import tensorflow as tf

# ========================================
# http://blog.csdn.net/banana1006034246/article/details/75092388
# ========================================

data = [[1, 2, 3, 4, 5, 6, 7, 8],
        [11, 12, 13, 14, 15, 16, 17, 18]]

if __name__ == "__main__":
    x_dim1_start = 0
    x_dim1_end = 1
    x_dim2_start = 0
    x_dim2_end = 4
    x = tf.strided_slice(data, [x_dim1_start, x_dim2_start],
                         [x_dim1_end, x_dim2_end])
    y = tf.strided_slice(data, [1, 1], [2, 4])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(data)
        # x output should be [[1, 2, 3, 4]]: dim1=0, dim2=0..4
        print(sess.run(x))
        # y output should be [[12, 13, 14]]: dim1=1, dim2=1..4
        print(sess.run(y))
def export_strided_slice(filename, input_shape): t = tf.placeholder(tf.float32, shape=input_shape, name="input") out = tf.strided_slice(t, [1, 0, 0], [2, 1, 3], [1, 1, 1]) return export(out, filename)
def build_model(self):
    print('building model... ...')
    # ================================ 1. Define the model placeholders
    self.encoder_inputs = tf.placeholder(tf.int32, [None, None], name='encoder_inputs')
    self.encoder_inputs_length = tf.placeholder(tf.int32, [None], name='encoder_inputs_length')
    self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
    self.keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob_placeholder')
    self.decoder_targets = tf.placeholder(tf.int32, [None, None], name='decoder_targets')
    self.decoder_targets_length = tf.placeholder(tf.int32, [None], name='decoder_targets_length')
    self.max_target_sequence_length = tf.reduce_max(self.decoder_targets_length, name='max_target_len')
    self.mask = tf.sequence_mask(self.decoder_targets_length, self.max_target_sequence_length,
                                 dtype=tf.float32, name='masks')
    # ================================ 2. Define the encoder
    with tf.variable_scope('encoder'):
        # Create the LSTM cells: two layers plus dropout
        encoder_cell = self._create_rnn_cell()
        # Build the embedding matrix; encoder and decoder share it
        embedding = tf.get_variable('embedding', [self.vocab_size, self.embedding_size])
        encoder_inputs_embedded = tf.nn.embedding_lookup(embedding, self.encoder_inputs)
        # Encode the inputs into hidden vectors with dynamic_rnn.
        # encoder_outputs feeds attention: [batch_size, encoder_inputs_length, rnn_size]
        # encoder_state initializes the decoder state: [batch_size, rnn_size]
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, encoder_inputs_embedded,
            sequence_length=self.encoder_inputs_length, dtype=tf.float32)

    # ================================ 3. Define the decoder
    with tf.variable_scope('decoder'):
        encoder_inputs_length = self.encoder_inputs_length
        # if self.beam_search:
        #     # With beam search, tile the encoder outputs, state and lengths beam_size times.
        #     print("use beamsearch decoding..")
        #     encoder_outputs = tf.contrib.seq2seq.tile_batch(encoder_outputs, multiplier=self.beam_size)
        #     encoder_state = nest.map_structure(lambda s: tf.contrib.seq2seq.tile_batch(s, self.beam_size), encoder_state)
        #     encoder_inputs_length = tf.contrib.seq2seq.tile_batch(self.encoder_inputs_length, multiplier=self.beam_size)
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            num_units=self.rnn_size, memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)
        # attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=self.rnn_size, memory=encoder_outputs, memory_sequence_length=encoder_inputs_length)
        # Define the decoder LSTM cell and wrap it with attention
        decoder_cell = self._create_rnn_cell()
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell=decoder_cell, attention_mechanism=attention_mechanism,
            attention_layer_size=self.rnn_size, name='Attention_Wrapper')
        # With beam search batch_size would be self.batch_size * self.beam_size,
        # since everything was tiled above:
        # batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size
        batch_size = self.batch_size
        # Initialize the decoder state from the encoder's final hidden state
        decoder_initial_state = decoder_cell.zero_state(
            batch_size=batch_size, dtype=tf.float32).clone(cell_state=encoder_state)
        output_layer = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

        if self.mode == 'train':
            # Build the decoder input: prepend <go> to every target, drop the
            # final token, then embed.
            # decoder_inputs_embedded has shape [batch_size, decoder_targets_length, embedding_size]
            ending = tf.strided_slice(self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
            decoder_input = tf.concat(
                [tf.fill([self.batch_size, 1], self.word_to_idx['<go>']), ending], 1)
            decoder_inputs_embedded = tf.nn.embedding_lookup(embedding, decoder_input)
            # Training uses the standard TrainingHelper + BasicDecoder combination;
            # a custom Helper class could be substituted for other behaviours.
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                time_major=False, name='training_helper')
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell, helper=training_helper,
                initial_state=decoder_initial_state, output_layer=output_layer)
            # dynamic_decode returns a namedtuple with two fields:
            # rnn_output: [batch_size, decoder_targets_length, vocab_size],
            #     per-step word scores, used to compute the loss
            # sample_id: [batch_size], tf.int32, the decoded ids (the final answer)
            decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=training_decoder, impute_finished=True,
                maximum_iterations=self.max_target_sequence_length)
            # Compute the loss and gradients, and define the AdamOptimizer train_op
            self.decoder_logits_train = tf.identity(decoder_outputs.rnn_output)
            self.decoder_predict_train = tf.argmax(self.decoder_logits_train,
                                                   axis=-1, name='decoder_pred_train')
            # sequence_loss with the mask defined above
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=self.decoder_logits_train,
                targets=self.decoder_targets, weights=self.mask)

            # Training summary for the current batch_loss
            tf.summary.scalar('loss', self.loss)
            self.summary_op = tf.summary.merge_all()

            optimizer = tf.train.AdamOptimizer(self.learing_rate)
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm)
            self.train_op = optimizer.apply_gradients(zip(clip_gradients, trainable_params))
        elif self.mode == 'decode':
            start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_idx['<go>']
            end_token = self.word_to_idx['<eos>']
            # At decode time the setup depends on beam search:
            # with beam search, call BeamSearchDecoder directly (its helper is built in);
            # without it, combine GreedyEmbeddingHelper + BasicDecoder for greedy decoding.
            if self.beam_search:
                inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell, embedding=embedding,
                    start_tokens=start_tokens, end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=self.beam_size, output_layer=output_layer)
            else:
                decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=embedding, start_tokens=start_tokens, end_token=end_token)
                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell, helper=decoding_helper,
                    initial_state=decoder_initial_state, output_layer=output_layer)
            decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=inference_decoder, maximum_iterations=10)
            # decoder_outputs is a namedtuple.
            # Without beam search it holds (rnn_output, sample_id):
            #     rnn_output: [batch_size, decoder_targets_length, vocab_size]
            #     sample_id: [batch_size, decoder_targets_length], tf.int32
            # With beam search it holds (predicted_ids, beam_search_decoder_output):
            #     predicted_ids: [batch_size, decoder_targets_length, beam_size], the decoded ids
            #     beam_search_decoder_output: BeamSearchDecoderOutput(scores, predicted_ids, parent_ids)
            # So returning predicted_ids or sample_id suffices to recover the final answer.
            if self.beam_search:
                self.decoder_predict_decode = decoder_outputs.predicted_ids
            else:
                self.decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id, -1)
    # ================================ 4. Save the model
    self.saver = tf.train.Saver(tf.global_variables())
def build_search_images(self): """Crop search images from the input image based on the last target position 1. The input image is scaled such that the area of target&context takes up to (scale_factor * z_image_size) ^ 2 2. Crop an image patch as large as x_image_size centered at the target center. 3. If the cropped image region is beyond the boundary of the input image, mean values are padded. """ model_config = self.model_config track_config = self.track_config size_z = model_config['z_image_size'] size_x = track_config['x_image_size'] context_amount = 0.5 num_scales = track_config['num_scales'] scales = np.arange(num_scales) - get_center(num_scales) assert np.sum(scales) == 0, 'scales should be symmetric' search_factors = [track_config['scale_step']**x for x in scales] frame_sz = tf.shape(self.image) #target_yx = self.target_bbox_feed[0:2] target_yx = tf.strided_slice(self.target_bbox_feed, [0], [2], name="target_bbox_yx") #target_size = self.target_bbox_feed[2:4] target_size = tf.strided_slice(self.target_bbox_feed, [2], [4], name="target_bbox_size") avg_chan = tf.reduce_mean(self.image, axis=(0, 1), name='avg_chan') # Compute base values self.base_z_context_size = target_size + context_amount * tf.reduce_sum( target_size, name="target_z_size_sum") canonical_size = tf.sqrt(tf.reduce_prod( self.base_z_context_size)) # Canonical size base_scale_z = tf.div(tf.to_float(size_z), canonical_size) d_search = (size_x - size_z) / 2.0 base_pad = tf.div(d_search, base_scale_z) base_s_x = canonical_size + 2 * base_pad base_scale_x = tf.div(tf.to_float(size_x), base_s_x) boxes = [] for factor in search_factors: s_x = factor * base_s_x frame_sz_1 = tf.to_float(frame_sz[0:2] - 1) topleft = tf.div(target_yx - get_center(s_x), frame_sz_1) bottomright = tf.div(target_yx + get_center(s_x), frame_sz_1) box = tf.concat([topleft, bottomright], axis=0) boxes.append(box) self.boxes = tf.stack(boxes) scale_xs = [] for factor in search_factors: scale_x = base_scale_x / factor scale_xs.append(scale_x) self.scale_xs = tf.stack(scale_xs) # Note we use different padding values for each image # while the original implementation uses only the average value # of the first image for all images. image_minus_avg = tf.expand_dims(self.image - avg_chan, 0) image_cropped = tf.image.crop_and_resize( image_minus_avg, self.boxes, box_ind=tf.zeros((track_config['num_scales']), tf.int32), crop_size=[size_x, size_x]) self.search_images = image_cropped + avg_chan
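# A back-of-the-envelope check of the crop-size arithmetic above, assuming
# z_image_size=127 and x_image_size=255 (the usual SiamFC sizes) and a
# hypothetical 80x100 (height x width) target; plain numpy mirrors the graph ops.
import numpy as np

size_z, size_x, context_amount = 127.0, 255.0, 0.5
target_size = np.array([80.0, 100.0])                # [height, width]
z_context = target_size + context_amount * target_size.sum()
canonical = np.sqrt(np.prod(z_context))              # ~179.7 px
base_scale_z = size_z / canonical
base_pad = ((size_x - size_z) / 2.0) / base_scale_z  # ~90.6 px
base_s_x = canonical + 2 * base_pad                  # ~360.9 px search window
print(canonical, base_s_x)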
def get_seq_element(sequence_position, input_batch): return tf.strided_slice( input_batch, [sequence_position, 0], [sequence_position + 1, input_batch.shape[-1]], [1, 1] )
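# A minimal usage sketch of get_seq_element above (TF 1.x assumed): the slice
# keeps the leading axis, so position i of a [T, D] batch comes back as [1, D].
import tensorflow as tf

batch = tf.constant([[1, 2, 3],
                     [4, 5, 6],
                     [7, 8, 9]])
row = get_seq_element(1, batch)
with tf.Session() as sess:
    print(sess.run(row))  # [[4 5 6]]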
def _build_hrne_encoder(self, seq_embeddings, seq_len, params, use_min_partition=False): """ Builds hierarchical text encoder with specified stride """ with tf.variable_scope('dynamic_seq2seq') as scope: #Define GRU cells for both the layers gru_1 = self._gru_cell(params, scope_name='gru_1') gru_2 = self._gru_cell(params, scope_name='gru_2') gru_1_len = params.stride gru_2_len = seq_embeddings.shape.as_list()[1] # Initialize the layer 1 and layer 2 hidden state. h_prev = tf.zeros([params.batch_size, params.num_units], name='h_init') output_gru_1 = [] #Run the first layer of GRU and stack individual timesteps for step in range(gru_2_len): out, h_prev = gru_1(inputs=seq_embeddings[:, step, :], state=h_prev) output_gru_1.append(out) #Stack all the states and split the batch into individual samples stacked_states = tf.stack(output_gru_1, axis=1) state_dim = stacked_states.shape.as_list()[-1] batch_padded_state_vectors = tf.split( stacked_states, num_or_size_splits=params.batch_size, axis=0) batch_state_vectors = [] batch_strided_states = [] partitions = [] inter = [] for index in range(len(batch_padded_state_vectors)): # Get the vectors corresponding to the actual length of the sample caption sliced_state = tf.squeeze( tf.slice(batch_padded_state_vectors[index], begin=[0, 0, 0], size=[1, seq_len[index], state_dim])) final_timestep = sliced_state[-1, :] batch_state_vectors.append(final_timestep) # Get the strided outputs. Strided slice includes the first timestep as well. Ignore that !! strided_sliced_state = tf.strided_slice( sliced_state[:-1, :], [0, 0], [seq_len[index], state_dim], [params.stride, 1])[1:, :] # Infer the partitions given by strided slice op inferred_partitions = tf.cast( tf.ceil(tf.divide(seq_len[index] - 1, params.stride)), tf.int32) - 1 partitions.append(inferred_partitions) batch_strided_states.append(strided_sliced_state) inter.append(strided_sliced_state[-1, :]) # Batch all the individual final timestep vectors back layer1_state_vectors = tf.stack(batch_state_vectors, axis=0) # Above looping caused dynamic shapes. # Set the static shape to ensure rest of the graph builds with static shapes. 
layer1_state_vectors.set_shape([params.batch_size, state_dim]) intermediate_state_vec = tf.stack(inter, axis=0) intermediate_state_vec.set_shape([params.batch_size, state_dim]) # Use the minimum batch seq_len to determine partitions else consider the more general case layer2_input_states = [] if use_min_partition: min_batch_seq_len = tf.reduce_min(seq_len) minimum_partitions = tf.cast( tf.floor( tf.divide(tf.cast(min_batch_seq_len, tf.float32), params.stride)), tf.int32) # Slice out minimum partitions from strided states for each sample in the batch for strided_state in batch_strided_states: min_sliced_strided_state = tf.slice( strided_state, [0, 0], [minimum_partitions, state_dim]) layer2_input_states.append(min_sliced_strided_state) # Sequence length input to second layer should all be minimum partitions partitions = params.batch_size * [minimum_partitions] else: # Get the maximum length of sequences in a batch max_pad_len = tf.reduce_max(partitions) # Pad the rest of the samples to the maximum length sequence to form inputs to second GRU layer for k, state in enumerate(batch_strided_states): current_num_partitions = partitions[k] pad_value = max_pad_len - current_num_partitions constant_pad_vector = tf.pad(state, [[0, pad_value], [0, 0]]) layer2_input_states.append(constant_pad_vector) # Stack all the batch minimum strided states stacked_layer2_input_states = tf.stack( layer2_input_states, axis=0, name='stacked_layer2_input_states') # Append the last state for comprehensive information all_layer2_input_states = tf.concat([ stacked_layer2_input_states, tf.expand_dims(layer1_state_vectors, 1) ], axis=1) # Form GRU_2 chain with all the strided states from layer 1 # Gather the final state from GRU_2 output_state = self._dynamic_rnn( gru_2, all_layer2_input_states, partitions + tf.ones_like(partitions) ) # Since we are adding the final timestep vectors later # Concat outputs from both the layers final_concat_vector = tf.concat( [layer1_state_vectors, output_state], axis=1) return final_concat_vector
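# A numeric check in plain numpy (a sketch, hypothetical lengths) of the
# partition count inferred above: striding over seq_len-1 states with step
# `stride` and discarding the first kept state leaves
# ceil((seq_len - 1) / stride) - 1 states.
import numpy as np

for seq_len, stride in [(10, 3), (9, 2), (7, 4)]:
    kept = np.arange(seq_len - 1)[::stride][1:]  # mirrors strided_slice + [1:, :]
    inferred = int(np.ceil((seq_len - 1) / stride)) - 1
    assert len(kept) == inferred, (seq_len, stride)
    print(seq_len, stride, kept)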
def strided_slice(self, args: Any, kwargs: Any) -> 'PrimeTensor': return self.factory.tensor( tf.strided_slice(self.value, *args, **kwargs))
def attention_decoder(output_data, corpus_size, word2id, emb_matrix, hidden_size,
                      num_layers, vocab_size, input_sequence_length,
                      output_sequence_length, max_output_sequence_length,
                      max_inference_sequence_length, encoder_output):
    # NOTE: input_sequence_length (the encoder lengths fed to the attention
    # mechanism) was referenced but missing from the original signature; it is
    # added here so the function is self-contained.
    # Numpy-style slicing output_data[0:corpus_size:1, 0:-1:1]:
    # drop the last column of output_data.
    ending = tf.strided_slice(output_data, begin=[0, 0],
                              end=[corpus_size, -1], strides=[1, 1])
    begin_signal = tf.fill(dims=[corpus_size, 1], value=word2id['_BOS'])
    decoder_input_data = tf.concat([begin_signal, ending], axis=1,
                                   name='decoder_input_data')
    decoder_embedding_input = tf.nn.embedding_lookup(params=emb_matrix,
                                                     ids=decoder_input_data)
    decoder_cells = tf.contrib.rnn.MultiRNNCell(
        [get_lstm_cell(hidden_size) for i in range(num_layers)])
    # Attention mechanism
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        num_units=hidden_size,
        memory=encoder_output,
        memory_sequence_length=input_sequence_length)
    decoder_cells = tf.contrib.seq2seq.AttentionWrapper(
        cell=decoder_cells,
        attention_mechanism=attention_mechanism,
        attention_layer_size=hidden_size)
    project_layer = tf.layers.Dense(
        units=vocab_size,  # number of output units (vocabulary size)
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))  # weight initialization
    with tf.variable_scope('Decoder'):
        # Helper object
        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_embedding_input,
            sequence_length=output_sequence_length)
        # Basic Decoder
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cells,
            helper=training_helper,
            output_layer=project_layer,
            initial_state=decoder_cells.zero_state(batch_size=corpus_size, dtype=tf.float32))
        # Dynamic decoding
        training_final_output, training_final_state, training_sequence_length = tf.contrib.seq2seq.dynamic_decode(
            decoder=training_decoder,
            maximum_iterations=max_output_sequence_length,
            impute_finished=True)
    with tf.variable_scope('Decoder', reuse=True):
        # Helper object
        start_tokens = tf.tile(input=tf.constant(value=[word2id['_BOS']], dtype=tf.int32),
                               multiples=[corpus_size], name='start_tokens')
        inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=emb_matrix,
            start_tokens=start_tokens,
            end_token=word2id['_EOS'])
        # Basic Decoder
        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cells,
            helper=inference_helper,
            output_layer=project_layer,
            initial_state=decoder_cells.zero_state(batch_size=corpus_size, dtype=tf.float32))
        # Dynamic decoding
        inference_final_output, inference_final_state, inference_sequence_length = tf.contrib.seq2seq.dynamic_decode(
            decoder=inference_decoder,
            maximum_iterations=max_inference_sequence_length,
            impute_finished=True)
    return training_final_output, training_final_state, inference_final_output, inference_final_state
def process_decoder_input(data, word2int, batch_size): ending = tf.strided_slice(data, [0, 0], [batch_size, -1], [1, 1]) decoder_input = tf.concat( [tf.fill([batch_size, 1], word2int["<GO>"]), ending], 1) return decoder_input
def build_train_decoder(self):
    print('Building train decoder...')
    # tf.strided_slice(data, begin, end, strides) is a strided slice op; each
    # interval is closed on the left and open on the right, and a -1 in `end`
    # stops just before the last element of that dimension.
    # Here data is decoder_targets, so `ending` holds each target sentence in
    # the batch with its final token removed.
    ending = tf.strided_slice(self.decoder_targets, [0, 0],
                              [self.batch_size, -1], [1, 1])
    # tf.fill(dims, value) creates a tensor of shape `dims` filled with `value`;
    # tf.concat(values, axis) concatenates `values` along `axis`.
    # Together they prepend <GO> to every target sentence.
    decoder_input = tf.concat(
        [tf.fill([self.batch_size, 1], self.word_to_id['<GO>']), ending], 1)
    # tf.nn.embedding_lookup(params, ids) gathers the rows of `params` indexed
    # by `ids`, converting the word ids in decoder_input to embedding vectors.
    decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding, decoder_input)
    # A Helper is the part of a Decoder that decides what the decoder's next
    # input is. The API provides several Helper classes:
    # "Helper": the abstract base class
    # "TrainingHelper": the usual training helper; the next input is the
    #     ground-truth target from the previous step
    # "GreedyEmbeddingHelper": the usual inference helper; the next input is
    #     the embedding of the previous step's most probable word
    # "SampleEmbeddingHelper": inference helper derived from
    #     GreedyEmbeddingHelper; the next input is sampled from the output
    #     distribution and then embedded
    # "CustomHelper": the minimal helper; the user defines how the next input
    #     is derived from the previous output
    # "ScheduledEmbeddingTrainingHelper": training helper derived from
    #     TrainingHelper; samples (Bernoulli) between target embeddings and
    #     embedded model outputs
    # "ScheduledOutputTrainingHelper": training helper derived from
    #     TrainingHelper; samples directly from the outputs
    # "InferenceHelper": a special case of CustomHelper, inference only
    if self.teacher_forcing:  # with teacher forcing
        training_helper = ScheduledEmbeddingTrainingHelper(
            inputs=decoder_inputs_embedded,               # decoder inputs
            sequence_length=self.decoder_targets_length,  # input lengths
            embedding=self.embedding,                     # embedding matrix
            sampling_probability=self.teacher_forcing_probability,  # probability of sampling the output instead of the target
            # time_major selects whether time is the leading axis: True expects
            # T x B x E input, False expects B x T x E
            # (T: sequence length, B: batch size, E: embedding size).
            time_major=False,
            name='teacher_forcing_training_helper')
    else:  # without teacher forcing
        training_helper = TrainingHelper(
            inputs=decoder_inputs_embedded,               # decoder inputs
            sequence_length=self.decoder_targets_length,  # input lengths
            time_major=False,
            name='training_helper')
    training_decoder = BasicDecoder(               # basic sampling decoder
        cell=self.decoder_cell,                    # RNN cell to use
        helper=training_helper,                    # helper to use
        initial_state=self.decoder_initial_state,  # initial state (h0)
        output_layer=self.output_layer             # output projection layer
    )
    decoder_outputs, _, _ = dynamic_decode(        # dynamic decoding loop
        decoder=training_decoder,                  # decoder instance
        # impute_finished=True copies the last state through and zeroes the
        # outputs once a sequence finishes, so final states and outputs are
        # correct, at some cost in speed.
        impute_finished=True,
        maximum_iterations=self.max_target_sequence_length  # cap decoding at the longest target
    )
    # tf.identity makes a named copy of the training logits.
    self.decoder_logits_train = tf.identity(decoder_outputs.rnn_output)
    # Define the loss as sequence_loss
    self.loss = sequence_loss(
        logits=self.decoder_logits_train,  # output logits
        targets=self.decoder_targets,      # ground-truth targets
        weights=self.mask                  # mask out padding so it does not distort the loss
    )
    # Summaries for visualization
    tf.summary.scalar('loss', self.loss)
    self.summary_op = tf.summary.merge_all()
    # Hand off to build_optimizer()
    self.build_optimizer()
def batch_divide(target, batchsize, word_int_dict):
    # Drop the last token of each target row, then prepend <SOS>.
    sos = tf.fill([batchsize, 1], word_int_dict['<SOS>'])
    q_batch = tf.strided_slice(target, [0, 0], [batchsize, -1], strides=[1, 1])
    batch = tf.concat([sos, q_batch], 1)
    return batch
def processed_decoder_input(self): main = tf.strided_slice(self.Y, [0, 0], [self.batch_size, -1], [1, 1]) # remove last char decoder_input = tf.concat( [tf.fill([self.batch_size, 1], self._y_go), main], 1) return decoder_input
def __init__(self, size_layer, num_layers, embedded_size, from_dict_size,
             to_dict_size, learning_rate, grad_clip=5.0, beam_width=5,
             force_teaching_ratio=0.5):

    def lstm_cell(size, reuse=False):
        return tf.nn.rnn_cell.LSTMCell(
            size, initializer=tf.orthogonal_initializer(), reuse=reuse)

    self.X = tf.placeholder(tf.int32, [None, None])
    self.Y = tf.placeholder(tf.int32, [None, None])
    self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
    self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
    batch_size = tf.shape(self.X)[0]
    # Word embeddings
    encoder_embeddings = tf.Variable(
        tf.random_uniform([from_dict_size, embedded_size], -1, 1))
    decoder_embeddings = tf.Variable(
        tf.random_uniform([to_dict_size, embedded_size], -1, 1))
    self.encoder_out = tf.nn.embedding_lookup(encoder_embeddings, self.X)
    # Multi-layer bidirectional encoder
    for n in range(num_layers):
        (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell(size_layer // 2),
            cell_bw=lstm_cell(size_layer // 2),
            inputs=self.encoder_out,
            sequence_length=self.X_seq_len,
            dtype=tf.float32,
            scope='bidirectional_rnn_%d' % (n))
        self.encoder_out = tf.concat((out_fw, out_bw), 2)

    bi_state_c = tf.concat((state_fw.c, state_bw.c), -1)
    bi_state_h = tf.concat((state_fw.h, state_bw.h), -1)
    bi_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(c=bi_state_c, h=bi_state_h)
    encoder_state = tuple([bi_lstm_state] * num_layers)

    # Multi-layer decoder (training)
    with tf.variable_scope('decode'):
        # 1. Add attention
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            num_units=size_layer,
            memory=self.encoder_out,
            memory_sequence_length=self.X_seq_len)
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell=tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)]),
            attention_mechanism=attention_mechanism,
            attention_layer_size=size_layer)
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        # Training helper (scheduled sampling)
        training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
            inputs=tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
            sequence_length=self.Y_seq_len,
            embedding=decoder_embeddings,
            sampling_probability=1 - force_teaching_ratio,
            time_major=False)
        # Basic decoder
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cell,
            helper=training_helper,
            initial_state=decoder_cell.zero_state(
                batch_size, tf.float32).clone(cell_state=encoder_state),
            output_layer=tf.layers.Dense(to_dict_size))
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=tf.reduce_max(self.Y_seq_len))
        self.training_logits = training_decoder_output.rnn_output

    # Inference (beam search)
    with tf.variable_scope('decode', reuse=True):
        encoder_out_tiled = tf.contrib.seq2seq.tile_batch(
            self.encoder_out, beam_width)
        encoder_state_tiled = tf.contrib.seq2seq.tile_batch(
            encoder_state, beam_width)
        X_seq_len_tiled = tf.contrib.seq2seq.tile_batch(
            self.X_seq_len, beam_width)
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            num_units=size_layer,
            memory=encoder_out_tiled,
            memory_sequence_length=X_seq_len_tiled)
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell=tf.nn.rnn_cell.MultiRNNCell([
                lstm_cell(size_layer, reuse=True) for _ in range(num_layers)
            ]),
            attention_mechanism=attention_mechanism,
            attention_layer_size=size_layer)
        predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell=decoder_cell,
            embedding=decoder_embeddings,
            start_tokens=tf.tile(tf.constant([GO], dtype=tf.int32),
                                 [batch_size]),
            end_token=EOS,
            initial_state=decoder_cell.zero_state(
                batch_size * beam_width,
                tf.float32).clone(cell_state=encoder_state_tiled),
            beam_width=beam_width,
            output_layer=tf.layers.Dense(to_dict_size, _reuse=True),
            length_penalty_weight=0.0)
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=predicting_decoder,
            impute_finished=False,
            maximum_iterations=2 * tf.reduce_max(self.X_seq_len))
        self.predicting_ids = predicting_decoder_output.predicted_ids[:, :, 0]

    masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len),
                             dtype=tf.float32)
    self.cost = tf.contrib.seq2seq.sequence_loss(
        logits=self.training_logits, targets=self.Y, weights=masks)
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
    y_t = tf.argmax(self.training_logits, axis=2)
    y_t = tf.cast(y_t, tf.int32)
    self.prediction = tf.boolean_mask(y_t, masks)
    mask_label = tf.boolean_mask(self.Y, masks)
    correct_pred = tf.equal(self.prediction, mask_label)
    correct_index = tf.cast(correct_pred, tf.float32)
    self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
def _build_hrne_att_encoder(self, seq_embeddings, seq_len, params): """ Builds HRNE model with attention in 2nd layer """ with tf.variable_scope('dynamic_seq2seq') as scope: #Define GRU cells for both the layers gru_1 = self._gru_cell(params, scope_name='gru_1') gru_1_len = params.stride num_timesteps = seq_embeddings.shape.as_list()[1] # Initialize the layer 1 and layer 2 hidden state. h_prev = tf.zeros([params.batch_size, params.num_units], name='h_init') output_gru_1 = [] #Run the first layer of GRU and stack individual timesteps for step in range(num_timesteps): out, h_prev = gru_1(inputs=seq_embeddings[:, step, :], state=h_prev) output_gru_1.append(out) #Stack all the states and split the batch into individual samples stacked_states = tf.stack(output_gru_1, axis=1) state_dim = stacked_states.shape.as_list()[-1] batch_padded_state_vectors = tf.split( stacked_states, num_or_size_splits=params.batch_size, axis=0) batch_state_vectors = [] batch_strided_states = [] partitions = [] for index in range(len(batch_padded_state_vectors)): # Get the vectors corresponding to the actual length of the sample caption sliced_state = tf.squeeze( tf.slice(batch_padded_state_vectors[index], begin=[0, 0, 0], size=[1, seq_len[index], state_dim])) final_timestep = sliced_state[-1, :] batch_state_vectors.append(final_timestep) # Get the strided outputs. Strided slice includes the first timestep as well. Ignore that !! strided_sliced_state = tf.strided_slice( sliced_state[:-1, :], [0, 0], [seq_len[index], state_dim], [params.stride, 1])[1:, :] # Infer the partitions given by strided slice op inferred_partitions = tf.cast( tf.ceil(tf.divide(seq_len[index] - 1, params.stride)), tf.int32) - 1 partitions.append(inferred_partitions) batch_strided_states.append(strided_sliced_state) # Batch all the individual final timestep vectors back layer1_state_vectors = tf.stack(batch_state_vectors, axis=0) # Above looping caused dynamic shapes. # Set the static shape to ensure rest of the graph builds with static shapes. layer1_state_vectors.set_shape([params.batch_size, state_dim]) # Define shared attention matrix w_att = tf.get_variable(shape=[state_dim, state_dim], name='w_att', trainable=True) # Apply attention to strided states batch_context_states = [] for strided_state in batch_strided_states: att_strided_state = tf.matmul(strided_state, w_att) max_pooled_att_state = tf.reduce_max(att_strided_state, axis=0) batch_context_states.append(max_pooled_att_state) # Stack the local context batch_context_vector = tf.stack(batch_context_states, axis=0, name='batch_context_vector') fused_vector = self._gated_fusion_unit(batch_context_vector, layer1_state_vectors) return fused_vector