def add_volume_iou_metrics(inputs, outputs):
    """Computes the per-instance volume IOU.

    Ground-truth and predicted voxels are binarized at 0.5 and remapped to
    class ids so that only the "occupied" class can ever have a non-empty
    intersection between labels and predictions:

        labels:      occupied -> 0, empty -> 1
        predictions: occupied -> 0, empty -> 2

    Classes 1 and 2 never overlap, so their IOU is 0. Scaling the 3-class
    mean IOU by 3 is intended to recover the IOU of the occupied class
    (this relies on `tf.metrics.mean_iou` averaging over all three classes).

    Args:
        inputs: Input dictionary of the voxel generation model. Must contain
            the key 'voxels'.
        outputs: Output dictionary returned by the voxel generation model.
            Must contain the key 'voxels_1'.

    Returns:
        names_to_values: metrics->values (dict).
        names_to_updates: metrics->ops (dict).
    """
    names_to_values = dict()
    names_to_updates = dict()

    labels = tf.greater_equal(inputs['voxels'], 0.5)
    predictions = tf.greater_equal(outputs['voxels_1'], 0.5)

    # Class ids passed to mean_iou must lie in [0, num_classes). Without the
    # trailing "- 1" the predictions would take the value 3, which is out of
    # range for num_classes=3.
    labels = (2 - tf.to_int32(labels)) - 1
    predictions = (3 - tf.to_int32(predictions) * 2) - 1

    tmp_values, tmp_updates = tf.metrics.mean_iou(
        labels=labels, predictions=predictions, num_classes=3)

    names_to_values['volume_iou'] = tmp_values * 3.0
    names_to_updates['volume_iou'] = tmp_updates
    return names_to_values, names_to_updates
def crossentropy(logits, targets, sequence_length):
    """Sums the masked sparse softmax cross entropy over a batch.

    Positions beyond each sequence's length are masked out before summing.
    The loss itself is NOT divided by anything here; the batch size is
    returned alongside so the caller can normalize.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The length of `targets`, [batch_size, ]

    Returns:
        A tuple `(loss_sum, weight_sum)`: the summed masked loss and the
        batch size as a float scalar.
    """
    max_time = tf.to_int32(tf.shape(targets)[0])

    # sequence_mask yields [batch_size, timesteps]; transpose to time-major.
    batch_major_mask = tf.sequence_mask(
        lengths=tf.to_int32(sequence_length),
        maxlen=max_time,
        dtype=tf.float32)
    time_major_mask = tf.transpose(batch_major_mask, [1, 0])

    # [timesteps, batch_size]
    token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=targets, logits=logits)

    masked_total = tf.reduce_sum(token_losses * time_major_mask)
    num_sequences = tf.to_float(tf.shape(sequence_length)[0])
    return masked_total, num_sequences
def pad_to_multiple(tensor, multiple):
    """Returns the tensor zero padded to the specified multiple.

    Appends 0s to the end of the first and second dimension (height and width)
    of the tensor until both dimensions are a multiple of the input argument
    'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
    multiple of 4, PadToMultiple will append 0s so that the resulting tensor
    will be of shape [1, 4, 8, 1].

    The zero padding is created with the same dtype as `tensor`, so inputs
    other than float32 can be padded as well.

    Args:
        tensor: rank 4 tensor, where
            tensor -> [batch_size, height, width, channels].
        multiple: the multiple to pad to.

    Returns:
        padded_tensor: the tensor zero padded to the specified multiple.
    """
    tensor_shape = tensor.get_shape()
    batch_size = static_shape.get_batch_size(tensor_shape)
    tensor_height = static_shape.get_height(tensor_shape)
    tensor_width = static_shape.get_width(tensor_shape)
    tensor_depth = static_shape.get_depth(tensor_shape)

    # Fall back to dynamic shapes for any dimension that is not known
    # statically.
    if batch_size is None:
        batch_size = tf.shape(tensor)[0]

    if tensor_height is None:
        tensor_height = tf.shape(tensor)[1]
        padded_tensor_height = tf.to_int32(
            tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))
        ) * multiple
    else:
        padded_tensor_height = int(
            math.ceil(float(tensor_height) / multiple) * multiple)

    if tensor_width is None:
        tensor_width = tf.shape(tensor)[2]
        padded_tensor_width = tf.to_int32(
            tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))
        ) * multiple
    else:
        padded_tensor_width = int(
            math.ceil(float(tensor_width) / multiple) * multiple)

    if tensor_depth is None:
        tensor_depth = tf.shape(tensor)[3]

    # Use tf.concat instead of tf.pad to preserve static shape
    if padded_tensor_height != tensor_height:
        height_pad = tf.zeros(
            [
                batch_size,
                padded_tensor_height - tensor_height,
                tensor_width,
                tensor_depth,
            ],
            dtype=tensor.dtype)
        tensor = tf.concat([tensor, height_pad], 1)

    if padded_tensor_width != tensor_width:
        # The height has already been padded at this point, so the width
        # padding must span the padded height.
        width_pad = tf.zeros(
            [
                batch_size,
                padded_tensor_height,
                padded_tensor_width - tensor_width,
                tensor_depth,
            ],
            dtype=tensor.dtype)
        tensor = tf.concat([tensor, width_pad], 2)

    return tensor
def adjust_bboxes(bboxes, old_height, old_width, new_height, new_width):
    """Rescales bounding boxes to match a resized image.

    Each coordinate is first normalized by the old image size and then
    scaled by the new image size; the result is cast back to int32.

    Args:
        bboxes: Tensor with shape (num_bboxes, 5). Last element is the label.
        old_height: Float. Height of the original image.
        old_width: Float. Width of the original image.
        new_height: Float. Height of the image after resizing.
        new_width: Float. Width of the image after resizing.

    Returns:
        Tensor with shape (num_bboxes, 5), with the adjusted bboxes.
    """

    def _rescale(coord, old_size, new_size):
        # Normalize to [0, 1] relative to the old size, then go back to
        # absolute values in the new size.
        return tf.to_int32((coord / old_size) * new_size)

    columns = tf.unstack(tf.to_float(bboxes), axis=1)
    left, top, right, bottom, cls = columns

    adjusted = [
        _rescale(left, old_width, new_width),
        _rescale(top, old_height, new_height),
        _rescale(right, old_width, new_width),
        _rescale(bottom, old_height, new_height),
        tf.to_int32(cls),  # Labels are only cast back to int.
    ]
    # Re-assemble a [num_bboxes, 5] tensor.
    return tf.stack(adjusted, axis=1)
def decoder(self, logits_main, logits_sub, inputs_seq_len, beam_width=1):
    """Builds the CTC decoding ops for the main and the sub task.

    Args:
        logits_main: A tensor of size `[T, B, input_size]`
        logits_sub: A tensor of size `[T, B, input_size]`
        inputs_seq_len: A tensor of size `[B]`
        beam_width (int, optional): beam width for beam search.
            1 disables beam search, which means greedy decoding.

    Return:
        decode_op_main: operation for decoding of the main task
        decode_op_sub: operation for decoding of the sub task
    """
    assert isinstance(beam_width, int), "beam_width must be integer."
    assert beam_width >= 1, "beam_width must be >= 1"

    use_greedy = beam_width == 1

    if use_greedy:
        paths_main, _ = tf.nn.ctc_greedy_decoder(logits_main, inputs_seq_len)
        paths_sub, _ = tf.nn.ctc_greedy_decoder(logits_sub, inputs_seq_len)
    else:
        paths_main, _ = tf.nn.ctc_beam_search_decoder(
            logits_main, inputs_seq_len, beam_width=beam_width)
        paths_sub, _ = tf.nn.ctc_beam_search_decoder(
            logits_sub, inputs_seq_len, beam_width=beam_width)

    # Only the first decoded path of each task is returned, cast to int32.
    return tf.to_int32(paths_main[0]), tf.to_int32(paths_sub[0])
def _smallest_size_at_least(height, width, smallest_side):
    """Computes an aspect-preserving shape whose smaller side is `smallest_side`.

    Args:
        height: an int32 scalar tensor indicating the current height.
        width: an int32 scalar tensor indicating the current width.
        smallest_side: A python integer or scalar `Tensor` indicating the size
            of the smallest side after resize.

    Returns:
        new_height: an int32 scalar tensor indicating the new height.
        new_width: an int32 scalar tensor indicating the new width.
    """
    target = tf.to_float(tf.convert_to_tensor(smallest_side, dtype=tf.int32))
    height_f = tf.to_float(height)
    width_f = tf.to_float(width)

    # Scale relative to whichever side is the smaller one.
    ratio = tf.cond(
        tf.greater(height_f, width_f),
        lambda: target / width_f,
        lambda: target / height_f)

    return tf.to_int32(height_f * ratio), tf.to_int32(width_f * ratio)
def indices_to_dense_vector(indices, size, indices_value=1., default_value=0,
                            dtype=tf.float32):
    """Builds a dense 1D vector with selected positions set to a given value.

    The vector is assembled with `tf.dynamic_stitch`: a full-length vector of
    `default_value` is stitched first, then the entries at `indices` are
    overwritten with `indices_value`. This works with a dynamic `size`
    (e.g. tf.shape(tensor)[0]).

    Args:
        indices: 1d Tensor with integer indices which are to be set to
            indices_value.
        size: scalar with size (integer) of output Tensor.
        indices_value: values of elements specified by indices in the output
            vector.
        default_value: values of other elements in the output vector.
        dtype: data type.

    Returns:
        dense 1D Tensor of shape [size] with indices set to indices_value and
        the rest set to default_value.
    """
    size = tf.to_int32(size)

    background = tf.ones([size], dtype=dtype) * default_value
    foreground = tf.ones_like(indices, dtype=dtype) * indices_value

    stitch_positions = [tf.range(size), tf.to_int32(indices)]
    stitch_values = [background, foreground]
    return tf.dynamic_stitch(stitch_positions, stitch_values)
def get_exemplar_images(images, exemplar_size, targets_pos=None):
    """Crops one exemplar patch out of every image in a batch.

    Args:
        images: batch of images; the first three static dimensions are read
            as (batch_size, height, width).
        exemplar_size: pair (z_height, z_width) giving the crop size.
        targets_pos: optional per-image crop centers indexed as
            [:, 0] (row) and [:, 1] (column). When None, the image center
            (as computed by `get_center`) is used for every image.

    Returns:
        Tensor of crops with static shape
        [batch_size, z_height, z_width, 3].
    """
    with tf.name_scope('get_exemplar_image'):
        batch_size, x_height, x_width = images.get_shape().as_list()[:3]
        z_height, z_width = exemplar_size

        if targets_pos is not None:
            centers = targets_pos
        else:
            image_center = [[get_center(x_height), get_center(x_width)]]
            centers = tf.tile(image_center, [batch_size, 1])

        # Convert center coordinates to top-left / bottom-right corners.
        top = tf.to_int32(tf.round(centers[:, 0] - get_center(z_height)))
        bottom = tf.to_int32(top + z_height)
        left = tf.to_int32(tf.round(centers[:, 1] - get_center(z_width)))
        right = tf.to_int32(left + z_width)

        def _crop_one(args):
            frame, row0, col0, row1, col1 = args
            return frame[row0:row1, col0:col1]

        crops = tf.map_fn(
            _crop_one, (images, top, left, bottom, right), dtype=images.dtype)
        crops.set_shape([batch_size, z_height, z_width, 3])
        return crops
def test_accuracy(logits, labels):
    """Returns the fraction of correct argmax predictions in one batch.

    The batch size is taken from `cfg.batch_size`.

    Args:
        logits: prediction scores; the argmax is taken over axis 1.
        labels: ground-truth class ids, cast to int32 for the comparison.

    Returns:
        Scalar tensor: number of correct predictions / cfg.batch_size.
    """
    predicted = tf.reshape(
        tf.to_int32(tf.argmax(logits, axis=1)), shape=(cfg.batch_size,))
    hits = tf.cast(tf.equal(tf.to_int32(labels), predicted), tf.float32)
    return tf.reduce_sum(hits) / cfg.batch_size
def crop_or_pad(waves, length, channels):
    """Forces a batch of waves to shape [N, length, channels].

    The length axis is zero padded on both sides when too short and cropped
    from the end when too long. The channel axis is tiled and then truncated
    to `channels`.

    Args:
        waves: A 3D `Tensor` of NLC format.
        length: A Python scalar. The output wave size.
        channels: Number of output waves channels.

    Returns:
        A 3D `Tensor` of NLC format with shape [N, length, channels].
    """
    waves = tf.convert_to_tensor(waves)
    static_batch = waves.shape[0].value
    dynamic_shape = tf.shape(waves)
    current_length = dynamic_shape[1]
    current_channels = dynamic_shape[2]

    # Force audio length: pad (split between both ends), then crop.
    total_pad = tf.maximum(0, length - current_length)
    pad_after = tf.to_int32(tf.to_float(total_pad) / 2.0)
    pad_before = total_pad - pad_after
    padded = tf.pad(waves, [[0, 0], [pad_before, pad_after], [0, 0]])
    fixed_length = padded[:, :length, :]

    # Force number of channels: repeat enough times, then truncate.
    repeats = tf.to_int32(
        tf.ceil(tf.to_float(channels) / tf.to_float(current_channels)))
    tiled = tf.tile(fixed_length, [1, 1, repeats])
    result = tiled[:, :, :channels]

    result.set_shape([static_batch, length, channels])
    return result
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
    """Per-sequence accuracy: 1.0 only if every weighted position matches.

    Args:
        predictions: model outputs; the last dimension holds the class scores.
            When that dimension is 1, the computation is delegated to
            `rounding_sequence_accuracy` (L1/L2 loss case).
        labels: ground-truth ids.
        weights_fn: function mapping the padded labels to per-position
            weights.

    Returns:
        A pair `(correct_seq, weight)` where `correct_seq` is 1.0 for each
        sequence without any weighted mismatch and 0.0 otherwise, and
        `weight` is the constant 1.0.
    """
    # If the last dimension is 1 then we're using L1/L2 loss.
    if common_layers.shape_list(predictions)[-1] == 1:
        return rounding_sequence_accuracy(
            predictions, labels, weights_fn=weights_fn)

    with tf.variable_scope(
            "padded_sequence_accuracy", values=[predictions, labels]):
        padded_predictions, padded_labels = common_layers.pad_with_zeros(
            predictions, labels)
        weights = weights_fn(padded_labels)

        # Flatten, keeping batch dim (and num_classes dim for predictions)
        # TPU argmax can only deal with a limited number of dimensions
        pred_dims = common_layers.shape_list(padded_predictions)
        batch_size, num_classes = pred_dims[0], pred_dims[-1]
        label_positions = common_layers.list_product(
            common_layers.shape_list(padded_labels)[1:])
        pred_positions = common_layers.list_product(pred_dims[1:-1])

        flat_predictions = tf.reshape(
            padded_predictions, [batch_size, pred_positions, num_classes])
        flat_labels = tf.reshape(padded_labels, [batch_size, label_positions])
        flat_weights = tf.reshape(weights, [batch_size, label_positions])

        predicted_ids = tf.to_int32(tf.argmax(flat_predictions, axis=-1))
        mismatches = tf.not_equal(predicted_ids, tf.to_int32(flat_labels))
        weighted_errors = tf.to_float(mismatches) * flat_weights

        reduce_axes = list(range(1, len(predicted_ids.get_shape())))
        any_error = tf.minimum(
            1.0, tf.reduce_sum(weighted_errors, axis=reduce_axes))
        return 1.0 - any_error, tf.constant(1.0)
def smoothing_crossentropy_avgall(logits, targets, sequence_length):
    """Label-smoothed cross entropy, averaged per sequence and summed.

    Each sequence's masked loss is divided by its own length; the
    per-sequence averages are then summed. The batch size is returned
    alongside so the caller can average over the batch.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The length of `targets`, [batch_size, ]

    Returns:
        Loss sum and weight sum.
    """
    vocab_size = logits.get_shape().as_list()[-1]
    soft_targets, normalizing = label_smoothing(targets, vocab_size)

    raw_losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets)
    token_losses = raw_losses - normalizing

    # sequence_mask yields [batch_size, timesteps]; transpose to time-major.
    batch_major_mask = tf.sequence_mask(
        lengths=tf.to_int32(sequence_length),
        maxlen=tf.to_int32(tf.shape(targets)[0]),
        dtype=tf.float32)
    time_major_mask = tf.transpose(batch_major_mask, [1, 0])
    masked_losses = token_losses * time_major_mask

    # Average every sequence over its own length, then sum over the batch.
    lengths_f = tf.to_float(sequence_length)
    per_sequence_avg = tf.reduce_sum(masked_losses, axis=0) / lengths_f
    total = tf.reduce_sum(per_sequence_avg)

    return total, tf.to_float(tf.shape(sequence_length)[0])
def padded_accuracy(logits, labels):
    """Per-position accuracy with weights that ignore label id 0.

    Args:
        logits: class scores; the argmax is taken over the last axis.
        labels: ground-truth ids, where 0 is treated as padding.

    Returns:
        A pair `(correct, weights)`: a float tensor that is 1.0 where the
        prediction equals the label, and a float tensor that is 1.0 where
        the label is non-zero.
    """
    with tf.variable_scope("padded_accuracy", values=[logits, labels]):
        logits, labels = _pad_tensors_to_same_length(logits, labels)
        nonpad_weights = tf.to_float(tf.not_equal(labels, 0))
        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        hits = tf.equal(predicted_ids, tf.to_int32(labels))
        return tf.to_float(hits), nonpad_weights
def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
        indicator: boolean tensor of shape [N] whose True entries can be
            sampled.
        batch_size: desired batch size. If None, keeps all positive samples
            and randomly selects negative samples so that the positive sample
            fraction matches self._positive_fraction. It cannot be None if
            is_static is True.
        labels: boolean tensor of shape [N] denoting positive(=True) and
            negative (=False) examples.
        scope: name scope.

    Returns:
        sampled_idx_indicator: boolean tensor of shape [N], True for entries
            which are sampled.

    Raises:
        ValueError: if labels and indicator are not 1D boolean tensors.
    """
    indicator_dims = indicator.get_shape().as_list()
    if len(indicator_dims) != 1:
        raise ValueError('indicator must be 1 dimensional, got a tensor of '
                         'shape %s' % indicator.get_shape())
    label_dims = labels.get_shape().as_list()
    if len(label_dims) != 1:
        raise ValueError('labels must be 1 dimensional, got a tensor of '
                         'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
        raise ValueError('labels should be of type bool. Received: %s' %
                         labels.dtype)
    if indicator.dtype != tf.bool:
        raise ValueError('indicator should be of type bool. Received: %s' %
                         indicator.dtype)

    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
        if self._is_static:
            return self._static_subsample(indicator, batch_size, labels)

        keep_all_positives = batch_size is None

        # Only sample from indicated samples
        not_labels = tf.logical_not(labels)
        positive_idx = tf.logical_and(labels, indicator)
        negative_idx = tf.logical_and(not_labels, indicator)

        # Sample positive and negative samples separately
        if keep_all_positives:
            max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
        else:
            max_num_pos = int(self._positive_fraction * batch_size)
        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
        num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))

        if keep_all_positives:
            # Pick negatives so the positive fraction matches the target.
            negative_positive_ratio = (
                1 - self._positive_fraction) / self._positive_fraction
            max_num_neg = tf.to_int32(
                negative_positive_ratio * tf.to_float(num_sampled_pos))
        else:
            # Fill whatever is left of the batch with negatives.
            max_num_neg = batch_size - num_sampled_pos
        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)

        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
def _build_once(self, dataset, feature_transformer):
    """Builds the fit/evaluate graph for one train batch and one test batch.

    Two batches are drawn from `dataset`, concatenated, passed through
    `feature_transformer`, and split back into train and test features. A
    model obtained through `build_fit` is fit on the train features and
    evaluated on the test features.

    Args:
        dataset: callable returning a batch exposing `label` and
            `label_onehot`.
        feature_transformer: callable mapping a batch to features.

    Returns:
        A pair `(test_loss, stats)`. `test_loss` is the mean cross-entropy
        term when `self.probs` is set, otherwise the test error rate.
        `stats` maps "<name>/accuracy_train", "<name>/accuracy_test" and
        "<name>/test_loss" to tensors.
    """
    with tf.device(self._local_device):
        tr_batch = dataset()
        te_batch = dataset()
        num_classes = tr_batch.label_onehot.shape.as_list()[1]
        all_batch = utils.structure_map_multi(lambda x: tf.concat(x, 0),
                                              [tr_batch, te_batch])
        features = feature_transformer(all_batch)
        trX, teX = utils.structure_map_split(
            lambda x: tf.split(x, 2, axis=0), features)
        trY = tf.to_int64(tr_batch.label)
        # NOTE: not used below; kept so the constructed graph is unchanged.
        trY_onehot = tf.to_int32(tr_batch.label_onehot)
        teY = tf.to_int64(te_batch.label)
        teY_shape = teY.shape.as_list()

        # Tuple parameters in a signature (`def f((a, b)):`) are a
        # SyntaxError on Python 3 (PEP 3113), so the single tuple argument is
        # unpacked inside the body. The call convention is unchanged.
        def blackbox(args):
            trX, trY, teX, teY = args
            trY = tf.to_int32(tf.rint(trY))
            teY = tf.to_int32(tf.rint(teY))
            tf_fn = build_fit(
                self._local_device,
                self._get_model,
                num_classes=num_classes,
                probs=self.probs)
            if self.probs:
                trP, teP, teP_probs = tf_fn(trX, trY, teX)
            else:
                trP, teP = tf_fn(trX, trY, teX)

            teY.set_shape(teY_shape)
            if self.probs:
                onehot = tf.one_hot(teY, num_classes)
                crossent = -tf.reduce_sum(onehot * teP_probs, [1])
                return tf.reduce_mean(crossent)
            else:
                # use error rate as the loss if no surrogate is available.
                return 1 - tf.reduce_mean(
                    tf.to_float(tf.equal(teY, tf.to_int32(teP))))

        test_loss = blackbox((trX, tf.to_float(trY), teX, tf.to_float(teY)))

        stats = {}
        tf_fn = build_fit(
            self._local_device,
            self._get_model,
            num_classes=num_classes,
            probs=self.probs)
        if self.probs:
            trP, teP, teP_probs = tf_fn(trX, trY, teX)
        else:
            trP, teP = tf_fn(trX, trY, teX)
        stats["%s/accuracy_train" % self.name] = tf.reduce_mean(
            tf.to_float(tf.equal(tf.to_int32(trY), tf.to_int32(trP))))
        stats["%s/accuracy_test" % self.name] = tf.reduce_mean(
            tf.to_float(tf.equal(tf.to_int32(teY), tf.to_int32(teP))))
        stats["%s/test_loss" % self.name] = test_loss
        return test_loss, stats
def predict_setup(self):
    """Builds the multi-scale prediction graph and prepares output folders.

    The input image is fed through the segmentation network at scales 1.0,
    0.75 and 0.5 (sharing weights); the three raw outputs are resized to a
    common size and fused with an element-wise max. The fused output is
    resized to the input resolution and arg-maxed into `self.pred`.

    Side effects:
        Sets `self.coord`, `self.pred` and `self.loader`, and creates
        `<out_dir>`, `<out_dir>/prediction` and, when `self.conf.visual` is
        set, `<out_dir>/visual_prediction`. Each directory is created
        independently, so an already existing `<out_dir>` no longer prevents
        the sub-directories from being created. Exits the process with
        status -1 when `self.conf.encoder_name` is not supported.
    """
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.test_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]

    # Add one batch dimension [1, h, w, 3 or 1]
    image_batch, label_batch = (tf.expand_dims(image, dim=0),
                                tf.expand_dims(label, dim=0))
    h_orig = tf.to_float(tf.shape(image_batch)[1])
    w_orig = tf.to_float(tf.shape(image_batch)[2])
    image_batch_075 = tf.image.resize_images(
        image_batch,
        tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)),
                  tf.to_int32(tf.multiply(w_orig, 0.75))]))
    image_batch_05 = tf.image.resize_images(
        image_batch,
        tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)),
                  tf.to_int32(tf.multiply(w_orig, 0.5))]))

    # Create network
    if self.conf.encoder_name not in ['res101', 'res50']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50")
        sys.exit(-1)

    # The first call creates the variables; the other scales reuse them.
    with tf.variable_scope('', reuse=False):
        net = ResNet_segmentation(
            image_batch, self.conf.num_classes, False,
            self.conf.encoder_name)
    with tf.variable_scope('', reuse=True):
        net075 = ResNet_segmentation(
            image_batch_075, self.conf.num_classes, False,
            self.conf.encoder_name)
    with tf.variable_scope('', reuse=True):
        net05 = ResNet_segmentation(
            image_batch_05, self.conf.num_classes, False,
            self.conf.encoder_name)

    # predictions
    # Network raw output
    raw_output100 = net.outputs
    raw_output075 = net075.outputs
    raw_output05 = net05.outputs
    # Bring every scale to the full-scale output size, then fuse by max.
    raw_output = tf.reduce_max(
        tf.stack([
            raw_output100,
            tf.image.resize_images(
                raw_output075, tf.shape(raw_output100)[1:3,]),
            tf.image.resize_images(
                raw_output05, tf.shape(raw_output100)[1:3,]),
        ]),
        axis=0)
    raw_output = tf.image.resize_bilinear(
        raw_output, tf.shape(image_batch)[1:3,])
    raw_output = tf.argmax(raw_output, axis=3)
    self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

    # Create directories. Each one is checked on its own so that a
    # pre-existing out_dir does not leave the sub-directories missing.
    output_dirs = [self.conf.out_dir, self.conf.out_dir + '/prediction']
    if self.conf.visual:
        output_dirs.append(self.conf.out_dir + '/visual_prediction')
    for path in output_dirs:
        if not os.path.isdir(path):
            os.makedirs(path)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def rounding_accuracy(predictions,
                      labels,
                      weights_fn=common_layers.weights_nonzero):
    """Accuracy for L1/L2 losses: predictions are cast to int32 and compared.

    Args:
        predictions: real-valued model outputs.
        labels: ground-truth values.
        weights_fn: function mapping the squeezed labels to per-position
            weights.

    Returns:
        A pair `(correct, weights)`: a float tensor that is 1.0 where the
        integer-cast prediction equals the label, and the weights.
    """
    int_predictions = tf.squeeze(tf.to_int32(predictions))
    squeezed_labels = tf.squeeze(labels)
    weights = weights_fn(squeezed_labels)
    hits = tf.equal(int_predictions, tf.to_int32(squeezed_labels))
    return tf.to_float(hits), weights
def _anchor_component_tf(self):
    """Generates anchors with TF ops and stores them on the instance.

    The feature-map height and width are derived from the first row of
    `self._im_info` divided by the first feature stride, rounded up.
    Sets `self._anchors` and `self._anchor_length`.
    """
    print('Use TF anchors')
    with tf.variable_scope('ANCHOR_' + self._tag):
        stride = self._feat_stride[0]
        # just to get the shape right
        feat_height = tf.to_int32(
            tf.ceil(self._im_info[0, 0] / np.float32(stride)))
        feat_width = tf.to_int32(
            tf.ceil(self._im_info[0, 1] / np.float32(stride)))
        generated = generate_anchors_pre_tf(
            feat_height,
            feat_width,
            stride,
            self._anchor_scales,
            self._anchor_ratios)
        self._anchors, self._anchor_length = generated
def preprocess_example(self, example, mode, hparams):
    """Runs the parent preprocessing, then reshapes the audio inputs.

    The "audio/sample_count" and "audio/sample_width" entries are removed
    from the example and used to reshape "inputs" to
    [sample_count, sample_width, 1].

    Args:
        example: dict of features for one example.
        mode: forwarded to the parent implementation.
        hparams: forwarded to the parent implementation.

    Returns:
        The example dict with reshaped "inputs".
    """
    parent = super(AudioTimitProblem, self)
    example = parent.preprocess_example(example, mode, hparams)

    # Reshape audio to proper shape
    num_samples = tf.to_int32(example.pop("audio/sample_count"))
    width = tf.to_int32(example.pop("audio/sample_width"))
    num_channels = 1
    target_shape = [num_samples, width, num_channels]
    example["inputs"] = tf.reshape(example["inputs"], target_shape)
    return example
def arg_max_2d(x_in):
    """Finds the 2D argmax over axes 1 and 2 of a rank-4 tensor.

    Axes 1 and 2 are flattened, the argmax is taken over the flattened axis,
    and the flat index is split back into two coordinates using the size of
    axis 2.

    Args:
        x_in: tensor indexed as [dim0, dim1, dim2, dim3].

    Returns:
        int32 tensor stacking (flat_index // dim2, flat_index % dim2) along a
        new leading axis.
    """
    in_shape = tf.shape(x_in)
    flat_shape = tf.concat([in_shape[0:1], [-1], in_shape[3:4]], 0)
    flattened = tf.reshape(x_in, flat_shape)
    flat_argmax = tf.to_int32(tf.argmax(flattened, 1))

    inner_size = tf.slice(in_shape, [2], [1])
    outer_coord = tf.div(flat_argmax, tf.to_int32(inner_size))
    inner_coord = tf.mod(flat_argmax, tf.to_int32(inner_size))
    return tf.stack([outer_coord, inner_coord])
def __init__(self, requests, expert_capacity):
    """Create a TruncatingDispatcher.

    For every (batch element, expert) pair, the first `expert_capacity`
    requesting positions along the length axis are accepted; later requests
    are dropped. The constructor precomputes the gates and the gather
    indices describing that assignment.

    Args:
      requests: a boolean `Tensor` of shape `[batch, length, num_experts]`.
        Alternatively, a float or int Tensor containing zeros and ones.
      expert_capacity: a Scalar - maximum number of examples per expert per
        batch element.

    Returns:
      a TruncatingDispatcher
    """
    self._requests = tf.to_float(requests)
    self._expert_capacity = expert_capacity
    expert_capacity_f = tf.to_float(expert_capacity)
    self._batch, self._length, self._num_experts = tf.unstack(
        tf.shape(self._requests), num=3)

    # [batch, length, num_experts]
    # Exclusive cumulative sum along the length axis: for each position, the
    # number of earlier positions that requested the same expert.
    position_in_expert = tf.cumsum(self._requests, axis=1, exclusive=True)
    # [batch, length, num_experts]
    # A request is kept only while its slot number is below the capacity.
    self._gates = self._requests * tf.to_float(
        tf.less(position_in_expert, expert_capacity_f))
    # Index tensors shaped to broadcast against [batch, length, num_experts].
    batch_index = tf.reshape(
        tf.to_float(tf.range(self._batch)), [self._batch, 1, 1])
    length_index = tf.reshape(
        tf.to_float(tf.range(self._length)), [1, self._length, 1])
    expert_index = tf.reshape(
        tf.to_float(tf.range(self._num_experts)), [1, 1, self._num_experts])
    # position in a Tensor with shape [batch * num_experts * expert_capacity]
    flat_position = (
        position_in_expert +
        batch_index * (tf.to_float(self._num_experts) * expert_capacity_f) +
        expert_index * expert_capacity_f)
    # Tensor of shape [batch * num_experts * expert_capacity].
    # The summed data is (length position + 1) for accepted requests and 0
    # otherwise, so a value of 0 marks a slot that received no request.
    self._indices = tf.unsorted_segment_sum(
        data=tf.reshape((length_index + 1.0) * self._gates, [-1]),
        segment_ids=tf.to_int32(tf.reshape(flat_position, [-1])),
        num_segments=self._batch * self._num_experts * expert_capacity)
    self._indices = tf.reshape(
        self._indices,
        [self._batch, self._num_experts, expert_capacity])
    # Tensors of shape [batch, num_experts, expert_capacity].
    # each element is 0.0 or 1.0
    self._nonpadding = tf.minimum(self._indices, 1.0)
    # Undo the +1 offset (clamping empty slots at 0).
    # each element is an integer in [0, length)
    self._indices = tf.nn.relu(self._indices - 1.0)
    # self._flat_indices is [batch, num_experts, expert_capacity], with values
    # in [0, batch * length)
    self._flat_indices = tf.to_int32(
        self._indices +
        (tf.reshape(tf.to_float(tf.range(self._batch)), [-1, 1, 1])
         * tf.to_float(self._length)))
    self._indices = tf.to_int32(self._indices)
def create_learning_rate_decay_fn(decay_type,
                                  decay_steps,
                                  decay_rate,
                                  start_decay_at=0,
                                  stop_decay_at=1e9,
                                  min_learning_rate=None,
                                  staircase=False):
    """Builds a learning-rate schedule function from a `tf.train` decay op.

    Args:
        decay_type: A decay function name defined in `tf.train`.
        decay_steps: How often to apply decay.
        decay_rate: A Python number. The decay rate.
        start_decay_at: Don't decay before this step.
        stop_decay_at: Don't decay after this step.
        min_learning_rate: Don't decay below this number.
        staircase: Whether to apply decay in a discrete staircase, as opposed
            to continuous, fashion.

    Returns:
        A function that takes (learning_rate, global_step) as inputs
        and returns the learning rate for the given step.
        Returns `None` if decay_type is empty or None.
    """
    if decay_type is None:
        return None
    if decay_type == "":
        return None

    first_decay_step = tf.to_int32(start_decay_at)
    last_decay_step = tf.to_int32(stop_decay_at)

    def decay_fn(learning_rate, global_step):
        """Returns the learning rate to use at `global_step`."""
        step = tf.to_int32(global_step)
        decay_op = getattr(tf.train, decay_type)

        # The decay op sees the step count relative to the start of decay,
        # frozen once the stop step is reached.
        steps_into_decay = tf.minimum(step, last_decay_step) - first_decay_step
        decayed = decay_op(
            learning_rate=learning_rate,
            global_step=steps_into_decay,
            decay_steps=decay_steps,
            decay_rate=decay_rate,
            staircase=staircase,
            name="decayed_learning_rate")

        # Before the start step the undecayed rate is used.
        schedule = tf.train.piecewise_constant(
            x=step,
            boundaries=[first_decay_step],
            values=[learning_rate, decayed])

        if min_learning_rate:
            schedule = tf.maximum(schedule, min_learning_rate)
        return schedule

    return decay_fn
def rounding_sequence_accuracy(predictions,
                               labels,
                               weights_fn=common_layers.weights_nonzero):
    """Sequence accuracy for L1/L2 losses using integer-cast predictions.

    Args:
        predictions: real-valued model outputs whose last dimension is
            squeezed away.
        labels: ground-truth values.
        weights_fn: function mapping labels to per-position weights.

    Returns:
        A pair `(correct_seq, weight)` where `correct_seq` is 1.0 for each
        sequence without any weighted mismatch and 0.0 otherwise, and
        `weight` is the constant 1.0.
    """
    int_predictions = tf.squeeze(tf.to_int32(predictions), axis=-1)
    weights = weights_fn(labels)
    mismatches = tf.not_equal(int_predictions, tf.to_int32(labels))
    weighted_errors = tf.to_float(mismatches) * weights

    # Reduce over everything except the batch axis.
    reduce_axes = list(range(1, len(int_predictions.get_shape())))
    any_error = tf.minimum(
        1.0, tf.reduce_sum(weighted_errors, axis=reduce_axes))
    return 1.0 - any_error, tf.constant(1.0)
def testListOfScalarTensors(self):
    """tf.split accepts a Python list of scalar tensors as split sizes."""
    first_size = tf.to_int32(5)
    second_size = tf.to_int32(6)
    value = np.random.rand(11, 11)

    with self.test_session(use_gpu=False) as sess:
        pieces = sess.run(tf.split(value, [first_size, second_size]))
        expected_pieces = (value[0:5, :], value[5:, :])
        self.assertAllEqual(pieces[0], expected_pieces[0])
        self.assertAllEqual(pieces[1], expected_pieces[1])
def padded_sequence_accuracy(logits, labels):
    """Per-sequence accuracy: 1.0 only if every non-zero label is predicted.

    Args:
        logits: class scores; the argmax is taken over the last axis.
        labels: ground-truth ids, where 0 is treated as padding.

    Returns:
        A pair `(correct_seq, weight)` where `correct_seq` is 1.0 for each
        sequence without any mismatch on non-zero labels and 0.0 otherwise,
        and `weight` is the constant 1.0.
    """
    with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
        logits, labels = _pad_tensors_to_same_length(logits, labels)
        nonpad_weights = tf.to_float(tf.not_equal(labels, 0))
        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        mismatches = tf.not_equal(predicted_ids, tf.to_int32(labels))
        weighted_errors = tf.to_float(mismatches) * nonpad_weights

        # Reduce over everything except the batch axis.
        reduce_axes = list(range(1, len(predicted_ids.get_shape())))
        any_error = tf.minimum(
            1.0, tf.reduce_sum(weighted_errors, axis=reduce_axes))
        return 1.0 - any_error, tf.constant(1.0)
def padded_accuracy(predictions,
                    labels,
                    weights_fn=common_layers.weights_nonzero):
    """Per-position accuracy after zero-padding predictions and labels.

    Args:
        predictions: class scores; the argmax is taken over the last axis.
        labels: ground-truth ids.
        weights_fn: function mapping the padded labels to per-position
            weights.

    Returns:
        A pair `(correct, weights)`: a float tensor that is 1.0 where the
        prediction equals the label, and the weights.
    """
    with tf.variable_scope("padded_accuracy", values=[predictions, labels]):
        padded = common_layers.pad_with_zeros(predictions, labels)
        padded_predictions, padded_labels = padded
        weights = weights_fn(padded_labels)
        predicted_ids = tf.to_int32(tf.argmax(padded_predictions, axis=-1))
        hits = tf.equal(predicted_ids, tf.to_int32(padded_labels))
        return tf.to_float(hits), weights
def argmax2d(Xin):
    """Finds the 2D argmax over axes 1 and 2 of a rank-4 tensor.

    Axes 1 and 2 are flattened, the argmax is taken over the flattened axis,
    and the flat index is split back into two coordinates using the size of
    axis 2.

    Uses the TensorFlow >= 1.0 API: `tf.concat(values, axis)` (the argument
    order was swapped in 1.0) and `tf.stack` (the replacement for the removed
    `tf.pack`).

    Args:
        Xin: tensor indexed as [dim0, dim1, dim2, dim3].

    Returns:
        int32 tensor stacking (flat_index // dim2, flat_index % dim2) along a
        new leading axis.
    """
    orig_shape = tf.shape(Xin)
    # Collapse axes 1 and 2 into a single axis.
    flat_shape = tf.concat([orig_shape[0:1], [-1], orig_shape[3:4]], 0)
    flattened = tf.reshape(Xin, flat_shape)
    flat_argmax = tf.to_int32(tf.argmax(flattened, 1))

    # Size of axis 2, used to split the flat index into two coordinates.
    inner_size = tf.to_int32(tf.slice(orig_shape, [2], [1]))
    outer_coord = tf.div(flat_argmax, inner_size)
    inner_coord = tf.mod(flat_argmax, inner_size)
    return tf.stack([outer_coord, inner_coord])
def sparse_sequence_length(sparse_tensor):
    """Computes, per row of a sparse tensor, 1 + the largest column index.

    Rows without any entry get a length of 0.

    Args:
        sparse_tensor: sparse tensor whose indices have at least two columns
            (row, column) and whose dense_shape[0] is the number of rows.

    Returns:
        int32 tensor with one length per row.
    """
    with tf.name_scope("sparse_sequence_length"):
        entry_indices = tf.to_int32(sparse_tensor.indices)
        entry_rows = entry_indices[:, 0]
        entry_cols = entry_indices[:, 1]

        row_count = tf.to_int32(sparse_tensor.dense_shape[0])
        all_rows = tf.expand_dims(tf.range(row_count), 0)

        # [num_entries, num_rows]: 1 where the entry belongs to the row.
        membership = tf.to_int32(
            tf.equal(tf.expand_dims(entry_rows, 1), all_rows))
        # 1-based column index of every entry, zeroed for other rows.
        one_based_cols = membership * (tf.expand_dims(entry_cols, 1) + 1)

        return tf.reduce_max(one_based_cols, [0])
def cycle_gan_internal(inputs, targets, _, hparams):
    """Cycle GAN, main step used for training.

    Args:
        inputs: input ids, cast to int32 and embedded.
        targets: target ids, cast to int32 and embedded with the same
            embedding variables.
        _: unused.
        hparams: hyper-parameters; reads `vocab_size`, `hidden_size` and
            `cycle_loss_multiplier` here.

    Returns:
        A pair `(tgt, losses)`: the gradient-stopped reconstruction of
        targets from inputs, and a dict with the keys "input_input",
        "target_target", "input_disc" and "target_disc".
    """
    with tf.variable_scope("cycle_gan"):
        # Embed inputs and targets.
        inputs_orig = tf.to_int32(inputs)
        targets_orig = tf.to_int32(targets)
        inputs = common_layers.embedding(
            inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed")
        targets = common_layers.embedding(
            targets_orig, hparams.vocab_size, hparams.hidden_size,
            "embed", reuse=True)

        # Split the batch into input-input and target-target parts.
        inputs1, _unused_inputs2 = split_on_batch(inputs)
        _unused_targets1, targets2 = split_on_batch(targets)

        # Define F and G, called inp2tgt and tgt2inp here.
        def inp2tgt(x, reuse=False):
            return transformer_vae.residual_conv(
                x, 1, hparams, "inp2tgt", reuse)

        def tgt2inp(x, reuse=False):
            return transformer_vae.residual_conv(
                x, 1, hparams, "tgt2inp", reuse)

        # Input-input part.
        inp1_as_tgt = inp2tgt(inputs1)
        inp1_cycled = tgt2inp(inp1_as_tgt)

        # Target-target part.
        tgt2_as_inp = tgt2inp(targets2, reuse=True)
        tgt2_cycled = inp2tgt(tgt2_as_inp, reuse=True)

        # Reconstruction losses.
        inp1_ids, _unused_inp2_ids = split_on_batch(inputs_orig)
        _unused_tgt1_ids, tgt2_ids = split_on_batch(targets_orig)
        input_cycle_loss = reconstruct_loss(
            inp1_cycled, tf.squeeze(inp1_ids, axis=3), hparams)
        target_cycle_loss = reconstruct_loss(
            tgt2_cycled, tf.squeeze(tgt2_ids, axis=3), hparams, reuse=True)

        # Discriminator losses.
        input_disc_loss = discriminate_loss(
            inputs1, tgt2_as_inp, True, hparams, "inp_disc")
        target_disc_loss = discriminate_loss(
            targets2, inp1_as_tgt, True, hparams, "tgt_disc")

        # Reconstruct targets from inputs.
        tgt = inp2tgt(inputs, reuse=True)
        tgt = tf.layers.dense(
            tgt, hparams.vocab_size, name="softmax", reuse=True)

        # We use the reconstruction only for tracking progress, no gradients
        # here!
        tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2))

        multiplier = hparams.cycle_loss_multiplier
        losses = {
            "input_input": multiplier * input_cycle_loss,
            "target_target": multiplier * target_cycle_loss,
            "input_disc": input_disc_loss,
            "target_disc": target_disc_loss,
        }
        return tgt, losses
def _anchor_component(self):
    """Generates anchors through a py_func and stores them on the instance.

    The feature-map height and width are derived from the first row of
    `self._im_info` divided by the first feature stride, rounded up.
    Sets `self._anchors` (static shape [None, 4]) and `self._anchor_length`
    (scalar).
    """
    with tf.variable_scope('ANCHOR_' + self._tag):
        first_stride = self._feat_stride[0]
        # just to get the shape right
        feat_height = tf.to_int32(
            tf.ceil(self._im_info[0, 0] / np.float32(first_stride)))
        feat_width = tf.to_int32(
            tf.ceil(self._im_info[0, 1] / np.float32(first_stride)))

        py_inputs = [
            feat_height,
            feat_width,
            self._feat_stride,
            self._anchor_scales,
            self._anchor_ratios,
        ]
        anchors, anchor_length = tf.py_func(
            generate_anchors_pre,
            py_inputs,
            [tf.float32, tf.int32],
            name="generate_anchors")

        # py_func outputs carry no static shape; restore it.
        anchors.set_shape([None, 4])
        anchor_length.set_shape([])

        self._anchors = anchors
        self._anchor_length = anchor_length
def _build_graph(self, dims):
    """
    Constructs the TensorFlow subgraph of the model and its training losses.

    Three latent factors (z_t, z_c, z_y) are each parameterised twice with
    self._build_fully_connected_layers: an "en" network fed with x only and
    a "de" network that additionally receives t and/or y. Samples from these
    distributions feed the treatment and outcome prediction heads.

    Sets the following member variables (to TF nodes):

    self.t_classif_loss     Treatment classification loss ("de" + weighted "en")
    self.y_predict_loss     Outcome prediction loss ("de" + weighted "en")
    self.tot_loss           Total objective: both losses above, the three
                            KL terms and the weighted orthogonality term
    self.pred_loss          Outcome loss of the "en" branch only
    self.imb_dist           First return value of wasserstein() on z_t
    self.output             Outcome prediction of the "en" branch

    The ``dims`` argument is not used in this method.
    """
    # Note: the "sigma" tensors here are log_squared_sigma, i.e. log(sigma^2).
    # "en" networks: inputs are x only.
    z_t_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in, "z_t_en_mu")
    z_t_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                      "z_t_en_sigma")
    z_c_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in, "z_c_en_mu")
    z_c_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                      "z_c_en_sigma")
    z_y_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in, "z_y_en_mu")
    z_y_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                      "z_y_en_sigma")

    # "de" networks: x concatenated with t (z_t), with t and y (z_c), or with y (z_y).
    z_t_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.t], -1), FLAGS.n_in, FLAGS.dim_in,
                                                   self.dropout_in, "z_t_de_mu")
    z_t_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.t], -1), FLAGS.n_in, FLAGS.dim_in,
                                                      self.dropout_in, "z_t_de_sigma")
    z_c_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.t, self.y], -1), FLAGS.n_in,
                                                   FLAGS.dim_in, self.dropout_in, "z_c_de_mu")
    z_c_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.t, self.y], -1), FLAGS.n_in,
                                                      FLAGS.dim_in, self.dropout_in, "z_c_de_sigma")
    z_y_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.y], -1), FLAGS.n_in, FLAGS.dim_in,
                                                   self.dropout_in, "z_y_de_mu")
    z_y_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.y], -1), FLAGS.n_in, FLAGS.dim_in,
                                                      self.dropout_in, "z_y_de_sigma")

    # One sample per (mu, sigma) pair.
    z_t_sample_en = self._get_sample_from_dist(z_t_en_mu, z_t_en_sigma)
    z_c_sample_en = self._get_sample_from_dist(z_c_en_mu, z_c_en_sigma)
    z_y_sample_en = self._get_sample_from_dist(z_y_en_mu, z_y_en_sigma)

    z_t_sample_de = self._get_sample_from_dist(z_t_de_mu, z_t_de_sigma)
    z_c_sample_de = self._get_sample_from_dist(z_c_de_mu, z_c_de_sigma)
    z_y_sample_de = self._get_sample_from_dist(z_y_de_mu, z_y_de_sigma)

    # Treatment heads: the "de" head sees [z_t, z_c], the "en" head sees z_t only.
    zt_zc_concat_de = tf.concat([z_t_sample_de, z_c_sample_de], -1)
    # zt_zc_concat_en = tf.concat([z_t_sample_en, z_c_sample_en], -1)
    pred_t_de = tf.layers.dense(
        self._build_fully_connected_layers(zt_zc_concat_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           't_out_net'), 1, name='pred_t_logit')
    pred_t_en = tf.layers.dense(
        self._build_fully_connected_layers(z_t_sample_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           't_out_net_test'), 1, name='pred_t_logit_test')

    # Row indices of the two groups: i0 where t < 1, i1 where t > 0.
    i0 = tf.to_int32(tf.where(self.t < 1)[:, 0])
    i1 = tf.to_int32(tf.where(self.t > 0)[:, 0])

    # Outcome heads: a separate network per group, for both branches.
    zc_zy_concat_de = tf.concat([z_c_sample_de, z_y_sample_de], -1)
    zc_zy_concat0_de = tf.gather(zc_zy_concat_de, i0)
    zc_zy_concat1_de = tf.gather(zc_zy_concat_de, i1)
    z_y_sample0_en = tf.gather(z_y_sample_en, i0)
    z_y_sample1_en = tf.gather(z_y_sample_en, i1)
    pred_y0_de = tf.layers.dense(
        self._build_fully_connected_layers(zc_zy_concat0_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           'y0_out_net'), 1, name='pred_y0_logit')
    pred_y1_de = tf.layers.dense(
        self._build_fully_connected_layers(zc_zy_concat1_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           'y1_out_net'), 1, name='pred_y1_logit')
    pred_y0_en = tf.layers.dense(
        self._build_fully_connected_layers(z_y_sample0_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           'y0_out_net_test'), 1, name='pred_y0_logit_test')
    pred_y1_en = tf.layers.dense(
        self._build_fully_connected_layers(z_y_sample1_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                           'y1_out_net_test'), 1, name='pred_y1_logit_test')
    # Put the per-group predictions back at their original row positions.
    pred_y_de = tf.dynamic_stitch([i0, i1], [pred_y0_de, pred_y1_de])
    pred_y_en = tf.dynamic_stitch([i0, i1], [pred_y0_en, pred_y1_en])

    # Treatment loss: "de" head plus the "en" head weighted by coef_t_pred.
    self.t_classif_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_t_de, labels=self.t)) + \
        FLAGS.coef_t_pred*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_t_en, labels=self.t))

    # Outcome loss: cross-entropy when FLAGS.loss == "log", squared error otherwise.
    if FLAGS.loss == "log":
        self.y_predict_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_de, labels=self.y)) + \
            FLAGS.coef_y_pred*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_en, labels=self.y))
    else:
        # self.y_predict_loss = tf.reduce_mean(self.sample_weight * tf.square(self.y - pred_y_de))
        self.y_predict_loss = tf.reduce_mean(tf.square(self.y - pred_y_de)) + \
            FLAGS.coef_y_pred*tf.reduce_mean(tf.square(self.y - pred_y_en))
        # self.y_predict_loss = tf.reduce_mean(tf.abs(self.y - pred_y_de))

    # KL terms between the "de" and "en" distributions of each factor.
    KL_zt = self._KL_distance(z_t_de_mu, z_t_de_sigma, z_t_en_mu, z_t_en_sigma)
    KL_zc = self._KL_distance(z_c_de_mu, z_c_de_sigma, z_c_en_mu, z_c_en_sigma)
    KL_zy = self._KL_distance(z_y_de_mu, z_y_de_sigma, z_y_en_mu, z_y_en_sigma)

    # Pairwise orthogonality penalties between the "en" samples.
    orth_loss_t_y = self._cal_orth(z_t_sample_en, z_y_sample_en)
    orth_loss_t_c = self._cal_orth(z_t_sample_en, z_c_sample_en)
    orth_loss_y_c = self._cal_orth(z_y_sample_en, z_c_sample_en)
    orth_loss = orth_loss_t_y + orth_loss_t_c + orth_loss_y_c

    self.tot_loss = self.t_classif_loss + self.y_predict_loss
    self.tot_loss = self.tot_loss + KL_zt + KL_zc + KL_zy
    self.tot_loss = self.tot_loss + FLAGS.coef_orth_loss * orth_loss

    # Reported outcome loss of the "en" branch (RMSE in the non-"log" case).
    if FLAGS.loss == "log":
        self.pred_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_en, labels=self.y))
    else:
        self.pred_loss = tf.sqrt(tf.reduce_mean(tf.square(self.y - pred_y_en)))

    # imb_dist is stored but not added to tot_loss here; imb_mat is unused.
    self.imb_dist, imb_mat = wasserstein(z_t_sample_en, self.t, 0.5, lam=FLAGS.wass_lambda,
                                         its=FLAGS.wass_iterations, sq=False, backpropT=FLAGS.wass_bpt)

    # Final prediction: probability for "log" loss, raw value otherwise.
    if FLAGS.loss == "log":
        self.output = tf.nn.sigmoid(pred_y_en)
    else:
        self.output = pred_y_en
def cast(p):
    """Round ``p`` to the nearest integer and return it as an int32 tensor."""
    rounded = tf.round(p)
    return tf.to_int32(rounded)
# Second conv. layer: fs2 x fs2 filters mapping nf1 -> nf2 channels,
# followed by 2x2 max pooling.
W_conv2 = weight_variable([fs2, fs2, nf1, nf2])
b_conv2 = bias_variable([nf2])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Reverse the process.
## unpool, deconv 1
# Unpooling: move channels to axis 1, then tile/reshape twice to double the
# spatial size. Reshape dimensions use floor division (`//`) so they stay
# integers on Python 3 as well; with integer sy/sx this equals the Python 2
# result of `/`.
tile_last_axis = tf.to_int32(tf.constant(np.array([1, 1, 1, 2])))
a1 = tf.transpose(h_pool2, perm=[0, 3, 1, 2])
b1 = tf.reshape(a1, [-1, nf2, sy // 4 * sx // 4, 1])
c1 = tf.tile(b1, tile_last_axis)
d1 = tf.reshape(c1, [-1, nf2, sy // 4, sx // 2])
e1 = tf.tile(d1, tf.to_int32(tf.constant(np.array([1, 1, 1, 2]))))
h_unpool1 = tf.reshape(e1, [-1, sy // 2, sx // 2, nf2])

# Deconv: reuse the conv2 filters with in/out channels swapped.
# NOTE(review): this convolves h_conv2 rather than h_unpool1, so the
# unpooled tensor above is not consumed in this span. Confirm intent.
W_conv2_tr = tf.transpose(W_conv2, perm=[0, 1, 3, 2])
h_deconv1 = tf.nn.relu(conv2d(h_conv2 - b_conv2, W_conv2_tr))

## unpool, deconv 2
# Unpooling, same scheme as above at the next resolution.
a2 = tf.transpose(h_deconv1, perm=[0, 3, 1, 2])
b2 = tf.reshape(a2, [-1, nf1, sy // 2 * sx // 2, 1])
c2 = tf.tile(b2, tf.to_int32(tf.constant(np.array([1, 1, 1, 2]))))
d2 = tf.reshape(c2, [-1, nf1, sy // 2, sx])
def down_shift(x):
    """Append ``hp.filter_h - 1`` zero slices to the end of axis 2 of ``x``.

    Args:
        x: rank-4 tensor. Its last dimension must be statically known, and
            its axis 1 must have size ``hp.max_len`` for the concatenation
            to be valid.

    Returns:
        ``x`` concatenated along axis 2 with a float32 zero block of shape
        ``[batch, hp.max_len, hp.filter_h - 1, channels]``.
    """
    channels = x.get_shape().as_list()[3]
    # The batch size may be dynamic, so read it from the runtime shape.
    # The previous version got the same number through a
    # not_equal/to_int32/reduce_sum chain whose values were never used.
    batch_size = tf.shape(x)[0]
    zero_pad = tf.zeros([batch_size, hp.max_len, hp.filter_h - 1, channels])
    return tf.concat((x, zero_pad), 2)
def decode_label(label):
    """Decode a raw-bytes label string into a scalar int32 tensor.

    The reshape to ``[]`` requires the decoded string to contain exactly
    one byte.
    """
    raw_bytes = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
    scalar_label = tf.reshape(raw_bytes, [])  # the label is a scalar
    return tf.to_int32(scalar_label)
def mul_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs,
                                  initializer, proj_initializer, div_val=1,
                                  perms=None, proj_same_dim=True,
                                  scope='adaptive_embed'):
    """Adaptive embedding lookup with per-bin embedding sizes.

    Args:
        x: integer token ids; the einsum patterns below treat it as rank 2.
        n_token: vocabulary size.
        d_embed: embedding width of the first bin.
        d_proj: output width after projection.
        cutoffs: sequence of bin boundaries strictly inside (0, n_token).
        initializer: initializer for the lookup tables.
        proj_initializer: initializer for the projection matrices.
        div_val: bin ``i`` uses width ``d_embed // div_val ** i``; with
            ``div_val == 1`` a single table is used.
        perms: If None, first compute W = W1 x W2 (projection for each bin),
            and then compute X x W (embedding lookup). If not None, use
            bin-based embedding lookup with max_bin_size defined by the
            shape of perms.
        proj_same_dim: if False, bins whose width already equals ``d_proj``
            get no projection matrix.
        scope: variable scope name.

    Returns:
        ``(y, ret_params)``: the embeddings scaled by ``d_proj ** 0.5`` and
        the created parameters, ``[lookup_table, proj_W]`` for
        ``div_val == 1`` or ``[tables, projs]`` otherwise. Projection
        entries may be None.
    """
    emb_scale = d_proj ** 0.5
    with tf.variable_scope(scope):
        if div_val == 1:
            lookup_table = tf.get_variable('lookup_table', [n_token, d_embed],
                                           initializer=initializer)
            y = embedding_lookup(lookup_table, x)
            if d_proj != d_embed:
                proj_W = tf.get_variable('proj_W', [d_embed, d_proj],
                                         initializer=proj_initializer)
                y = tf.einsum('ibe,ed->ibd', y, proj_W)
            else:
                proj_W = None
            ret_params = [lookup_table, proj_W]
        else:
            tables, projs = [], []
            # list() lets callers pass a tuple of cutoffs as well.
            cutoff_ends = [0] + list(cutoffs) + [n_token]
            x_size = tf.shape(x)
            if perms is None:
                cat_lookup = []
            else:
                cat_lookup = tf.zeros([x_size[0], x_size[1], d_proj])
            for i in range(len(cutoff_ends) - 1):
                with tf.variable_scope('cutoff_{}'.format(i)):
                    l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
                    cur_d_embed = d_embed // (div_val ** i)
                    lookup_table = tf.get_variable(
                        'lookup_table', [r_idx - l_idx, cur_d_embed],
                        initializer=initializer)
                    if cur_d_embed == d_proj and not proj_same_dim:
                        proj_W = None
                    else:
                        proj_W = tf.get_variable(
                            'proj_W', [cur_d_embed, d_proj],
                            initializer=proj_initializer)
                    if perms is None:
                        if proj_W is None:
                            # No projection for this bin: the table already
                            # has d_proj columns, so it can be concatenated
                            # as is. Passing None to einsum used to crash.
                            cat_lookup.append(lookup_table)
                        else:
                            cat_lookup.append(
                                tf.einsum('ie,ed->id', lookup_table, proj_W))
                    else:
                        # speed up the computation of the first bin
                        # also save some memory
                        if i == 0:
                            cur_y = embedding_lookup(
                                lookup_table, tf.minimum(x, r_idx - 1))
                            if proj_W is not None:
                                cur_y = tf.einsum('ibe,ed->ibd', cur_y, proj_W)
                            cur_y *= perms[i][:, :, None]
                            cat_lookup += cur_y
                        else:
                            cur_x = tf.einsum('ib,ibk->k',
                                              tf.to_float(x - l_idx), perms[i])
                            cur_x = tf.to_int32(cur_x)
                            cur_y = embedding_lookup(lookup_table, cur_x)
                            if proj_W is not None:
                                cur_y = tf.einsum('ke,ed->kd', cur_y, proj_W)
                            cat_lookup += tf.einsum('kd,ibk->ibd',
                                                    cur_y, perms[i])
                    tables.append(lookup_table)
                    projs.append(proj_W)
            if perms is None:
                # Stack the per-bin projected tables into one table over the
                # whole vocabulary, then do a single lookup.
                cat_lookup = tf.concat(cat_lookup, 0)
                y = embedding_lookup(cat_lookup, x)
            else:
                y = cat_lookup
            ret_params = [tables, projs]

    y *= emb_scale
    return y, ret_params
def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
    """
    Aligned version of tf.image.crop_and_resize, following our definition of
    floating point boxes.

    Args:
        image: NCHW
        boxes: nx4, x1y1x2y2
        box_ind: (n,)
        crop_size (int):
        pad_border (bool): pad the image by one pixel on each spatial side
            (symmetric mode) and shift the boxes accordingly.

    Returns:
        n,C,size,size
    """
    assert isinstance(crop_size, int), crop_size
    boxes = tf.stop_gradient(boxes)

    # TF's crop_and_resize produces zeros on border
    if pad_border:
        # this can be quite slow
        spatial_pad = [1, 1]
        image = tf.pad(image, [[0, 0], [0, 0], spatial_pad, spatial_pad],
                       mode='SYMMETRIC')
        boxes = boxes + 1

    @under_name_scope()
    def transform_fpcoor_for_tf(boxes, image_shape, crop_shape):
        """
        The way tf.image.crop_and_resize works (with normalized box):
        Initial point (the value of output[0]): x0_box * (W_img - 1)
        Spacing: w_box * (W_img - 1) / (W_crop - 1)
        Use the above grid to bilinear sample.

        However, what we want is (with fpcoor box):
        Spacing: w_box / W_crop
        Initial point: x0_box + spacing/2 - 0.5
        (-0.5 because bilinear sample (in my definition) assumes floating
        point coordinate (0.0, 0.0) is the same as pixel value (0, 0))

        This function transform fpcoor boxes to a format to be used by
        tf.image.crop_and_resize

        Returns:
            y1x1y2x2
        """
        left, top, right, bottom = tf.split(boxes, 4, axis=1)

        # Size of one output cell, in input pixels.
        step_x = (right - left) / tf.to_float(crop_shape[1])
        step_y = (bottom - top) / tf.to_float(crop_shape[0])

        # Normalized centre of the first output cell.
        norm_left = (left + step_x / 2 - 0.5) / tf.to_float(image_shape[1] - 1)
        norm_top = (top + step_y / 2 - 0.5) / tf.to_float(image_shape[0] - 1)

        # Normalized extent between first and last cell centres.
        norm_width = (step_x * tf.to_float(crop_shape[1] - 1)
                      / tf.to_float(image_shape[1] - 1))
        norm_height = (step_y * tf.to_float(crop_shape[0] - 1)
                       / tf.to_float(image_shape[0] - 1))

        return tf.concat(
            [norm_top, norm_left, norm_top + norm_height, norm_left + norm_width],
            axis=1)

    # Disabled: expand each bbox to a minimum size of 1.
    # boxes_x1y1, boxes_x2y2 = tf.split(boxes, 2, axis=1)
    # boxes_wh = boxes_x2y2 - boxes_x1y1
    # boxes_center = tf.reshape((boxes_x2y2 + boxes_x1y1) * 0.5, [-1, 2])
    # boxes_newwh = tf.maximum(boxes_wh, 1.)
    # boxes_x1y1new = boxes_center - boxes_newwh * 0.5
    # boxes_x2y2new = boxes_center + boxes_newwh * 0.5
    # boxes = tf.concat([boxes_x1y1new, boxes_x2y2new], axis=1)

    spatial_shape = tf.shape(image)[2:]
    output_shape = [crop_size, crop_size]
    boxes = transform_fpcoor_for_tf(boxes, spatial_shape, output_shape)

    nhwc_image = tf.transpose(image, [0, 2, 3, 1])  # nhwc
    crops = tf.image.crop_and_resize(
        nhwc_image, boxes, tf.to_int32(box_ind),
        crop_size=[crop_size, crop_size])
    return tf.transpose(crops, [0, 3, 1, 2])  # ncss
def get_losses(d_out_real, d_out_fake, x_real_onehot, x_fake_onehot_appr,
               gen_o, discriminator, config, rewards=None,
               initial_samples_for_rewards=None):
    """Build the discriminator/generator losses for the configured GAN type.

    Args:
        d_out_real: discriminator outputs on real samples.
        d_out_fake: discriminator outputs on generated samples.
        x_real_onehot: real samples, passed to the gradient penalty.
        x_fake_onehot_appr: generated samples; also read as per-token
            probabilities by the policy-gradient term.
        gen_o: generator output used for the ``log_pg`` statistic.
        discriminator: discriminator, passed to the gradient penalty.
        config: dict with keys ``batch_size``, ``gan_type``, ``seq_len``,
            ``vocab_size``, ``rl_alpha``, ``g_architecture``,
            ``d_architecture`` and, for policy-gradient architectures,
            ``rl_only``.
        rewards: rewards for the policy-gradient term.
        initial_samples_for_rewards: sampled token ids for that term.

    Returns:
        ``(log_pg, g_loss, d_loss, reinforce_loss)``.

    Raises:
        NotImplementedError: for an unknown ``config['gan_type']``.
    """
    # batch_size and seq_len are looked up as before but not used below.
    batch_size = config['batch_size']
    gan_type = config['gan_type']
    seq_len = config['seq_len']
    vocab_size = config['vocab_size']
    RL_alpha = config['rl_alpha']

    def mean_sigmoid_xent(logits, labels):
        # Mean sigmoid cross-entropy of `logits` against `labels`.
        per_example = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=labels)
        return tf.reduce_mean(per_example)

    if gan_type in ('standard', 'JS', 'KL'):
        # These three share the discriminator loss; only g_loss differs.
        d_loss_real = mean_sigmoid_xent(d_out_real, tf.ones_like(d_out_real))
        d_loss_fake = mean_sigmoid_xent(d_out_fake, tf.zeros_like(d_out_fake))
        d_loss = d_loss_real + d_loss_fake
        if gan_type == 'standard':
            # the non-satuating GAN loss
            g_loss = mean_sigmoid_xent(d_out_fake, tf.ones_like(d_out_fake))
        elif gan_type == 'JS':
            # the vanilla GAN loss
            g_loss = -d_loss_fake
        else:
            # the GAN loss implicitly minimizing KL-divergence
            g_loss = tf.reduce_mean(-d_out_fake)

    elif gan_type == 'hinge':  # the hinge loss
        d_loss_real = tf.reduce_mean(tf.nn.relu(1.0 - d_out_real))
        d_loss_fake = tf.reduce_mean(tf.nn.relu(1.0 + d_out_fake))
        d_loss = d_loss_real + d_loss_fake
        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'tv':  # the total variation distance
        d_loss = tf.reduce_mean(tf.tanh(d_out_fake) - tf.tanh(d_out_real))
        g_loss = tf.reduce_mean(-tf.tanh(d_out_fake))

    elif gan_type == 'wgan-gp':  # WGAN-GP
        d_loss = tf.reduce_mean(d_out_fake) - tf.reduce_mean(d_out_real)
        GP = gradient_penalty(discriminator, x_real_onehot,
                              x_fake_onehot_appr, config)
        d_loss += GP
        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'LS':  # LS-GAN
        d_loss_real = tf.reduce_mean(tf.squared_difference(d_out_real, 1.0))
        d_loss_fake = tf.reduce_mean(tf.square(d_out_fake))
        d_loss = d_loss_real + d_loss_fake
        g_loss = tf.reduce_mean(tf.squared_difference(d_out_fake, 1.0))

    elif gan_type == 'RSGAN':  # relativistic standard GAN
        d_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_real - d_out_fake,
                labels=tf.ones_like(d_out_real)))
        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake - d_out_real,
                labels=tf.ones_like(d_out_fake)))

    else:
        raise NotImplementedError(
            "Divergence '%s' is not implemented" % gan_type)

    # TODO
    reinforce_loss = tf.constant(0.0)
    uses_policy_gradient = ('_pg' in config['g_architecture']
                            and '_pg' in config['d_architecture'])
    if uses_policy_gradient:
        flat_fake_probs = tf.reshape(x_fake_onehot_appr, [-1, vocab_size])
        flat_sample_ids = tf.to_int32(
            tf.reshape(initial_samples_for_rewards, [-1]))
        sample_one_hot = tf.one_hot(flat_sample_ids, vocab_size, 1.0, 0.0)
        clipped_log_probs = tf.log(
            tf.clip_by_value(flat_fake_probs, 1e-20, 1.0))
        # Log-probability assigned to each sampled token.
        rnn_outputs_for_reinforce = tf.reduce_sum(
            sample_one_hot * clipped_log_probs, 1)
        reward_weighted = rnn_outputs_for_reinforce * tf.reshape(rewards, [-1])
        reinforce_loss = -RL_alpha * tf.reduce_mean(reward_weighted)

        if config['rl_only'] == True:
            print("No gan objective in G, only policy gradients")
            g_loss = reinforce_loss
            d_loss = tf.get_variable("dummy_d_loss", initializer=0.0,
                                     trainable=False)
        else:
            g_loss += reinforce_loss

    log_pg = tf.reduce_mean(tf.log(gen_o + EPS))  # [1], measures the log p_g(x)

    return log_pg, g_loss, d_loss, reinforce_loss
def _build_model(self):
    """Build the network, the weighted eight-point solver and the loss.

    For each of ``self.config.num_phase`` phases, ``build_graph`` produces
    per-correspondence logits, which are turned into weights, used to form
    the weighted 9x9 matrix X^T W X, and the eigenvector at index 0 of its
    self-adjoint eigendecomposition is taken as the 9-vector estimate. The
    inputs of the next phase are the top half of the correspondences.

    Sets ``self.fetch_vis``, ``self.bool_use_weight_for_score``,
    ``self.out_e_hat``, ``self.loss``, ``self.last_e_hat``,
    ``self.last_logit``, ``self.last_x_in``, ``self.last_weights`` (and
    ``self.logit_softmax`` for the "sigmoid_softmax" option), and resets
    ``self.e_hat`` and ``self.logits`` to None.

    Raises:
        ValueError: for an unsupported ``self.config.weight_opt``.
    """

    # Create a variable scope so that parameters can be shared.
    with tf.variable_scope("Matchnet", reuse=tf.AUTO_REUSE):
        # For intermediate visualization
        self.fetch_vis = {}

        # -------------------- Network architecture --------------------
        # Import correct build_graph function
        from archs.cvpr2020 import build_graph
        # Build graph
        print("Building Graph")
        # Preprocessing input, currently doing nothing
        x_in = pre_x_in(self.x_in, self.config.pre_x_in)
        y_in = self.y_in
        self.fetch_vis["x_in"] = self.x_in
        self.fetch_vis["y_in"] = self.y_in

        logits = []
        indexs = []
        e_hats = []
        losses = []

        # Framework for iterative top-k strategy.
        # We currently disable iterative top-k by set num_phase=1.
        # NOTE(review): the last_* names below are only bound inside the
        # loop, so num_phase must be at least 1.
        for i in range(self.config.num_phase):
            # Weight local is the weight matrix for incorporating locality into network
            # But we currently disable it by set it as None.
            weight_local = None
            x_shp = tf.shape(x_in)
            logit, vis_dict = build_graph(x_in, self.is_training, self.config, weight_local)
            tf.summary.histogram("logit", logit)
            self.fetch_vis = {**self.fetch_vis, **vis_dict}
            # For visualizing intermediate layers
            self.fetch_vis["logits"] = logit[:, None, :, None]
            self.bool_use_weight_for_score = self.config.bool_use_weight_for_score
            # Support different output weight for 8-point algorithm
            if self.config.weight_opt == "relu_tanh":
                weights = tf.nn.relu(tf.tanh(logit))
            elif self.config.weight_opt == "sigmoid_softmax":
                logit_softmax = vis_dict["logit_softmax"]
                self.logit_softmax = logit_softmax
                mask = tf.nn.sigmoid(logit)
                if self.config.bool_hard_attention:
                    # Hard 0/1 mask instead of the sigmoid.
                    mask = tf.to_float(logit > 0)
                weights = tf.exp(logit_softmax) * mask
                # Normalize so the weights sum to one over the last axis.
                weights = weights / tf.reduce_sum(weights, -1, keep_dims=True)
            else:
                raise ValueError("Don't support it")

            # Make input data (num_img_pair x num_corr x 4)
            xx = tf.transpose(tf.reshape(
                x_in, (x_shp[0], x_shp[2], 4)), (0, 2, 1))

            # Create the matrix to be used for the eight-point algorithm
            X = tf.transpose(tf.stack([
                xx[:, 2] * xx[:, 0], xx[:, 2] * xx[:, 1], xx[:, 2],
                xx[:, 3] * xx[:, 0], xx[:, 3] * xx[:, 1], xx[:, 3],
                xx[:, 0], xx[:, 1], tf.ones_like(xx[:, 0])
            ], axis=1), (0, 2, 1))
            self.fetch_vis["X"] = X[:, None]
            print("X shape = {}".format(X.shape))
            wX = tf.reshape(weights, (x_shp[0], x_shp[2], 1)) * X
            print("wX shape = {}".format(wX.shape))
            XwX = tf.matmul(tf.transpose(X, (0, 2, 1)), wX)
            print("XwX shape = {}".format(XwX.shape))

            # Recover essential matrix from self-adjoint eigen
            e, v = tf.self_adjoint_eig(XwX)
            e_hat = tf.reshape(v[:, :, 0], (x_shp[0], 9))
            # in case you want to directly output F
            self.out_e_hat = e_hat
            if self.config.use_fundamental > 0:
                # Go back Essential Matrix with input norm and calibration matrix
                e_hat = tf.reshape(e_hat, (x_shp[0], 3, 3))
                e_hat = tf.matmul(
                    tf.matmul(tf.transpose(self.T2_in, (0, 2, 1)), e_hat), self.T1_in)
                e_hat = tf.matmul(
                    tf.matmul(tf.transpose(self.K2_in, (0, 2, 1)), e_hat), self.K1_in)
                e_hat = tf.reshape(e_hat, (x_shp[0], 9))
                e_hat /= tf.norm(e_hat, axis=1, keep_dims=True)

            # Remember this phase's tensors; the last phase wins.
            last_e_hat = e_hat
            last_logit = logit
            last_x_in = x_in
            last_weights = weights
            e_hats += [e_hat]
            losses += [self._build_loss(e_hat, logit, x_in, y_in, weights, name=str(i))]
            logits += [logit]
            num_top_k = tf.to_int32(x_shp[2] * 5 / 10)  # top 50% points
            # update x_in and y_in according to the logit
            x_in, index = topk(x_in, logit[:, None], num_top_k)
            y_in = tf.squeeze(tf.gather_nd(y_in[:, None], index), 1)
            indexs += [index]

        # L2 loss over every trainable variable whose name contains "weights"
        for var in tf.trainable_variables():
            if "weights" in var.name:
                print(var.name)
                tf.add_to_collection("l2_losses", tf.reduce_sum(var**2))
        l2_loss = tf.add_n(tf.get_collection("l2_losses"))
        tf.summary.scalar("l2_loss", l2_loss)
        # Check global_step and add essential loss
        loss = self.config.loss_decay * l2_loss
        self.loss = loss + tf.reduce_mean(tf.stack(losses))

        # replace self.logit and self.e_hat with self.last_e_hat,
        # self.last_logit, self.last_x_in
        self.e_hat = None
        self.logits = None
        self.last_e_hat = last_e_hat
        self.last_logit = last_logit
        self.last_x_in = last_x_in
        self.last_weights = last_weights
def _get_staffline_window_size(self, staffline_distance):
    """Return the window size for the given staffline distance.

    The size is ``staffline_distance * self.staffline_distance_multiple``,
    computed in float, rounded to the nearest integer and cast to int32.
    """
    distance = tf.to_float(staffline_distance)
    multiple = tf.to_float(self.staffline_distance_multiple)
    return tf.to_int32(tf.round(distance * multiple))
def biuld_net(self):
    """Build the three-branch capsule network and its decoder.

    Three branches with kernel heights 9, 7 and 5 each apply ``CNNs`` to
    ``self.x``, form primary capsules with ``layers_vector`` and run
    ``routing`` with 32, 16 and 8 output dimensions respectively. The
    routed capsules are scaled by a, b, c, concatenated on axis 3 and
    squeezed into ``self.caps``.

    Sets ``self.primary_cap``, ``self.primary_cap_1``,
    ``self.primary_cap_2``, ``self.caps``, ``self.argmax_idx``,
    ``self.masked_v``, ``self.v_length`` and ``self.decode``.
    """
    # gragh = tf.Graph()
    # with gragh.as_default():
    ###########
    ### Branch 0: kernel [9, 1]
    top_con = CNNs(self.x, 128, [9, 1], 2, "SAME", self.is_train)
    self.primary_cap = layers_vector(
        top_con, 32, 4, [9, 1], 1, self.is_train,
        shapes=[-1, self.next_length * 8, 16, 1])
    # [-1,88*16,8,1]

    #with tf.variable_scope("capsules_layers"):
    # Add a singleton axis after the capsule axis and one at the end.
    fc_function = tf.reshape(self.primary_cap,
                             shape=(-1, self.primary_cap.shape[1].value, 1,
                                    self.primary_cap.shape[-2].value, 1))
    #with tf.variable_scope("routing"):
    #[-1,88*16,1,8,1]
    # All-zero float32 constant handed to routing() as its second argument.
    blu = tf.constant(np.zeros([
        self.batch_size, self.primary_cap.shape[1].value, self.num_label, 1, 1
    ]), dtype=tf.float32)
    # NOTE(review): unlike the two branches below, this routing call is not
    # wrapped in its own variable scope.
    caps = routing(fc_function, blu, num_outputs=self.num_label, num_dims=32)
    #### [120,37,8,1]

    ### Branch 1: kernel [7, 1]
    top_conv_1 = CNNs(self.x, 128, [7, 1], 2, "SAME", self.is_train)
    self.primary_cap_1 = layers_vector(
        top_conv_1, 32, 4, [7, 1], 1, self.is_train,
        shapes=[-1, self.next_length * 16, 8, 1])
    fc_function_1 = tf.reshape(
        self.primary_cap_1,
        shape=(-1, self.primary_cap_1.shape[1].value, 1,
               self.primary_cap_1.shape[-2].value, 1))
    blu_1 = tf.constant(np.zeros([
        self.batch_size, self.primary_cap_1.shape[1].value, self.num_label, 1, 1
    ]), dtype=tf.float32)
    with tf.variable_scope("routint_1"):
        caps_1 = routing(fc_function_1, blu_1, self.num_label, 16)

    ### Branch 2: kernel [5, 1]
    top_con_2 = CNNs(self.x, 128, [5, 1], 2, 'SAME', self.is_train)
    self.primary_cap_2 = layers_vector(
        top_con_2, 32, 4, [5, 1], 1, self.is_train,
        shapes=[-1, self.next_length * 32, 4, 1])
    fc_function_2 = tf.reshape(
        self.primary_cap_2,
        shape=(-1, self.primary_cap_2.shape[1].value, 1,
               self.primary_cap_2.shape[-2].value, 1))
    blu_2 = tf.constant(np.zeros([
        self.batch_size, self.primary_cap_2.shape[1].value, self.num_label, 1, 1
    ]), dtype=tf.float32)
    with tf.variable_scope("routing_2"):
        caps_2 = routing(fc_function_2, blu_2, self.num_label, 8)

    # Per-branch scale factors applied before concatenation.
    a = 3.0
    b = 1.0
    c = 1.0
    # a = 3.0
    # b = 1.0
    caps = tf.concat([a * caps, b * caps_1, c * caps_2], axis=3)
    # This is the best performance in our experiments.
    self.caps = tf.squeeze(caps, axis=1)

    # Capsule lengths (L2 norm over axis 2, stabilized by eposilion).
    v_length = tf.sqrt(
        reduce_sum(tf.square(self.caps), axis=2, keepdims=True) + eposilion)
    softmax_v = softmax(v_length, axis=1)
    #########[batch_size,num_label,1,1]
    # Predicted label: index of the largest softmax value along axis 1.
    argmax_idx = tf.to_int32(tf.argmax(softmax_v, axis=1))
    self.argmax_idx = tf.reshape(argmax_idx, shape=(self.batch_size, ))
    ###
    # Mask the capsules with self.y reshaped to (-1, num_label, 1).
    self.masked_v = tf.multiply(
        tf.squeeze(self.caps),
        tf.reshape(self.y, (-1, self.num_label, 1)))
    # Same expression as v_length above, stored on the instance.
    self.v_length = tf.sqrt(
        reduce_sum(tf.square(self.caps), axis=2, keepdims=True) + eposilion)

    ########
    # decoder: flatten the masked capsules, then 256 -> 512 -> self.length
    # fully connected layers with a sigmoid on the last one.
    vector_j = tf.reshape(self.masked_v, shape=(self.batch_size, -1))
    fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=256)
    fc1 = tf.contrib.layers.fully_connected(fc1, num_outputs=512)
    self.decode = tf.contrib.layers.fully_connected(
        fc1, num_outputs=self.length, activation_fn=tf.sigmoid)
def build_graph(self, task_index=0, time_major=True):
    """Create ``self.graph`` with placeholders, model outputs and training ops.

    Args:
        task_index: index of this worker when ``self.server`` is set; task 0
            is the chief.
        time_major: if True the input placeholder is
            [max_input_length, batch_size, input_dim], otherwise
            [batch_size, max_input_length, input_dim]. Also forwarded to
            ``self.__call__`` and ``self.compute_ce_loss``.

    Sets ``self.is_chief``, ``self.graph``, the placeholders
    (``inputX``, ``targetY``, ``input_seq_length``, ``target_seq_length``),
    ``trainlogits``/``logit_seq_length``, ``vallogits``/
    ``val_logit_seq_length``, ``val_loss``, ``predictions``,
    ``global_step``, ``loss``, ``update_op``, ``saver`` and, when summaries
    are enabled, ``train_summary_writer`` and ``summary_op``.

    Raises:
        Exception: if ``self.conf['optimizer']`` is neither 'adam' nor 'nm'.
    """
    # self.max_input_length = dispenser.max_input_length
    if self.server is not None:
        # Distributed setup: variables are placed by the replica device setter.
        cluster = tf.train.ClusterSpec(self.server.server_def.cluster)
        self.is_chief = task_index == 0
        # num_replicas is computed in both branches but not used below.
        num_replicas = len(cluster.as_dict()['worker'])
        device = tf.train.replica_device_setter(
            cluster=cluster,
            worker_device='/job:worker/task:%d' % task_index)
    else:
        self.is_chief = True
        num_replicas = 1
        device = None

    # create the graph
    self.graph = tf.Graph()

    # define the placeholders in the graph
    with self.graph.as_default():
        batch_size = self.conf['batch_size']
        with tf.device(device):
            # create the inputs placeholder, time_major, [time,batch_size,input_dim]
            # max_input_length and batch_size should be None for efficiency compute?
            if time_major:
                self.inputX = tf.placeholder(dtype=tf.float32,
                                             shape=[
                                                 self.max_input_length,
                                                 batch_size, self.input_dim
                                             ],
                                             name='inputX')
            else:
                self.inputX = tf.placeholder(dtype=tf.float32,
                                             shape=[
                                                 batch_size,
                                                 self.max_input_length,
                                                 self.input_dim
                                             ],
                                             name='inputX')

            # reference labels
            self.targetY = tf.placeholder(
                dtype=tf.int32,
                shape=[batch_size, self.max_target_length],
                name='targetY')

            # the length of all the input sequences
            self.input_seq_length = tf.placeholder(dtype=tf.int32,
                                                   shape=[batch_size],
                                                   name='input_seq_length')

            # the length of all the output sequences
            self.target_seq_length = tf.placeholder(
                dtype=tf.int32,
                shape=[batch_size],
                name='target_seq_length')

            # compute the training outputs of the classifier
            self.trainlogits, self.logit_seq_length = self.__call__(
                inputs=self.inputX,
                input_seq_length=self.input_seq_length,
                targets=self.targetY,
                target_seq_length=self.target_seq_length,
                is_training=True,
                time_major=time_major)

            # create variables for validation
            with tf.variable_scope('validation'):
                self.vallogits, self.val_logit_seq_length = self.__call__(
                    inputs=self.inputX,
                    input_seq_length=self.input_seq_length,
                    targets=self.targetY,
                    target_seq_length=self.target_seq_length,
                    is_training=False,
                    time_major=time_major)
                self.val_loss = self.compute_ce_loss(
                    self.targetY,
                    self.vallogits,
                    self.val_logit_seq_length,
                    self.target_seq_length,
                    time_major=time_major)
                # Best path of the greedy CTC decoder, cast to int32.
                self.predictions = tf.to_int32(
                    tf.nn.ctc_greedy_decoder(self.vallogits,
                                             self.val_logit_seq_length,
                                             merge_repeated=False)[0][0])

            # a variable to hold the amount of steps already taken
            self.global_step = tf.get_variable(
                name='global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            with tf.variable_scope('train'):
                # create the optimizer
                if self.conf['optimizer'] == 'adam':
                    optimizer = tf.train.AdamOptimizer(
                        self.conf['learning_rate'])
                elif self.conf['optimizer'] == 'nm':  # Nesterov momentum
                    optimizer = tf.train.MomentumOptimizer(
                        self.conf['learning_rate'], 0.99, use_nesterov=True)
                else:
                    raise Exception('unsupported optimizer func' +
                                    self.conf['optimizer'])

                # compute the loss
                self.loss = self.compute_ce_loss(self.targetY,
                                                 self.trainlogits,
                                                 self.logit_seq_length,
                                                 self.target_seq_length,
                                                 time_major=time_major)

                # compute the gradients
                gradients, variables = zip(
                    *optimizer.compute_gradients(self.loss))

                with tf.variable_scope('clip'):
                    # clip the gradients by their global norm
                    if self.conf['write_summary'] == 'yes':
                        tf.summary.scalar('global_gradients_norm',
                                          tf.global_norm(gradients))
                    # A falsy grad_clip setting falls back to a norm of 5.
                    gradients, _ = tf.clip_by_global_norm(
                        gradients, self.conf['grad_clip'] or 5)

                # operation to apply the gradients
                apply_gradients_op = optimizer.apply_gradients(
                    grads_and_vars=zip(gradients, variables),
                    global_step=self.global_step,
                    name='apply_gradients')

                # all remaining operations with the UPDATE_OPS GraphKeys
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                # create an operation to update the gradients, the batch_loss
                # and do all other update ops
                self.update_op = tf.group(*([apply_gradients_op] + update_ops),
                                          name='update')

            if self.conf['write_summary'] == 'yes':
                # create the summaries for visualisation
                tf.summary.scalar('validation loss', self.val_loss)
                tf.summary.scalar('train loss', self.loss)
                # NOTE(review): self.learning_rate is not assigned in this
                # method; it must be set elsewhere before this runs.
                tf.summary.scalar('learning rate', self.learning_rate)

                # create a histogram for all trainable parameters
                for param in tf.trainable_variables():
                    tf.summary.histogram(param.name, param)
                self.train_summary_writer = tf.summary.FileWriter(
                    os.path.join(self.conf['savepath'], 'log/train',
                                 self.conf['model']))
                self.summary_op = tf.summary.merge_all()

            # create the saver
            self.saver = tf.train.Saver(tf.global_variables(),
                                        max_to_keep=5,
                                        keep_checkpoint_every_n_hours=1)
def body(*args) -> BeamSearchLoopState:
    """The beam search body function.

    This is where the beam search algorithm is implemented: one step of the
    parent decoder is run, hypotheses are re-scored with the length penalty,
    the best ``self._beam_size`` continuations are kept and all per-beam
    state is re-gathered accordingly.

    Arguments:
        args: the fields of a ``BeamSearchLoopState`` instance (see the docs
            for this module)

    Returns:
        The ``BeamSearchLoopState`` for the next step.
    """
    loop_state = BeamSearchLoopState(*args)
    search = loop_state.bs_state
    dec_loop_state = loop_state.decoder_loop_state

    # don't want to use this decoder with uninitialized parent
    assert self.parent_decoder.step_scope.reuse

    # CALL THE DECODER BODY FUNCTION
    # TODO figure out why mypy throws too-many-arguments on this
    next_loop_state = decoder_body(*dec_loop_state)  # type: ignore

    step_logits = next_loop_state.prev_logits
    step_rnn_state = next_loop_state.prev_rnn_state
    step_rnn_output = next_loop_state.prev_rnn_output
    step_contexts = next_loop_state.prev_contexts

    vocabulary_size = len(self.parent_decoder.vocabulary)

    # mask the probabilities
    # shape(logprobs) = beam x vocabulary
    logprobs = tf.nn.log_softmax(step_logits)

    is_finished = tf.expand_dims(tf.to_float(search.finished), 1)
    live_part = (1. - is_finished) * logprobs

    # Finished hypotheses may only be extended with the padding token,
    # at zero cost; every other token gets the lowest float32 value.
    pad_only_row = tf.one_hot(
        PAD_TOKEN_INDEX,
        vocabulary_size,
        dtype=tf.float32,
        on_value=0.,
        off_value=tf.float32.min)
    finished_part = is_finished * pad_only_row
    logprobs = live_part + finished_part

    # update hypothesis scores
    # shape(hyp_probs) = beam x vocabulary
    hyp_probs = tf.expand_dims(search.logprob_sum, 1) + logprobs

    # update hypothesis lengths (finished hypotheses stop growing)
    hyp_lengths = search.lengths + 1 - tf.to_int32(search.finished)

    # shape(scores) = beam x vocabulary
    penalty = tf.expand_dims(self._length_penalty(hyp_lengths), 1)
    scores = hyp_probs / penalty

    # flatten so we can use top_k
    flat_scores = tf.reshape(scores, [-1])

    # shape(both) = beam
    best_scores, best_flat_ids = tf.nn.top_k(flat_scores, self._beam_size)
    best_scores.set_shape([self._beam_size])
    best_flat_ids.set_shape([self._beam_size])

    # flatten the hypothesis probabilities
    flat_hyp_probs = tf.reshape(hyp_probs, [-1])

    # select logprobs of the best hyps (disregard lenghts)
    next_logprob_sum = tf.gather(flat_hyp_probs, best_flat_ids)
    # pylint: disable=no-member
    next_logprob_sum.set_shape([self._beam_size])
    # pylint: enable=no-member

    # A flat index encodes (beam, word) as beam * vocabulary + word.
    next_word_ids = tf.mod(best_flat_ids, vocabulary_size)
    next_beam_ids = tf.div(best_flat_ids, vocabulary_size)

    # Re-order the per-beam decoder state to follow the surviving beams.
    kept_rnn_state = tf.gather(step_rnn_state, next_beam_ids)
    kept_rnn_output = tf.gather(step_rnn_output, next_beam_ids)
    kept_attns = [tf.gather(a, next_beam_ids) for a in step_contexts]
    next_lengths = tf.gather(hyp_lengths, next_beam_ids)

    # update finished flags
    has_just_finished = tf.equal(next_word_ids, END_TOKEN_INDEX)
    next_finished = tf.logical_or(
        tf.gather(search.finished, next_beam_ids),
        has_just_finished)

    prev_output = loop_state.bs_output
    step = dec_loop_state.step

    output = SearchStepOutputTA(
        scores=prev_output.scores.write(step, best_scores),
        parent_ids=prev_output.parent_ids.write(step, next_beam_ids),
        token_ids=prev_output.token_ids.write(step, next_word_ids))

    search_state = SearchState(
        logprob_sum=next_logprob_sum,
        lengths=next_lengths,
        finished=next_finished,
        last_word_ids=next_word_ids,
        last_state=kept_rnn_state,
        last_attns=kept_attns)

    # For run-time computation, the decoder needs:
    # - step
    # - input_symbol
    # - prev_rnn_state
    # - prev_rnn_output
    # - prev_contexts
    # - attention_loop_states
    # - finished

    # For train-mode computation, it also needs
    # - train_inputs

    # For recording the computation in time, it needs
    # - rnn_outputs (TA)
    # - logits (TA)
    # - mask (TA)

    # Because of the beam search algorithm, it outputs
    # (but does not not need)
    # - prev_logits

    # During beam search decoding, we are not interested in recording
    # of the computation as done by the decoder. The record is stored
    # in search states and step outputs of this decoder.

    kept_logits = tf.gather(next_loop_state.prev_logits, next_beam_ids)
    kept_contexts = [tf.gather(ctx, next_beam_ids)
                     for ctx in next_loop_state.prev_contexts]

    # Update the decoder next_loop_state
    next_loop_state = next_loop_state._replace(
        input_symbol=next_word_ids,
        prev_rnn_state=kept_rnn_state,
        prev_rnn_output=kept_rnn_output,
        prev_logits=kept_logits,
        prev_contexts=kept_contexts,
        finished=next_finished)

    return BeamSearchLoopState(
        bs_state=search_state,
        bs_output=output,
        decoder_loop_state=next_loop_state)
def coarseness(image, kmax=5):
    """Return a scalar coarseness score for an RGB image tensor.

    For every scale ``k`` in ``1..kmax`` the grayscale image is filtered with
    a ``2**k x 2**k`` all-ones box kernel (a windowed sum, not a normalised
    mean).  Signed differences between points ``2 * 2**k - 1`` pixels apart
    are then taken horizontally and vertically.  The index of the strongest
    response picks a "best" size ``2**index`` per position, and the mean of
    those sizes is returned.

    Args:
        image: RGB image tensor accepted by ``tf.image.rgb_to_grayscale`` and
            ``tf.nn.conv2d`` (i.e. 4-D with 3 channels last).
        kmax: Number of scales to evaluate; must be >= 1.  Defaults to 5,
            the number of scales the function has always used.

    Returns:
        A scalar float tensor: the mean of the selected ``2**index`` values.
    """
    image = tf.image.rgb_to_grayscale(image)
    unit_strides = [1, 1, 1, 1]

    horizontal_diffs = []
    vertical_diffs = []
    for k in range(1, kmax + 1):
        window = 2 ** k

        # Box filter: sum of gray values over a window x window neighbourhood.
        box_kernel = tf.ones([window, window, 1, 1])
        average_gray = tf.nn.conv2d(image, box_kernel,
                                    strides=unit_strides, padding='SAME')

        # Horizontal difference: -1 on the first tap, +1 on the last tap.
        kernel_h = np.zeros([1, 2 * window, 1, 1])
        kernel_h[0, 0, 0, 0] = -1
        kernel_h[0, 2 * window - 1, 0, 0] = 1
        horizon = tf.nn.conv2d(average_gray, kernel_h,
                               strides=unit_strides, padding='SAME')
        horizontal_diffs.append(tf.squeeze(horizon, [3]))

        # Vertical difference: same taps, laid out along the height axis.
        kernel_v = np.zeros([2 * window, 1, 1, 1])
        kernel_v[0, 0, 0, 0] = -1
        kernel_v[2 * window - 1, 0, 0, 0] = 1
        vertical = tf.nn.conv2d(average_gray, kernel_v,
                                strides=unit_strides, padding='SAME')
        vertical_diffs.append(tf.squeeze(vertical, [3]))

    # NOTE(review): the per-scale maps are concatenated along axis 0, which is
    # the batch axis after the squeeze.  The argmax/reduce_max over axis 0 is
    # therefore a pure "best scale" selection only when the batch size is 1 --
    # confirm that callers pass a single image.
    horizon = tf.concat(horizontal_diffs, 0)
    vertical = tf.concat(vertical_diffs, 0)

    h_max_index = tf.to_int32(tf.argmax(horizon, 0))
    v_max_index = tf.to_int32(tf.argmax(vertical, 0))
    h_max = tf.reduce_max(horizon, 0)
    v_max = tf.reduce_max(vertical, 0)

    # Per position, keep the index from whichever direction responded more.
    comp = tf.greater(h_max, v_max)
    best_index = tf.where(comp, h_max_index, v_max_index)
    best_size = tf.to_float(tf.pow(2, best_index))
    return tf.reduce_mean(best_size)
def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs, params, tie_projs, initializer=None, proj_initializer=None, div_val=1, perms=None, proj_same_dim=True, scope='adaptive_softmax', **kwargs):
    """Compute the mean negative log-likelihood with an adaptive softmax.

    With an empty ``cutoffs`` list this is an ordinary softmax over
    ``n_token`` classes averaged with ``tf.reduce_mean``.  Otherwise the
    vocabulary is split at ``cutoffs`` into a head cluster (index 0) and tail
    clusters; per-cluster losses are combined through ``perms`` and the sum
    is divided by the accumulated count.

    Args:
        hidden: Hidden states; contracted as ``'ibd'`` (3-D) in the einsum
            calls below.
        target: Integer targets, contracted as ``'ib'`` for tail clusters.
        n_token: Vocabulary size.
        d_embed: Base embedding size; cluster ``i`` uses
            ``d_embed // div_val ** i``.
        d_proj: Projection size, used for untied projection variables.
        cutoffs: List of vocabulary cut points (may be empty).
        params: Indexable pair ``(params_W, params_projs)`` holding the output
            weights and projections.
        tie_projs: Per-cluster flags; when true the projection is taken from
            ``params_projs`` instead of creating a new variable.
        initializer: Not used in this function.
        proj_initializer: Initializer for untied ``'proj'`` variables.
        div_val: Divisor controlling per-cluster embedding size; ``1`` means
            ``params_W`` is a single matrix sliced per cluster.
        perms: Per-cluster tensors.  ``perms[0]`` is multiplied elementwise
            with the head loss; ``perms[i > 0]`` is contracted as ``'ibk'``.
            Presumably one-hot assignments of positions to slots -- TODO
            confirm against the caller.
        proj_same_dim: Participates in deciding whether an untied cluster
            needs a projection at all.
        scope: Variable scope name.
        **kwargs: ``head_target`` is read from here and used as the labels of
            the head softmax.

    Returns:
        A scalar tensor with the mean negative log-likelihood.
    """

    def _logit(x, W, b, proj):
        # Optionally project x, then multiply by the transposed output
        # weights and add the bias.  Handles rank-3 and rank-2 inputs.
        y = x
        if x.shape.ndims == 3:
            if proj is not None:
                y = tf.einsum('ibd,ed->ibe', y, proj)
            return tf.einsum('ibd,nd->ibn', y, W) + b
        else:
            if proj is not None:
                y = tf.einsum('id,ed->ie', y, proj)
            return tf.einsum('id,nd->in', y, W) + b

    params_W, params_projs = params[0], params[1]

    with tf.variable_scope(scope):
        if len(cutoffs) == 0:
            # No clustering: plain softmax over the whole vocabulary.
            softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer())
            output = _logit(hidden, params_W, softmax_b, params_projs)
            nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
            nll = tf.reduce_mean(nll)
        else:
            total_loss, total_cnt = 0, 0
            cutoff_ends = [0] + cutoffs + [n_token]
            for i in range(len(cutoff_ends) - 1):
                with tf.variable_scope('cutoff_{}'.format(i)):
                    l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]

                    cur_d_embed = d_embed // (div_val ** i)

                    # With div_val == 1 all clusters share one weight matrix
                    # that is sliced; otherwise each cluster has its own.
                    if div_val == 1:
                        cur_W = params_W[l_idx: r_idx]
                    else:
                        cur_W = params_W[i]
                    cur_b = tf.get_variable('b', [r_idx - l_idx], initializer=tf.zeros_initializer())
                    if tie_projs[i]:
                        if div_val == 1:
                            cur_proj = params_projs
                        else:
                            cur_proj = params_projs[i]
                    else:
                        if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
                            cur_proj = None
                        else:
                            cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj], initializer=proj_initializer)

                    if i == 0:
                        # Head cluster: its own tokens plus one extra logit
                        # per tail cluster (cluster_W / cluster_b).
                        cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed], initializer=tf.zeros_initializer())
                        cluster_b = tf.get_variable('cluster_b', [len(cutoffs)], initializer=tf.zeros_initializer())
                        cur_W = tf.concat([cur_W, cluster_W], 0)
                        cur_b = tf.concat([cur_b, cluster_b], 0)

                        head_logit = _logit(hidden, cur_W, cur_b, cur_proj)

                        # kwargs.get returns None when the caller omits
                        # head_target; the loss op below needs real labels.
                        head_target = kwargs.get("head_target")
                        head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=head_target, logits=head_logit)

                        masked_loss = head_nll * perms[i]
                        total_loss += tf.reduce_sum(masked_loss)
                        total_cnt += tf.reduce_sum(perms[i])

                        # head_logprob = tf.nn.log_softmax(head_logit)
                        # final_logprob = head_logprob * perms[i][:, :, None]
                        # final_target = tf.one_hot(target, tf.shape(head_logprob)[2])
                        # total_loss -= tf.einsum('ibn,ibn->', final_logprob, final_target)
                        # total_cnt += tf.reduce_sum(perms[i])
                    else:
                        # Tail cluster: relies on head_nll computed in the
                        # i == 0 iteration.  perms[i] gathers the head loss,
                        # hidden states and shifted targets into k slots.
                        cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i])

                        cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i])
                        tail_logit = _logit(cur_hidden, cur_W, cur_b, cur_proj)

                        # Targets are shifted so they index into this cluster.
                        tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx), perms[i])
                        tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=tf.to_int32(tail_target), logits=tail_logit)

                        sum_nll = cur_head_nll + tail_nll
                        # Per-slot weight: how much of perms[i] landed in each slot.
                        mask = tf.reduce_sum(perms[i], [0, 1])

                        masked_loss = sum_nll * mask
                        total_loss += tf.reduce_sum(masked_loss)
                        total_cnt += tf.reduce_sum(mask)

            nll = total_loss / total_cnt

    return nll
# Per-class label weights applied when the caller leaves ``loss_weight`` at
# its default of 1.0.  They used to be hard-coded inside the function.
_UNIMIB_LABEL_WEIGHTS = [0.96, 55, 128, 139, 123, 168, 279, 350]


def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  gt_is_matting_map=False,
                                                  scope=None):
    """Adds softmax cross entropy loss for logits of each scale.

    Args:
        scales_to_logits: A map from logits names for different scales to
            logits. The logits have shape
            [batch, logits_height, logits_width, num_classes].
        labels: Groundtruth labels with shape
            [batch, image_height, image_width, 1].
        num_classes: Integer, number of target classes.
        ignore_label: Integer, label to ignore.
        loss_weight: A float or a list of loss weights. A list gives the
            weight of each label by index, e.g. ``[0.1, 0.5]`` weights label
            0 by 0.1 and label 1 by 0.5. When left at the default ``1.0`` and
            the groundtruth is an integer mask, the built-in
            ``_UNIMIB_LABEL_WEIGHTS`` are used (this preserves the behaviour
            of existing callers). Any other value is passed through as-is.
        upsample_logits: Boolean, upsample logits or not.
        hard_example_mining_step: An integer, the training step in which the
            hard example mining kicks off. The mining percent is gradually
            reduced to ``top_k_percent_pixels``; e.g. with
            ``hard_example_mining_step = 100K`` and
            ``top_k_percent_pixels = 0.25`` it shrinks from 100% to 25% over
            the first 100K steps.
        top_k_percent_pixels: A float in [0.0, 1.0]. When < 1.0, only the
            loss of the top k percent pixels is used (hard pixel mining).
        gt_is_matting_map: If true, the groundtruth is a matting map of
            confidence scores. If false, it is an integer valued class mask.
        scope: String, the scope for the loss.

    Raises:
        ValueError: If ``labels`` is None, if the groundtruth is a matting
            map but ``labels`` is not floating point, or if the groundtruth
            is a matting map and ``loss_weight`` is not 1.0.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    # Everything below is loop-invariant, so it is validated once up front.
    if gt_is_matting_map:
        # A matting map of confidences must hold floating point values.
        if not labels.dtype.is_floating:
            raise ValueError(
                'Labels must be floats if groundtruth is a matting map.')
        # Class-dependent weights only make sense for integer label masks;
        # a matting map is always weighted uniformly (label_weight = 1.0).
        if loss_weight != 1.0:
            raise ValueError(
                'loss_weight must equal to 1 if groundtruth is matting map.')
        label_weights = 1.0
    elif loss_weight != 1.0:
        # The caller asked for specific weights: honour them.
        label_weights = loss_weight
    else:
        label_weights = _UNIMIB_LABEL_WEIGHTS

    for scale, logits in scales_to_logits.items():
        loss_scope = None
        if scope:
            loss_scope = '%s_%s' % (scope, scale)

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            # When gt_is_matting_map = true, label downsampling with nearest
            # neighbor method may introduce artifacts. However, to avoid
            # ignore_label from being interpolated with other labels, we
            # still perform nearest neighbor interpolation.
            # TODO(huizhongc): Change to bilinear interpolation by processing
            # padded and non-padded label separately.
            if gt_is_matting_map:
                tf.logging.warning(
                    'Label downsampling with nearest neighbor may introduce artifacts.'
                )
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        weights = utils.get_label_weight_mask(scaled_labels,
                                              ignore_label,
                                              num_classes,
                                              label_weights=label_weights)
        # Dimension of keep_mask is equal to the total number of pixels.
        keep_mask = tf.cast(tf.not_equal(scaled_labels, ignore_label),
                            dtype=tf.float32)

        logits = tf.reshape(logits, shape=[-1, num_classes])

        if gt_is_matting_map:
            # Assign label value 0 to ignore pixels. The exact label value of
            # an ignore pixel does not matter, because its loss is multiplied
            # by a 0 weight.
            train_labels = scaled_labels * keep_mask
            train_labels = tf.expand_dims(train_labels, 1)
            train_labels = tf.concat([1 - train_labels, train_labels], axis=1)
        else:
            train_labels = tf.one_hot(scaled_labels,
                                      num_classes,
                                      on_value=1.0,
                                      off_value=0.0)

        default_loss_scope = ('softmax_all_pixel_loss'
                              if top_k_percent_pixels == 1.0 else
                              'softmax_hard_example_mining')
        with tf.name_scope(loss_scope, default_loss_scope,
                           [logits, train_labels, weights]):
            # Compute the loss for all pixels.
            pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(train_labels,
                                        name='train_labels_stop_gradient'),
                logits=logits,
                name='pixel_losses')
            weighted_pixel_losses = tf.multiply(pixel_losses, weights)

            if top_k_percent_pixels == 1.0:
                total_loss = tf.reduce_sum(weighted_pixel_losses)
                num_present = tf.reduce_sum(keep_mask)
            else:
                num_pixels = tf.to_float(tf.shape(logits)[0])
                # Compute the top_k_percent pixels based on current training
                # step.
                if hard_example_mining_step == 0:
                    # Directly focus on the top_k pixels.
                    top_k_pixels = tf.to_int32(
                        top_k_percent_pixels * num_pixels)
                else:
                    # Gradually reduce the mining percent to
                    # top_k_percent_pixels.
                    global_step = tf.to_float(
                        tf.train.get_or_create_global_step())
                    ratio = tf.minimum(
                        1.0, global_step / hard_example_mining_step)
                    top_k_pixels = tf.to_int32(
                        (ratio * top_k_percent_pixels + (1.0 - ratio)) *
                        num_pixels)
                top_k_losses, _ = tf.nn.top_k(weighted_pixel_losses,
                                              k=top_k_pixels,
                                              sorted=True,
                                              name='top_k_percent_pixels')
                total_loss = tf.reduce_sum(top_k_losses)
                # Only pixels with a non-zero loss count as present.
                num_present = tf.reduce_sum(
                    tf.to_float(tf.not_equal(top_k_losses, 0.0)))

            loss = _div_maybe_zero(total_loss, num_present)
            tf.losses.add_loss(loss)
def __init__(self, I_size, O_size, n_control):
    """Build the dueling Q-network graph, its loss and its train op.

    The flat input vector goes through four fully connected ReLU layers
    (64, 64, 64 and ``O_size`` units).  The last layer is split in half into
    an advantage stream and a value stream, which are recombined into
    Q-values for ``n_control`` controls with 5 discrete actions each.

    Args:
        I_size: Length of the flat input vector.
        O_size: Width of the last fully connected layer; it is split into two
            halves of ``O_size // 2``.
        n_control: Number of controls; each one has 5 discrete actions.
    """
    n_actions = 5  # discrete actions available to each control

    def _relu_layer(layer_in, width):
        # Fully connected ReLU layer; weights and biases are both drawn from
        # a random normal initializer.
        return tf.contrib.layers.fully_connected(
            inputs=layer_in,
            num_outputs=width,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.random_normal_initializer(),
            biases_initializer=tf.random_normal_initializer())

    # The network receives a flat state vector.
    self.scalarInput = tf.placeholder(shape=[None, I_size],
                                      dtype=tf.float32)
    self.f_connect1 = _relu_layer(self.scalarInput, 64)
    self.f_connect2 = _relu_layer(self.f_connect1, 64)
    self.f_connect3 = _relu_layer(self.f_connect2, 64)
    self.f_connect4 = _relu_layer(self.f_connect3, O_size)

    # Split the last layer into separate advantage and value streams.
    self.streamAC, self.streamVC = tf.split(self.f_connect4,
                                            num_or_size_splits=2,
                                            axis=1)
    self.streamA = self.streamAC
    self.streamV = self.streamVC

    xavier_init = tf.contrib.layers.xavier_initializer()
    half_width = O_size // 2
    self.AW = tf.Variable(xavier_init([half_width, n_actions * n_control]))
    self.VW = tf.Variable(xavier_init([half_width, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)

    # Combine both streams into the final Q-values.
    # NOTE(review): reduce_mean is called without an axis, so the advantage
    # mean is taken over every element, batch included -- confirm that this
    # is intended rather than a per-sample mean.
    advantage_mean = tf.reduce_mean(self.Advantage, keep_dims=True)
    centered_advantage = tf.subtract(self.Advantage, advantage_mean)
    self.Qout = self.Value + centered_advantage

    # One row per (sample, control) pair, one column per action.
    sizeQ = tf.shape(self.Qout)
    q_rows = tf.to_int32(sizeQ[0] * n_control)
    q_cols = tf.to_int32(sizeQ[1] / n_control)
    self.Qout_reshape = tf.reshape(self.Qout, [q_rows, q_cols])
    self.predict = tf.argmax(self.Qout_reshape, 1)

    # The loss is the mean squared difference between the target Q-values
    # and the predicted Q-values of the actions that were actually taken.
    self.targetQ = tf.placeholder(shape=[None, n_control],
                                  dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None, n_control], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, n_actions,
                                     dtype=tf.float32)
    hotsize = tf.shape(self.actions_onehot)
    self.reshape_hot = tf.reshape(self.actions_onehot,
                                  [hotsize[0] * n_control, n_actions])

    # The one-hot mask selects the Q-value of the chosen action in each row.
    chosen_q = tf.multiply(self.Qout_reshape, self.reshape_hot)
    self.sum = tf.reduce_sum(chosen_q, axis=1)
    self.Q = tf.reshape(self.sum, [hotsize[0], n_control])

    self.td_error = tf.square(self.targetQ - self.Q)
    self.loss = tf.reduce_mean(self.td_error)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
    self.updateModel = self.trainer.minimize(self.loss)
def discrete_bottleneck(self, x, scope="bottleneck"):
    """Discretization bottleneck for latent variables.

    For each of ``hparams.num_residuals`` codebooks the sliced input is
    looked up with ``self.embedding_lookup``.  The looked-up means are
    summed, the one-hot assignments are turned into a bit pattern and then
    into a single integer code, and the summed means are passed through two
    dense layers using a straight-through gradient.

    Args:
        x: Input to the discretization bottleneck.
        scope: Variable scope name; also forwarded to the returned embedding
            function.

    Returns:
        A dict with the keys:
            "dense": output of the final dense layer (``hidden_size`` units),
            "discrete": integer latent codes, reshaped to ``x``'s shape
                without its last axis,
            "loss": accumulated quantization / commitment loss,
            "embed": ``self.embed`` partially applied with ``scope``.

    Raises:
        Nothing directly; errors from the helper methods and from TensorFlow
        propagate unchanged.
    """
    hparams = self.hparams
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        x_reshaped = self.slice_hidden(x)
        x_res = x_reshaped
        x_means_hot = []
        x_means = 0
        loss = 0
        for i in range(hparams.num_residuals):
            # NOTE(review): the lookup always receives x_reshaped, while the
            # running residual x_res is only used for the EMA update below --
            # confirm this is intended.
            x_means_hot_res, x_means_res, q_loss_res, e_loss_res = (
                self.embedding_lookup(x_reshaped, hparams.means[i]))

            # Update the ema variables
            if hparams.ema:
                tf.logging.info("Using EMA with beta = {}".format(
                    hparams.beta))
                # Moving average of how often each codebook entry was chosen.
                updated_ema_count_res = (
                    moving_averages.assign_moving_average(
                        hparams.ema_count[i],
                        tf.reduce_sum(
                            tf.reshape(x_means_hot_res,
                                       shape=[-1, hparams.num_blocks,
                                              hparams.block_v_size]),
                            axis=0),
                        hparams.decay,
                        zero_debias=False))

                # Sum of the inputs assigned to each codebook entry.
                dw = tf.matmul(
                    tf.transpose(x_means_hot_res, perm=[1, 2, 0]),
                    tf.transpose(x_res, perm=[1, 0, 2]))

                updated_ema_means_res = (
                    moving_averages.assign_moving_average(
                        hparams.ema_means[i], dw, hparams.decay,
                        zero_debias=False))
                n = tf.reduce_sum(updated_ema_count_res, axis=-1,
                                  keep_dims=True)
                # Smooth the counts with epsilon so the division below never
                # sees a zero count.
                updated_ema_count_res = (
                    (updated_ema_count_res + hparams.epsilon) /
                    (n + 2**hparams.z_size * hparams.epsilon) * n)
                updated_ema_means_res = (
                    updated_ema_means_res /
                    tf.expand_dims(updated_ema_count_res, axis=-1))

                with tf.control_dependencies([e_loss_res]):
                    # This used to be a bare print(); it goes through the
                    # logger so that graph construction stays quiet.
                    tf.logging.debug("Assigning EMA means for residual %d: %s",
                                     i, hparams.means[i])
                    update_means_res = tf.assign(hparams.means[i],
                                                 updated_ema_means_res)
                    # Tie the loss to the assign op so that evaluating the
                    # loss also updates the codebook.
                    with tf.control_dependencies([update_means_res]):
                        loss += hparams.beta * e_loss_res
            else:
                loss += q_loss_res + hparams.beta * e_loss_res

            # Update the residuals
            x_res -= x_means_res
            x_means += x_means_res
            x_means_hot.append(x_means_hot_res)

        # Get the discrete latent representation
        x_means_hot = tf.stack(x_means_hot, axis=1)
        x_means_idx = tf.argmax(x_means_hot, axis=-1)

        # Get the binary representation
        num_bits = int(
            hparams.z_size // (hparams.num_blocks * hparams.num_residuals))
        x_means_bits = self.int_to_bit(x_means_idx, num_bits=num_bits,
                                       base=2)
        # Drop the last two axes of the bit tensor's shape, then overwrite
        # the first remaining entry with -1 and the last with z_size to get
        # the reshape target.
        shape = common_layers.shape_list(x_means_bits)
        new_shape = shape[:-2]
        new_shape[0] = -1
        new_shape[-1] = hparams.z_size
        x_means_bits = tf.reshape(x_means_bits, new_shape)
        x_discrete = self.bit_to_int(tf.to_int32(x_means_bits),
                                     num_bits=hparams.z_size,
                                     base=2)

        # Reshape x_discrete
        shape_x = common_layers.shape_list(x)
        shape_discrete = shape_x[:-1]
        x_discrete = tf.reshape(x_discrete, shape_discrete)
        x_means = tf.reshape(x_means, shape=shape_x)

        # Straight-through estimator: the forward pass uses x_means, the
        # gradient flows to x.
        h1 = x + tf.stop_gradient(x_means - x)

        h2 = tf.layers.dense(tf.nn.relu(h1), hparams.filter_size,
                             name="vch2")
        res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size,
                              name="vcfin")
        embed_fn = partial(self.embed, scope=scope)
        return {
            "dense": res,
            "discrete": x_discrete,
            "loss": loss,
            "embed": embed_fn
        }
def create_network(self, inputs, input_seq_len, dropout_rate, reuse_variables):
    """Build the CNN / bidirectional-LSTM / CTC-decoder graph.

    The layer list in ``self.network_proto`` is split into an optional
    leading block of convolution / max-pooling layers and a block of LSTM
    layers.  A linear output layer produces per-timestep class logits, which
    are decoded with the configured CTC decoder.

    Args:
        inputs: Batch-major input tensor; in the CNN path it is reshaped to
            ``[batch, -1, network_proto.features, 1]``.
        input_seq_len: Sequence length of every batch entry.
        dropout_rate: Drop probability; ``1 - dropout_rate`` is passed to
            ``tf.nn.dropout`` as the keep probability.
        reuse_variables: Forwarded as ``reuse`` to the variable scope, the
            conv layers and the CPU LSTM cells.

    Returns:
        A tuple ``(lstm_seq_len, time_major_logits, time_major_softmax,
        logits, softmax, decoded, sparse_decoded, factor)``.
        ``sparse_decoded`` holds the indices, values and dense shape of
        ``decoded`` as named identity ops, and ``factor`` is the product of
        the pooling strides along x.

    Raises:
        Exception: On an unknown layer type, LSTM layers with differing
            hidden sizes, a non-cudnn backend, or an unknown CTC model.
    """
    network_proto = self.network_proto
    seq_len = input_seq_len
    batch_size = tf.shape(inputs)[0]
    gpu_enabled = self.gpu_available

    with tf.variable_scope("cnn_lstm", reuse=reuse_variables) as scope:
        # The CNN path is taken whenever the first layer is not an LSTM.
        no_layers = len(network_proto.layers) == 0
        if not no_layers:
            has_conv_or_pool = network_proto.layers[0].type != LayerParams.LSTM
        else:
            has_conv_or_pool = False

        # Accumulated down-sampling of the sequence axis caused by pooling.
        factor = 1
        if has_conv_or_pool:
            cnn_inputs = tf.reshape(inputs, [batch_size, -1, network_proto.features, 1])
            # (sequence length, feature count); updated after every pooling.
            shape = seq_len, network_proto.features

            layers = [cnn_inputs]
            last_num_filters = 1

            cnn_layer_index = 0
            for layer in [l for l in network_proto.layers if l.type != LayerParams.LSTM]:
                if layer.type == LayerParams.CONVOLUTIONAL:
                    # The first conv layer is named "conv2d", later ones
                    # "conv2d_<index>".
                    layers.append(tf.layers.conv2d(
                        name="conv2d" if cnn_layer_index == 0 else "conv2d_{}".format(cnn_layer_index),
                        inputs=layers[-1],
                        filters=layer.filters,
                        kernel_size=(layer.kernel_size.x, layer.kernel_size.y),
                        padding="same",
                        activation=tf.nn.relu,
                        reuse=reuse_variables,
                    ))
                    cnn_layer_index += 1
                    last_num_filters = layer.filters
                elif layer.type == LayerParams.MAX_POOLING:
                    layers.append(tf.layers.max_pooling2d(
                        inputs=layers[-1],
                        pool_size=(layer.kernel_size.x, layer.kernel_size.y),
                        strides=(layer.stride.x, layer.stride.y),
                        padding="same",
                    ))

                    # Pooling shrinks both the sequence length and the
                    # feature count by the respective stride.
                    shape = (tf.to_int32(shape[0] // layer.stride.x),
                             shape[1] // layer.stride.y)
                    factor *= layer.stride.x
                else:
                    raise Exception("Unknown layer of type %s" % layer.type)

            lstm_seq_len, lstm_num_features = shape
            # Merge the feature and filter axes into one feature axis.
            rnn_inputs = tf.reshape(layers[-1],
                                    [batch_size, tf.shape(layers[-1])[1], last_num_filters * lstm_num_features])

            lstm_num_features = last_num_filters * lstm_num_features
        else:
            rnn_inputs = inputs
            lstm_seq_len = seq_len
            lstm_num_features = network_proto.features

        lstm_layers = [l for l in network_proto.layers if l.type == LayerParams.LSTM]

        # Time major inputs required for lstm
        time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])

        if len(lstm_layers) > 0:
            # Both backends take a single hidden size, so every LSTM layer
            # must match the first one.
            for i, lstm in enumerate(lstm_layers):
                if lstm.hidden_nodes != lstm_layers[0].hidden_nodes:
                    raise Exception("Currently all lstm layers must have an equal number of hidden nodes. "
                                    "Got {} != {}".format(lstm.hidden_nodes, lstm_layers[0].hidden_nodes))

            def cpu_cudnn_compatible_lstm_backend(time_major_inputs, hidden_nodes):
                # Stacked bidirectional RNN built from cudnn-compatible cells.
                def get_lstm_cell(num_hidden):
                    return cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden, reuse=reuse_variables)

                # One (forward, backward) cell pair per LSTM layer.
                fw, bw = zip(*[(get_lstm_cell(hidden_nodes), get_lstm_cell(hidden_nodes)) for _ in lstm_layers])

                time_major_outputs, output_fw, output_bw \
                    = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(list(fw), list(bw), time_major_inputs,
                                                                     sequence_length=lstm_seq_len,
                                                                     dtype=tf.float32,
                                                                     scope="cudnn_lstm/stack_bidirectional_rnn",
                                                                     time_major=True,
                                                                     )

                return time_major_outputs

            def gpu_cudnn_lstm_backend(time_major_inputs, hidden_nodes):
                # Create the Cudnn LSTM factory
                rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), hidden_nodes,
                                               direction='bidirectional',
                                               kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))

                # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
                rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable

                # Apply the lstm to the inputs
                time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)
                return time_major_outputs

            if network_proto.backend.cudnn:
                if gpu_enabled:
                    print("Using CUDNN LSTM backend on GPU")
                    time_major_outputs = gpu_cudnn_lstm_backend(time_major_inputs, lstm_layers[0].hidden_nodes)
                else:
                    print("Using CUDNN compatible LSTM backend on CPU")
                    time_major_outputs = cpu_cudnn_compatible_lstm_backend(time_major_inputs, lstm_layers[0].hidden_nodes)
            else:
                raise Exception("Only cudnn based backend supported yet.")

            # Set the output size (forward and backward outputs together)
            output_size = lstm_layers[-1].hidden_nodes * 2
        else:
            # No LSTM layers: the features go straight to the output layer.
            output_size = lstm_num_features
            time_major_outputs = time_major_inputs

        # flatten to (T * N, F) for matrix multiplication. This will be reversed later
        time_major_outputs = tf.reshape(time_major_outputs, [-1, time_major_outputs.shape.as_list()[2]])

        if network_proto.dropout > 0:
            time_major_outputs = tf.nn.dropout(time_major_outputs, 1 - dropout_rate, name="dropout")

        # we need to turn off validate_shape so we can resize the variable on a codec resize
        w = tf.get_variable('W', validate_shape=False, initializer=tf.random_uniform([output_size, network_proto.classes], -0.1, 0.1))
        b = tf.get_variable('B', validate_shape=False, initializer=tf.constant(0., shape=[network_proto.classes]))

        # the output layer
        time_major_logits = tf.matmul(time_major_outputs, w) + b

        # reshape back; the class count is read from w at run time because
        # w's static shape is not validated
        time_major_logits = tf.reshape(time_major_logits, [-1, batch_size, tf.shape(w)[-1]],
                                       name="time_major_logits")

        time_major_softmax = tf.nn.softmax(time_major_logits, -1, "time_major_softmax")

        # Batch-major views of the logits and the softmax.
        logits = tf.transpose(time_major_logits, [1, 0, 2], name="logits")
        softmax = tf.transpose(time_major_softmax, [1, 0, 2], name="softmax")

        # Named identity so the (possibly pooled) length can be fetched.
        lstm_seq_len = tf.identity(lstm_seq_len, "seq_len_out")

        # DECODER
        # ================================================================
        if network_proto.ctc == NetworkParams.CTC_DEFAULT:
            decoded, log_prob = ctc_ops.ctc_greedy_decoder(time_major_logits, lstm_seq_len, merge_repeated=network_proto.ctc_merge_repeated)
        elif network_proto.ctc == NetworkParams.CTC_FUZZY:
            decoded, log_prob = self.fuzzy_module['decoder_op'](softmax, lstm_seq_len)
        else:
            raise Exception("Unknown ctc model: '%s'. Supported are Default and Fuzzy" % network_proto.ctc)

        # Only the first decoded result is used; its components are exposed
        # as named ops.
        decoded = decoded[0]
        sparse_decoded = (
            tf.identity(decoded.indices, name="decoded_indices"),
            tf.identity(decoded.values, name="decoded_values"),
            tf.identity(decoded.dense_shape, name="decoded_shape"),
        )

        return lstm_seq_len, time_major_logits, time_major_softmax, logits, softmax, decoded, sparse_decoded, factor
def read_label(tf_bytestring):
    """Decode a raw byte string holding one uint8 label into an int32 scalar.

    Args:
        tf_bytestring: String tensor containing the raw label bytes.

    Returns:
        The label as a scalar int32 tensor.
    """
    # Reshaping to [] only succeeds when exactly one byte was decoded.
    return tf.to_int32(
        tf.reshape(tf.decode_raw(tf_bytestring, tf.uint8), []))
def right_shift(x):
    """Prepend ``hp.filter_h - 1`` all-zero rows to ``x`` along axis 1.

    Args:
        x: Rank-4 tensor with a statically known last dimension.  Its axis 2
            must have size ``hp.word_embed_size + hp.filter_h - 1`` so that
            the padding block can be concatenated.

    Returns:
        ``x`` with the zero block concatenated in front of it on axis 1.
    """
    channels = x.get_shape().as_list()[3]
    # The batch size may only be known at run time, so read it dynamically.
    batch_size = tf.shape(x)[0]
    zero_pad = tf.zeros([
        batch_size,
        hp.filter_h - 1,
        hp.word_embed_size + hp.filter_h - 1,
        channels,
    ])
    return tf.concat((zero_pad, x), 1)
def _init_env(self):
    """Build the prediction graph, open a session and restore the checkpoint.

    Creates the model and decode hparams and the estimator, wires
    placeholder-based input features, calls the estimator's model function
    in PREDICT mode, opens a ``tf.Session`` (GPU or CPU configuration
    depending on ``self._isGpu``) and restores the latest checkpoint found
    in ``self._model_dir``.
    """
    FLAGS.use_tpu = False
    tf.logging.set_verbosity(tf.logging.DEBUG)
    tf.logging.info("Import usr dir from %s", self._usr_dir)
    if self._usr_dir is not None:
        # NOTE(review): the import uses FLAGS.t2t_usr_dir, not
        # self._usr_dir -- confirm the two always agree.
        usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                    self._hparams_set)

    self._hparams = create_hparams()
    self._hparams_decode = create_decode_hparams(
        extra_length=self._extra_length,
        batch_size=self._batch_size,
        beam_size=self._beam_size,
        alpha=self._alpha,
        return_beams=self._return_beams,
        write_beam_scores=self._write_beam_scores)

    self.estimator = trainer_lib.create_estimator(
        FLAGS.model,
        self._hparams,
        t2t_trainer.create_run_config(self._hparams),
        decode_hparams=self._hparams_decode,
        use_tpu=False)

    tf.logging.info("Finish intialize environment")

    # Problem type: classification output, sequence, or language model.
    self.problem_type = self._hparams.problems[0].target_modality[0]
    self._whether_has_inputs = self._hparams.problem_instances[0].has_inputs
    # Class-label problems are always decoded with a beam size of 1.
    self._beam_size = (1 if self.problem_type == 'class_label'
                       else self._beam_size)

    # Make the input placeholder.
    # NOTE(review): this first placeholder is overwritten a few lines below
    # and is never fed; it is kept so the graph stays unchanged.
    self._inputs_ph = tf.placeholder(dtype=tf.int32)  # any shape
    x = tf.placeholder(dtype=tf.int32)
    x.set_shape([None, None])  # ? -> (?, ?)
    x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
    x = tf.to_int32(x)
    # Inputs are fed through the expanded tensor, not the raw placeholder.
    self._inputs_ph = x
    batch_inputs = x

    self._features = {
        "inputs": batch_inputs,
        "problem_choice": 0,  # We run on the first problem here.
        "input_space_id": self._hparams.problems[0].input_space_id,
        "target_space_id": self._hparams.problems[0].target_space_id
    }

    # Add a variable decode length.
    self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
    self._features['decode_length'] = self.input_extra_length_ph

    # Scorer models additionally need a targets placeholder.
    if 'score' in self._model_name.lower():
        self._targets_ph = tf.placeholder(tf.int32,
                                          shape=(1, None, 1, 1),
                                          name='targets')
        self._features['targets'] = self._targets_ph  # batch targets
    self._target_pretend = np.zeros((1, 1, 1, 1))

    mode = tf.estimator.ModeKeys.PREDICT
    # This goes through the estimator's private model-fn entry point to
    # build the prediction graph on the features defined above.
    predictions_dict = self.estimator._call_model_fn(
        self._features, None, mode,
        t2t_trainer.create_run_config(self._hparams))
    self._predictions_dict = predictions_dict.predictions
    # The "scores" entry is not returned when decoding greedily.

    tf.logging.info("Start to init tf session")
    if self._isGpu:
        print('Using GPU in Decoder')
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self._fraction)
        self._sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  gpu_options=gpu_options))
    else:
        print('Using CPU in Decoder')
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
        config = tf.ConfigProto(gpu_options=gpu_options)
        config.allow_soft_placement = True
        config.log_device_placement = False
        self._sess = tf.Session(config=config)

    with self._sess.as_default():
        # NOTE(review): ckpt is used without a None check, so a model
        # directory without a checkpoint fails with an AttributeError here.
        ckpt = saver_mod.get_checkpoint_state(self._model_dir)
        saver = tf.train.Saver()
        tf.logging.info("Start to restore the parameters from %s",
                        ckpt.model_checkpoint_path)
        saver.restore(self._sess, ckpt.model_checkpoint_path)
    tf.logging.info("Finish intialize environment")
def _target_len_mask(targets, sequence_length):
    """Build a float32 mask that zeroes positions past each sequence length.

    Args:
        targets: Tensor whose axis 1 is the (padded) time axis.
        sequence_length: Valid length of every example.

    Returns:
        A float32 mask that is 1.0 inside each sequence and 0.0 beyond it.
    """
    time_steps = tf.shape(targets)[1]
    valid_lengths = tf.to_int32(sequence_length)
    return tf.sequence_mask(lengths=valid_lengths,
                            maxlen=time_steps,
                            dtype=tf.float32)
def ae_transformer_internal(inputs,
                            targets,
                            target_space,
                            hparams,
                            cache=None,
                            predict_mask=1.0):
    """AE Transformer, main step used for training.

    Args:
        inputs: Encoder input, or None. When None the encoder is skipped and
            the batch size is read from `targets` instead.
        targets: Target tensor; reshaped to
            [batch, -1, 1, hparams.hidden_size] before any other use.
        target_space: Forwarded unchanged to `encode`.
        hparams: Hyperparameter object; the fields read here include
            `do_ae`, `do_mask`, `do_refine`, `task`, `mode`, `bottleneck`
            and `bottleneck_kind`.
        cache: Optional previously sampled latents. Only consulted in PREDICT
            mode with a non-dense/non-vae bottleneck; when None it is filled
            by `ae_latent_sample`.
        predict_mask: Value substituted for the scheduled masking rate when
            `hparams.use_predict_mask` is set or the mode is PREDICT.

    Returns:
        A tuple `(res, losses, cache)`: the decoder output cropped to the
        length of the reshaped targets, a dict with the keys "extra",
        "latent_pred" and "neg_q_entropy", and the latent cache.
    """
    # Summaries break with the do_refine cond, turn them off in that case.
    global _DO_SUMMARIES
    if hparams.do_refine:
        _DO_SUMMARIES = False

    # Prepare.
    if inputs is not None:
        batch_size = common_layers.shape_list(inputs)[0]
    else:
        batch_size = common_layers.shape_list(targets)[0]
    targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size])

    # Record the unpadded target shape unconditionally. It used to be bound
    # only inside the `do_ae` branch, yet the final crop of `res` reads it, so
    # a run with `do_ae=False` could hit an unbound local. `targets` is not
    # modified between here and the start of the `do_ae` branch, so the value
    # is the same one that branch used to compute.
    original_targets_shape = tf.shape(targets)

    # Encoder.
    if inputs is not None:
        inputs = common_layers.flatten4d3d(inputs)
        inputs, ed = encode(inputs, target_space, hparams, "input_enc")
        inputs_ex, ed_ex = inputs, ed
    else:
        ed, inputs_ex, ed_ex = None, None, None

    # Autoencoding.
    losses = {
        "extra": tf.constant(0.0),
        "latent_pred": tf.constant(0.0),
        "neg_q_entropy": tf.constant(0.0)
    }
    if hparams.do_ae:
        # flatten here
        if hparams.task == "image":
            # NOTE(review): the return value is discarded, so unless the
            # helper mutates its argument this call does not change
            # `targets` -- confirm the intent before relying on it.
            cia.maybe_reshape_4d_to_3d(targets)
        if hparams.task == "translate":
            if inputs is not None:
                max_targets_len_from_inputs = tf.concat([inputs, inputs],
                                                        axis=1)
            else:
                max_targets_len_from_inputs = targets
        else:
            assert hparams.task == "image"
            max_targets_len_from_inputs = targets
        if hparams.word_shuffle:
            tf.logging.info("Using word shuffle with rate = {}".format(
                hparams.word_shuffle))
            targets_idx = tf.range(
                start=0,
                limit=common_layers.shape_list(targets)[1],
                delta=1)
            targets_idx = tf.to_float(targets_idx)
            # Adding bounded noise to the positions and re-sorting them
            # yields a local shuffle of the target sequence.
            noise = tf.random_uniform(
                shape=common_layers.shape_list(targets_idx),
                minval=0,
                maxval=1 + hparams.word_shuffle)
            targets_idx += noise
            permutation = tf.contrib.framework.argsort(targets_idx)
            targets_permuted = tf.gather(targets, indices=permutation, axis=1)
            targets = targets_permuted
        targets, _ = common_layers.pad_to_same_length(
            targets,
            max_targets_len_from_inputs,
            final_length_divisible_by=2**hparams.num_compress_steps)
        if hparams.word_dropout:
            mask = tf.random_uniform(
                shape=common_layers.shape_list(targets),
                minval=0.0,
                maxval=1.0)
            targets_noisy = tf.where(mask > hparams.word_dropout, targets,
                                     tf.zeros_like(targets))
        else:
            targets_noisy = targets

        targets_c = compress(targets_noisy, inputs, False, hparams, "compress")
        if hparams.mode != tf.estimator.ModeKeys.PREDICT:
            # Compress and bottleneck.
            latents_dense, latents_discrete, extra_loss, embed, neg_q_entropy = (
                hparams.bottleneck(
                    inputs=targets_c,
                    filter_size=hparams.compress_filter_size,
                    mode=hparams.mode,
                    name="vc"))
            if _DO_SUMMARIES:
                tf.summary.histogram(
                    "b0", tf.reshape(latents_discrete[:, 0, :], [-1]))
            pc = common_layers.inverse_exp_decay(hparams.startup_steps)
            pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0
            cond = tf.less(tf.random_uniform([batch_size]), pc)
            latents_dense = tf.where(cond, latents_dense, targets_c)
            # TODO(lukaszkaiser): return extra losses batchwise, multiply
            # before mean.
            losses["extra"] = extra_loss * tf.reduce_mean(tf.to_float(cond))
            # Extra loss predicting latent code from input. Discrete only.
            if hparams.bottleneck_kind not in ["dense", "vae"]:
                latents_pred = decode_transformer(
                    inputs_ex,
                    ed_ex,
                    embed(latents_discrete),
                    hparams,
                    "extra",
                    task="translate")
                _, latent_pred_loss = ae_latent_softmax(
                    latents_pred, tf.stop_gradient(latents_discrete), hparams)

                # Scale by latent dimension for summary so we can compare
                # across batches.
                if _DO_SUMMARIES:
                    tf.summary.scalar("latent_pred_loss_mean",
                                      tf.reduce_mean(latent_pred_loss))
                if hparams.sum_over_latents:
                    latent_pred_loss = tf.reduce_sum(latent_pred_loss, [1, 2])

                losses["latent_pred"] = tf.reduce_mean(
                    latent_pred_loss * tf.to_float(cond)) * hparams.prior_scale
                losses["neg_q_entropy"] = neg_q_entropy * hparams.entropy_scale
            else:
                inputs_c = decode_transformer(inputs, ed, targets_c, hparams,
                                              "dec_c")
                losses["latent_pred"] = tf.reduce_mean(
                    (inputs_c - targets_c)**2) * 20

                def bn_inputs():
                    # Re-applies the "vc" bottleneck with reuse=True.
                    with tf.variable_scope(
                            tf.get_variable_scope(), reuse=True):
                        bn, _, _, _, _ = hparams.bottleneck(
                            inputs=inputs_c,
                            filter_size=hparams.compress_filter_size,
                            mode=hparams.mode,
                            name="vc")
                    return bn

                inputs_c = bn_inputs()
                ptc = 1.0 - common_layers.inverse_lin_decay(200000) * 0.5
                ptc = ptc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0
                latents_dense = tf.where(
                    tf.less(tf.random_uniform([batch_size]), ptc),
                    latents_dense, inputs_c)
        else:
            if hparams.bottleneck_kind in ["dense", "vae"]:
                inputs_c = decode_transformer(inputs, ed, targets_c, hparams,
                                              "dec_c")
                latents_dense, _, _, _, _ = hparams.bottleneck(
                    inputs=inputs_c,
                    filter_size=hparams.compress_filter_size,
                    mode=hparams.mode,
                    name="vc")
            else:
                latent_len = common_layers.shape_list(targets_c)[1]
                _, _, _, embed, _ = hparams.bottleneck(
                    inputs=targets_c,
                    filter_size=hparams.compress_filter_size,
                    name="vc")
                latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :])
                if cache is None:
                    cache = ae_latent_sample(latents_dense, inputs_ex, ed_ex,
                                             embed, 16, hparams)
                latents_dense = embed(cache)

        # Postprocess.
        d = latents_dense
        latent_len = common_layers.shape_list(latents_dense)[1]
        if isinstance(latent_len, tf.Tensor):
            # TODO(trandustin): Fix this in a better manner.
            latent_len = max(1000, hparams.max_length)
        pos = tf.get_variable("pos",
                              [1, latent_len + 1, 1, hparams.hidden_size])
        pos = pos[:, :common_layers.shape_list(latents_dense)[1] + 1, :, :]
        latents_dense = tf.pad(latents_dense,
                               [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos

        # decompressing the dense latents
        for i in range(hparams.num_compress_steps):
            j = hparams.num_compress_steps - i - 1
            d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j)
            if inputs is not None and hparams.do_attend_decompress:
                d = attend(d, inputs, hparams, "decompress_attend_%d" % j)
            d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j)

        # Masking.
        if hparams.do_mask:
            masking = common_layers.inverse_lin_decay(
                hparams.mask_startup_steps)
            masking *= common_layers.inverse_exp_decay(
                hparams.mask_startup_steps // 4)  # Not much at start.
            if not hparams.do_refine:
                masking -= tf.random_uniform([]) * hparams.unmasked_percentage
            masking = tf.minimum(tf.maximum(masking, 0.0), 1.0)
            if hparams.use_predict_mask:
                masking = predict_mask
            if hparams.mode == tf.estimator.ModeKeys.PREDICT:
                masking = predict_mask
            mask = tf.less(
                masking,
                tf.random_uniform(common_layers.shape_list(targets)[:-1]))
            mask = tf.expand_dims(tf.to_float(mask), 3)

            # targets is always [batch, length, 1, depth]
            targets = mask * targets + (1.0 - mask) * d
            # reshape back to 4d here
            if hparams.task == "image":
                targets = tf.reshape(targets, original_targets_shape)

    res = decode_transformer(
        inputs, ed, targets, hparams, "decoder", causal=hparams.causal)
    if hparams.do_ae:
        if hparams.do_mask and hparams.do_refine:

            def refine_res():
                # return residual_conv(res, 1, (5, 1), hparams, "refine")
                r, _ = encode(
                    tf.squeeze(res, axis=[2]), target_space, hparams,
                    "refine_enc")
                return tf.expand_dims(r, axis=2)

            masked_batches = tf.reduce_sum(mask, axis=[1, 2, 3])
            all_masked = tf.less(masked_batches, 0.1)
            res = tf.where(all_masked, refine_res(), res)
        # We'll start training the extra model of latents after
        # mask_startup_steps.
        nonlatent_steps = hparams.mask_startup_steps
        latent_time = tf.less(nonlatent_steps,
                              tf.to_int32(tf.train.get_global_step()))
        losses["latent_pred"] *= tf.to_float(latent_time)

    # res was generated from padded targets, which means it has some extra
    # elements. These can cause shape problems when computing loss with
    # respect to the original (unpadded) targets. So we remove their extra
    # elements here.
    res = res[:, :original_targets_shape[1], :, :]

    return res, losses, cache
def _get_dictionary_tensor(dictionary_path, charset):
    """Load a dictionary via `dictionary_from_file` as a dense int32 tensor.

    Args:
        dictionary_path: Forwarded to `dictionary_from_file`.
        charset: Forwarded to `dictionary_from_file`.

    Returns:
        The loader's result cast to int32 and densified with
        `tf.sparse_tensor_to_dense`.
    """
    # presumably a SparseTensor, since it is densified below -- TODO confirm
    loaded = dictionary_from_file(dictionary_path, charset)
    as_int32 = tf.to_int32(loaded)
    return tf.sparse_tensor_to_dense(as_int32)
def _preprocess(self, features, labels):
    """Model-specific preprocessing for features and labels:

    - Creates vocabulary lookup tables for source, source-candidate and
      target vocab
    - Truncates token sequences to the configured maximum lengths
    - Converts tokens into vocabulary ids

    Args:
        features: Dict with the keys "source_tokens", "source_len",
            "source_candidate_tokens" and "source_candidate_len". It is
            updated in place.
        labels: Dict with the keys "target_tokens" and "target_len", or
            None. A shallow copy is made before it is modified.

    Returns:
        A tuple `(features, labels)`; `labels` is None when None was passed
        in.
    """
    # Create vocabulary lookup for source
    source_vocab_to_id, source_id_to_vocab, source_word_to_count, _ = \
        vocab.create_vocabulary_lookup_table(self.source_vocab_info.path)

    source_candidate_vocab_to_id, source_candidate_id_to_vocab, \
        source_candidate_word_to_count, _ = \
        vocab.create_vocabulary_lookup_table(
            self.source_candidate_vocab_info.path)

    # Create vocabulary look for target
    target_vocab_to_id, target_id_to_vocab, target_word_to_count, _ = \
        vocab.create_vocabulary_lookup_table(self.target_vocab_info.path)

    # Add vocab tables to graph colection so that we can access them in
    # other places.
    graph_utils.add_dict_to_collection(
        {
            "source_vocab_to_id": source_vocab_to_id,
            "source_id_to_vocab": source_id_to_vocab,
            "source_word_to_count": source_word_to_count,
            "source_candidate_vocab_to_id": source_candidate_vocab_to_id,
            "source_candidate_id_to_vocab": source_candidate_id_to_vocab,
            "source_candidate_word_to_count": source_candidate_word_to_count,
            "target_vocab_to_id": target_vocab_to_id,
            "target_id_to_vocab": target_id_to_vocab,
            "target_word_to_count": target_word_to_count
        }, "vocab_tables")

    # Slice source to max_len
    if self.params["source.max_seq_len"] is not None:
        features["source_tokens"] = features[
            "source_tokens"][:, :self.params["source.max_seq_len"]]
        features["source_len"] = tf.minimum(
            features["source_len"], self.params["source.max_seq_len"])

    # Slice source_candidate to max_len
    if self.params["source_candidate.max_seq_len"] is not None:
        features["source_candidate_tokens"] = features[
            "source_candidate_tokens"][:, :self.params[
                "source_candidate.max_seq_len"]]
        features["source_candidate_len"] = tf.minimum(
            features["source_candidate_len"],
            self.params["source_candidate.max_seq_len"])

    # Look up the source ids in the vocabulary
    features["source_ids"] = source_vocab_to_id.lookup(
        features["source_tokens"])
    features["source_candidate_ids"] = source_candidate_vocab_to_id.lookup(
        features["source_candidate_tokens"])

    # Maybe reverse the source
    if self.params["source.reverse"] is True:
        features["source_ids"] = tf.reverse_sequence(
            input=features["source_ids"],
            seq_lengths=features["source_len"],
            seq_dim=1,
            batch_dim=0,
            name=None)
        # NOTE(review): the candidate reversal is assumed to belong to the
        # same `source.reverse` branch -- confirm.
        features["source_candidate_ids"] = tf.reverse_sequence(
            input=features["source_candidate_ids"],
            seq_lengths=features["source_candidate_len"],
            seq_dim=1,
            batch_dim=0,
            name=None)

    features["source_len"] = tf.to_int32(features["source_len"])
    tf.summary.histogram("source_len", tf.to_float(features["source_len"]))

    features["source_candidate_len"] = tf.to_int32(
        features["source_candidate_len"])
    tf.summary.histogram("source_candidate_len",
                         tf.to_float(features["source_candidate_len"]))

    if labels is None:
        return features, None

    labels = labels.copy()

    # Slices targets to max length
    if self.params["target.max_seq_len"] is not None:
        labels["target_tokens"] = labels[
            "target_tokens"][:, :self.params["target.max_seq_len"]]
        labels["target_len"] = tf.minimum(
            labels["target_len"], self.params["target.max_seq_len"])

    # Look up the target ids in the vocabulary
    labels["target_ids"] = target_vocab_to_id.lookup(labels["target_tokens"])

    labels["target_len"] = tf.to_int32(labels["target_len"])
    tf.summary.histogram("target_len", tf.to_float(labels["target_len"]))

    # Keep track of the number of processed tokens
    num_tokens = tf.reduce_sum(labels["target_len"])
    num_tokens += tf.reduce_sum(features["source_len"])
    num_tokens += tf.reduce_sum(features["source_candidate_len"])
    # The second positional parameter of tf.Variable is `trainable`, not
    # `name`: the old call `tf.Variable(0, "tokens_counter")` therefore
    # registered this bookkeeping counter as a trainable variable and never
    # named it. It is now explicitly non-trainable. The auto-generated
    # variable name is kept on purpose so that existing checkpoints, which
    # store the counter under that name, still restore.
    token_counter_var = tf.Variable(0, trainable=False)
    total_tokens = tf.assign_add(token_counter_var, num_tokens)
    tf.summary.scalar("num_tokens", total_tokens)

    # Tie the counter update to the token tensors so that it runs whenever
    # they are evaluated.
    with tf.control_dependencies([total_tokens]):
        features["source_tokens"] = tf.identity(features["source_tokens"])
        features["source_candidate_tokens"] = tf.identity(
            features["source_candidate_tokens"])

    # Add to graph collection for later use
    graph_utils.add_dict_to_collection(features, "features")
    if labels:
        graph_utils.add_dict_to_collection(labels, "labels")

    print("attention_biseqseq features:{} labels:{}".format(
        features, labels))
    return features, labels
def _build_metrics(labels, predictions, weights, batch_losses):
    """Creates the TensorFlow ops that track model evaluation metrics.

    Args:
        labels: Tensor with shape [batch_size].
        predictions: Tensor with shape [batch_size, output_dim].
        weights: Tensor with shape [batch_size].
        batch_losses: Tensor with shape [batch_size].

    Returns:
        A dictionary {metric_name: (metric_value, update_op)}.
    """
    # A single output column is treated as binary classification.
    assert len(predictions.shape) == 2
    is_binary = (predictions.shape[1] == 1)

    if not is_binary:
        predicted_labels = tf.argmax(
            predictions, 1, name="predicted_labels", output_type=tf.int32)
    else:
        predictions = tf.squeeze(predictions, axis=[1])
        above_half = tf.greater(predictions, 0.5)
        predicted_labels = tf.to_int32(above_half, name="predicted_labels")

    metrics = {}
    with tf.variable_scope("metrics"):
        # Running sum of the example weights.
        example_total = _metric_variable("num_examples", [], tf.float32)
        add_examples = tf.assign_add(example_total, tf.reduce_sum(weights))
        metrics["num_examples"] = (example_total.read_value(), add_examples)

        # Running weighted count of correct predictions, and their ratio.
        correct_total = _metric_variable("num_correct", [], tf.float32)
        weighted_hits = weights * tf.to_float(
            tf.equal(labels, predicted_labels))
        add_correct = tf.assign_add(correct_total,
                                    tf.reduce_sum(weighted_hits))
        metrics["accuracy/num_correct"] = (correct_total.read_value(),
                                           add_correct)
        ratio = tf.div(correct_total, example_total, name="accuracy")
        # The ratio is derived from the two counters, so it needs no update.
        metrics["accuracy/accuracy"] = (ratio, tf.no_op())

        # Weighted mean of the per-example losses.
        metrics["losses/weighted_cross_entropy"] = tf.metrics.mean(
            batch_losses, weights=weights, name="cross_entropy_loss")

        # Binary classification additionally gets AUC and a confusion matrix.
        if is_binary:
            labels = tf.cast(labels, dtype=tf.bool)
            predicted_labels = tf.cast(predicted_labels, dtype=tf.bool)

            metrics["auc"] = tf.metrics.auc(
                labels, predictions, weights=weights, num_thresholds=1000)

            def _count_condition(name, labels_value, predicted_value):
                """Weighted counter of one (label, prediction) combination."""
                counter = _metric_variable(name, [], tf.float32)
                label_matches = tf.equal(labels, labels_value)
                prediction_matches = tf.equal(predicted_labels,
                                              predicted_value)
                selected = tf.to_float(
                    tf.logical_and(label_matches, prediction_matches))
                bump = tf.assign_add(counter,
                                     tf.reduce_sum(weights * selected))
                return counter.read_value(), bump

            # (metric suffix, label value, predicted value)
            confusion_cells = (
                ("true_positives", True, True),
                ("false_positives", False, True),
                ("true_negatives", False, False),
                ("false_negatives", True, False),
            )
            for cell_name, label_value, predicted_value in confusion_cells:
                metrics["confusion_matrix/" + cell_name] = _count_condition(
                    cell_name,
                    labels_value=label_value,
                    predicted_value=predicted_value)

    return metrics
def call(self, inputs, training=None, **kwargs):
    """Apply the deformable convolution to `inputs`.

    The steps are: predict a per-tap (y, x) offset with an ordinary
    convolution, bilinearly sample the (padded) input at the shifted
    positions, then reduce the samples with a depthwise convolution whose
    stride equals the kernel size.

    All spatial sizes and the batch size are read from static shapes with
    `int(...)`, so every dimension of `inputs` must be statically known.

    Args:
        inputs: Input feature map; indexed below as
            [batch, height, width, channels].
        training: Unused.
        **kwargs: Unused.

    Returns:
        `self.activation` applied to the convolution result (plus
        `self.bias` when `self.use_bias` is set).
    """
    # get offset, shape
    # [batch_size, out_h, out_w, filter_h * filter_w * channel_out * 2]
    offset = tf.nn.conv2d(
        inputs,
        filter=self.offset_layer_kernel,
        strides=[1, *self.strides, 1],
        padding=self.padding.upper(),
        dilations=[1, *self.dilation_rate, 1])
    offset += self.offset_layer_bias

    # add padding if needed
    inputs = self._pad_input(inputs)

    # some length
    batch_size = int(inputs.get_shape()[0])
    channel_in = int(inputs.get_shape()[-1])
    # input feature map size
    in_h, in_w = [int(i) for i in inputs.get_shape()[1:3]]
    # output feature map size
    out_h, out_w = [int(i) for i in offset.get_shape()[1:3]]
    filter_h, filter_w = self.kernel_size

    # get x, y axis offset
    offset = tf.reshape(offset, [batch_size, out_h, out_w, -1, 2])
    y_off, x_off = offset[:, :, :, :, 0], offset[:, :, :, :, 1]

    # input feature map grid coordinates
    y, x = self._get_conv_indices([in_h, in_w])
    y, x = [tf.expand_dims(i, axis=-1) for i in [y, x]]
    y, x = [
        tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group])
        for i in [y, x]
    ]
    y, x = [tf.reshape(i, [*i.shape[0:3], -1]) for i in [y, x]]
    y, x = [tf.to_float(i) for i in [y, x]]

    # add offset
    y, x = y + y_off, x + x_off
    y = tf.clip_by_value(y, 0, in_h - 1)
    x = tf.clip_by_value(x, 0, in_w - 1)

    # get four coordinates of points around (x, y)
    y0, x0 = [tf.to_int32(tf.floor(i)) for i in [y, x]]
    y1, x1 = y0 + 1, x0 + 1

    # Interpolation weights are measured against the *unclipped* upper
    # neighbours (floor + 1). Previously they used the clipped y1/x1; for a
    # sample sitting exactly on the last row or column that made y0 == y1
    # (or x0 == x1), so all four weights collapsed to 0 and the sample came
    # out as zero instead of the border pixel.
    y0_f, x0_f = tf.to_float(y0), tf.to_float(x0)
    wy_lo = (y0_f + 1.0) - y  # weight of row y0
    wy_hi = y - y0_f  # weight of row y1
    wx_lo = (x0_f + 1.0) - x  # weight of column x0
    wx_hi = x - x0_f  # weight of column x1

    # clip the gather indices only; where y1/x1 is clipped its weight is 0
    y0, y1 = [tf.clip_by_value(i, 0, in_h - 1) for i in [y0, y1]]
    x0, x1 = [tf.clip_by_value(i, 0, in_w - 1) for i in [x0, x1]]

    # get pixel values
    indices = [[y0, x0], [y0, x1], [y1, x0], [y1, x1]]
    p0, p1, p2, p3 = [
        DeformableConvLayer._get_pixel_values_at_point(inputs, i)
        for i in indices
    ]

    # weights, in the same order as `indices`
    w0 = wy_lo * wx_lo
    w1 = wy_lo * wx_hi
    w2 = wy_hi * wx_lo
    w3 = wy_hi * wx_hi
    # expand dim for broadcast
    w0, w1, w2, w3 = [tf.expand_dims(i, axis=-1) for i in [w0, w1, w2, w3]]

    # bilinear interpolation
    pixels = tf.add_n([w0 * p0, w1 * p1, w2 * p2, w3 * p3])

    # reshape the "big" feature map so that every output position owns a
    # filter_h x filter_w patch of sampled pixels
    pixels = tf.reshape(pixels, [
        batch_size, out_h, out_w, filter_h, filter_w,
        self.num_deformable_group, channel_in
    ])
    pixels = tf.transpose(pixels, [0, 1, 3, 2, 4, 5, 6])
    pixels = tf.reshape(pixels, [
        batch_size, out_h * filter_h, out_w * filter_w,
        self.num_deformable_group, channel_in
    ])

    # copy channels to same group
    feat_in_group = self.filters // self.num_deformable_group
    pixels = tf.tile(pixels, [1, 1, 1, 1, feat_in_group])
    pixels = tf.reshape(
        pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1])

    # depth-wise conv; striding by the kernel size visits each patch once
    out = tf.nn.depthwise_conv2d(pixels, self.kernel,
                                 [1, filter_h, filter_w, 1], 'VALID')
    # add the output feature maps in the same group
    out = tf.reshape(out,
                     [batch_size, out_h, out_w, self.filters, channel_in])
    out = tf.reduce_sum(out, axis=-1)
    if self.use_bias:
        out += self.bias
    return self.activation(out)
def _init_env(self):
    """Build the model graph, open a session and restore the checkpoint.

    Steps, in order: import the optional user directory, create the hparams
    and the estimator, build the feature placeholders, call the estimator's
    model function once to obtain the prediction tensors, create the
    `tf.Session`, and restore the latest checkpoint from `self._model_dir`.

    Attributes set here include `_hparams`, `_hparams_decode`, `estimator`,
    `problem_type`, `_whether_has_inputs`, `_inputs_ph`,
    `input_extra_length_ph`, `_features`, `_predictions_dict` and `_sess`;
    `_targets_ph` and `_score` are set only for the problem types that need
    them.

    Raises:
        ValueError: if no checkpoint is found in `self._model_dir`.
    """
    FLAGS.use_tpu = False
    tf.logging.info("Import usr dir from %s", self._usr_dir)
    if self._usr_dir is not None:
        usr_dir.import_usr_dir(self._usr_dir)
    tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                    self._hparams_set)

    self._hparams = create_hparams()
    self._hparams_decode = create_decode_hparams(
        extra_length=self._extra_length,
        batch_size=self._batch_size,
        beam_size=self._beam_size,
        alpha=self._alpha,
        return_beams=self._return_beams,
        write_beam_scores=self._write_beam_scores,
        force_decode_length=self._force_decode_length)

    self.estimator = trainer_lib.create_estimator(
        FLAGS.model,
        self._hparams,
        t2t_trainer.create_run_config(self._hparams),
        decode_hparams=self._hparams_decode,
        use_tpu=False)

    tf.logging.info("Finish intialize environment")

    # Problem type: whether the output is a class, a sequence or a
    # language model.
    self.problem_type = self._hparams.problem_hparams.target_modality[0]
    self._whether_has_inputs = self._hparams.problem.has_inputs
    if self._customer_problem_type == 'classification':
        self._beam_size = 1

    # Input placeholder: (?, ?) expanded to (?, ?, 1).
    x = tf.placeholder(dtype=tf.int32)
    x.set_shape([None, None])
    x = tf.expand_dims(x, axis=[2])
    x = tf.to_int32(x)
    self._inputs_ph = x
    batch_inputs = x

    self._features = {
        "inputs": batch_inputs,
        "problem_choice": 0,  # We run on the first problem here.
        "input_space_id": self._hparams.problem_hparams.input_space_id,
        "target_space_id": self._hparams.problem_hparams.target_space_id
    }

    # Variable decode length: the number of extra steps is fed at run time.
    self.input_extra_length_ph = tf.placeholder(dtype=tf.int32, shape=[])
    self._features['decode_length'] = self.input_extra_length_ph

    # Some configurations must not receive the problem/space-id features.
    # The conditions are not mutually exclusive, so the keys are removed at
    # most once; deleting them separately per condition raised KeyError as
    # soon as two conditions held together.
    drop_problem_features = (
        self._hparams_set == "transformer_relative"
        or self._customer_problem_type == 'languageModel_pp'
        or self._model_name in ['slice_net', 'transformer_encoder']
        or (self._model_name == 'transformer'
            and self._customer_problem_type == 'classification'))
    if drop_problem_features:
        for key in ('problem_choice', 'input_space_id', 'target_space_id'):
            self._features.pop(key, None)

    # Classification and language-model scoring also take targets as input.
    if self._customer_problem_type in ('classification',
                                       'languageModel_pp'):
        self._targets_ph = tf.placeholder(
            tf.int32, shape=(None, None, None, None), name='targets')
        self._features['targets'] = self._targets_ph

    # Estimator mode used to build the graph.
    mode = tf.estimator.ModeKeys.PREDICT
    if self._customer_problem_type == 'languageModel_pp':
        mode = tf.estimator.ModeKeys.EVAL
    elif (self._customer_problem_type == 'classification'
          and 'score' not in self._model_name):
        mode = tf.estimator.ModeKeys.EVAL

    predictions_dict = self.estimator._call_model_fn(
        self._features, None, mode,
        t2t_trainer.create_run_config(self._hparams))
    self._predictions_dict = predictions_dict.predictions

    # For classification, expose class probabilities under 'scores_class'.
    # Scorer models publish their logits as 'scores', other models as
    # 'predictions'.
    if self._customer_problem_type == 'classification':
        if 'score' in self._model_name:
            score_key = 'scores'
        else:
            score_key = 'predictions'
        self._score = predictions_dict.predictions.get(score_key)
        # Identity check: comparing a tensor with `!= None` depends on the
        # TensorFlow version's operator overloading.
        if self._score is not None:
            self._predictions_dict['scores_class'] = tf.exp(
                common_layers.log_prob_from_logits(self._score))

    tf.logging.info("Start to init tf session")
    if self._isGpu:
        print('Using GPU in Decoder')
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self._fraction)
    else:
        print('Using CPU in Decoder')
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=gpu_options)
    self._sess = tf.Session(config=config)

    with self._sess.as_default():
        ckpt = saver_mod.get_checkpoint_state(self._model_dir)
        if ckpt is None or not ckpt.model_checkpoint_path:
            # Fail with a clear message rather than an AttributeError on
            # None, and do not leave the new session open.
            self._sess.close()
            raise ValueError(
                "No checkpoint found in %s" % self._model_dir)
        saver = tf.train.Saver(allow_empty=True)
        tf.logging.info("Start to restore the parameters from %s",
                        ckpt.model_checkpoint_path)
        saver.restore(self._sess, ckpt.model_checkpoint_path)
    tf.logging.info("Finish intialize environment")
def Single_acc(self):
    """Count predictions that equal `self.argmax_idx` into `self.acc_num`.

    Takes the argmax of `self.y` along axis 1, casts it to int32, compares
    it element-wise with `self.argmax_idx` and stores the float32 number of
    matches on the instance. Nothing is returned.
    """
    predicted_idx = tf.to_int32(tf.argmax(self.y, axis=1))
    hits = tf.equal(predicted_idx, self.argmax_idx)
    self.acc_num = tf.reduce_sum(tf.cast(hits, tf.float32))