def occludeCenter(self, parsed_features):
    '''
    Decode a JPEG, resize it to 227x227 and occlude a centered square.

    The square's side is sqrt(self.occlusionRatio) * 227 pixels, i.e. the
    square covers `self.occlusionRatio` of the resized image's area. The
    actual occlusion is delegated to `self.occlude`.

    @params:
        parsed_features: dict of features. The keys read here are
            'image/encoded' (JPEG image data in String), 'image/height',
            'image/width', 'image/channels' and 'image/class/label'.
    @return:
        (image, one_hot):
            image: float32 tensor, channels reversed (RGB -> BGR) with
                IMAGENET_MEAN subtracted.
            one_hot: one-hot encoding of `label - 1` over
                `self.num_classes` classes (labels are presumably 1-based;
                confirm against the dataset).
    '''
    image_data = parsed_features['image/encoded']
    height = parsed_features['image/height']
    width = parsed_features['image/width']
    channels = parsed_features['image/channels']

    # In graph mode an assert op only executes when another op depends on it.
    # Gate the decode on the checks so they actually run instead of being
    # created and silently dropped.
    feature_checks = [
        tf.assert_equal(channels,
                        tf.constant([3], channels.dtype),
                        message="Channels not equal 3"),
        tf.assert_none_equal(height,
                             tf.constant([0], height.dtype),
                             message="Height is 0"),
        tf.assert_none_equal(width,
                             tf.constant([0], width.dtype),
                             message="Width is 0"),
    ]
    with tf.control_dependencies(feature_checks):
        image = tf.image.decode_jpeg(image_data)

    imageResized = tf.image.resize_images(
        image, [227, 227], tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # Side of the occluded square as a fraction of the image side.
    side = tf.sqrt(self.occlusionRatio)
    offset = tf.cast(((1.0 - side) / 2) * 227, tf.int32)
    target = tf.cast(side * 227, tf.int32)
    imageOccluded = self.occlude(imageResized, offset, offset, target, target)

    # RGB -> BGR, the model was trained on BGR image from Caffe
    bgrImageOccluded = imageOccluded[:, :, ::-1]
    image = tf.subtract(tf.cast(bgrImageOccluded, tf.float32), IMAGENET_MEAN)

    label = tf.cast(parsed_features['image/class/label'], tf.int32)
    one_hot = tf.one_hot(label - 1, self.num_classes)
    return image, one_hot
def _maybe_attach_assertion(x):
    """Return `x`, gated on a diagonal validity check when validation is on.

    Reads `validate_args` and `assert_positive` from the enclosing scope:
    with validation off `x` is returned untouched; otherwise `x` is returned
    with a control dependency on either a positivity check or a non-zero
    check.
    """
    if validate_args:
        if assert_positive:
            check = tf.assert_positive(
                x, message="diagonal part must be positive")
        else:
            check = tf.assert_none_equal(
                x,
                tf.zeros([], x.dtype),
                message="diagonal part must be non-zero")
        return control_flow_ops.with_dependencies([check], x)
    return x
def __init__(self,
             shift=None,
             scale=None,
             validate_args=False,
             name="affine_scalar"):
    """Instantiates the `AffineScalar` bijector.

    The forward operation built from `shift` and `scale` is:

    ```none
    Y = g(X) = scale * X + shift
    ```

    Leaving `scale` unset gives the semantics of `scale = 1.`; leaving
    `shift` unset gives the semantics of `shift = 0.`.

    Args:
      shift: Floating-point `Tensor`. `None` means no shift is applied.
      scale: Floating-point `Tensor`. `None` means no scale is applied.
      validate_args: Python `bool`. When `True` and `scale` is given, the
        stored scale carries a control dependency asserting that none of
        its elements is zero.
      name: Python `str` name given to ops managed by this object.
    """
    self._graph_parents = []
    self._name = name
    self._validate_args = validate_args

    with self._name_scope("init", values=[scale, shift]):
        shift_tensor = shift
        if shift_tensor is not None:
            shift_tensor = tf.convert_to_tensor(shift, name="shift")

        scale_tensor = scale
        if scale_tensor is not None:
            scale_tensor = tf.convert_to_tensor(scale, name="scale")
            if validate_args:
                zero = tf.zeros([], dtype=scale_tensor.dtype)
                nonzero_check = tf.assert_none_equal(scale_tensor, zero)
                scale_tensor = control_flow_ops.with_dependencies(
                    [nonzero_check], scale_tensor)

        self._shift = shift_tensor
        self._scale = scale_tensor

        # The bijector dtype is whatever shift and scale agree on.
        dtype = dtype_util.common_dtype([self._shift, self._scale])
        super(AffineScalar, self).__init__(
            forward_min_event_ndims=0,
            is_constant_jacobian=True,
            validate_args=validate_args,
            dtype=dtype,
            name=name)
def __init__(self,
             shift=None,
             scale=None,
             validate_args=False,
             name="affine_scalar"):
    """Instantiates the `AffineScalar` bijector.

    `shift` and `scale` define the forward operation:

    ```none
    Y = g(X) = scale * X + shift
    ```

    An unspecified `scale` behaves like `scale = 1.` and an unspecified
    `shift` behaves like `shift = 0.`.

    Args:
      shift: Floating-point `Tensor`, or `None` for no shift.
      scale: Floating-point `Tensor`, or `None` for no scale.
      validate_args: Python `bool`. When `True` and `scale` is given, a
        non-zero assertion is attached to the stored scale.
      name: Python `str` name given to ops managed by this object.
    """
    self._graph_parents = []
    self._name = name
    self._validate_args = validate_args

    with self._name_scope("init", values=[scale, shift]):
        self._shift = (
            None if shift is None
            else tf.convert_to_tensor(shift, name="shift"))

        converted_scale = scale
        if converted_scale is not None:
            converted_scale = tf.convert_to_tensor(
                converted_scale, name="scale")
            if validate_args:
                # Gate the scale on the check so consumers trigger it.
                scale_is_nonzero = tf.assert_none_equal(
                    converted_scale,
                    tf.zeros([], dtype=converted_scale.dtype))
                converted_scale = control_flow_ops.with_dependencies(
                    [scale_is_nonzero], converted_scale)
        self._scale = converted_scale

        super(AffineScalar, self).__init__(
            forward_min_event_ndims=0,
            is_constant_jacobian=True,
            validate_args=validate_args,
            name=name)
def _finalize(self, _, contents):
    """Structure output and compute segment and position metadata.

    Returns a dict with the first `self._num_sequences` columns of
    `contents` under "contents", plus the "segment" and "position" values
    produced by `self._compute_auxiliary_structure`.
    """
    # Filtering drops the static shape, but it is known by construction, so
    # it is restored here.
    packed_shape = (self._packed_length, self._num_sequences * 2)
    contents.set_shape(packed_shape)

    # Vectors of minus one are used as dummies by the scan step's dummy
    # branch and by the eviction dataset. Checking for them is cheap, and a
    # leaked dummy would be hard to debug downstream.
    dummy_values = -tf.ones_like(contents)
    no_dummy_leak = tf.assert_none_equal(contents, dummy_values)
    with tf.control_dependencies([no_dummy_leak]):
        contents = tf.identity(contents)

    segment, position = self._compute_auxiliary_structure(contents)
    packed = {}
    packed["contents"] = contents[:, :self._num_sequences]
    packed["segment"] = segment
    packed["position"] = position
    return packed
def __init__(self,
             hinge_softness=None,
             validate_args=False,
             name="softplus"):
    """Instantiates the `Softplus` bijector.

    Args:
      hinge_softness: Optional value converted to a `Tensor` and stored as
        `self._hinge_softness`. `None` (the default) stores `None`.
      validate_args: Python `bool`. When `True` and `hinge_softness` is
        given, the stored tensor carries a control dependency asserting
        that none of its elements is zero.
      name: Python `str` name given to ops managed by this object.
    """
    with tf.name_scope(name, values=[hinge_softness]):
        if hinge_softness is not None:
            self._hinge_softness = tf.convert_to_tensor(
                hinge_softness, name="hinge_softness")
        else:
            self._hinge_softness = None

        # Only validate when a softness was supplied. With the default of
        # `None` there is no tensor to check, and reading `.dtype` from it
        # would raise AttributeError as soon as validation was enabled.
        if validate_args and hinge_softness is not None:
            nonzero_check = tf.assert_none_equal(
                tf.convert_to_tensor(0, dtype=self.hinge_softness.dtype),
                self.hinge_softness,
                message="hinge_softness must be non-zero")
            self._hinge_softness = control_flow_ops.with_dependencies(
                [nonzero_check], self.hinge_softness)

    super(Softplus, self).__init__(
        forward_min_event_ndims=0,
        validate_args=validate_args,
        name=name)
def _assertions(self, x):
    """Build ops asserting `x` is a nonsingular lower-triangular matrix.

    Args:
      x: Input to validate; converted to a `Tensor` when validation is on.

    Returns:
      An empty list when `self.validate_args` is falsy. Otherwise a list of
      four assertion ops: rank at least 2, square trailing dimensions, all
      entries above the diagonal zero, and no zero on the diagonal.
    """
    if not self.validate_args:
        return []

    x = tf.convert_to_tensor(x)
    shape = tf.shape(x)
    is_matrix = tf.assert_rank_at_least(
        x, 2, message="Input must have rank at least 2.")
    is_square = tf.assert_equal(
        shape[-2], shape[-1], message="Input must be a square matrix.")

    # Zero out the diagonal and keep the upper band: what is left is the
    # strictly-above-diagonal part. The zeros use x's own dtype because the
    # replacement diagonal must have the same dtype as the matrix; a fixed
    # float32 made validation fail for float64 (or any non-float32) inputs.
    zero_diag = tf.zeros(shape[:-1], dtype=x.dtype)
    above_diagonal = tf.matrix_band_part(
        tf.matrix_set_diag(x, zero_diag), 0, -1)
    is_lower_triangular = tf.assert_equal(
        above_diagonal,
        tf.zeros_like(above_diagonal),
        message="Input must be lower triangular.")

    # A lower triangular matrix is nonsingular iff all its diagonal entries
    # are nonzero.
    diag_part = tf.matrix_diag_part(x)
    is_nonsingular = tf.assert_none_equal(
        diag_part,
        tf.zeros_like(diag_part),
        message="Input must have all diagonal entries nonzero.")

    return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
def _assertions(self, x):
    """Return validation ops for a nonsingular lower-triangular input.

    Args:
      x: Input to validate; converted to a `Tensor` when validation is on.

    Returns:
      `[]` when `self.validate_args` is falsy; otherwise the list
      `[is_matrix, is_square, is_lower_triangular, is_nonsingular]` of
      assertion ops.
    """
    if not self.validate_args:
        return []

    x = tf.convert_to_tensor(x)
    shape = tf.shape(x)
    is_matrix = tf.assert_rank_at_least(
        x, 2, message="Input must have rank at least 2.")
    is_square = tf.assert_equal(shape[-2], shape[-1],
                                message="Input must be a square matrix.")

    # The replacement diagonal must match x's dtype. It was previously
    # hard-coded to float32, which broke validation for every other dtype.
    diag_zeros = tf.zeros(shape[:-1], dtype=x.dtype)
    # With the diagonal zeroed, the upper band holds exactly the entries
    # strictly above the diagonal.
    above_diagonal = tf.matrix_band_part(
        tf.matrix_set_diag(x, diag_zeros), 0, -1)
    is_lower_triangular = tf.assert_equal(
        above_diagonal,
        tf.zeros_like(above_diagonal),
        message="Input must be lower triangular.")

    # A lower triangular matrix is nonsingular iff all its diagonal entries
    # are nonzero.
    diag_part = tf.matrix_diag_part(x)
    is_nonsingular = tf.assert_none_equal(
        diag_part,
        tf.zeros_like(diag_part),
        message="Input must have all diagonal entries nonzero.")

    return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
def _maybe_assert_valid(self, t):
    """Return `t`, gated on an all-elements-non-zero check when validating.

    Args:
      t: Value to check; converted to a `Tensor` when validation is on.

    Returns:
      `t` unchanged when `self.validate_args` is falsy; otherwise `t` with
      a control dependency on the non-zero assertion.
    """
    if not self.validate_args:
        return t

    t = tf.convert_to_tensor(t)
    # Compare against a zero of t's own dtype. A bare Python `0.` is
    # converted to float32 and then mismatches float64 (or integer) inputs.
    zero = tf.zeros([], dtype=t.dtype)
    is_valid = tf.assert_none_equal(
        t, zero, message="All elements must be non-zero.")
    return control_flow_ops.with_dependencies([is_valid], t)
def __call__(self, *, X, Y=None, past=None, past_tokens=None, mask=None,
             padding_token: Optional[int] = None, do_dropout=False):
    """Run the transformer on tokens `X` and return a dict of outputs.

    Args:
        X: Token ids; converted to an int32 tensor. Unpacked below as
            (batch, sequence).
        Y: Not referenced anywhere in this method.
        past: Optional cached state from earlier calls. It is unstacked
            along axis 1 into one entry per layer, and `tf.shape(past)[-2]`
            is used as the past length.
        past_tokens: Tokens corresponding to `past`; required when both
            `padding_token` and `past` are given.
        mask: Optional boolean mask. At most one of `mask` and
            `padding_token` may be set.
        padding_token: When set, the mask is derived as `X != padding_token`
            and masked-out positions of `X` are replaced by 0.
        do_dropout: Forwarded to every `dropout` / `block` call.

    Returns:
        dict with keys 'present', 'h', 'lm_all_losses', 'lm_logits',
        'lm_losses', and, for each name in `self.scalar_heads`, `<name>` and
        `<name>_regularizer`.
    """
    X = tf.convert_to_tensor(X, dtype=tf.int32)
    if mask is not None:
        mask = tf.convert_to_tensor(mask, dtype=tf.bool)
        assert mask.dtype == tf.bool
    if padding_token is not None:
        assert mask is None, 'At most one of mask and padding_token should be set'
        mask = tf.not_equal(X, padding_token)
        # Replace padded positions by token 0 so X only holds ids from the mask's
        # "present" positions plus zeros.
        X = tf.where(mask, X, tf.zeros_like(X))
        if past is not None:
            assert past_tokens is not None, 'padding_token requires past_tokens'
            # Extend the mask to also cover the cached (past) positions.
            mask = tf.concat(
                [tf.not_equal(past_tokens, padding_token), mask], axis=1)
    # The first call creates the variables; later calls reuse them because
    # self.built is flipped to True below.
    with tf.variable_scope(self.scope, reuse=self.built, auxiliary_name_scope=not self.built):
        self.built = True
        results = {}
        batch, sequence = utils.shape_list(X)

        # One random root seed per call, split into independent seeds for the
        # two embedding dropouts, the transformer blocks and the heads.
        seed = tf.random.uniform(dtype=tf.int64, shape=[2], minval=-2**63, maxval=2**63 - 1)
        wpe_seed, wte_seed, blocks_seed, heads_seed = split_seed(seed, 4)

        # Position (wpe) and token (wte) embedding tables.
        wpe = tf.get_variable(
            'wpe', [self.hparams.n_ctx, self.hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable(
            'wte', [self.hparams.n_vocab, self.hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.02))
        wpe = dropout(wpe, self.hparams.embd_pdrop, do_dropout=do_dropout, stateless=True, seed=wpe_seed, name='wpe_drop')
        wte = dropout(wte, self.hparams.embd_pdrop, do_dropout=do_dropout, stateless=True, seed=wte_seed, name='wte_drop')

        past_length = 0 if past is None else tf.shape(past)[-2]

        positions = positions_for(batch=batch, sequence=sequence, past_length=past_length, mask=mask)
        h = embed(X, wte) + embed(positions, wpe)
        # Transformer
        presents = []
        pasts = tf.unstack(
            past, axis=1) if past is not None else [None] * self.hparams.n_layer
        assert len(pasts) == self.hparams.n_layer
        block_seeds = split_seed(blocks_seed, self.hparams.n_layer)
        # NOTE: the loop variable rebinds `past` to the per-layer slice.
        for layer, (past, block_seed) in enumerate(zip(pasts, block_seeds)):
            h, present = block(h, 'h%d' % layer, past=past, mask=mask, do_dropout=do_dropout, scale=True,
                               hparams=self.hparams, seed=block_seed)
            presents.append(present)
        results['present'] = tf.stack(presents, axis=1)
        h = norm(h, 'ln_f')
        if mask is not None:
            # For non-present tokens, use the output from the last present token instead.
            present_indices = utils.where(
                mask[:, past_length:], tf.tile(tf.range(sequence)[None, :], [batch, 1]), -1)
            use_indices = utils.cumulative_max(present_indices)
            # assert since GPUs don't
            with tf.control_dependencies(
                    [tf.assert_none_equal(use_indices, -1)]):
                h = utils.index_each(h, use_indices)
        results['h'] = h

        # Language model loss. Do tokens <n predict token n?
        h_flat = tf.reshape(h, [batch * sequence, self.hparams.n_embd])
        # Output logits reuse the token embedding table (h @ wte^T).
        flat_lm_logits = tf.matmul(h_flat, wte, transpose_b=True)

        # Targets are X rotated left by one; the wrapped-around last column is
        # dropped from the losses below via lm_losses[:, :-1].
        labels = tf.concat([X[:, 1:], X[:, :1]], axis=1)
        flat_labels = tf.reshape(labels, [batch * sequence])

        flat_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flat_labels, logits=flat_lm_logits)

        lm_losses = tf.reshape(flat_losses, [batch, sequence])
        lm_logits = tf.reshape(flat_lm_logits, [batch, sequence, -1])

        relevant_losses = lm_losses[:, :-1]
        results['lm_all_losses'] = relevant_losses
        results['lm_logits'] = lm_logits
        results['lm_losses'] = tf.reduce_mean(relevant_losses, axis=-1)

        # One scalar head per configured name, each under its own variable scope.
        head_seeds = split_seed(heads_seed, len(self.scalar_heads))
        for head_name, head_seed in zip(self.scalar_heads, head_seeds):
            with tf.variable_scope(f"heads/{head_name}"):
                dropped_h = \
                    dropout(h, self.hparams.head_pdrop, do_dropout=do_dropout, seed=head_seed, name='drop')
                # TODO: refactor this, perhaps move to Policy
                res, reg_loss = fc_layer(
                    dropped_h, (), scale=0 if head_name == 'value' else None)
                results[head_name] = tf.cast(res, dtype=tf.float32, name='res_cast')
                results[f"{head_name}_regularizer"] = tf.cast(
                    reg_loss, dtype=tf.float32, name='reg_loss_cast')

        # All done!
        return results
def _create_scale_operator(self, identity_multiplier, diag, tril,
                           perturb_diag, perturb_factor, shift,
                           validate_args):
    """Construct `scale` from various components.

    Args:
      identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix.
      diag: Floating-point `Tensor` representing the diagonal matrix.
      tril: Floating-point `Tensor` representing the lower triangular
        matrix.
      perturb_diag: Floating-point `Tensor` representing the diagonal matrix
        of the low rank update.
      perturb_factor: Floating-point `Tensor` representing factor matrix.
      shift: Floating-point `Tensor` representing `shift` in
        `scale @ X + shift`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.

    Returns:
      scale. When `self._is_only_identity_multiplier` is set, this is the
      `identity_multiplier` tensor itself (gated on a non-zero assertion if
      `validate_args`). Otherwise it is the result of
      `distribution_util.make_tril_scale`, wrapped in a
      `LinearOperatorLowRankUpdate` when `perturb_factor` is given.
    """
    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
    diag = _as_tensor(diag, "diag")
    tril = _as_tensor(tril, "tril")
    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")

    has_low_rank_update = perturb_factor is not None

    # When a low rank update is present, its factor supplies a shape hint
    # for a scale that is only a scaled identity matrix.
    shape_hint = (
        distribution_util.dimension_size(perturb_factor, axis=-2)
        if has_low_rank_update else None)

    if self._is_only_identity_multiplier:
        if not validate_args:
            return identity_multiplier
        multiplier_is_nonzero = tf.assert_none_equal(
            identity_multiplier,
            tf.zeros([], identity_multiplier.dtype),
            ["identity_multiplier should be non-zero."])
        return control_flow_ops.with_dependencies(
            [multiplier_is_nonzero], identity_multiplier)

    scale = distribution_util.make_tril_scale(
        loc=shift,
        scale_tril=tril,
        scale_diag=diag,
        scale_identity_multiplier=identity_multiplier,
        validate_args=validate_args,
        assert_positive=False,
        shape_hint=shape_hint)

    if not has_low_rank_update:
        return scale

    return tf.linalg.LinearOperatorLowRankUpdate(
        scale,
        u=perturb_factor,
        diag_update=perturb_diag,
        is_diag_update_positive=perturb_diag is None,
        is_non_singular=True,  # Implied by is_positive_definite=True.
        is_self_adjoint=True,
        is_positive_definite=True,
        is_square=True)
def add_decoder_op(self):
    """Build the training and greedy-inference decoders.

    The encoder outputs are flattened with the sequence mask so that every
    (unpadded) word becomes one decoder batch entry, and its encoder state
    is used as the decoder's initial state.

    Sets:
        self.loss: summed sequence loss of the teacher-forced decoder.
        self.decoder_logits: logits of the greedy decoder (gated on sanity
            assertions).
        self.labels_pred: sample ids chosen by the greedy decoder.
        self.labels_pred_lengths: decoded lengths of the greedy decoder.
    """
    # reshape inputs to a list of words
    input_mask = tf.sequence_mask(self.sequence_lengths)
    encoder_output_h, encoder_output_c = self.encoder_output
    decoder_input_h = tf.boolean_mask(encoder_output_h, input_mask)
    decoder_input_c = tf.boolean_mask(encoder_output_c, input_mask)
    initial_state = tf.contrib.rnn.LSTMStateTuple(h=decoder_input_h,
                                                  c=decoder_input_c)
    batch_size = tf.shape(decoder_input_h)[0]

    projection_layer = tf.layers.Dense(self.config.ntags,
                                       use_bias=True,
                                       name="decoder_proj")
    decoder_cell = tf.contrib.rnn.LSTMCell(
        num_units=2 * self.config.hidden_size_lstm)

    start_tokens = tf.tile([self.sos_id], [batch_size])
    # Decoder inputs: drop the last tag and prepend the 'sos' token, so the
    # input at step t is the target of step t - 1.
    tag_ids_train = tf.concat(
        [tf.expand_dims(start_tokens, 1), self.tag_ids[:, :-1]], 1)
    tags_train_embedded = tf.nn.embedding_lookup(self.tag_embeddings,
                                                 tag_ids_train)
    tags_train_embedded = tf.layers.dropout(
        tags_train_embedded,
        rate=1 - self.config.tag_embeddings_dropout,
        training=self.training_phase)

    # Training
    train_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=tags_train_embedded,
        # `tag-length` covers <sos-token, actual tags, eos-token>
        sequence_length=self.tag_lengths)
    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell,
        train_helper,
        initial_state=initial_state,
        output_layer=projection_layer)
    decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(train_decoder)
    logits = decoder_outputs.rnn_output
    logits = tf.verify_tensor_all_finite(logits, "Logits not finite")

    # from padded training tags extracts actual-tags + eos-token:
    # a step contributes to the loss only if its *input* is neither the eos
    # token nor padding. Both conditions are evaluated on the tag ids; the
    # earlier code compared the float mask itself against pad_id, which
    # discarded or inverted the first condition depending on pad_id.
    input_is_not_eos = tf.not_equal(tag_ids_train, self.eos_id)
    input_is_not_pad = tf.not_equal(tag_ids_train, self.pad_id)
    weights = tf.to_float(tf.logical_and(input_is_not_eos, input_is_not_pad))

    loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                            targets=self.tag_ids,
                                            weights=weights,
                                            name="sequence_loss",
                                            average_across_timesteps=False)
    self.loss = tf.reduce_sum(loss)

    # Inference
    infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embedding=self.tag_embeddings,
        start_tokens=start_tokens,
        end_token=self.eos_id)
    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell,
        infer_helper,
        initial_state=initial_state,
        output_layer=projection_layer)
    final_outputs, _, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
        infer_decoder,
        maximum_iterations=self.config.decoder_maximum_iterations,
        impute_finished=True)

    decoder_logits = final_outputs.rnn_output
    decoder_logits = tf.verify_tensor_all_finite(
        decoder_logits, "Decoder Logits not finite")

    # Sanity checks, attached through tf.identity so they actually run:
    # rank-3 logits, a non-zero logit sum, and argmax matching the sample ids.
    with tf.control_dependencies([
            tf.assert_rank(decoder_logits, 3),
            tf.assert_none_equal(tf.reduce_sum(decoder_logits), 0.),
            tf.assert_equal(
                tf.cast(tf.argmax(decoder_logits, axis=-1), tf.int32),
                final_outputs.sample_id)
    ]):
        decoder_logits = tf.identity(decoder_logits)

    self.decoder_logits = decoder_logits
    self.labels_pred = final_outputs.sample_id
    self.labels_pred_lengths = final_sequence_lengths
def _create_scale_operator(self, identity_multiplier, diag, tril,
                           perturb_diag, perturb_factor, shift,
                           validate_args):
    """Construct `scale` from various components.

    Args:
      identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix.
      diag: Floating-point `Tensor` representing the diagonal matrix.
      tril: Floating-point `Tensor` representing the lower triangular
        matrix.
      perturb_diag: Floating-point `Tensor` representing the diagonal matrix
        of the low rank update.
      perturb_factor: Floating-point `Tensor` representing factor matrix.
      shift: Floating-point `Tensor` representing `shift` in
        `scale @ X + shift`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.

    Returns:
      scale. In the case of scaling by a constant
      (`self._is_only_identity_multiplier`), scale is the
      `identity_multiplier` `Tensor`, carrying a non-zero assertion when
      `validate_args` is set. Otherwise scale comes from
      `distribution_util.make_tril_scale` and is wrapped in a
      `tf.linalg.LinearOperatorLowRankUpdate` if `perturb_factor` is given.
    """
    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
    diag = _as_tensor(diag, "diag")
    tril = _as_tensor(tril, "tril")
    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")

    # Use the low rank update, if there is one, to infer the size of the
    # identity matrix for a scaled-identity scale.
    if perturb_factor is None:
        shape_hint = None
    else:
        shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)

    if self._is_only_identity_multiplier:
        result = identity_multiplier
        if validate_args:
            nonzero_assertion = tf.assert_none_equal(
                identity_multiplier,
                tf.zeros([], identity_multiplier.dtype),
                ["identity_multiplier should be non-zero."])
            result = control_flow_ops.with_dependencies(
                [nonzero_assertion], identity_multiplier)
        return result

    base_scale = distribution_util.make_tril_scale(
        loc=shift,
        scale_tril=tril,
        scale_diag=diag,
        scale_identity_multiplier=identity_multiplier,
        validate_args=validate_args,
        assert_positive=False,
        shape_hint=shape_hint)

    if perturb_factor is None:
        return base_scale

    low_rank_hints = dict(
        is_diag_update_positive=perturb_diag is None,
        is_non_singular=True,  # Implied by is_positive_definite=True.
        is_self_adjoint=True,
        is_positive_definite=True,
        is_square=True)
    return tf.linalg.LinearOperatorLowRankUpdate(
        base_scale,
        u=perturb_factor,
        diag_update=perturb_diag,
        **low_rank_hints)
def calc_loss(self):
    """Build the total detection loss and its scalar summaries.

    Reads `self.outputs['location']`, `self.outputs['classification']`,
    `self.ground_truth['locations']` and `self.ground_truth['labels']`
    (each a list of per-layer tensors). For every sample in the batch it
    computes:

    * the classification loss over positive anchors (label > 0),
    * the classification loss over selected negatives (label == 0 whose
      loss exceeds a threshold taken from the sample's sorted losses at
      rank `min(3 * num_pos, num_neg) - 1`),
    * the Huber localisation loss over positive anchors.

    All three are normalised by the total number of positives and summed
    with the graph's regularisation losses.

    Side effects:
        Adds one "no positives in this sample" assertion op per sample to
        the "ASSERT" graph collection and registers scalar summaries.

    Returns:
        Scalar tensor: loc_loss + cls_loss_pos + cls_loss_neg + reg_loss.
    """
    location = self.outputs['location']
    classification = self.outputs['classification']
    batch_size = classification[0].get_shape().as_list()[0]

    # Merge the per-layer predictions / targets into one [batch, anchors, 4]
    # tensor, then split into one tensor per sample.
    location = tf.unstack(
        tf.concat([tf.reshape(t, [batch_size, -1, 4]) for t in location],
                  axis=1))
    l_gt = self.ground_truth['locations']
    l_gt = tf.unstack(
        tf.concat([tf.reshape(t, [batch_size, -1, 4]) for t in l_gt], axis=1))

    cls_loss = [
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels)
        for (logits, labels) in zip(classification,
                                    self.ground_truth['labels'])
    ]
    cls_loss = tf.unstack(
        tf.concat([layers.flatten(t) for t in cls_loss], axis=-1))
    flattened_label = tf.concat(
        [layers.flatten(t) for t in self.ground_truth['labels']], axis=-1)
    flattened_label_list = tf.unstack(flattened_label)

    num_pos_l = []
    cls_loss_pos = []
    cls_loss_neg = []
    loc_loss_pos = []
    for i, sample_cls_loss in enumerate(cls_loss):
        sorted_cls_loss = tf.contrib.framework.sort(sample_cls_loss, -1,
                                                    'DESCENDING')
        labels = flattened_label_list[i]
        pos = tf.greater(labels, 0)
        neg = tf.equal(labels, 0)
        num_pos = tf.reduce_sum(tf.to_int32(pos))

        # Hard negative selection: aim for 3 negatives per positive, capped
        # by the number of negatives available.
        num_neg = 3 * num_pos
        max_neg = tf.reduce_sum(tf.to_int32(neg))
        max_idx = tf.minimum(num_neg - 1, max_neg - 1)
        min_score = sorted_cls_loss[max_idx]
        selected = tf.greater(sample_cls_loss, min_score)
        neg = tf.logical_and(neg, selected)

        # tf.add_to_collection takes (name, value). The arguments used to be
        # swapped, which stored the string under a key made of the op and
        # left the "ASSERT" collection empty.
        has_positives = tf.assert_none_equal(num_pos, 0)
        tf.add_to_collection("ASSERT", has_positives)

        loc = tf.boolean_mask(location[i], pos)
        target = tf.boolean_mask(l_gt[i], pos)
        loc = tf.losses.huber_loss(target, loc, reduction='none')

        cls_loss_pos.append(
            tf.reduce_sum(tf.boolean_mask(sample_cls_loss, pos)))
        cls_loss_neg.append(
            tf.reduce_sum(tf.boolean_mask(sample_cls_loss, neg)))
        loc_loss_pos.append(tf.reduce_sum(loc))
        num_pos_l.append(num_pos)

    num_pos = tf.to_float(tf.add_n(num_pos_l))
    loc_loss = tf.add_n(loc_loss_pos) / num_pos
    cls_loss_pos = tf.add_n(cls_loss_pos) / num_pos
    cls_loss_neg = tf.add_n(cls_loss_neg) / num_pos
    # From here on num_pos is the average number of positives per sample
    # (only used for the summary).
    num_pos /= batch_size

    reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # tf.add_n rejects an empty list; a model without regularizers simply
    # contributes zero.
    reg_loss = tf.add_n(reg) if reg else tf.zeros([], dtype=loc_loss.dtype)
    total_loss = loc_loss + cls_loss_pos + cls_loss_neg + reg_loss

    with tf.variable_scope('loss'):
        tf.summary.scalar('loc_loss', loc_loss)
        tf.summary.scalar('cls_loss_pos', cls_loss_pos)
        tf.summary.scalar('cls_loss_neg', cls_loss_neg)
        tf.summary.scalar('reg_loss', reg_loss)
        tf.summary.scalar('total_loss', total_loss)
        tf.summary.scalar('num_pos', num_pos)

    return total_loss
def add_decoder_op(self):
    """Build the training, scoring and greedy-inference decoders.

    The encoder outputs are flattened with the sequence mask so that every
    (unpadded) word becomes one decoder batch entry. Depending on
    `self.config.analysis_embeddings` the decoder cell is optionally wrapped
    with Luong or Bahdanau attention over the analysis embeddings.

    Sets:
        self.loss: summed sequence loss of the training decoder.
        self.labels_scores: per-entry score of the given tags
            (exp of the negative mean log softmax probability).
        self.labels_max_scores: same score computed from the per-step
            maximum softmax probability.
        self.labels_max_ids: per-step argmax of the scoring logits.
        self.decoder_logits: logits of the greedy decoder (gated on sanity
            assertions).
        self.labels_pred: sample ids chosen by the greedy decoder.
        self.labels_pred_lengths: decoded lengths of the greedy decoder.

    Raises:
        ValueError: if `self.config.attention_mechanism` (when attention is
            enabled) or `self.config.trainer` holds an unsupported value.
    """
    # reshape inputs to a list of words
    input_mask = tf.sequence_mask(self.sequence_lengths)
    encoder_output_h, encoder_output_c = self.encoder_output
    decoder_input_h = tf.boolean_mask(encoder_output_h, input_mask)
    decoder_input_c = tf.boolean_mask(encoder_output_c, input_mask)
    initial_state = tf.contrib.rnn.LSTMStateTuple(h=decoder_input_h,
                                                  c=decoder_input_c)
    batch_size = tf.shape(decoder_input_h)[0]

    projection_layer = tf.layers.Dense(self.config.ntags,
                                       use_bias=True,
                                       name="decoder_proj")
    # num_units = encoder backward and forward hidden states concatenated
    decoder_cell = tf.contrib.rnn.LSTMCell(
        num_units=2 * self.config.hidden_size_lstm)

    if (self.config.analysis_embeddings == "attention_tag"
            or self.config.analysis_embeddings == "attention_category"):
        self.logger.warning("Using attention %s" %
                            self.config.analysis_embeddings)
        # shape: [words X analysis-number X attention-embedding-size]
        analysis_attention_embeddings = tf.boolean_mask(
            self.analysis_attention_embeddings, input_mask)
        analysis_lengths = tf.boolean_mask(self.analysis_lengths,
                                           input_mask)  # shape: [words]

        if self.config.attention_mechanism == 'luong':
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                num_units=2 * self.config.hidden_size_lstm,
                memory=analysis_attention_embeddings,
                memory_sequence_length=analysis_lengths,
                scale=False)
        elif self.config.attention_mechanism == 'bahdanau':
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=2 * self.config.hidden_size_lstm,
                memory=analysis_attention_embeddings,
                memory_sequence_length=analysis_lengths)
        else:
            # The attribute name was misspelled here, so an invalid setting
            # surfaced as AttributeError instead of this ValueError.
            raise ValueError("Invalid attention mechanism '%s'" %
                             self.config.attention_mechanism)

        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell,
            attention_mechanism,
            attention_layer_size=2 * self.config.hidden_size_lstm)
        # Start from the wrapper's zero state but keep the encoder state as
        # the wrapped cell's state.
        initial_state = decoder_cell.zero_state(
            dtype=tf.float32,
            batch_size=batch_size).clone(cell_state=initial_state)

    start_tokens = tf.tile([self.sos_id], [batch_size])
    # Decoder inputs: drop the last tag and prepend the 'sos' token, so the
    # input at step t is the target of step t - 1.
    tag_ids_train = tf.concat(
        [tf.expand_dims(start_tokens, 1), self.tag_ids[:, :-1]], 1)
    tags_train_embedded = tf.nn.embedding_lookup(self.tag_embeddings,
                                                 tag_ids_train)
    tags_train_embedded = tf.layers.dropout(
        tags_train_embedded,
        rate=1 - self.config.tag_embeddings_dropout,
        training=self.training_phase)

    # Training
    if self.config.trainer == "basic":
        train_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=tags_train_embedded,
            # `tag-length` covers <sos-token, actual tags, eos-token>
            sequence_length=self.tag_lengths,
        )
    elif self.config.trainer == "scheduled":
        train_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
            inputs=tags_train_embedded,
            # `tag-length` covers <sos-token, actual tags, eos-token>
            sequence_length=self.tag_lengths,
            embedding=lambda ids: tf.nn.embedding_lookup(
                self.tag_embeddings, ids),
            sampling_probability=self.config.scheduled_trainer_sampling_prob)
    else:
        raise ValueError("Invalid trainer specified: '%s'" %
                         self.config.trainer)

    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell,
        train_helper,
        initial_state=initial_state,
        output_layer=projection_layer)
    decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        train_decoder, impute_finished=False)

    # First element of the decoder output tuple (the rnn_output field).
    logits = decoder_outputs[0]
    logits = tf.verify_tensor_all_finite(logits, "Logits not finite")

    # from padded training tags extracts actual-tags + eos-token:
    # a step contributes to the loss only if its *input* is neither the eos
    # token nor padding. Both conditions are evaluated on the tag ids; the
    # earlier code compared the float mask itself against pad_id, which
    # discarded or inverted the first condition depending on pad_id.
    input_is_not_eos = tf.not_equal(tag_ids_train, self.eos_id)
    input_is_not_pad = tf.not_equal(tag_ids_train, self.pad_id)
    weights = tf.to_float(tf.logical_and(input_is_not_eos, input_is_not_pad))

    loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                            targets=self.tag_ids,
                                            weights=weights,
                                            name="sequence_loss",
                                            average_across_timesteps=False)
    self.loss = tf.reduce_sum(loss)

    # Scoring
    # 1. Score given labels
    scoring_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=tags_train_embedded, sequence_length=self.tag_lengths)
    scoring_decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell,
        scoring_helper,
        initial_state=initial_state,
        output_layer=projection_layer)
    scoring_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(scoring_decoder)
    scoring_logits = scoring_outputs.rnn_output
    scoring_logits = tf.verify_tensor_all_finite(
        scoring_logits, "Scoring logits not finite")

    # Pick, for every (entry, step), the softmax probability of the given tag.
    logits_flat = tf.reshape(scoring_logits,
                             [-1, tf.shape(scoring_logits)[2]])
    softmax_scores_flat = tf.nn.softmax(logits_flat, dim=-1)
    tag_ids_train_flat = tf.reshape(self.tag_ids, [-1])
    indices = tf.concat([
        tf.expand_dims(tf.range(0, tf.shape(tag_ids_train_flat)[0]), 1),
        tf.expand_dims(tag_ids_train_flat, 1)
    ], axis=1)
    tag_softmax_scores_flat = tf.gather_nd(softmax_scores_flat, indices)
    tag_softmax_scores = tf.reshape(tag_softmax_scores_flat,
                                    [batch_size, -1])

    # Steps beyond the tag length get probability 1 so they add log(1) = 0.
    tag_mask = tf.sequence_mask(self.tag_lengths,
                                tf.shape(tag_softmax_scores)[1])
    tag_softmax_scores = tf.multiply(tag_softmax_scores,
                                     tf.cast(tag_mask, tf.float32))
    tag_softmax_scores += tf.cast(tf.logical_not(tag_mask), tf.float32)
    scores = np.e**-tf.div(
        tf.reduce_sum(tf.log(tag_softmax_scores), axis=-1),
        tf.cast(self.tag_lengths, tf.float32))
    self.labels_scores = scores

    # 2. Score best labels
    max_tag_softmax_scores = tf.reduce_max(tf.nn.softmax(scoring_logits,
                                                         dim=-1),
                                           axis=-1)
    max_tag_mask = tf.sequence_mask(self.tag_lengths,
                                    tf.shape(max_tag_softmax_scores)[1])
    max_tag_softmax_scores = tf.multiply(max_tag_softmax_scores,
                                         tf.cast(max_tag_mask, tf.float32))
    max_tag_softmax_scores += tf.cast(tf.logical_not(max_tag_mask),
                                      tf.float32)
    max_scores = np.e**-tf.div(
        tf.reduce_sum(tf.log(max_tag_softmax_scores), axis=-1),
        tf.cast(self.tag_lengths, tf.float32))
    self.labels_max_scores = max_scores
    self.labels_max_ids = tf.argmax(scoring_logits, axis=-1)

    # Inference
    infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embedding=self.tag_embeddings,
        start_tokens=start_tokens,
        end_token=self.eos_id)
    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell,
        infer_helper,
        initial_state=initial_state,
        output_layer=projection_layer)
    final_outputs, _, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
        infer_decoder,
        maximum_iterations=self.config.decoder_maximum_iterations,
        impute_finished=True)

    decoder_logits = final_outputs.rnn_output
    decoder_logits = tf.verify_tensor_all_finite(
        decoder_logits, "Decoder Logits not finite")

    # Sanity checks, attached through tf.identity so they actually run:
    # rank-3 logits, a non-zero logit sum, and argmax matching the sample ids.
    with tf.control_dependencies([
            tf.assert_rank(decoder_logits, 3),
            tf.assert_none_equal(tf.reduce_sum(decoder_logits), 0.),
            tf.assert_equal(
                tf.cast(tf.argmax(decoder_logits, axis=-1), tf.int32),
                final_outputs.sample_id)
    ]):
        decoder_logits = tf.identity(decoder_logits)

    self.decoder_logits = decoder_logits
    self.labels_pred = final_outputs.sample_id
    self.labels_pred_lengths = final_sequence_lengths