def update_metainfo_op_with_vars(metainfo_ph: tf.Tensor, nz_ph: tf.Tensor, metainfo_var: tf.Variable, nz_var: tf.Variable) -> tf.Operation:
    """Return a single op that updates the metainfo and nonzero-value variables on device.

    :param metainfo_ph: placeholder feeding the new metainfo values
    :param nz_ph: placeholder feeding the new nonzero values
    :param metainfo_var: device variable that receives the metainfo
    :param nz_var: device variable that receives the nonzero values
    :return: a no-op gated on both assignments, so running it guarantees
        both variables have been updated
    """
    assign_nz = nz_var.assign(nz_ph)
    assign_meta = metainfo_var.assign(metainfo_ph)
    # The control dependency ties both assigns to one returned handle.
    with tf.control_dependencies([assign_nz, assign_meta]):
        update_op = tf.no_op()
    return update_op
def _train_eval(self):
    """Build the training loss for the bilinear sequence-labeling head.

    Side effects: sets ``self.mask`` (padding mask over token positions),
    ``self.loss`` (sequence loss over predicate-selected logits), and
    ``self.metric`` (a non-trainable scalar tracking the overall score).
    """
    self.mask = tf.sequence_mask(self.features[constants.LENGTH_KEY], name="padding_mask")

    # Only logits at the predicate positions contribute to the loss.
    predicate_logits = select_logits(self.logits, self.predicate_indices, self.n_steps)

    # BERT inputs carry their own length accounting, so no extra mask is applied;
    # otherwise fall back to the per-sequence mask feature (may be absent).
    if constants.BERT_LENGTH_KEY in self.features:
        loss_mask = None
    else:
        loss_mask = self.features.get(constants.SEQUENCE_MASK)

    self.loss = sequence_loss(
        logits=predicate_logits,
        targets=self.targets,
        sequence_lengths=self._sequence_lengths,
        num_labels=self.extractor.vocab_size(),
        crf=self.config.crf,
        tag_transitions=self._tag_transitions,
        label_smoothing=self.config.label_smoothing,
        confidence_penalty=self.config.confidence_penalty,
        name="bilinear_loss",
        mask=loss_mask)

    self.metric = Variable(0,
                           name=append_label(constants.OVERALL_KEY, self.name),
                           dtype=tf.float32,
                           trainable=False)
def _get_update(self, variable: tf.Variable, gradient: tf.Tensor, step_size: tf.Tensor) -> List[tf.Operation]:
    """Build the Adam update ops for a single variable.

    All arithmetic is performed in float32 regardless of the variable's
    storage dtype; results are cast back on assignment.

    NOTE(review): no beta1/beta2 bias correction is applied here — presumably
    intentional (BERT-style Adam folds it into the schedule); confirm.

    :param variable: the trainable variable to update
    :param gradient: gradient for ``variable`` (cast to float32 internally)
    :param step_size: learning-rate tensor for this step
    :return: assign ops for [variable, first moment m, second moment v]
    """
    with tf.variable_scope(variable.op.name):
        gradient = tf.cast(gradient, tf.float32)
        # First moment m is stored in the variable's dtype (possibly reduced
        # precision), so it is cast to float32 before the update math.
        state_m = tf.get_variable(
            "adam_m",
            shape=variable.shape,
            dtype=variable.dtype,
            initializer=tf.zeros_initializer(),
            trainable=False,
        )
        updated_m = (self.beta1 * tf.cast(state_m, tf.float32) +
                     (1 - self.beta1) * gradient)
        # Second moment v is always kept in float32, unlike m.
        state_v = tf.get_variable(
            "adam_v",
            shape=variable.shape,
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
            trainable=False,
        )
        updated_v = self.beta2 * state_v + (1 - self.beta2) * (gradient**2)
        # Standard Adam step: epsilon added after the sqrt for stability.
        delta = step_size * updated_m / (tf.sqrt(updated_v) + self.epsilon)
        updated_variable = tf.cast(variable, tf.float32) - delta
        # Cast results back to each slot's storage dtype on assignment.
        return [
            variable.assign(tf.cast(updated_variable, variable.dtype)),
            state_m.assign(tf.cast(updated_m, state_m.dtype)),
            state_v.assign(updated_v),
        ]
def update_metainfo_op_with_vars(metainfo_ph: tf.Tensor, nz_ph: tf.Tensor, metainfo_var: tf.Variable, nz_var: tf.Variable) -> tf.Operation:
    """Returns an op that can be used to update the metainfo on device

    :param metainfo_ph: Metainfo placeholder
    :param nz_ph: Nonzero-values placeholder
    :param metainfo_var: Metainfo variable
    :param nz_var: Nonzero-values variable
    """
    # Build both assignments, then gate a single no-op on them so callers
    # can run one handle and know both variables were written.
    pending = [nz_var.assign(nz_ph), metainfo_var.assign(metainfo_ph)]
    with tf.control_dependencies(pending):
        return tf.no_op()
def _train_eval(self):
    """Build the classification training loss.

    Uses smoothed softmax cross entropy over one-hot targets when label
    smoothing is enabled, otherwise plain sparse softmax cross entropy.
    Sets ``self.loss`` and ``self.metric`` (non-trainable overall score).
    """
    smoothing = self.config.label_smoothing
    if smoothing <= 0:
        # No smoothing: sparse targets avoid materializing one-hot vectors.
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                           labels=self.targets))
    else:
        one_hot_targets = tf.one_hot(self.targets, depth=self.extractor.vocab_size())
        self.loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_targets,
                                                    logits=self.logits,
                                                    label_smoothing=smoothing)

    self.metric = Variable(0,
                           name=append_label(constants.OVERALL_KEY, self.name),
                           dtype=tf.float32,
                           trainable=False)
def _train_eval(self):
    """Build the sequence-tagging training loss.

    Sets ``self.loss`` (CRF or token-level sequence loss over the full
    logits) and ``self.metric`` (non-trainable overall score).
    """
    # BERT inputs handle lengths themselves; otherwise use the optional
    # per-sequence mask feature (may be absent -> None).
    loss_mask = (None if constants.BERT_LENGTH_KEY in self.features
                 else self.features.get(constants.SEQUENCE_MASK))

    self.loss = sequence_loss(
        logits=self.logits,
        targets=self.targets,
        sequence_lengths=self._sequence_lengths,
        num_labels=self.extractor.vocab_size(),
        crf=self.config.crf,
        tag_transitions=self._tag_transitions,
        label_smoothing=self.config.label_smoothing,
        confidence_penalty=self.config.confidence_penalty,
        mask=loss_mask)

    self.metric = Variable(0,
                           name=append_label(constants.OVERALL_KEY, self.name),
                           dtype=tf.float32,
                           trainable=False)
def _train_eval(self):
    """Build the combined arc + relation training loss for the parser.

    Both losses are mean softmax cross entropy over non-padding tokens;
    relation logits are selected at the gold head positions. Sets
    ``self.mask``, ``self.arc_targets``, ``self.loss``, and ``self.metric``.
    """
    self.mask = tf.sequence_mask(self.lens, name="padding_mask")

    def masked_xent(scores, gold, scope):
        # Mean cross entropy restricted to real (non-padding) positions.
        with tf.variable_scope(scope):
            per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores,
                                                                       labels=gold)
            return tf.reduce_mean(tf.boolean_mask(per_token, self.mask))

    self.arc_targets = tf.identity(self.features[constants.HEAD_KEY],
                                   name=constants.HEAD_KEY)

    # Arc loss: predict each token's head; weighted by config (default 1).
    arc_loss = self.config.get('arc_loss_weight', 1) * masked_xent(
        self.arc_logits, self.arc_targets, "arc_bilinear_loss")

    # Relation loss: label logits taken at the gold head positions.
    rel_logits_at_heads = select_logits(self.rel_logits, self.arc_targets, self.n_steps)
    rel_loss = self.config.get('rel_loss_weight', 1) * masked_xent(
        rel_logits_at_heads, self.targets, "rel_bilinear_loss")

    self.loss = arc_loss + rel_loss
    self.metric = Variable(0,
                           name=append_label(constants.OVERALL_KEY, self.name),
                           dtype=tf.float32,
                           trainable=False)