Example #1
def eval_step(input_ids, target_ids):

    target_inp = target_ids[:, :-1]
    _, combined_mask, dec_padding_mask = create_masks(input_ids, target_inp)  
    (draft_predictions, draft_attention_weights,
     refine_predictions, refine_attention_weights) = Model(
         input_ids,
         dec_padding_mask=dec_padding_mask,
         target_ids=target_inp,
         look_ahead_mask=combined_mask,
         training=False,
     )
    loss, target = loss_function(target_ids,
                                 draft_predictions,
                                 refine_predictions,
                                 Model)
    train_loss(loss)
    # Model.summary() prints its report and returns None, so route it
    # through the logger instead of logging "None".
    Model.summary(print_fn=log.info)
    # Optionally snapshot the freshly initialized weights.
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights, 'initial_weights')
        Model.save_weights(initial_weights)

    return loss
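The create_masks helper called in these snippets is not shown anywhere in the listing. As a hedged sketch only: a typical implementation, along the lines of the TensorFlow Transformer tutorial and assuming token id 0 marks padding, matches the (enc_padding_mask, combined_mask, dec_padding_mask) unpacking used above.

import tensorflow as tf

def create_padding_mask(seq):
    # 1.0 where the token is padding (id 0), broadcastable over attention logits.
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    # Upper-triangular mask that hides future positions from the decoder.
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def create_masks(inp, tar):
    enc_padding_mask = create_padding_mask(inp)
    # The decoder's cross-attention attends over the encoder output,
    # so it is masked by the *input* padding.
    dec_padding_mask = create_padding_mask(inp)
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return enc_padding_mask, combined_mask, dec_padding_mask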
Example #2
def eval_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask):

    (draft_predictions, draft_attention_weights, refine_predictions,
     refine_attention_weights) = Model(input_ids, target_ids_, False)
    draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                               draft_predictions, draft_mask)
    if config.use_refine_decoder:
        refine_output_sequence_loss = loss_function(target_ids[:, :-1, :],
                                                    refine_predictions,
                                                    refine_mask)
    else:
        refine_output_sequence_loss = 0
    regularization_loss = tf.add_n(Model.losses)
    loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
    # As above, route the summary through the logger rather than logging None.
    Model.summary(print_fn=log.info)
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights,
                                       'initial_weights')
        Model.save_weights(initial_weights)
    return loss
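The loss_function these steps call is also not part of the snippet. As a hedged sketch only: a masked per-token cross-entropy like the one below fits the (targets, predictions, mask) call signature of Examples #2 and #3; the 3-D target slices above suggest the real code may use dense one-hot targets rather than the sparse integer targets assumed here.

import tensorflow as tf

# Illustrative stand-in for the unseen loss_function (assumptions noted above).
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(target, predictions, mask):
    # Per-token cross-entropy, zeroing out masked (e.g. padding) positions.
    per_token = loss_object(target, predictions)
    mask = tf.cast(mask, per_token.dtype)
    per_token *= mask
    # Average over the unmasked tokens only.
    return tf.reduce_sum(per_token) / tf.reduce_sum(mask)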
Example #3
def train_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask,
               grad_accum_flag):
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights, refine_predictions,
         refine_attention_weights) = Model(input_ids, target_ids_, True)
        train_variables = Model.trainable_variables
        draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                                   draft_predictions,
                                                   draft_mask)
        if config.use_refine_decoder:
            refine_output_sequence_loss = loss_function(
                target_ids[:, :-1, :], refine_predictions, refine_mask)
            predictions = refine_predictions
            target = target_ids_[:, :-1]
        else:
            refine_output_sequence_loss = 0
            predictions = draft_predictions
            target = target_ids_[:, 1:]

        regularization_loss = tf.add_n(Model.losses)
        loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    # Initialize the shadow variables with the same shape and dtype as the gradients
    if not gradient_accumulators:
        for tv in gradients:
            gradient_accumulators.append(
                tf.Variable(tf.zeros_like(tv), trainable=False))
    # Accumulate the gradients into the shadow variables
    for (accumulator, grad) in zip(gradient_accumulators, gradients):
        accumulator.assign_add(grad)
    # Apply the gradients and reset them to zero if the flag is true
    if grad_accum_flag:
        optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
        for accumulator in gradient_accumulators:
            accumulator.assign(tf.zeros_like(accumulator))
        train_loss(loss)
        train_accuracy(target, predictions)
    return predictions
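Examples #3 through #5 all use the same shadow-variable trick for gradient accumulation. Stripped of the summarization model, the pattern reduces to the self-contained toy below; the Dense model, random data, and accum_steps value are illustrative only.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
model.build((None, 4))
optimizer = tf.keras.optimizers.Adam()
accumulators = [tf.Variable(tf.zeros_like(v), trainable=False)
                for v in model.trainable_variables]
accum_steps = 4  # illustrative choice

def accum_step(x, y, apply_now):
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(tf.square(model(x) - y))
    grads = tape.gradient(loss, model.trainable_variables)
    # Add this micro-batch's gradients to the shadow variables.
    # Note: like the examples above, gradients are summed, not averaged.
    for acc, grad in zip(accumulators, grads):
        acc.assign_add(grad)
    if apply_now:
        # Apply the summed gradients once, then reset the accumulators,
        # emulating a batch accum_steps times larger.
        optimizer.apply_gradients(zip(accumulators, model.trainable_variables))
        for acc in accumulators:
            acc.assign(tf.zeros_like(acc))
    return loss

for step in range(8):
    x = tf.random.normal((8, 4))
    y = tf.random.normal((8, 2))
    accum_step(x, y, apply_now=(step + 1) % accum_steps == 0)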
Example #4
def train_step(input_ids,
               target_ids,
               grad_accum_flag):

    _, combined_mask, dec_padding_mask = create_masks(input_ids,
                                                      target_ids[:, :-1])
    with tf.GradientTape() as tape:
        (draft_logits, refine_logits, draft_attention_weights,
         refine_attention_weights,
         candidate_returns,
         sample_returns) = Model(
             input_ids,
             dec_padding_mask=dec_padding_mask,
             target_ids=target_ids,
             look_ahead_mask=combined_mask,
             training=True,
         )
        train_variables = Model.trainable_variables
        loss, bert_f1_score = loss_function(target_ids,
                                            draft_logits,
                                            refine_logits,
                                            candidate_returns,
                                            sample_returns)
        regularization_loss = tf.add_n(Model.losses)
        total_loss = loss + regularization_loss
        # Scale the combined loss (not just `loss`), so the regularization
        # term also contributes to the gradients.
        scaled_loss = optimizer.get_scaled_loss(total_loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with the same shape and dtype as the gradients
        if not gradient_accumulators:
            for tv in gradients:
                gradient_accumulators.append(tf.Variable(tf.zeros_like(tv),
                                                         trainable=False))
        # Accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # Apply the gradients and reset them to zero if the flag is true
        if grad_accum_flag:
            optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
            for accumulator in gradient_accumulators:
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
            
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)

    return refine_logits, bert_f1_score
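The get_scaled_loss / get_unscaled_gradients pair used in Examples #3 through #5 comes from TensorFlow's mixed-precision API (tf.keras.mixed_precision.LossScaleOptimizer in the TF 2.x Keras API). A minimal, self-contained sketch of that round trip, with a toy model standing in for Model:

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')
model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
model.build((None, 4))
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam())

x = tf.random.normal((8, 4))
y = tf.random.normal((8, 2))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - tf.cast(y, tf.float16)))
    # Scale the loss up so small float16 gradients do not underflow to zero.
    scaled_loss = optimizer.get_scaled_loss(loss)
scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
# Undo the scaling before applying the gradients.
grads = optimizer.get_unscaled_gradients(scaled_grads)
optimizer.apply_gradients(zip(grads, model.trainable_variables))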
Example #5
def train_step(input_ids, target_ids, grad_accum_flag):

    target_inp = target_ids[:, :-1]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        input_ids, target_inp)
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights, refine_predictions,
         refine_attention_weights) = Model(
             input_ids,
             dec_padding_mask=dec_padding_mask,
             target_ids=target_inp,
             enc_padding_mask=enc_padding_mask,
             look_ahead_mask=combined_mask,
             training=True,
         )
        train_variables = Model.trainable_variables
        loss, target = loss_function(target_ids, draft_predictions,
                                     refine_predictions, Model)
        # Fall back to the draft decoder's output when refinement is disabled.
        predictions = (refine_predictions if refine_predictions is not None
                       else draft_predictions)
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with the same shape and dtype as the gradients
        if not gradient_accumulators:
            for tv in gradients:
                gradient_accumulators.append(
                    tf.Variable(tf.zeros_like(tv), trainable=False))
        # Accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # Apply the gradients and reset them to zero if the flag is true
        if grad_accum_flag:
            optimizer.apply_gradients(
                zip(gradient_accumulators, train_variables))
            for accumulator in gradient_accumulators:
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
            train_accuracy(target, predictions)
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)
        train_accuracy(target, predictions)

    return predictions