Example #1
def eval_step(input_ids, 
               target_ids, 
               ):

    target_inp = target_ids[:, :-1]
    _, combined_mask, dec_padding_mask = create_masks(input_ids, target_inp)  
    (draft_predictions, draft_attention_weights, 
    refine_predictions, refine_attention_weights) = Model(
                                                           input_ids,
                                                           dec_padding_mask=dec_padding_mask,
                                                           target_ids=target_inp,
                                                           look_ahead_mask=combined_mask, 
                                                           training=False
                                                           )
    loss, target = loss_function(target_ids, 
                         draft_predictions,
                         refine_predictions, 
                         Model
                         )
    train_loss(loss)
    log.info(Model.summary())
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights,'initial_weights')
        Model.save_weights(initial_weights)

    return loss
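
A note on the target_ids[:, :-1] slice in eval_step above: the decoder consumes the target shifted right (teacher forcing), while the loss is computed against the target shifted left. A minimal sketch of that convention with made-up token ids (not taken from the repository above):

import tensorflow as tf

# Hypothetical batch with one target sequence: [CLS, w1, w2, w3, SEP]
target_ids = tf.constant([[101, 7, 8, 9, 102]])

decoder_input = target_ids[:, :-1]   # [CLS, w1, w2, w3] -> what the decoder sees
labels        = target_ids[:, 1:]    # [w1, w2, w3, SEP] -> what the loss compares against

print(decoder_input.numpy())  # [[101   7   8   9]]
print(labels.numpy())         # [[  7   8   9 102]]
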
Example #2
def val_step(input_ids, target_ids):

    (draft_predicted_ids, draft_attention_weights, refine_predicted_ids_2D,
     refine_attention_weights,
     refine_logits) = Model(input_ids,
                            decoder_type=config.draft_decoder_type,
                            beam_size=config.beam_size,
                            length_penalty=config.length_penalty,
                            temperature=config.softmax_temperature,
                            top_p=config.top_p,
                            top_k=config.top_k,
                            target_ids=None,
                            dec_padding_mask=None,
                            look_ahead_mask=None,
                            training=None)
    refine_validation_loss, _ = mask_and_calculate_nll_loss(
        refine_logits,
        target_ids,
        config.PAD_ID,
        epsilon=0,
    )
    perplexity = tf.math.exp(refine_validation_loss)
    perplexity /= config.validation_batch_size
    bert_f1 = calculate_bert_f1(target_ids, refine_predicted_ids_2D)
    return (perplexity, bert_f1, draft_attention_weights,
            refine_attention_weights)
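
The perplexity above is obtained by exponentiating the masked negative log-likelihood returned by mask_and_calculate_nll_loss (the extra division by validation_batch_size is specific to this repository). As a rough standalone sketch of the relationship, perplexity is the exponential of the mean per-token NLL; the values below are made up:

import tensorflow as tf

# Toy per-token negative log-likelihoods for non-padding positions (assumed values).
token_nll = tf.constant([2.1, 1.7, 2.4, 1.9])

mean_nll = tf.reduce_mean(token_nll)   # average NLL per token
perplexity = tf.math.exp(mean_nll)     # perplexity = exp(mean NLL)
print(float(perplexity))               # ~7.58
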
Example #3
def val_step(input_ids, target_ids, step, write_output_seq):

    enc_padding_mask = create_padding_mask(input_ids)
    (draft_predictions, draft_attention_weights, refine_predictions,
     refine_attention_weights) = Model(input_ids,
                                       decoder_type=config.draft_decoder_type,
                                       beam_size=config.beam_size,
                                       length_penalty=config.length_penalty,
                                       temperature=config.softmax_temperature,
                                       top_p=config.top_p,
                                       top_k=config.top_k,
                                       enc_padding_mask=enc_padding_mask,
                                       target_ids=None,
                                       dec_padding_mask=None,
                                       look_ahead_mask=None,
                                       training=None)

    if refine_predictions is not None:
        predictions = refine_predictions
    else:
        predictions = draft_predictions
    task_score, bert_f1 = tf_write_output_sequence(input_ids, target_ids[:,
                                                                         1:],
                                                   predictions[:, 1:], step,
                                                   write_output_seq)

    return (task_score, bert_f1, draft_attention_weights,
            refine_attention_weights)
def calculate_bert_f1(target_ids, predicted):

    target_mask = create_pretrained_model_mask(target_ids)
    predicted_return_mask = create_pretrained_model_mask(predicted)
    mask = tf.concat([target_mask, predicted_return_mask], axis=0)
    ids = tf.concat([target_ids, predicted], axis=0)
    # (8*batch_size, *_seq_len, target_vocab_size)
    embeddings = Model.decoder_bert_model(ids, attention_mask=mask)[0]
    embeddings_normalized = embeddings / (tf.norm(embeddings,
                                                  axis=-1)[:, :, tf.newaxis])
    # target_embeddings_normalized:    (4*batch_size, tar_seq_len,  target_vocab_size)
    # predicted_embeddings_normalized: (4*batch_size, cand_seq_len, target_vocab_size)
    target_embeddings_normalized, predicted_embeddings_normalized = tf.split(
        embeddings_normalized, 2, axis=0)
    # (4*batch_size, tar_seq_len, cand_seq_len)
    scores = tf.matmul(predicted_embeddings_normalized,
                       target_embeddings_normalized,
                       transpose_b=True)
    mask = tf.matmul(predicted_return_mask[:, :, tf.newaxis],
                     target_mask[:, tf.newaxis, :])
    scores = scores * mask
    recall = tf.reduce_max(scores, 1)
    precision = tf.reduce_max(scores, 2)
    recall = tf.reduce_sum(recall, 1)
    precision = tf.reduce_sum(precision, 1)
    recall = recall / tf.reduce_sum(target_mask, -1)
    precision = precision / tf.reduce_sum(predicted_return_mask, -1)
    f1_score = (2 * (precision * recall)) / (precision + recall)

    return f1_score
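
calculate_bert_f1 above implements a BERTScore-style greedy matching: token embeddings are L2-normalized, a candidate-vs-reference cosine-similarity matrix is built with a matmul, recall takes the per-reference-token maximum, precision takes the per-candidate-token maximum, and the two are combined into an F1. A self-contained numpy sketch of the same greedy matching on a toy similarity matrix (the values are made up):

import numpy as np

# Toy cosine-similarity matrix: rows = candidate tokens, columns = reference tokens.
scores = np.array([[0.9, 0.2, 0.1],
                   [0.3, 0.8, 0.4],
                   [0.1, 0.3, 0.7],
                   [0.2, 0.1, 0.6]])

recall = scores.max(axis=0).mean()      # best match found for every reference token
precision = scores.max(axis=1).mean()   # best match found for every candidate token
f1 = 2 * precision * recall / (precision + recall)
print(round(recall, 3), round(precision, 3), round(f1, 3))  # 0.8 0.75 0.774
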
def eval_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask):

    (draft_predictions, draft_attention_weights, refine_predictions,
     refine_attention_weights) = Model(input_ids, target_ids_, False)
    draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                               draft_predictions, draft_mask)
    if config.use_refine_decoder:
        refine_output_sequence_loss = loss_function(target_ids[:, :-1, :],
                                                    refine_predictions,
                                                    refine_mask)
    else:
        refine_output_sequence_loss = 0
    regularization_loss = tf.add_n(Model.losses)
    loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
    log.info(Model.summary())
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights,
                                       'initial_weights')
        Model.save_weights(initial_weights)
    return loss
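
tf.add_n(Model.losses) above sums the regularization terms that Keras layers register on the model. A minimal sketch of where those losses come from, using a throwaway model with an L2 kernel regularizer (purely illustrative, not the model from the example):

import tensorflow as tf

# A tiny model whose Dense layer contributes an L2 penalty to model.losses.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, kernel_regularizer=tf.keras.regularizers.l2(1e-4))
])
model.build((None, 8))
_ = model(tf.zeros((1, 8)))                    # run once so the losses are populated

regularization_loss = tf.add_n(model.losses)   # sum of all registered penalties
print(float(regularization_loss))
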
Example #6
def train_step(input_ids, 
               target_ids,
               grad_accum_flag):
    
    _, combined_mask, dec_padding_mask = create_masks(
                                                        input_ids, 
                                                        target_ids[:, :-1]
                                                        )
    with tf.GradientTape() as tape:
        (draft_logits, refine_logits, draft_attention_weights, 
          refine_attention_weights, 
          candidate_returns,  
          sample_returns) = Model(
                                   input_ids,
                                   dec_padding_mask=dec_padding_mask,
                                   target_ids=target_ids,
                                   look_ahead_mask=combined_mask, 
                                   training=True,
                                   )
        train_variables = Model.trainable_variables
        loss, bert_f1_score = loss_function(target_ids,
                                     draft_logits, 
                                     refine_logits,
                                     candidate_returns,
                                     sample_returns
                                     )
        regularization_loss = tf.add_n(Model.losses)
        total_loss = tf.reduce_sum([loss, regularization_loss])
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients  = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with same type as the gradients 
        if not gradient_accumulators:
            for tv in gradients:
              gradient_accumulators.append(tf.Variable(tf.zeros_like(tv), 
                                                       trainable=False))
        # accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # apply the gradients and reset them to zero if the flag is true
        if grad_accum_flag:
            optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
            for accumulator in (gradient_accumulators):
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
            
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)

    return refine_logits, bert_f1_score
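
The shadow-variable pattern above (accumulate gradients across several micro-batches, then apply them once) can be isolated into a small standalone sketch. Everything here is illustrative; the model, data, and names are made up:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.build((None, 3))
optimizer = tf.keras.optimizers.Adam()
accumulators = [tf.Variable(tf.zeros_like(v), trainable=False)
                for v in model.trainable_variables]

def micro_step(x, y, apply_now):
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(tf.square(model(x) - y))
    grads = tape.gradient(loss, model.trainable_variables)
    for acc, g in zip(accumulators, grads):
        acc.assign_add(g)                      # accumulate into shadow variables
    if apply_now:                              # apply once per accumulation window
        optimizer.apply_gradients(zip(accumulators, model.trainable_variables))
        for acc in accumulators:
            acc.assign(tf.zeros_like(acc))     # reset for the next window
    return loss

for step in range(4):
    x = tf.random.normal((2, 3)); y = tf.random.normal((2, 1))
    micro_step(x, y, apply_now=(step % 2 == 1))  # apply every 2 micro-batches
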
def val_step(input_ids, target_ids_, step, write_output_seq):
    dec_padding_mask = create_padding_mask(input_ids)
    (draft_predictions, _, refine_predictions,
     _) = Model.predict(input_ids, dec_padding_mask, False)

    if config.use_refine_decoder:
        predictions = refine_predictions
    else:
        predictions = draft_predictions
    rouge, bert = tf_write_output_sequence(target_ids_[:, 1:], predictions[:,
                                                                           1:],
                                           step, write_output_seq)
    return (rouge, bert)
def batch_run_check(batch, start):
    if config.run_tensorboard:
        with train_output_sequence_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=batch)
            tf.summary.scalar('train_accuracy',
                              train_accuracy.result(),
                              step=batch)
    if config.display_model_summary:
        log.info(Model.summary())
        log.info(batch_zero.format(time.time() - start))
        config.display_model_summary = False
    log.info(
        batch_run_details.format(train_loss.result(), train_accuracy.result()))
    return train_loss.result()
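
train_output_sequence_writer.as_default() above is presumably a TensorBoard summary-writer context created with tf.summary.create_file_writer. A minimal sketch of creating such a writer and logging a scalar per step (the path and metric values are placeholders):

import tensorflow as tf

# Hypothetical log directory; any writable path works.
writer = tf.summary.create_file_writer('logs/train')

with writer.as_default():
    for step in range(3):
        tf.summary.scalar('train_loss', 0.5 / (step + 1), step=step)
writer.flush()
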
Example #9
def batch_run_check(batch, start_time, bert_f1_score):

    if config.run_tensorboard:
        with train_output_sequence_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=batch)
    if config.display_model_summary:
        log.info(Model.summary())
        log.info(batch_zero.format(time.time() - start_time))
        config.display_model_summary = False
    log.info(
        batch_run_details.format(
            tf.debugging.assert_all_finite(train_loss.result(),
                                           message="NaN's or Inf's.",
                                           name='NAN_assertion'),
            bert_f1_score.numpy()))
Example #10
def train_step(input_ids, target_ids, grad_accum_flag):

    target_inp = target_ids[:, :-1]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        input_ids, target_inp)
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights, refine_predictions,
         refine_attention_weights) = Model(
             input_ids,
             dec_padding_mask=dec_padding_mask,
             target_ids=target_inp,
             enc_padding_mask=enc_padding_mask,
             look_ahead_mask=combined_mask,
             training=True,
         )
        train_variables = Model.trainable_variables
        loss, target = loss_function(target_ids, draft_predictions,
                                     refine_predictions, Model)
        predictions = refine_predictions if refine_predictions is not None else draft_predictions
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with same type as the gradients
        if not gradient_accumulators:
            for tv in gradients:
                gradient_accumulators.append(
                    tf.Variable(tf.zeros_like(tv), trainable=False))
        # accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # apply the gradients and reset them to zero if the flag is true
        if grad_accum_flag:
            optimizer.apply_gradients(
                zip(gradient_accumulators, train_variables))
            for accumulator in (gradient_accumulators):
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
            train_accuracy(target, predictions)
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)
        train_accuracy(target, predictions)

    return predictions
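
optimizer.get_scaled_loss and get_unscaled_gradients above belong to the tf.keras.mixed_precision.LossScaleOptimizer API used by these examples (TF 2.x with Keras 2): the loss is scaled up before the backward pass so small float16 gradients do not underflow, then the gradients are unscaled before being applied. A minimal sketch of that wrapper in isolation, with a toy model and made-up data:

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.build((None, 4))
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.Adam())

x = tf.random.normal((8, 4)); y = tf.random.normal((8, 1))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(tf.cast(model(x), tf.float32) - y))
    scaled_loss = optimizer.get_scaled_loss(loss)        # scale up to avoid fp16 underflow
scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
grads = optimizer.get_unscaled_gradients(scaled_grads)   # undo the scaling
optimizer.apply_gradients(zip(grads, model.trainable_variables))
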
Example #11
def main(config: str) -> None:

    if config not in ('production',):
        raise ValueError(f'Unknown deployment environment "{config}"')

    try:
        # Dataset
        logging.info("Creating dataset...")
        data_configuration = helpers.get_configuration(config,
                                                       data_configurations)
        dataset = Dataset(config=data_configuration)
        dataset.create()

        # Model
        logging.info("Creating model...")
        model_configuration = helpers.get_configuration(
            config, model_configurations)
        model = Model(model_configuration, input_dataset=dataset)
        model.build_model()
        model.build_annoy_representations(feature_type='item', is_cab=True)
        model.build_annoy_representations(feature_type='item', is_cab=False)

        # Prediction
        logging.info("Creating predictions...")
        prediction_configuration = helpers.get_configuration(
            config, prediction_configurations)
        predictor = UserItemPrediction(config=prediction_configuration)
        predictor.get_similar_items(
            product_id=prediction_configuration.DEFAULT_ITEM_EG, rec_type=1)
        predictor.get_similar_items(
            product_id=prediction_configuration.DEFAULT_ITEM_EG, rec_type=2)
        predictor.get_lightfm_recommendation(
            user_index=prediction_configuration.DEFAULT_USER_EG,
            use_precomputed_scores=False)

    except Exception as e:
        logging.exception(e)
    else:
        logging.info('Success @run.py')
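
A detail about the environment check in main above: the trailing comma in ('production',) matters, because without it the parentheses produce a plain string and the in operator falls back to a substring test. A quick demonstration:

# Parentheses alone do not make a tuple; the trailing comma does.
print('prod' in ('production'))    # True  (substring match against the string)
print('prod' in ('production',))   # False (tuple membership, as intended)
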
def train_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask,
               grad_accum_flag):
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights, refine_predictions,
         refine_attention_weights) = Model(input_ids, target_ids_, True)
        train_variables = Model.trainable_variables
        draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                                   draft_predictions,
                                                   draft_mask)
        if config.use_refine_decoder:
            refine_output_sequence_loss = loss_function(
                target_ids[:, :-1, :], refine_predictions, refine_mask)
            predictions = refine_predictions
            target = target_ids_[:, :-1]
        else:
            refine_output_sequence_loss = 0
            predictions = draft_predictions
            target = target_ids_[:, 1:]

        regularization_loss = tf.add_n(Model.losses)
        loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    # Initialize the shadow variables with same type as the gradients
    if not gradient_accumulators:
        for tv in gradients:
            gradient_accumulators.append(
                tf.Variable(tf.zeros_like(tv), trainable=False))
    # accumulate the gradients into the shadow variables
    for (accumulator, grad) in zip(gradient_accumulators, gradients):
        accumulator.assign_add(grad)
    # apply the gradients and reset them to zero if the flag is true
    if grad_accum_flag:
        optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
        for accumulator in (gradient_accumulators):
            accumulator.assign(tf.zeros_like(accumulator))
        train_loss(loss)
        train_accuracy(target, predictions)
    return predictions
def translate():

    en_input = input('Enter the english sentence-> ')
    en_input = preprocess(en_input)
    input_CLS_ID = source_tokenizer.vocab_size
    input_SEP_ID = source_tokenizer.vocab_size + 1
    target_CLS_ID = target_tokenizer.vocab_size
    target_SEP_ID = target_tokenizer.vocab_size + 1

    input_ids = tf.convert_to_tensor(
        [[input_CLS_ID] + source_tokenizer.encode(en_input) + [input_SEP_ID]])
    dec_padding_mask = create_padding_mask(input_ids)
    start = time.time()
    preds_draft_summary, _, _, _ = Model.predict(input_ids, dec_padding_mask)

    translated_sequence = target_tokenizer.decode([
        i for i in tf.squeeze(preds_draft_summary)
        if i not in [target_CLS_ID, target_SEP_ID, config.PAD_ID]
    ])
    print(
        f'Translated output --> {translated_sequence if translated_sequence else "EMPTY"}'
    )
    print(f'Time taken --> {round(time.time()-start)} seconds')
def generate():

    en_input = input('Enter the sentence-> ')
    en_input = preprocess(en_input)
    input_ids = tf.constant(source_tokenizer.encode(en_input))[None, :]
    dec_padding_mask = create_padding_mask(input_ids)

    start = time.time()
    (preds_draft_summary, _, preds_refine_summary, _,
     _) = Model.predict(input_ids,
                        batch_size=1,
                        draft_decoder_type='topktopp',
                        beam_size=10,
                        length_penalty=0.6,
                        temperature=1,
                        top_p=0.9,
                        top_k=25)
    generated_sequence = target_tokenizer.decode(
        tf.squeeze(preds_refine_summary), skip_special_tokens=True)
    print(
        f'Translated output--> {generated_sequence if generated_sequence else "EMPTY"}'
    )
    print(f'Time to process --> {round(time.time()-start)} seconds')
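
The top_k and top_p arguments passed to Model.predict above control how the draft decoder samples: keep only the k highest-probability tokens, then keep only the leading tokens whose cumulative probability stays within p (a simplified nucleus filter), and sample from what remains. A small numpy sketch of that filtering for a single decoding step; the vocabulary size and probabilities are made up:

import numpy as np

rng = np.random.default_rng(0)
probs = np.array([0.40, 0.25, 0.15, 0.10, 0.06, 0.04])  # already sorted, descending
top_k, top_p = 4, 0.8

probs_k = probs[:top_k]                          # top-k filter
keep = np.cumsum(probs_k) <= top_p               # simplified top-p (nucleus) filter
keep[0] = True                                   # always keep the most likely token
probs_kp = probs_k * keep
probs_kp /= probs_kp.sum()                       # renormalize
token = rng.choice(len(probs_kp), p=probs_kp)    # sample from the filtered distribution
print(probs_kp, token)
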
Example #15
    tokenized_string = source_tokenizer.encode(sample_string)
    log.info('Tokenized string is {}'.format(tokenized_string))
    original_string = source_tokenizer.decode(tokenized_string)
    log.info('The original string: {}'.format(original_string))
    assert original_string == sample_string, 'Encoding issue with tokenizer'
    

if config.check_predictions_shape:

    temp_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)
    temp_target = tf.random.uniform((64, 36), dtype=tf.int64, minval=0, maxval=200)
    (draft_predictions, draft_attention_weights, 
    refine_predictions, refine_attention_weights) = Model(temp_input,
                                                       dec_padding_mask=None, 
                                                       enc_padding_mask=None, 
                                                       look_ahead_mask=None,
                                                       target_ids=temp_target, 
                                                       training=False, 
                                                       )
    log.info(f'The output shape of the sample model is {tf.shape(draft_predictions if refine_predictions is None else refine_predictions)}')
    

if config.gpu_memory_test:

    memory_limit = 85
    gpu_usage = check_gpu_usage()
    while float(gpu_usage[:-1]) < memory_limit:
        gpu_usage = change_dataset_and_train(config.tokens_per_batch, config.train_batch_size)
        config.tokens_per_batch += 50
    log.info(f'GPU memory exceeded {memory_limit}% hence stopping the training')
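
check_gpu_usage above is not shown. One plausible way such a helper could be written (purely an assumption, not the repository's implementation) is to query nvidia-smi and return a percentage string such as '42%', which matches the gpu_usage[:-1] slicing in the loop:

import subprocess

def check_gpu_usage():
    """Hypothetical helper: return GPU memory usage as a string such as '42%'."""
    out = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.used,memory.total',
         '--format=csv,noheader,nounits'],
        text=True,
    )
    used, total = (float(x) for x in out.splitlines()[0].split(','))
    return f'{100 * used / total:.0f}%'
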
Example #16

if __name__ == '__main__':
    # Instantiate the model
    temp_input = tf.random.uniform((2, 19),
                                   dtype=tf.int64,
                                   minval=0,
                                   maxval=200)
    temp_target = tf.random.uniform((2, 12),
                                    dtype=tf.int64,
                                    minval=0,
                                    maxval=200)
    _ = Model(
        temp_input,
        dec_padding_mask=None,
        enc_padding_mask=None,
        look_ahead_mask=None,
        target_ids=temp_target,
        training=False,
    )
    ck_pt_mgr = check_ckpt(config.checkpoint_path)
    log_dir = os.path.join(config.tensorboard_log, embedding_projector_dir)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    filename = input('Enter the filename:- ')
    file_path = os.path.join(config.output_sequence_write_path, filename)
    input_sentences = []
    hypothesis = []
    with tf.io.gfile.GFile(file_path, 'r') as f:
        for line in f.readlines():
            (source, _, hyp) = line.split('\t')
            input_sentences.append(source)
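
The fragment above stops before anything is written into log_dir; given the embedding_projector_dir name, the likely intent is the TensorBoard Embedding Projector, which can load a pair of TSV files (one vector per row plus one label per row). A generic sketch of producing those files; the embeddings and labels here are stand-ins, not output of the repository's model:

import io
import numpy as np

# Stand-in embeddings and labels; in practice these would come from the model.
embeddings = np.random.rand(3, 8)
labels = ['sentence one', 'sentence two', 'sentence three']

with io.open('vectors.tsv', 'w', encoding='utf-8') as vec_f, \
     io.open('metadata.tsv', 'w', encoding='utf-8') as meta_f:
    for vec, label in zip(embeddings, labels):
        vec_f.write('\t'.join(f'{x:.6f}' for x in vec) + '\n')
        meta_f.write(label + '\n')
# vectors.tsv / metadata.tsv can then be loaded into the TensorBoard projector.
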
Example #17
def main(argv):
    date = datetime.datetime.now()

    #Dataset object.
    dataset = Pipeline(FLAGS.base_path, FLAGS.image_h, FLAGS.image_w)

    handle = dataset.handle
    #Load data lists.
    train_x, train_y, train_n, valid_x, valid_y, valid_n = dataset.createList(
        valid_size=0.2)

    #Datasets and iterator creation.
    dataset_train = dataset.createDataset(train_x, train_y, train_n,
                                          FLAGS.batch_size_train)
    train_iterator = dataset.initializeIterator(dataset_train, one_shot=False)
    dataset_valid = dataset.createDataset(valid_x, valid_y, valid_n,
                                          FLAGS.batch_size_valid)
    valid_iterator = dataset.initializeIterator(dataset_valid, one_shot=False)

    #Train data returned by iterator.
    batch = dataset.createIterator(dataset_train)

    #Object model.
    model = Model(dataset.n_classes, batch[0], batch[1], FLAGS.learning_rate)
    save_dir = FLAGS.save_dir
    #Saver object.
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    os.makedirs(save_dir + '/' + date.strftime('%y_%m_%d-%H_%M'))
    save_dir = save_dir + '/' + date.strftime('%y_%m_%d-%H_%M')
    save_path = os.path.join(save_dir, 'best_validation')

    #Steps number for training and validation.
    n_steps_train = int(len(train_x) / FLAGS.batch_size_train)
    n_steps_valid = int(len(valid_x) / FLAGS.batch_size_valid)

    #Initialize Tensorflow session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        #Handle: Decide which dataset (train or valid) is loaded in each operation.
        train_handle = sess.run(train_iterator.string_handle())
        valid_handle = sess.run(valid_iterator.string_handle())

        v_loss_train = []
        v_loss_valid = []
        v_acc_train = []
        v_acc_valid = []

        #Early stopping parameters.
        #Best validation accuracy obtained.
        best_validation_accuracy = 0.0
        #Last epoch where validation accuracy improved.
        last_improvement = 0
        #Max. number of epochs without improvement. Once it is reached, the training process will stop.
        improvement_epochs = 10

        for epoch in range(FLAGS.n_epochs):
            #Train model for one epoch.
            print("\nTraining...")
            sess.run(train_iterator.initializer)
            sum_loss_train = 0
            sum_acc_train = 0
            i = 0

            while True:
                try:
                    _, loss_train, acc_train = sess.run(
                        [model.optimizer, model.loss, model.accuracy],
                        feed_dict={
                            handle: train_handle,
                            model.keep_prob: 0.5
                        })

                    sum_loss_train += loss_train
                    sum_acc_train += acc_train

                    showProgress(epoch, i, n_steps_train, loss_train,
                                 acc_train)
                    checkRAM()
                    i += 1

                except tf.errors.OutOfRangeError:
                    mean_loss_train = sum_loss_train / n_steps_train
                    mean_acc_train = sum_acc_train / n_steps_train
                    v_loss_train.append(mean_loss_train)
                    v_acc_train.append(mean_acc_train)

                    showEpochResults(mean_loss_train, mean_acc_train)
                    break

            sess.run(valid_iterator.initializer)

            #Validate model for one epoch.
            print("\nValidating...")
            sum_loss_valid = 0
            sum_acc_valid = 0
            j = 0

            while True:
                try:
                    loss_valid, acc_valid = sess.run(
                        [model.loss, model.accuracy],
                        feed_dict={
                            handle: valid_handle,
                            model.keep_prob: 1
                        })

                    sum_loss_valid += loss_valid
                    sum_acc_valid += acc_valid

                    showProgress(epoch, j, n_steps_valid, loss_valid,
                                 acc_valid)
                    checkRAM()
                    j += 1

                except tf.errors.OutOfRangeError:
                    mean_loss_valid = sum_loss_valid / n_steps_valid
                    mean_acc_valid = sum_acc_valid / n_steps_valid
                    v_loss_valid.append(mean_loss_valid)
                    v_acc_valid.append(mean_acc_valid)

                    showEpochResults(mean_loss_valid, mean_acc_valid)
                    break

            #If validation accuracy increased in last epoch.
            if mean_acc_valid > best_validation_accuracy:
                #Update best accuracy value.
                best_validation_accuracy = mean_acc_valid
                last_improvement = epoch

                #Save trained variables.
                saver.save(sess=sess, save_path=save_path)
                print('Improvement')

            #If there weren't improvements in a while, stop training.
            if epoch - last_improvement > improvement_epochs:
                print('No improvements in a while. Stopping optimization.')
                break

        #Write training data in text file and save it.
        f = open(save_dir + '/parameters.txt', 'w')
        f.write(
            'Data set:\t{}\nClasses:\t{}\nValidation set size:\t{}\nEpochs number:\t{}\nBatch size train:\t{}\nBatch size validation:\t{}\nLearning rate:\t{}\nImage size:\t{},{}\nBest validation accuracy:\t{}'
            .format(FLAGS.base_path, str(dataset.classes),
                    str(FLAGS.valid_size), str(FLAGS.n_epochs),
                    str(FLAGS.batch_size_train), str(FLAGS.batch_size_valid),
                    str(FLAGS.learning_rate), str(FLAGS.image_h),
                    str(FLAGS.image_w), str(best_validation_accuracy)))
        f.close()

        #Plot training results.
        plotResults(1,
                    v_loss_train,
                    v_loss_valid,
                    loss=True,
                    title='Train and validation loss.')
        plotResults(2,
                    v_acc_train,
                    v_acc_valid,
                    loss=False,
                    title='Train and validation accuracy')
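
The early-stopping bookkeeping in the example above (best accuracy so far, epoch of last improvement, a patience threshold) is independent of the TF1 session machinery and can be summarized in a few lines. A minimal sketch driven by a fake validation-accuracy curve:

# Early stopping on a made-up validation-accuracy sequence.
val_accuracies = [0.61, 0.65, 0.64, 0.66, 0.66, 0.65, 0.64, 0.63]
patience = 2

best_acc, last_improvement = 0.0, 0
for epoch, acc in enumerate(val_accuracies):
    if acc > best_acc:
        best_acc, last_improvement = acc, epoch   # save a checkpoint here in practice
        print(f'epoch {epoch}: improvement to {acc:.2f}')
    if epoch - last_improvement > patience:
        print(f'no improvement for {patience} epochs, stopping at epoch {epoch}')
        break
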