def eval_step(input_ids, target_ids):
    target_inp = target_ids[:, :-1]
    _, combined_mask, dec_padding_mask = create_masks(input_ids, target_inp)
    (draft_predictions, draft_attention_weights,
     refine_predictions, refine_attention_weights) = Model(
        input_ids,
        dec_padding_mask=dec_padding_mask,
        target_ids=target_inp,
        look_ahead_mask=combined_mask,
        training=False,
    )
    loss, target = loss_function(target_ids, draft_predictions,
                                 refine_predictions, Model)
    train_loss(loss)
    log.info(Model.summary())
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights, 'initial_weights')
        Model.save_weights(initial_weights)
    return loss
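# The eval/train steps in this module rely on mask helpers that are defined
# elsewhere in the repo. The following is a minimal sketch of what they are
# assumed to do, following the standard Transformer recipe (padding mask plus
# look-ahead mask). The default pad id of 0 is an assumption here; the repo
# itself refers to config.PAD_ID.
import tensorflow as tf

def create_padding_mask(seq, pad_id=0):
    # 1.0 where the token is padding, shaped to broadcast over attention logits
    mask = tf.cast(tf.math.equal(seq, pad_id), tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]          # (batch, 1, 1, seq_len)

def create_look_ahead_mask(size):
    # upper-triangular mask that hides future positions
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def create_masks(input_ids, target_ids):
    enc_padding_mask = create_padding_mask(input_ids)
    dec_padding_mask = create_padding_mask(input_ids)
    look_ahead_mask = create_look_ahead_mask(tf.shape(target_ids)[1])
    dec_target_padding_mask = create_padding_mask(target_ids)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return enc_padding_mask, combined_mask, dec_padding_mask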
def val_step(input_ids, target_ids):
    (draft_predicted_ids, draft_attention_weights,
     refine_predicted_ids_2D, refine_attention_weights,
     refine_logits) = Model(
        input_ids,
        decoder_type=config.draft_decoder_type,
        beam_size=config.beam_size,
        length_penalty=config.length_penalty,
        temperature=config.softmax_temperature,
        top_p=config.top_p,
        top_k=config.top_k,
        target_ids=None,
        dec_padding_mask=None,
        look_ahead_mask=None,
        training=None,
    )
    refine_validation_loss, _ = mask_and_calculate_nll_loss(
        refine_logits,
        target_ids,
        config.PAD_ID,
        epsilon=0,
    )
    perplexity = tf.math.exp(refine_validation_loss)
    perplexity /= config.validation_batch_size
    bert_f1 = calculate_bert_f1(target_ids, refine_predicted_ids_2D)
    return (perplexity, bert_f1, draft_attention_weights, refine_attention_weights)
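# mask_and_calculate_nll_loss is not defined in this section. Below is a
# minimal sketch consistent with how it is called above: a padding-masked,
# optionally label-smoothed negative log-likelihood. The smoothing formula and
# the float32-logits assumption are illustrative, not taken from the repo.
import tensorflow as tf

def mask_and_calculate_nll_loss(logits, target_ids, pad_id, epsilon=0.1):
    vocab_size = tf.shape(logits)[-1]
    labels = tf.one_hot(target_ids, vocab_size)
    if epsilon:
        # standard label smoothing
        labels = labels * (1.0 - epsilon) + epsilon / tf.cast(vocab_size, tf.float32)
    nll = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    mask = tf.cast(tf.math.not_equal(target_ids, pad_id), nll.dtype)
    loss = tf.reduce_sum(nll * mask) / tf.reduce_sum(mask)
    return loss, mask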
def val_step(input_ids, target_ids, step, write_output_seq):
    enc_padding_mask = create_padding_mask(input_ids)
    (draft_predictions, draft_attention_weights,
     refine_predictions, refine_attention_weights) = Model(
        input_ids,
        decoder_type=config.draft_decoder_type,
        beam_size=config.beam_size,
        length_penalty=config.length_penalty,
        temperature=config.softmax_temperature,
        top_p=config.top_p,
        top_k=config.top_k,
        enc_padding_mask=enc_padding_mask,
        target_ids=None,
        dec_padding_mask=None,
        look_ahead_mask=None,
        training=None,
    )
    if refine_predictions is not None:
        predictions = refine_predictions
    else:
        predictions = draft_predictions
    task_score, bert_f1 = tf_write_output_sequence(
        input_ids, target_ids[:, 1:], predictions[:, 1:], step, write_output_seq)
    return (task_score, bert_f1, draft_attention_weights, refine_attention_weights)
def calculate_bert_f1(target_ids, predicted):
    target_mask = create_pretrained_model_mask(target_ids)
    predicted_return_mask = create_pretrained_model_mask(predicted)
    mask = tf.concat([target_mask, predicted_return_mask], axis=0)
    ids = tf.concat([target_ids, predicted], axis=0)
    # (2*batch_size, seq_len, emb_dim)
    embeddings = Model.decoder_bert_model(ids, attention_mask=mask)[0]
    embeddings_normalized = embeddings / tf.norm(embeddings, axis=-1)[:, :, tf.newaxis]
    # two tensors of shape (batch_size, seq_len, emb_dim)
    target_embeddings_normalized, predicted_embeddings_normalized = tf.split(
        embeddings_normalized, 2, axis=0)
    # cosine similarity between every candidate and reference token
    # (batch_size, cand_seq_len, tar_seq_len)
    scores = tf.matmul(predicted_embeddings_normalized,
                       target_embeddings_normalized,
                       transpose_b=True)
    mask = tf.matmul(predicted_return_mask[:, :, tf.newaxis], target_mask[:, tf.newaxis, :])
    scores = scores * mask
    # greedy matching: recall over reference tokens, precision over candidate tokens
    recall = tf.reduce_max(scores, 1)
    precision = tf.reduce_max(scores, 2)
    recall = tf.reduce_sum(recall, 1)
    precision = tf.reduce_sum(precision, 1)
    recall = recall / tf.reduce_sum(target_mask, -1)
    precision = precision / tf.reduce_sum(predicted_return_mask, -1)
    f1_score = (2 * precision * recall) / (precision + recall)
    return f1_score
def eval_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask):
    (draft_predictions, draft_attention_weights,
     refine_predictions, refine_attention_weights) = Model(input_ids, target_ids_, False)
    draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                               draft_predictions, draft_mask)
    if config.use_refine_decoder:
        refine_output_sequence_loss = loss_function(target_ids[:, :-1, :],
                                                    refine_predictions, refine_mask)
    else:
        refine_output_sequence_loss = 0
    regularization_loss = tf.add_n(Model.losses)
    loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
    log.info(Model.summary())
    if config.save_initial_weights:
        initial_weights = os.path.join(config.initial_weights, 'initial_weights')
        Model.save_weights(initial_weights)
    return loss
def train_step(input_ids, target_ids, grad_accum_flag):
    _, combined_mask, dec_padding_mask = create_masks(input_ids, target_ids[:, :-1])
    with tf.GradientTape() as tape:
        (draft_logits, refine_logits,
         draft_attention_weights, refine_attention_weights,
         candidate_returns, sample_returns) = Model(
            input_ids,
            dec_padding_mask=dec_padding_mask,
            target_ids=target_ids,
            look_ahead_mask=combined_mask,
            training=True,
        )
        train_variables = Model.trainable_variables
        loss, bert_f1_score = loss_function(target_ids, draft_logits, refine_logits,
                                            candidate_returns, sample_returns)
        regularization_loss = tf.add_n(Model.losses)
        total_loss = tf.reduce_sum([loss, regularization_loss])
        # scale the combined loss for mixed-precision training
        scaled_loss = optimizer.get_scaled_loss(total_loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with the same type as the gradients
        if not gradient_accumulators:
            for tv in gradients:
                gradient_accumulators.append(tf.Variable(tf.zeros_like(tv), trainable=False))
        # Accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # Apply the gradients and reset the accumulators when the flag is set
        if grad_accum_flag:
            optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
            for accumulator in gradient_accumulators:
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)
    return refine_logits, bert_f1_score
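# The train steps call optimizer.get_scaled_loss / get_unscaled_gradients,
# which are methods of Keras' loss-scale optimizer used for mixed-precision
# training. A minimal sketch of the setup these steps assume (the Adam choice
# and learning rate are illustrative assumptions; TF 2.4+ API):
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')
base_optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(base_optimizer)

# shadow variables used by the gradient-accumulation branch above
gradient_accumulators = []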
def val_step(input_ids, target_ids_, step, write_output_seq):
    dec_padding_mask = create_padding_mask(input_ids)
    (draft_predictions, _,
     refine_predictions, _) = Model.predict(input_ids, dec_padding_mask, False)
    if config.use_refine_decoder:
        predictions = refine_predictions
    else:
        predictions = draft_predictions
    rouge, bert = tf_write_output_sequence(target_ids_[:, 1:], predictions[:, 1:],
                                           step, write_output_seq)
    return (rouge, bert)
def batch_run_check(batch, start):
    if config.run_tensorboard:
        with train_output_sequence_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=batch)
            tf.summary.scalar('train_accuracy', train_accuracy.result(), step=batch)
    if config.display_model_summary:
        log.info(Model.summary())
        log.info(batch_zero.format(time.time() - start))
        config.display_model_summary = False
    log.info(batch_run_details.format(train_loss.result(), train_accuracy.result()))
    return train_loss.result()
def batch_run_check(batch, start_time, bert_f1_score):
    if config.run_tensorboard:
        with train_output_sequence_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=batch)
    if config.display_model_summary:
        log.info(Model.summary())
        log.info(batch_zero.format(time.time() - start_time))
        config.display_model_summary = False
    log.info(batch_run_details.format(
        tf.debugging.assert_all_finite(train_loss.result(),
                                       message="NaN's or Inf's.",
                                       name='NAN_assertion'),
        bert_f1_score.numpy()))
def train_step(input_ids, target_ids, grad_accum_flag):
    target_inp = target_ids[:, :-1]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input_ids, target_inp)
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights,
         refine_predictions, refine_attention_weights) = Model(
            input_ids,
            dec_padding_mask=dec_padding_mask,
            target_ids=target_inp,
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            training=True,
        )
        train_variables = Model.trainable_variables
        loss, target = loss_function(target_ids, draft_predictions, refine_predictions, Model)
        predictions = refine_predictions if refine_predictions is not None else draft_predictions
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    if config.accumulate_gradients:
        # Initialize the shadow variables with the same type as the gradients
        if not gradient_accumulators:
            for tv in gradients:
                gradient_accumulators.append(tf.Variable(tf.zeros_like(tv), trainable=False))
        # Accumulate the gradients into the shadow variables
        for (accumulator, grad) in zip(gradient_accumulators, gradients):
            accumulator.assign_add(grad)
        # Apply the gradients and reset the accumulators when the flag is set
        if grad_accum_flag:
            optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
            for accumulator in gradient_accumulators:
                accumulator.assign(tf.zeros_like(accumulator))
            train_loss(loss)
            train_accuracy(target, predictions)
    else:
        optimizer.apply_gradients(zip(gradients, train_variables))
        train_loss(loss)
        train_accuracy(target, predictions)
    return predictions
def main(config: str) -> None:
    # ('production') is just a string, which would turn the check into a
    # substring test; a one-element tuple is intended here.
    if config not in ('production',):
        raise ValueError(f'Unknown deployment environment "{config}"')
    try:
        # Dataset
        logging.info("Creating dataset...")
        data_configuration = helpers.get_configuration(config, data_configurations)
        dataset = Dataset(config=data_configuration)
        dataset.create()
        # Model
        logging.info("Creating model...")
        model_configuration = helpers.get_configuration(config, model_configurations)
        model = Model(model_configuration, input_dataset=dataset)
        model.build_model()
        model.build_annoy_representations(feature_type='item', is_cab=True)
        model.build_annoy_representations(feature_type='item', is_cab=False)
        # Prediction
        logging.info("Creating predictions...")
        prediction_configuration = helpers.get_configuration(config, prediction_configurations)
        predictor = UserItemPrediction(config=prediction_configuration)
        predictor.get_similar_items(product_id=prediction_configuration.DEFAULT_ITEM_EG, rec_type=1)
        predictor.get_similar_items(product_id=prediction_configuration.DEFAULT_ITEM_EG, rec_type=2)
        predictor.get_lightfm_recommendation(
            user_index=prediction_configuration.DEFAULT_USER_EG,
            use_precomputed_scores=False)
    except Exception as e:
        logging.exception(e)
    else:
        logging.info('Success @run.py')
def train_step(input_ids, target_ids_, target_ids, draft_mask, refine_mask, grad_accum_flag):
    with tf.GradientTape() as tape:
        (draft_predictions, draft_attention_weights,
         refine_predictions, refine_attention_weights) = Model(input_ids, target_ids_, True)
        train_variables = Model.trainable_variables
        draft_output_sequence_loss = loss_function(target_ids[:, 1:, :],
                                                   draft_predictions, draft_mask)
        if config.use_refine_decoder:
            refine_output_sequence_loss = loss_function(target_ids[:, :-1, :],
                                                        refine_predictions, refine_mask)
            predictions = refine_predictions
            target = target_ids_[:, :-1]
        else:
            refine_output_sequence_loss = 0
            predictions = draft_predictions
            target = target_ids_[:, 1:]
        regularization_loss = tf.add_n(Model.losses)
        loss = draft_output_sequence_loss + refine_output_sequence_loss + regularization_loss
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    # Initialize the shadow variables with the same type as the gradients
    if not gradient_accumulators:
        for tv in gradients:
            gradient_accumulators.append(tf.Variable(tf.zeros_like(tv), trainable=False))
    # Accumulate the gradients into the shadow variables
    for (accumulator, grad) in zip(gradient_accumulators, gradients):
        accumulator.assign_add(grad)
    # Apply the gradients and reset the accumulators when the flag is set
    if grad_accum_flag:
        optimizer.apply_gradients(zip(gradient_accumulators, train_variables))
        for accumulator in gradient_accumulators:
            accumulator.assign(tf.zeros_like(accumulator))
        train_loss(loss)
        train_accuracy(target, predictions)
    return predictions
def translate():
    en_input = input('Enter the English sentence-> ')
    en_input = preprocess(en_input)
    input_CLS_ID = source_tokenizer.vocab_size
    input_SEP_ID = source_tokenizer.vocab_size + 1
    target_CLS_ID = target_tokenizer.vocab_size
    target_SEP_ID = target_tokenizer.vocab_size + 1
    input_ids = tf.convert_to_tensor(
        [[input_CLS_ID] + source_tokenizer.encode(en_input) + [input_SEP_ID]])
    dec_padding_mask = create_padding_mask(input_ids)
    start = time.time()
    preds_draft_summary, _, _, _ = Model.predict(input_ids, dec_padding_mask)
    translated_sequence = target_tokenizer.decode([
        i for i in tf.squeeze(preds_draft_summary)
        if i not in [target_CLS_ID, target_SEP_ID, config.PAD_ID]
    ])
    print(f'Translated output --> {translated_sequence if translated_sequence else "EMPTY"}')
    print(f'Time taken --> {round(time.time()-start)} seconds')
def generate():
    en_input = input('Enter the sentence-> ')
    en_input = preprocess(en_input)
    input_ids = tf.constant(source_tokenizer.encode(en_input))[None, :]
    dec_padding_mask = create_padding_mask(input_ids)
    start = time.time()
    (preds_draft_summary, _,
     preds_refine_summary, _, _) = Model.predict(input_ids,
                                                 batch_size=1,
                                                 draft_decoder_type='topktopp',
                                                 beam_size=10,
                                                 length_penalty=0.6,
                                                 temperature=1,
                                                 top_p=0.9,
                                                 top_k=25)
    generated_sequence = target_tokenizer.decode(tf.squeeze(preds_refine_summary),
                                                 skip_special_tokens=True)
    print(f'Translated output--> {generated_sequence if generated_sequence else "EMPTY"}')
    print(f'Time to process --> {round(time.time()-start)} seconds')
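# The 'topktopp' draft decoder combines top-k and nucleus (top-p) filtering of
# the next-token logits. The repo's own implementation is not shown in this
# section; the sketch below illustrates the filtering step under that
# assumption (the function name and the -1e9 masking value are illustrative,
# and float32 logits are assumed).
import tensorflow as tf

def topk_topp_filter(logits, top_k=25, top_p=0.9):
    # logits: (batch, vocab_size) next-token logits
    neg_inf = tf.fill(tf.shape(logits), -1e9)
    # 1) keep only the top_k highest logits
    top_values, _ = tf.math.top_k(logits, k=top_k)
    kth_value = top_values[:, -1, tf.newaxis]
    logits = tf.where(logits < kth_value, neg_inf, logits)
    # 2) nucleus filtering: keep the smallest set of tokens whose cumulative
    #    probability first exceeds top_p
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
    keep = tf.reduce_sum(tf.cast(cumulative_probs < top_p, tf.int32), axis=-1) + 1
    cutoff = tf.gather(sorted_logits, keep - 1, batch_dims=1)[:, tf.newaxis]
    return tf.where(logits < cutoff, neg_inf, logits)

# Sampling would then draw the next token from the filtered distribution,
# e.g. tf.random.categorical(topk_topp_filter(logits) / temperature, 1).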
tokenized_string = source_tokenizer.encode(sample_string)
log.info('Tokenized string is {}'.format(tokenized_string))
original_string = source_tokenizer.decode(tokenized_string)
log.info('The original string: {}'.format(original_string))
assert original_string == sample_string, 'Encoding issue with tokenizer'

if config.check_predictions_shape:
    temp_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)
    temp_target = tf.random.uniform((64, 36), dtype=tf.int64, minval=0, maxval=200)
    (draft_predictions, draft_attention_weights,
     refine_predictions, refine_attention_weights) = Model(temp_input,
                                                           dec_padding_mask=None,
                                                           enc_padding_mask=None,
                                                           look_ahead_mask=None,
                                                           target_ids=temp_target,
                                                           training=False,
                                                           )
    log.info(f'The output shape of the sample model is '
             f'{tf.shape(draft_predictions if refine_predictions is None else refine_predictions)}')

if config.gpu_memory_test:
    memory_limit = 85
    gpu_usage = check_gpu_usage()
    while float(gpu_usage[:-1]) < memory_limit:
        gpu_usage = change_dataset_and_train(config.tokens_per_batch, config.train_batch_size)
        config.tokens_per_batch += 50
    log.info(f'GPU memory exceeded {memory_limit}% hence stopping the training')
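# check_gpu_usage is assumed to return GPU memory utilisation as a percentage
# string (e.g. '43%'), since the loop above strips the trailing character
# before converting to float. A hypothetical sketch using nvidia-smi; the
# exact query and parsing are assumptions, not taken from the repo.
import subprocess

def check_gpu_usage():
    # returns e.g. '43%' for the first visible GPU
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=utilization.memory', '--format=csv,noheader'],
        capture_output=True, text=True, check=True)
    first_gpu = result.stdout.strip().splitlines()[0]   # e.g. '43 %'
    return first_gpu.replace(' ', '')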
if __name__ == '__main__':
    # Instantiate the model
    temp_input = tf.random.uniform((2, 19), dtype=tf.int64, minval=0, maxval=200)
    temp_target = tf.random.uniform((2, 12), dtype=tf.int64, minval=0, maxval=200)
    _ = Model(
        temp_input,
        dec_padding_mask=None,
        enc_padding_mask=None,
        look_ahead_mask=None,
        target_ids=temp_target,
        training=False,
    )
    ck_pt_mgr = check_ckpt(config.checkpoint_path)
    log_dir = os.path.join(config.tensorboard_log, embedding_projector_dir)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    filename = input('Enter the filename:- ')
    file_path = os.path.join(config.output_sequence_write_path, filename)
    input_sentences = []
    hypothesis = []
    with tf.io.gfile.GFile(file_path, 'r') as f:
        for line in f.readlines():
            (source, _, hyp) = line.split('\t')
            input_sentences.append(source)
            hypothesis.append(hyp)
def main(argv):
    date = datetime.datetime.now()
    # Dataset object.
    dataset = Pipeline(FLAGS.base_path, FLAGS.image_h, FLAGS.image_w)
    handle = dataset.handle
    # Load data lists.
    train_x, train_y, train_n, valid_x, valid_y, valid_n = dataset.createList(valid_size=0.2)
    # Datasets and iterator creation.
    dataset_train = dataset.createDataset(train_x, train_y, train_n, FLAGS.batch_size_train)
    train_iterator = dataset.initializeIterator(dataset_train, one_shot=False)
    dataset_valid = dataset.createDataset(valid_x, valid_y, valid_n, FLAGS.batch_size_valid)
    valid_iterator = dataset.initializeIterator(dataset_valid, one_shot=False)
    # Train data returned by the iterator.
    batch = dataset.createIterator(dataset_train)
    # Model object.
    model = Model(dataset.n_classes, batch[0], batch[1], FLAGS.learning_rate)
    save_dir = FLAGS.save_dir
    # Saver object.
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    os.makedirs(save_dir + '/' + date.strftime('%y_%m_%d-%H_%M'))
    save_dir = save_dir + '/' + date.strftime('%y_%m_%d-%H_%M')
    save_path = os.path.join(save_dir, 'best_validation')
    # Number of steps for training and validation.
    n_steps_train = int(len(train_x) / FLAGS.batch_size_train)
    n_steps_valid = int(len(valid_x) / FLAGS.batch_size_valid)
    # Initialize TensorFlow session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Handle: decides which dataset (train or valid) is fed to each operation.
        train_handle = sess.run(train_iterator.string_handle())
        valid_handle = sess.run(valid_iterator.string_handle())
        v_loss_train = []
        v_loss_valid = []
        v_acc_train = []
        v_acc_valid = []
        # Early-stopping parameters.
        # Best validation accuracy obtained so far.
        best_validation_accuracy = 0.0
        # Last epoch where validation accuracy improved.
        last_improvement = 0
        # Maximum number of epochs without improvement; once reached, training stops.
        improvement_epochs = 10
        for epoch in range(FLAGS.n_epochs):
            # Train the model for one epoch.
            print("\nTraining...")
            sess.run(train_iterator.initializer)
            sum_loss_train = 0
            sum_acc_train = 0
            i = 0
            while True:
                try:
                    _, loss_train, acc_train = sess.run(
                        [model.optimizer, model.loss, model.accuracy],
                        feed_dict={handle: train_handle, model.keep_prob: 0.5})
                    sum_loss_train += loss_train
                    sum_acc_train += acc_train
                    showProgress(epoch, i, n_steps_train, loss_train, acc_train)
                    checkRAM()
                    i += 1
                except tf.errors.OutOfRangeError:
                    mean_loss_train = sum_loss_train / n_steps_train
                    mean_acc_train = sum_acc_train / n_steps_train
                    v_loss_train.append(mean_loss_train)
                    v_acc_train.append(mean_acc_train)
                    showEpochResults(mean_loss_train, mean_acc_train)
                    break
            # Validate the model for one epoch.
            print("\nValidating...")
            sess.run(valid_iterator.initializer)
            sum_loss_valid = 0
            sum_acc_valid = 0
            j = 0
            while True:
                try:
                    loss_valid, acc_valid = sess.run(
                        [model.loss, model.accuracy],
                        feed_dict={handle: valid_handle, model.keep_prob: 1})
                    sum_loss_valid += loss_valid
                    sum_acc_valid += acc_valid
                    showProgress(epoch, j, n_steps_valid, loss_valid, acc_valid)
                    checkRAM()
                    j += 1
                except tf.errors.OutOfRangeError:
                    mean_loss_valid = sum_loss_valid / n_steps_valid
                    mean_acc_valid = sum_acc_valid / n_steps_valid
                    v_loss_valid.append(mean_loss_valid)
                    v_acc_valid.append(mean_acc_valid)
                    showEpochResults(mean_loss_valid, mean_acc_valid)
                    break
            # If validation accuracy improved in this epoch, update the best value
            # and save the trained variables.
            if mean_acc_valid > best_validation_accuracy:
                best_validation_accuracy = mean_acc_valid
                last_improvement = epoch
                saver.save(sess=sess, save_path=save_path)
                print('Improvement')
            # If there has been no improvement for a while, stop training.
            if epoch - last_improvement > improvement_epochs:
                print('No improvements in a while. Stopping optimization.')
                break
        # Write the training parameters to a text file and save it.
        f = open(save_dir + '/parameters.txt', 'w')
        f.write(
            'Data set:\t{}\nClasses:\t{}\nValidation set size:\t{}\nEpochs number:\t{}\n'
            'Batch size train:\t{}\nBatch size validation:\t{}\nLearning rate:\t{}\n'
            'Image size:\t{},{}\nBest validation accuracy:\t{}'.format(
                FLAGS.base_path, str(dataset.classes), str(FLAGS.valid_size),
                str(FLAGS.n_epochs), str(FLAGS.batch_size_train),
                str(FLAGS.batch_size_valid), str(FLAGS.learning_rate),
                str(FLAGS.image_h), str(FLAGS.image_w),
                str(best_validation_accuracy)))
        f.close()
        # Plot training results.
        plotResults(1, v_loss_train, v_loss_valid, loss=True,
                    title='Train and validation loss')
        plotResults(2, v_acc_train, v_acc_valid, loss=False,
                    title='Train and validation accuracy')
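# main(argv) above reads several command-line flags. A minimal sketch of how
# they could be defined with the TF1 flags API and how the script would be
# launched; the flag names are taken from their usage above, while the default
# values and help strings are illustrative assumptions.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('base_path', './data', 'Root folder of the image dataset.')
tf.app.flags.DEFINE_integer('image_h', 224, 'Image height.')
tf.app.flags.DEFINE_integer('image_w', 224, 'Image width.')
tf.app.flags.DEFINE_integer('batch_size_train', 32, 'Training batch size.')
tf.app.flags.DEFINE_integer('batch_size_valid', 32, 'Validation batch size.')
tf.app.flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate.')
tf.app.flags.DEFINE_float('valid_size', 0.2, 'Fraction of the data used for validation.')
tf.app.flags.DEFINE_integer('n_epochs', 100, 'Maximum number of training epochs.')
tf.app.flags.DEFINE_string('save_dir', './checkpoints', 'Directory for saved models.')

if __name__ == '__main__':
    tf.app.run(main)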