import time

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tqdm import tqdm

# restore_chkpt, map_batch_shuffle, infer_data_from_df, predict_using_beam_search,
# convert_wordpiece_to_words, tokenizer, config, h_parms, log, rouge_all, b_score
# and infer_template all come from the project's own modules.


def run_eval(ckpt_path='/content/drive/My Drive/Text_summarization/BERT_text_summarisation/cnn_checkpoints/ckpt-69'):
    restore_chkpt(ckpt_path)
    if config.use_tfds:
        examples, metadata = tfds.load(
            config.tfds_name,
            with_info=True,
            as_supervised=True,
            data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset',
            builder_kwargs={"version": "2.0.0"}
        )
        test_examples = examples['test']
        test_buffer_size = metadata.splits['test'].num_examples
        test_dataset = map_batch_shuffle(
            test_examples,
            test_buffer_size,
            split='test',
            batch_size=h_parms.batch_size
        )
        log.info('Test TF_dataset created')
        test_dataset = test_dataset.take(1)
    else:
        test_dataset = infer_data_from_df()
    ref_sents = []
    hyp_sents = []
    for (doc_id, (input_ids, _, _, target_ids, _, _)) in tqdm(enumerate(test_dataset, 1)):
        start_time = time.time()
        draft, refined_summary, att = predict_using_beam_search(
            input_ids,
            beam_size=3,
            refine_decoder_type='greedy'
        )
        for tar, ref_hyp in zip(target_ids, refined_summary):
            # strip [PAD] (0), [CLS] (101) and [SEP] (102) before detokenizing
            sum_ref = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(tar) if i not in [0, 101, 102]])
            sum_hyp = tokenizer.convert_ids_to_tokens(
                [i for i in tf.squeeze(ref_hyp) if i not in [0, 101, 102]])
            sum_ref = convert_wordpiece_to_words(sum_ref)
            sum_hyp = convert_wordpiece_to_words(sum_hyp)
            ref_sents.append(sum_ref)
            hyp_sents.append(sum_hyp)
        try:
            rouges = rouge_all.get_scores(ref_sents, hyp_sents)
            avg_rouge_f1 = np.mean(
                [np.mean([rouge_scores['rouge-1']["f"],
                          rouge_scores['rouge-2']["f"],
                          rouge_scores['rouge-l']["f"]]) for rouge_scores in rouges])
            _, _, bert_f1 = b_score(ref_sents, hyp_sents, lang='en',
                                    model_type=config.pretrained_bert_model)
            avg_bert_f1 = np.mean(bert_f1.numpy())
        except:
            avg_rouge_f1 = 0
            avg_bert_f1 = 0
        print(infer_template.format('beam_search', 'greedy', avg_rouge_f1, avg_bert_f1, 3))
        print(f'time to process document {doc_id} : {time.time()-start_time}')
    print(f'Calculating scores for {len(ref_sents)} golden summaries and {len(hyp_sents)} predicted summaries')
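# The reference and predicted summaries above are rebuilt from WordPiece tokens
# with the project's convert_wordpiece_to_words helper. As a rough, illustrative
# sketch of what that step involves (named differently here so it does not
# shadow the real helper), merging the '##' continuation pieces looks like this:
def merge_wordpieces_sketch(wordpieces):
    """Glue WordPiece continuation tokens (e.g. ['sum', '##mar', '##y'])
    back onto the preceding token and return plain text."""
    words = []
    for piece in wordpieces:
        if piece.startswith('##') and words:
            words[-1] += piece[2:]
        else:
            words.append(piece)
    return ' '.join(words)

# merge_wordpieces_sketch(['the', 'sum', '##mar', '##y'])  ->  'the summary'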
                          rouge_score,
                          bert_score))
    log.info(evaluation_step.format(step + 1, time.time() - start))
    log.info(checkpoint_details.format(step + 1, ckpt_save_path))

# Print metrics:
pattern = re.compile(r'[\W_]+')
infer_ckpt = '75'
ckpt = tf.train.Checkpoint(model=model)
ckpt.restore(
    'ckpt_dir/content/drive/My Drive/Text_summarization/BERT_text_summarisation/Summarization_inference_ckps/ckpt-'
    + infer_ckpt).expect_partial()
train_examples = examples['train']
train_dataset = map_batch_shuffle(train_examples, 100, split='train',
                                  shuffle=True, batch_size=1, filter_off=False)
for (step, (input_ids, input_mask, input_segment_ids, target_ids_,
            target_mask, target_segment_ids)) in enumerate(train_dataset):
    # drop [CLS], [SEP] and [PAD] ids, then re-encode the document text
    sum_hyp = tokenizer.convert_ids_to_tokens(
        [i for i in tf.squeeze(input_ids) if i not in [CLS_ID, SEP_ID, 0]])
    ip_ids = tokenizer.encode(' '.join(sum_hyp))
    preds_draft_summary, preds_refined_summary, refine_attention_dist = predict_using_beam_search(
        tf.convert_to_tensor([ip_ids]),
        refine_decoder_sampling_type='topktopp',
        k=7,
        p=0.8)
    reference = tokenizer.convert_ids_to_tokens(
        [i for i in tf.squeeze(target_ids_) if i not in [CLS_ID, SEP_ID, 0]])
    reference = ' '.join(list(reference))
    sum_hyp = tokenizer.convert_ids_to_tokens(
        [i for i in tf.squeeze(preds_refined_summary) if i not in [CLS_ID, SEP_ID, 0]])
    summary = convert_wordpiece_to_words(sum_hyp)
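# predict_using_beam_search is called above with
# refine_decoder_sampling_type='topktopp', i.e. the refine decoder samples from
# logits filtered first by top-k and then by nucleus (top-p) filtering. The
# sketch below shows what such a filter generally looks like for logits of
# shape [batch, vocab]; the function name and the -1e9 masking value are
# illustrative assumptions, not the repository's actual implementation.
def topk_topp_filter_sketch(logits, k=7, p=0.8):
    mask_value = tf.fill(tf.shape(logits), -1e9)

    # top-k: drop everything below the k-th largest logit
    kth_largest = tf.math.top_k(logits, k=k).values[:, -1:]
    logits = tf.where(logits < kth_largest, mask_value, logits)

    # top-p: keep the smallest set of tokens whose probability mass reaches p
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    probs = tf.nn.softmax(sorted_logits, axis=-1)
    cumulative = tf.cumsum(probs, axis=-1)
    keep = (cumulative - probs) < p   # mass accumulated *before* each token
    min_kept = tf.reduce_min(
        tf.where(keep, sorted_logits,
                 tf.fill(tf.shape(sorted_logits), tf.float32.max)),
        axis=-1, keepdims=True)
    return tf.where(logits < min_kept, mask_value, logits)

# filtered = topk_topp_filter_sketch(decoder_logits, k=7, p=0.8)
# next_token = tf.random.categorical(filtered, num_samples=1)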
            avg_rouge_f1 = 0
            avg_bert_f1 = 0
        print(infer_template.format(draft_type, refine_type, avg_rouge_f1, avg_bert_f1))
        print(f'time to process document {doc_id} : {time.time()-start_time}')


if __name__ == '__main__':
    # Restore the model's checkpoints
    # restore_chkpt('/content/drive/My Drive/Text_summarization/BERT_text_summarisation/cnn_checkpoints/ckpt-43')
    restore_chkpt(file_path.infer_ckpt_path)
    if config.use_tfds:
        examples, metadata = tfds.load(
            config.tfds_name,
            with_info=True,
            as_supervised=True,
            data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset')
        test_examples = examples['test']
        test_buffer_size = metadata.splits['test'].num_examples
        test_dataset = map_batch_shuffle(
            test_examples,
            test_buffer_size,
            split='test',
            batch_size=h_parms.batch_size)
        log.info('Test TF_dataset created')
        # Number of samples to use
        test_dataset = test_dataset.take(50)
    else:
        test_dataset = infer_data_from_df()
    run_inference(test_dataset)
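# (Separate quick check, not part of the script above.) run_inference reports
# the same averaged ROUGE and BERTScore F1 numbers as run_eval. Assuming that
# rouge_all wraps the `rouge` package and b_score is bert_score.score, the
# metrics can be reproduced on a couple of hand-written pairs like this:
from rouge import Rouge
from bert_score import score as bert_scorer
import numpy as np

refs = ['the cat sat on the mat', 'he walked the dog in the park']
hyps = ['a cat was sitting on the mat', 'he took the dog for a walk in the park']

# Rouge().get_scores expects hypotheses first, then references
rouges = Rouge().get_scores(hyps, refs)
avg_rouge_f1 = np.mean([np.mean([r['rouge-1']['f'], r['rouge-2']['f'], r['rouge-l']['f']])
                        for r in rouges])

# bert_score returns per-pair precision, recall and F1 tensors
_, _, bert_f1 = bert_scorer(hyps, refs, lang='en', model_type='bert-base-uncased')

print(f'avg ROUGE F1     : {avg_rouge_f1:.3f}')
print(f'avg BERTScore F1 : {bert_f1.mean().item():.3f}')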