def run_eval(ckpt_path='/content/drive/My Drive/Text_summarization/BERT_text_summarisation/cnn_checkpoints/ckpt-69'):
  restore_chkpt(ckpt_path)
  if config.use_tfds:
    examples, metadata = tfds.load(
                                   config.tfds_name, 
                                   with_info=True, 
                                   as_supervised=True, 
                                   data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset',
                                   builder_kwargs={"version": "2.0.0"}
                                   )
    test_examples = examples['test']
    test_buffer_size = metadata.splits['test'].num_examples
    test_dataset = map_batch_shuffle(
                                     test_examples, 
                                     test_buffer_size, 
                                     split='test',
                                     batch_size=h_parms.batch_size
                                     )
    log.info('Test TF_dataset created')
    test_dataset = test_dataset.take(1)
  else:
    test_dataset = infer_data_from_df()
  ref_sents = []
  hyp_sents = []
  for (doc_id, (input_ids, _, _, target_ids, _, _)) in tqdm(enumerate(test_dataset, 1)):
    start_time = time.time()
    draft, refined_summary, att = predict_using_beam_search(
                                                            input_ids, 
                                                            beam_size=3, 
                                                            refine_decoder_type='greedy'
                                                            )
    for tar, ref_hyp in zip(target_ids, refined_summary):
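      # drop [PAD]=0, [CLS]=101 and [SEP]=102 ids before detokenizing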
      sum_ref = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(tar) if i not in [0, 101, 102]])
      sum_hyp = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(ref_hyp) if i not in [0, 101, 102]])
      sum_ref = convert_wordpiece_to_words(sum_ref)
      sum_hyp = convert_wordpiece_to_words(sum_hyp)
      ref_sents.append(sum_ref)
      hyp_sents.append(sum_hyp)
  try:
    rouges = rouge_all.get_scores(ref_sents, hyp_sents)
    avg_rouge_f1 = np.mean([
        np.mean([rouge_scores['rouge-1']['f'],
                 rouge_scores['rouge-2']['f'],
                 rouge_scores['rouge-l']['f']]) for rouge_scores in rouges])
    _, _, bert_f1 = b_score(ref_sents, hyp_sents, lang='en', model_type=config.pretrained_bert_model)
    avg_bert_f1 = np.mean(bert_f1.numpy())
  except Exception:
    # scoring can fail (e.g. on empty hypotheses), so fall back to zero scores
    avg_rouge_f1 = 0
    avg_bert_f1 = 0
  print(infer_template.format('beam_search', 'greedy', avg_rouge_f1, avg_bert_f1, 3))
  print(f'time to process document {doc_id} : {time.time()-start_time}')
  print(f'Calculating scores for {len(ref_sents)} golden summaries and {len(hyp_sents)} predicted summaries')
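
The scoring block above averages the ROUGE-1/2/L F1 values per summary and then over the whole test split, and averages BERTScore F1 across all pairs. A minimal standalone sketch of that step, assuming rouge_all wraps rouge.Rouge() and b_score wraps bert_score.score() (both backends are assumptions; note that the rouge package's get_scores expects hypotheses first, then references):

import numpy as np
from rouge import Rouge                    # assumed backend for rouge_all
from bert_score import score as b_score    # assumed backend for b_score

def average_scores(hyp_sents, ref_sents, bert_model='bert-base-uncased'):
  # get_scores returns one dict per pair: {'rouge-1': {'f': ...}, 'rouge-2': ..., 'rouge-l': ...}
  rouges = Rouge().get_scores(hyp_sents, ref_sents)
  avg_rouge_f1 = np.mean([
      np.mean([r['rouge-1']['f'], r['rouge-2']['f'], r['rouge-l']['f']])
      for r in rouges])
  # BERTScore returns precision, recall and F1 tensors with one entry per pair
  _, _, bert_f1 = b_score(hyp_sents, ref_sents, lang='en', model_type=bert_model)
  return avg_rouge_f1, float(bert_f1.mean())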
Example #2
                                            rouge_score,
                                            bert_score
                                           )
                      )
              log.info(evaluation_step.format(step+1, time.time() - start))
              log.info(checkpoint_details.format(step+1, ckpt_save_path))

              # Print metrics:
              pattern = re.compile(r'[\W_]+')
              infer_ckpt = '75'
              ckpt = tf.train.Checkpoint(model=model)
              ckpt.restore(
                  'ckpt_dir/content/drive/My Drive/Text_summarization/BERT_text_summarisation/Summarization_inference_ckps/ckpt-' + infer_ckpt).expect_partial()

              train_examples = examples['train']
              train_dataset = map_batch_shuffle(train_examples, 100, split='train', shuffle=True, batch_size=1,
                                                filter_off=False)
              for (step, (input_ids, input_mask, input_segment_ids, target_ids_, target_mask, target_segment_ids)) in enumerate(
                      train_dataset):
                  sum_hyp = tokenizer.convert_ids_to_tokens([i for i in tf.squeeze(input_ids) if i not in [CLS_ID, SEP_ID, 0]])
                  ip_ids = tokenizer.encode(' '.join(sum_hyp))
                  preds_draft_summary, preds_refined_summary, refine_attention_dist = predict_using_beam_search(
                      tf.convert_to_tensor([ip_ids]),
                      refine_decoder_sampling_type='topktopp',
                      k=7,
                      p=0.8)
                  reference = tokenizer.convert_ids_to_tokens(
                      [i for i in tf.squeeze(target_ids_) if i not in [CLS_ID, SEP_ID, 0]])
                  reference = ' '.join(list(reference))
                  sum_hyp = tokenizer.convert_ids_to_tokens(
                      [i for i in tf.squeeze(preds_refined_summary) if i not in [CLS_ID, SEP_ID, 0]])
                  summary = convert_wordpiece_to_words(sum_hyp)
            avg_rouge_f1 = 0
            avg_bert_f1 = 0
        print(
            infer_template.format(draft_type, refine_type, avg_rouge_f1,
                                  avg_bert_f1))
        print(f'time to process document {doc_id} : {time.time()-start_time}')
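
The refine decoder in the fragment above is driven with refine_decoder_sampling_type='topktopp', k=7, p=0.8, i.e. top-k filtering followed by nucleus (top-p) filtering of the next-token logits before sampling. The repo's actual implementation is not shown in this listing; the helper below is only an illustrative TensorFlow sketch of that filtering step, with all names assumed.

import tensorflow as tf

def topk_topp_filter(logits, k=7, p=0.8):
    # logits: [batch, vocab] next-token scores; discarded tokens are pushed to -1e9
    # so they get (effectively) zero probability after softmax.
    neg_inf = tf.fill(tf.shape(logits), -1e9)

    # Top-k: keep only the k highest-scoring tokens per row.
    kth_largest = tf.math.top_k(logits, k=k).values[:, -1:]
    logits = tf.where(logits < kth_largest, neg_inf, logits)

    # Top-p (nucleus): keep the smallest prefix of sorted tokens whose mass reaches p.
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    probs = tf.nn.softmax(sorted_logits, axis=-1)
    mass_before = tf.cumsum(probs, axis=-1, exclusive=True)
    keep_sorted = mass_before < p               # always keeps the single most probable token
    cutoff = tf.reduce_min(
        tf.where(keep_sorted, sorted_logits, tf.fill(tf.shape(sorted_logits), 1e9)),
        axis=-1, keepdims=True)
    return tf.where(logits < cutoff, neg_inf, logits)

# One sampled token id per batch row from the filtered distribution:
# next_token = tf.random.categorical(topk_topp_filter(next_token_logits, k=7, p=0.8), num_samples=1)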


if __name__ == '__main__':
    # Restore the model's checkpoint
    #restore_chkpt('/content/drive/My Drive/Text_summarization/BERT_text_summarisation/cnn_checkpoints/ckpt-43')
    restore_chkpt(file_path.infer_ckpt_path)
    if config.use_tfds:
        examples, metadata = tfds.load(
            config.tfds_name,
            with_info=True,
            as_supervised=True,
            data_dir='/content/drive/My Drive/Text_summarization/cnn_dataset')
        test_examples = examples['test']
        test_buffer_size = metadata.splits['test'].num_examples
        test_dataset = map_batch_shuffle(test_examples,
                                         test_buffer_size,
                                         split='test',
                                         batch_size=h_parms.batch_size)
        log.info('Test TF_dataset created')
        # Number of samples to use
        test_dataset = test_dataset.take(50)
    else:
        test_dataset = infer_data_from_df()
    run_inference(test_dataset)
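
Both examples detokenize the BERT wordpieces with convert_wordpiece_to_words before scoring. That helper is not shown in this listing; a plausible minimal version (a sketch, not necessarily the repo's implementation) simply merges '##' continuation pieces back onto the preceding word:

def convert_wordpiece_to_words(wordpieces):
    words = []
    for piece in wordpieces:
        if piece.startswith('##') and words:
            words[-1] += piece[2:]      # glue continuation pieces onto the previous word
        else:
            words.append(piece)
    return ' '.join(words)

# convert_wordpiece_to_words(['the', 'summa', '##riz', '##ation', 'model'])
# -> 'the summarization model'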