コード例 #1
0
    def test_run_summarization(self):
        """Run ``run_summarization.main`` on the XSum sample and check ROUGE floors.

        The script is driven through a patched ``sys.argv``: it fine-tunes
        ``t5-small`` for 50 steps on the sample fixture (used for both training
        and validation), evaluates with generation enabled, and writes into a
        temporary output directory. The results read back from that directory
        must meet minimum ROUGE-1/2/L/Lsum scores.
        """
        stream_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(stream_handler)
        # Detach the handler once the test finishes (pass or fail); otherwise it
        # stays on the shared logger and every later test emits duplicate output.
        self.addCleanup(logger.removeHandler, stream_handler)

        tmp_dir = self.get_auto_remove_tmp_dir()
        sample_file = "tests/fixtures/tests_samples/xsum/sample.json"
        # Build argv as an explicit list rather than splitting a formatted
        # string, so a temporary directory path containing whitespace is still
        # passed as a single argument.
        testargs = [
            "run_summarization.py",
            "--model_name_or_path",
            "t5-small",
            "--train_file",
            sample_file,
            "--validation_file",
            sample_file,
            "--output_dir",
            str(tmp_dir),
            "--overwrite_output_dir",
            "--max_steps=50",
            "--warmup_steps=8",
            "--do_train",
            "--do_eval",
            "--learning_rate=2e-4",
            "--per_device_train_batch_size=2",
            "--per_device_eval_batch_size=1",
            "--predict_with_generate",
        ]

        with patch.object(sys, "argv", testargs):
            run_summarization.main()
            result = get_results(tmp_dir)
            self.assertGreaterEqual(result["eval_rouge1"], 10)
            self.assertGreaterEqual(result["eval_rouge2"], 2)
            self.assertGreaterEqual(result["eval_rougeL"], 7)
            self.assertGreaterEqual(result["eval_rougeLsum"], 7)
コード例 #2
0
def main(text):
  """Summarize ``text`` and print the result as a JSON string on stdout.

  Steps, in order:
    1. Hand ``text`` to ``_text_to_binary`` together with the pointer-generator
       validation data path.
    2. Call ``run_summarization.main('work')``.
    3. Read the file at ``DECODE_PATH`` (UTF-8), split it into sentences with
       NLTK's English Punkt tokenizer, capitalize each sentence and join them
       with single spaces.
    4. Print the ``DIMAAAAA`` marker line, then the JSON-encoded summary.

  Note: ``str.capitalize`` upper-cases the first character and lower-cases the
  rest of each sentence.
  """
  _text_to_binary(text, '../news-summarizer-ai/pointer-generator/data/val')
  run_summarization.main('work')

  # Only the read needs the file handle; post-processing happens afterwards.
  with open(DECODE_PATH, 'r', encoding="utf-8") as decoded_file:
    abstract = decoded_file.read()

  punkt = nltk.data.load('tokenizers/punkt/english.pickle')
  capitalized = (piece.capitalize() for piece in punkt.tokenize(abstract))
  summary = ' '.join(capitalized)

  print("DIMAAAAA")
  print(json.dumps(summary))
コード例 #3
0
def test():
    """Invoke ``run_summarization.main`` with a fixed set of positional arguments.

    The meaning of each argument is defined by ``run_summarization.main``,
    which is not visible here; the values are passed through unchanged and in
    the original order.
    """
    fixed_args = ('Stories', "Summaries", 5, 0.95, 100, 200)
    run_summarization.main(*fixed_args)
コード例 #4
0
def main():
    """Run ``run_summarization.main()`` and then start the web application.

    ``app.run`` is only reached after ``run_summarization.main()`` returns.
    """
    run_summarization.main()

    # Start the Flask app; 0.0.0.0 makes it listen on all network interfaces.
    listen_host = '0.0.0.0'
    app.run(host=listen_host)