def test_run_summarization(self):
    """End-to-end smoke test of run_summarization on the XSum sample fixture.

    Fine-tunes t5-small for a handful of steps via the script's CLI entry
    point, then checks that the reported ROUGE metrics clear minimal
    sanity thresholds.
    """
    # Mirror trainer logging onto stdout so the test run captures it.
    logger.addHandler(logging.StreamHandler(sys.stdout))

    output_dir = self.get_auto_remove_tmp_dir()
    cli_args = [
        "run_summarization.py",
        "--model_name_or_path", "t5-small",
        "--train_file", "tests/fixtures/tests_samples/xsum/sample.json",
        "--validation_file", "tests/fixtures/tests_samples/xsum/sample.json",
        "--output_dir", output_dir,
        "--overwrite_output_dir",
        "--max_steps=50",
        "--warmup_steps=8",
        "--do_train",
        "--do_eval",
        "--learning_rate=2e-4",
        "--per_device_train_batch_size=2",
        "--per_device_eval_batch_size=1",
        "--predict_with_generate",
    ]

    # Run the script exactly as if invoked from the command line.
    with patch.object(sys, "argv", cli_args):
        run_summarization.main()

    metrics = get_results(output_dir)
    # Loose lower bounds: with 50 steps on the tiny fixture the model
    # should still beat these scores comfortably.
    self.assertGreaterEqual(metrics["eval_rouge1"], 10)
    self.assertGreaterEqual(metrics["eval_rouge2"], 2)
    self.assertGreaterEqual(metrics["eval_rougeL"], 7)
    self.assertGreaterEqual(metrics["eval_rougeLsum"], 7)
def main(text):
    """Summarize *text* with the pointer-generator model and print it as JSON.

    Converts the input to the model's binary format, runs the decoder, then
    reads back the decoded abstract, splits it into sentences, capitalizes
    each sentence, and prints the joined result as a JSON string on stdout.

    Fix: removed a leftover debug line (``print("DIMAAAAA")``) that polluted
    stdout immediately before the JSON payload, breaking any caller that
    parses this function's output.
    """
    _text_to_binary(text, '../news-summarizer-ai/pointer-generator/data/val')
    run_summarization.main('work')

    with open(DECODE_PATH, 'r', encoding="utf-8") as f:
        abstract = f.read()

    # Sentence-split the decoded summary and capitalize each sentence.
    sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = [sent.capitalize() for sent in sent_tokenizer.tokenize(abstract)]
    result = ' '.join(sentences)

    print(json.dumps(result))
def test():
    """Invoke the summarizer with a fixed experiment configuration.

    NOTE(review): positional argument meanings (5, 0.95, 100, 200) are not
    visible from here — presumably beam size / threshold / length limits;
    confirm against run_summarization.main's signature.
    """
    input_dir = 'Stories'
    output_dir = "Summaries"
    run_summarization.main(input_dir, output_dir, 5, 0.95, 100, 200)
def main():
    """Run the summarization setup step, then start the Flask app."""
    run_summarization.main()
    # Bind to all network interfaces so the app is reachable externally.
    bind_host = '0.0.0.0'
    app.run(host=bind_host)