def test_person_summary():
    s = Summarizer()
    people = [
        {
            "gender": "F",
            "image": "https://example.com/image1",
            "party": [{"name": "Democratic"}, {"name": "Democratic", "end_date": "1990"}],
        },
        {
            "gender": "F",
            "image": "https://example.com/image2",
            "party": [{"name": "Democratic"}, {"name": "Working Families"}],
            "extras": {"religion": "Zoroastrian"},
            "contact_details": [{"fax": "123-456-7890", "note": "Capitol Office"}],
            "other_identifiers": [{"scheme": "fake", "identifier": "abc"}],
            "ids": {"twitter": "fake"},
        },
        {
            "gender": "M",
            "image": "https://example.com/image3",
            "party": [{"name": "Republican"}],
            "contact_details": [{"phone": "123-456-7890", "note": "Capitol Office"}],
            "other_identifiers": [{"scheme": "fake", "identifier": "123"}],
        },
    ]
    for p in people:
        s.summarize(p)
    assert s.parties == {"Republican": 1, "Democratic": 2, "Working Families": 1}
    assert s.contact_counts == {"Capitol Office phone": 1, "Capitol Office fax": 1}
    assert s.id_counts == {"fake": 2, "twitter": 1}
    assert s.optional_fields == {"gender": 3, "image": 3}
    assert s.extra_counts == {"religion": 1}
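# A minimal, hypothetical sketch (not the project's actual implementation) of a
# summarizer whose counters would satisfy test_person_summary above. It assumes
# Counter-based tallies and that party entries carrying an "end_date" are
# historical memberships and therefore skipped.
from collections import Counter


class CountingSummarizer:
    OPTIONAL_FIELDS = ("gender", "image")

    def __init__(self):
        self.parties = Counter()
        self.contact_counts = Counter()
        self.id_counts = Counter()
        self.optional_fields = Counter()
        self.extra_counts = Counter()

    def summarize(self, person):
        # Current party memberships only; entries with an "end_date" are skipped.
        for membership in person.get("party", []):
            if "end_date" not in membership:
                self.parties[membership["name"]] += 1
        # Contact details keyed as "<note> <type>", e.g. "Capitol Office fax".
        for contact in person.get("contact_details", []):
            note = contact.get("note", "")
            for key in contact:
                if key != "note":
                    self.contact_counts[f"{note} {key}"] += 1
        # Identifier schemes plus keys of the legacy "ids" mapping.
        for identifier in person.get("other_identifiers", []):
            self.id_counts[identifier["scheme"]] += 1
        for key in person.get("ids", {}):
            self.id_counts[key] += 1
        # Presence of optional top-level fields.
        for field in self.OPTIONAL_FIELDS:
            if person.get(field):
                self.optional_fields[field] += 1
        # Arbitrary keys stored under "extras".
        for key in person.get("extras", {}):
            self.extra_counts[key] += 1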
def test_summarize_multiple(self):
    args = '{"output_ids": [], \
             "ppg_ids": ["DEF", "LTFL", "LP61"], \
             "problem_objective_ids": ["FGH"], \
             "goal_ids": ["EFG"], \
             "operation_ids": ["BEN", "LISA"], \
             "report_type": "Mid Year Report", \
             "year": 2013 }'
    s = Summarizer(env='test', args=args)
    summary = s.summarize()
    assert summary.strip() == 'the quick brown fox. the second quick brown fox.'
def test_query(self):
    args = '{"output_ids": [], \
             "ppg_ids": ["DEF", "LTFL", "LP61"], \
             "problem_objective_ids": ["FGH"], \
             "goal_ids": ["EFG"], \
             "operation_ids": ["BEN", "7VC"], \
             "report_type": "Mid Year Report", \
             "year": 2013 }'
    s = Summarizer(env='test', args=args)
    text = s.query()
    assert text.strip() == 'the quick brown fox.'
def test_summarize_large(self):
    args = '{"output_ids": [], \
             "ppg_ids": ["DEF", "LTFL", "LP61"], \
             "problem_objective_ids": ["FGH"], \
             "goal_ids": ["EFG"], \
             "operation_ids": ["BEN", "LISA", "JEFF"], \
             "report_type": "Mid Year Report", \
             "year": 2013 }'
    max_chars = 500
    s = Summarizer(env='test', args=args, max_chars=max_chars)
    summary = s.summarize()
    assert len(summary) <= max_chars
def test_get_optimal_subset(self):
    # takes about .9 seconds
    with open('sample_text.txt', encoding="utf8") as text_file:
        # Load text
        sample_text = str(text_file.read())
    # Remove unwanted text in the essay, e.g. '(fr)'
    sample_text = sample_text.replace('(fr) ', '')
    # Load document class
    summarizer = Summarizer(text=sample_text)
    # Get summary
    val, summary = summarizer.get_optimal_subset(2500, ret_as="str")
    # Write out summary
    with open('output.txt', 'w+') as w_file:
        w_file.write(summary)
def test_query_string(self):
    args = '{"output_ids": [], \
             "ppg_ids": ["LRZQ", "LTFL", "LP61"], \
             "problem_objective_ids": ["c70f5d80-a7cd-4d68-a085-aa04702c0fea"], \
             "goal_ids": ["EM"], \
             "operation_ids": ["BEN", "7VC"], \
             "report_type": "Mid Year Report", \
             "year": 2013 }'
    s = Summarizer(env='test', args=args)
    assert len(s.args['operation_ids']) == 2
    assert len(s.args['output_ids']) == 0
    assert len(s.args['ppg_ids']) == 3
    assert int(s.args['year']) == 2013
    assert s.args['report_type'] == 'Mid Year Report'
    query_string = s.query_string()
    assert s.args['report_type'] in query_string
    assert str(s.args['year']) in query_string
    assert s.args['goal_ids'][0] in query_string
    assert s.args['operation_ids'][1] in query_string
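# A hypothetical sketch (class and parameter names are assumptions, not the
# project's API) of the one behaviour the assertions above depend on: the JSON
# `args` string being parsed into a plain dict exposed as `s.args`. The real
# Summarizer presumably also handles validation, defaults, and query building.
import json


class ArgsParsingSummarizer:
    def __init__(self, env='production', args='{}'):
        self.env = env
        self.args = json.loads(args)


# Usage mirroring the test: keys come back as regular dict entries.
demo = ArgsParsingSummarizer(env='test', args='{"operation_ids": ["BEN", "7VC"], "year": 2013}')
assert demo.args['operation_ids'][1] == '7VC'
assert demo.args['year'] == 2013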
tk.withdraw()
copied_text = tk.clipboard_get()
subj, text = body_from_website(copied_text.lower())
if text == '':
    if len(copied_text) > 100:
        text = copied_text
        subj = ''
    else:
        print(f'Invalid Input: {copied_text}')
        input('Press enter to continue...')
        sys.exit()
else:
    print(f'Loading text from: {copied_text}')

document = Summarizer(text)
_, summary = document.get_optimal_subset_by_percent_words(.15, ret_as='str')

# Generate response
response_body = 'Title: ' + subj + '\n\n'
if len(summary) != 0:
    response_body += summary
else:
    response_body += 'Text Too Short'
response_body += '\n\n--------------\n\n'
print(response_body)
import pickle
from pathlib import Path

import pandas as pd
from summa import summarizer

from summarize import Summarizer

if __name__ == '__main__':
    DATA_PATH = Path('data')
    reviewtext = pd.read_pickle(DATA_PATH / 'df_processed_reviews.p')
    idx2sent = pickle.load(open(DATA_PATH / 'idx2sent.p', 'rb'))
    sent2vec = pickle.load(open(DATA_PATH / 'sent2vec.p', 'rb'))

    idx = 198
    my_summarizer = Summarizer(reviewtext, idx2sent, sent2vec)
    review, my_summary = my_summarizer.summarize(idx, n=100)
    summary = summarizer.summarize(review, ratio=0.2)
        continue
    print(f'Emails to process: {emails}')
    sender, subject, body = email_client.get_email_data(emails.split(' ')[0])
    print(f'Email received from: {sender}')
    try:
        documents = []
        # Load document class with body text
        if subject.lower() == 'summarize: url':
            for url in re.split(r'[ \t\n\r]+', str(body)):
                if url != '':
                    subj, text = body_from_website(url.lower())
                    if text != '':
                        documents.append(Summarizer(text))
        else:
            documents.append(Summarizer(str(body)))
        # Find summaries that are fifteen percent of the original word count
        summaries = []
        for document in documents:
            summaries.append(
                document.get_optimal_subset_by_percent_words(.15, ret_as='str')[1])
        # Create new email body
        response_body = ''
        for i in range(len(documents)):
            response_body += 'Title: ' + subject
            response_body += '\n\n'
from django.shortcuts import render, redirect
from django.http import HttpResponse
from app.util import ExamplePicker
from app.models import Eval
from summarize import Summarizer
import time
import json
import os
from pathlib import Path

project_root = Path(os.getcwd())
summarizer = Summarizer()
article_picker = ExamplePicker(data_path=project_root.joinpath('data').as_posix())


def demo(request):
    return_object = {
        'article': None,
        'summary': None,
        'options': ["pgn", "textrank"]
    }
    if request.method == 'GET':
        article = ""
        # article selection module
        return_object['article'] = article_picker.pick_random_article()
from summarize import Summarizer

argentina_articles = [
    "argentina/argentina-guardian.txt",
    "argentina/argentina-nyt.txt"
]
china_articles = ["china/china-cnn.txt", "china/china-nyt.txt"]
climate_articles = ["climate/climate-npr.txt", "climate/climate-nyt.txt"]
VW_articles = ["VW/VW-ars.txt", "VW/VW-nyt.txt"]
Iran_articles = ["iran/iran.txt"]

magic = Summarizer(Iran_articles)
print(magic.generate_summaries())
def test_setup(self):
    s = Summarizer(env='test')
    assert s.env == 'test'
    assert s.language == 'english'
# driver.py
# Luke Reichold - CSCI 4930

from summarize import Summarizer

argentina_articles = ["argentina/argentina-guardian.txt", "argentina/argentina-nyt.txt"]
china_articles = ["china/china-cnn.txt", "china/china-nyt.txt"]
climate_articles = ["climate/climate-npr.txt", "climate/climate-nyt.txt"]
VW_articles = ["VW/VW-ars.txt", "VW/VW-nyt.txt"]

magic = Summarizer(VW_articles)
print(magic.generate_summaries())