# summarize.py
# NOTE: The repository this file comes from was archived by its owner
# on Nov 25, 2023 and is now read-only.
"""
Parses a list of URLs, performs data extraction,
and renders the output in HTML format as news articles.
"""
def render(template, **kwargs):
    """
    Render the named Jinja2 template and return the resulting text.

    `template` is the file name of a template inside the "templates"
    directory located next to this module. Every keyword argument is
    passed to the template as a context variable.

    Each summary handed to the template has to be an instance of
    summary.Summary, or at least contain similar properties: title,
    image, url, description and collections: titles, images,
    descriptions.
    """
    import jinja2
    import os.path

    # Templates are resolved relative to this file, not the current
    # working directory.
    templates_dir = os.path.join(os.path.dirname(__file__), "templates")

    # NOTE(review): the Environment is created without autoescape, so
    # context values are inserted as-is unless the template escapes them
    # itself -- confirm this is intended for scraped content.
    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=templates_dir)
    )
    return environment.get_template(template).render(**kwargs)
def summarize(urls):
    """
    Extract a summary for each of the given URLs.

    For every URL a summary.Summary is created and its extract() method
    is called. If that raises, a plain dict placeholder describing the
    error (keys: titles, urls, descriptions, source) is stored instead,
    so the returned list holds one entry per processed URL.
    A KeyboardInterrupt stops the loop early; everything collected up to
    that point is still returned.

    Returns a tuple (summaries, result, speed):
      summaries -- list of Summary instances and/or error placeholders,
      result    -- the "Success: ..." / "Fails: ..." line (also printed),
      speed     -- average seconds per processed URL formatted with
                   "%.2f"; "0.00" when nothing was processed.
    """
    import time
    from summary import Summary

    def error_name(error):
        # Class name of the exception, used as a short label.
        return error.__class__.__name__

    fails = 0
    summaries = []
    start = time.time()
    for url in urls:
        try:
            print("-> %s" % url)
            summary = Summary(url)
            summary.extract()
        except KeyboardInterrupt:
            # Let the user abort a long run but keep the partial results.
            break
        except Exception as e:
            # Best effort: one bad URL must not abort the whole batch, so
            # record a placeholder that carries the error details.
            fails += 1
            summary = {
                'titles': ["[%s]" % error_name(e)],
                'urls': [url],
                'descriptions': [str(e)],
                'source': url,
            }
            print("[%s] (%s): %s" % (error_name(e), e, url))
        summaries.append(summary)
    duration = time.time() - start

    if fails:
        result = "Fails: %s out of %s." % (fails, len(summaries))
    else:
        result = "Success: %s." % len(summaries)
    print(result)

    # An empty URL list (or an interrupt before the first URL finished)
    # leaves nothing to average over; avoid dividing by zero.
    if summaries:
        speed = "%.2f" % (duration / len(summaries))
    else:
        speed = "0.00"
    return summaries, result, speed
if __name__ == '__main__':
    # Script entry point: summarize the URLs below and write the rendered
    # page to news.html in the current working directory.
    urls = []
    url = 'https://levelup.gitconnected.com/awesome-terminal-applications-e4a06022dffa'
    urls.append(url)

    # Alternative input: load URLs from urls.txt, skipping blank lines
    # and lines starting with "#".
    # with open('urls.txt', 'r') as file:
    #     urls.extend([url.strip() for url in file if not url.strip().startswith("#") \
    #         and url.strip() != ""])

    summaries, result, speed = summarize(urls)
    page = render(template="news.html",
                  summaries=summaries, result=result, speed=speed)

    # Write UTF-8 explicitly: the platform default encoding may not be
    # able to represent every character of the rendered page.
    with open('news.html', 'w', encoding='utf-8') as out:
        out.write(page)