/
tools.py
48 lines (41 loc) · 1.42 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
__author__ = 'lorenzo'
import requests
import simplejson as json
import html.parser
from codecs import raw_unicode_escape_decode
def retrieve_url(url):
    """
    Utility: issue a GET request and hand back the response.

    :param url: address to request
    :return: whatever ``requests.get(url)`` returns, unmodified
    """
    response = requests.get(url)
    return response
def retrieve_json(url, method='GET', data=None):
    """
    Utility: fetch a URL and decode its body as JSON.

    The response text has its HTML entities unescaped, is passed through
    ``raw_unicode_escape_decode`` and is then handed to ``json.loads``.

    :param url: URL to fetch
    :param method: the method to use for the request, 'GET' or 'POST'
    :param data: request body for POST; must not be None when method is 'POST'
    :return: the value decoded from the response's body
    :raises ValueError: if method is neither 'GET' nor 'POST', or if method
        is 'POST' and data is None
    :raises json.JSONDecodeError: if the processed body is not valid JSON
    """
    print(url)

    # Validate the arguments and perform the request; everything after this
    # point is shared between GET and POST.
    if method == 'GET':
        response = requests.get(url)
    elif method == 'POST':
        if data is None:
            raise ValueError('retrieve_json(): data for POST cannot be None')
        response = requests.post(url, data=data)
    else:
        raise ValueError('retrieve_json(): Wrong Method')

    # html.unescape() replaces HTMLParser().unescape(), which was removed in
    # Python 3.9.
    text = html.unescape(response.text)

    # avoid unicode escaping problems (double backslash encoding)
    # NOTE(review): the codec is given a str rather than bytes; this may
    # garble non-ASCII characters in the body -- confirm against real data.
    decoded = raw_unicode_escape_decode(text)[0]

    # A JSON decode error propagates to the caller unchanged.
    return json.loads(decoded)