-
Notifications
You must be signed in to change notification settings - Fork 0
/
web.py
134 lines (117 loc) · 4.26 KB
/
web.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
*Availability: 3+*
The web class contains essential web-related functions for interaction with web applications or websites in your modules.
It supports HTTP GET, HTTP POST and HTTP HEAD.
"""
"""
web.py - Web Facilities
Copyright © 2008, Sean B. Palmer, inamidst.com
Copyright © 2009, Michael Yanovich <yanovich.1@osu.edu>
Copyright © 2012, Dimitri Molenaars, Tyrope.nl.
Copyright © 2012, Elad Alfassa, <elad@fedoraproject.org>
Licensed under the Eiffel Forum License 2.
More info:
http://willie.dftba.net
"""
import re, urllib2
from htmlentitydefs import name2codepoint
#HTTP GET
def get(uri, timeout=20):
    """
    Execute an HTTP GET query on `uri`, and return the response body.

    `timeout` is an optional argument, which represents how long we should
    wait before a timeout exception is thrown. It defaults to 20, but can be
    set to higher values if you are communicating with a slow web application.

    Returns `None`, without performing any request, when `uri` does not start
    with ``http``.
    """
    if not uri.startswith('http'):
        return
    u = get_urllib_object(uri, timeout)
    try:
        return u.read()
    finally:
        # Release the connection even when read() fails part-way through
        # (for instance on a timeout), instead of leaking it.
        u.close()
# Get HTTP headers
def head(uri, timeout=20):
    """
    Execute an HTTP GET query on `uri`, and return only the response headers.

    `timeout` is an optional argument giving how long to wait before a
    timeout exception is thrown. It defaults to 20; raise it when talking to
    a slow web application.

    Nothing is requested, and `None` is returned, when `uri` does not start
    with ``http``.
    """
    if uri.startswith('http'):
        response = get_urllib_object(uri, timeout)
        headers = response.info()
        # The body is never read; only the header object is handed back.
        response.close()
        return headers
# HTTP POST
def post(uri, query):
    """
    Execute an HTTP POST query and return the response body.

    `uri` is the target URI, and `query` is the POST data; it is URL-encoded
    with `urllib.urlencode` before being sent.

    Returns `None`, without performing any request, when `uri` does not start
    with ``http``.
    """
    if not uri.startswith('http'):
        return
    # urlencode lives in urllib; urllib2 does not provide it, so the previous
    # `urllib2.urlencode` call raised AttributeError on every POST.
    import urllib
    data = urllib.urlencode(query)
    u = urllib2.urlopen(uri, data)
    try:
        return u.read()
    finally:
        # Release the connection even when read() fails part-way through.
        u.close()
# Matches an HTML entity reference such as `&amp;`, `&#65;` or `&#x41;`.
r_entity = re.compile(r'&([^;\s]+);')


def entity(match):
    """
    Return the replacement text for one `r_entity` match.

    Numeric references (`&#65;`, `&#x41;`) become the character with that
    code point; named references are looked up in `name2codepoint`. Anything
    that cannot be resolved is rendered as the lower-cased name wrapped in
    square brackets, e.g. `&bogus;` becomes `[bogus]`.
    """
    name = match.group(1)
    value = name.lower()
    try:
        if value.startswith('#x'):
            return unichr(int(value[2:], 16))
        if value.startswith('#'):
            return unichr(int(value[1:]))
    except (ValueError, OverflowError):
        # Malformed digits or a code point outside the supported range:
        # use the same bracketed fallback as for unknown names.
        return '[' + value + ']'
    # Entity names are case-sensitive (`Eacute` and `eacute` are different
    # characters), so try the exact spelling first. The lower-cased form is
    # kept as a second attempt so sloppy input such as `&AMP;` still decodes.
    for candidate in (name, value):
        if candidate in name2codepoint:
            return unichr(name2codepoint[candidate])
    return '[' + value + ']'


def decode(html):
    """Replace every HTML entity reference in `html` using `entity`."""
    return r_entity.sub(entity, html)
# For internal use in web.py (modules can also use this if they need a urllib object they can call read() on).
# It handles redirects and makes sure the input URI is UTF-8 encoded.
def get_urllib_object(uri, timeout):
    """
    Return a urllib2 response object for `uri`, opened with `timeout`.

    This is more convenient than using urllib2 directly: it encodes the URI
    as UTF-8, sends fixed `Accept` and `User-Agent` headers, and contains a
    loop that follows redirects (giving up after 50 of them).

    Modules may use this if they need a urllib2 object to call .read() on.
    For more information, refer to the urllib2 documentation.

    When the server answers with an HTTP error status, the error's underlying
    file object (`e.fp`) is returned instead of raising. Other exceptions
    raised by `urllib2.urlopen` propagate to the caller.
    """
    # Function-scope import: the redirect branch needs urlparse.urljoin, and
    # the module's top-level imports do not bring `urlparse` into scope.
    import urlparse

    redirects = 0
    try:
        uri = uri.encode("utf-8")
    except (UnicodeError, AttributeError):
        # Best effort only: if the URI cannot be re-encoded, use it as given.
        pass
    while True:
        req = urllib2.Request(uri, headers={'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (Jenni)'})
        try:
            u = urllib2.urlopen(req, None, timeout)
        except urllib2.HTTPError as e:
            return e.fp
        info = u.info()
        # NOTE(review): u.info() normally returns a message object rather
        # than a list, in which case the status is treated as 200 and the
        # loop ends after one request -- confirm whether the list form is
        # still produced anywhere.
        if not isinstance(info, list):
            status = '200'
        else:
            status = str(info[1])
            try:
                info = info[0]
            except (IndexError, TypeError):
                pass
        if not status.startswith('3'):
            break
        uri = urlparse.urljoin(uri, info['Location'])
        # The redirecting response is no longer needed; close it before
        # requesting the new location so it is not leaked.
        u.close()
        redirects += 1
        if redirects >= 50:
            # NOTE(review): this returns a plain string, not a response
            # object, so callers that go on to call .read()/.info() will
            # fail with AttributeError. Kept as-is for compatibility.
            return "Too many re-directs."
    return u
#Identical to urllib2.quote
def quote(string):
    """
    Percent-encode `string`, exactly as urllib2.quote does.

    Handy when your module already imports web and you would rather not
    import urllib2 just for the quote function.
    """
    escape = urllib2.quote
    return escape(string)
# Matches one double-quoted JSON string literal, including escaped characters.
r_string = re.compile(r'("(\\.|[^"\\])*")')
# Whitelist of characters allowed outside string literals: punctuation,
# number characters, whitespace, and the letters of true/false/null.
r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
# Matches one backslash escape (backslash plus the character it escapes).
_r_escape = re.compile(r'\\(.)')
# Evaluation namespace: no builtins, only the three JSON keyword constants.
env = {'__builtins__': None, 'null': None, 'true': True, 'false': False}


def _json_string_to_python(match):
    """Turn one JSON string literal into an equivalent Python unicode literal."""
    def fix_escape(esc):
        # JSON defines `\/` as an escape for `/`, but a Python literal would
        # keep the backslash. Escapes are consumed pairwise from the left, so
        # an escaped backslash followed by a slash (`\\/`) is left untouched.
        if esc.group(1) == '/':
            return '/'
        return esc.group(0)
    return 'u' + _r_escape.sub(fix_escape, match.group(1))


def json(text):
    """
    Evaluate JSON text safely (we hope) and return the resulting object.

    String literals are first removed and the remainder is checked against a
    character whitelist (`r_json`). Only if that passes is the text, with its
    strings rewritten as unicode literals, evaluated in a namespace without
    builtins.

    Raises ValueError when the text fails the whitelist check.
    """
    if r_json.match(r_string.sub('', text)):
        text = r_string.sub(_json_string_to_python, text)
        # SECURITY NOTE(review): this still relies on eval(). The whitelist
        # and the empty builtins limit what can run, but consider switching
        # to the standard library's JSON parser for untrusted input.
        return eval(text.strip(' \t\r\n'), env, {})
    raise ValueError('Input must be serialised JSON.')
# Script entry point.
# NOTE(review): no `main` function is defined in the visible source, so
# running this module directly would raise NameError -- confirm, then either
# define `main` or drop this guard.
if __name__=="__main__":
    main()