forked from scrapinghub/splash
/
utils.py
187 lines (151 loc) · 5.19 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
from __future__ import absolute_import
import os
import gc
import sys
import json
import base64
import inspect
import resource
from collections import defaultdict
import functools
import psutil
import six
# Unique sentinel object (compared by identity).
# NOTE(review): not referenced in the visible part of this module; presumably
# it marks "no default supplied" for required arguments -- confirm with callers.
_REQUIRED = object()
class BadRequest(Exception):
    """Exception type used to signal a bad request."""
class BinaryCapsule(object):
    """Container pairing raw binary ``data`` with its ``content_type``."""

    def __init__(self, data, content_type):
        self.content_type = content_type
        self.data = data

    def as_b64(self):
        """Return ``data`` base64-encoded, as a text (unicode) string."""
        encoded = base64.b64encode(self.data)
        return encoded.decode('utf-8')

    # NOTE(review): __repr__ is kept commented out, exactly as found:
    # def __repr__(self):
    #     data_repr = truncated(repr(self.data), max_length=40)
    #     return "BinaryCapsule(%s, %r)" % (data_repr, self.content_type)
class SplashJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes BinaryCapsule objects as base64 text."""

    def default(self, o):
        # Anything that is not a BinaryCapsule is delegated to the base class.
        if not isinstance(o, BinaryCapsule):
            return super(SplashJSONEncoder, self).default(o)
        return o.as_b64()
def to_unicode(text, encoding=None, errors='strict'):
    """Decode a bytes object ``text`` into unicode.

    Unicode input is returned unchanged. ``encoding`` falls back to
    ``'utf-8'`` when it is None. Any other input type raises TypeError.
    """
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, bytes):
        codec = 'utf-8' if encoding is None else encoding
        return text.decode(codec, errors)
    raise TypeError('to_unicode must receive a bytes, str or unicode '
                    'object, got %s' % type(text).__name__)
def to_bytes(text, encoding=None, errors='strict'):
    """Encode a string ``text`` into bytes.

    Bytes input is returned unchanged. ``encoding`` falls back to
    ``'utf-8'`` when it is None. Any other input type raises TypeError.
    """
    if isinstance(text, bytes):
        return text
    if isinstance(text, six.string_types):
        codec = 'utf-8' if encoding is None else encoding
        return text.encode(codec, errors)
    raise TypeError('to_bytes must receive a unicode, str or bytes '
                    'object, got %s' % type(text).__name__)
# PID of the current process, captured once when this module is imported
# (read by get_num_fds).
# NOTE(review): the value would be stale in a child process forked after
# import -- confirm whether that can happen here.
PID = os.getpid()
def get_num_fds():
    """Return the number of file descriptors psutil reports for PID."""
    process = psutil.Process(PID)
    try:
        fd_count = process.num_fds()
    except AttributeError:  # psutil < 2.0
        fd_count = process.get_num_fds()
    return fd_count
def get_alive():
    """Count live, gc-tracked instances of a fixed set of class names.

    Returns a plain dict mapping class name -> number of instances; names
    with no live instances are absent from the result.
    """
    tracked_names = {
        'SplashQWebPage', 'SplashQNetworkAccessManager',
        'HtmlRender', 'PngRender', 'JsonRender', 'HarRender', 'LuaRender',
        'QWebView', 'QWebPage', 'QWebFrame',
        'QNetworkRequest', 'QNetworkReply', 'QNetworkProxy',
        'QSize', 'QBuffer', 'QPainter', 'QImage', 'QUrl', 'QTimer',
        'SplashCookieJar', 'OneShotCallbackProxy',
        '_ExposedRequest', '_ExposedBoundRequest',
        '_ExposedResponse', '_ExposedBoundResponse',
        '_ExposedTimer',
        'BrowserTab', '_SplashHttpClient', 'JavascriptConsole',
        'ProfilesSplashProxyFactory',
        'SplashProxyRequest', 'Request', 'Deferred',
        'LuaRuntime', '_LuaObject', '_LuaTable', '_LuaIter', '_LuaThread',
        '_LuaFunction', '_LuaCoroutineFunction', 'LuaError', 'LuaSyntaxError',
        'AsyncBrowserCommand',
    }
    tally = {}
    for obj in gc.get_objects():
        # Only instances are counted, never the class objects themselves.
        if inspect.isclass(obj):
            continue
        type_name = type(obj).__name__
        if type_name not in tracked_names:
            continue
        tally[type_name] = tally.get(type_name, 0) + 1
    return tally
def get_leaks():
    """Run a garbage collection, then return the counts from get_alive()."""
    gc.collect()
    survivors = get_alive()
    return survivors
def get_ru_maxrss():
    """Return the max RSS usage of this process, in bytes."""
    usage = resource.getrusage(resource.RUSAGE_SELF)
    if sys.platform == 'darwin':
        # on Mac OS X ru_maxrss is already in bytes
        return usage.ru_maxrss
    # elsewhere (e.g. Linux) ru_maxrss is in KB
    return usage.ru_maxrss * 1024
def get_total_phymem():
    """Return the total amount of physical memory, as reported by psutil."""
    try:
        total = psutil.virtual_memory().total
    except AttributeError:  # psutil < 2.0
        total = psutil.phymem_usage().total
    return total
def truncated(text, max_length=100, msg='...'):
    """
    Cut ``text`` down to its first ``max_length`` items and append ``msg``,
    but only when something is actually cut off. Text that already fits
    (including text of exactly ``max_length``) is returned unchanged.

    >>> truncated("hello world!", 5)
    'hello...'
    >>> truncated("hello world!", 25)
    'hello world!'
    >>> truncated("hello world!", 5, " [truncated]")
    'hello [truncated]'
    >>> truncated("hello", 5)
    'hello'
    """
    # `<=` rather than `<`: text of exactly max_length loses nothing when
    # sliced, so it must not get the truncation marker appended.
    if len(text) <= max_length:
        return text
    return text[:max_length] + msg
def dedupe(it):
    """
    Lazily yield the elements of ``it`` in order, skipping any element
    that was already yielded. Elements must be hashable.

    >>> list(dedupe([3,1,3,1,2]))
    [3, 1, 2]
    """
    already_yielded = set()
    for item in it:
        if item not in already_yielded:
            already_yielded.add(item)
            yield item
def path_join_secure(base, *paths):
    """
    Join ``base`` with one or more path components and return the
    resulting absolute path.

    Unlike os.path.join, ValueError is raised when the result does not lie
    below ``base``. A result equal to ``base`` itself is rejected as well,
    because the check requires the ``base`` prefix including its trailing
    separator.
    """
    root = os.path.abspath(base)
    if not root.endswith(os.path.sep):
        # the trailing separator keeps "/a/bc" from matching the base "/a/b"
        root += os.path.sep
    candidate = os.path.abspath(os.path.join(root, *paths))
    if candidate.startswith(root):
        return candidate
    raise ValueError("Resulting path %r is outside %r." % (candidate, root))
def requires_attr(attr_name, raiser):
    """
    Build a method decorator that guards on an instance attribute.

    Before each call of the decorated method, the attribute ``attr_name``
    is looked up on ``self``; if it is missing or None,
    ``raiser(self, meth, attr_name)`` is invoked. The wrapped method is
    then called (reached only if ``raiser`` did not raise).
    """
    def decorator(meth):
        @functools.wraps(meth)
        def checked(self, *args, **kwargs):
            current = getattr(self, attr_name, None)
            if current is None:
                raiser(self, meth, attr_name)
            return meth(self, *args, **kwargs)
        return checked
    return decorator