goodproxy.py
""" A multithreaded proxy checker with anonymity analysis
Given a file containing a list of proxies, in a form of ip:port, attempts
to connect through each proxy to a local web server. If successful, the web
server collects the headers coming out of the proxy and return them to the
calling thread for anonymity analysis.
Anonymity levels are defined as follows:
Level 1: Elite Proxy - hides source IP and that it is a proxy
Level 2: Anonymous Proxy - hides source IP but labels itself as a proxy
Level 3: Transparent Proxy - both source IP and proxy details are visible
Because this program spins off a local web server to simulate a recipient of
proxied requests - the port it runs on needs to be port-forwarded on
your router.
Usage:
goodproxy.py [-h] -wanip WANIP [-port PORT] [-file FILE]
[-timeout TIMEOUT] [-threads THREADS]
Parameters:
-wanip -- your external IP (as at whatismyip.org)
-port -- for the local web server (default 80)
-file -- filename with a list of proxies per line (default proxies.txt)
-timeout -- time in seconds for connecting to a proxy (default 1.0)
-threads -- number of threads to boost performance (default 8)
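
Example (illustrative values):
    goodproxy.py -wanip 203.0.113.7 -port 8080 -file proxies.txt -threads 16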

Functions:
    test_proxy -- does the actual connecting through a proxy
    main       -- creates daemon threads, writes results to a file

Output:
    Creates a result.csv file with a comma-delimited list of proxies and
    results such as the anonymity level, time to connect, and the headers
    sent out by the proxy.
"""
import argparse
import http.client
import json
import logging
import queue
import socket
import sys
import threading
import time
import urllib.error
import urllib.request

import simpleserver
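
# NOTE: simpleserver is a companion module from the same repository (not
# shown here). Judging from how it is used below, the assumed interface is
# simpleserver.start(port): start a local HTTP server on the given port, in
# a background thread, which answers each request with a JSON-encoded list
# of the request's [header, value] pairs.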
""" Utility Functions """
def loadProxyList(args, proxy_list):
""" load a list of proxies from the proxy file """
with open(args.file) as proxyfile:
for line in proxyfile:
proxy_list.put(line.strip())


def saveResults(good_proxies):
    """ save results to file """
    with open("result.csv", 'w') as result_file:
        result_file.write('PROXY,LEVEL,TIME,HEADERS\n')
        result_file.write('\n'.join(good_proxies))
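
# An illustrative result.csv row (all values made up for the example):
#   203.0.113.45:8080,Elite,1.2,[['Accept-Encoding', 'identity']]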


def processInputParameters(argv):
    """ Process input parameters """
    parser = argparse.ArgumentParser(
        description='A multithreaded proxy checker and anonymity analyzer.')
    parser.add_argument(
        '-wanip', help='your external IP (whatismyip.org)', required=True)
    parser.add_argument(
        '-port', help='port for the local web server (default 80)',
        default=80, type=int)
    parser.add_argument(
        '-file', help='a file with a list of proxies (default proxies.txt)',
        default="proxies.txt")
    parser.add_argument(
        '-timeout',
        type=float, help='timeout in seconds (default 1.0)', default=1.0)
    parser.add_argument(
        '-threads', type=int, help='number of threads (default 8)',
        default=8)
    return parser.parse_args(argv)


def test_proxy(
        url_timeout, proxy_list, lock, good_proxies, bad_proxies, wanip, port):
    """ Attempt to connect through a proxy.

    This function runs in a daemon thread and loops continuously, blocking
    on the proxy_list queue until a proxy becomes available. Once a proxy
    is extracted, a connection to the local web server is attempted through
    it, using a URL consisting of wanip:port; the queue counts the item as
    pending until task_done() is called for it. Results from successful
    connections are saved into the good_proxies list. Exceptions, like
    connect failures, are ignored since we are interested in working
    proxies only.
    """
    while True:

        # take an item from the proxy list queue; get() blocks until an
        # item becomes available
        try:
            proxy_ip = proxy_list.get()
        except Exception:
            logging.debug(
                "Queue.get() error {0}".format(sys.exc_info()[0]))
            continue
        start = time.time()

        # configure a per-thread opener that routes through the proxy.
        # install_opener() is deliberately avoided: it swaps the
        # process-wide opener and would let threads clobber each other
        proxy = urllib.request.ProxyHandler({'http': proxy_ip})
        opener = urllib.request.build_opener(proxy)

        # some sites block frequent querying from programmatic methods so
        # set a header to simulate a browser
        request = urllib.request.Request(
            "http://{0}:{1}".format(wanip, port),
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                                   'Chrome/43.0.2357.134 Safari/537.36'})
        # attempt to establish a connection
        try:
            response = opener.open(
                request,
                timeout=float(url_timeout)).read().decode("utf-8")
        except (urllib.error.URLError,
                socket.error, http.client.HTTPException):
            # ignore the usual errors related to bad proxies like
            # connectivity timeouts, refused connections, HTTPError,
            # URLError, etc. (HTTPError is a subclass of URLError)
            proxy_list.task_done()
            continue
        except Exception:
            # report serious errors
            logging.debug(
                "Unexpected error for {0} : {1}".format(
                    proxy_ip,
                    sys.exc_info()[0]))
            proxy_list.task_done()
            continue
        # the response from the local web server will be in JSON
        # format and will contain the headers sent out by the proxy
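        # e.g. an illustrative payload (shape inferred from the parsing
        # below): [["X-Forwarded-For", "198.51.100.23"], ["Via", "1.1 x"]]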
        try:
            headers_json = json.loads(response)
        except ValueError:
            # if unable to parse the response into JSON then skip this proxy
            logging.debug(
                "JSON parsing error for {0} : {1}".format(
                    proxy_ip,
                    sys.exc_info()[0]))
            proxy_list.task_done()
            continue
        # sanity check: the payload must be a list of two-element
        # [header, value] pairs or something was wrong with the JSON or the
        # headers, so skip the proxy
        if not (isinstance(headers_json, list) and
                all(isinstance(item, list) and len(item) == 2
                    for item in headers_json)):
            proxy_list.task_done()
            continue

        # parse out the keys and values for easier comparison
        header_keys = [str(item[0]).upper() for item in headers_json]
        header_values = [str(item[1]).upper() for item in headers_json]
        # analyze headers to decide which level of anonymity this proxy
        # exhibits. Transparent proxies show the source IP and may contain
        # the X-Forwarded-For header. Anonymous proxies don't send out the
        # source IP but advertise themselves as being proxies. Anything
        # else can be classified as an Elite proxy, which shows neither the
        # info about the source nor that it is a proxy. The WAN IP may be
        # embedded in a longer value (an X-Forwarded-For chain, say), so
        # scan each value for it as a substring.
        if any(wanip in value for value in header_values):
            proxy_type = "Transparent"
        elif any("FORWARD" in key or "VIA" in key or "PROXY" in key
                 for key in header_keys):
            proxy_type = "Anonymous"
        else:
            proxy_type = "Elite"
        print(
            "{0: <21} {1: <12} {2:>5.1f}s {3}".format(
                proxy_ip,
                proxy_type,
                time.time() - start,
                headers_json))
        # save the proxy and analysis results to a list.
        # threading.Lock() is used to prevent multiple threads from
        # corrupting this list as it is a shared resource
        with lock:
            good_proxies.append(
                "{0},{1},{2:.1f},{3}".format(
                    proxy_ip,
                    proxy_type,
                    time.time() - start,
                    headers_json))
        # mark this queue item as done; once every proxy has been marked,
        # proxy_list.join() in main() unblocks
        proxy_list.task_done()


def main(argv):
    """ Main Function

    Loads proxies from a file and spins off a simple web server in a
    sub-thread. Then creates a number of daemon threads which monitor a
    queue for available proxies to test. Once completed, successful results
    are written out to a result.csv file.
    """
    proxy_list = queue.Queue()  # holds the proxy ip:ports to test
    lock = threading.Lock()     # guards the shared good_proxies/bad_proxies
    good_proxies = []           # proxies that passed connectivity tests
    bad_proxies = []            # proxies that failed connectivity tests

    # configure logging
    logging.basicConfig(filename="tester.log", level=logging.DEBUG)

    # parse input parameters
    args = processInputParameters(argv)

    # load in a list of proxies from a text file
    loadProxyList(args, proxy_list)

    # start the local web server
    simpleserver.start(args.port)
    # setup daemons ^._.^
    for _ in range(args.threads):
        worker = threading.Thread(
            target=test_proxy,
            args=(
                args.timeout,
                proxy_list,
                lock,
                good_proxies,
                bad_proxies,
                args.wanip,
                args.port))
        worker.daemon = True  # Thread.setDaemon() is deprecated
        worker.start()
    start = time.time()

    try:
        # block the main thread until the proxy list queue becomes empty
        proxy_list.join()
    except KeyboardInterrupt:
        print("Interrupted; saving results collected so far")

    saveResults(good_proxies)

    # some metrics
    print("Finished in {0:.1f}s".format(time.time() - start))


if __name__ == "__main__":
    main(sys.argv[1:])