def personal_proxy_example(n_req=10):
    # Set PROXY_LIST and PORT_LIST in definitions.py
    pm = ProxyManager(PROXY_LIST, PORT_LIST)
    for _ in range(n_req):
        pm.set_proxy()
        foo()
def run(self):
    spider_start_time = str(datetime.now()).split('.')[0]
    print spider_start_time, 'time to spider start!'
    proxy_manager = ProxyManager()
    page = get_html(BASE_URL)
    page = unicode(page, 'GBK').encode('UTF-8')
    page_count = self.get_page_count(page)
    page_count_time = str(datetime.now()).split('.')[0]
    print page_count_time, 'get page count:', page_count
    default_ip = get_default_ip()
    if page_count != 0:
        last_proxy = None
        for i in xrange(1, page_count):
            page = get_html(URL_HEADER + str(i) + URL_END, last_proxy)
            proxy_list = filte(page)
            for proxy in proxy_list:
                if proxy.anonymous_type == '高匿':  # '高匿' = high-anonymity proxy
                    check_result = check_anonymous(proxy, default_ip)
                    spider_time = str(datetime.now()).split('.')[0]
                    if check_result[0]:
                        proxy.delay_time = check_result[1]
                        proxy.created_time = str(datetime.now()).split('.')[0]
                        proxy.is_in_china = 2
                        proxy_manager.add_proxy(proxy, spider_time)
                        last_proxy = proxy
                else:
                    pass
def run(self):
    spider_start_time = str(datetime.now()).split('.')[0]
    print spider_start_time, 'time to spider start!'
    proxy_manager = ProxyManager()
    last_proxy = None
    for url in self.urls:
        page = get_html(url)
        page_count = self.get_page_count(page)
        page_count_time = str(datetime.now()).split('.')[0]
        print page_count_time, 'get page count:', page_count
        default_ip = get_default_ip()
        for i in xrange(1, page_count):
            page = get_html(url + str(i))
            proxy_list = filte(page)
            for proxy in proxy_list:
                if proxy.anonymous_type == '高匿':  # '高匿' = high-anonymity proxy
                    check_result = check_anonymous(proxy, default_ip)
                    spider_time = str(datetime.now()).split('.')[0]
                    if check_result[0]:
                        proxy.delay_time = check_result[1]
                        proxy.created_time = str(datetime.now()).split('.')[0]
                        proxy.is_in_china = 0
                        if url.endswith(CHINA_ANONYMOUS) or url.endswith(CHINA_NORMAL):
                            proxy.is_in_china = 1
                        proxy_manager.add_proxy(proxy, spider_time)
                        last_proxy = proxy
                else:
                    pass
def __init__(self, conn, client_addr, server_ip):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
    self.client_ip = ""
    self.permission = False  # whether or not the user is authenticated
    self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
    self.KEEP_ALIVE_TIME = 115  # time to keep an idle connection alive (seconds)
    self.server_ip = server_ip
def public_proxy_example(n_req=10):
    proxy_df = get_proxy_list(n_proxy=5, anonymity='elite', https='true')
    proxy_list = proxy_df['IP'].values
    port_list = proxy_df['PORT'].values
    pm = ProxyManager(proxy_list, port_list)
    for _ in range(n_req):
        pm.set_proxy()
        foo()
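Both of the examples above call foo() as a stand-in for the work done through the rotated proxy; it is not part of the listing. A minimal sketch of what such a helper might look like, assuming set_proxy() exports HTTP_PROXY/HTTPS_PROXY environment variables (which requests picks up automatically) and using httpbin.org only as an illustrative target:

import requests

def foo(url="https://httpbin.org/ip", timeout=10):
    # Hypothetical request helper: assumes the current proxy has already been
    # applied process-wide (e.g. via HTTP_PROXY/HTTPS_PROXY) by set_proxy().
    try:
        resp = requests.get(url, timeout=timeout)
        print(resp.status_code, resp.text.strip())
    except requests.RequestException as exc:
        print("request through proxy failed:", exc)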
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]  # get id
    self.client_ip = client_addr[0]  # get ip address
    self.http_version = "1.1"
    if self.DEBUG:
        print("[proxy_thread.py -> __init__] new instance of ProxyThread() class")
def user_search_start_running():
    global bfs_queue
    bfs_queue = Queue(maxsize=0)
    bfs_queue.put(9149967)

    # start proxy manager
    global proxy_manager
    proxy_manager = ProxyManager()
    proxy_manager.retrieve_new_proxies()

    # start daemon refreshing available proxies
    t = threading.Thread(target=proxy_manager.refresh_proxies)
    t.start()

    tp = threading.Thread(target=periodic_info_backup)
    tp.start()

    retrieve_user_from_queue()
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
    self.client_address = client_addr[0]
    self.url_after_split = None
    self.http_status = None
def user_search_start_running():
    global bfs_queue
    bfs_queue = Queue(maxsize=0)
    bfs_queue.put(6546594)

    # start proxy manager
    global proxy_manager
    proxy_manager = ProxyManager()
    proxy_manager.retrieve_new_proxies()

    # restore bitarray from file
    # f = open('bitcopy.txt', 'rb')
    # user_manager.user_bit_array = pickle.load(f)
    # f.close()

    tp = threading.Thread(target=periodic_info_backup)
    tp.start()

    retrieve_user_from_queue()
def process_client_request(self, data):
    """
    Main algorithm. Note that these are high-level steps, and most of them
    may require further implementation details.
    1. get url and private mode status from client
    2. if private mode, then mask ip address: mask_ip_address method
    3. check if the resource (site) is in cache. If so and not private mode, then:
       3.1 check if site is blocked for this employee
       3.2 check if site requires credentials for this employee
       3.3 if 3.1 or 3.2, then the client needs to send a POST request to the proxy
           with credentials to check 3.1 and 3.2 access
           3.3.1 if credentials are valid, send a HEAD request to the original server
                 to check the last_date_modified parameter. If the cache header for that
                 site is outdated then move to step 4. Otherwise, send a response to the
                 client with the requested site and the appropriate status code.
    4. If site is not in cache, or last_date_modified is outdated, then create a GET
       request to the original server, and store in cache the response from the server.
    :param data:
    :return: VOID
    """
    self.data = data
    url = data["url"]
    privateMode = data["is_private_mode"]
    if privateMode == '1':
        self._mask_ip_adress()
    manager = ProxyManager()
    managerResult = manager.get_cached_resource(data)
    if data["url"] == managerResult["url"]:
        if data["is_private_mode"] == managerResult["is_private_mode"]:
            manager.is_site_blocked(data)
            if manager.is_site_blocked(data) == True:
                return True
def __init__(self, validate=True, **kwargs):
    self.proxy_mgr = ProxyManager()
    self.validate = validate
    try:
        self.address = kwargs['address']
        self.port = kwargs['port']
    except KeyError:
        raise Exception("Missing required attribute(s) address and/or port for Proxy object")
    for req in self.__class__.required_proxy_attrs:
        if not getattr(self, req):
            raise Exception("Attribute %s must evaluate to True" % req)
    for kwarg, default in self.__class__.default_proxy_attrs.items():
        if kwarg in kwargs and kwargs[kwarg]:
            default = kwargs[kwarg]
        setattr(self, kwarg, default)
    if self.validate:
        self.do_proxy_validation()
    self.proxy_id = self.id
    print(self.proxy_id)
# Proxychecker
from fastapi import FastAPI
import motor.motor_tornado
import uvicorn
from proxy_manager import ProxyManager
import asyncio


def get_db_conn():
    client = motor.motor_tornado.MotorClient("mongodb://18.185.77.185:27017/")
    return client["mar_wit"]


db_conn = get_db_conn()
proxy = ProxyManager(
    db_conn,
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt"
)
app = FastAPI()


@app.on_event("startup")
async def boot():
    await asyncio.sleep(5)
    asyncio.create_task(proxy.set())
    asyncio.create_task(proxy.check())


if __name__ == "__main__":
    uvicorn.run("proxychecker:app", port=8000, reload=True)
class Proxy(object):
    default_proxy_attrs = {'location': 'Unknown', 'protocol': 'http', 'id': None}
    required_proxy_attrs = ('address', 'port')

    def __init__(self, validate=True, **kwargs):
        self.proxy_mgr = ProxyManager()
        self.validate = validate
        try:
            self.address = kwargs['address']
            self.port = kwargs['port']
        except KeyError:
            raise Exception("Missing required attribute(s) address and/or port for Proxy object")
        for req in self.__class__.required_proxy_attrs:
            if not getattr(self, req):
                raise Exception("Attribute %s must evaluate to True" % req)
        for kwarg, default in self.__class__.default_proxy_attrs.items():
            if kwarg in kwargs and kwargs[kwarg]:
                default = kwargs[kwarg]
            setattr(self, kwarg, default)
        if self.validate:
            self.do_proxy_validation()
        self.proxy_id = self.id
        print(self.proxy_id)

    @property
    def address(self):
        return self._address

    @address.setter
    def address(self, addr):
        adr_re = r'^[\d\.]+$'
        if not re.match(adr_re, addr):
            raise Exception("Invalid address only IPV4 supported.")
        else:
            self._address = addr

    @property
    def port(self):
        return self._port

    @port.setter
    def port(self, port):
        if is_int(port):
            self._port = port
        else:
            raise Exception("value %s is not an int" % port)

    @property
    def protocol(self):
        return self._protocol

    @protocol.setter
    def protocol(self, protocol):
        if protocol not in ('http', 'https', 'ssl', 'socksV4', 'socksV5', 'socks_v4', 'socks_v5', 'ftp'):
            raise Exception("Protocol must be http, https, ssl, socksv4, socksv5, or ftp")
        else:
            self._protocol = protocol

    @property
    def id(self):
        return self._id

    @id.setter
    def id(self, id):
        if is_int(id) or id is None:
            self._id = id
            self._proxy_id = id
        else:
            raise Exception("Invalid ID, must be an integer")

    @property
    def proxy_id(self):
        return self._proxy_id

    @proxy_id.setter
    def proxy_id(self, pid):
        if pid is None:
            self._proxy_id = None
            return
        if is_int(pid):
            self._id = pid
            self._proxy_id = pid
            return

    @property
    def location(self):
        return self._location

    @location.setter
    def location(self, loc):
        self._location = loc

    @property
    def last_active(self):
        return self._last_active

    @last_active.setter
    def last_active(self, la):
        if isinstance(la, datetime.datetime):
            self._last_active = la
        else:
            raise Exception("last_active time must be instance of datetime.datetime")

    def to_string(self):
        prefix = "http"
        if re.compile("socks").search(self.protocol):
            prefix = "socks"
        return "%s://%s:%s" % (prefix, self.address, self.port)

    @classmethod
    def from_string(cls, str, **kwargs):
        regex = re.compile(r'^(http|socks)?(?:\:\/\/)?(.*):(.*)$')
        res = regex.findall(str)
        if not res:
            raise Exception("Could not parse proxy string '%s'" % str)
        else:
            res = list(res[0])
        if 'protocol' not in kwargs:
            kwargs['protocol'] = res[0]
        return Proxy(address=res[1], port=res[2], **kwargs)

    def clone(self):
        return copy.copy(self)

    def do_proxy_validation(self):
        self.proxy_mgr.get_or_create_proxy_id(self)
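The to_string/from_string pair above suggests a simple round-trip. A small illustrative use, with a made-up address and validation skipped; this assumes the unshown is_int helper accepts numeric strings and that ProxyManager() can be constructed without extra setup:

# Illustrative only: placeholder address and port.
p = Proxy.from_string("http://10.0.0.5:8080", validate=False)
print(p.protocol)     # http
print(p.to_string())  # http://10.0.0.5:8080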
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.client_id = client_addr[1]
class ProxyThread:
    """
    The proxy thread class represents a threaded proxy instance to handle
    a specific request from a client socket
    """
    BUFFER_SIZE = 4096
    KEEP_ALIVE_REQUESTS = 5  # max number of requests made over connection

    def __init__(self, conn, client_addr, server_ip):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]
        self.client_ip = ""
        self.permission = False  # whether or not the user is authenticated
        self.role = "EMPLOYEE"  # permission mode of user: "******", "user", or "EMPLOYEE"
        self.KEEP_ALIVE_TIME = 115  # time to keep an idle connection alive (seconds)
        self.server_ip = server_ip

    def get_settings(self):
        return self.proxy_manager

    def is_non_persistent(self, http_request_string):
        """
        Determines whether or not the given http_request_string has persistent
        features (Connection: Keep-Alive, HTTP/1.1, etc.)
        """
        if parse_for_field(http_request_string, "http_version") != "1.1":
            return True
        if parse_for_field(http_request_string, "Connection") == "Keep-Alive":
            # set keep-alive time
            try:
                self.KEEP_ALIVE_TIME = min(
                    int(parse_for_field(http_request_string, "Keep-Alive")),
                    self.KEEP_ALIVE_TIME)
            except ParsingError:
                # Keep-Alive field does not exist; use the default timeout
                pass
            return False
        return True

    def init_thread(self):
        """
        This is where the thread is made ready to receive data from the client,
        calling self.client.recv(..) in the appropriate loop and then processing
        the request made by the client.
        :return: VOID
        """
        try:
            # grab first request
            client_req = self._receive()
            non_persistent = self.is_non_persistent(client_req)
            num_of_requests = 0
            while True:
                # start timer
                tick = time.time()
                self.process_client_request(client_req)
                # increment number of requests/responses
                num_of_requests += 1
                # stop if non-persistent, idle time is up, or the number of requests is exceeded
                if (non_persistent
                        or time.time() - tick > self.KEEP_ALIVE_TIME
                        or num_of_requests >= self.KEEP_ALIVE_REQUESTS):
                    break
                client_req = self._receive()
        except socket.timeout:
            print("Client {} has timed out.".format(self.client_id))
        except socket.error as sock_error:
            print(f"An HTTPError occurred: {sock_error}")
        except ClientDisconnect:
            print("Client has disconnected")
        except Exception as e:
            print(f"Something went wrong: {e}")
        self.client.close()

    def _mask_ip_address(self):
        """
        When in private mode, mask the IP address to browse privately.
        This is easy if you think in terms of client-server sockets.
        :return: VOID
        """
        self.client_ip = self.server_ip

    def respond_ok(self, params):
        """ Sends 200 response to client """
        req_map = params['request_map']
        response = params['response']
        http_version = req_map["http_version"]
        content_length = len(response.text)
        response_header = "HTTP/{} 200 OK\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n".format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            content_length, response.text)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        if "Date" in response.headers:
            response_header += "Date: {}\r\n".format(response.headers["Date"])
        self._send(response_header + content_end)

    def respond_not_modified(self, params):
        """ Sends 304 response to client """
        req_map = params['request_map']
        response = params['response']
        http_version = req_map["http_version"]
        content_length = len(response.text)
        response_header = "HTTP/{} 304 Not Modified\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n".format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            content_length, response.text)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        if "Date" in response.headers:
            response_header += "Date: {}\r\n".format(response.headers["Date"])
        self._send(response_header + content_end)

    def respond_unauthorized(self, params):
        """ Sends 401 response to client """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = ("<!DOCTYPE HTML><html><head><title>401</title></head><body>"
                   "<h1>401 Unauthorized:</h1> resource is blocked or not authorized "
                   "for the current user.</body></html>")
        response_header = ('HTTP/{} 401 Unauthorized\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'
                           'Proxy-Authenticate: Basic realm="proxyserver"\r\n').format(http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        self._send(response_header + content_end)

    def respond_bad_request(self, params):
        """ Sends 400 response to client: Bad Request """
        req_map = params['request_map']
        http_version = '1.1'  # req_map["http_version"]
        content = ("<!DOCTYPE HTML><html><head><title>400</title></head><body>"
                   "<h1>400 Bad Request:</h1> the request is not understood by the "
                   "original server or the proxy server</body></html>")
        response_header = 'HTTP/{} 400 Bad Request\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        self._send(response_header + content_end)

    def respond_not_found(self, params):
        """ Sends 404 response to client: Not Found """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = ("<!DOCTYPE HTML><html><head><title>404</title></head><body>"
                   "<h1>404 Not Found:</h1> the original server has not found anything matching "
                   "the Request-URI provided by the proxy server.</body></html>")
        response_header = 'HTTP/{} 404 Not Found\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        self._send(response_header + content_end)

    def respond_need_auth(self, params):
        """ Sends 407 response to client """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = ('<!DOCTYPE HTML><html><head><title>407</title></head><body>'
                   '<h1>407 Proxy Authentication Required:</h1> the proxy needs authorization '
                   'based on client credentials in order to continue with the request.</body></html>')
        response_header = ('HTTP/{} 407 Proxy Authentication Required\r\nServer: Ricware\r\n'
                           'X-Powered-By: CODE/1.0.0\r\nProxy-Authenticate: Basic realm="proxyserver"\r\n').format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        self._send(response_header + content_end)

    def respond_forbidden(self, params):
        """ Sends 403 response to client """
        req_map = params['request_map']
        http_version = req_map["http_version"]
        content = ("<!DOCTYPE HTML><html><head><title>403</title></head><body>"
                   "<h1>403 Forbidden:</h1> the server or the proxy server understood the request "
                   "but the current user is forbidden to see the content of the resource requested. "
                   "Authorization won't work either in this case.</body></html>")
        response_header = 'HTTP/{} 403 Forbidden\r\nServer: Ricware\r\nX-Powered-By: CODE/1.0.0\r\n'.format(
            http_version)
        content_end = "Content-Length: {}\r\nContent-Type: text/html; charset=UTF-8\r\n\r\n{}".format(
            len(content), content)
        if http_version == '1.1':
            connection = req_map['Connection']
            response_header += "Connection: {}\r\nKeep-Alive: timeout={}, max={}\r\n".format(
                connection, self.KEEP_ALIVE_TIME, self.KEEP_ALIVE_REQUESTS)
        self._send(response_header + content_end)

    def handle_client_response(self, code, params):
        """ Handles which response to send to the client given the status code. """
        print("Sending: {} response to {}".format(code, self.client_id))
        if code == 200:
            self.respond_ok(params)
        elif code == 304:
            self.respond_not_modified(params)
        elif code == 401:
            self.respond_unauthorized(params)
        elif code == 400:
            self.respond_bad_request(params)
        elif code == 404:
            self.respond_not_found(params)
        elif code == 407:
            self.respond_need_auth(params)
        elif code == 403:
            self.respond_forbidden(params)
        else:
            raise Exception("Code not understood")

    def check_permissions(self):
        """
        Because the user role is known after login, check whether that role is
        allowed to access the resource.
        :return: Bool, whether the user is able to access the resource.
        """
        return self.role == "super"

    def login(self, username, password, mask=False):
        """ Gets the user role from the proxy manager; returns True if the user is authorized """
        # if mask:
        #     self._mask_ip_address()
        # check proxy manager if user/pw is in proxy manager
        if self.proxy_manager.is_admin(username, password) or self.proxy_manager.is_manager(username, password):
            self.role = "super"
            return True
        if self.proxy_manager.is_private_mode_user(username, password):
            self.role = "user"
            return True
        return False

    def handle_auth(self, request_map, response_params, blocked_sites=False):
        """
        Note that execution flows back to the calling function if the user is
        authenticating/authenticated.
        :param request_map: used to check the request
        :param response_params: used in the response
        :return: Bool, whether to continue execution
        """
        if not self.permission or request_map["method"] == "POST":
            # ask for permissions
            if request_map["method"] != "POST":
                # if not an attempt to log in and not logged in, ask to log in
                self.handle_client_response(407, response_params)
                return False
            body = request_map["body"]
            post_params = params_to_map(body)
            if "user_name" not in post_params or "password" not in post_params:
                # 403 error: something is wrong with the input params
                self.handle_client_response(403, response_params)
                return False
            username = post_params["user_name"]
            password = post_params["password"]
            self.permission = self.login(username, password, mask=True)
            if not self.permission:
                # insufficient permissions
                self.handle_client_response(401, response_params)
                return False
        if blocked_sites and not self.check_permissions():
            # unauthorized
            self.handle_client_response(401, response_params)
            return False
        return True

    def handle_cache(self, url, request_map, response_params):
        """
        Checks the cached If-Modified-Since date; if the resource is unchanged,
        responds with 304, otherwise returns execution to the caller.
        :param url: url to check
        :return: Bool, whether to continue execution
        """
        # if item is cached...
        if self.proxy_manager.is_cached(url):
            old_response = self.proxy_manager.get_cached_resource(url)
            new_head = self.head_request_to_server(url, request_map)
            # compare
            try:
                if old_response.headers["If-Modified-Since"] == new_head.headers["If-Modified-Since"]:
                    response_params['response'] = old_response
                    self.handle_client_response(304, response_params)
                    return False
            except KeyError:
                pass
        return True

    def process_client_request(self, http_request_string):
        """
        Main algorithm. Note that these are high-level steps, and most of them
        may require further implementation details.
        1. get url and private mode status from client
        2. if private mode, then mask ip address: mask_ip_address method
        3. check if the resource (site) is in cache. If so and not private mode, then:
           3.1 check if site is blocked for this employee
           3.2 check if site requires credentials for this employee
           3.3 if 3.1 or 3.2, then the client needs to send a POST request to the proxy
               with credentials to check 3.1 and 3.2 access
               3.3.1 if credentials are valid, send a HEAD request to the original server
                     to check the last_date_modified parameter. If the cache header for that
                     site is outdated then move to step 4. Otherwise, send a response to the
                     client with the requested site and the appropriate status code.
        4. If site is not in cache, or last_date_modified is outdated, then create a GET
           request to the original server, and store in cache the response from the server.
        :param http_request_string:
        :return: VOID
        """
        try:
            # get a mapping of HTTP request string to HTTP request fields
            request_map = parse_for_field(http_request_string)
            url = request_map["url"]
            url, query_params = extract_query_params(url, "is_private_mode")
            query_params = params_to_map(query_params)
            # holds data needed to parse in respond
            response_params = {'request_map': request_map}
            # get client's ip
            self.client_ip = request_map["Host"]
            if int(query_params["is_private_mode"]) == 1:
                # make sure authed
                if not self.handle_auth(request_map, response_params):
                    return
                # user has sufficient permissions at this point
                response = self.get_request_to_server(url, request_map)
            else:
                # handle_cache checks if the proxy manager has the url; if it does, it makes
                # a HEAD request and compares the if-modified-since date, sending a 304 when
                # unchanged. Otherwise execution flows back here.
                if not self.handle_cache(url, request_map, response_params):
                    return
                response = self.get_request_to_server(url, request_map)
            if self.proxy_manager.is_site_blocked(url):
                if not self.handle_auth(request_map, response_params, blocked_sites=True):
                    return
            response_params['response'] = response
            if 200 <= response.status_code < 300 or response.status_code == 304:
                if int(query_params["is_private_mode"]) != 1:
                    self.proxy_manager.update_cache(url, response)
                    self.proxy_manager.add_history(url)
                self.handle_client_response(200, response_params)
            else:
                self.handle_client_response(404, response_params)
        except (ParsingError, KeyError):
            # encountered when there is an error while parsing the headers;
            # in that case, a 400 Bad Request is sent back to the client
            self.handle_client_response(400, {})

    def _send(self, data):
        """
        Serialize data and send it with the send() method of self.client
        :param data: the response data
        :return: VOID
        """
        data_serialized = pickle.dumps(data)
        self.client.send(data_serialized)

    def _receive(self):
        """
        Deserialize the data
        :return: the deserialized data
        """
        self.client.settimeout(self.KEEP_ALIVE_TIME)
        client_request = self.client.recv(self.BUFFER_SIZE)
        if not client_request:
            raise ClientDisconnect()
        # deserialize the data
        client_data = pickle.loads(client_request)
        return client_data

    def head_request_to_server(self, url, param):
        """
        HEAD request: does not return the HTML of the site
        :param url:
        :param param: additions to the session header
        :return: the headers of the response from the original server
        """
        headers = {}
        # add custom headers
        if "Connection" in param:
            headers["Connection"] = param["Connection"]
        if "Keep-Alive" in param:
            headers["Keep-Alive"] = param["Keep-Alive"]
        response = requests.head(url, headers=headers)
        return response

    def get_request_to_server(self, url, param):
        """
        GET request
        :param url:
        :param param: additions to the session header
        :return: the complete response including the body of the response
        """
        headers = {}
        # add custom headers
        if "Connection" in param:
            headers["Connection"] = param["Connection"]
        if "Keep-Alive" in param:
            headers["Keep-Alive"] = param["Keep-Alive"]
        response = requests.get(url, headers=headers)
        return response
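The listing contains only the thread class itself; the accepting server that constructs ProxyThread(conn, client_addr, server_ip) is not shown. A minimal sketch of how such a class might be driven, assuming a plain TCP accept loop (the host, port, backlog, and daemon-thread choice are placeholders, not part of the original):

import socket
import threading

def serve(host="0.0.0.0", port=9999):
    # Hypothetical accept loop: one ProxyThread per client connection.
    server_ip = socket.gethostbyname(socket.gethostname())
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((host, port))
    listener.listen(5)
    while True:
        conn, client_addr = listener.accept()
        worker = ProxyThread(conn, client_addr, server_ip)
        threading.Thread(target=worker.init_thread, daemon=True).start()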
def __init__(self):
    log = LogHandler('refresh_schedule')
    ProxyManager.__init__(self, log)
from db_classes.saver import Saver
from proxy_manager import ProxyManager
import config as cfg
import my_log

main_logger = my_log.get_logger(__name__)
main_logger.info('program started')

saver = Saver()
proxy_manager = ProxyManager()
def getAll():
    proxies = ProxyManager(logger).getAll()
    return jsonify(proxies)
def get():
    proxy = ProxyManager(logger).get()
    return proxy if proxy else 'no proxy!'
def __init__(self):
    log = LogHandler('valid_schedule')
    ProxyManager.__init__(self, log)
    Thread.__init__(self)
def __init__(self, conn, client_addr):
    self.proxy_manager = ProxyManager()
    self.client = conn
    self.get_settings()
    self.init_thread(conn)
    self.client_id = client_addr[1]
from proxy_manager import ProxyManager
import proxy_manager
import threading
import time

pm = ProxyManager()
pm.retrieve_new_proxies()

while True:
    if len(pm.proxy_list) == 0:
        print "wait a minute"
        time.sleep(3)
    else:
        break

print pm.proxy_list
class ProxyThread(object):
    """
    The proxy thread class represents a threaded proxy instance to handle
    a specific request from a client socket
    """
    MAX_DATA_RECV = 1000000000
    DEBUG = False

    def __init__(self, conn, client_addr):
        self.proxy_manager = ProxyManager()
        self.client = conn
        self.client_id = client_addr[1]  # get id
        self.client_ip = client_addr[0]  # get ip address
        self.http_version = "1.1"
        if self.DEBUG:
            print("[proxy_thread.py -> __init__] new instance of ProxyThread() class")

    def get_settings(self):
        if self.DEBUG:
            print("[proxy_thread.py -> get_settings] called")
        return self.proxy_manager

    def init_thread(self):
        try:
            data = self._receive()
            self.process_client_request(data)
            if self.DEBUG:
                print("[proxy_thread.py -> init_thread] data received. data: \n" + str(data))
        except socket.error as err:
            print("[proxy_thread -> init_thread] error! " + str(err))

    def client_id(self):
        if self.DEBUG:
            print("[proxy_thread.py -> client_id] called. returned: " + str(self.client_id))
        return self.client_id

    def _mask_ip_adress(self):
        # the proxy server is already masking the user IP
        if self.DEBUG:
            print("[proxy_thread.py -> _mask_ip_address] set self.client_ip to: " + str(self.client_ip))

    def process_client_request(self, data):
        req = HttpHelper().convert_http_request_to_dict(data)
        self.http_version = req['http']
        url = req['url']
        is_private_mode = req['header']['is_private_mode']
        username = req['header']['username']  # fetch username
        password = req['header']['password']  # fetch password
        # if private mode then mask ip
        if is_private_mode == 1:
            self._mask_ip_adress()
        if self.proxy_manager.is_site_blocked(url):
            if self.proxy_manager.is_admin(username, password):
                self.check_cache_and_send_to_client(url)
            else:
                self.send_response_to_client(
                    str(407), "", """<!DOCTYPE html>
<html>
<head>
<title>407 Proxy Authentication Required</title>
</head>
<body>
<h1>407 Proxy Authentication Required</h1><p> the proxy needs authorization based on client credentials in order to continue with the request. (try logging in as admin)</p>
</body>
</html>""")
        elif self.proxy_manager.is_site_blocked_except_managers(url):
            if self.proxy_manager.is_manager(username, password) or self.proxy_manager.is_admin(username, password):
                self.check_cache_and_send_to_client(url)
            else:
                self.send_response_to_client(
                    str(401), "", """<!DOCTYPE html>
<html>
<head>
<title>401</title>
</head>
<body>
<h1>401 Unauthorized</h1><p> resource is blocked or not authorized for the current user</p>
</body>
</html>""")
        else:
            self.check_cache_and_send_to_client(url)

    def check_cache_and_send_to_client(self, url):
        if self.proxy_manager.is_cached(url) and not self.is_outdated_cache(url):  # <-- crashing here
            cached_site = self.proxy_manager.get_cached_resource(url)  # contains url, last_modified, and html
            self.send_response_to_client(str(200), cached_site['last_modified'], cached_site['html'])  # response
        else:
            # website not cached, or might be an outdated resource
            res = self.response_from_server({'mode': 'GET', 'url': url, 'param': []})
            if self.DEBUG:
                print("url: " + url + " status code: " + str(res.status_code))
                print("headers: " + str(res.headers))
            # add to cache
            try:
                self.proxy_manager.add_cached_resource(url, res.headers['last-modified'], str(res.content))
            except KeyError as e:
                print("proxy_thread, last-modified header did not exist: " + str(e))
                self.proxy_manager.add_cached_resource(url, res.headers['date'], str(res.content))
            # send response
            try:
                last_modified = res.headers['last-modified']
            except KeyError:
                last_modified = res.headers['date']
            self.send_response_to_client(res.status_code, last_modified, str(res.content))

    def is_outdated_cache(self, url):
        cache = self.proxy_manager.get_cached_resource(url)
        headers = self.response_from_server({'mode': 'HEAD', 'url': str(url), 'param': []})
        try:
            return headers['last-modified'] == cache['last_modified']
        except KeyError as err:
            print("outdated cache: " + str(err))
            return False  # returning False to fake success in cache

    def _send(self, data):
        try:
            serialized = pickle.dumps(data)
            self.client.send(serialized)
            if self.DEBUG:
                print("[proxy_thread.py -> _send] sent data to client: " + str(data))
        except socket.error as err:
            print("proxy_thread send failed with error %s" % err)
            return

    def _receive(self):
        while True:
            try:
                serialized = self.client.recv(self.MAX_DATA_RECV)
                data = pickle.loads(serialized)
                if self.DEBUG:
                    print("[proxy_thread.py -> _receive] received data from client: \n" + str(data))
                return data
            except socket.error as err:
                print("proxy_thread receive failed with error %s " % err)
            except EOFError:
                # print("[proxy_thread -> _receive] EOFError! (unable to pickle.loads successfully)")
                print("proxy thread EOFError exiting...")
                return " "

    def head_request_to_server(self, url):
        session = requests.session()
        session.headers['Connection'] = 'close'
        session.headers['Keep-Alive'] = '0'
        try:
            response = session.head(url)
            if self.DEBUG:
                print("head_request_to_server: " + str(response))
            return response.headers  # .headers is a dictionary
        except requests.exceptions.MissingSchema:
            # retry logic
            print("request failed. retrying with http:// added to url")
            response = session.head('http://' + url)
            if self.DEBUG:
                print("head_request_to_server: " + str(response))
            return response.headers  # .headers is a dictionary

    def get_request_to_server(self, url):
        session = requests.session()
        session.headers['Connection'] = 'close'
        session.headers['Keep-Alive'] = '0'
        try:
            response = session.get(url)
            if self.DEBUG:
                print("get_request_to_server: " + str(response))
            return response  # .headers, .content, .json, .status_code
        except requests.exceptions.MissingSchema:
            # retry logic
            print("request failed. retrying with http:// added to url: " + "http://" + url)
            response = session.get('http://' + url)
            if self.DEBUG:
                print("get_request_to_server: " + str(response))
            return response

    def response_from_server(self, request):
        mode = request['mode']
        url = request['url']
        if mode == "GET":
            return self.get_request_to_server(url)
        return self.head_request_to_server(url)

    def send_response_to_client(self, status_code, last_modified, html):
        response_string = HttpHelper().build_http_response(
            self.http_version, str(status_code), last_modified, str(html))
        self._send(response_string)
from storage_manager import Redis
from proxy_manager import ProxyManager
from autoproxy_config.config import configuration

DESIGNATED_ENDPOINT = configuration.app_config['designated_endpoint']['value']

from IPython import embed
import time

redis = Redis(**configuration.redis_config)
pm = ProxyManager()
embed()

pm = ProxyManager()
for i in range(500):
    proxy = pm.get_proxy(DESIGNATED_ENDPOINT)
    proxy.callback(success=False)
    proxy = pm.get_proxy('https://google.com')
    proxy.callback(success=True)

pm.storage_mgr.sync_to_db()
def getStatus():
    status = ProxyManager(logger).getNumber()
    return jsonify(status)
def delete():
    proxy = request.args.get('proxy')
    ProxyManager().delete(proxy)
    return 'success'
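The getAll, get, getStatus, and delete handlers above use Flask's request and jsonify, so they are presumably registered as routes on a Flask app. A minimal sketch of that wiring; the URL paths, port, and logger object are assumptions, not shown in the listing:

from flask import Flask
import logging

logger = logging.getLogger(__name__)  # stand-in for the project's logger
app = Flask(__name__)

# Hypothetical route registration; the real paths are not part of the listing.
app.add_url_rule('/get', 'get', get)
app.add_url_rule('/get_all', 'get_all', getAll)
app.add_url_rule('/get_status', 'get_status', getStatus)
app.add_url_rule('/delete', 'delete', delete)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5010)  # port is a placeholder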
    finally:
        s.close()
    if (len(times) + 1) % 25 == 0:
        print('made 25 requests, host still up')


if __name__ == "__main__":
    target = sys.argv[1] if len(sys.argv) - 1 > 0 else 'https://www.google.com'
    port = sys.argv[2] if len(sys.argv) - 1 > 1 else 443
    proxy_queue = Queue()
    abort_q = Queue()
    pman = ProxyManager()
    print('filling proxy stack')
    pman.fill_proxy_stack()
    while not pman.proxy_stack:
        pass
    num_processes = cpu_count()
    processes = []
    print('spinning processes')
    for i in range(num_processes):
        processes.append(
            Process(target=spin_threads, args=(target, port, proxy_queue, abort_q)))
        processes[i].start()
    try: