class ProxyRefresh():
    """Re-validate a pool of candidate proxies and sync the result to Redis.

    The candidate pool is built once, at construction time, from the values
    produced by ``fuzz_all()`` plus whatever ``get_all()`` returns from the
    Redis store that matches the requested proxy type.
    """

    def __init__(self, proxy_type='https'):
        """Bind the Redis store for ``proxy_type`` and build the candidate pool.

        :param proxy_type: ``'https'`` or ``'http'``.
        :raises ValueError: if ``proxy_type`` is anything else.
        """
        if proxy_type == 'https':
            redis_key = 'https_proxy'
        elif proxy_type == 'http':
            redis_key = 'http_proxy'
        else:
            # ValueError is a subclass of Exception, so callers that caught
            # the previously raised bare Exception keep working.
            raise ValueError('type must be https or http')
        self.redis_handler = RedisClient(redis_key)
        self.proxy_type = proxy_type
        # A set de-duplicates proxies present in both sources.
        self.proxy_pool = {*fuzz_all(), *self.redis_handler.get_all()}

    def refresh(self, pool_num=10):
        """Validate every pooled proxy using a thread pool.

        :param pool_num: number of worker threads handed to ``ThreadPool``.
        """
        pool = ThreadPool(pool_num)
        try:
            pool.map(self.valid_ip, self.proxy_pool)
        finally:
            # Always release the workers, even when a validation call raised.
            pool.close()
            pool.join()

    def refresh_in_async(self):
        """Validate every pooled proxy by queueing one ``AsyncTask`` task each."""
        asynctask = AsyncTask()
        for ip in self.proxy_pool:
            asynctask.add_task(self.valid_ip, ip)
        asynctask.run()

    def valid_ip(self, ip):
        """Add ``ip`` to the Redis store if it is usable, otherwise delete it.

        :param ip: the proxy entry passed to ``proxy_is_useful``.
        """
        if proxy_is_useful(ip, self.proxy_type):
            self.redis_handler.add(ip)
            print('ok', ip)
        else:
            self.redis_handler.delete(ip)
class ProxyManage(Resource):
    """REST resource exposing the ``http_proxy`` and ``https_proxy`` stores.

    ``GET`` returns one proxy (or all of them), ``DELETE`` removes a proxy.
    Both verbs require a ``type`` request argument.
    """

    def __init__(self):
        """Create both Redis stores and a parser with the mandatory ``type`` arg."""
        self.http_proxy = RedisClient('http_proxy')
        self.https_proxy = RedisClient('https_proxy')
        self.parser = reqparse.RequestParser()
        self.parser.add_argument(
            'type', type=str, required=True,
            help='required args of proxy type: http/https, like ?type=http')

    @error_handle
    def get(self):
        """Return one proxy, or every proxy when ``all`` equals ``'true'``.

        :raises ParamError: if ``type`` is neither ``'http'`` nor ``'https'``.
        """
        self.parser.add_argument('all', type=str, default='false',
                                 required=False, help='')
        # Parse the request once and read both values from the same result.
        args = self.parser.parse_args()
        get_all = args['all']
        proxy_type = args['type']

        if proxy_type == 'http':
            store = self.http_proxy
        elif proxy_type == 'https':
            store = self.https_proxy
        else:
            raise ParamError(msg='proxy type param error,must be http/https')

        if get_all == 'true':
            return store.get_all()
        return store.get_one()

    @error_handle
    def delete(self):
        """Delete ``ip`` from the store selected by ``type``.

        Any ``type`` other than ``'http'`` / ``'https'`` removes the ip from
        both stores and returns ``None``.
        """
        self.parser.add_argument('ip', type=str, required=True)
        args = self.parser.parse_args()
        proxy_type = args.get('type')
        ip = args.get('ip')

        if proxy_type == 'http':
            return self.http_proxy.delete(ip)
        if proxy_type == 'https':
            return self.https_proxy.delete(ip)

        # Unrecognised type: purge the ip from both stores.
        self.http_proxy.delete(ip)
        self.https_proxy.delete(ip)
        return
class ProxyManager(object):
    """
    manager of the proxy pool

    Holds one ``RedisClient`` and switches it between the ``raw_proxy`` and
    ``useful_proxy`` tables with ``changeTable`` before each operation.
    """

    def __init__(self, logger):
        """
        :param logger: object whose ``info`` method is used for progress output
        """
        self.db = RedisClient(name='raw_proxy', host='localhost', port=6379)
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'
        self.log = logger

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Every getter named in ``proxy_getter_methods`` is looked up on
        ``GetFreeProxy`` and called; proxies not already present in the
        useful table are put into the raw table.
        :return:
        """
        for proxy_getter in proxy_getter_methods:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxy_getter)():
                if not proxy:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=proxy_getter, proxy=proxy))
                proxy = proxy.strip()
                # A whitespace-only entry strips down to '' and is useless
                # as a proxy, so it is not stored.
                if proxy:
                    proxy_set.add(proxy)

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return: a randomly chosen key of the useful table, or None if empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        proxies = self.db.getAll()
        return random.choice(list(proxies.keys())) if proxies else None

    def delete(self, proxy):
        """
        delete the given proxy from proxy pool
        :param proxy: entry to remove from the useful table
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from proxy pool
        :return: list of the keys in the useful table (empty list if none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        proxies = self.db.getAll()
        return list(proxies.keys()) if proxies else list()

    def getNumber(self):
        """
        get number of the raw and useful proxies
        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    @staticmethod
    def validUsefulProxy(proxy, logger):
        """
        check whether the proxy is useful by fetching http://httpbin.org/ip
        through it; the request is given a 20s timeout
        :param proxy: proxy address as str or utf8-encoded bytes
        :param logger: object whose ``info`` method reports a working proxy
        :return: True when the response status is 200, otherwise False
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        proxies = {"http": "http://{proxy}".format(proxy=proxy)}
        try:
            r = requests.get('http://httpbin.org/ip', proxies=proxies,
                             timeout=20, verify=False)
        except Exception:
            # Any request failure means the proxy is unusable. Unlike a bare
            # ``except:``, KeyboardInterrupt / SystemExit still propagate.
            return False
        if r.status_code == 200:
            logger.info('%s is ok' % proxy)
            return True
        # A non-200 answer is reported as an explicit False, not None.
        return False
class Generator():
    """Log in to a configured website with a headless Chrome browser and
    store the resulting cookies per user in Redis."""

    def __init__(self, hostname):
        """
        Load ``conf/<hostname>_website.json``, open the ``cookies`` and
        ``users`` Redis stores for that website and start the browser
        (selenium, headless Chrome, 20 second explicit wait).

        :param hostname: prefix of the JSON config file to load
        """
        # Context manager so the config file handle is always closed.
        with open("conf/%s_website.json" % hostname, "r") as fd:
            data = json.load(fd)
        self.website = data["website_name"]
        self.login_url = data["login_url"]
        self.cookies_db = RedisClient('cookies', self.website)
        self.users_db = RedisClient('users', self.website)
        # NOTE(review): account credentials are hard-coded in source and
        # re-seeded on every construction; kept for compatibility, but they
        # should be moved to configuration / a secret store.
        self.users_db.set("15320347357","123456wyq")
        self.users_db.set("15320343017","123456wyq")
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--window-size=1980,1980')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        self.browser = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.browser, 20)

    def get_cookie_dict(self, cookie):
        """
        Convert an iterable of cookie records into a ``{name: value}`` dict.

        :param cookie: iterable of mappings with "name" and "value" keys
        :return: dict mapping each cookie name to its value
        """
        return {item["name"]: item["value"] for item in cookie}

    def open_lianjia(self, username, password):
        """
        open website,input username and password
        finally click it
        """
        self.browser.get(self.login_url)
        time.sleep(2)
        # click the login entry
        button = self.browser.find_element_by_css_selector("a.btn-login.bounceIn.actLoginBtn")
        button.click()
        time.sleep(2)
        # switch to account/password login
        button = self.browser.find_element_by_css_selector("#con_login_user_tel a.tologin")
        button.click()
        username_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "input.the_input.topSpecial.users")
        ))
        password_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "input.the_input.password")
        ))
        loginSubmit = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, ".li_btn a.login-user-btn")
        ))
        # type the credentials and submit
        username_input.send_keys(username)
        password_input.send_keys(password)
        loginSubmit.click()
        time.sleep(3)

    def open_qfang(self, username, password):
        """
        open website,input username and password
        finally click it
        """
        self.browser.get(self.login_url)
        time.sleep(2)
        # click the login entry
        button = self.browser.find_element_by_css_selector("#noLoginUser .nav-link a")
        button.click()
        time.sleep(2)
        # switch to account/password login
        button = self.browser.find_element_by_css_selector("#loginTbs a:nth-child(2)")
        button.click()
        username_input = self.wait.until(EC.presence_of_element_located(
            (By.ID, "phone")
        ))
        password_input = self.wait.until(EC.presence_of_element_located(
            (By.ID, "password")
        ))
        loginSubmit = self.wait.until(EC.presence_of_element_located(
            (By.ID, "loginSubmit")
        ))
        # type the credentials and submit
        username_input.send_keys(username)
        password_input.send_keys(password)
        loginSubmit.click()
        time.sleep(3)

    def new_cookie_qfang(self, username, password):
        """
        request website,login and get cookie

        :return: dict with "code" 1 and the browser cookies in "data" on
                 success, or "code" -1 and "login failed" otherwise
        """
        self.open_qfang(username, password)
        # confirm whether the login succeeded
        check = self.browser.find_element_by_css_selector("#loginOrUserName a.frontUserName")
        text = check.text
        res = {}
        if text == "我的Q房":
            res["code"] = 1
            res["data"] = self.browser.get_cookies()
        else:
            res["code"] = -1
            res["data"] = "login failed"
        return res

    def new_cookie_lianjia(self, username, password):
        """
        request lianjia,login and get cookie

        :return: dict with "code" 1 and the browser cookies in "data" on
                 success, or "code" -1 and "login failed" otherwise
        """
        self.open_lianjia(username, password)
        check = self.browser.find_element_by_css_selector(".ti-hover .typeShowUser a:link")
        res = {}
        if "1" in check.text:
            res["code"] = 1
            res["data"] = self.browser.get_cookies()
        else:
            res["code"] = -1
            res["data"] = "login failed"
        return res

    def save_cookies(self):
        """
        get all cookies and save

        For every user that has no stored cookie yet: log in, store the
        cookie on success, or delete the account when the login failed.
        """
        all_users = self.users_db.all_users()
        done_users = self.cookies_db.all_users()
        if len(all_users) == len(done_users):
            print("No users can get cookie")
        for user in all_users:
            if user in done_users:
                continue
            pw = self.users_db.get(user)
            print("get cookie user:%s,website:%s..." % (user, self.website) )
            if self.website == "qfang":
                result = self.new_cookie_qfang(user, pw)
            elif self.website == "lianjia":
                result = self.new_cookie_lianjia(user, pw)
            else:
                print("not support this website")
                # No login was attempted, so there is no result to inspect;
                # falling through would read an unbound/stale ``result``.
                continue
            if result["code"] == 1:
                cookie = self.get_cookie_dict(result["data"])
                self.cookies_db.set(user, json.dumps(cookie))
                print("save cookie %s succ" % cookie)
            elif result["code"] == -1:
                print(result["data"])
                self.users_db.delete(user)
                print("delete account :%s" % user)
            else:
                print(result["data"])

    def get_cookie(self):
        """
        get_cookie from redis

        :return: whatever ``get_cookie()`` of the cookies store returns
        """
        # Must go through the instance attribute; a bare ``cookies_db`` is
        # not defined in this scope.
        cookie = self.cookies_db.get_cookie()
        return cookie

    def close(self):
        """Quit the browser and kill any remaining chromedriver process."""
        try:
            self.browser.quit()
        finally:
            # Run the cleanup even when quit() raises.
            os.system('pkill chromedriver')