def registers(self, present_website: str, VPN: str):
    """Register accounts until ``self.success_count`` reaches ``self.assignment_num``.

    Each outer iteration builds a fresh ``requests`` session with a proxy
    obtained from ``ip_proxy(VPN)`` and then tries ``self.__register_one`` up
    to ``g_var.RETRY_COUNT_MAX`` times, fetching a new proxy after every
    failed attempt.

    Args:
        present_website: Name of the current website; forwarded to
            ``self.__register_one``.
        VPN: VPN access type; forwarded to ``ip_proxy``.

    Returns:
        None. Progress is reported through ``self.success_count``,
        ``self.failed_count``, ``self.now_count`` and the ``g_var`` globals.
    """
    while self.success_count < self.assignment_num:
        # Check the shared error state at the start of every round.
        if self.__monitor_status() == -1:
            break
        self.now_count = self.now_count + 1

        Session = requests.Session()
        # Fetch the proxy settings.
        proxies = ip_proxy(VPN)
        if proxies == {"error": -1}:
            self.failed_count = self.failed_count + 1
            continue

        self.failed_count = 0
        # NOTE(review): the first assignment wraps the ip_proxy() result in an
        # http/https mapping, while the retry path below assigns it directly.
        # Only one of the two shapes can match what ip_proxy() returns --
        # confirm and unify.
        Session.proxies = {
            "http": proxies,
            "https": proxies,
        }
        # Limit the adapter-level retry count.
        Session.mount('http://', HTTPAdapter(max_retries=1))
        Session.mount('https://', HTTPAdapter(max_retries=1))

        registered = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            registerData = self.__register_one(Session, present_website)
            if registerData != -1:
                # Anything other than -1 means the registration succeeded.
                self.success_count = self.success_count + 1
                self.failed_count = 0
                registered = True
                break
            # Failure: switch to a new proxy and wait before retrying.
            self.failed_count = self.failed_count + 1
            proxies = ip_proxy(VPN)
            Session.proxies = proxies
            time.sleep(g_var.SLEEP_TIME)

        # Only treat the retry budget as exhausted when no attempt succeeded;
        # checking retry_count alone would also stop the program when the
        # very last attempt was the successful one.
        if not registered and retry_count == g_var.RETRY_COUNT_MAX:
            # Consecutive failures indicate a real problem: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = "连续注册出错,程序停止"
            g_var.logger.error("register:连续注册失败!程序停止")
            break

    g_var.logger.info("g_var.SPIDER_STATUS" + str(g_var.SPIDER_STATUS))
    g_var.logger.info("本线程共成功注册'self.success_count'=" + str(self.success_count) + "个账户")
def get_Session(VPN: str):
    """Create a Session object that uses a proxy for the given VPN type.

    Args:
        VPN: VPN access type of the website; forwarded to ``ip_proxy``.

    Returns:
        The configured Session object on success, or -1 when ``ip_proxy``
        returns ``{"error": -1}``.
    """
    new_session = requestsW.Session()

    # Fetch the proxy settings; bail out early when none are available.
    proxy_config = ip_proxy(VPN)
    if proxy_config == {"error": -1}:
        return -1
    new_session.proxies = proxy_config

    # Set the maximum retry count on a separate adapter for each scheme.
    for scheme in ('http://', 'https://'):
        new_session.mount(scheme, HTTPAdapter(max_retries=1))

    return new_session
def register_test():
    """Manually exercise the boredpanda.com sign-up flow with fixed credentials.

    Steps: submit the sign-up form, fetch the activation link from the
    mailbox via ``EmailVerify``, open that link, save the profile settings
    and the privacy settings, then print the resulting author URL.

    Returns:
        A short error message string when a step fails; None when every step
        succeeds.
    """
    email = "*****@*****.**"
    emailpwd = "dqS72ijG"
    Session = requestsW.Session()
    Session.proxies = ip_proxy()
    print(Session.proxies)

    headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"
    }
    # headers["x-requested-with"] = "XMLHttpRequest"
    headers["referer"] = "https://www.boredpanda.com/add-new-post/"
    headers[
        "content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"

    user = email.split("@")[0]
    pwd = emailpwd
    data = {
        "action": "contribution_signup",
        "user_email": email,
        "user_full_name": user,
        "user_pass": pwd,
        "redirect": "https://www.boredpanda.com/add-new-post/"
    }
    # NOTE(review): this first request is sent without `headers`, unlike the
    # later ones -- confirm whether that is intentional.
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        proxies=Session.proxies,
        data=data)
    print(res.text)
    if 'user_id' not in res.text:
        return "注册失败"

    # TODO: new mailbox method
    # Raw string: the pattern contains regex escapes (\w, \?) that are invalid
    # string escapes in a normal literal. The resulting pattern is unchanged.
    res = EmailVerify(
        username=email,
        password=emailpwd,
        re_text=
        r'Click .{0,50} href="(http://\w{5,15}.ct.sendgrid.net/ls/click\?upn=.{300,600})">here</a>'
    ).execute_Start()
    if res == -1:
        return "获取邮箱失败"

    res = Session.get(res["data"], headers=headers)
    print(Session.cookies.get_dict())
    print(res.text)
    # Site user marker: <a href=".*?">My Profile</a>
    if "boredpanda_auth" not in Session.cookies.get_dict():
        return "未打开注册页面,重新注册"
    print("已经打开注册")

    url = "https://www.baidu.com"
    data = {
        "action": "save_settings_form",
        "settingsDisplay": user,
        "settingsWebsite": url,
        "settingsFacebook": url,
        "settingsTwitter": url,
        "settingsFlickr": url,
        "settingsSlack": "",
        "settingsBio": "这里是我的个人啊12323",
        "settingsAdminBox": ""
    }
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        proxies=Session.proxies,
        headers=headers,
        data=data)
    if "success" not in res.text:
        return "修改个人资料失败"
    print("修改个人资料:", res.text)

    # Expected response: {"success":"1"}
    data = {
        "action": "save_privacy_settings_form",
        "allowContactMe": "true",
        "ninjaPanda": "false",
    }
    # proxies=ip_proxy()
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        headers=headers,
        proxies=Session.proxies,
        data=data)
    if "success" not in res.text:
        return "修改可见失败"
    print("https://www.boredpanda.com/author/%s/" % user)
def __register_one(self, Session, present_website: str, email_and_passwd):
    """Register one boredpanda.com account, activate it and store it.

    The flow is: submit the sign-up form, fetch the activation link from the
    mailbox via ``EmailVerify``, open the link, insert the account into the
    ``present_website`` table, save profile and privacy settings, and finally
    insert the author page into the ``<present_website>_article`` table.

    Args:
        Session: Session object supplied by the caller.
            NOTE(review): it is immediately replaced by a fresh
            ``requestsW.Session()`` below, so the caller's session (and its
            proxy configuration) is ignored -- confirm this is intended.
        present_website: Current website name; used as the database table
            name.
        email_and_passwd: Mailbox credentials; index 0 is the email address
            and index 1 is the password.

    Returns:
        On success, a ``registerData`` dict with the keys ``user_id`` (row id
        of the inserted account), ``username``, ``password``, ``email`` and
        ``cookie`` (string form of the session cookies after activation).
        On any failure, the status code 0; a message is appended to
        ``g_var.ERR_MSG`` and logged.
    """
    email = email_and_passwd[0]
    emailpwd = email_and_passwd[1]
    Session = requestsW.Session()
    Session.proxies = ip_proxy()

    headers = {"User-Agent": get_user_agent()}
    # headers["x-requested-with"] = "XMLHttpRequest"
    headers["referer"] = "https://www.boredpanda.com/add-new-post/"
    headers[
        "content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"

    user = email.split("@")[0]
    pwd = emailpwd
    data = {
        "action": "contribution_signup",
        "user_email": email,
        "user_full_name": user,
        "user_pass": pwd,
        "redirect": "https://www.boredpanda.com/add-new-post/"
    }
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        proxies=Session.proxies,
        data=data)
    if 'user_id' not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|发送邮箱,注册失败"
        g_var.logger.info("|_|发送邮箱,注册失败")
        return 0

    # TODO: new mailbox method
    # Raw string: the pattern contains regex escapes (\w, \?) that are invalid
    # string escapes in a normal literal. The resulting pattern is unchanged.
    res = EmailVerify(
        username=email,
        password=emailpwd,
        re_text=
        r'Click .{0,50} href="(http://\w{5,15}.ct.sendgrid.net/ls/click\?upn=.{300,600})">here</a>'
    ).execute_Start()
    if res == -1:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|邮箱激活失败"
        g_var.logger.info("|_|邮箱激活失败")
        return 0

    # Opening the activation link is expected to set the auth cookie.
    res = Session.get(res["data"], headers=headers)
    if "boredpanda_auth" not in Session.cookies.get_dict():
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|邮箱获取链接后,请求失败"
        g_var.logger.info("|_|邮箱获取链接后,请求失败")
        return 0

    cookie_text = str(Session.cookies.get_dict())
    # NOTE(review): the SQL is assembled by string formatting from account
    # data and is logged verbatim (including the password). If MysqlHandler
    # supports parameterized queries, switch to them.
    sql = """INSERT INTO %s (username, password, mail, status, cookie) VALUES("%s", "%s", "%s", "%s", "%s");""" % (
        present_website, user, pwd, email, 0, cookie_text)
    g_var.logger.info(sql)
    user_row_id = MysqlHandler().insert(sql)
    if user_row_id == -1:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|数据库插入失败"
        g_var.logger.info("|_|数据库插入失败")
        return 0

    tlList = get_new_title_and_link()
    title, url = tlList[0], tlList[1]
    data = {
        "action": "save_settings_form",
        "settingsDisplay": title,
        "settingsWebsite": url,
        "settingsFacebook": url,
        "settingsTwitter": url,
        "settingsFlickr": url,
        "settingsSlack": "",
        "settingsBio": title,
        "settingsAdminBox": ""
    }
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        proxies=Session.proxies,
        headers=headers,
        data=data)
    if "success" not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|修改个人链接,请求失败"
        g_var.logger.info("|_|修改个人链接,请求失败")
        return 0

    data = {
        "action": "save_privacy_settings_form",
        "allowContactMe": "true",
        "ninjaPanda": "false",
    }
    # proxies=ip_proxy()
    res = Session.post(
        "https://www.boredpanda.com/blog/wp-admin/admin-ajax.php",
        headers=headers,
        proxies=Session.proxies,
        data=data)
    if "success" not in res.text:
        g_var.ERR_MSG = g_var.ERR_MSG + "|_|允许个人资料访问,请求失败"
        g_var.logger.info("|_|允许个人资料访问,请求失败")
        return 0

    sql = "INSERT INTO %s_article(url, keyword, user_id) VALUES('%s', '%s', '%s');" % (
        present_website, "https://www.boredpanda.com/author/%s/" % user,
        title, user_row_id)
    if g_var.insert_article_lock.acquire():
        # Always release the lock, even if the insert raises; otherwise every
        # later acquire() in other threads would block forever.
        try:
            article_row_id = MysqlHandler().insert(sql)
        finally:
            g_var.insert_article_lock.release()
        if article_row_id == -1:
            g_var.ERR_MSG = g_var.ERR_MSG + "|_|MYSQL插入文章失败"
            g_var.logger.info("|_|MYSQL插入文章失败")
            return 0

    # Success: hand the account data back to the caller instead of falling
    # off the end of the function with an implicit None.
    registerData = {
        "user_id": user_row_id,
        "username": user,
        "password": pwd,
        "email": email,
        "cookie": cookie_text,
    }
    return registerData
def start(self, present_website, VPN):
    """Register, log in and post an article until the assignment is complete.

    Each outer iteration builds a fresh ``requests`` session with a proxy
    from ``ip_proxy(VPN)`` and runs three phases, each retried up to
    ``g_var.RETRY_COUNT_MAX`` times:

    1. register an account via ``self.__register_one``;
    2. log in with that account via ``self.__login``;
    3. post an article via ``self.__postMessage``.

    When a phase uses up all its retries without succeeding,
    ``g_var.SPIDER_STATUS`` is set to 3 and the method stops.

    Args:
        present_website: Name of the current website; forwarded to
            ``self.__register_one``.
        VPN: VPN access type; forwarded to ``ip_proxy`` and ``self.__login``.

    Returns:
        None. Progress is reported through the instance counters and the
        ``g_var`` globals.
    """
    while self.success_count < self.assignment_num:
        # Check the shared error state at the start of every round.
        if self.__monitor_status() == -1:
            break
        self.now_count = self.now_count + 1

        # Set up the Session object.
        Session = requests.Session()
        proxies = ip_proxy(VPN)
        if proxies == {"error": -1}:
            self.failed_count = self.failed_count + 1
            continue
        Session.proxies = proxies
        Session.mount('http://', HTTPAdapter(max_retries=1))
        Session.mount('https://', HTTPAdapter(max_retries=1))

        # 1. Register
        registered = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            registerData = self.__register_one(Session, present_website)
            if registerData != -1:
                # Registration succeeded.
                registered = True
                break
            # Failure: log, wait and switch to a new proxy.
            g_var.logger.info("注册失败" + str(registerData))
            time.sleep(g_var.SLEEP_TIME)
            proxies = ip_proxy(VPN)
            Session.proxies = proxies
        # The success flag keeps a success on the very last attempt from
        # being mistaken for an exhausted retry budget.
        if not registered and retry_count == g_var.RETRY_COUNT_MAX:
            # Consecutive failures indicate a real problem: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.logger.error("start:连续注册失败!程序停止")
            break

        # 2. Log in
        logged_in = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            # Build a userInfo tuple from the registration data.
            userInfo: tuple = (registerData['user_id'], registerData['name'],
                               registerData['password'],
                               registerData['mail'], '0', "")
            loginData = self.__login(Session, VPN, userInfo)
            if loginData == {"error": -1}:
                # Login error: wait, switch proxy and retry.
                g_var.ERR_MSG = "登录出错"
                self.failed_count = self.failed_count + 1
                time.sleep(g_var.SLEEP_TIME)
                proxies = ip_proxy(VPN)
                Session.proxies = proxies
                continue
            elif loginData == {"error": 1}:
                # Account problem: count the failure and retry.
                self.failed_count = self.failed_count + 1
                continue
            else:
                self.failed_count = 0
                self.proxy_err_count = 0
                logged_in = True
                break
        if not logged_in and retry_count == g_var.RETRY_COUNT_MAX:
            g_var.SPIDER_STATUS = 3
            g_var.logger.error("start:连续登录失败!程序停止")
            break

        # 3. Post the article
        posted = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            status = self.__postMessage(Session, loginData)
            if status == {"ok": 0}:
                # Article posted successfully.
                self.success_count = self.success_count + 1
                self.failed_count = 0
                self.proxy_err_count = 0
                posted = True
                break
            elif status == {"error": 1}:
                # Count a proxy error, wait, switch proxy and retry.
                self.failed_count = self.failed_count + 1
                self.proxy_err_count = self.proxy_err_count + 1
                time.sleep(g_var.SLEEP_TIME)
                proxies = ip_proxy(VPN)
                Session.proxies = proxies
                # g_var.logger.info("proxies"+str(proxies))
            elif status == {"error": -1}:
                # No article could be obtained: give up on this round.
                self.failed_count = self.failed_count + 1
                break
        if not posted and retry_count == g_var.RETRY_COUNT_MAX:
            # Consecutive failures indicate a real problem: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = "连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break

    g_var.logger.info("成功注册账户并发送文章" + str(self.success_count) + "篇")
def loginAndPostMessage(self, VPN: str):
    """Log in with stored accounts and post articles until the assignment is done.

    Each outer iteration builds a fresh ``requests`` session with a proxy
    from ``ip_proxy(VPN)`` and runs two phases, each retried up to
    ``g_var.RETRY_COUNT_MAX`` times:

    1. fetch an account with ``generate_login_data`` and log in via
       ``self.__login``;
    2. post an article via ``self.__postMessage``.

    When a phase uses up all its retries without succeeding,
    ``g_var.SPIDER_STATUS`` is set to 3 and the loop stops.

    Args:
        VPN: VPN access type; forwarded to ``ip_proxy`` and ``self.__login``.

    Returns:
        ``{"error": -1}`` when no account can be fetched from the database;
        otherwise None.
    """
    while self.success_count < self.assignment_num:
        # Check the shared error state at the start of every round.
        if self.__monitor_status() == -1:
            break
        self.now_count = self.now_count + 1

        Session = requests.Session()
        # Fetch the proxy settings.
        proxies = ip_proxy(VPN)
        if proxies == {"error": -1}:
            self.failed_count = self.failed_count + 1
            continue
        Session.proxies = proxies
        Session.mount('http://', HTTPAdapter(max_retries=1))
        Session.mount('https://', HTTPAdapter(max_retries=1))

        # 1. Log in
        logged_in = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            # Fetch the user information from the database.
            userInfo = generate_login_data("reddit_com")
            if userInfo is None:
                g_var.logger.error("数据库中获取用户失败,本线程停止!")
                return {"error": -1}

            loginData = self.__login(Session, VPN, userInfo)
            if loginData == {"error": -1}:
                # Login error: wait, switch proxy and retry.
                g_var.ERR_MSG = "登录出错"
                self.failed_count = self.failed_count + 1
                time.sleep(g_var.SLEEP_TIME)
                proxies = ip_proxy(VPN)
                Session.proxies = proxies
                continue
            elif loginData == {"error": 1}:
                # Account problem: fetch a new account and retry.
                self.failed_count = self.failed_count + 1
                continue
            else:
                self.failed_count = 0
                self.proxy_err_count = 0
                logged_in = True
                break
        # The success flag keeps a success on the very last attempt from
        # being mistaken for an exhausted retry budget.
        if not logged_in and retry_count == g_var.RETRY_COUNT_MAX:
            # Consecutive failures indicate a real problem: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = "连续登录出错,程序停止"
            g_var.logger.error("login:连续登录失败!程序停止")
            break

        # 2. Post the article
        posted = False
        retry_count = 0
        while retry_count < g_var.RETRY_COUNT_MAX:
            retry_count = retry_count + 1
            status = self.__postMessage(Session, loginData)
            if status == {"ok": 0}:
                # Article posted successfully.
                self.success_count = self.success_count + 1
                self.failed_count = 0
                self.proxy_err_count = 0
                posted = True
                break
            elif status == {"error": 1}:
                # Count a proxy error, wait, switch proxy and retry.
                self.failed_count = self.failed_count + 1
                self.proxy_err_count = self.proxy_err_count + 1
                time.sleep(g_var.SLEEP_TIME)
                proxies = ip_proxy(VPN)
                Session.proxies = proxies
                # g_var.logger.info("proxies"+str(proxies))
            elif status == {"error": -1}:
                # No article could be obtained: give up on this round.
                self.failed_count = self.failed_count + 1
                break
        if not posted and retry_count == g_var.RETRY_COUNT_MAX:
            # Consecutive failures indicate a real problem: stop the program.
            g_var.SPIDER_STATUS = 3
            g_var.ERR_MSG = "连续发文章出错,程序停止"
            g_var.logger.error("连续发文章出错,程序停止")
            break

    g_var.logger.info("成功发送" + str(self.success_count) + "篇文章")