class Getter():
    """Fill the proxy pool by running every crawl method of the crawler.

    ``redis`` is the proxy store and ``crawler`` supplies the proxies; both
    are created in ``__init__``.
    """

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Tell whether the proxy pool has reached its size limit.

        :return: True when ``redis.count()`` is at or above
            POOL_UPPER_THRESHOLD, otherwise False
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Run every crawl method once and store the proxies it yields.

        Only the start message is printed when the pool is already full.

        :return: None
        """
        print('获取器开始执行')
        if self.is_over_threshold():
            return
        # Loop over the number of proxy sites registered on the crawler.
        for callback_label in range(self.crawler.__CrawlFuncCount__):
            # Name of the crawl method for this proxy site.
            callback = self.crawler.__CrawlFunc__[callback_label]
            # Passing the method name returns proxies as ip:port.
            for proxy in self.crawler.get_proxies(callback):
                # Store each proxy in redis.
                self.redis.add(proxy)
def __init__(self, url, yanse):
    """Keep the order parameters and create the browser, wait helper and cookie store.

    :param url: stored unchanged on ``self.url``
    :param yanse: the colour/category options to pick when placing the order
    """
    self.url, self.yanse = url, yanse
    # The attribute is spelled `brower` in the original; the name is kept.
    driver = webdriver.Chrome()
    self.brower = driver
    # Explicit waits on this driver use a 30 second limit.
    self.wait = WebDriverWait(driver, 30)
    self.cookies_db = RedisClient('jdcookies')
def __init__(self, website='default'):
    """Create the cookie and account stores for *website* and the request headers.

    :param website: passed as second argument to both ``RedisClient`` stores
    """
    self.website = website
    self.cookies_db = RedisClient('cookies', self.website)
    self.accounts_db = RedisClient('accounts', self.website)
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    self.header = {'Accept': accept, 'User-Agent': user_agent}
def __init__(self):
    """Create the JD account and cookie stores, the request headers and the test URL."""
    self.account_db = RedisClient('jdaccount')
    self.cookies_db = RedisClient('jdcookies')
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    self.headers = {'Accept': accept, 'User-Agent': user_agent}
    self.testUrl = 'https://www.jd.com/'
class Getter():
    """Fill the proxy pool by running every crawl callback of the crawler.

    ``redis`` is the proxy store and ``crawler`` supplies the proxies; both
    are created in ``__init__``.
    """

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when ``redis.count()`` has reached POOL_MAX_THRESHOLD."""
        return self.redis.count() >= POOL_MAX_THRESHOLD

    def run(self):
        """Run every crawl callback once and add the proxies it yields.

        Only the start message is printed when the pool is already full.
        """
        print("获取器开始执行")
        if self.is_over_threshold():
            return
        # __CrawlFuncCount__ gives how many entries of __CrawlFunc__ to run.
        for callback_index in range(self.crawler.__CrawlFuncCount__):
            callback = self.crawler.__CrawlFunc__[callback_index]
            for proxy in self.crawler.get_proxies(callback):
                self.redis.add(proxy)
class CookiesGenerator():
    """Generate and store cookies for every JD account that has none yet."""

    def __init__(self, name='jdcookies'):
        """
        :param name: key name handed to the cookies ``RedisClient``
        """
        self.name = name
        self.account_db = RedisClient('jdaccount')
        self.cookies_db = RedisClient(self.name)

    def process_cookies(self, cookies):
        """
        Build a new dict from the name and value of each cookie.

        :param cookies: iterable of mappings that each have 'name' and 'value'
        :return: dict mapping each cookie name to its value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Log in every account that has no stored cookies and save the result.

        Status 1 stores the returned content as JSON, status 2 prints the
        content and deletes the account, any other status only prints it.

        :return: None
        """
        # Compare the accounts in redis with the cookies table to see which
        # accounts have not obtained cookies yet.
        account_usernames = self.account_db.usernames()
        cookies_usernames = self.cookies_db.usernames()
        for username in account_usernames:
            if username in cookies_usernames:
                continue
            password = self.account_db.get(username)
            print('正在生成账号为%s的cookies' % username)
            # Result returned by the Cookies login helper.
            result = Cookies(username, password).main()
            time.sleep(10)
            status = result.get('status')
            content = result.get('content')
            if status == 1:
                # The content is stored as returned; process_cookies() is
                # deliberately not applied before saving.
                print('成功获取到cookies', content)
                if self.cookies_db.set(username, json.dumps(content)):
                    print("成功保存cookies")
            elif status == 2:
                print(content)
                if self.account_db.delete(username):
                    print('成功删除错误账号')
            else:
                print(content)
class Tester(object):
    """Request TEST_URL through each stored proxy and update the proxy's score."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        A response status in VALID_STATUS_CODES calls ``redis.max(proxy)``;
        any other status, or any error during the request, calls
        ``redis.decrease(proxy)``.

        :param proxy: a single proxy (str or UTF-8 bytes); 'http://' is prepended
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except Exception:
                # `Exception` rather than a bare except, so task cancellation
                # and KeyboardInterrupt are not mistaken for a bad proxy.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test function: test all stored proxies in batches.

        Proxies are tested BATCH_TEST_SIZE at a time, with a 5 second pause
        after each batch. Any error is printed rather than raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                # No current event loop (e.g. called outside the main thread).
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so each one is wrapped in a task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class ValidTester(object):
    """Base class that walks all stored cookies and hands each to ``test``."""

    def __init__(self, website='default'):
        """
        :param website: passed as second argument to both ``RedisClient`` stores
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        self.header = {'Accept': accept, 'User-Agent': user_agent}

    def test(self, username, cookies):
        """Test one set of cookies; subclasses must override this.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def run(self):
        """Fetch every stored cookie entry and test each one in turn."""
        for username, cookies in self.cookies_db.all().items():
            self.test(username, cookies)
class Tester():
    """Request the test URL through each stored proxy and update the proxy's score."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and raise or lower its score.

        A response status in VALID_STATUS_CODES calls ``redis.max(proxy)``;
        any other status, or one of the caught client/timeout errors, calls
        ``redis.decrease(proxy)``.

        :param proxy: a single proxy (str or UTF-8 bytes); "http://" is prepended
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                http_proxy = "http://" + proxy
                print("测试代理{}中".format(proxy))
                # NOTE(review): the constant is spelled TEXT_URL here; confirm
                # it matches the name defined in the settings module.
                async with session.get(TEXT_URL, proxy=http_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("代理返回的响应码不合法", proxy)
            except (ClientConnectionError, ClientError, AttributeError, asyncio.TimeoutError):
                self.redis.decrease(proxy)

    def run(self):
        """Test all stored proxies in batches of BATCH_TEST_SIZE.

        There is a 5 second pause after each batch. Any error is printed
        rather than raised.

        :return: None
        """
        print("测试开始运行")
        try:
            proxies = self.redis.all()
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                # No current event loop (e.g. called outside the main thread).
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so each one is wrapped in a task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试发生错误", e.args)
def __init__(self):
    """Create the ``RedisClient`` this object stores on ``self.redis``."""
    client = RedisClient()
    self.redis = client
class CookiesGenerator(object):
    """Base class that obtains cookies for every account that has none yet.

    Subclasses implement ``new_cookies``; ``run`` drives the process.
    """

    def __init__(self, website='default'):
        """
        :param website: passed as second argument to both ``RedisClient`` stores
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.account_db = RedisClient('account', self.website)
        self.init_browser()

    def __del__(self):
        """Close the browser when the instance is destroyed, to free memory."""
        self.close()

    def init_browser(self):
        """Create the browser and store it on ``self.browser``."""
        # Adding the two lines below would run the browser silently in the
        # background; they are left out because the captcha is solved by hand.
        # self.option = webdriver.ChromeOptions()
        # self.option.add_argument('headless')
        self.browser = webdriver.Chrome()

    def new_cookies(self, username, password):
        """Obtain cookies for one account; subclasses must override this.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """Build a new dict from the name and value of each cookie.

        The other cookie fields are not needed.

        :param cookies: iterable of mappings that each have 'name' and 'value'
        :return: dict mapping each cookie name to its value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """Generate and save cookies for every account that lacks them.

        Status 1 saves the processed cookies as JSON, status 2 prints the
        content and deletes the account, any other status only prints it.

        :return: None
        """
        # Export all account names and all names that already have cookies.
        account_usernames = self.account_db.usernames()
        cookies_usernames = self.cookies_db.usernames()
        # Walk all accounts and pick the ones without cookies.
        for username in account_usernames:
            if username in cookies_usernames:
                continue
            password = self.account_db.get(username)
            print('正在生成cookies', '账号:', username, '密码----')
            result = self.new_cookies(username, password)
            time.sleep(10)
            # The status code from new_cookies decides what happens next.
            status = result.get('status')
            if status == 1:
                cookies = self.process_cookies(result.get('content'))
                print('成功获取到cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存cookies')
            elif status == 2:
                print(result.get('content'))
                if self.account_db.delete(username):
                    print('成功删除错误账号')
            else:
                print(result.get('content'))
        # The original attached this to the loop as `for ... else`; with no
        # `break` in the loop it runs once after the loop ends.
        print('所有账号都已成功获取cookies')

    def close(self):
        """Close the browser and drop the reference to it.

        Does nothing when no browser is attached, so a second call, or the
        call from ``__del__`` after a failed ``__init__``, cannot raise
        AttributeError.
        """
        if not hasattr(self, 'browser'):
            return
        try:
            print('关闭浏览器')
            # NOTE(review): close() is kept from the original; confirm whether
            # quit() is wanted here to end the whole driver session.
            self.browser.close()
            del self.browser
        except TypeError:
            print('浏览器关闭失败')
def __init__(self, website='default'):
    """Create the cookie and account stores for *website*, then start the browser.

    :param website: passed as second argument to both ``RedisClient`` stores
    """
    self.website = website
    site = self.website
    self.cookies_db = RedisClient('cookies', site)
    self.account_db = RedisClient('account', site)
    # The browser is created last, after both stores exist.
    self.init_browser()
# This file adds our own accounts to redis.
# Building a cookie pool needs a large number of accounts, so they are read
# from a txt file instead of being typed in by hand.
# Save the prepared accounts in the txt file, one per line, in the format
# account----password, and they are read directly.
# The file is opened by the relative path account.txt.
from save import RedisClient

# Instantiate the RedisClient class first, passing in the key names.
conn = RedisClient('account', 'cf')


def readaccount(sp='----'):
    """Import every ``account<sp>password`` line of account.txt into redis.

    Blank lines are skipped. A line without the separator is reported and
    skipped, so one bad line no longer aborts the import. The line is split
    at the first separator only, so a password may itself contain it.

    :param sp: separator between account and password on each line
    :return: None
    """
    print('开始读取读取account.txt文件......')
    with open("account.txt", "r") as f:
        for data in f:
            # Each line carries its own newline; remove it before splitting.
            line = data.strip('\n')
            if not line.strip():
                continue
            account, found, password = line.partition(sp)
            if not found:
                print('格式错误,跳过该行:%s' % line)
                continue
            print("正在导入账号:%s  密码:%s" % (account, password))
            # Store the account and password in redis via the client's set().
            result = conn.set(account, password)
            print('导入成功\n' if result else '导入失败')


if __name__ == "__main__":
    readaccount()
def __init__(self):
    """Create the proxy store (``self.redis``) and the crawler (``self.crawler``)."""
    store = RedisClient()
    self.redis = store
    spider = Crawler()
    self.crawler = spider
'''
Account import module: loads all the accounts we prepared in advance into redis.
A separate account.txt holds the accounts and passwords in one place, which
is more efficient than entering them by hand.
date:2021.3.2
author:焦康阳
blog:https://jiaokangyang.com
'''
from save import RedisClient

jd = RedisClient('jdaccount')


# In the txt file the account and password are separated by ----
def readaccount(sp='----'):
    """Import every ``username<sp>password`` line of account.txt into redis.

    Blank lines are skipped. A line without the separator is reported and
    skipped, so one bad line no longer aborts the import. The line is split
    at the first separator only, so a password may itself contain it.

    :param sp: separator between username and password on each line
    :return: None
    """
    print('开始导入账号密码,正在读取account.txt')
    with open('account.txt', 'r') as f:
        for data in f:
            line = data.strip('\n')
            if not line.strip():
                continue
            username, found, password = line.partition(sp)
            if not found:
                print('格式错误,跳过该行:%s' % line)
                continue
            print('正在导入账号:%s  密码:%s' % (username, password))
            result = jd.set(username, password)
            print('导入成功' if result else '导入失败')
def __init__(self, name='jdcookies'):
    """Create the JD account store and the cookie store keyed by *name*.

    :param name: key name handed to the cookies ``RedisClient``
    """
    self.name = name
    accounts = RedisClient('jdaccount')
    self.account_db = accounts
    cookie_store = RedisClient(self.name)
    self.cookies_db = cookie_store
class TestCookies():
    """Check every stored JD cookie set and delete the ones that no longer work."""

    def __init__(self):
        self.account_db = RedisClient('jdaccount')
        self.cookies_db = RedisClient('jdcookies')
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        }
        self.testUrl = 'https://www.jd.com/'

    def test(self, username, cookies):
        """Request the test URL with *cookies* and delete them when invalid.

        Cookies that cannot be serialised as JSON, or that yield a response
        status other than 200, are deleted from the cookie store. A failed
        request is only reported.

        :param username: account the cookies belong to
        :param cookies: cookies passed to ``requests.get``
        :return: None
        """
        print('开始测试账号为%s的cookies' % username)
        try:
            # Check that the cookies can be represented as JSON.
            json.dumps(cookies)
        except TypeError:
            print('cookies不合法', username)
            self.cookies_db.delete(username)
            print('删除cookies', username)
            return
        try:
            # Send the cookies along to test whether they are still valid.
            response = requests.get(self.testUrl, headers=self.headers, cookies=cookies,
                                    timeout=5, allow_redirects=False)
            if response.status_code == 200:
                print('账号%s的cookies有效' % username)
            else:
                print(response.status_code, response.headers)
                print('账号%s的cookies已失效' % username)
                self.cookies_db.delete(username)
                print('删除cookies', username)
        except (ConnectionError, requests.exceptions.RequestException) as e:
            # RequestException also covers timeouts (timeout=5 above), which
            # would otherwise abort the whole run.
            print('发生异常', e.args)

    def process_cookies(self, cookies):
        """Build a ``{name: value}`` dict that ``requests`` can use as cookies.

        :param cookies: iterable of mappings that each have 'name' and 'value'
        :return: dict mapping each cookie name to its value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """Test the cookies of every account in the cookies table.

        An entry whose stored value cannot be parsed as JSON is deleted, in
        the same way ``test`` deletes invalid cookies.

        :return: None
        """
        # Fetch the cookies of all accounts from the cookies table.
        cookies_groups = self.cookies_db.all()
        for username, cookies in cookies_groups.items():
            try:
                # Convert the stored JSON back into Python objects.
                loaded = json.loads(cookies)
            except (TypeError, ValueError):
                print('cookies不合法', username)
                self.cookies_db.delete(username)
                print('删除cookies', username)
                continue
            # Convert the selenium-style cookies to the form requests needs.
            self.test(username, self.process_cookies(loaded))


# Example usage:
# a = CookiesGenerator()
# a = TestCookies()
# a.run()
class putong():
    """Log in to JD with stored cookies, add a product to the cart and submit the order."""

    def __init__(self,url,yanse):
        """
        :param url: page opened after the cookies are loaded
        :param yanse: the colour/category options to choose when ordering
        """
        self.url = url
        self.yanse = yanse
        self.brower = webdriver.Chrome()
        self.wait = WebDriverWait(self.brower,30)
        self.cookies_db = RedisClient('jdcookies')

    def login(self):
        """Log in by loading a randomly chosen account's cookies into the browser.

        :return: ``(True, username)`` when the 'nickname' element shows up;
            ``False`` when no cookies are stored or the wait times out
        """
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S '),'开始使用cookies登录账号')
        # Fetch the cookies of one random account.
        if self.cookies_db.count() == 0:
            print('没有可用的cookies,请重新获取后再进行登录')
            return False
        username,cookie = self.cookies_db.random_getall()
        cookies = json.loads(cookie)
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S '),'成功获取到账号%s的cookies'%username)
        # The site is opened once before the cookies are added to the browser.
        self.brower.get('https://www.jd.com/')
        for item in cookies:
            self.brower.add_cookie(item)
        self.brower.get(self.url)
        try:
            # Decide whether the login succeeded.
            return bool(self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'nickname')))),username
        except ex.TimeoutException:
            return False

    def choice(self):
        """Wait for the configured options, then add the product to the cart.

        :return: True when the success page appears, False on failure or timeout
        """
        try:
            # Go through the values in yanse, i.e. the product attributes to
            # choose; some products need several attributes chosen together.
            for i in self.yanse:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),"正在选择属性",i)
                # NOTE(review): this only waits for the link to be present and
                # never clicks it; confirm whether a .click() is missing here.
                self.wait.until(EC.presence_of_element_located((By.LINK_TEXT,i)))
            # After the attributes, click "add to cart".
            self.wait.until(EC.presence_of_element_located((By.XPATH,'//div[@id="choose-btns"]/a[@id="InitCartUrl"]'))).click()
            # JD jumps to a success page once the product is added, so
            # checking for that page's CSS class is enough.
            if self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'success-lcol'))):
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'商品已成功加入购物车')
                return True
            else:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'商品加入购物车失败')
                return False
        except ex.TimeoutException:
            return False

    def pay(self):
        """Open the cart page, go to checkout and submit the order.

        :return: True when the order was submitted, False on timeout
        """
        try:
            # Open the cart page.
            self.brower.get('https://cart.jd.com/cart_index/')
            # "Select all" is ticked automatically by JD on this page, so it
            # is not clicked here.
            # self.wait.until(EC.presence_of_element_located((By.NAME,'select-all'))).click()
            time.sleep(2)
            # Click checkout.
            self.wait.until(EC.presence_of_element_located((By.LINK_TEXT, '去结算'))).click()
            time.sleep(2)
            self.wait.until(EC.presence_of_element_located((By.ID, 'order-submit'))).click()
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "已提交订单")
            return True
        except ex.TimeoutException:
            print('页面超时')
            return False

    def run(self):
        """Run the whole flow: log in, choose the product, submit the order.

        The browser is closed at the end on every path, including a failed
        login, which previously crashed while unpacking login()'s ``False``.

        :return: None
        """
        try:
            outcome = self.login()
            # login() yields a bare False on failure and a tuple on success.
            if isinstance(outcome, tuple):
                a, username = outcome
            else:
                a, username = outcome, None
            if not a:
                print('登录失败')
                return
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "账号%s登录成功"%username)
            # After logging in, choose the product attributes.
            if not self.choice():
                print('商品选择失败')
                return
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "商品属性选择完毕并成功加入购物车")
            if self.pay():
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "账号%s的用户已成功提交订单,请及时支付"%username)
            else:
                print('订单提交失败')
        finally:
            self.brower.close()


# Example usage:
# a = putong('https://item.jd.com/100014929004.html',{
#     'Y7000京选|超万人好评系列',
#     'GTX1650ti|i5/16G/512G/100%sRGB',
# })
# a.run()
def get_conn():
    """Return the ``RedisClient`` stored on ``g``, creating it on first use."""
    try:
        return g.redis
    except AttributeError:
        # Nothing stored on `g` yet: create the client and keep it there.
        g.redis = RedisClient()
        return g.redis