Esempio n. 1
0
class Getter():
    """Pulls proxies from every registered crawl function into Redis."""

    def __init__(self):
        """Create the Redis client and the crawler used by :meth:`run`."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Check whether the proxy pool has reached its size limit.

        :return: True when the stored proxy count is at or above
                 POOL_UPPER_THRESHOLD, otherwise False.
        """
        return True if self.redis.count() >= POOL_UPPER_THRESHOLD else False

    def run(self):
        """
        Crawl every proxy site once and store the proxies that come back.

        Nothing is crawled when the pool is already at its limit.
        """
        print('获取器开始执行')
        if self.is_over_threshold():
            return
        # One pass per registered crawl function.
        for position in range(self.crawler.__CrawlFuncCount__):
            # Name of the crawl method for this proxy site.
            crawl_name = self.crawler.__CrawlFunc__[position]
            # Hand the method name to the crawler and store whatever it yields.
            for found in self.crawler.get_proxies(crawl_name):
                self.redis.add(found)
Esempio n. 2
0
 def __init__(self, url, yanse):
     """
     Remember the target page and options, then open the browser and cookie store.

     :param url: page address kept as ``self.url``.
     :param yanse: the colour/category option(s) to pick when ordering.
     """
     self.url, self.yanse = url, yanse
     # A single Chrome session; the explicit wait is bound to it.
     driver = webdriver.Chrome()
     self.brower = driver
     self.wait = WebDriverWait(driver, 30)
     self.cookies_db = RedisClient('jdcookies')
Esempio n. 3
0
 def __init__(self, website='default'):
     """
     Open the cookie and account stores for one website and prepare request headers.

     :param website: site name passed as the second argument to both
                     ``RedisClient`` instances.
     """
     self.website = website
     self.cookies_db = RedisClient('cookies', self.website)
     self.accounts_db = RedisClient('accounts', self.website)
     # Built entry by entry; the values are the same two header strings.
     request_header = {}
     request_header['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
     request_header['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
     self.header = request_header
Esempio n. 4
0
 def __init__(self):
     """Open the JD account and cookie stores and set the headers and test URL."""
     self.account_db = RedisClient('jdaccount')
     self.cookies_db = RedisClient('jdcookies')
     # Built entry by entry; the values are the same two header strings.
     request_headers = {}
     request_headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
     request_headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
     self.headers = request_headers
     self.testUrl = 'https://www.jd.com/'
Esempio n. 5
0
class Getter():
    """Collects proxies from each registered crawl function and saves them to Redis."""

    def __init__(self):
        """Set up the Redis client and the crawler."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the stored proxy count is at or above POOL_MAX_THRESHOLD."""
        pool_is_full = self.redis.count() >= POOL_MAX_THRESHOLD
        return True if pool_is_full else False

    def run(self):
        """Run every crawl function once unless the pool is already full."""
        print("获取器开始执行")
        if self.is_over_threshold():
            return
        func_count = self.crawler.__CrawlFuncCount__
        for slot in range(func_count):
            crawl_name = self.crawler.__CrawlFunc__[slot]
            fetched = self.crawler.get_proxies(crawl_name)
            for item in fetched:
                self.redis.add(item)
Esempio n. 6
0
class CookiesGenerator():
    """Generates cookies for stored JD accounts that do not have any yet."""

    def __init__(self, name='jdcookies'):
        """
        :param name: key name handed to the cookies ``RedisClient``.
        """
        self.name = name
        self.account_db = RedisClient('jdaccount')
        self.cookies_db = RedisClient(self.name)

    def process_cookies(self, cookies):
        """
        Reduce cookie records to a ``{name: value}`` mapping.

        :param cookies: iterable of mappings that each carry 'name' and 'value'.
        :return: dict keyed by cookie name.
        """
        return {entry['name']: entry['value'] for entry in cookies}

    def run(self):
        """
        Log in every account that has no cookies yet and store the outcome.

        Status 1 saves the returned content as JSON, status 2 deletes the
        account, and any other status only prints the returned content.
        """
        # Compare both tables to find the accounts still lacking cookies.
        known_accounts = self.account_db.usernames()
        with_cookies = self.cookies_db.usernames()

        for username in known_accounts:
            if username in with_cookies:
                continue
            password = self.account_db.get(username)
            print('正在生成账号为%s的cookies' % username)
            # Result dict produced by the Cookies helper.
            result = Cookies(username, password).main()
            time.sleep(10)
            status = result.get('status')
            if status == 1:
                cookies = result.get('content')
                print('成功获取到cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print("成功保存cookies")
            elif status == 2:
                print(result.get('content'))
                if self.account_db.delete(username):
                    print('成功删除错误账号')
            else:
                print(result.get('content'))
Esempio n. 7
0
class Tester(object):
    """Requests TEST_URL through each stored proxy and updates its score in Redis."""

    def __init__(self):
        """Create the Redis client that holds the proxies."""
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        The proxy is promoted with ``redis.max`` when the response status is
        in VALID_STATUS_CODES and demoted with ``redis.decrease`` otherwise,
        including when the request raises.

        :param proxy: a single proxy, as ``str`` or UTF-8 ``bytes``.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except Exception:
                # Any request failure counts against the proxy. Catching
                # Exception rather than using a bare except lets task
                # cancellation and KeyboardInterrupt propagate.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors are printed rather than raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for start in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each coroutine is scheduled as a Task on this loop first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 8
0
class ValidTester(object):
    """Base class that hands every stored cookie set to :meth:`test`."""

    def __init__(self, website='default'):
        """
        :param website: site name passed as the second argument to both
                        ``RedisClient`` instances.
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        request_header = {}
        request_header['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        request_header['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        self.header = request_header

    def test(self, username, cookies):
        """Check one account's cookies; subclasses are expected to override this."""
        raise NotImplementedError

    def run(self):
        """Fetch all stored cookies and test each username/cookies pair."""
        stored_pairs = self.cookies_db.all().items()
        for username, cookies in stored_pairs:
            self.test(username, cookies)
Esempio n. 9
0
class Tester():
    """Requests TEXT_URL through each stored proxy and updates its score in Redis."""

    def __init__(self):
        """Create the Redis client that holds the proxies."""
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test one proxy and record the outcome in Redis.

        The proxy is promoted with ``redis.max`` when the response status is
        in VALID_STATUS_CODES and demoted with ``redis.decrease`` otherwise,
        including on the client and timeout errors listed below.

        :param proxy: a single proxy, as ``str`` or UTF-8 ``bytes``.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                http_proxy = "http://" + proxy
                print("测试代理{}中".format(proxy))
                async with session.get(TEXT_URL, proxy=http_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("代理返回的响应码不合法", proxy)

            except (ClientConnectionError, ClientError, AttributeError,
                    asyncio.TimeoutError):
                # A failed request only lowers the proxy's score.
                self.redis.decrease(proxy)

    def run(self):
        """
        Test every stored proxy in batches of BATCH_TEST_SIZE.

        Errors are printed rather than raised.
        """
        print("测试开始运行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for start in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each coroutine is scheduled as a Task on this loop first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试发生错误", e.args)
Esempio n. 10
0
 def __init__(self):
     """Attach a ``RedisClient`` built with its default arguments as ``self.redis``."""
     client = RedisClient()
     self.redis = client
Esempio n. 11
0
class CookiesGenerator(object):
    """
    Base class that logs accounts in through a browser and stores their cookies.

    Subclasses provide :meth:`new_cookies`; :meth:`run` drives the workflow.
    """

    def __init__(self, website='default'):
        """
        :param website: site name passed as the second argument to both
                        ``RedisClient`` instances.
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.account_db = RedisClient('account', self.website)
        self.init_browser()

    def __del__(self):
        """Close the browser when the instance is destroyed."""
        self.close()

    def init_browser(self):
        """Start the Chrome browser used for logging in."""
        # Headless mode is deliberately not enabled: the captcha is solved
        # by hand, so the window has to be visible.
        self.browser = webdriver.Chrome()

    def new_cookies(self, username, password):
        """
        Obtain cookies for one account; subclasses must override this.

        :return: judging by :meth:`run`, a mapping with 'status' and 'content'.
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """
        Keep only each cookie's name and value.

        :param cookies: iterable of mappings that each carry 'name' and 'value'.
        :return: ``{name: value}`` dict.
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Generate cookies for every account that has none stored yet.

        Status 1 stores the processed cookies as JSON, status 2 deletes the
        account, and any other status only prints the returned content. The
        closing success message is printed only when no account failed.
        """
        account_usernames = self.account_db.usernames()
        cookies_usernames = self.cookies_db.usernames()

        # Tracks whether any pending account ended without stored cookies.
        any_failed = False
        for username in account_usernames:
            if username in cookies_usernames:
                continue
            password = self.account_db.get(username)
            print('正在生成cookies', '账号:', username, '密码----')
            result = self.new_cookies(username, password)
            time.sleep(10)
            # The status code decides between saving cookies and dropping the account.
            status = result.get('status')
            if status == 1:
                cookies = self.process_cookies(result.get('content'))
                print('成功获取到cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存cookies')
                else:
                    any_failed = True
            elif status == 2:
                any_failed = True
                print(result.get('content'))
                if self.account_db.delete(username):
                    print('成功删除错误账号')
            else:
                any_failed = True
                print(result.get('content'))

        # Previously a for/else printed this unconditionally, even after failures.
        if not any_failed:
            print('所有账号都已成功获取cookies')

    def close(self):
        """
        Close the browser if one is open.

        Safe to call repeatedly and before the browser was created, which
        matters because ``__del__`` also calls it.
        """
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        try:
            print('关闭浏览器')
            browser.close()
            del self.browser
        except TypeError:
            print('浏览器关闭失败')
Esempio n. 12
0
 def __init__(self, website='default'):
     """
     Open the cookie and account stores for one website, then start the browser.

     :param website: site name passed as the second argument to both
                     ``RedisClient`` instances.
     """
     self.website = website
     cookie_store = RedisClient('cookies', self.website)
     self.cookies_db = cookie_store
     account_store = RedisClient('account', self.website)
     self.account_db = account_store
     self.init_browser()
Esempio n. 13
0
# 此文件用来添加我们自己的账号到redis中
# 创建cookie池肯定会用到大量的账号,所以我们这边以txt文件为例,不用我们手动输入了
# 将事先申请好的账号密码保存到txt文件中,格式为:账号---密码,然后直接读取就行
# 默认放到同路径下的account.txt中,

from save import RedisClient

# Instantiate the Redis client first, passing in the key name parts.
conn = RedisClient('account', 'cf')


def readaccount(sp='----', path='account.txt', client=None):
    """
    Import ``account<sp>password`` lines from a text file into Redis.

    Blank lines and lines without the separator are skipped instead of
    aborting the import. Each line is split on the first separator only,
    so a password may itself contain the separator.

    :param sp: separator between account and password on each line.
    :param path: file to read; defaults to ``account.txt`` in the working directory.
    :param client: object with a ``set(account, password)`` method; defaults
                   to the module-level ``conn``.
    """
    if client is None:
        client = conn
    print('开始读取读取account.txt文件......')
    with open(path, "r") as f:
        for raw_line in f:
            # Every line read from the file ends in a newline; drop it first.
            line = raw_line.strip('\n')
            if not line.strip():
                continue
            account, separator, password = line.partition(sp)
            if not separator:
                print('格式错误,跳过该行: %s' % line)
                continue
            print("正在导入账号:%s   密码:%s" % (account, password))
            # Store the pair through the client's set method.
            result = client.set(account, password)
            print('导入成功\n' if result else '导入失败')


# Run the import only when this file is executed as a script.
if __name__ == "__main__":
    readaccount()
Esempio n. 14
0
 def __init__(self):
     """Create the Redis client and the crawler and keep both on the instance."""
     store = RedisClient()
     self.redis = store
     self.crawler = Crawler()
Esempio n. 15
0
'''
    此文件为账号导入模块,将我们提前写好的账号全部导入到redis中
    我们单独建立一个account.txt来集中存放账号密码,来提高效率
    date:2021.3.2
    author:焦康阳
    blog:https://jiaokangyang.com
'''

from save import RedisClient

# Module-level Redis client, created with the 'jdaccount' key name, that
# readaccount() stores the imported accounts through.
jd = RedisClient('jdaccount')


# In the txt file, account and password are separated by ----.
def readaccount(sp='----', path='account.txt', client=None):
    """
    Import ``username<sp>password`` lines from a text file into Redis.

    Blank lines and lines without the separator are skipped instead of
    aborting the import. Each line is split on the first separator only,
    so a password may itself contain the separator.

    :param sp: separator between username and password on each line.
    :param path: file to read; defaults to ``account.txt`` in the working directory.
    :param client: object with a ``set(username, password)`` method; defaults
                   to the module-level ``jd``.
    """
    if client is None:
        client = jd
    print('开始导入账号密码,正在读取account.txt')
    with open(path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip('\n')
            if not line.strip():
                continue
            username, separator, password = line.partition(sp)
            if not separator:
                print('格式错误,跳过该行: %s' % line)
                continue
            print('正在导入账号:%s 密码:%s' % (username, password))
            result = client.set(username, password)
            print('导入成功' if result else '导入失败')
Esempio n. 16
0
 def __init__(self, name='jdcookies'):
     """
     Open the JD account store and the cookie store.

     :param name: key name handed to the cookies ``RedisClient``.
     """
     self.name = name
     account_store = RedisClient('jdaccount')
     self.account_db = account_store
     cookie_store = RedisClient(self.name)
     self.cookies_db = cookie_store
Esempio n. 17
0
class TestCookies():
    """Checks stored JD cookies by requesting the JD home page with them."""

    def __init__(self):
        """Open the account and cookie stores and set the headers and test URL."""
        self.account_db = RedisClient('jdaccount')
        self.cookies_db = RedisClient('jdcookies')
        self.headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        }
        self.testUrl = 'https://www.jd.com/'

    def _discard(self, username):
        """Report one account's cookies as invalid and delete them from the store."""
        print('cookies不合法', username)
        self.cookies_db.delete(username)
        print('删除cookies', username)

    def test(self, username, cookies):
        """
        Request the test URL with the given cookies and delete them if rejected.

        A 200 response counts as valid. Redirects are not followed, and any
        other status deletes the stored cookies. Network errors are printed
        and leave the stored cookies untouched.

        :param username: account the cookies belong to.
        :param cookies: ``{name: value}`` mapping usable by ``requests``.
        """
        print('开始测试账号为%s的cookies' % username)
        try:
            # Check that the cookies can be serialised as JSON.
            json.dumps(cookies)
        except TypeError:
            self._discard(username)
            return

        try:
            # Send the request with the cookies attached to test them.
            response = requests.get(self.testUrl,
                                    headers=self.headers,
                                    cookies=cookies,
                                    timeout=5,
                                    allow_redirects=False)
            if response.status_code == 200:
                print('账号%s的cookies有效' % username)
            else:
                print(response.status_code, response.headers)
                print('账号%s的cookies已失效' % username)
                self.cookies_db.delete(username)
                print('删除cookies', username)
        except (ConnectionError, requests.RequestException) as e:
            # RequestException also covers timeouts, which previously escaped
            # and aborted the whole run.
            print('发生异常', e.args)

    def process_cookies(self, cookies):
        """
        Turn cookie records into the ``{name: value}`` dict ``requests`` accepts.

        :param cookies: iterable of mappings that each carry 'name' and 'value'.
        :return: dict keyed by cookie name.
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Test every stored cookie set.

        Entries that are not valid JSON, or lack the expected name/value
        structure, are deleted and skipped rather than aborting the run.
        """
        cookies_groups = self.cookies_db.all()
        for username, cookies in cookies_groups.items():
            try:
                # Parse the stored JSON, then convert it for requests.
                prepared = self.process_cookies(json.loads(cookies))
            except (ValueError, TypeError, KeyError):
                self._discard(username)
                continue

            self.test(username, prepared)


# a = CookiesGenerator()
# a = TestCookies()
# a.run()
Esempio n. 18
0
class putong():
    """Logs into JD with stored cookies, adds an item to the cart and submits the order."""

    def __init__(self,url,yanse):
        """
        :param url: item page to open after the cookies are loaded.
        :param yanse: iterable of option labels (colour/category) to look for
                      on the item page.
        """
        self.url = url
        self.yanse = yanse
        self.brower = webdriver.Chrome()
        self.wait = WebDriverWait(self.brower,30)
        self.cookies_db = RedisClient('jdcookies')

    def login(self):
        """
        Log in by loading one stored account's cookies into the browser.

        :return: ``(True, username)`` once the nickname element is present;
                 a bare ``False`` when no cookies are stored or the wait
                 times out.
        """
        print(datetime.now().strftime('%Y-%m-%d  %H:%M:%S '),'开始使用cookies登录账号')
        if self.cookies_db.count() == 0:
            print('没有可用的cookies,请重新获取后再进行登录')
            return False

        # Pick one account's cookies at random.
        username,cookie = self.cookies_db.random_getall()
        cookies = json.loads(cookie)
        print(datetime.now().strftime('%Y-%m-%d  %H:%M:%S '),'成功获取到账号%s的cookies'%username)

        # The home page is opened before the cookies are added.
        self.brower.get('https://www.jd.com/')
        for cookie in cookies:
            self.brower.add_cookie(cookie)

        self.brower.get(self.url)

        try:
            # The nickname element is what marks the login as successful.
            return bool(self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'nickname')))),username

        except ex.TimeoutException:
            return False

    def choice(self):
        """
        Wait for each wanted option, add the item to the cart and confirm it.

        :return: True when the success page element appears, False on timeout.
        """
        try:
            # An item may need several options, so each label is checked.
            # NOTE(review): this only waits for each option link to be
            # present and never clicks it - confirm that is intended.
            for i in self.yanse:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),"正在选择属性",i)
                self.wait.until(EC.presence_of_element_located((By.LINK_TEXT,i)))

            # Click "add to cart".
            self.wait.until(EC.presence_of_element_located((By.XPATH,'//div[@id="choose-btns"]/a[@id="InitCartUrl"]'))).click()
            # JD then jumps to a success page; its CSS class is what we check.
            if self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'success-lcol'))):
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'商品已成功加入购物车')
                return True
            else:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'商品加入购物车失败')
                return False
        except ex.TimeoutException:
            return False

    def pay(self):
        """
        Open the cart, go to checkout and submit the order.

        :return: True once the submit button was clicked, False on timeout.
        """
        try:
            self.brower.get('https://cart.jd.com/cart_index/')
            # "Select all" is not clicked: JD ticks the items automatically.
            time.sleep(2)
            # Click "go to checkout".
            self.wait.until(EC.presence_of_element_located((By.LINK_TEXT, '去结算'))).click()
            time.sleep(2)

            self.wait.until(EC.presence_of_element_located((By.ID, 'order-submit'))).click()
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "已提交订单")
            return True
        except ex.TimeoutException:
            print('页面超时')
            return False

    def run(self):
        """
        Run the whole flow: log in, choose the item, then pay.

        The browser is closed on every exit path, including login failure
        and unexpected exceptions.
        """
        try:
            outcome = self.login()
            if not outcome:
                # login() returns a bare False on failure; unpacking it as a
                # pair would raise TypeError.
                print('账号登录失败')
                return
            _, username = outcome
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "账号%s登录成功"%username)
            if not self.choice():
                print('商品选择失败')
                return
            print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "商品属性选择完毕并成功加入购物车")
            if self.pay():
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "账号%s的用户已成功提交订单,请及时支付"%username)
            else:
                print('订单提交失败')
        finally:
            self.brower.close()

# a = putong('https://item.jd.com/100014929004.html',{
#     'Y7000京选|超万人好评系列',
#     'GTX1650ti|i5/16G/512G/100%sRGB',
# })
# a.run()
Esempio n. 19
0
def get_conn():
    """Return the ``RedisClient`` stored on ``g``, creating it on first use."""
    if hasattr(g, 'redis'):
        return g.redis
    # Nothing stored yet: build the client once and keep it on ``g``.
    g.redis = RedisClient()
    return g.redis