Example #1
0
class Getter:
    """Generates cookies for accounts that are not yet present in the pool.

    ``self.redis`` and ``self.accounts_db`` are both built as
    ``RedisClient('accounts', website)``; the first is used as the cookie
    pool, the second as the account store.
    """

    def __init__(self, website='tianyancha'):
        """Create the Redis clients and the login crawler.

        :param website: site name handed to ``RedisClient`` as second argument.
        """
        self.website = website
        self.redis = RedisClient('accounts', self.website)
        self.crawler = Crawler()
        self.accounts_db = RedisClient('accounts', self.website)

    def is_over_threshold(self):
        """Return True once the pool holds ``POOL_UPPER_THRESHLD`` entries or more."""
        return self.redis.count() >= POOL_UPPER_THRESHLD

    def run(self, delay=5):
        """Log in every account that has no pooled cookie and store the result.

        A failure for one account is reported and the loop moves on to the
        next account instead of being silently discarded.

        :param delay: seconds to wait before each login attempt (default 5,
            the value that used to be hard-coded).
        :return: None
        """
        accounts_usernames = self.accounts_db.usernames()
        keys = self.redis.get()
        for username in accounts_usernames:
            if username in keys:
                print('账号', username, "存在于cookie池里")
                continue

            password = self.accounts_db.get_value(username)
            # NOTE(review): this prints the plaintext password to stdout.
            print('正在生成Cookies', '账号', username, '密码', password)
            if self.is_over_threshold():
                # Pool is full: no login attempt is made for this account.
                continue

            try:
                time.sleep(delay)
                cookie = self.crawler.crawl_main(username, password)
                if cookie:
                    self.redis.add(username, cookie)
                    print("cookie有效")
                else:
                    print("监控到cookie为空")
            except Exception as e:
                # Best effort per account: report the failure, keep going.
                print('生成Cookies失败', '账号', username, e)
Example #2
0
 def verify_cookie(cls):
     """Return a ``cls`` instance built around a cookie accepted by weibo.cn.

     The cookie is read from ``RedisClient`` exactly once per attempt, so the
     cookie that was verified is the same one that is scored or returned
     (repeated ``conn.get()`` calls are not guaranteed here to yield the same
     value). When the cookie is rejected, or the store is empty, the method
     retries through ``cls.verify_cookie()`` and returns that result
     directly instead of wrapping an instance inside another instance.

     NOTE(review): assumes this is decorated as a classmethod; the decorator
     is outside the visible snippet.

     :return: a ``cls`` instance, or None when the request/verification
         raised (``'verify error'`` is printed in that case).
     """
     baseurl = 'https://weibo.cn/'
     conn = RedisClient()
     cookie = conn.get()
     if not cookie:
         # Nothing stored yet: log in to save fresh cookies, then retry.
         Login().save_cookies()
         return cls.verify_cookie()
     try:
         # A timeout keeps a stalled connection from blocking forever.
         response = requests.get(baseurl, cookies=cookie, timeout=10)
         if response.status_code == 200:
             return cls(cookie=cookie)
         # Rejected cookie: record it via add_score, then try again.
         conn.add_score(cookie)
         return cls.verify_cookie()
     except Exception:
         print('verify error')
         return None
Example #3
0
 def verify_cookie(cls):
     """Return a ``cls`` instance built around a cookie accepted by weibo.cn.

     The cookie is read from ``RedisClient`` exactly once per attempt, so the
     cookie that was verified is the same one that is scored or returned
     (repeated ``conn.get()`` calls are not guaranteed here to yield the same
     value). When the cookie is rejected, or the store is empty, the method
     retries through ``cls.verify_cookie()`` and returns that result
     directly instead of wrapping an instance inside another instance.

     NOTE(review): assumes this is decorated as a classmethod; the decorator
     is outside the visible snippet.

     :return: a ``cls`` instance, or None when the request/verification
         raised (``'verify error'`` is printed in that case).
     """
     baseurl = 'https://weibo.cn/'
     conn = RedisClient()
     cookie = conn.get()
     if not cookie:
         # Nothing stored yet: log in to save fresh cookies, then retry.
         Login().save_cookies()
         return cls.verify_cookie()
     try:
         # A timeout keeps a stalled connection from blocking forever.
         response = requests.get(baseurl, cookies=cookie, timeout=10)
         if response.status_code == 200:
             return cls(cookie=cookie)
         # Rejected cookie: record it via add_score, then try again.
         conn.add_score(cookie)
         return cls.verify_cookie()
     except Exception:
         print('verify error')
         return None
Example #4
0
class AipClient(object):
    '''
    Wrapper around the Baidu OCR client (``AipOcr``) with a Redis-backed
    cache keyed by the MD5 of the image.

    ``__new__`` hands out one shared instance per class; ``__init__`` still
    runs on every construction and re-assigns the attributes.
    '''
    def __new__(cls, *args, **kw):
        '''
        Singleton: create the instance on first use, reuse it afterwards.
        '''
        if hasattr(cls, '_instance'):
            return cls._instance
        instance = super().__new__(cls)
        cls._instance = instance
        return instance

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        '''
        Keep the credentials and build the OCR client and the Redis client.
        '''
        self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
        self.redis = RedisClient(redis_url)
        self.client = AipOcr(appid, api_key, secrrt_key)

    @property
    def options(self):
        '''Option dict sent with every recognition request.'''
        return dict(
            language_type="CHN_ENG",
            detect_direction="false",
            detect_language="true",
            probability="true",
        )

    def General(self, image, **kwargs):
        '''Call ``basicGeneral``; extra keyword arguments are accepted but unused.'''
        print('调取General_api  识别')
        response = self.client.basicGeneral(image, self.options)
        return response

    def Accurate(self, image):
        '''Call ``basicAccurate`` with the shared options.'''
        print('调取Accurate_api  识别')
        response = self.client.basicAccurate(image, self.options)
        return response

    def orc(self, image, **kwargs):
        '''
        Recognise ``image``: try the general endpoint, then the accurate one.

        The first recognised word is cached under the image hash and
        returned; ``'*'`` is returned when neither endpoint yields a result.
        '''
        digest = MD5.md5(image)
        attempts = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for attempt in attempts:
            response = attempt()
            if not response.get('words_result'):
                continue
            self.redis.add(digest, response['words_result'][0]['words'])
            return response['words_result'][0]['words']
        return '*'

    def run(self, image, **kwargs):
        '''Return the cached word for ``image`` or fall back to ``orc``.'''
        digest = MD5.md5(image)
        if not self.redis.exists(digest):
            return self.orc(image, **kwargs)
        return self.redis.get(digest)
0
class ImportDatabase(object):
    """Stores the items produced by ``KuaidaiProcuration.parse_url`` in Redis."""

    def __init__(self):
        """Create the Redis client and the item source."""
        self.client = RedisClient()
        self.kuaidai = KuaidaiProcuration()

    def set_ip(self, value):
        """Store a single value through the Redis client."""
        self.client.set(value)

    def main(self):
        """Add every item not already stored, then print the stored count.

        The stored items are fetched once and tracked locally instead of
        being re-read from the client for every candidate.
        NOTE(review): assumes items are hashable (e.g. str/bytes) and that
        ``client.get()`` returns an iterable of the stored items.
        """
        known = set(self.client.get())
        for item in self.kuaidai.parse_url():
            if item in known:
                continue
            self.client.set(item)
            known.add(item)
        print(f'获取IP个数:{self.client.count()}')
Example #6
0
class VaildTester(object):
    """Base class that passes every stored item to a subclass-defined check."""

    # Browser-like User-Agent header; not referenced inside this class itself.
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'),
    }

    def __init__(self):
        """Create the Redis client the items are read from."""
        self.client = RedisClient()

    def test(self, value):
        """Check one item; subclasses must override this."""
        raise NotImplementedError

    def run(self):
        """Fetch the stored items and call ``test`` on each, in order."""
        for entry in self.client.get():
            self.test(entry)
0
 def run():
     """Check the stored cookies of every account for each site in ``ACCOUNTS``.

     For each account the cookie is read from the 'cookies' store and passed
     to the site's login object. A rejected cookie is deleted, the account is
     logged in again and the new cookies are stored. Any exception raised
     while handling one account is caught and reported, then the loop goes on.
     """
     for website in ACCOUNTS.keys():
         print('*' * 40)
         print('%s正在检测cookies...' % website)
         account_store = RedisClient('accounts', website)
         cookie_store = RedisClient('cookies', website)
         # NOTE(review): eval() instantiates "<Website>Login" from the ACCOUNTS
         # key. Safe only while ACCOUNTS is trusted configuration; consider an
         # explicit name -> class mapping instead.
         login_client = eval(website.capitalize() + 'Login()')
         for username, password in account_store.get_all().items():
             # On a first run the data may not be loaded yet, hence the
             # blanket exception handling around each account.
             try:
                 current = cookie_store.get(username)
                 if login_client.good_cookies(current):
                     print('\t%s\t\tCookies已通过检测...' % username)
                     continue
                 print('\t%s\t\tCookies未通过检测!!!' % username)
                 cookie_store.delete(username)
                 print('\t%s\t\tCookies已删除!!!' % username)
                 login_client.login(username, password)
                 refreshed = login_client.get_cookies()
                 cookie_store.set(username, refreshed)
             except Exception as e:
                 print('数据库为空,请等数据录入之后再进行测试:', e.args)
Example #8
0
class AipClient(object):
    '''
    Wrapper around the Baidu OCR client (``AipOcr``) with a Redis-backed
    cache keyed by the MD5 of the image.

    ``__new__`` hands out one shared instance per class; ``__init__`` still
    runs on every construction and re-assigns the attributes.
    '''
    def __init__(self, appid, api_key, secrrt_key, redis_url):
        '''Keep the credentials and build the OCR and Redis clients.'''
        self.appid = appid
        self.api_key = api_key
        self.secrrt_key = secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    def __new__(cls, *args, **kw):
        '''
        Singleton: create the instance on first use, reuse it afterwards.
        '''
        if not hasattr(cls, '_instance'):
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def options(self):
        '''Option dict sent with every recognition request.'''
        return {
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "detect_language": "false",
            "probability": "true"
        }

    def General(self, image, **kwargs):
        '''Call ``basicGeneral``; extra keyword arguments are accepted but unused.'''
        print('调取General_api  识别')
        return self.client.basicGeneral(image, self.options)

    def Accurate(self, image):
        '''Call ``basicAccurate`` with the shared options.'''
        print('调取Accurate_api  识别')
        return self.client.basicAccurate(image, self.options)

    @staticmethod
    def _first_word(results):
        '''Return the first recognised word of an OCR response, or None if it has none.'''
        words = results.get('words_result')
        if not words:
            return None
        return words[0]['words']

    def _remember(self, hash_value, font_key, word, result):
        '''Cache ``result`` under the image hash and under ``font_key``/``word``.'''
        self.redis.add(hash_value, result)
        self.redis.hadd(font_key, word, result)

    def orc(self, image, font_key, word, **kwargs):
        '''
        Recognise ``image``: try the general endpoint, then the accurate one.

        The first recognised word is returned. It is cached unless it equals
        the failure placeholder ``'*'`` (the previous check compared the
        whole ``words_result`` list with ``'*'``, which was always true).
        When nothing is recognised and ``FIXED`` is truthy, the image is
        written to ``BASE_DIR/<hash>.jpg`` for manual correction, and ``'*'``
        is returned.
        '''
        hash_value = MD5.md5(image)
        recognisers = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for recognise in recognisers:
            result = self._first_word(recognise())
            if result is None:
                continue
            if result != '*':
                self._remember(hash_value, font_key, word, result)
            return result
        if FIXED:
            # Keep the unrecognised image so it can be corrected by hand.
            path = os.path.join(BASE_DIR, hash_value + '.jpg')
            if not os.path.exists(path):
                with open(path, 'wb') as f:
                    f.write(image)
        return '*'

    def run(self, image, font_key, word, **kwargs):
        '''
        Return the cached word for ``image`` (also recording it under
        ``font_key``/``word``) or fall back to ``orc``.
        '''
        hash_value = MD5.md5(image)
        if self.redis.exists(hash_value):
            result = self.redis.get(hash_value)
            self.redis.hadd(font_key, word, result)
            return result
        return self.orc(image, font_key, word, **kwargs)
0
class CookiesGenerator(object):
    """Base class that logs accounts in through a browser and stores their cookies."""

    def __init__(self, website='default'):
        """
        Create the cookie/account Redis clients and start the browser.
        :param website: site name passed to ``RedisClient``
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.init_browser()

    def __del__(self):
        self.close()

    def init_browser(self):
        """
        Create the browser selected by ``BROWSER_TYPE``.

        Only 'PhantomJS' and 'Chrome' are handled; for any other value
        ``self.browser`` is left unset.
        :return:
        """
        if BROWSER_TYPE == 'PhantomJS':
            caps = DesiredCapabilities.PHANTOMJS
            caps[
                "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
            self.browser = webdriver.PhantomJS(desired_capabilities=caps)
            self.browser.set_window_size(1400, 500)
        elif BROWSER_TYPE == 'Chrome':
            self.browser = webdriver.Chrome()

    def new_cookies(self, username, password):
        """
        Generate fresh cookies; subclasses must override this.
        :param username: account name
        :param password: account password
        :return: ``run`` expects a mapping with 'status' and 'content' keys
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """
        Convert a list of cookie dicts into a ``{name: value}`` mapping.
        :param cookies: iterable of dicts with 'name' and 'value' keys
        :return: dict mapping cookie name to cookie value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Log in, one after another, every account that has no stored cookies.

        Status 1 stores the cookies, status 2 (wrong password) deletes the
        account, anything else just prints the returned content. The
        completion message is printed once after the loop rather than once
        per already-cached account.
        :return:
        """
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()

        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the plaintext password to stdout.
            print('正在生成Cookies', '账号', username, '密码', password)
            result = self.new_cookies(username, password)
            status = result.get('status')
            if status == 1:
                # Login succeeded.
                cookies = self.process_cookies(result.get('content'))
                print('成功获取到Cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存Cookies')
            elif status == 2:
                # Wrong password: remove the account.
                print(result.get('content'))
                if self.accounts_db.delete(username):
                    print('成功删除账号')
            else:
                print(result.get('content'))
        print('所有账号都已经成功获取Cookies')

    def close(self):
        """
        Close the browser if there is one.

        A missing ``browser`` attribute is tolerated so that ``__del__`` does
        not raise when the browser was never created.
        :return:
        """
        try:
            print('Closing Browser')
            self.browser.close()
            del self.browser
        except (AttributeError, TypeError):
            print('Browser not opened')
Example #10
0
class Tester:
    """Checks stored cookies against ``TEST_URL`` and rescores them in Redis."""

    def __init__(self, website='tianyancha'):
        """Create the Redis client used to read and rescore cookies."""
        self.website = website
        self.redis = RedisClient('accounts', self.website)

    @staticmethod
    def _build_headers(cookie):
        """Return the request headers carrying ``cookie`` without its last character."""
        return {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate, br",
            "Accept-Language":
            "zh-CN,zh;q=0.9",
            "Cache-Control":
            "max-age=0",
            "Connection":
            "keep-alive",
            "Cookie":
            cookie[:-1],
            "Host":
            "www.tianyancha.com",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
        }

    async def test_one_proxy(self, key, proxy):
        """Check whether one cookie is still usable on the target site.

        The blocking ``requests.get`` call runs in the default executor so
        the checks of a batch overlap instead of blocking the event loop.
        The aiohttp session the previous version opened was never used and
        has been dropped.

        :param key: Redis key the cookie belongs to.
        :param proxy: the cookie string under test (historical name).
        """
        try:
            headers = self._build_headers(proxy)
        except Exception as e:
            print(key, '测试错误', -20, e)
            return
        try:
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: requests.get(TEST_URL, headers=headers, timeout=30))
            html = etree.HTML(response.text)
            # A non-empty user-name node is treated as "logged in".
            user = "".join(html.xpath('//span[@class="ni-sp-name"]//text()'))
            print(user, '*' * 20)
            if response.status_code in TRUE_STATUS_CODE and user:
                # Cookie is usable.
                self.redis.max(key, proxy)
                print(key, 100, '可用')
            else:
                # Cookie is not usable.
                self.redis.decrease(key, proxy)
                print(key, -20, "状态码错误")
        except Exception as e:
            print(key, '请求错误', -20, e)

    async def start(self):
        """Test every cookie of every eligible key in batches of ``BATCH_TEST_SIZE``.

        Batches no longer overlap (the range advances by the batch size),
        each batch is awaited, and the pause between batches is a
        non-blocking ``asyncio.sleep``.
        NOTE(review): keys containing "tianyancha" are skipped, as before —
        confirm this is the intended filter.
        """
        try:
            keys = self.redis.get()
            for key in keys:
                if "tianyancha" in key:
                    continue
                proxies = self.redis.all(key)
                print(key)
                for i in range(0, len(proxies), BATCH_TEST_SIZE):
                    batch = proxies[i:i + BATCH_TEST_SIZE]
                    await asyncio.gather(
                        *(self.test_one_proxy(key, proxy) for proxy in batch))
                    await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Run ``start`` to completion on a fresh event loop."""
        asyncio.run(self.start())
Example #11
0
class CookiesGenerator(object):
    """Base class that logs accounts in through headless Chrome and stores their cookies."""

    def __init__(self, website='default'):
        """Create the cookie/account Redis clients and start the browser.

        :param website: site name passed to ``RedisClient``
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.init_browser()

    def __del__(self):
        self.close()

    def init_browser(self):
        """Start a headless Chrome instance and keep it in ``self.browser``."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        # NOTE(review): newer Selenium releases expect ``options=`` instead of
        # ``chrome_options=`` — confirm against the installed version.
        self.browser = webdriver.Chrome(chrome_options=chrome_options)

    def new_cookies(self, username, password):
        """
        Generate fresh cookies; subclasses must override this.
        :param username: account name
        :param password: account password
        :return: ``run`` expects a mapping with 'status' and 'content' keys
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """Convert a list of cookie dicts into a ``{name: value}`` mapping."""
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """Log in every account that has no stored cookies yet.

        Status 1 stores the cookies, status 2 deletes the account, anything
        else just prints the returned content. The final message is printed
        after the loop in every case, exactly as the former ``for ... else``
        did (the loop never breaks).
        """
        account_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()

        for username in account_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the plaintext password to stdout.
            print('Generating new cookies...[username: {} password: {}]'.format(username, password))
            result = self.new_cookies(username, password)

            status = result.get('status')
            if status == 1:
                cookies = self.process_cookies(result.get('content'))
                print('Generated successfully!')
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('Saved new cookies successfully!')
            elif status == 2:
                print(result.get('content'))
                if self.accounts_db.delete(username):
                    print('Deleted invalid account successfully! [username: {}]'.format(username))
            else:
                print(result.get('content'))
        print('All accounts has got cookies successfully!')

    def close(self):
        """Close the browser if there is one.

        A missing ``browser`` attribute (e.g. the driver failed to start in
        ``__init__``) is tolerated so that ``__del__`` does not raise.
        """
        try:
            print('Closing browser...')
            self.browser.close()
            del self.browser
            print('Browser has closed!')
        except (AttributeError, TypeError):
            print('Browser not opened!')
Example #12
0
class CookiesGenerator:
    """
    Base class for cookie generators.
    """
    def __init__(self, site, single_cycle_limit):
        # Redis clients for the cookie pool and the account pool of this site.
        self.accounts_db = RedisClient('accounts', site)
        self.cookies_db = RedisClient('cookies', site)
        self.site = site
        # Upper bound on the number of logins performed in one run.
        self.single_cycle_limit = single_cycle_limit

    def new_cookies(self, username, password):
        """
        Generate fresh cookies; subclasses must override this.
        :param username: account name
        :param password: account password
        :return: ``run`` expects a mapping with 'status' and 'result' keys
        """
        raise NotImplementedError

    @staticmethod
    def process_cookies(cookies):
        """
        Convert a list of cookie dicts into a ``{name: value}`` mapping.
        :param cookies: iterable of dicts with 'name' and 'value' keys
        :return: dict mapping cookie name to cookie value
        """
        mapping = {}
        for entry in cookies:
            mapping[entry['name']] = entry['value']
        return mapping

    def run(self):
        """
        Generate cookies for the accounts that have none yet.

        Stops as soon as ``single_cycle_limit`` logins have been attempted
        and sleeps 90-180 seconds after every attempt.
        :return:
        """
        account_names = self.accounts_db.usernames()
        pooled_names = self.cookies_db.usernames()
        attempts = 0
        for raw_name in account_names:
            if attempts >= self.single_cycle_limit:
                print('已达单轮登录上限, 停止登录! ')
                return
            if raw_name in pooled_names:
                # Cookies already pooled for this account: no login, no sleep.
                continue

            # Stored values are decoded from bytes before use.
            password = self.accounts_db.get(raw_name).decode('utf-8')
            username = raw_name.decode('utf-8')
            print('正在生成 Cookies -> 账号: {}, 密码: {}'.format(
                username, password))
            result = self.new_cookies(username, password)
            status = result.get('status')
            if status == '1':
                payload = result['result']
                # A list is a raw cookie list; anything else is stored as is.
                cookies = (self.process_cookies(payload)
                           if isinstance(payload, list) else payload)
                print('成功生成 Cookies : {}'.format(cookies))
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存至 Cookie Pool!')
                else:
                    print('疑似 redis 连接断开, 未成功保存, 尝试调用录入器保存...')
                    set_account(
                        self.site, 'cookies',
                        '{} {}'.format(username, json.dumps(cookies)))
            elif status == '3':
                # Wrong password: remove the account.
                print(result['result'])
                if self.accounts_db.delete(username):
                    print('删除账号: ', username)
                else:
                    print('疑似 redis 断开连接, 删除失败, 尝试调用删除器删除...')
                    delete_account(self.site, 'accounts', username)
            else:
                print(result.get('result'))
            attempts += 1

            pause = random.randint(90, 180)
            time.sleep(pause)
        print('所有账号生成完毕! ')
Example #13
0
class AipClient(object):
    '''
    Wrapper around the Baidu OCR client (``AipOcr``) with a Redis-backed
    cache keyed by the MD5 of the image.

    ``__new__`` hands out one shared instance per class; ``__init__`` still
    runs on every construction and re-assigns the attributes.
    '''
    def __init__(self, appid, api_key, secrrt_key, redis_url):
        '''Keep the credentials and build the OCR and Redis clients.'''
        self.appid = appid
        self.api_key = api_key
        self.secrrt_key = secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    def __new__(cls, *args, **kw):
        '''
        Singleton: create the instance on first use, reuse it afterwards.
        '''
        if not hasattr(cls, '_instance'):
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def options(self):
        '''Option dict sent with every recognition request.'''
        return {
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "detect_language": "false",
            "probability": "false",
        }

    def General(self, image, **kwargs):
        '''Call ``basicGeneral``; extra keyword arguments are accepted but unused.'''
        print('调取General_api  识别')
        return self.client.basicGeneral(image, self.options)

    def Accurate(self, image):
        '''Call ``basicAccurate`` with the shared options.'''
        print('调取Accurate_api  识别')
        return self.client.basicAccurate(image, self.options)

    @staticmethod
    def _first_word(results):
        '''Return the first recognised word of an OCR response, or None if it has none.'''
        words = results.get('words_result')
        if not words:
            return None
        return words[0]['words']

    def _remember(self, hash_value, font_key, word, result):
        '''Cache ``result`` under the image hash and under ``font_key``/``word``.'''
        self.redis.add(hash_value, result)
        self.redis.hadd(font_key, word, result)

    def orc(self, image, font_key, word, **kwargs):
        '''
        Recognise ``image``: try the general endpoint, then the accurate one.

        The first recognised word is returned. It is cached unless it equals
        the failure placeholder ``'*'`` (the previous check compared the
        whole ``words_result`` list with ``'*'``, which was always true).
        ``'*'`` is returned when neither endpoint recognises anything.
        '''
        hash_value = MD5.md5(image)
        recognisers = (
            lambda: self.General(image, **kwargs),
            lambda: self.Accurate(image),
        )
        for recognise in recognisers:
            result = self._first_word(recognise())
            if result is None:
                continue
            if result != '*':
                self._remember(hash_value, font_key, word, result)
            return result
        return '*'

    def run(self, image, font_key, word, **kwargs):
        '''
        Return the cached word for ``image`` (also recording it under
        ``font_key``/``word``) or fall back to ``orc``.
        '''
        hash_value = MD5.md5(image)
        if self.redis.exists(hash_value):
            result = self.redis.get(hash_value)
            self.redis.hadd(font_key, word, result)
            return result
        return self.orc(image, font_key, word, **kwargs)
Example #14
0
class CookiesGenerator():
    """Base class that logs accounts in through a browser and stores their cookies."""

    def __init__(self, website="default"):
        """Create the cookie/account Redis clients and start the browser.

        :param website: site name passed to ``RedisClient``
        """
        self.website = website
        self.cookie_db = RedisClient('cookies', self.website)
        self.account_db = RedisClient('accounts', self.website)
        self.browser = self.init_browser()

    def init_browser(self):
        """Create the browser selected by ``BROWSER_TYPE``.

        :return: the webdriver instance, or None when ``BROWSER_TYPE`` is
            neither "PhantomJS" nor "Chrome".
        """
        if BROWSER_TYPE == "PhantomJS":
            caps = DesiredCapabilities.PHANTOMJS
            caps[
                "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'

            browser = webdriver.PhantomJS(desired_capabilities=caps)
            browser.set_window_size(1400, 500)
            return browser
        if BROWSER_TYPE == "Chrome":
            return webdriver.Chrome()
        return None

    def new_cookies(self, username, password):
        """Generate fresh cookies; subclasses must override this.

        :return: ``run`` expects a mapping with 'status' and 'content' keys
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def parse_cookies_dict(self, cookies):
        """Convert a list of cookie dicts into a ``{name: value}`` mapping."""
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """Log in every account that has no stored cookies yet.

        Status 1 stores the cookies, status 2 (wrong password) deletes the
        account, anything else prints the returned content.
        """
        account_usernames = self.account_db.usernames()
        cookies_usernames = self.cookie_db.usernames()

        for username in account_usernames:
            if username in cookies_usernames:
                continue
            password = self.account_db.get(username)
            # NOTE(review): this prints the plaintext password to stdout.
            print("正在生成Cookies", username, password)
            result = self.new_cookies(username, password)
            status = result.get("status")
            if status == 1:
                cookies = self.parse_cookies_dict(result.get('content'))
                print("成功获取Cookies", cookies)
                if self.cookie_db.set(username, json.dumps(cookies)):
                    print("成功保存Cookies")
            elif status == 2:
                print("密码错误")
                if self.account_db.delete(username):
                    print("删除成功")
            else:
                print(result.get("content"))

        print("所有账号已经成功获取Cookies")

    def close(self):
        """Close the browser if there is one.

        ``self.browser`` is None when ``init_browser`` matched no browser
        type, and is missing when ``__init__`` failed earlier; both cases are
        tolerated so that ``__del__`` does not raise.
        """
        try:
            print("Closing Browser")
            self.browser.close()
            del self.browser
        except (AttributeError, TypeError):
            print("Browser not opened")

    def __del__(self):
        self.close()