Esempio n. 1
0
    def _deal_datas(self, datas):
        """
        Convert the incoming data into the key(s) to work with.

        When ``self._to_md5`` is truthy the data is hashed with ``get_md5``:
        a list is hashed element by element, anything else as a whole.
        Otherwise a deep copy of ``datas`` is returned.

        :param datas: a single value or a list of values
        :return: md5 key(s), or a deep copy of ``datas``
        """
        # No hashing requested: return an independent copy of the input.
        if not self._to_md5:
            return copy.deepcopy(datas)

        # A non-list value is hashed as one unit.
        if not isinstance(datas, list):
            return get_md5(datas)

        return [get_md5(item) for item in datas]
Esempio n. 2
0
def get_proxy_from_http(proxy_source_url, **kwargs):
    """
    Fetch proxies from the given http address, caching the response on disk.

    The response body is stored in ``proxy_path`` in a file named after the
    md5 of the url. The file is refreshed when it does not exist, when it is
    older than the cache timeout, or when the timeout is falsy.

    :param proxy_source_url: http address that serves the proxy list
    :param kwargs: ``local_proxy_file_cache_timeout`` - seconds the cached
        file stays valid (default 60); a falsy value forces a refresh
    :return: the result of ``get_proxy_from_file`` for the cache file
    :raises requests.HTTPError: when the source answers with a 4xx/5xx
        status; the existing cache file is left untouched in that case
    """
    filename = tools.get_md5(proxy_source_url) + ".txt"
    abs_filename = os.path.join(proxy_path, filename)
    update_interval = kwargs.get("local_proxy_file_cache_timeout", 60)

    # Evaluated left to right, so os.stat only runs on an existing file.
    need_update = (
        not update_interval  # forced refresh
        or not os.path.exists(abs_filename)  # nothing cached yet
        or time.time() - os.stat(abs_filename).st_mtime > update_interval  # stale
    )
    if need_update:
        response = requests.get(proxy_source_url, timeout=20)
        # Never overwrite the cache with the body of an error page.
        response.raise_for_status()
        with open(abs_filename, "w") as f:
            f.write(response.text)
    return get_proxy_from_file(filename)
Esempio n. 3
0
 def __init__(self, user_agent=None, proxies=None, cookies=None, **kwargs):
     """
     Store the user's request identity.

     Every extra keyword argument is copied onto the instance first, then
     the explicit arguments are assigned. ``user_id`` is taken from
     ``kwargs`` when a truthy value is supplied, otherwise it is the md5 of
     ``user_agent``, ``proxies`` and ``cookies``.

     :param user_agent: user agent value for this user
     :param proxies: proxies value for this user
     :param cookies: cookies value for this user
     :param kwargs: additional attributes; may contain ``user_id``
     """
     self.__dict__.update(kwargs)
     self.user_agent, self.proxies, self.cookies = user_agent, proxies, cookies

     explicit_id = kwargs.get("user_id")
     if explicit_id:
         self.user_id = explicit_id
     else:
         self.user_id = get_md5(user_agent, proxies, cookies)
Esempio n. 4
0
    def fingerprint(self):
        """
        Compute a fingerprint from the item's field values.

        Only truthy values are used. When ``self.unique_key`` is set, only
        fields named in it take part. The values are stringified, sorted and
        passed to ``tools.get_md5``.

        :return: the md5 fingerprint, or None when no value qualifies
        """
        values = [
            str(value)
            for key, value in self.to_dict.items()
            if value and (not self.unique_key or key in self.unique_key)
        ]
        if not values:
            return None

        return tools.get_md5(*sorted(values))
Esempio n. 5
0
    def fingerprint(self):
        """
        Unique identifier of the request.

        Built from the url plus the string form of the ``params`` and
        ``data`` request kwargs, each included only when truthy.

        @return: result of tools.get_md5 over those parts
        """
        parts = [self.__dict__.get("url", "")]
        for name in ("params", "data"):
            value = self.requests_kwargs.get(name)
            if value:
                parts.append(str(value))

        return tools.get_md5(*parts)
Esempio n. 6
0
    def fingerprint(self):
        """
        Unique identifier of the request.

        Built from the canonicalized url plus every truthy request kwarg
        among params, data, files, auth, cert and json. The kwarg values are
        passed to ``tools.get_md5`` as they are, without conversion.

        @return: result of tools.get_md5 over those parts
        """
        # Normalize the url before it takes part in the hash.
        canonical_url = tools.canonicalize_url(self.__dict__.get("url", ""))

        extras = [
            self.requests_kwargs.get(name)
            for name in ("params", "data", "files", "auth", "cert", "json")
            if self.requests_kwargs.get(name)
        ]

        return tools.get_md5(canonical_url, *extras)
Esempio n. 7
0
    def fingerprint(self):
        """
        Unique identifier of the request.

        Built from the canonicalized url plus the string form of the
        ``params`` and ``data`` request kwargs, each included only when
        truthy.

        @return: result of tools.get_md5 over those parts
        """
        # Normalize the url before it takes part in the hash.
        canonical_url = tools.canonicalize_url(self.__dict__.get("url", ""))

        params = self.requests_kwargs.get("params")
        datas = self.requests_kwargs.get("data")
        extras = [str(value) for value in (params, datas) if value]

        return tools.get_md5(canonical_url, *extras)
Esempio n. 8
0
def run():
    """
    Loop forever, solving the slider captcha for each ip read from redis.

    Each round reads the set stored under
    ``setting.CAPTCHA_BLOCK_IP_REDIS_KEY``. For every ip it pops the task
    from ``setting.CAPTCHA_REDIS_KEY``, opens the task url in a browser that
    uses the ip as proxy, and tries up to 20 times to drag the slider into
    the gap. A 3 second pause follows every round, including rounds that
    ended with an exception, so a persistent failure cannot become a busy
    loop.

    The function returns only when the slider images fail to load; any
    other exception is logged and the loop continues.
    """
    while True:
        redisdb = RedisDB()
        try:
            block_ip = redisdb.sget(setting.CAPTCHA_BLOCK_IP_REDIS_KEY)
            if not block_ip:
                log.debug("暂无被封ip")
            for ip in block_ip:
                task = redisdb.hget(setting.CAPTCHA_REDIS_KEY, ip, is_pop=True)
                # SECURITY: eval() executes whatever is stored in redis.
                # NOTE(review): if the stored value is a plain literal,
                # ast.literal_eval or json would be safer - confirm the
                # writer's format before switching.
                task = eval(task)
                ua = task.get("ua")
                url = task.get("url")

                with WebDriver(proxy=ip, user_agent=ua) as browser:
                    log.info("解封ip {}, url {}".format(ip, url))
                    browser.get(url)
                    browser.implicitly_wait(5)
                    frame = browser.find_element_by_id("tcaptcha_iframe")
                    browser.switch_to.frame(frame)
                    for _attempt in range(20):
                        # Poll until both image urls are populated.
                        for _poll in range(1000):
                            bg_url = browser.find_element_by_id(
                                "slideBg").get_attribute("src")
                            slide_url = browser.find_element_by_id(
                                "slideBlock").get_attribute("src")
                            if bg_url and slide_url:
                                break
                        else:
                            log.error("滑块加载失败")
                            # NOTE(review): this leaves run() entirely, not
                            # just the current ip - confirm it is intended.
                            return

                        bg_image = os.path.join(
                            CAPTCHA_PATH,
                            "bg_" + tools.get_md5(bg_url) + ".png")
                        slide_image = os.path.join(
                            CAPTCHA_PATH,
                            "slider_" + tools.get_md5(slide_url) + ".png")
                        if tools.download_file(
                                bg_url, bg_image) and tools.download_file(
                                    slide_url, slide_image):
                            # Locate the gap in the background image.
                            x, y = get_gap_center_point(bg_image,
                                                        slide_image,
                                                        show=False)
                            # Scale the x coordinate by 340/680, presumably
                            # image width vs. displayed width - TODO confirm.
                            x = x * 340 / 680
                            # Fixed offsets; their meaning is not visible
                            # here - TODO confirm.
                            x = x - 27.5 - 30
                            # Drag the slider along the generated track.
                            slide_btn = browser.find_element_by_id(
                                "tcaptcha_drag_thumb")
                            tracks = track.get_tracks(x)
                            drag_and_drop(browser, slide_btn, tracks)
                            # Remove the downloaded images.
                            os.remove(bg_image)
                            os.remove(slide_image)

                            tools.delay_time(2)
                            if "verify.maoyan.com" not in browser.current_url:
                                log.info("解封成功")
                                break
                            else:
                                # Still on the verify page: best-effort click
                                # on the captcha action icon before retrying.
                                try:
                                    browser.find_element_by_css_selector(
                                        ".tc-action-icon").click()
                                except Exception as click_error:
                                    # Narrowed from a bare except so that
                                    # KeyboardInterrupt/SystemExit propagate.
                                    log.debug(click_error)
        except Exception as e:
            log.error(e)

        # Pause after every round, successful or not.
        tools.delay_time(3)