예제 #1
0
 def generate_ips(self):
     """
     Lazily yield the IPs stored in ``ips_ok.txt``.

     The file is located under ``self.config["abs_dir"]`` and is read one
     line at a time; each line is passed through ``toolBox.strip`` before
     being yielded.  The file stays open until the generator is exhausted
     or closed.
     """
     lib_dir = self.config["abs_dir"]
     with open(lib_dir + "/ips_ok.txt", "rt") as handle:
         # Generator expression keeps the read lazy: one line per request.
         yield from (toolBox.strip(raw_line) for raw_line in handle)
예제 #2
0
 def get_ips_from_file(self):
     """
     Read every IP stored in ``ips_ok.txt`` into memory.

     The file is located under ``self.config["abs_dir"]``; each line is
     passed through ``toolBox.strip``.

     :return: list holding one processed entry per line of the file
     """
     ips_path = self.config["abs_dir"] + "/ips_ok.txt"
     with open(ips_path, "rt") as handle:
         return [toolBox.strip(raw_line) for raw_line in handle]
예제 #3
0
    def get_66_ips(self):
        """
        Collect proxy addresses from the 66ip.cn site.

        Performs ten fetches.  Each fetch picks one of the two listing
        URLs at random (``proxytype=0`` for http, ``proxytype=1`` for
        https), downloads it through ``self.get_html`` with "gbk"
        encoding, and adds the extracted addresses, prefixed with the
        matching scheme, to ``self._ip_cache_lib``.  After every fetch
        the method sleeps for ``self.config["frequency"]`` seconds.
        """
        # Each listing URL is paired with the scheme prefix for its results.
        sources = [
            ("http://www.66ip.cn/nmtq.php?proxytype=0", "http://"),
            ("http://www.66ip.cn/nmtq.php?proxytype=1", "https://"),
        ]

        for _ in range(10):
            url, scheme = random.choice(sources)
            page = self.get_html(url, "gbk")
            # Keep every second item among positions 10..48 of the page.
            # NOTE(review): presumably those positions are the address
            # entries and the skipped ones are separators -- confirm
            # against what get_html returns.
            entries = list(islice(page, 10, 49, 2))
            for entry in entries:
                if entry is not None:
                    self._ip_cache_lib.add(scheme + toolBox.strip(entry))
            time.sleep(self.config["frequency"])
예제 #4
0
    def load_config():
        """
        Load settings from ``config.txt`` and return them as a dict.

        The path is resolved with ``os.path.abspath("config.txt")``, i.e.
        relative to the current working directory.  Each non-blank line
        must look like ``key=value``.  Only the first ``=`` separates key
        from value, so a value may itself contain ``=`` (for example a
        URL with a query string).  Whitespace around the key and the
        value is ignored, and blank lines are skipped.

        Keys missing from the file receive the defaults set below.  Per
        the original author's note, the result is meant to be stored as
        the class attribute ``config`` so other methods can share it.

        :return: dict of settings with defaults applied
        :raises AnalysisError: if a line has no value (nothing after the
            ``=``, or no ``=`` at all); the message is the quoted key
        """
        config = {}

        # Everything read from the file is a string, but these settings
        # are not strings; their values are converted further down.
        need_eval = ("proxy", "connect_timeout", "read_timeout", "frequency")
        config_path = os.path.abspath("config.txt")
        with open(config_path, "rt") as config_file:
            for item in config_file:
                item = toolBox.strip(item)
                if not item:
                    # A blank line carries no setting.
                    continue
                # partition() splits on the first "=" only and never
                # fails to unpack, unlike split("=").
                key, _, value = item.partition("=")
                key = key.strip()
                value = value.strip()
                if value == "":
                    msg = "'" + key + "'"
                    # A key without a value is a parse error.
                    raise AnalysisError(msg)
                # SECURITY NOTE: eval() executes whatever expression the
                # file contains, so config.txt must be trusted.  Consider
                # ast.literal_eval if only literals are ever needed.
                config[key] = eval(value) if key in need_eval else value
        toolBox.print_format("Loading config")

        # Defaults for every configurable item.
        config.setdefault("proxy", False)
        config.setdefault("dir_name", "ips_lib/")
        config.setdefault("abs_dir", os.path.abspath(config["dir_name"]))

        # Only html.parser is supported for now; lxml and other parsers
        # are planned.
        config.setdefault("parser", "html.parser")
        config.setdefault("connect_timeout", 3)
        config.setdefault("read_timeout", 6)
        config.setdefault("frequency", 6)
        config.setdefault("test_domain", "https://book.douban.com/")
        toolBox.print_dict(config)
        return config