Exemple #1
0
def test_settings():
    sw = SettingsLoader()
    set = sw.load({
        "a": 1,
        "b": "d",
        "c": {
            "d": [3, 4, 5, 6],
            "e": {"a", 4, 5, 6}
        }
    })
    print(set)
    print(set.c.e)
Exemple #2
0
    def settings(cls):
        """
        这个应该在app加载时调用,这样project_path才准确。
        由于有些项目可能在模块作用域就需要使用settings中的配置,
        所以调用应该在项目模块加载之前。

        :return:
        """
        return SettingsLoader().load(
            SettingsMixin.settings_path or "settings",
            default={"PROJECT_PATH": os.path.abspath(os.getcwd())})
Exemple #3
0
 def __init__(self):
     """
         初始化logger, redis_conn
     """
     self.enrich_parser_arguments()
     args = self.parser.parse_args()
     cwd = getcwd()
     sys.path.insert(0, cwd)
     self.settings = SettingsLoader().load(args.localsettings, args.settings)
     self.headers = self.settings.HEADERS
     self.proxies_check_in_channel = ThreadSafeSet()
     self.proxies_check_out_channel = TreadSafeDict()
     self.load_site(proxy_site_spider)
     self.load_site(args.spider_module)
     self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                             self.settings.get_int("REDIS_PORT"))
     if args.check_method:
         self.is_anonymous = partial(load(args.check_method), self)
     super().__init__()
     self.supervisor.control(log_path=os.path.join(cwd, self.name) + ".log")
     self.console.init_console()
     self.my_ip = requests.get("https://httpbin.org/ip").json()["origin"]
Exemple #4
0
class TestService(object):
    pytestmark = [
        arti_ser("settings", ret_val=SettingsLoader().load("settings")),
    ]

    @arti_ser("code", ret_val="111111")
    @pytest.mark.env(NEED_CODE="True")
    async def test_check_code_on_True(self):
        assert ArticleService().check_code("111111") is True

    @arti_ser("code", ret_val="22222")
    @pytest.mark.env(NEED_CODE="True")
    async def test_check_code_on_False(self):
        assert ArticleService().check_code("111111") is False

    @arti_ser("code", ret_val="3333")
    @pytest.mark.env(NEED_CODE="False")
    async def test_check_code_off(self):
        assert ArticleService().check_code("111111") is True
Exemple #5
0
class ProxyFactory(ParallelMonitor):
    name = "proxy_factory"
    proxy_methods = dict()
    parser = ArgumentParser(conflict_handler="resolve")
    supervisor = CommandlinePluginProxy(Supervisor, parser)
    console = CommandlinePluginProxy(Console, parser)

    def __init__(self):
        """
            初始化logger, redis_conn
        """
        self.enrich_parser_arguments()
        args = self.parser.parse_args()
        cwd = getcwd()
        sys.path.insert(0, cwd)
        self.settings = SettingsLoader().load(args.localsettings, args.settings)
        self.headers = self.settings.HEADERS
        self.proxies_check_in_channel = ThreadSafeSet()
        self.proxies_check_out_channel = TreadSafeDict()
        self.load_site(proxy_site_spider)
        self.load_site(args.spider_module)
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.get_int("REDIS_PORT"))
        if args.check_method:
            self.is_anonymous = partial(load(args.check_method), self)
        super().__init__()
        self.supervisor.control(log_path=os.path.join(cwd, self.name) + ".log")
        self.console.init_console()
        self.my_ip = requests.get("https://httpbin.org/ip").json()["origin"]

    def log_err(self, func_name, *args):
        self.logger.error("Error in %s: %s. " % (
            func_name, "".join(traceback.format_exception(*args))))
        return True

    def load_site(self, module_str):
        if module_str:
            if isinstance(module_str, str):
                mod = load(module_str)
            else:
                mod = module_str
            for key, func in vars(mod).items():
                if not key.startswith("fetch"):
                    continue
                self.proxy_methods[key] = partial(exception_wrapper(func), self)

    def is_anonymous(self, proxy):
        url = "http://www.98bk.com/cycx/ip1/"
        resp = requests.get(url, timeout=10, headers=self.headers,
                            proxies={"http": "http://%s" % proxy})
        buf = resp.text.encode("iso-8859-1").decode("gbk")
        real_ip = re_search(r"您的真实IP是([\d\.]+)", buf)
        self.logger.info(f"My ip :{self.my_ip}, Real ip: {real_ip}")
        return real_ip == "" or not self.my_ip.count(real_ip)

    def check(self, proxy, good):
        """
            检查代理是否可用
        """
        with ExceptContext(errback=lambda *args: True):
            if self.is_anonymous(proxy):
                good.add(proxy)

    def check_proxies(self):
        """
        对待检查队列中的代理进行检查
        :return:
        """
        self.logger.debug("Start check thread. ")

        threads = dict()
        good = set()
        while self.alive:
            if len(self.proxies_check_in_channel):
                proxy = self.proxies_check_in_channel.pop()
            else:
                proxy = None
            if isinstance(proxy, bytes):
                proxy = proxy.decode()
            if len(threads) < 150 and proxy:
                th = Thread(target=self.check, args=(proxy, good))
                th.setDaemon(True)
                th.start()
                threads[time.time()] = (th, proxy)
                time.sleep(.001)
            else:
                time.sleep(1)
                for start_time, (th, proxy) in threads.copy().items():
                    if start_time + 60 < time.time() or not th.is_alive():
                        del threads[start_time]
                        self.proxies_check_out_channel[proxy] = proxy in good
                        good.discard(proxy)

        self.logger.debug("Stop check thread. ")

    def bad_source(self):
        """
        每隔指定时间间隔将无效代理放到待检查队列进行检查
        :return:
        """
        self.logger.debug("Start bad source thread. ")
        while self.alive:
            if len(self.proxies_check_in_channel):
                continue

            with ExceptContext(errback=self.log_err):
                proxies = self.redis_conn.hgetall(
                    self.settings.get("BAD_PROXY_HASH", "bad_proxies"))
                if proxies:
                    self.logger.debug(
                        f"Bad proxy count is: {len(proxies)}, ready to check.")
                    while proxies:
                        proxy, times = proxies.popitem()
                        self.proxies_check_in_channel.add(proxy)

            Blocker(self.settings.get_int("BAD_CHECK_INTERVAL", 60 * 5)).\
                wait_timeout_or_notify(notify=lambda: not self.alive)

        self.logger.debug("Stop bad source thread. ")

    def good_source(self):
        """
        每隔指定时间间隔将有效代理放到待检查队列进行检查
        :return:
        """
        self.logger.debug("Start good source thread. ")
        while self.alive:
            with ExceptContext(errback=self.log_err):
                proxies = self.redis_conn.smembers(
                    self.settings.get("GOOD_PROXY_SET", "good_proxies"))
                if proxies:
                    self.logger.debug(
                        f"Good proxy count is: {len(proxies)}, ready to check.")
                    self.proxies_check_in_channel.update(proxies)
            Blocker(self.settings.get_int("GOOD_CHECK_INTERVAL", 60 * 5)).\
                wait_timeout_or_notify(notify=lambda: not self.alive)

        self.logger.debug("Stop good source thread. ")

    def reset_proxies(self):
        """
        分发有效代理和无效代理
        :return:
        """
        self.logger.debug("Start resets thread. ")
        while self.alive:
            with ExceptContext(errback=self.log_err):
                proxies = list(self.proxies_check_out_channel.pop_all())
                if proxies:
                    self.logger.debug(f"Got {len(proxies)} proxies to reset.")
                    bp = self.settings.get("BAD_PROXY_HASH", "bad_proxies")
                    gp = self.settings.get("GOOD_PROXY_SET", "good_proxies")
                    for proxy, good in proxies:
                        if good:
                            self.redis_conn.sadd(gp, proxy)
                            self.redis_conn.hdel(bp, proxy)
                        else:
                            count = self.redis_conn.hincrby(bp, proxy)
                            if count > self.settings.get_int("FAILED_TIMES", 5):
                                self.redis_conn.hdel(bp, proxy)
                                self.logger.debug(
                                    f"Abandon {proxy} of failed {count} times.")
                            self.redis_conn.srem(gp, proxy)
                else:
                    time.sleep(1)
            time.sleep(1)
        self.logger.debug("Stop resets thread. ")

    def gen_thread(self, target, name=None, args=(), kwargs=None):
        thread = Thread(target=target, name=name, args=args, kwargs=kwargs)
        thread.setDaemon(True)
        thread.start()
        self.children.append(thread)

    def start(self):
        self.logger.debug("Start proxy factory. ")
        self.gen_thread(self.check_proxies)
        self.gen_thread(self.bad_source)
        self.gen_thread(self.good_source)
        self.gen_thread(self.reset_proxies)

        while self.alive or any(th for th in self.children if th.is_alive()):
            with ExceptContext(errback=self.log_err):
                if self.alive:
                    self.logger.debug("Start to fetch proxies. ")
                    proxies = self.fetch_all()
                    self.logger.debug("%s proxies found. " % len(proxies))
                    self.proxies_check_in_channel.update(proxies)

            Blocker(self.settings.get_int("FETCH_INTERVAL", 10 * 60)).\
                wait_timeout_or_notify(notify=lambda: not self.alive)
        self.logger.debug("Stop proxy factory. ")

    def fetch_all(self):
        """
            获取全部网站代理,内部调用各网站代理获取函数
        """
        proxies = set()
        for key, value in self.proxy_methods.items():
            proxies.update(value())
        return proxies

    def enrich_parser_arguments(self):
        self.parser.add_argument(
            "-s", "--settings", help="Setting module. ", default=settings)
        self.parser.add_argument(
            "-ls", "--localsettings", help="Local setting module.",
            default="localsettings")
        self.parser.add_argument(
            "-cm", "--check-method",
            help="provide a check method to check proxies. eg:module.func")
        self.parser.add_argument(
            "-sm", "--spider-module",
            help="provide a module contains proxy site spider methods. "
                 "eg:module1.module2")
Exemple #6
0
 def settings(cls):
     # type: () -> Settings
     return SettingsLoader().load(cls.settings_path or "settings")
Exemple #7
0
 def __init__(self):
     self.settings = SettingsLoader().load(self.settings_path or "settings")
Exemple #8
0
import pytest

from io import BytesIO
from zipfile import ZipFile
from apistellar import settings
from collections import defaultdict
from pytest_apistellar import prop_alias
from toolkit.settings import SettingsLoader

from blog.blog.article.service import ArticleService, Article
from blog.blog.article.article_exporter import ArticleFile
arti_ser = prop_alias("blog.blog.article.service.ArticleService")
article = prop_alias("blog.blog.article.article.Article")


@arti_ser("settings", ret_val=SettingsLoader().load("blog.settings"))
@pytest.mark.asyncio
class TestService(object):
    @article("load",
             ret_val=Article(id="20181010101010",
                             article="![](http://www.baidu.com/)"))
    async def test_get(self):
        article = await ArticleService().get("20181010101010")
        assert article["first_img"] == "http://www.baidu.com/"

    @pytest.mark.prop(
        "blog.blog.article.article_exporter.ArticleExporter.export",
        ret_val=ArticleFile("test.pdf", b"aaaaa"))
    async def test_export(self):
        file_resp = await ArticleService().export([1], "", "")
        zip_file = BytesIO(file_resp.content)