Example #1
0
def all(xs, predicate=None):
    """Return True if every element of *xs* satisfies *predicate*.

    Without a predicate this is plain truthiness testing, i.e. the builtin
    ``all``.  This function shadows the builtin at module level, so the
    builtin is reached through the ``builtins`` module (``__builtins__`` is
    a CPython implementation detail: a module in ``__main__`` but a dict in
    imported modules, where ``__builtins__.all`` would raise AttributeError).

    Args:
        xs: Any iterable.
        predicate: Optional one-argument callable; when given, it must
            return a truthy value for every element.

    Returns:
        bool: True when all elements pass (vacuously True for an empty
        iterable), False otherwise.
    """
    import builtins  # local import: the module-level name ``all`` is this function
    if predicate is not None:
        # Bug fix: apply the predicate to every element.  The previous code
        # filtered by the predicate and then truth-tested the surviving
        # elements themselves, so a falsy element (e.g. 0) that satisfied
        # the predicate wrongly made the result False.
        return builtins.all(predicate(x) for x in xs)
    return builtins.all(xs)
Example #2
0
    def _compile(self, *args, **kwargs):
        """Compile for the given concrete arguments.

        Raises TypeError for any argument that is not an accepted array or
        scalar type.  All-cupy arguments are compiled from their dtypes;
        pure non-cupy (but accepted) arguments return ``(self.func, {})``;
        a mix of cupy and other data only emits a warning (and implicitly
        returns None).
        """
        accepted = (core.ndarray, numpy.ndarray, numpy.generic)
        if not builtins.all(isinstance(arg, accepted) for arg in args):
            arg_types = ', '.join(repr(type(arg)) for arg in args)
            raise TypeError('Invalid argument type for \'{}\': ({})'.format(
                self.name, arg_types))

        def is_cupy_data(value):
            return isinstance(value, (core.ndarray, numpy.generic))

        if builtins.all(is_cupy_data(arg) for arg in args):
            # Everything is cupy data: compile from the dtype signature.
            return self._compile_from_dtypes(*(arg.dtype for arg in args))

        if not builtins.any(type(arg) is core.ndarray for arg in args):
            # No cupy arrays involved: fall back to the plain function.
            return self.func, {}

        # Mixed cupy/non-cupy arguments cannot be fused; warn only.
        warnings.warn('Can\'t fuse \n {}({})'.format(
            self.name, '.'.join(repr(type(arg)) for arg in args)))
Example #3
0
    def _call(self, *args, **kwargs):
        axis = kwargs['axis'] if 'axis' in kwargs else None
        if len(args) == 0:
            raise Exception('number of arguments must be more than 0')
        if builtins.any(
                not isinstance(_, (core.ndarray, numpy.ndarray, numpy.generic))
                for _ in args):
            raise TypeError('Invalid argument type for \'{}\': ({})'.format(
                self.name, ', '.join(repr(type(_)) for _ in args)))

        def is_cupy_data(a):
            return isinstance(a, (core.ndarray, numpy.generic))

        if builtins.all(is_cupy_data(_) for _ in args):
            types = [_.dtype for _ in args]
            key = tuple(types)
            if key not in self._memo:
                if self.input_num is not None:
                    nin = self.input_num
                else:
                    nin = len(args)
                f = _get_fusion(self.func, nin, self.reduce, self.post_map,
                                self.identity, types, self.name)
                self._memo[key] = f
            f = self._memo[key]
            if self.reduce is None:
                return f(*args)
            else:
                return f(*args, axis=axis)
        else:
            if builtins.any(type(_) is core.ndarray for _ in args):
                types = '.'.join(repr(type(_)) for _ in args)
                message = "Can't fuse \n %s(%s)" % (self.name, types)
                warnings.warn(message)
            if self.reduce is None:
                return self.func(*args)
            elif axis is None:
                return self.post_map(self.reduce(self.func(*args)))
            else:
                return self.post_map(self.reduce(self.func(*args), axis=axis))
Example #4
0
    def __call__(self, *args, **kwargs):
        axis = kwargs['axis'] if 'axis' in kwargs else None
        if len(args) == 0:
            raise Exception('number of arguments must be more than 0')
        if builtins.any(
                not isinstance(_, (core.ndarray, numpy.ndarray, numpy.generic))
                for _ in args):
            raise TypeError('Invalid argument type for \'{}\': ({})'.format(
                self.name,
                ', '.join(repr(type(_)) for _ in args)))

        def is_cupy_data(a):
            return isinstance(a, (core.ndarray, numpy.generic))
        if builtins.all(is_cupy_data(_) for _ in args):
            types = [_.dtype for _ in args]
            key = tuple(types)
            if key not in self._memo:
                if self.input_num is not None:
                    nin = self.input_num
                else:
                    nin = len(args)
                f = _get_fusion(self.func, nin, self.reduce,
                                self.post_map, self.identity, types)
                self._memo[key] = f
            f = self._memo[key]
            if self.reduce is None:
                return f(*args)
            else:
                return f(*args, axis=axis)
        else:
            if builtins.any(type(_) is core.ndarray for _ in args):
                types = '.'.join(repr(type(_)) for _ in args)
                message = "Can't fuse \n %s(%s)" % (self.name, types)
                warnings.warn(message)
            if self.reduce is None:
                return self.func(*args)
            elif axis is None:
                return self.post_map(self.reduce(self.func(*args)))
            else:
                return self.post_map(self.reduce(self.func(*args), axis=axis))
Example #5
0
    def compile(self, *args, **kwargs):
        """Return the fused implementation for the dtypes of *args*.

        Results are memoized per dtype tuple.  Accepted non-cupy argument
        mixes fall back to ``(self.func, {})``; mixing cupy arrays with
        other data only emits a warning (and implicitly returns None).
        """
        valid = (core.ndarray, numpy.ndarray, numpy.generic)
        if not builtins.all(isinstance(a, valid) for a in args):
            raise TypeError('Invalid argument type for \'{}\': ({})'.format(
                self.name,
                ', '.join(repr(type(a)) for a in args)))

        def is_cupy_data(a):
            return isinstance(a, (core.ndarray, numpy.generic))

        if builtins.all(is_cupy_data(a) for a in args):
            key = tuple(a.dtype for a in args)
            if key not in self._memo:
                self._memo[key] = _thread_local.history.get_fusion(
                    self.func, list(key), self.name)
            return self._memo[key]

        if not builtins.any(type(a) is core.ndarray for a in args):
            return self.func, {}

        warnings.warn('Can\'t fuse \n {}({})'.format(
            self.name, '.'.join(repr(type(a)) for a in args)))
Example #6
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total,
          show):
    """
    Run Benchmark test.
    In bench mode, in-memory sqlite database is used instead of on-disk sqlite database.

    Starts every component via ``run_in``, submits the 'bench' project,
    waits until all inter-component queues drain, and always shuts the
    components down again -- even when startup or the run request fails.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        # No result workers: drop the queue that would feed them.
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        # Threads on Windows ('nt') or when subprocess mode was not chosen.
        run_in = utils.run_in_thread

    g.projectdb.insert(
        'bench', {
            'name': 'bench',
            'status': 'RUNNING',
            'script': bench.bench_script % {
                'total': total,
                'show': show
            },
            'rate': total,
            'burst': total,
            'updatetime': time.time()
        })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)

    threads = []
    try:
        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for i in range(result_worker_num):
            threads.append(
                run_in(ctx.invoke,
                       result_worker,
                       result_cls='pyspider.libs.bench.BenchResultWorker',
                       **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for i in range(processor_num):
            threads.append(
                run_in(ctx.invoke,
                       processor,
                       processor_cls='pyspider.libs.bench.BenchProcessor',
                       **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for i in range(fetcher_num):
            threads.append(
                run_in(ctx.invoke,
                       fetcher,
                       fetcher_cls='pyspider.libs.bench.BenchFetcher',
                       **fetcher_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        threads.append(
            run_in(ctx.invoke,
                   scheduler,
                   scheduler_cls='pyspider.libs.bench.BenchScheduler',
                   **scheduler_config))

        # webui (pointed at the scheduler's XML-RPC port, default 23333)
        webui_config = g.config.get('webui', {})
        webui_config.setdefault(
            'scheduler_rpc', 'http://localhost:%s/' %
            g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # run project
        time.sleep(1)  # give the webui a moment to come up before POSTing
        import requests
        rv = requests.post('http://localhost:5000/run',
                           data={
                               'project': 'bench',
                           })
        assert rv.status_code == 200, 'run project error'

        # wait bench test finished: done when every inter-component queue
        # is either absent (None) or empty
        while True:
            time.sleep(1)
            if builtins.all(
                    getattr(g, x) is None or getattr(g, x).empty()
                    for x in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                              'fetcher2processor', 'processor2result')):
                break
    finally:
        # Fix: previously these cleanup loops were unconditionally at the
        # end of the function and were skipped whenever anything above
        # raised (e.g. the status-code assert), leaking running components.

        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)
Example #7
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Run a benchmark: start every pyspider component, submit the 'bench'
    project, wait until all inter-component queues drain, then shut the
    components down.

    NOTE(review): the shutdown loops at the bottom never run if anything
    above raises (e.g. the status-code assert) -- consider try/finally.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        # No result workers: drop the queue that would feed them.
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = run_in_subprocess
    else:
        # Threads on Windows ('nt') or when subprocess mode was not chosen.
        run_in = run_in_thread

    g.projectdb.insert('bench', {
        'name': 'bench',
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)

    threads = []  # handles for the components started via run_in

    # result worker
    result_worker_config = g.config.get('result_worker', {})
    for i in range(result_worker_num):
        threads.append(run_in(ctx.invoke, result_worker,
                              ResultWorker=bench.BenchResultWorker, **result_worker_config))

    # processor
    processor_config = g.config.get('processor', {})
    for i in range(processor_num):
        threads.append(run_in(ctx.invoke, processor,
                              Processor=bench.BenchProcessor, **processor_config))

    # fetcher
    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
    for i in range(fetcher_num):
        threads.append(run_in(ctx.invoke, fetcher,
                              Fetcher=bench.BenchFetcher, **fetcher_config))

    # scheduler
    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
    threads.append(run_in(ctx.invoke, scheduler,
                          Scheduler=bench.BenchScheduler, **scheduler_config))

    # webui (pointed at the scheduler's XML-RPC port, default 23333)
    webui_config = g.config.get('webui', {})
    webui_config.setdefault('scheduler_rpc', 'http://localhost:%s/'
                            % g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
    threads.append(run_in(ctx.invoke, webui, **webui_config))

    # run project
    time.sleep(1)  # give the webui a moment to come up before POSTing
    import requests
    rv = requests.post('http://localhost:5000/run', data={
        'project': 'bench',
    })
    assert rv.status_code == 200, 'run project error'

    # wait bench test finished: done when every inter-component queue is
    # either absent (None) or empty
    while True:
        time.sleep(1)
        if builtins.all(getattr(g, x) is None or getattr(g, x).empty() for x in (
                'newtask_queue', 'status_queue', 'scheduler2fetcher',
                'fetcher2processor', 'processor2result')):
            break

    # exit components run in threading
    for each in g.instances:
        each.quit()

    # exit components run in subprocess
    for each in threads:
        if hasattr(each, 'terminate'):
            each.terminate()
        each.join(1)
Example #8
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test.
    In bench mode, in-memory sqlite database is used instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = '__bench_test__'

    def clear_project():
        g.taskdb.drop(project_name)
        g.projectdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()
    g.projectdb.insert(project_name, {
        'name': project_name,
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for i in range(result_worker_num):
            threads.append(run_in(ctx.invoke, result_worker,
                                  result_cls='pyspider.libs.bench.BenchResultWorker',
                                  **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for i in range(processor_num):
            threads.append(run_in(ctx.invoke, processor,
                                  processor_cls='pyspider.libs.bench.BenchProcessor',
                                  **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for i in range(fetcher_num):
            threads.append(run_in(ctx.invoke, fetcher,
                                  fetcher_cls='pyspider.libs.bench.BenchFetcher',
                                  **fetcher_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        threads.append(run_in(ctx.invoke, scheduler,
                              scheduler_cls='pyspider.libs.bench.BenchScheduler',
                              **scheduler_config))

        # webui
        webui_config = g.config.get('webui', {})
        webui_config.setdefault('scheduler_rpc', 'http://localhost:%s/'
                                % g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # wait bench test finished
        while True:
            time.sleep(1)
            if builtins.all(getattr(g, x) is None or getattr(g, x).empty() for x in (
                    'newtask_queue', 'status_queue', 'scheduler2fetcher',
                    'fetcher2processor', 'processor2result')):
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)

        clear_project()
Example #9
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test.
    In bench mode, in-memory sqlite database is used instead of on-disk sqlite database.

    With none of the *_bench flags set, all three tests run (taskdb,
    message queue, and the full crawl).  The full test starts every
    component, waits until all inter-component queues drain, and always
    shuts the components down and drops the temporary project (try/finally).
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        # No result workers: drop the queue that would feed them.
        g['processor2result'] = None

    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        # Threads on Windows ('nt') or when subprocess mode was not chosen.
        run_in = utils.run_in_thread

    # No specific flag selected means "run everything".
    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = '__bench_test__'

    def clear_project():
        # Remove every trace of the bench project from all three stores.
        g.taskdb.drop(project_name)
        g.projectdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()
    g.projectdb.insert(project_name, {
        'name': project_name,
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger('scheduler').setLevel(logging.ERROR)
    logging.getLogger('fetcher').setLevel(logging.ERROR)
    logging.getLogger('processor').setLevel(logging.ERROR)
    logging.getLogger('result').setLevel(logging.ERROR)
    logging.getLogger('webui').setLevel(logging.ERROR)
    logging.getLogger('werkzeug').setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for i in range(result_worker_num):
            threads.append(run_in(ctx.invoke, result_worker,
                                  result_cls='pyspider.libs.bench.BenchResultWorker',
                                  **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for i in range(processor_num):
            threads.append(run_in(ctx.invoke, processor,
                                  processor_cls='pyspider.libs.bench.BenchProcessor',
                                  **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for i in range(fetcher_num):
            threads.append(run_in(ctx.invoke, fetcher,
                                  fetcher_cls='pyspider.libs.bench.BenchFetcher',
                                  **fetcher_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        threads.append(run_in(ctx.invoke, scheduler,
                              scheduler_cls='pyspider.libs.bench.BenchScheduler',
                              **scheduler_config))

        # webui (pointed at the scheduler's XML-RPC port, default 23333)
        webui_config = g.config.get('webui', {})
        webui_config.setdefault('scheduler_rpc', 'http://localhost:%s/'
                                % g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # wait bench test finished: done when every inter-component queue
        # is either absent (None) or empty
        while True:
            time.sleep(1)
            if builtins.all(getattr(g, x) is None or getattr(g, x).empty() for x in (
                    'newtask_queue', 'status_queue', 'scheduler2fetcher',
                    'fetcher2processor', 'processor2result')):
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)

        clear_project()
Example #10
0
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Run a benchmark: start every pyspider component, submit the 'bench'
    project, wait until all inter-component queues drain, and always shut
    the components down again -- even when startup or the run request fails.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test

    bench_test  # make pyflake happy

    ctx.obj["debug"] = False
    g = ctx.obj
    if result_worker_num == 0:
        # No result workers: drop the queue that would feed them.
        g["processor2result"] = None

    if run_in == "subprocess" and os.name != "nt":
        run_in = utils.run_in_subprocess
    else:
        # Threads on Windows ('nt') or when subprocess mode was not chosen.
        run_in = utils.run_in_thread

    g.projectdb.insert(
        "bench",
        {
            "name": "bench",
            "status": "RUNNING",
            "script": bench.bench_script % {"total": total, "show": show},
            "rate": total,
            "burst": total,
            "updatetime": time.time(),
        },
    )

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger("scheduler").setLevel(logging.ERROR)
    logging.getLogger("fetcher").setLevel(logging.ERROR)
    logging.getLogger("processor").setLevel(logging.ERROR)
    logging.getLogger("result").setLevel(logging.ERROR)
    logging.getLogger("webui").setLevel(logging.ERROR)

    threads = []
    try:
        # result worker
        result_worker_config = g.config.get("result_worker", {})
        for i in range(result_worker_num):
            threads.append(
                run_in(
                    ctx.invoke, result_worker, result_cls="pyspider.libs.bench.BenchResultWorker", **result_worker_config
                )
            )

        # processor
        processor_config = g.config.get("processor", {})
        for i in range(processor_num):
            threads.append(
                run_in(ctx.invoke, processor, processor_cls="pyspider.libs.bench.BenchProcessor", **processor_config)
            )

        # fetcher
        fetcher_config = g.config.get("fetcher", {})
        fetcher_config.setdefault("xmlrpc_host", "127.0.0.1")
        for i in range(fetcher_num):
            threads.append(run_in(ctx.invoke, fetcher, fetcher_cls="pyspider.libs.bench.BenchFetcher", **fetcher_config))

        # scheduler
        scheduler_config = g.config.get("scheduler", {})
        scheduler_config.setdefault("xmlrpc_host", "127.0.0.1")
        threads.append(
            run_in(ctx.invoke, scheduler, scheduler_cls="pyspider.libs.bench.BenchScheduler", **scheduler_config)
        )

        # webui (pointed at the scheduler's XML-RPC port, default 23333)
        webui_config = g.config.get("webui", {})
        webui_config.setdefault(
            "scheduler_rpc", "http://localhost:%s/" % g.config.get("scheduler", {}).get("xmlrpc_port", 23333)
        )
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # run project
        time.sleep(1)  # give the webui a moment to come up before POSTing
        import requests

        rv = requests.post("http://localhost:5000/run", data={"project": "bench"})
        assert rv.status_code == 200, "run project error"

        # wait bench test finished: done when every inter-component queue
        # is either absent (None) or empty
        while True:
            time.sleep(1)
            if builtins.all(
                getattr(g, x) is None or getattr(g, x).empty()
                for x in ("newtask_queue", "status_queue", "scheduler2fetcher", "fetcher2processor", "processor2result")
            ):
                break
    finally:
        # Fix: previously these cleanup loops were unconditionally at the
        # end of the function and were skipped whenever anything above
        # raised (e.g. the status-code assert), leaking running components.

        # exit components run in threading
        for each in g.instances:
            each.quit()

        # exit components run in subprocess
        for each in threads:
            if hasattr(each, "terminate"):
                each.terminate()
            each.join(1)