def all(xs, predicate=None):
    """Return True if every tested element of *xs* is truthy.

    Args:
        xs: Any iterable.
        predicate: Optional filter callable. When given, only the elements
            for which ``predicate(x)`` is truthy are tested; the rest are
            skipped (note: the elements themselves are tested for
            truthiness, not the predicate's result).

    Returns:
        bool: True when every tested element is truthy (vacuously True
        when nothing is selected).
    """
    # This function shadows the builtin ``all``, so delegate through the
    # ``builtins`` module explicitly. The original used ``__builtins__``,
    # which is only a module in ``__main__`` — inside an imported module
    # it is a plain dict, so attribute access on it raises AttributeError.
    # The stray debug ``print`` calls were removed as well.
    import builtins
    if predicate:
        return builtins.all(x for x in xs if predicate(x))
    return builtins.all(xs)
def _compile(self, *args, **kwargs):
    """Resolve how to execute this fusion for the given arguments.

    Raises TypeError for anything that is not array-like. For pure
    cupy-side inputs (cupy arrays / numpy scalars) a kernel compiled
    from the argument dtypes is returned; mixed cupy/numpy arrays only
    trigger a warning; pure numpy inputs fall back to the original
    Python function paired with an empty kernel dict.
    """
    # Reject any argument that is not an array or scalar at all.
    if builtins.any(
            not isinstance(arg, (core.ndarray, numpy.ndarray, numpy.generic))
            for arg in args):
        raise TypeError('Invalid argument type for \'{}\': ({})'.format(
            self.name, ', '.join(repr(type(arg)) for arg in args)))

    def is_cupy_data(a):
        # numpy scalars (numpy.generic) are acceptable alongside cupy arrays.
        return isinstance(a, (core.ndarray, numpy.generic))

    if builtins.all(is_cupy_data(arg) for arg in args):
        # Fusable path: compile a kernel keyed by the argument dtypes.
        dtypes = [arg.dtype for arg in args]
        return self._compile_from_dtypes(*dtypes)

    if builtins.any(type(arg) is core.ndarray for arg in args):
        # Mixed cupy/numpy arrays cannot be fused — warn (returns None).
        types_str = '.'.join(repr(type(arg)) for arg in args)
        message = 'Can\'t fuse \n {}({})'.format(self.name, types_str)
        warnings.warn(message)
    else:
        # Pure numpy inputs: run the original function, unfused.
        return self.func, {}
def _call(self, *args, **kwargs):
    """Execute the fusion: compile-and-cache per dtype tuple for cupy
    inputs, or emulate the func/reduce/post_map pipeline in Python for
    numpy inputs.
    """
    axis = kwargs.get('axis')
    if not args:
        raise Exception('number of arguments must be more than 0')
    if builtins.any(
            not isinstance(arg, (core.ndarray, numpy.ndarray, numpy.generic))
            for arg in args):
        raise TypeError('Invalid argument type for \'{}\': ({})'.format(
            self.name, ', '.join(repr(type(arg)) for arg in args)))

    def is_cupy_data(a):
        return isinstance(a, (core.ndarray, numpy.generic))

    if builtins.all(is_cupy_data(arg) for arg in args):
        # Fused path: one compiled function is memoized per dtype tuple.
        types = [arg.dtype for arg in args]
        key = tuple(types)
        if key not in self._memo:
            nin = self.input_num if self.input_num is not None else len(args)
            self._memo[key] = _get_fusion(
                self.func, nin, self.reduce, self.post_map,
                self.identity, types, self.name)
        f = self._memo[key]
        if self.reduce is None:
            return f(*args)
        return f(*args, axis=axis)

    # Unfused path: warn when cupy arrays are mixed in, then emulate the
    # pipeline with plain Python calls.
    if builtins.any(type(arg) is core.ndarray for arg in args):
        types = '.'.join(repr(type(arg)) for arg in args)
        message = "Can't fuse \n %s(%s)" % (self.name, types)
        warnings.warn(message)
    if self.reduce is None:
        return self.func(*args)
    if axis is None:
        return self.post_map(self.reduce(self.func(*args)))
    return self.post_map(self.reduce(self.func(*args), axis=axis))
def __call__(self, *args, **kwargs):
    """Invoke the fusion object.

    cupy inputs go through a memoized compiled function (one per dtype
    tuple); numpy inputs are executed unfused through func/reduce/post_map.
    """
    axis = kwargs.get('axis')
    if len(args) == 0:
        raise Exception('number of arguments must be more than 0')
    # Every argument must be array-like (cupy array, numpy array/scalar).
    if builtins.any(
            not isinstance(item, (core.ndarray, numpy.ndarray, numpy.generic))
            for item in args):
        raise TypeError('Invalid argument type for \'{}\': ({})'.format(
            self.name, ', '.join(repr(type(item)) for item in args)))

    def is_cupy_data(a):
        return isinstance(a, (core.ndarray, numpy.generic))

    if not builtins.all(is_cupy_data(item) for item in args):
        # Unfused fallback; mixed-in cupy arrays only produce a warning.
        if builtins.any(type(item) is core.ndarray for item in args):
            types = '.'.join(repr(type(item)) for item in args)
            message = "Can't fuse \n %s(%s)" % (self.name, types)
            warnings.warn(message)
        if self.reduce is None:
            return self.func(*args)
        if axis is None:
            return self.post_map(self.reduce(self.func(*args)))
        return self.post_map(self.reduce(self.func(*args), axis=axis))

    # Fused path: compile once per dtype tuple and cache in self._memo.
    types = [item.dtype for item in args]
    key = tuple(types)
    if key not in self._memo:
        nin = self.input_num if self.input_num is not None else len(args)
        self._memo[key] = _get_fusion(
            self.func, nin, self.reduce, self.post_map, self.identity, types)
    f = self._memo[key]
    if self.reduce is None:
        return f(*args)
    return f(*args, axis=axis)
def compile(self, *args, **kwargs):
    """Return the compiled fusion for the dtypes of *args*, memoized.

    Pure cupy-side inputs hit a per-dtype-tuple cache backed by the
    thread-local fusion history; mixed cupy/numpy arrays only raise a
    warning; pure numpy inputs fall back to the raw Python function with
    an empty kernel dict.
    """
    if builtins.any(
            not isinstance(arg, (core.ndarray, numpy.ndarray, numpy.generic))
            for arg in args):
        raise TypeError('Invalid argument type for \'{}\': ({})'.format(
            self.name, ', '.join(repr(type(arg)) for arg in args)))

    def is_cupy_data(a):
        return isinstance(a, (core.ndarray, numpy.generic))

    if builtins.all(is_cupy_data(arg) for arg in args):
        dtypes = [arg.dtype for arg in args]
        key = tuple(dtypes)
        if key not in self._memo:
            # Compile once per dtype signature; subsequent calls reuse it.
            self._memo[key] = _thread_local.history.get_fusion(
                self.func, dtypes, self.name)
        return self._memo[key]

    if builtins.any(type(arg) is core.ndarray for arg in args):
        # Mixed cupy/numpy arrays: not fusable — warn (returns None).
        types_str = '.'.join(repr(type(arg)) for arg in args)
        message = 'Can\'t fuse \n {}({})'.format(self.name, types_str)
        warnings.warn(message)
    else:
        return self.func, {}
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """
    Run Benchmark test. In bench mode, in-memory sqlite database is used
    instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    # Subprocess isolation is unavailable on Windows.
    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    g.projectdb.insert('bench', {
        'name': 'bench',
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log (the '' name resolves to the root logger)
    for logger_name in ('', 'scheduler', 'fetcher', 'processor', 'result', 'webui'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    threads = []

    # result worker
    result_worker_config = g.config.get('result_worker', {})
    for _ in range(result_worker_num):
        threads.append(run_in(
            ctx.invoke, result_worker,
            result_cls='pyspider.libs.bench.BenchResultWorker',
            **result_worker_config))

    # processor
    processor_config = g.config.get('processor', {})
    for _ in range(processor_num):
        threads.append(run_in(
            ctx.invoke, processor,
            processor_cls='pyspider.libs.bench.BenchProcessor',
            **processor_config))

    # fetcher
    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
    for _ in range(fetcher_num):
        threads.append(run_in(
            ctx.invoke, fetcher,
            fetcher_cls='pyspider.libs.bench.BenchFetcher',
            **fetcher_config))

    # scheduler
    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
    threads.append(run_in(
        ctx.invoke, scheduler,
        scheduler_cls='pyspider.libs.bench.BenchScheduler',
        **scheduler_config))

    # webui
    webui_config = g.config.get('webui', {})
    webui_config.setdefault(
        'scheduler_rpc', 'http://localhost:%s/' %
        g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
    threads.append(run_in(ctx.invoke, webui, **webui_config))

    # run project
    time.sleep(1)
    import requests
    rv = requests.post('http://localhost:5000/run', data={
        'project': 'bench',
    })
    assert rv.status_code == 200, 'run project error'

    # wait bench test finished: done once every component queue drains
    while True:
        time.sleep(1)
        if builtins.all(
                getattr(g, x) is None or getattr(g, x).empty()
                for x in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                          'fetcher2processor', 'processor2result')):
            break

    # exit components run in threading
    for each in g.instances:
        each.quit()

    # exit components run in subprocess
    for each in threads:
        if hasattr(each, 'terminate'):
            each.terminate()
        each.join(1)
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Run a benchmark pass: spin up all pyspider components, submit the
    bench project, wait until every queue drains, then tear down."""
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    # Subprocess isolation is unavailable on Windows.
    if run_in == 'subprocess' and os.name != 'nt':
        run_in = run_in_subprocess
    else:
        run_in = run_in_thread

    g.projectdb.insert('bench', {
        'name': 'bench',
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log (the '' name resolves to the root logger)
    for logger_name in ('', 'scheduler', 'fetcher', 'processor', 'result', 'webui'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    threads = []

    # result worker
    result_worker_config = g.config.get('result_worker', {})
    for _ in range(result_worker_num):
        threads.append(run_in(
            ctx.invoke, result_worker,
            ResultWorker=bench.BenchResultWorker,
            **result_worker_config))

    # processor
    processor_config = g.config.get('processor', {})
    for _ in range(processor_num):
        threads.append(run_in(
            ctx.invoke, processor,
            Processor=bench.BenchProcessor,
            **processor_config))

    # fetcher
    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
    for _ in range(fetcher_num):
        threads.append(run_in(
            ctx.invoke, fetcher,
            Fetcher=bench.BenchFetcher,
            **fetcher_config))

    # scheduler
    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
    threads.append(run_in(
        ctx.invoke, scheduler,
        Scheduler=bench.BenchScheduler,
        **scheduler_config))

    # webui
    webui_config = g.config.get('webui', {})
    webui_config.setdefault(
        'scheduler_rpc', 'http://localhost:%s/' %
        g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
    threads.append(run_in(ctx.invoke, webui, **webui_config))

    # run project
    time.sleep(1)
    import requests
    rv = requests.post('http://localhost:5000/run', data={
        'project': 'bench',
    })
    assert rv.status_code == 200, 'run project error'

    # wait bench test finished: done once every component queue drains
    while True:
        time.sleep(1)
        if builtins.all(
                getattr(g, x) is None or getattr(g, x).empty()
                for x in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                          'fetcher2processor', 'processor2result')):
            break

    # exit components run in threading
    for each in g.instances:
        each.quit()

    # exit components run in subprocess
    for each in threads:
        if hasattr(each, 'terminate'):
            each.terminate()
        each.join(1)
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test. In bench mode, in-memory sqlite database is used
    instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    # Subprocess isolation is unavailable on Windows.
    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    # No specific bench flag selected -> run everything.
    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = '__bench_test__'

    def clear_project():
        # Drop every trace of the bench project from all three databases.
        g.taskdb.drop(project_name)
        g.projectdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()
    g.projectdb.insert(project_name, {
        'name': project_name,
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log (the '' name resolves to the root logger)
    for logger_name in ('', 'scheduler', 'fetcher', 'processor', 'result', 'webui'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for _ in range(result_worker_num):
            threads.append(run_in(
                ctx.invoke, result_worker,
                result_cls='pyspider.libs.bench.BenchResultWorker',
                **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for _ in range(processor_num):
            threads.append(run_in(
                ctx.invoke, processor,
                processor_cls='pyspider.libs.bench.BenchProcessor',
                **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for _ in range(fetcher_num):
            threads.append(run_in(
                ctx.invoke, fetcher,
                fetcher_cls='pyspider.libs.bench.BenchFetcher',
                **fetcher_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        threads.append(run_in(
            ctx.invoke, scheduler,
            scheduler_cls='pyspider.libs.bench.BenchScheduler',
            **scheduler_config))

        # webui
        webui_config = g.config.get('webui', {})
        webui_config.setdefault(
            'scheduler_rpc', 'http://localhost:%s/' %
            g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # wait bench test finished: done once every component queue drains
        while True:
            time.sleep(1)
            if builtins.all(
                    getattr(g, x) is None or getattr(g, x).empty()
                    for x in ('newtask_queue', 'status_queue',
                              'scheduler2fetcher', 'fetcher2processor',
                              'processor2result')):
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()
        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)
        clear_project()
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show,
          taskdb_bench, message_queue_bench, all_bench):
    """
    Run Benchmark test. In bench mode, in-memory sqlite database is used
    instead of on-disk sqlite database.
    """
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    # Subprocess isolation is unavailable on Windows.
    if run_in == 'subprocess' and os.name != 'nt':
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    # No specific bench flag selected -> run everything.
    all_test = not taskdb_bench and not message_queue_bench and not all_bench

    # test taskdb
    if all_test or taskdb_bench:
        bench.bench_test_taskdb(g.taskdb)
    # test message queue
    if all_test or message_queue_bench:
        bench.bench_test_message_queue(g.scheduler2fetcher)
    # test all
    if not all_test and not all_bench:
        return

    project_name = '__bench_test__'

    def clear_project():
        # Drop every trace of the bench project from all three databases.
        g.taskdb.drop(project_name)
        g.projectdb.drop(project_name)
        g.resultdb.drop(project_name)

    clear_project()
    g.projectdb.insert(project_name, {
        'name': project_name,
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': total,
        'burst': total,
        'updatetime': time.time()
    })

    # disable log (the '' name resolves to the root logger); this variant
    # also silences werkzeug's per-request logging
    for logger_name in ('', 'scheduler', 'fetcher', 'processor', 'result',
                        'webui', 'werkzeug'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    try:
        threads = []

        # result worker
        result_worker_config = g.config.get('result_worker', {})
        for _ in range(result_worker_num):
            threads.append(run_in(
                ctx.invoke, result_worker,
                result_cls='pyspider.libs.bench.BenchResultWorker',
                **result_worker_config))

        # processor
        processor_config = g.config.get('processor', {})
        for _ in range(processor_num):
            threads.append(run_in(
                ctx.invoke, processor,
                processor_cls='pyspider.libs.bench.BenchProcessor',
                **processor_config))

        # fetcher
        fetcher_config = g.config.get('fetcher', {})
        fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
        for _ in range(fetcher_num):
            threads.append(run_in(
                ctx.invoke, fetcher,
                fetcher_cls='pyspider.libs.bench.BenchFetcher',
                **fetcher_config))

        # scheduler
        scheduler_config = g.config.get('scheduler', {})
        scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
        threads.append(run_in(
            ctx.invoke, scheduler,
            scheduler_cls='pyspider.libs.bench.BenchScheduler',
            **scheduler_config))

        # webui
        webui_config = g.config.get('webui', {})
        webui_config.setdefault(
            'scheduler_rpc', 'http://localhost:%s/' %
            g.config.get('scheduler', {}).get('xmlrpc_port', 23333))
        threads.append(run_in(ctx.invoke, webui, **webui_config))

        # wait bench test finished: done once every component queue drains
        while True:
            time.sleep(1)
            if builtins.all(
                    getattr(g, x) is None or getattr(g, x).empty()
                    for x in ('newtask_queue', 'status_queue',
                              'scheduler2fetcher', 'fetcher2processor',
                              'processor2result')):
                break
    finally:
        # exit components run in threading
        for each in g.instances:
            each.quit()
        # exit components run in subprocess
        for each in threads:
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join(1)
        clear_project()
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Run a benchmark pass: spin up all pyspider components, submit the
    bench project, wait until every queue drains, then tear down."""
    from pyspider.libs import bench
    from pyspider.webui import bench_test
    bench_test  # make pyflake happy

    ctx.obj["debug"] = False
    g = ctx.obj
    if result_worker_num == 0:
        g["processor2result"] = None

    # Subprocess isolation is unavailable on Windows.
    if run_in == "subprocess" and os.name != "nt":
        run_in = utils.run_in_subprocess
    else:
        run_in = utils.run_in_thread

    g.projectdb.insert("bench", {
        "name": "bench",
        "status": "RUNNING",
        "script": bench.bench_script % {"total": total, "show": show},
        "rate": total,
        "burst": total,
        "updatetime": time.time(),
    })

    # disable log (the "" name resolves to the root logger)
    for logger_name in ("", "scheduler", "fetcher", "processor", "result", "webui"):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    threads = []

    # result worker
    result_worker_config = g.config.get("result_worker", {})
    for _ in range(result_worker_num):
        threads.append(run_in(
            ctx.invoke, result_worker,
            result_cls="pyspider.libs.bench.BenchResultWorker",
            **result_worker_config))

    # processor
    processor_config = g.config.get("processor", {})
    for _ in range(processor_num):
        threads.append(run_in(
            ctx.invoke, processor,
            processor_cls="pyspider.libs.bench.BenchProcessor",
            **processor_config))

    # fetcher
    fetcher_config = g.config.get("fetcher", {})
    fetcher_config.setdefault("xmlrpc_host", "127.0.0.1")
    for _ in range(fetcher_num):
        threads.append(run_in(
            ctx.invoke, fetcher,
            fetcher_cls="pyspider.libs.bench.BenchFetcher",
            **fetcher_config))

    # scheduler
    scheduler_config = g.config.get("scheduler", {})
    scheduler_config.setdefault("xmlrpc_host", "127.0.0.1")
    threads.append(run_in(
        ctx.invoke, scheduler,
        scheduler_cls="pyspider.libs.bench.BenchScheduler",
        **scheduler_config))

    # webui
    webui_config = g.config.get("webui", {})
    webui_config.setdefault(
        "scheduler_rpc", "http://localhost:%s/" %
        g.config.get("scheduler", {}).get("xmlrpc_port", 23333))
    threads.append(run_in(ctx.invoke, webui, **webui_config))

    # run project
    time.sleep(1)
    import requests
    rv = requests.post("http://localhost:5000/run", data={"project": "bench"})
    assert rv.status_code == 200, "run project error"

    # wait bench test finished: done once every component queue drains
    while True:
        time.sleep(1)
        if builtins.all(
                getattr(g, x) is None or getattr(g, x).empty()
                for x in ("newtask_queue", "status_queue", "scheduler2fetcher",
                          "fetcher2processor", "processor2result")):
            break

    # exit components run in threading
    for each in g.instances:
        each.quit()

    # exit components run in subprocess
    for each in threads:
        if hasattr(each, "terminate"):
            each.terminate()
        each.join(1)