def iter_completed(futures, max_jobs=None, max_load=None, loop=None): """ This is similar to asyncio.as_completed, but takes an iterator of futures as input, and includes support for max_jobs and max_load parameters. @param futures: iterator of asyncio.Future (or compatible) @type futures: iterator @param max_jobs: max number of futures to process concurrently (default is multiprocessing.cpu_count()) @type max_jobs: int @param max_load: max load allowed when scheduling a new future, otherwise schedule no more than 1 future at a time (default is multiprocessing.cpu_count()) @type max_load: int or float @param loop: event loop @type loop: EventLoop @return: iterator of futures that are done @rtype: iterator """ loop = loop or global_event_loop() max_jobs = max_jobs or multiprocessing.cpu_count() max_load = max_load or multiprocessing.cpu_count() future_map = {} def task_generator(): for future in futures: future_map[id(future)] = future yield AsyncTaskFuture(future=future) scheduler = TaskScheduler( task_generator(), max_jobs=max_jobs, max_load=max_load, event_loop=loop) try: scheduler.start() # scheduler should ensure that future_map is non-empty until # task_generator is exhausted while future_map: done, pending = loop.run_until_complete( wait(*list(future_map.values()), return_when=FIRST_COMPLETED)) for future in done: del future_map[id(future)] yield future finally: # cleanup in case of interruption by SIGINT, etc scheduler.cancel() scheduler.wait()
def iter_completed(futures, max_jobs=None, max_load=None, loop=None): """ This is similar to asyncio.as_completed, but takes an iterator of futures as input, and includes support for max_jobs and max_load parameters. @param futures: iterator of asyncio.Future (or compatible) @type futures: iterator @param max_jobs: max number of futures to process concurrently (default is multiprocessing.cpu_count()) @type max_jobs: int @param max_load: max load allowed when scheduling a new future, otherwise schedule no more than 1 future at a time (default is multiprocessing.cpu_count()) @type max_load: int or float @param loop: event loop @type loop: EventLoop @return: iterator of futures that are done @rtype: iterator """ loop = loop or global_event_loop() max_jobs = max_jobs or multiprocessing.cpu_count() max_load = max_load or multiprocessing.cpu_count() future_map = {} def task_generator(): for future in futures: future_map[id(future)] = future yield AsyncTaskFuture(future=future) scheduler = TaskScheduler(task_generator(), max_jobs=max_jobs, max_load=max_load, event_loop=loop) try: scheduler.start() # scheduler should ensure that future_map is non-empty until # task_generator is exhausted while future_map: done, pending = loop.run_until_complete( wait(list(future_map.values()), return_when=FIRST_COMPLETED)) for future in done: del future_map[id(future)] yield future finally: # cleanup in case of interruption by SIGINT, etc scheduler.cancel() scheduler.wait()
def _testPipeReader(self, test_string): """ Use a poll loop to read data from a pipe and assert that the data written to the pipe is identical to the data read from the pipe. """ if self._use_pty: got_pty, master_fd, slave_fd = _create_pty_or_pipe() if not got_pty: os.close(slave_fd) os.close(master_fd) skip_reason = "pty not acquired" self.portage_skip = skip_reason self.fail(skip_reason) return else: master_fd, slave_fd = os.pipe() # WARNING: It is very important to use unbuffered mode here, # in order to avoid issue 5380 with python3. master_file = os.fdopen(master_fd, 'rb', 0) slave_file = os.fdopen(slave_fd, 'wb', 0) producer = SpawnProcess( args=["bash", "-c", self._echo_cmd % test_string], env=os.environ, fd_pipes={1:slave_fd}) consumer = PipeReader( input_files={"producer" : master_file}, _use_array=self._use_array) task_scheduler = TaskScheduler(iter([producer, consumer]), max_jobs=2) # This will ensure that both tasks have exited, which # is necessary to avoid "ResourceWarning: unclosed file" # warnings since Python 3.2 (and also ensures that we # don't leave any zombie child processes). task_scheduler.start() slave_file.close() task_scheduler.wait() self.assertEqual(producer.returncode, os.EX_OK) self.assertEqual(consumer.returncode, os.EX_OK) return consumer.getvalue().decode('ascii', 'replace')
def _testPipeReader(self, test_string): """ Use a poll loop to read data from a pipe and assert that the data written to the pipe is identical to the data read from the pipe. """ if self._use_pty: got_pty, master_fd, slave_fd = _create_pty_or_pipe() if not got_pty: os.close(slave_fd) os.close(master_fd) skip_reason = "pty not acquired" self.portage_skip = skip_reason self.fail(skip_reason) return else: master_fd, slave_fd = os.pipe() # WARNING: It is very important to use unbuffered mode here, # in order to avoid issue 5380 with python3. master_file = os.fdopen(master_fd, 'rb', 0) slave_file = os.fdopen(slave_fd, 'wb', 0) producer = SpawnProcess( args=["bash", "-c", self._echo_cmd % test_string], env=os.environ, fd_pipes={1: slave_fd}) consumer = PipeReader(input_files={"producer": master_file}, _use_array=self._use_array) task_scheduler = TaskScheduler(iter([producer, consumer]), max_jobs=2) # This will ensure that both tasks have exited, which # is necessary to avoid "ResourceWarning: unclosed file" # warnings since Python 3.2 (and also ensures that we # don't leave any zombie child processes). task_scheduler.start() slave_file.close() task_scheduler.wait() self.assertEqual(producer.returncode, os.EX_OK) self.assertEqual(consumer.returncode, os.EX_OK) return consumer.getvalue().decode('ascii', 'replace')
def async_iter_completed(futures, max_jobs=None, max_load=None, loop=None): """ An asynchronous version of iter_completed. This yields futures, which when done, result in a set of input futures that are done. This serves as a wrapper around portage's internal TaskScheduler class, using standard asyncio interfaces. @param futures: iterator of asyncio.Future (or compatible) @type futures: iterator @param max_jobs: max number of futures to process concurrently (default is portage.util.cpuinfo.get_cpu_count()) @type max_jobs: int @param max_load: max load allowed when scheduling a new future, otherwise schedule no more than 1 future at a time (default is portage.util.cpuinfo.get_cpu_count()) @type max_load: int or float @param loop: event loop @type loop: EventLoop @return: iterator of futures, which when done, result in a set of input futures that are done @rtype: iterator """ loop = asyncio._wrap_loop(loop) max_jobs = max_jobs or get_cpu_count() max_load = max_load or get_cpu_count() future_map = {} def task_generator(): for future in futures: future_map[id(future)] = future yield AsyncTaskFuture(future=future) scheduler = TaskScheduler(task_generator(), max_jobs=max_jobs, max_load=max_load, event_loop=loop) def done_callback(future_done_set, wait_result): """Propagate results from wait_result to future_done_set.""" if future_done_set.cancelled(): return done, pending = wait_result.result() for future in done: del future_map[id(future)] future_done_set.set_result(done) def cancel_callback(wait_result, future_done_set): """Cancel wait_result if future_done_set has been cancelled.""" if future_done_set.cancelled() and not wait_result.done(): wait_result.cancel() try: scheduler.start() # scheduler should ensure that future_map is non-empty until # task_generator is exhausted while future_map: wait_result = asyncio.ensure_future(asyncio.wait( list(future_map.values()), return_when=asyncio.FIRST_COMPLETED, loop=loop), loop=loop) future_done_set = loop.create_future() future_done_set.add_done_callback( functools.partial(cancel_callback, wait_result)) wait_result.add_done_callback( functools.partial(done_callback, future_done_set)) yield future_done_set finally: # cleanup in case of interruption by SIGINT, etc scheduler.cancel() scheduler.wait()
def testDoebuild(self): """ Invoke portage.doebuild() with the fd_pipes parameter, and check that the expected output appears in the pipe. This functionality is not used by portage internally, but it is supported for API consumers (see bug #475812). """ output_fd = 200 ebuild_body = ["S=${WORKDIR}"] for phase_func in ( "pkg_info", "pkg_nofetch", "pkg_pretend", "pkg_setup", "src_unpack", "src_prepare", "src_configure", "src_compile", "src_test", "src_install", ): ebuild_body.append(("%s() { echo ${EBUILD_PHASE}" " 1>&%s; }") % (phase_func, output_fd)) ebuild_body.append("") ebuild_body = "\n".join(ebuild_body) ebuilds = { "app-misct/foo-1": { "EAPI": "5", "MISC_CONTENT": ebuild_body, } } # Override things that may be unavailable, or may have portability # issues when running tests in exotic environments. # prepstrip - bug #447810 (bash read builtin EINTR problem) true_symlinks = ("find", "prepstrip", "sed", "scanelf") true_binary = portage.process.find_binary("true") self.assertEqual(true_binary is None, False, "true command not found") dev_null = open(os.devnull, "wb") playground = ResolverPlayground(ebuilds=ebuilds) try: QueryCommand._db = playground.trees root_config = playground.trees[playground.eroot]["root_config"] portdb = root_config.trees["porttree"].dbapi settings = portage.config(clone=playground.settings) if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: settings["__PORTAGE_TEST_HARDLINK_LOCKS"] = os.environ[ "__PORTAGE_TEST_HARDLINK_LOCKS"] settings.backup_changes("__PORTAGE_TEST_HARDLINK_LOCKS") settings.features.add("noauto") settings.features.add("test") settings["PORTAGE_PYTHON"] = portage._python_interpreter settings["PORTAGE_QUIET"] = "1" settings["PYTHONDONTWRITEBYTECODE"] = os.environ.get( "PYTHONDONTWRITEBYTECODE", "") fake_bin = os.path.join(settings["EPREFIX"], "bin") portage.util.ensure_dirs(fake_bin) for x in true_symlinks: os.symlink(true_binary, os.path.join(fake_bin, x)) settings["__PORTAGE_TEST_PATH_OVERRIDE"] = fake_bin settings.backup_changes("__PORTAGE_TEST_PATH_OVERRIDE") cpv = "app-misct/foo-1" metadata = dict( zip(Package.metadata_keys, portdb.aux_get(cpv, Package.metadata_keys))) pkg = Package( built=False, cpv=cpv, installed=False, metadata=metadata, root_config=root_config, type_name="ebuild", ) settings.setcpv(pkg) ebuildpath = portdb.findname(cpv) self.assertNotEqual(ebuildpath, None) for phase in ( "info", "nofetch", "pretend", "setup", "unpack", "prepare", "configure", "compile", "test", "install", "qmerge", "clean", "merge", ): pr, pw = os.pipe() producer = DoebuildProcess( doebuild_pargs=(ebuildpath, phase), doebuild_kwargs={ "settings": settings, "mydbapi": portdb, "tree": "porttree", "vartree": root_config.trees["vartree"], "fd_pipes": { 1: dev_null.fileno(), 2: dev_null.fileno(), output_fd: pw, }, "prev_mtimes": {}, }, ) consumer = PipeReader(input_files={"producer": pr}) task_scheduler = TaskScheduler(iter([producer, consumer]), max_jobs=2) try: task_scheduler.start() finally: # PipeReader closes pr os.close(pw) task_scheduler.wait() output = portage._unicode_decode( consumer.getvalue()).rstrip("\n") if task_scheduler.returncode != os.EX_OK: portage.writemsg(output, noiselevel=-1) self.assertEqual(task_scheduler.returncode, os.EX_OK) if phase not in ("clean", "merge", "qmerge"): self.assertEqual(phase, output) finally: dev_null.close() playground.cleanup() QueryCommand._db = None
def testDoebuild(self): """ Invoke portage.doebuild() with the fd_pipes parameter, and check that the expected output appears in the pipe. This functionality is not used by portage internally, but it is supported for API consumers (see bug #475812). """ output_fd = 200 ebuild_body = ['S=${WORKDIR}'] for phase_func in ('pkg_info', 'pkg_nofetch', 'pkg_pretend', 'pkg_setup', 'src_unpack', 'src_prepare', 'src_configure', 'src_compile', 'src_test', 'src_install'): ebuild_body.append(('%s() { echo ${EBUILD_PHASE}' ' 1>&%s; }') % (phase_func, output_fd)) ebuild_body.append('') ebuild_body = '\n'.join(ebuild_body) ebuilds = { 'app-misct/foo-1': { 'EAPI' : '5', "MISC_CONTENT": ebuild_body, } } # Override things that may be unavailable, or may have portability # issues when running tests in exotic environments. # prepstrip - bug #447810 (bash read builtin EINTR problem) true_symlinks = ("find", "prepstrip", "sed", "scanelf") true_binary = portage.process.find_binary("true") self.assertEqual(true_binary is None, False, "true command not found") dev_null = open(os.devnull, 'wb') playground = ResolverPlayground(ebuilds=ebuilds) try: QueryCommand._db = playground.trees root_config = playground.trees[playground.eroot]['root_config'] portdb = root_config.trees["porttree"].dbapi settings = portage.config(clone=playground.settings) if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: settings["__PORTAGE_TEST_HARDLINK_LOCKS"] = \ os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] settings.backup_changes("__PORTAGE_TEST_HARDLINK_LOCKS") settings.features.add("noauto") settings.features.add("test") settings['PORTAGE_PYTHON'] = portage._python_interpreter settings['PORTAGE_QUIET'] = "1" settings['PYTHONDONTWRITEBYTECODE'] = os.environ.get("PYTHONDONTWRITEBYTECODE", "") fake_bin = os.path.join(settings["EPREFIX"], "bin") portage.util.ensure_dirs(fake_bin) for x in true_symlinks: os.symlink(true_binary, os.path.join(fake_bin, x)) settings["__PORTAGE_TEST_PATH_OVERRIDE"] = fake_bin settings.backup_changes("__PORTAGE_TEST_PATH_OVERRIDE") cpv = 'app-misct/foo-1' metadata = dict(zip(Package.metadata_keys, portdb.aux_get(cpv, Package.metadata_keys))) pkg = Package(built=False, cpv=cpv, installed=False, metadata=metadata, root_config=root_config, type_name='ebuild') settings.setcpv(pkg) ebuildpath = portdb.findname(cpv) self.assertNotEqual(ebuildpath, None) for phase in ('info', 'nofetch', 'pretend', 'setup', 'unpack', 'prepare', 'configure', 'compile', 'test', 'install', 'qmerge', 'clean', 'merge'): pr, pw = os.pipe() producer = DoebuildProcess(doebuild_pargs=(ebuildpath, phase), doebuild_kwargs={"settings" : settings, "mydbapi": portdb, "tree": "porttree", "vartree": root_config.trees["vartree"], "fd_pipes": { 1: dev_null.fileno(), 2: dev_null.fileno(), output_fd: pw, }, "prev_mtimes": {}}) consumer = PipeReader( input_files={"producer" : pr}) task_scheduler = TaskScheduler(iter([producer, consumer]), max_jobs=2) try: task_scheduler.start() finally: # PipeReader closes pr os.close(pw) task_scheduler.wait() output = portage._unicode_decode( consumer.getvalue()).rstrip("\n") if task_scheduler.returncode != os.EX_OK: portage.writemsg(output, noiselevel=-1) self.assertEqual(task_scheduler.returncode, os.EX_OK) if phase not in ('clean', 'merge', 'qmerge'): self.assertEqual(phase, output) finally: dev_null.close() playground.cleanup() QueryCommand._db = None
def async_iter_completed(futures, max_jobs=None, max_load=None, loop=None): """ An asynchronous version of iter_completed. This yields futures, which when done, result in a set of input futures that are done. This serves as a wrapper around portage's internal TaskScheduler class, using standard asyncio interfaces. @param futures: iterator of asyncio.Future (or compatible) @type futures: iterator @param max_jobs: max number of futures to process concurrently (default is portage.util.cpuinfo.get_cpu_count()) @type max_jobs: int @param max_load: max load allowed when scheduling a new future, otherwise schedule no more than 1 future at a time (default is portage.util.cpuinfo.get_cpu_count()) @type max_load: int or float @param loop: event loop @type loop: EventLoop @return: iterator of futures, which when done, result in a set of input futures that are done @rtype: iterator """ loop = asyncio._wrap_loop(loop) max_jobs = max_jobs or get_cpu_count() max_load = max_load or get_cpu_count() future_map = {} def task_generator(): for future in futures: future_map[id(future)] = future yield AsyncTaskFuture(future=future) scheduler = TaskScheduler( task_generator(), max_jobs=max_jobs, max_load=max_load, event_loop=loop) def done_callback(future_done_set, wait_result): """Propagate results from wait_result to future_done_set.""" if future_done_set.cancelled(): return done, pending = wait_result.result() for future in done: del future_map[id(future)] future_done_set.set_result(done) def cancel_callback(wait_result, future_done_set): """Cancel wait_result if future_done_set has been cancelled.""" if future_done_set.cancelled() and not wait_result.done(): wait_result.cancel() try: scheduler.start() # scheduler should ensure that future_map is non-empty until # task_generator is exhausted while future_map: wait_result = asyncio.ensure_future( asyncio.wait(list(future_map.values()), return_when=asyncio.FIRST_COMPLETED, loop=loop), loop=loop) future_done_set = loop.create_future() future_done_set.add_done_callback( functools.partial(cancel_callback, wait_result)) wait_result.add_done_callback( functools.partial(done_callback, future_done_set)) yield future_done_set finally: # cleanup in case of interruption by SIGINT, etc scheduler.cancel() scheduler.wait()