import atexit
import errno
import os
import shutil
import sys
import tempfile
import time
from queue import Queue
from threading import Thread

# NOTE: the calibre-internal imports below are assumptions based on the helper
# names used in this code; a module-level remove_dir_if_empty(path,
# ignore_metadata_caches=False) helper is also assumed to be in scope.
from calibre.ptempfile import remove_dir
from calibre.utils.recycle_bin import delete_file, delete_tree


class DeleteService(Thread):

    '''
    Provide a blocking file delete implementation with support for the recycle
    bin. On Windows, deleting files to the recycle bin spins the event loop,
    which can cause locking errors in the main thread. We get around this by
    only moving the files/folders to be deleted out of the library in the main
    thread; they are then deleted to the recycle bin in a separate worker
    thread.

    This has the added advantage that doing a restore from the recycle bin
    won't cause metadata.db and the file system to get out of sync. Also,
    deleting becomes much faster, since in the common case the move is done by
    a simple os.rename(). The downside is that if the user quits calibre while
    a long move to the recycle bin is happening, the files may not all be
    deleted.
    '''

    daemon = True

    def __init__(self):
        Thread.__init__(self)
        self.requests = Queue()

    def shutdown(self, timeout=20):
        self.requests.put(None)
        self.join(timeout)

    def create_staging(self, library_path):
        base_path = os.path.dirname(library_path)
        base = os.path.basename(library_path)
        try:
            ans = tempfile.mkdtemp(prefix=base + ' deleted ', dir=base_path)
        except OSError:
            ans = tempfile.mkdtemp(prefix=base + ' deleted ')
        atexit.register(remove_dir, ans)
        return ans

    def remove_dir_if_empty(self, path):
        try:
            os.rmdir(path)
        except OSError as e:
            if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
                # Some linux systems appear to raise an EPERM instead of an
                # ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
                return
            raise

    def delete_books(self, paths, library_path):
        tdir = self.create_staging(library_path)
        self.queue_paths(tdir, paths, delete_empty_parent=True)

    def queue_paths(self, tdir, paths, delete_empty_parent=True):
        try:
            self._queue_paths(tdir, paths, delete_empty_parent=delete_empty_parent)
        except:
            if os.path.exists(tdir):
                shutil.rmtree(tdir, ignore_errors=True)
            raise

    def _queue_paths(self, tdir, paths, delete_empty_parent=True):
        requests = []
        for path in paths:
            if os.path.exists(path):
                basename = os.path.basename(path)
                c = 0
                while True:
                    dest = os.path.join(tdir, basename)
                    if not os.path.exists(dest):
                        break
                    c += 1
                    basename = '%d - %s' % (c, os.path.basename(path))
                try:
                    shutil.move(path, dest)
                except OSError:
                    if os.path.isdir(path):
                        # shutil.move may have partially copied the directory,
                        # so the subsequent call to move() will fail as the
                        # destination directory already exists
                        raise
                    # Wait a little in case something has locked a file
                    time.sleep(1)
                    shutil.move(path, dest)
                if delete_empty_parent:
                    # The module-level helper, not the method defined above
                    remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
                requests.append(dest)
        if not requests:
            remove_dir_if_empty(tdir)
        else:
            self.requests.put(tdir)

    def delete_files(self, paths, library_path):
        tdir = self.create_staging(library_path)
        self.queue_paths(tdir, paths, delete_empty_parent=False)

    def run(self):
        while True:
            x = self.requests.get()
            try:
                if x is None:
                    break
                try:
                    self.do_delete(x)
                except:
                    import traceback
                    traceback.print_exc()
            finally:
                self.requests.task_done()

    def wait(self):
        'Blocks until all pending deletes have completed'
        self.requests.join()

    def do_delete(self, tdir):
        if os.path.exists(tdir):
            try:
                for x in os.listdir(tdir):
                    x = os.path.join(tdir, x)
                    if os.path.isdir(x):
                        delete_tree(x)
                    else:
                        delete_file(x)
            finally:
                shutil.rmtree(tdir)
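# Usage sketch for DeleteService: the paths are hypothetical and in practice
# the service would be a long-lived singleton. delete_books() does the cheap
# rename into the staging directory synchronously; the recycling itself
# happens on the service thread.

def demo_delete_service():
    service = DeleteService()
    service.start()
    service.delete_books(
        ['/path/to/Library/Author/Title (123)'],  # hypothetical book folder
        '/path/to/Library')
    service.wait()      # block until all staged files have been recycled
    service.shutdown()  # stop the worker thread, waiting up to 20 seconds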
# Pool assumes these helpers from calibre's IPC layer (not defined here):
# start_worker, Worker, Job, WorkerResult, Result, TerminalFailure, Failure,
# File, MAX_SIZE, pickle_dumps, msgpack_dumps, eintr_retry_call,
# join_with_timeout, detect_ncpus and PersistentTemporaryFile.


class Pool(Thread):

    daemon = True

    def __init__(self, max_workers=None, name=None):
        Thread.__init__(self, name=name)
        self.max_workers = max_workers or detect_ncpus()
        self.available_workers = []
        self.busy_workers = {}
        self.pending_jobs = []
        self.events = Queue()
        self.results = Queue()
        self.tracker = Queue()
        self.terminal_failure = None
        self.common_data = pickle_dumps(None)
        self.worker_data = None
        self.shutting_down = False
        self.start()

    def set_common_data(self, data=None):
        '''
        Set some data that will be passed to all subsequent jobs without
        needing to be transmitted every time. You must call this method before
        queueing any jobs, otherwise the behavior is undefined. If you call it
        after all jobs are done, the data will be used for the next round of
        jobs. Can raise the :class:`Failure` exception if the data could not
        be sent to the workers.
        '''
        if self.failed:
            raise Failure(self.terminal_failure)
        self.events.put(data)

    def __call__(self, job_id, module, func, *args, **kwargs):
        '''
        Schedule a job. The job will be run in a worker process, with the
        result placed in self.results. If a terminal failure has occurred
        previously, this method will raise the :class:`Failure` exception.

        :param job_id: A unique id for the job. The result will have this id.
        :param module: Either a fully qualified python module name or python
            source code which will be executed as a module. Source code is
            detected by the presence of newlines in module.
        :param func: Name of the function from ``module`` that will be
            executed. ``args`` and ``kwargs`` will be passed to the function.
        '''
        if self.failed:
            raise Failure(self.terminal_failure)
        job = Job(job_id, module, func, args, kwargs)
        self.tracker.put(None)
        self.events.put(job)

    def wait_for_tasks(self, timeout=None):
        '''
        Wait for all queued jobs to be completed. If timeout is not None, a
        RuntimeError is raised if the jobs are not completed in the specified
        time. Raises a :class:`Failure` exception if a terminal failure has
        occurred previously.
        '''
        if self.failed:
            raise Failure(self.terminal_failure)
        if timeout is None:
            self.tracker.join()
        else:
            join_with_timeout(self.tracker, timeout)

    def shutdown(self, wait_time=0.1):
        '''
        Shutdown this pool, terminating all worker processes. The pool cannot
        be used after a shutdown.
        '''
        self.shutting_down = True
        self.events.put(None)
        self.shutdown_workers(wait_time=wait_time)

    def create_worker(self):
        p = start_worker('from {0} import run_main, {1}; run_main({1})'.format(
            self.__class__.__module__, 'worker_main'))
        sys.stdout.flush()
        eintr_retry_call(p.stdin.write, self.worker_data)
        p.stdin.flush()
        p.stdin.close()
        conn = eintr_retry_call(self.listener.accept)
        w = Worker(p, conn, self.events, self.name)
        if self.common_data != pickle_dumps(None):
            w.set_common_data(self.common_data)
        return w

    def start_worker(self):
        try:
            w = self.create_worker()
            if not self.shutting_down:
                self.available_workers.append(w)
        except Exception:
            import traceback
            self.terminal_failure = TerminalFailure(
                'Failed to start worker process', traceback.format_exc(), None)
            self.terminal_error()
            return False

    def run(self):
        from calibre.utils.ipc.server import create_listener
        self.auth_key = os.urandom(32)
        self.address, self.listener = create_listener(self.auth_key)
        self.worker_data = msgpack_dumps((self.address, self.auth_key))
        if self.start_worker() is False:
            return
        while True:
            event = self.events.get()
            if event is None or self.shutting_down:
                break
            if self.handle_event(event) is False:
                break

    def handle_event(self, event):
        if isinstance(event, Job):
            job = event
            if not self.available_workers:
                if len(self.busy_workers) >= self.max_workers:
                    self.pending_jobs.append(job)
                    return
                if self.start_worker() is False:
                    return False
            return self.run_job(job)
        elif isinstance(event, WorkerResult):
            worker_result = event
            self.busy_workers.pop(worker_result.worker, None)
            self.available_workers.append(worker_result.worker)
            self.tracker.task_done()
            if worker_result.is_terminal_failure:
                self.terminal_failure = TerminalFailure(
                    'Worker process crashed while executing job',
                    worker_result.result.traceback, worker_result.id)
                self.terminal_error()
                return False
            self.results.put(worker_result)
        else:
            # Neither a Job nor a WorkerResult: this is new common data
            self.common_data = pickle_dumps(event)
            if len(self.common_data) > MAX_SIZE:
                self.cd_file = PersistentTemporaryFile('pool_common_data')
                with self.cd_file as f:
                    f.write(self.common_data)
                self.common_data = pickle_dumps(File(f.name))
            for worker in self.available_workers:
                try:
                    worker.set_common_data(self.common_data)
                except Exception:
                    import traceback
                    self.terminal_failure = TerminalFailure(
                        'Worker process crashed while sending common data',
                        traceback.format_exc(), None)
                    self.terminal_error()
                    return False

        while self.pending_jobs and self.available_workers:
            if self.run_job(self.pending_jobs.pop()) is False:
                return False

    def run_job(self, job):
        worker = self.available_workers.pop()
        try:
            worker(job)
        except Exception:
            import traceback
            self.terminal_failure = TerminalFailure(
                'Worker process crashed while sending job',
                traceback.format_exc(), job.id)
            self.terminal_error()
            return False
        self.busy_workers[worker] = job

    @property
    def failed(self):
        return self.terminal_failure is not None

    def terminal_error(self):
        if self.shutting_down:
            return
        for worker, job in self.busy_workers.items():
            self.results.put(WorkerResult(job.id, Result(None, None, None), True, worker))
            self.tracker.task_done()
        while self.pending_jobs:
            job = self.pending_jobs.pop()
            self.results.put(WorkerResult(job.id, Result(None, None, None), True, None))
            self.tracker.task_done()
        self.shutdown()

    def shutdown_workers(self, wait_time=0.1):
        self.worker_data = self.common_data = None
        for worker in self.busy_workers:
            if worker.process.poll() is None:
                try:
                    worker.process.terminate()
                except OSError:
                    pass  # The process has already been killed
        workers = [w.process for w in self.available_workers + list(self.busy_workers)]
        aw = list(self.available_workers)

        def join():
            for w in aw:
                try:
                    w(None)
                except Exception:
                    pass
            for w in workers:
                try:
                    w.wait()
                except Exception:
                    pass

        reaper = Thread(target=join, name='ReapPoolWorkers')
        reaper.daemon = True
        reaper.start()
        reaper.join(wait_time)
        for w in self.available_workers + list(self.busy_workers):
            try:
                w.conn.close()
            except Exception:
                pass
        for w in workers:
            if w.poll() is None:
                try:
                    w.kill()
                except OSError:
                    pass
        del self.available_workers[:]
        self.busy_workers.clear()
        if hasattr(self, 'cd_file'):
            try:
                os.remove(self.cd_file.name)
            except OSError:
                pass
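# Usage sketch for Pool: the job source and ids below are made up for
# illustration. Because ``module`` contains newlines it is executed as Python
# source in the worker rather than imported by name. Results arrive on
# pool.results as WorkerResult objects; pool.set_common_data() could be called
# before queueing to share state with every worker.

def demo_pool():
    pool = Pool(max_workers=2, name='DemoPool')  # starts itself in __init__
    src = 'def double(x):\n    return 2 * x\n'
    for i in range(4):
        pool(i, src, 'double', i)  # job_id, module, func, *args
    pool.wait_for_tasks(timeout=30)  # RuntimeError if jobs are not done in time
    results = {}
    while not pool.results.empty():
        wr = pool.results.get()
        results[wr.id] = wr.result
    pool.shutdown()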