def mp_buffer(df, buffer_dist, n_workers):
    # array to sharedmem
    array = df[['unique_id', 'geometry']].to_records(index=False)
    shape, dtype = array.shape, array.dtype
    shm = SharedMemory(name='arr', create=True, size=array.nbytes)
    shm_array = np.recarray(shape=shape, dtype=dtype, buf=shm.buf)
    np.copyto(shm_array, array)
    shm_spec = {'name': 'arr', 'shape': shape, 'dtype': dtype}

    # do multiprocess
    chunk = len(df) // n_workers + 1
    args = [(shm_spec, range(len(df))[ii * chunk:(ii + 1) * chunk], buffer_dist)
            for ii in range(n_workers)]
    with mp.Pool(n_workers) as pool:
        res = pool.starmap(buffer_worker, args)
    res = [item for sublist in res for item in sublist]

    shm.close()
    shm.unlink()
    return [r[1] for r in res]
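# buffer_worker is referenced above but not shown. A minimal sketch of what
# such a worker could look like, assuming each worker re-attaches to the
# parent's record array by name and returns (unique_id, buffered geometry)
# pairs; the function name comes from the call above, everything else is an
# assumption. Caveat: object-dtype columns (e.g. shapely geometries) only
# store pointers, so this pattern truly shares memory for numeric dtypes only.
def buffer_worker(shm_spec, index_range, buffer_dist):
    shm = SharedMemory(name=shm_spec['name'])
    try:
        arr = np.recarray(shape=shm_spec['shape'], dtype=shm_spec['dtype'],
                          buf=shm.buf)
        return [(arr[i].unique_id, arr[i].geometry.buffer(buffer_dist))
                for i in index_range]
    finally:
        shm.close()  # detach only; the parent closes and unlinks the segment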
def find_adv_batch(self, model: nn.Module, inputs: torch.Tensor,
                   inputs_adv_ref: torch.Tensor, labels: torch.Tensor,
                   epsilon: float, max_nr_adv):
    self._start_workers()
    with ensure_training_state(model, False):
        model_outputs: torch.Tensor = model(inputs_adv_ref)
    eval_model = model.cvt_to_eval()
    correct_mask = torch.eq(model_outputs.argmax(dim=1), labels)
    idx_remap = np.arange(inputs.shape[0], dtype=np.int32)
    np.random.shuffle(idx_remap)
    assert inputs.dtype == torch.float32
    shm_size = BUFFER_COUNTER_SIZE + 4 * inputs.numel()
    shm = SharedMemory(size=shm_size, create=True)
    shm.buf[:BUFFER_COUNTER_SIZE] = b'\0' * BUFFER_COUNTER_SIZE
    shm_name = shm.name
    args = RemoteArgs(eval_model, inputs, inputs_adv_ref, labels, correct_mask,
                      epsilon, max_nr_adv, shm_name, shm_size, idx_remap)
    try:
        return self._work(shm.buf, args)
    except:
        self.close()
        raise
    finally:
        shm.close()
        shm.unlink()
def producer(conn):
    # os.environ["PYTHONWARNINGS"] = "ignore"
    feed_shm_name = '{}_{}_{}'.format('test', os.getpid(),
                                      threading.current_thread().ident)
    print('input shm name : {}'.format(feed_shm_name))
    feed_shm = SharedMemory(name=feed_shm_name, create=True, size=2 * 4)
    feed_shm_arr = np.ndarray((1, 2), dtype=np.float32, buffer=feed_shm.buf)
    input_arr = np.random.random((1, 2)).astype(np.float32)
    feed_shm_arr[:] = input_arr[:]
    conn.send(feed_shm_name)
    result_shm_name = conn.recv()
    result_shm = SharedMemory(name=result_shm_name)
    result_shm_arr = np.ndarray((1, 2), dtype=np.float32, buffer=result_shm.buf)
    print('Output array : {}'.format(result_shm_arr))
    conn.send('exit')
    del result_shm_arr
    result_shm.close()
    conn.recv()
    del feed_shm_arr
    feed_shm.close()
    feed_shm.unlink()
    print('clean and exit')
    return
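# The matching consumer end of the pipe is not shown above. A minimal sketch
# that mirrors the producer's protocol (recv feed name, send result name,
# wait for 'exit', then ack); the name 'consumer' and the pass-through "work"
# step are illustrative assumptions:
def consumer(conn):
    feed_shm_name = conn.recv()
    feed_shm = SharedMemory(name=feed_shm_name)
    feed_arr = np.ndarray((1, 2), dtype=np.float32, buffer=feed_shm.buf)

    result_shm = SharedMemory(create=True, size=2 * 4)
    result_arr = np.ndarray((1, 2), dtype=np.float32, buffer=result_shm.buf)
    result_arr[:] = feed_arr[:]  # stand-in for real work on the input
    conn.send(result_shm.name)

    assert conn.recv() == 'exit'  # producer has finished reading the result
    del feed_arr, result_arr
    feed_shm.close()
    result_shm.close()
    result_shm.unlink()  # consumer owns the result segment
    conn.send('done')    # lets the producer unlink its feed segment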
def from_dict(self, classname: str, d: dict):
    """Convert a dict from `ndarray_to_dict` back to an np.ndarray."""
    shm = SharedMemory(name=d["shm"], create=False)
    # Copy out of the shared buffer before closing it, so the returned
    # array does not reference freed memory.
    array = np.ndarray(d["shape"], dtype=d["dtype"], buffer=shm.buf).copy()
    shm.close()
    shm.unlink()
    return array
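# The `ndarray_to_dict` counterpart referenced in the docstring is not shown.
# A minimal sketch, assuming it stages the array in a fresh shared-memory
# block and records just enough metadata to rebuild it; the signature is an
# assumption, but the key names match how `from_dict` reads the dict:
def ndarray_to_dict(array: np.ndarray) -> dict:
    shm = SharedMemory(create=True, size=array.nbytes)
    staged = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
    staged[:] = array[:]  # copy the payload into shared memory
    shm.close()           # the receiver re-attaches by name, then unlinks
    return {"shm": shm.name, "shape": array.shape, "dtype": str(array.dtype)}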
async def teardown(**kwargs):
    object_ids = kwargs.get('object_ids')
    for object_id in object_ids:
        try:
            shm = SharedMemory(name=object_id)
            shm.unlink()
            await asyncio.sleep(0)
        except FileNotFoundError:
            pass
def SharedMemory(self, name: str = None, size: int = None):
    assert name is not None or size is not None
    with self._Client(self._address, authkey=self._authkey) as conn:
        if name is not None:
            shm = SharedMemory(name=name)
            self.client_shms[name] = shm
        else:
            shm = SharedMemory(None, create=True, size=size)
            try:
                dispatch(conn, None, 'track_segment', (shm.name,))
            except BaseException as e:
                shm.unlink()
                raise e
    return shm
async def delete(self, object_id):
    try:
        shm = SharedMemory(name=object_id)
        shm.unlink()
        shm.close()
    except FileNotFoundError:
        if sys.platform == 'win32':
            # skip file not found error for windows
            pass
        else:  # pragma: no cover
            raise
    try:
        self._object_ids.remove(object_id)
    except KeyError:  # pragma: no cover
        return
from contextlib import contextmanager


@contextmanager
def open_memory(name, create=False, size=0, consume=False):
    """Open shared memory via a context manager.

    The shared memory will automatically be closed when the context ends.
    The shared memory may also optionally be created and/or consumed. If
    the "consume" flag is True, the shared memory will be unlinked as well
    when the context ends.

    Args:
        name (str): the name of the shared memory
        create (bool): whether to create the shared memory or try to open
            a pre-existing one. If this is True, the size argument must be
            non-zero.
        size (int): the size in bytes of the shared memory block to
            create. Only used if create == True.
        consume (bool): whether or not to unlink the shared memory when
            the context ends.

    Yields:
        SharedMemory: the shared memory object
    """
    if create and size == 0:
        raise Exception('If create is True, size must be non-zero')
    kwargs = {
        'name': name,
        'create': create,
        'size': size,
    }
    shm = SharedMemory(**kwargs)
    try:
        yield shm
    finally:
        shm.close()
        if consume:
            shm.unlink()
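# Example usage of open_memory (illustrative; the name 'scratch' and the
# 1 KiB size are arbitrary). The block is closed on exit, and unlinked as
# well because consume=True:
with open_memory('scratch', create=True, size=1024, consume=True) as shm:
    shm.buf[:5] = b'hello'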
class DLManager(Process):
    def __init__(self, download_dir, base_url, cache_dir=None, status_q=None,
                 max_workers=0, update_interval=1.0, dl_timeout=10, resume_file=None,
                 max_shared_memory=1024 * 1024 * 1024):
        super().__init__(name='DLManager')
        self.log = logging.getLogger('DLM')
        self.proc_debug = False

        self.base_url = base_url
        self.dl_dir = download_dir
        self.cache_dir = cache_dir if cache_dir else os.path.join(download_dir, '.cache')

        # All the queues!
        self.logging_queue = None
        self.dl_worker_queue = None
        self.writer_queue = None
        self.dl_result_q = None
        self.writer_result_q = None
        self.max_workers = max_workers if max_workers else min(cpu_count() * 2, 16)
        self.dl_timeout = dl_timeout

        # Analysis stuff
        self.analysis = None
        self.tasks = deque()
        self.chunks_to_dl = deque()
        self.chunk_data_list = None

        # shared memory stuff
        self.max_shared_memory = max_shared_memory  # 1 GiB by default
        self.sms = deque()
        self.shared_memory = None

        # Interval for log updates and pushing updates to the queue
        self.update_interval = update_interval
        self.status_queue = status_q  # queue used to relay status info back to GUI/CLI

        # Resume file stuff
        self.resume_file = resume_file
        self.hash_map = dict()

        # cross-thread runtime information
        self.running = True
        self.active_tasks = 0
        self.children = []
        self.threads = []
        self.conditions = []
        # bytes downloaded and decompressed since last report
        self.bytes_downloaded_since_last = 0
        self.bytes_decompressed_since_last = 0
        # bytes written since last report
        self.bytes_written_since_last = 0
        # bytes read since last report
        self.bytes_read_since_last = 0
        # chunks written since last report
        self.num_processed_since_last = 0
        self.num_tasks_processed_since_last = 0

    def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                     patch=True, resume=True, file_prefix_filter=None,
                     file_exclude_filter=None, file_install_tag=None,
                     processing_optimization=False) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary resources required in order to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :param file_install_tag: Only install files with the specified tag
        :param processing_optimization: Attempt to optimize processing order and RAM usage
        :return: AnalysisResult
        """
        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})')

        self.log.debug(f'Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
            self.log.info('Found previously interrupted download. Download will be resumed if possible.')
            try:
                missing = 0
                mismatch = 0
                completed_files = set()

                for line in open(self.resume_file).readlines():
                    file_hash, _, filename = line.strip().partition(':')
                    _p = os.path.join(self.dl_dir, filename)
                    if not os.path.exists(_p):
                        self.log.debug(f'File does not exist but is in resume file: "{_p}"')
                        missing += 1
                    elif file_hash != manifest.file_manifest_list.get_file_by_path(filename).sha_hash.hex():
                        mismatch += 1
                    else:
                        completed_files.add(filename)

                if missing:
                    self.log.warning(f'{missing} previously completed file(s) are missing, they will be redownloaded.')
                if mismatch:
                    self.log.warning(f'{mismatch} existing file(s) have been changed and will be redownloaded.')

                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.info(f'Skipping {len(completed_files)} files based on resume data.')
            except Exception as e:
                self.log.warning(f'Reading resume file failed: {e!r}, continuing as normal...')

        # Install tags are used for selective downloading, e.g. for language packs
        additional_deletion_tasks = []
        if file_install_tag is not None:
            if isinstance(file_install_tag, str):
                file_install_tag = [file_install_tag]

            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if not any((fit in i.install_tags) or (not fit and not i.install_tags)
                                           for fit in file_install_tag))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on install tag.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip
            for fname in sorted(files_to_skip):
                additional_deletion_tasks.append(FileTask(fname, delete=True, silent=True))

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            if isinstance(file_exclude_filter, str):
                file_exclude_filter = [file_exclude_filter]

            file_exclude_filter = [f.lower() for f in file_exclude_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if any(i.filename.lower().startswith(pfx) for pfx in file_exclude_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on exclude prefix.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            if isinstance(file_prefix_filter, str):
                file_prefix_filter = [file_prefix_filter]

            file_prefix_filter = [f.lower() for f in file_prefix_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if not any(i.filename.lower().startswith(pfx) for pfx in file_prefix_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on include prefix(es)')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}')
            # correct install size after filtering
            analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements
                                            if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        if processing_optimization and len(manifest.file_manifest_list.elements) > 100_000:
            self.log.warning('Manifest contains too many files, processing optimizations will be disabled.')
            processing_optimization = False
        elif processing_optimization:
            self.log.info('Processing order optimization is enabled, analysis may take a few seconds longer...')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        fmlist = sorted(manifest.file_manifest_list.elements,
                        key=lambda a: a.filename.lower())

        for fm in fmlist:
            self.hash_map[fm.filename] = fm.sha_hash.hex()

            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        if processing_optimization:
            s_time = time.time()
            # reorder the file manifest list to group files that share many chunks
            # 4 is mostly arbitrary but has shown in testing to be a good choice
            min_overlap = 4
            # ignore files with less than N chunk parts, this speeds things up dramatically
            cp_threshold = 5

            remaining_files = {fm.filename: {cp.guid_num for cp in fm.chunk_parts}
                               for fm in fmlist if fm.filename not in mc.unchanged}
            _fmlist = []

            # iterate over all files that will be downloaded and pair up those that share the most chunks
            for fm in fmlist:
                if fm.filename not in remaining_files:
                    continue

                _fmlist.append(fm)
                f_chunks = remaining_files.pop(fm.filename)
                if len(f_chunks) < cp_threshold:
                    continue

                best_overlap, match = 0, None
                for fname, chunks in remaining_files.items():
                    if len(chunks) < cp_threshold:
                        continue
                    overlap = len(f_chunks & chunks)
                    if overlap > min_overlap and overlap > best_overlap:
                        best_overlap, match = overlap, fname

                if match:
                    _fmlist.append(manifest.file_manifest_list.get_file_by_path(match))
                    remaining_files.pop(match)

            fmlist = _fmlist
            opt_delta = time.time() - s_time
            self.log.debug(f'Processing optimizations took {opt_delta:.01f} seconds.')

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(changed)
                new_file = manifest.file_manifest_list.get_file_by_path(changed)

                existing_chunks = defaultdict(list)
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[cp.guid_num].append((off, cp.offset, cp.offset + cp.size))
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    for file_o, cp_o, cp_end_o in existing_chunks[cp.guid_num]:
                        # check if new chunk part is wholly contained in the old chunk part
                        if cp_o <= cp.offset and (cp.offset + cp.size) <= cp_end_o:
                            references[cp.guid_num] -= 1
                            re_usable[changed][key] = file_o + (cp.offset - cp_o)
                            analysis_res.reuse_size += cp.size
                            break

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in fmlist:
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset, cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset, cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(f' + Reusing {reused} chunks from: {current_file.filename}')
                # open temporary file that will contain download + old file contents
                self.tasks.append(FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(FileTask(current_file.filename, delete=True, rename=True,
                                           temporary_filename=current_file.filename + u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB')
                last_cache_size = current_cache_size

        self.log.debug(f'Final cache size requirement: {last_cache_size / 1024 / 1024} MiB.')
        analysis_res.min_memory = last_cache_size + (1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            suggested_mib = round(self.max_shared_memory / 1024 / 1024 +
                                  (analysis_res.min_memory - self.max_shared_memory) / 1024 / 1024 + 32)

            if processing_optimization:
                message = f'Try running legendary with "--enable-reordering --max-shared-memory {suggested_mib:.0f}"'
            else:
                message = 'Try running legendary with "--enable-reordering" to reduce memory usage, ' \
                          f'or use "--max-shared-memory {suggested_mib:.0f}" to increase the limit.'

            raise MemoryError(f'Current shared memory cache is smaller than required: {shared_mib} < {required_mib}. '
                              + message)

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))
        self.tasks.extend(additional_deletion_tasks)

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res

    def download_job_manager(self, task_cond: Condition, shm_cond: Condition):
        while self.chunks_to_dl and self.running:
            while self.active_tasks < self.max_workers * 2 and self.chunks_to_dl:
                try:
                    sms = self.sms.popleft()
                    no_shm = False
                except IndexError:  # no free cache
                    no_shm = True
                    break

                c_guid = self.chunks_to_dl.popleft()
                chunk = self.chunk_data_list.get_chunk_by_guid(c_guid)
                self.log.debug(f'Adding {chunk.guid_num} (active: {self.active_tasks})')
                try:
                    self.dl_worker_queue.put(DownloaderTask(url=self.base_url + '/' + chunk.path,
                                                            chunk_guid=c_guid, shm=sms),
                                             timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to add to download queue: {e!r}')
                    self.chunks_to_dl.appendleft(c_guid)
                    break

                self.active_tasks += 1
            else:
                # active tasks limit hit, wait for tasks to finish
                with task_cond:
                    self.log.debug('Waiting for download tasks to complete..')
                    task_cond.wait(timeout=1.0)
                    continue

            if no_shm:
                # if we break we ran out of shared memory, so wait for that.
                with shm_cond:
                    self.log.debug('Waiting for more shared memory...')
                    shm_cond.wait(timeout=1.0)

        self.log.debug('Download Job Manager quitting...')

    def dl_results_handler(self, task_cond: Condition):
        in_buffer = dict()

        task = self.tasks.popleft()
        current_file = ''

        while task and self.running:
            if isinstance(task, FileTask):  # this wasn't necessarily a good idea...
                try:
                    if task.empty:
                        self.writer_queue.put(WriterTask(task.filename, empty=True), timeout=1.0)
                    elif task.rename:
                        self.writer_queue.put(WriterTask(task.filename, rename=True,
                                                         delete=task.delete,
                                                         old_filename=task.temporary_filename),
                                              timeout=1.0)
                    elif task.delete:
                        self.writer_queue.put(WriterTask(task.filename, delete=True, silent=task.silent),
                                              timeout=1.0)
                    elif task.open:
                        self.writer_queue.put(WriterTask(task.filename, fopen=True), timeout=1.0)
                        current_file = task.filename
                    elif task.close:
                        self.writer_queue.put(WriterTask(task.filename, close=True), timeout=1.0)
                except Exception as e:
                    self.tasks.appendleft(task)
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    continue

                try:
                    task = self.tasks.popleft()
                except IndexError:  # finished
                    break
                continue

            while (task.chunk_guid in in_buffer) or task.chunk_file:
                res_shm = None
                if not task.chunk_file:  # not re-using from an old file
                    res_shm = in_buffer[task.chunk_guid].shm

                try:
                    self.log.debug(f'Adding {task.chunk_guid} to writer queue')
                    self.writer_queue.put(WriterTask(
                        filename=current_file, shared_memory=res_shm,
                        chunk_offset=task.chunk_offset, chunk_size=task.chunk_size,
                        chunk_guid=task.chunk_guid, release_memory=task.cleanup,
                        old_file=task.chunk_file  # todo on-disk cache
                    ), timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    break

                if task.cleanup and not task.chunk_file:
                    del in_buffer[task.chunk_guid]

                try:
                    task = self.tasks.popleft()
                    if isinstance(task, FileTask):
                        break
                except IndexError:  # finished
                    task = None
                    break
            else:
                # only enter blocking code if the loop did not break
                try:
                    res = self.dl_result_q.get(timeout=1)
                    self.active_tasks -= 1
                    with task_cond:
                        task_cond.notify()

                    if res.success:
                        self.log.debug(f'Download for {res.guid} succeeded, adding to in_buffer...')
                        in_buffer[res.guid] = res
                        self.bytes_downloaded_since_last += res.compressed_size
                        self.bytes_decompressed_since_last += res.size
                    else:
                        self.log.error(f'Download for {res.guid} failed, retrying...')
                        try:
                            self.dl_worker_queue.put(DownloaderTask(
                                url=res.url, chunk_guid=res.guid, shm=res.shm
                            ), timeout=1.0)
                            self.active_tasks += 1
                        except Exception as e:
                            self.log.warning(f'Failed adding retry task to queue! {e!r}')
                            # If this failed for whatever reason, put the chunk at the front of the DL list
                            self.chunks_to_dl.appendleft(res.chunk_guid)
                except Empty:
                    pass
                except Exception as e:
                    self.log.warning(f'Unhandled exception when trying to read download result queue: {e!r}')

        self.log.debug('Download result handler quitting...')

    def fw_results_handler(self, shm_cond: Condition):
        while self.running:
            try:
                res = self.writer_result_q.get(timeout=1.0)
                self.num_tasks_processed_since_last += 1

                if res.closed and self.resume_file and res.success:
                    if res.filename.endswith('.tmp'):
                        res.filename = res.filename[:-4]

                    file_hash = self.hash_map[res.filename]
                    # write last completed file to super simple resume file
                    with open(self.resume_file, 'ab') as rf:
                        rf.write(f'{file_hash}:{res.filename}\n'.encode('utf-8'))

                if res.kill:
                    self.log.debug('Got termination command in FW result handler')
                    break

                if not res.success:
                    # todo make this kill the installation process or at least skip the file and mark it as failed
                    self.log.fatal(f'Writing for {res.filename} failed!')
                if res.release_memory:
                    self.sms.appendleft(res.shm)
                    with shm_cond:
                        shm_cond.notify()
                if res.chunk_guid:
                    self.bytes_written_since_last += res.size
                    # if there's no shared memory we must have read from disk.
                    if not res.shm:
                        self.bytes_read_since_last += res.size
                    self.num_processed_since_last += 1
            except Empty:
                continue
            except Exception as e:
                self.log.warning(f'Exception when trying to read writer result queue: {e!r}')

        self.log.debug('Writer result handler quitting...')

    def run(self):
        if not self.analysis:
            raise ValueError('Did not run analysis before trying to run download!')

        # Subprocess will use its own root logger that logs to a Queue instead
        _root = logging.getLogger()
        _root.setLevel(logging.DEBUG if self.proc_debug else logging.INFO)
        if self.logging_queue:
            _root.handlers = []
            _root.addHandler(QueueHandler(self.logging_queue))

        self.log = logging.getLogger('DLManager')
        self.log.info(f'Download Manager running with process-id: {os.getpid()}')

        try:
            self.run_real()
        except KeyboardInterrupt:
            self.log.warning('Immediate exit requested!')
            self.running = False

            # send conditions to unlock threads if they aren't already
            for cond in self.conditions:
                with cond:
                    cond.notify()

            # make sure threads are dead.
            for t in self.threads:
                t.join(timeout=5.0)
                if t.is_alive():
                    self.log.warning(f'Thread did not terminate! {repr(t)}')

            # clean up all the queues, otherwise this process won't terminate properly
            for name, q in zip(('Download jobs', 'Writer jobs', 'Download results', 'Writer results'),
                               (self.dl_worker_queue, self.writer_queue,
                                self.dl_result_q, self.writer_result_q)):
                self.log.debug(f'Cleaning up queue "{name}"')
                try:
                    while True:
                        _ = q.get_nowait()
                except Empty:
                    q.close()
                    q.join_thread()

    def run_real(self):
        self.shared_memory = SharedMemory(create=True, size=self.max_shared_memory)
        self.log.debug(f'Created shared memory of size: {self.shared_memory.size / 1024 / 1024:.02f} MiB')

        # create the shared memory segments and add them to their respective pools
        for i in range(int(self.shared_memory.size / self.analysis.biggest_chunk)):
            _sms = SharedMemorySegment(offset=i * self.analysis.biggest_chunk,
                                       end=i * self.analysis.biggest_chunk + self.analysis.biggest_chunk)
            self.sms.append(_sms)

        self.log.debug(f'Created {len(self.sms)} shared memory segments.')

        # Create queues
        self.dl_worker_queue = MPQueue(-1)
        self.writer_queue = MPQueue(-1)
        self.dl_result_q = MPQueue(-1)
        self.writer_result_q = MPQueue(-1)

        self.log.info(f'Starting download workers...')
        for i in range(self.max_workers):
            w = DLWorker(f'DLWorker {i + 1}', self.dl_worker_queue, self.dl_result_q,
                         self.shared_memory.name, logging_queue=self.logging_queue,
                         dl_timeout=self.dl_timeout)
            self.children.append(w)
            w.start()

        self.log.info('Starting file writing worker...')
        writer_p = FileWorker(self.writer_queue, self.writer_result_q, self.dl_dir,
                              self.shared_memory.name, self.cache_dir, self.logging_queue)
        self.children.append(writer_p)
        writer_p.start()

        num_chunk_tasks = sum(isinstance(t, ChunkTask) for t in self.tasks)
        num_dl_tasks = len(self.chunks_to_dl)
        num_tasks = len(self.tasks)
        num_shared_memory_segments = len(self.sms)
        self.log.debug(f'Chunks to download: {num_dl_tasks}, File tasks: {num_tasks}, Chunk tasks: {num_chunk_tasks}')

        # active downloader tasks
        self.active_tasks = 0
        processed_chunks = 0
        processed_tasks = 0
        total_dl = 0
        total_write = 0

        # synchronization conditions
        shm_cond = Condition()
        task_cond = Condition()
        self.conditions = [shm_cond, task_cond]

        # start threads
        s_time = time.time()
        self.threads.append(Thread(target=self.download_job_manager, args=(task_cond, shm_cond)))
        self.threads.append(Thread(target=self.dl_results_handler, args=(task_cond,)))
        self.threads.append(Thread(target=self.fw_results_handler, args=(shm_cond,)))

        for t in self.threads:
            t.start()

        last_update = time.time()

        while processed_tasks < num_tasks:
            delta = time.time() - last_update
            if not delta:
                time.sleep(self.update_interval)
                continue

            # update all the things
            processed_chunks += self.num_processed_since_last
            processed_tasks += self.num_tasks_processed_since_last

            total_dl += self.bytes_downloaded_since_last
            total_write += self.bytes_written_since_last

            dl_speed = self.bytes_downloaded_since_last / delta
            dl_unc_speed = self.bytes_decompressed_since_last / delta
            w_speed = self.bytes_written_since_last / delta
            r_speed = self.bytes_read_since_last / delta
            # c_speed = self.num_processed_since_last / delta

            # set temporary counters to 0
            self.bytes_read_since_last = self.bytes_written_since_last = 0
            self.bytes_downloaded_since_last = self.num_processed_since_last = 0
            self.bytes_decompressed_since_last = self.num_tasks_processed_since_last = 0
            last_update = time.time()

            perc = (processed_chunks / num_chunk_tasks) * 100
            runtime = time.time() - s_time
            total_avail = len(self.sms)
            total_used = (num_shared_memory_segments - total_avail) * (self.analysis.biggest_chunk / 1024 / 1024)

            if runtime and processed_chunks:
                rt_hours, runtime = int(runtime // 3600), runtime % 3600
                rt_minutes, rt_seconds = int(runtime // 60), int(runtime % 60)

                average_speed = processed_chunks / runtime
                estimate = (num_chunk_tasks - processed_chunks) / average_speed
                hours, estimate = int(estimate // 3600), estimate % 3600
                minutes, seconds = int(estimate // 60), int(estimate % 60)
            else:
                hours = minutes = seconds = 0
                rt_hours = rt_minutes = rt_seconds = 0

            bar.set_fraction(perc)
            bar.set_text(
                f'{perc:.02f}% ({processed_chunks}/{num_chunk_tasks}), '
                f'Elapsed: {rt_hours:02d}:{rt_minutes:02d}:{rt_seconds:02d}, '
                f'ETA: {hours:02d}:{minutes:02d}:{seconds:02d}, '
                f'{dl_speed / 1024 / 1024:.02f} MiB/s'
            )
            #self.log.info(f'= Progress: {perc:.02f}% ({processed_chunks}/{num_chunk_tasks}), '
            #              f'Running for {rt_hours:02d}:{rt_minutes:02d}:{rt_seconds:02d}, '
            #              f'ETA: {hours:02d}:{minutes:02d}:{seconds:02d}')
            #self.log.info(f' - Downloaded: {total_dl / 1024 / 1024:.02f} MiB, '
            #              f'Written: {total_write / 1024 / 1024:.02f} MiB')
            #self.log.info(f' - Cache usage: {total_used} MiB, active tasks: {self.active_tasks}')
            #self.log.info(f' + Download\t- {dl_speed / 1024 / 1024:.02f} MiB/s (raw) '
            #              f'/ {dl_unc_speed / 1024 / 1024:.02f} MiB/s (decompressed)')
            #self.log.info(f' + Disk\t- {w_speed / 1024 / 1024:.02f} MiB/s (write) / '
            #              f'{r_speed / 1024 / 1024:.02f} MiB/s (read)')

            # send status update to back to instantiator (if queue exists)
            if self.status_queue:
                try:
                    self.status_queue.put(UIUpdate(
                        progress=perc, download_speed=dl_unc_speed,
                        write_speed=w_speed, read_speed=r_speed,
                        memory_usage=total_used * 1024 * 1024
                    ), timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to send status update to queue: {e!r}')

            time.sleep(self.update_interval)

        for i in range(self.max_workers):
            self.dl_worker_queue.put_nowait(DownloaderTask(kill=True))

        self.log.info('Waiting for installation to finish...')
        self.writer_queue.put_nowait(WriterTask('', kill=True))

        writer_p.join(timeout=10.0)
        if writer_p.exitcode is None:
            self.log.warning(f'Terminating writer process, no exit code!')
            writer_p.terminate()

        # forcibly kill DL workers that are not actually dead yet
        for child in self.children:
            if child.exitcode is None:
                child.terminate()

        # make sure all the threads are dead.
        for t in self.threads:
            t.join(timeout=5.0)
            if t.is_alive():
                self.log.warning(f'Thread did not terminate! {repr(t)}')

        # clean up resume file
        if self.resume_file:
            try:
                os.remove(self.resume_file)
            except OSError as e:
                self.log.warning(f'Failed to remove resume file: {e!r}')

        # close up shared memory
        self.shared_memory.close()
        self.shared_memory.unlink()
        self.shared_memory = None

        self.log.info('All done! Download manager quitting...')
        # finally, exit the process.
        exit(0)
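# SharedMemorySegment is referenced above but not defined in this snippet. A
# minimal sketch, assuming it is a plain record describing one slice of the
# single big SharedMemory block; the field names mirror the keyword arguments
# used in run_real above, and the size property is an assumption:
class SharedMemorySegment:
    def __init__(self, offset=0, end=1024 * 1024):
        self.offset = offset  # start of this slice within the shared buffer
        self.end = end        # exclusive end of this slice

    @property
    def size(self):
        return self.end - self.offset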
async def delete(self, object_id):
    shm = SharedMemory(name=object_id)
    shm.unlink()
    self._object_ids.remove(object_id)
async def teardown(**kwargs):
    object_ids = kwargs.get('object_ids')
    for object_id in object_ids:
        shm = SharedMemory(name=object_id)
        shm.unlink()
        await asyncio.sleep(0)
class SpaceColony:
    def __init__(
        self,
        points,
        roots=np.zeros((1, 3)),
        parameters=Param(r=0.04, iD=0.5, kD=0.2, bias=np.zeros(3)),
        trunk_lim=1,
        min_activation=5,
        yeet_condition=5,
        maxsize=100000,
        ncpu=cpu_count(),
        grow_function=(lambda v: normalize(v)),
    ):
        # Static information
        self.par = parameters
        self.ncpu = ncpu
        self.min_activation = min_activation
        self.trunk_lim = trunk_lim
        self.maxsize = maxsize
        self.yeet_condition = yeet_condition
        self.grow_function = grow_function
        self.nroots = len(roots)

        # Dynamic information
        self.age = 0
        self.start = 0
        self.end = len(roots)
        self.done = False
        self.trunk_mode = True
        self.yeet_count = 0
        self.activation = 0
        self.reached_points = 0
        self.stats = []
        self.dirty = True

        # Local dynamics
        self.edges = []
        self.children = [[] for _ in range(maxsize)]
        self.w = []

        # This array is sliced at start:
        self.points = points

        # This is sparta.
        self.lock = Lock()
        A = np.inf * np.ones((self.maxsize, 3), dtype=roots.dtype)
        self.vectors_sm = SharedMemory(create=True, size=A.nbytes)
        self.tree_sm = SharedMemory(create=True, size=A.nbytes)
        self.vectors = as_numpy_arr(A.shape, shared_obj=self.vectors_sm)
        self.vectors[:] = A[:]
        self.nodes = as_numpy_arr(A.shape, self.tree_sm)
        self.nodes[:] = A[:]
        for i in range(len(roots)):
            self.nodes[i] = roots[i]

        # Explicit pool creation for better control
        point_slices = np.array_split(self.points, self.ncpu)
        self.workers = []
        self.pipes = []
        for i in range(self.ncpu):
            parent_pipe, child_pipe = Pipe()
            self.pipes.append(parent_pipe)
            args = self.pack(point_slices[i], child_pipe)
            self.workers.append(Horse(*args))
            self.workers[i].start()
        self.running = True

    def iterate(self, N):
        self.dirty = True
        log.info(f"START: {time()}\n{self.__str__()}")
        for i in range(N):
            self.update_stats()
            if self.done:
                break
            for pipe in self.pipes:
                pipe.send(Batch(True, 1, (self.start, self.end, self.trunk_mode)))
            result_list = [pipe.recv() for pipe in self.pipes]
            res = self.collect(result_list)
            self.grow(res)
            self.age += 1
            self.done_yet()
        log.info(f"DONE: {time()}\n{self.__str__()}")

    def stop(self):
        if not self.running:
            return
        log.info("Horse shutdown.")
        for pipe in self.pipes:
            pipe.send(Batch(False, 1, (None, )))
            pipe.close()
        for w in self.workers:
            w.join(1)
            w.terminate()
        self.pipes = []
        self.workers = []
        self.running = False

    def collect(self, result_list):
        self.activation = 0
        self.reached_points = 0
        result = []
        for res in result_list:
            self.activation += res[0]
            self.reached_points += res[1]
            for i in res[2]:
                if i not in result:
                    result.append(i)
        return result

    def grow(self, res):
        self.start = self.end
        for i in res:
            if self.end >= self.maxsize:
                log.info("Halt condition: node vector full.")
                self.done = True
                return
            self.nodes[self.end] = (
                self.nodes[i] +
                (self.grow_function(self.vectors[i]) + self.par.bias) * self.par.r)
            self.children[i].append(self.end)
            self.vectors[i] = np.ones(3) * np.inf
            self.end += 1

    def done_yet(self):
        if self.done:
            return True
        if self.trunk_mode:
            self.trunk_mode = self.activation <= self.trunk_lim
            if self.trunk_mode:
                return False
            else:
                log.info(f"Trunk mode disabled at {self.age} iterations.")
        if self.activation < self.min_activation:
            log.info(f"Halt condition: activation < {self.min_activation}.")
            self.done = True
            return True
        # The yeet condition is basically to inhibit periodic behaviours from growing
        # the structure ad infinitum. It stops iterating if it detects that activation
        # levels are not changing any more. There are some obvious corner cases to this,
        # same numerical activation does not imply that the same set of attractors are
        # active, but in practice this method is fast and works well enough.
        if self.age > self.yeet_condition:
            if np.abs(self.activation - self.stats[self.age - 1].act) < 3:
                self.yeet_count += 1
                if self.yeet_count >= self.yeet_condition:
                    self.end = self.stats[self.age - self.yeet_count].sz
                    for i in range(self.end):
                        self.children[i] = [
                            c for c in self.children[i] if c <= self.end - 1
                        ]
                    self.age -= self.yeet_count
                    self.stats = self.stats[:self.age]
                    log.info(f"Halt condition: yeet count {self.yeet_count}.")
                    self.done = True
                    return True
            else:
                self.yeet_count = 0
                return False
        return False

    # Populate edge table
    def walk(self):
        self.w = np.ones(self.maxsize)
        self.edges = []
        for i in range(self.nroots):
            self._walk(i)
        self.dirty = False

    def _walk(self, i):
        w = self.w[i]
        for j in self.children[i]:
            self.edges.append((i, j))
            w += self._walk(j)**2
        w = np.sqrt(w)
        self.w[i] = w
        return w

    # Use explicit packing/unpacking
    def pack(self, points, pipe):
        return (
            points,
            self.par.iD,
            self.par.kD,
            self.vectors_sm.name,
            self.tree_sm.name,
            self.maxsize,
            pipe,
            self.lock,
        )

    def update_stats(self):
        self.stats.append(Stats(self.end, self.activation, self.reached_points))

    def get_stats(self):
        return self.stats

    def __str__(self):
        nproc = 0
        for w in self.workers:
            nproc += 1 if w.is_alive() else 0
        leaves = 0
        for i in range(self.end):
            if len(self.children[i]) == 0:
                leaves += 1
        return f"{self.end} nodes, {self.age} iterations \n\
{self.activation}/{len(self.points) - self.reached_points} active points \n\
Total {len(self.points)} points on {nproc}/{self.ncpu} processes \n\
avg. branching: {leaves/(self.end+1)} \n\
{self.par}"

    def __del__(self):
        log.debug("Delete SpaceColony")
        self.stop()
        self.vectors_sm.close()
        self.vectors_sm.unlink()
        self.tree_sm.close()
        self.tree_sm.unlink()
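# `as_numpy_arr` is used in SpaceColony.__init__ but not defined in this
# snippet. A minimal sketch of what it could look like, assuming it wraps a
# SharedMemory buffer as an ndarray view without copying; the float64 default
# and keyword name are assumptions consistent with the call sites above:
def as_numpy_arr(shape, shared_obj, dtype=np.float64):
    # View the shared buffer as an ndarray; no data is copied, so writes
    # through the returned array are visible to every attached process.
    return np.ndarray(shape, dtype=dtype, buffer=shared_obj.buf)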
class DLManager(Process):
    def __init__(self, download_dir, base_url, cache_dir=None, status_q=None,
                 max_jobs=100, max_failures=5, max_workers=0, update_interval=1.0,
                 max_shared_memory=1024 * 1024 * 1024, resume_file=None):
        super().__init__(name='DLManager')
        self.log = logging.getLogger('DLM')
        self.proc_debug = False

        self.base_url = base_url
        self.dl_dir = download_dir
        self.cache_dir = cache_dir if cache_dir else os.path.join(download_dir, '.cache')

        # All the queues!
        self.logging_queue = None
        self.dl_worker_queue = None
        self.writer_queue = None
        self.dl_result_q = None
        self.writer_result_q = None
        self.max_jobs = max_jobs
        self.max_workers = max_workers if max_workers else min(cpu_count() * 2, 16)

        # Analysis stuff
        self.analysis = None
        self.tasks = deque()
        self.chunks_to_dl = deque()
        self.chunk_data_list = None

        # shared memory stuff
        self.max_shared_memory = max_shared_memory  # 1 GiB by default
        self.sms = deque()
        self.shared_memory = None

        # Interval for log updates and pushing updates to the queue
        self.update_interval = update_interval
        self.status_queue = status_q  # queue used to relay status info back to GUI/CLI

        # behaviour settings
        self.max_failures = max_failures
        self.resume_file = resume_file

        # cross-thread runtime information
        self.running = True
        self.active_tasks = 0
        self.children = []
        self.threads = []
        self.conditions = []
        # bytes downloaded and decompressed since last report
        self.bytes_downloaded_since_last = 0
        self.bytes_decompressed_since_last = 0
        # bytes written since last report
        self.bytes_written_since_last = 0
        # bytes read since last report
        self.bytes_read_since_last = 0
        # chunks written since last report
        self.num_processed_since_last = 0
        self.num_tasks_processed_since_last = 0

    def download_job_manager(self, task_cond: Condition, shm_cond: Condition):
        while self.chunks_to_dl and self.running:
            while self.active_tasks < self.max_workers * 2 and self.chunks_to_dl:
                try:
                    sms = self.sms.popleft()
                    no_shm = False
                except IndexError:  # no free cache
                    no_shm = True
                    break

                c_guid = self.chunks_to_dl.popleft()
                chunk = self.chunk_data_list.get_chunk_by_guid(c_guid)
                self.log.debug(f'Adding {chunk.guid_num} (active: {self.active_tasks})')
                try:
                    self.dl_worker_queue.put(DownloaderTask(url=self.base_url + '/' + chunk.path,
                                                            chunk_guid=c_guid, shm=sms),
                                             timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to add to download queue: {e!r}')
                    self.chunks_to_dl.appendleft(c_guid)
                    break

                self.active_tasks += 1
            else:
                # active tasks limit hit, wait for tasks to finish
                with task_cond:
                    self.log.debug('Waiting for download tasks to complete..')
                    task_cond.wait(timeout=1.0)
                    continue

            if no_shm:
                # if we break we ran out of shared memory, so wait for that.
                with shm_cond:
                    self.log.debug('Waiting for more shared memory...')
                    shm_cond.wait(timeout=1.0)

        self.log.info('Download Job Manager quitting...')

    def dl_results_handler(self, task_cond: Condition):
        in_buffer = dict()

        task = self.tasks.popleft()
        current_file = ''

        while task and self.running:
            if isinstance(task, FileTask):  # this wasn't necessarily a good idea...
                try:
                    if task.empty:
                        self.writer_queue.put(WriterTask(task.filename, empty=True), timeout=1.0)
                    elif task.rename:
                        self.writer_queue.put(WriterTask(task.filename, rename=True,
                                                         delete=task.delete,
                                                         old_filename=task.temporary_filename),
                                              timeout=1.0)
                    elif task.delete:
                        self.writer_queue.put(WriterTask(task.filename, delete=True), timeout=1.0)
                    elif task.open:
                        self.writer_queue.put(WriterTask(task.filename, fopen=True), timeout=1.0)
                        current_file = task.filename
                    elif task.close:
                        self.writer_queue.put(WriterTask(task.filename, close=True), timeout=1.0)
                except Exception as e:
                    self.tasks.appendleft(task)
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    continue

                try:
                    task = self.tasks.popleft()
                except IndexError:  # finished
                    break
                continue

            while (task.chunk_guid in in_buffer) or task.chunk_file:
                res_shm = None
                if not task.chunk_file:  # not re-using from an old file
                    res_shm = in_buffer[task.chunk_guid].shm

                try:
                    self.writer_queue.put(WriterTask(
                        filename=current_file, shared_memory=res_shm,
                        chunk_offset=task.chunk_offset, chunk_size=task.chunk_size,
                        chunk_guid=task.chunk_guid, release_memory=task.cleanup,
                        old_file=task.chunk_file  # todo on-disk cache
                    ), timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    break

                if task.cleanup and not task.chunk_file:
                    del in_buffer[task.chunk_guid]

                try:
                    task = self.tasks.popleft()
                    if isinstance(task, FileTask):
                        break
                except IndexError:  # finished
                    task = None
                    break
            else:
                # only enter blocking code if the loop did not break
                try:
                    res = self.dl_result_q.get(timeout=1)
                    self.active_tasks -= 1
                    with task_cond:
                        task_cond.notify()

                    if res.success:
                        in_buffer[res.guid] = res
                        self.bytes_downloaded_since_last += res.compressed_size
                        self.bytes_decompressed_since_last += res.size
                    else:
                        self.log.error(f'Download for {res.guid} failed, retrying...')
                        try:
                            self.dl_worker_queue.put(DownloaderTask(
                                url=res.url, chunk_guid=res.guid, shm=res.shm), timeout=1.0)
                            self.active_tasks += 1
                        except Exception as e:
                            self.log.warning(f'Failed adding retry task to queue! {e!r}')
                            # If this failed for whatever reason, put the chunk at the front of the DL list
                            self.chunks_to_dl.appendleft(res.chunk_guid)
                except Empty:
                    pass
                except Exception as e:
                    self.log.warning(f'Unhandled exception when trying to read download result queue: {e!r}')

        self.log.info('Download result handler quitting...')

    def fw_results_handler(self, shm_cond: Condition):
        while self.running:
            try:
                res = self.writer_result_q.get(timeout=1.0)
                self.num_tasks_processed_since_last += 1

                if res.closed and self.resume_file:
                    # write last completed file to super simple resume file
                    with open(self.resume_file, 'ab') as rf:
                        rf.write(f'{res.filename}\n'.encode('utf-8'))

                if res.kill:
                    self.log.info('Got termination command in FW result handler')
                    break

                if not res.success:
                    # todo make this kill the installation process or at least skip the file and mark it as failed
                    self.log.fatal(f'Writing for {res.filename} failed!')
                if res.release_memory:
                    self.sms.appendleft(res.shm)
                    with shm_cond:
                        shm_cond.notify()
                if res.chunk_guid:
                    self.bytes_written_since_last += res.size
                    # if there's no shared memory we must have read from disk.
                    if not res.shm:
                        self.bytes_read_since_last += res.size
                    self.num_processed_since_last += 1
            except Empty:
                continue
            except Exception as e:
                self.log.warning(f'Exception when trying to read writer result queue: {e!r}')

        self.log.info('Writer result handler quitting...')

    def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                     patch=True, resume=True, file_prefix_filter=None,
                     file_exclude_filter=None, file_install_tag=None) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary resources required in order to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :return: AnalysisResult
        """
        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})')

        self.log.debug(f'Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
            try:
                completed_files = set(i.strip() for i in open(self.resume_file).readlines())
                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.debug(f'Skipped {len(completed_files)} files based on resume data!')
            except Exception as e:
                self.log.warning(f'Reading resume file failed: {e!r}, continuing as normal...')

        # Not entirely sure what install tags are used for, only some titles have them.
        # Let's add it for testing anyway.
        if file_install_tag:
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if file_install_tag not in i.install_tags)
            self.log.info(f'Found {len(files_to_skip)} files to skip based on install tag.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            file_exclude_filter = file_exclude_filter.lower()
            files_to_skip = set(i for i in mc.added | mc.changed
                                if i.lower().startswith(file_exclude_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on exclude prefix.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            file_prefix_filter = file_prefix_filter.lower()
            files_to_skip = set(i for i in mc.added | mc.changed
                                if not i.lower().startswith(file_prefix_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on include prefix.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}')
            # correct install size after filtering
            analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements
                                            if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        for fm in manifest.file_manifest_list.elements:
            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(changed)
                new_file = manifest.file_manifest_list.get_file_by_path(changed)

                existing_chunks = dict()
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[(cp.guid_num, cp.offset, cp.size)] = off
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    if key in existing_chunks:
                        references[cp.guid_num] -= 1
                        re_usable[changed][key] = existing_chunks[key]
                        analysis_res.reuse_size += cp.size

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in sorted(manifest.file_manifest_list.elements,
                                   key=lambda a: a.filename.lower()):
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset, cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset, cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(f' + Reusing {reused} chunks from: {current_file.filename}')
                # open temporary file that will contain download + old file contents
                self.tasks.append(FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(FileTask(current_file.filename, delete=True, rename=True,
                                           temporary_filename=current_file.filename + u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB')
                last_cache_size = current_cache_size

        self.log.debug(f'Final cache size requirement: {last_cache_size / 1024 / 1024} MiB.')
        analysis_res.min_memory = last_cache_size + (1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            raise MemoryError(f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}')

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res

    def run(self):
        if not self.analysis:
            raise ValueError('Did not run analysis before trying to run download!')

        # Subprocess will use its own root logger that logs to a Queue instead
        _root = logging.getLogger()
        _root.setLevel(logging.DEBUG if self.proc_debug else logging.INFO)
        if self.logging_queue:
            _root.handlers = []
            _root.addHandler(QueueHandler(self.logging_queue))

        self.log = logging.getLogger('DLMProc')
        self.log.info(f'Download Manager running with process-id: {os.getpid()}')

        try:
            self.run_real()
        except KeyboardInterrupt:
            self.log.warning('Immediate exit requested!')
            self.running = False

            # send conditions to unlock threads if they aren't already
            for cond in self.conditions:
                with cond:
                    cond.notify()

            # make sure threads are dead.
            for t in self.threads:
                t.join(timeout=5.0)
                if t.is_alive():
                    self.log.warning(f'Thread did not terminate! {repr(t)}')

            # clean up all the queues, otherwise this process won't terminate properly
            for name, q in zip(('Download jobs', 'Writer jobs', 'Download results', 'Writer results'),
                               (self.dl_worker_queue, self.writer_queue,
                                self.dl_result_q, self.writer_result_q)):
                self.log.debug(f'Cleaning up queue "{name}"')
                try:
                    while True:
                        _ = q.get_nowait()
                except Empty:
                    q.close()
                    q.join_thread()

    def run_real(self):
        self.shared_memory = SharedMemory(create=True, size=self.max_shared_memory)
        self.log.debug(f'Created shared memory of size: {self.shared_memory.size / 1024 / 1024:.02f} MiB')

        # create the shared memory segments and add them to their respective pools
        for i in range(int(self.shared_memory.size / self.analysis.biggest_chunk)):
            _sms = SharedMemorySegment(offset=i * self.analysis.biggest_chunk,
                                       end=i * self.analysis.biggest_chunk + self.analysis.biggest_chunk)
            self.sms.append(_sms)

        self.log.debug(f'Created {len(self.sms)} shared memory segments.')

        # Create queues
        self.dl_worker_queue = MPQueue(-1)
        self.writer_queue = MPQueue(-1)
        self.dl_result_q = MPQueue(-1)
        self.writer_result_q = MPQueue(-1)

        self.log.info(f'Starting download workers...')
        for i in range(self.max_workers):
            w = DLWorker(f'DLWorker {i + 1}', self.dl_worker_queue, self.dl_result_q,
                         self.shared_memory.name, logging_queue=self.logging_queue)
            self.children.append(w)
            w.start()

        self.log.info('Starting file writing worker...')
        writer_p = FileWorker(self.writer_queue, self.writer_result_q, self.dl_dir,
                              self.shared_memory.name, self.cache_dir, self.logging_queue)
        self.children.append(writer_p)
        writer_p.start()

        num_chunk_tasks = sum(isinstance(t, ChunkTask) for t in self.tasks)
        num_dl_tasks = len(self.chunks_to_dl)
        num_tasks = len(self.tasks)
        num_shared_memory_segments = len(self.sms)
        self.log.debug(f'Chunks to download: {num_dl_tasks}, File tasks: {num_tasks}, Chunk tasks: {num_chunk_tasks}')

        # active downloader tasks
        self.active_tasks = 0
        processed_chunks = 0
        processed_tasks = 0
        total_dl = 0
        total_write = 0

        # synchronization conditions
        shm_cond = Condition()
        task_cond = Condition()
        self.conditions = [shm_cond, task_cond]

        # start threads
        s_time = time.time()
        self.threads.append(Thread(target=self.download_job_manager, args=(task_cond, shm_cond)))
        self.threads.append(Thread(target=self.dl_results_handler, args=(task_cond, )))
        self.threads.append(Thread(target=self.fw_results_handler, args=(shm_cond, )))

        for t in self.threads:
            t.start()

        last_update = time.time()

        while processed_tasks < num_tasks:
            delta = time.time() - last_update
            if not delta:
                time.sleep(self.update_interval)
                continue

            # update all the things
            processed_chunks += self.num_processed_since_last
            processed_tasks += self.num_tasks_processed_since_last

            total_dl += self.bytes_downloaded_since_last
            total_write += self.bytes_written_since_last

            dl_speed = self.bytes_downloaded_since_last / delta
            dl_unc_speed = self.bytes_decompressed_since_last / delta
            w_speed = self.bytes_written_since_last / delta
            r_speed = self.bytes_read_since_last / delta
            c_speed = self.num_processed_since_last / delta

            # set temporary counters to 0
            self.bytes_read_since_last = self.bytes_written_since_last = 0
            self.bytes_downloaded_since_last = self.num_processed_since_last = 0
            self.bytes_decompressed_since_last = self.num_tasks_processed_since_last = 0
            last_update = time.time()

            perc = (processed_chunks / num_chunk_tasks) * 100
            self.log.info(f'\n============== {time.time() - s_time:.01f} seconds since start')
            self.log.info(f'Progress: {processed_chunks}/{num_chunk_tasks} ({perc:.02f}%) chunk tasks processed.')
            self.log.info(f'Downloaded: {total_dl / 1024 / 1024:.02f} MiB, '
                          f'Written: {total_write / 1024 / 1024:.02f} MiB')

            # speed meters
            self.log.info('Speeds:')
            self.log.info(f' + Download - {dl_speed / 1024 / 1024:.02f} MiB/s (raw) '
                          f'/ {dl_unc_speed / 1024 / 1024:.02f} MiB/s (decompressed)')
            self.log.info(f' + Write (disk) - {w_speed / 1024 / 1024:.02f} MiB/s')
            self.log.info(f' + Read (disk) - {r_speed / 1024 / 1024:.02f} MiB/s')
            self.log.info(f' + Tasks - {c_speed:.02f} Chunks/s')
            self.log.info(f'Active download tasks: {self.active_tasks}')

            # shared memory debugging
            total_avail = len(self.sms)
            total_used = (num_shared_memory_segments - total_avail) * (self.analysis.biggest_chunk / 1024 / 1024)
            self.log.info(f'Shared memory usage: {total_used} MiB, available: {total_avail}')

            # send status update to back to instantiator (if queue exists)
            if self.status_queue:
                try:
                    self.status_queue.put(UIUpdate(progress=perc, download_speed=dl_unc_speed,
                                                   write_speed=w_speed, read_speed=r_speed,
                                                   memory_usage=total_used * 1024 * 1024),
                                          timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to send status update to queue: {e!r}')

            time.sleep(self.update_interval)

        for i in range(self.max_workers):
            self.dl_worker_queue.put_nowait(DownloaderTask(kill=True))

        self.writer_queue.put_nowait(WriterTask('', kill=True))
        self.log.info('Waiting for writer process to finish...')
        writer_p.join(timeout=10.0)
        if writer_p.exitcode is None:
            self.log.warning('Terminating writer process, no exit code!')
            writer_p.terminate()

        # forcibly kill DL workers that are not actually dead yet
        for child in self.children:
            if child.exitcode is None:
                child.terminate()

        # make sure all the threads are dead.
        for t in self.threads:
            t.join(timeout=5.0)
            if t.is_alive():
                self.log.warning(f'Thread did not terminate! {repr(t)}')

        # clean up resume file
        if self.resume_file:
            try:
                os.remove(self.resume_file)
            except OSError as e:
                self.log.warning(f'Failed to remove resume file: {e!r}')

        # close up shared memory
        self.shared_memory.close()
        self.shared_memory.unlink()
        self.shared_memory = None

        # finally, exit the process.
        exit(0)
def free_shared_memory(name: str) -> None:
    """Release this process's handle and unlink the named shared memory block."""
    shared_memory = SharedMemory(MEMORY_NAME.format(name=name))
    shared_memory.close()
    shared_memory.unlink()
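
# Hypothetical usage sketch for free_shared_memory, assuming MEMORY_NAME is a
# module-level template along these lines (the real constant is defined
# elsewhere in the source):
from multiprocessing.shared_memory import SharedMemory

MEMORY_NAME = 'myapp_{name}'  # assumed template, for illustration only


def alloc_shared_memory(name: str, size: int) -> SharedMemory:
    # Hypothetical counterpart that creates a block under the templated name.
    return SharedMemory(MEMORY_NAME.format(name=name), create=True, size=size)


shm = alloc_shared_memory('cache', 4096)
shm.buf[:5] = b'hello'
shm.close()                  # drop this process's handle
free_shared_memory('cache')  # reattach by logical name and unlink
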
@contextmanager
def execute_with_strace(
        output_dir: Path = DEFAULT_OUTPUT_DIR,
        excluded_modules: Optional[Set[str]] = DEFAULT_EXCLUDED_MODULES):
    """Strace ansible module invocations.

    This context manager patches Ansible's ActionBase and StrategyBase
    classes to execute modules using strace.

    Parameters
    ----------
    output_dir : Path
        Directory for strace output.
    excluded_modules : Optional[Set[str]]
        Modules that will not be traced.
    """
    # Make output directory if it doesn't already exist
    output_dir.mkdir(exist_ok=True, parents=True)

    # If no excluded modules, make it an empty set
    if excluded_modules is None:
        excluded_modules = frozenset()

    # Clean output directory (remove all subdirectories and files)
    for path in output_dir.glob('*'):
        if path.is_dir():
            shutil.rmtree(path)
        if path.is_file():
            path.unlink()

    # Save a reference to the original execute function
    action_base_execute = ActionBase._low_level_execute_command

    # Define custom execute that wraps the ActionBase execute function
    @wraps(action_base_execute)
    def _execute_with_strace(self: ActionBase, cmd: str, *args,
                             **kwargs) -> Dict[str, Any]:
        """Execute commands with strace.

        This inner function modifies module commands to be run with strace
        before delegating to the original execute function. Ansible runs
        tasks through worker processes, which means that invocations of
        this function do not share memory/variables with the main process.
        """
        # Just execute the command if gathering facts
        if self._task.action == 'gather_facts':
            return action_base_execute(self, cmd, *args, **kwargs)

        # Get command parts
        parts = cmd.split()

        # Module metadata. This will be None if the module is not traced,
        # and defined if the module is traced.
        metadata = None

        # If there are at least two parts, the command may be running a module
        if len(parts) >= 2:
            # Get the potential executable and module path
            executable = parts[0]
            module = parts[1]

            # Check for python and module
            python_match = R_PYTHON.match(executable)
            module_match = R_MODULE.match(module)

            # If it matches a python module that is not being excluded,
            # modify the command to run strace.
            if (python_match and module_match
                    and module_match.group('module') not in excluded_modules):
                # Get index shared memory, parse index, and increment the
                # value stored in shared memory
                index_shm = SharedMemory(name=INDEX_NAME)
                index = int.from_bytes(bytes=index_shm.buf.tobytes(),
                                       byteorder=sys.byteorder)
                index_shm.buf[:] = (index + 1).to_bytes(
                    byteorder=sys.byteorder, length=INDEX_BYTES)
                index_shm.close()

                # Create module output directory
                module_dir = output_dir / str(index)
                module_dir.mkdir(exist_ok=True, parents=True)

                # Read module source
                with open(module, 'r') as fd:
                    source_lines = fd.readlines()

                # Parse zip data
                zip_data = ''
                for line in source_lines:
                    line = line.strip()
                    if line.startswith(ZIPDATA):
                        zip_data = line[len(ZIPDATA) + 3:-3]
                        break

                # Extract zipped data for output
                with ExitStack() as stack:
                    t_fd = stack.enter_context(tempfile.NamedTemporaryFile())
                    t_fd.write(base64.b64decode(zip_data))
                    t_fd.flush()
                    z_fd = stack.enter_context(zipfile.ZipFile(t_fd.name))
                    z_fd.extractall(module_dir)

                # Copy module for output
                shutil.copy(module, module_dir)

                # Parse module arguments
                module_args = {}
                for line in source_lines:
                    line = line.strip()
                    if line.startswith(ANSIBALLZ_PARAMS):
                        start = len(ANSIBALLZ_PARAMS) + 1
                        try:
                            ansiballz_params_str = (
                                line[start:-1].encode('utf-8').decode(
                                    'unicode_escape', errors='ignore'))
                            ansiballz_params = json.loads(ansiballz_params_str)
                            module_args = {
                                key: value
                                for key, value in ansiballz_params[
                                    ANSIBLE_MODULE_ARGS].items()
                                if not key.startswith('_ansible')
                            }
                        except (UnicodeError, JSONDecodeError):
                            print(' Error parsing module params.')
                        break

                # Modify command and print info
                original_command = cmd
                cmd = (f'strace -DDD -f -y -yy -X raw -I 2 -o "| awk '
                       f'\'NR>{MAX_ROWS}{{print "\\""TRUNCATED"\\""; exit}}; '
                       f'{{print}}\' > {module_dir / "strace.txt"}" '
                       f'-e trace=!close {cmd}')
                print(f' Modified Command: {cmd}')
                print(f' Args: {module_args}')
                sys.stdout.flush()

                # Compute metadata
                metadata = {
                    'name': self._task.name,
                    'action': self._task.action,
                    'module': module_match.group('module'),
                    'index': index,
                    'original_cmd': original_command,
                    'modified_cmd': cmd,
                    'args': module_args,
                }

                # Ignore errors so a tracing failure does not abort the play.
                self._task.ignore_errors = True

        # Delegate to the execute method.
        # The loader basedir replacement is to make sure the command is
        # executed in the correct working directory.
        loader_basedir = self._loader.get_basedir()
        self._loader.set_basedir(Path.cwd())
        execute_start_s = time()
        result = action_base_execute(self, cmd, *args, **kwargs)
        execute_end_s = time()
        self._loader.set_basedir(loader_basedir)
        execute_duration_s = execute_end_s - execute_start_s

        # Write metadata if the command was traced.
        # module_dir is defined iff metadata is.
        if metadata is not None:
            # Set execution duration and print info
            metadata['duration'] = execute_duration_s
            print(f' Execution time: {execute_duration_s:.2f}s')

            # Deep copy result to be safe. This prevents us from accidentally
            # overriding anything when we parse stdout and stderr.
            metadata['result'] = copy.deepcopy(result)

            # Parse stdout as JSON if possible
            try:
                metadata['result']['stdout'] = json.loads(result['stdout'])
            except (TypeError, JSONDecodeError):
                pass

            # Parse stderr as JSON if possible
            try:
                metadata['result']['stderr'] = json.loads(result['stderr'])
            except (TypeError, JSONDecodeError):
                pass

            # Write metadata to the module output directory
            with open(module_dir / 'metadata.json', 'w') as fd:
                json.dump(metadata, fd)

        # Return result
        return result

    # Replace the original function with the custom one
    ActionBase._low_level_execute_command = _execute_with_strace

    # 32 bit unsigned shared integer. To be used for module execution index
    # (artificial unique identifier for modules as they are executed). Initial
    # value is zero, and the value is incremented every time strace is run.
    index_shm = SharedMemory(
        name=INDEX_NAME,
        size=INDEX_BYTES,
        create=True,
    )
    index_shm.buf[:] = b'\x00\x00\x00\x00'

    # Yield context and then restore the original behavior.
    try:
        yield
    finally:
        ActionBase._low_level_execute_command = action_base_execute
        index_shm.close()
        index_shm.unlink()
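
# A standalone sketch of the shared index counter used above: a small
# fixed-size SharedMemory block holding an unsigned integer that each
# attaching process reads and increments. Names with a _DEMO suffix are
# hypothetical stand-ins for the module's INDEX_NAME/INDEX_BYTES constants.
# Note that read-then-write is not atomic, so concurrent workers would need
# external locking around next_index() to guarantee unique indices.
import sys
from multiprocessing.shared_memory import SharedMemory

INDEX_NAME_DEMO = 'index_counter_demo'
INDEX_BYTES_DEMO = 4


def next_index() -> int:
    # Attach by name, read the current value, store value + 1, detach.
    shm = SharedMemory(name=INDEX_NAME_DEMO)
    value = int.from_bytes(bytes(shm.buf[:INDEX_BYTES_DEMO]),
                           byteorder=sys.byteorder)
    shm.buf[:INDEX_BYTES_DEMO] = (value + 1).to_bytes(
        length=INDEX_BYTES_DEMO, byteorder=sys.byteorder)
    shm.close()
    return value


counter = SharedMemory(name=INDEX_NAME_DEMO, create=True, size=INDEX_BYTES_DEMO)
counter.buf[:INDEX_BYTES_DEMO] = bytes(INDEX_BYTES_DEMO)  # zero-initialise
print(next_index(), next_index(), next_index())  # -> 0 1 2
counter.close()
counter.unlink()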