Code Example #1
def mp_buffer(df, buffer_dist, n_workers):

    # copy the id/geometry columns into shared memory as a record array
    array = df[['unique_id', 'geometry']].to_records(index=False)
    shape, dtype = array.shape, array.dtype
    shm = SharedMemory(name='arr', create=True, size=array.nbytes)
    shm_array = np.recarray(shape=shape, dtype=dtype, buf=shm.buf)
    np.copyto(shm_array, array)

    shm_spec = {'name': 'arr', 'shape': shape, 'dtype': dtype}

    # split the row indices into one slice per worker and buffer them in parallel
    chunk = len(df) // n_workers + 1

    args = [(shm_spec, range(len(df))[ii * chunk:(ii + 1) * chunk], buffer_dist)
            for ii in range(n_workers)]

    with mp.Pool(n_workers) as pool:
        res = pool.starmap(buffer_worker, args)

    # flatten the per-worker result lists, then release the shared block
    res = [item for sublist in res for item in sublist]

    shm.close()
    shm.unlink()

    return [r[1] for r in res]
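The `buffer_worker` that the pool dispatches is not part of this excerpt. A minimal sketch of what it could look like, assuming the records hold shapely geometries and that each worker returns `(unique_id, buffered_geometry)` tuples (both assumptions, since the real worker is not shown):

from multiprocessing.shared_memory import SharedMemory

import numpy as np


def buffer_worker(shm_spec, indices, buffer_dist):
    # Hypothetical worker: attach to the shared record array by name, buffer
    # the assigned slice of geometries, and return (id, geometry) pairs.
    shm = SharedMemory(name=shm_spec['name'])
    try:
        records = np.recarray(shape=shm_spec['shape'],
                              dtype=shm_spec['dtype'], buf=shm.buf)
        return [(records[i].unique_id, records[i].geometry.buffer(buffer_dist))
                for i in indices]
    finally:
        shm.close()  # only the parent process unlinks the block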
Code Example #2
File: mp_verifier_attack.py Project: jia-kai/eevbnn
    def find_adv_batch(self, model: nn.Module, inputs: torch.Tensor,
                       inputs_adv_ref: torch.Tensor, labels: torch.Tensor,
                       epsilon: float, max_nr_adv):

        self._start_workers()

        with ensure_training_state(model, False):
            model_outputs: torch.Tensor = model(inputs_adv_ref)
            eval_model = model.cvt_to_eval()

        correct_mask = torch.eq(model_outputs.argmax(dim=1), labels)
        idx_remap = np.arange(inputs.shape[0], dtype=np.int32)
        np.random.shuffle(idx_remap)
        assert inputs.dtype == torch.float32

        shm_size = BUFFER_COUNTER_SIZE + 4 * inputs.numel()
        shm = SharedMemory(size=shm_size, create=True)
        shm.buf[:BUFFER_COUNTER_SIZE] = b'\0' * BUFFER_COUNTER_SIZE
        shm_name = shm.name

        args = RemoteArgs(eval_model, inputs, inputs_adv_ref, labels,
                          correct_mask, epsilon, max_nr_adv, shm_name,
                          shm_size, idx_remap)

        try:
            return self._work(shm.buf, args)
        except:
            self.close()
            raise
        finally:
            shm.close()
            shm.unlink()
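The worker side that consumes this block is not shown. A rough illustration of the attach pattern it implies, re-opening the block by name and viewing the float32 payload that follows the counter header, with purely illustrative names (this is not the project's actual API):

from multiprocessing.shared_memory import SharedMemory

import numpy as np


def attach_input_buffer(shm_name, input_shape, counter_size):
    # Illustrative only: open the block created by find_adv_batch and map the
    # float32 data stored after the first counter_size bytes.
    shm = SharedMemory(name=shm_name)
    payload = np.ndarray(input_shape, dtype=np.float32,
                         buffer=shm.buf, offset=counter_size)
    return shm, payload  # the caller keeps shm alive and close()s it later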
Code Example #3
File: shm_sample.py Project: zhyj3038/SDBI
def producer(conn):
    # os.environ["PYTHONWARNINGS"] = "ignore"
    feed_shm_name = '{}_{}_{}'.format('test', os.getpid(),
                                      threading.currentThread().ident)
    print('input shm name : {}'.format(feed_shm_name))

    feed_shm = SharedMemory(name=feed_shm_name, create=True, size=2 * 4)

    feed_shm_arr = np.ndarray((1, 2), dtype=np.float32, buffer=feed_shm.buf)
    input_arr = np.random.random((1, 2)).astype(np.float32)
    feed_shm_arr[:] = input_arr[:]

    conn.send(feed_shm_name)
    result_shm_name = conn.recv()
    result_shm = SharedMemory(name=result_shm_name)
    result_shm_arr = np.ndarray((1, 2),
                                dtype=np.float32,
                                buffer=result_shm.buf)
    print('Output array : {}'.format(result_shm_arr))

    conn.send('exit')
    del result_shm_arr
    result_shm.close()

    conn.recv()
    del feed_shm_arr
    feed_shm.close()
    feed_shm.unlink()

    print('clean and exit')

    return
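The consumer on the other end of the pipe is not part of the excerpt. A minimal sketch of the counterpart handshake, assuming it simply echoes the input back through a result block (the real consumer presumably runs inference instead):

from multiprocessing.shared_memory import SharedMemory

import numpy as np


def consumer(conn):
    # Hypothetical counterpart to producer(): open the feed block, create a
    # result block, reply with its name, then wait for the exit handshake.
    feed_shm_name = conn.recv()
    feed_shm = SharedMemory(name=feed_shm_name)
    feed_arr = np.ndarray((1, 2), dtype=np.float32, buffer=feed_shm.buf)

    result_shm_name = feed_shm_name + '_result'
    result_shm = SharedMemory(name=result_shm_name, create=True, size=2 * 4)
    result_arr = np.ndarray((1, 2), dtype=np.float32, buffer=result_shm.buf)
    result_arr[:] = feed_arr[:]  # placeholder for the real computation
    conn.send(result_shm_name)

    assert conn.recv() == 'exit'
    del feed_arr, result_arr
    feed_shm.close()              # the producer unlinks the feed block
    result_shm.close()
    result_shm.unlink()
    conn.send('done')             # lets the producer finish its final recv()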
Code Example #4
File: _serialize.py Project: fdrgsp/pymmcore-remote
 def from_dict(self, classname: str, d: dict):
     """convert dict from `ndarray_to_dict` back to np.ndarray"""
     shm = SharedMemory(name=d["shm"], create=False)
     array = np.ndarray(d["shape"], dtype=d["dtype"], buffer=shm.buf).copy()
     shm.close()
     shm.unlink()
     return array
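The `ndarray_to_dict` half of the round trip is not included. A plausible sketch, assuming it allocates a block sized to the array and fills in the `shm`, `shape`, and `dtype` keys that `from_dict` reads back:

from multiprocessing.shared_memory import SharedMemory

import numpy as np


def ndarray_to_dict(array: np.ndarray) -> dict:
    # Hypothetical serializer: copy the array into a fresh shared block and
    # describe it with the keys from_dict() expects.
    shm = SharedMemory(create=True, size=array.nbytes)
    view = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
    view[:] = array[:]
    # Deliberately not closed here: from_dict() on the receiving side copies
    # the data out and then close()s and unlink()s the block.
    return {"shm": shm.name, "shape": array.shape, "dtype": str(array.dtype)}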
Code Example #5
File: shared_memory.py Project: fyrestone/mars
 async def teardown(**kwargs):
     object_ids = kwargs.get('object_ids')
     for object_id in object_ids:
         try:
             shm = SharedMemory(name=object_id)
             shm.unlink()
             await asyncio.sleep(0)
         except FileNotFoundError:
             pass
Code Example #6
File: shared.py Project: asrvsn/ndgpy
 def SharedMemory(self, name: str = None, size: int = None):
     assert name is not None or size is not None
     with self._Client(self._address, authkey=self._authkey) as conn:
         if name is not None:
             shm = SharedMemory(name=name)
             self.client_shms[name] = shm
         else:
             shm = SharedMemory(None, create=True, size=size)
             try:
                 dispatch(conn, None, 'track_segment', (shm.name, ))
             except BaseException as e:
                 shm.unlink()
                 raise e
     return shm
Code Example #7
File: shared_memory.py Project: fyrestone/mars
 async def delete(self, object_id):
     try:
         shm = SharedMemory(name=object_id)
         shm.unlink()
         shm.close()
     except FileNotFoundError:
         if sys.platform == 'win32':
             # skip file not found error for windows
             pass
         else:  # pragma: no cover
             raise
     try:
         self._object_ids.remove(object_id)
     except KeyError:  # pragma: no cover
         return
Code Example #8
File: shared_resources.py Project: psavery/oremda
@contextmanager
def open_memory(name, create=False, size=0, consume=False):
    """Open shared memory via a context manager

    The shared memory will automatically be closed when the
    context ends. The shared memory may also optionally be
    created and/or consumed. If the "consume" flag is True,
    the shared memory will be unlinked as well when the context
    ends.

    Args:
        name (str): the name of the shared memory
        create (bool): whether to create the shared memory or try
                       to open a pre-existing one. If this is True,
                       the size argument must be non-zero.
        size (int): the size in bytes of the shared memory block to create.
                    Only used if create == True.
        consume (bool): whether or not to unlink the shared memory
                        when the context ends.

    Yields:
        SharedMemory: the opened shared memory block
    """
    if create and size == 0:
        raise Exception('If create is True, size must be non-zero')

    kwargs = {
        'name': name,
        'create': create,
        'size': size,
    }
    shm = SharedMemory(**kwargs)
    try:
        yield shm
    finally:
        shm.close()
        if consume:
            shm.unlink()
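Typical use of the context manager might look like the following (the block name and size are illustrative):

import numpy as np

# Create a 1 KiB block, fill it, and let the context close it on exit;
# consume=True additionally unlinks the block once the context ends.
with open_memory('demo-block', create=True, size=1024, consume=True) as shm:
    buf = np.ndarray((256,), dtype=np.float32, buffer=shm.buf)
    buf[:] = np.arange(256, dtype=np.float32)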
Code Example #9
File: manager_gui.py Project: koraynilay/legendary
class DLManager(Process):
    def __init__(self, download_dir, base_url, cache_dir=None, status_q=None,
                 max_workers=0, update_interval=1.0, dl_timeout=10, resume_file=None,
                 max_shared_memory=1024 * 1024 * 1024):
        super().__init__(name='DLManager')
        self.log = logging.getLogger('DLM')
        self.proc_debug = False

        self.base_url = base_url
        self.dl_dir = download_dir
        self.cache_dir = cache_dir if cache_dir else os.path.join(download_dir, '.cache')

        # All the queues!
        self.logging_queue = None
        self.dl_worker_queue = None
        self.writer_queue = None
        self.dl_result_q = None
        self.writer_result_q = None
        self.max_workers = max_workers if max_workers else min(cpu_count() * 2, 16)
        self.dl_timeout = dl_timeout

        # Analysis stuff
        self.analysis = None
        self.tasks = deque()
        self.chunks_to_dl = deque()
        self.chunk_data_list = None

        # shared memory stuff
        self.max_shared_memory = max_shared_memory  # 1 GiB by default
        self.sms = deque()
        self.shared_memory = None

        # Interval for log updates and pushing updates to the queue
        self.update_interval = update_interval
        self.status_queue = status_q  # queue used to relay status info back to GUI/CLI

        # Resume file stuff
        self.resume_file = resume_file
        self.hash_map = dict()

        # cross-thread runtime information
        self.running = True
        self.active_tasks = 0
        self.children = []
        self.threads = []
        self.conditions = []
        # bytes downloaded and decompressed since last report
        self.bytes_downloaded_since_last = 0
        self.bytes_decompressed_since_last = 0
        # bytes written since last report
        self.bytes_written_since_last = 0
        # bytes read since last report
        self.bytes_read_since_last = 0
        # chunks written since last report
        self.num_processed_since_last = 0
        self.num_tasks_processed_since_last = 0

    def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                     patch=True, resume=True, file_prefix_filter=None,
                     file_exclude_filter=None, file_install_tag=None,
                     processing_optimization=False) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary resources required in order to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :param file_install_tag: Only install files with the specified tag
        :param processing_optimization: Attempt to optimize processing order and RAM usage
        :return: AnalysisResult
        """

        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})')

        self.log.debug(f'Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
            self.log.info('Found previously interrupted download. Download will be resumed if possible.')
            try:
                missing = 0
                mismatch = 0
                completed_files = set()

                for line in open(self.resume_file).readlines():
                    file_hash, _, filename = line.strip().partition(':')
                    _p = os.path.join(self.dl_dir, filename)
                    if not os.path.exists(_p):
                        self.log.debug(f'File does not exist but is in resume file: "{_p}"')
                        missing += 1
                    elif file_hash != manifest.file_manifest_list.get_file_by_path(filename).sha_hash.hex():
                        mismatch += 1
                    else:
                        completed_files.add(filename)

                if missing:
                    self.log.warning(f'{missing} previously completed file(s) are missing, they will be redownloaded.')
                if mismatch:
                    self.log.warning(f'{mismatch} existing file(s) have been changed and will be redownloaded.')

                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.info(f'Skipping {len(completed_files)} files based on resume data.')
            except Exception as e:
                self.log.warning(f'Reading resume file failed: {e!r}, continuing as normal...')

        # Install tags are used for selective downloading, e.g. for language packs
        additional_deletion_tasks = []
        if file_install_tag is not None:
            if isinstance(file_install_tag, str):
                file_install_tag = [file_install_tag]

            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if not any((fit in i.install_tags) or (not fit and not i.install_tags)
                                           for fit in file_install_tag))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on install tag.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip
            for fname in sorted(files_to_skip):
                additional_deletion_tasks.append(FileTask(fname, delete=True, silent=True))

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            if isinstance(file_exclude_filter, str):
                file_exclude_filter = [file_exclude_filter]

            file_exclude_filter = [f.lower() for f in file_exclude_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements if
                                any(i.filename.lower().startswith(pfx) for pfx in file_exclude_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on exclude prefix.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            if isinstance(file_prefix_filter, str):
                file_prefix_filter = [file_prefix_filter]

            file_prefix_filter = [f.lower() for f in file_prefix_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements if not
                                any(i.filename.lower().startswith(pfx) for pfx in file_prefix_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on include prefix(es)')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}')
            # correct install size after filtering
            analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements
                                            if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        if processing_optimization and len(manifest.file_manifest_list.elements) > 100_000:
            self.log.warning('Manifest contains too many files, processing optimizations will be disabled.')
            processing_optimization = False
        elif processing_optimization:
            self.log.info('Processing order optimization is enabled, analysis may take a few seconds longer...')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        fmlist = sorted(manifest.file_manifest_list.elements,
                        key=lambda a: a.filename.lower())

        for fm in fmlist:
            self.hash_map[fm.filename] = fm.sha_hash.hex()

            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        if processing_optimization:
            s_time = time.time()
            # reorder the file manifest list to group files that share many chunks
            # 4 is mostly arbitrary but has shown in testing to be a good choice
            min_overlap = 4
            # ignore files with less than N chunk parts, this speeds things up dramatically
            cp_threshold = 5

            remaining_files = {fm.filename: {cp.guid_num for cp in fm.chunk_parts}
                               for fm in fmlist if fm.filename not in mc.unchanged}
            _fmlist = []

            # iterate over all files that will be downloaded and pair up those that share the most chunks
            for fm in fmlist:
                if fm.filename not in remaining_files:
                    continue

                _fmlist.append(fm)
                f_chunks = remaining_files.pop(fm.filename)
                if len(f_chunks) < cp_threshold:
                    continue

                best_overlap, match = 0, None
                for fname, chunks in remaining_files.items():
                    if len(chunks) < cp_threshold:
                        continue
                    overlap = len(f_chunks & chunks)
                    if overlap > min_overlap and overlap > best_overlap:
                        best_overlap, match = overlap, fname

                if match:
                    _fmlist.append(manifest.file_manifest_list.get_file_by_path(match))
                    remaining_files.pop(match)

            fmlist = _fmlist
            opt_delta = time.time() - s_time
            self.log.debug(f'Processing optimizations took {opt_delta:.01f} seconds.')

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(changed)
                new_file = manifest.file_manifest_list.get_file_by_path(changed)

                existing_chunks = defaultdict(list)
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[cp.guid_num].append((off, cp.offset, cp.offset + cp.size))
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    for file_o, cp_o, cp_end_o in existing_chunks[cp.guid_num]:
                        # check if new chunk part is wholly contained in the old chunk part
                        if cp_o <= cp.offset and (cp.offset + cp.size) <= cp_end_o:
                            references[cp.guid_num] -= 1
                            re_usable[changed][key] = file_o + (cp.offset - cp_o)
                            analysis_res.reuse_size += cp.size
                            break

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in fmlist:
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset, cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset, cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(f' + Reusing {reused} chunks from: {current_file.filename}')
                # open temporary file that will contain download + old file contents
                self.tasks.append(FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(FileTask(current_file.filename, delete=True, rename=True,
                                           temporary_filename=current_file.filename + u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB')
                last_cache_size = current_cache_size

        self.log.debug(f'Final cache size requirement: {last_cache_size / 1024 / 1024} MiB.')
        analysis_res.min_memory = last_cache_size + (1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            suggested_mib = round(self.max_shared_memory / 1024 / 1024 +
                                  (analysis_res.min_memory - self.max_shared_memory) / 1024 / 1024 + 32)

            if processing_optimization:
                message = f'Try running legendary with "--enable-reordering --max-shared-memory {suggested_mib:.0f}"'
            else:
                message = 'Try running legendary with "--enable-reordering" to reduce memory usage, ' \
                          f'or use "--max-shared-memory {suggested_mib:.0f}" to increase the limit.'

            raise MemoryError(f'Current shared memory cache is smaller than required: {shared_mib} < {required_mib}. '
                              + message)

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))
        self.tasks.extend(additional_deletion_tasks)

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res

    def download_job_manager(self, task_cond: Condition, shm_cond: Condition):
        while self.chunks_to_dl and self.running:
            while self.active_tasks < self.max_workers * 2 and self.chunks_to_dl:
                try:
                    sms = self.sms.popleft()
                    no_shm = False
                except IndexError:  # no free cache
                    no_shm = True
                    break

                c_guid = self.chunks_to_dl.popleft()
                chunk = self.chunk_data_list.get_chunk_by_guid(c_guid)
                self.log.debug(f'Adding {chunk.guid_num} (active: {self.active_tasks})')
                try:
                    self.dl_worker_queue.put(DownloaderTask(url=self.base_url + '/' + chunk.path,
                                                            chunk_guid=c_guid, shm=sms),
                                             timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to add to download queue: {e!r}')
                    self.chunks_to_dl.appendleft(c_guid)
                    break

                self.active_tasks += 1
            else:
                # active tasks limit hit, wait for tasks to finish
                with task_cond:
                    self.log.debug('Waiting for download tasks to complete..')
                    task_cond.wait(timeout=1.0)
                    continue

            if no_shm:
                # if we break we ran out of shared memory, so wait for that.
                with shm_cond:
                    self.log.debug('Waiting for more shared memory...')
                    shm_cond.wait(timeout=1.0)

        self.log.debug('Download Job Manager quitting...')

    def dl_results_handler(self, task_cond: Condition):
        in_buffer = dict()

        task = self.tasks.popleft()
        current_file = ''

        while task and self.running:
            if isinstance(task, FileTask):  # this wasn't necessarily a good idea...
                try:
                    if task.empty:
                        self.writer_queue.put(WriterTask(task.filename, empty=True), timeout=1.0)
                    elif task.rename:
                        self.writer_queue.put(WriterTask(task.filename, rename=True,
                                                         delete=task.delete,
                                                         old_filename=task.temporary_filename),
                                              timeout=1.0)
                    elif task.delete:
                        self.writer_queue.put(WriterTask(task.filename, delete=True, silent=task.silent), timeout=1.0)
                    elif task.open:
                        self.writer_queue.put(WriterTask(task.filename, fopen=True), timeout=1.0)
                        current_file = task.filename
                    elif task.close:
                        self.writer_queue.put(WriterTask(task.filename, close=True), timeout=1.0)
                except Exception as e:
                    self.tasks.appendleft(task)
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    continue

                try:
                    task = self.tasks.popleft()
                except IndexError:  # finished
                    break
                continue

            while (task.chunk_guid in in_buffer) or task.chunk_file:
                res_shm = None
                if not task.chunk_file:  # not re-using from an old file
                    res_shm = in_buffer[task.chunk_guid].shm

                try:
                    self.log.debug(f'Adding {task.chunk_guid} to writer queue')
                    self.writer_queue.put(WriterTask(
                        filename=current_file, shared_memory=res_shm,
                        chunk_offset=task.chunk_offset, chunk_size=task.chunk_size,
                        chunk_guid=task.chunk_guid, release_memory=task.cleanup,
                        old_file=task.chunk_file  # todo on-disk cache
                    ), timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    break

                if task.cleanup and not task.chunk_file:
                    del in_buffer[task.chunk_guid]

                try:
                    task = self.tasks.popleft()
                    if isinstance(task, FileTask):
                        break
                except IndexError:  # finished
                    task = None
                    break
            else:  # only enter blocking code if the loop did not break
                try:
                    res = self.dl_result_q.get(timeout=1)
                    self.active_tasks -= 1
                    with task_cond:
                        task_cond.notify()

                    if res.success:
                        self.log.debug(f'Download for {res.guid} succeeded, adding to in_buffer...')
                        in_buffer[res.guid] = res
                        self.bytes_downloaded_since_last += res.compressed_size
                        self.bytes_decompressed_since_last += res.size
                    else:
                        self.log.error(f'Download for {res.guid} failed, retrying...')
                        try:
                            self.dl_worker_queue.put(DownloaderTask(
                                url=res.url, chunk_guid=res.guid, shm=res.shm
                            ), timeout=1.0)
                            self.active_tasks += 1
                        except Exception as e:
                            self.log.warning(f'Failed adding retry task to queue! {e!r}')
                            # If this failed for whatever reason, put the chunk at the front of the DL list
                            self.chunks_to_dl.appendleft(res.chunk_guid)
                except Empty:
                    pass
                except Exception as e:
                    self.log.warning(f'Unhandled exception when trying to read download result queue: {e!r}')

        self.log.debug('Download result handler quitting...')

    def fw_results_handler(self, shm_cond: Condition):
        while self.running:
            try:
                res = self.writer_result_q.get(timeout=1.0)
                self.num_tasks_processed_since_last += 1

                if res.closed and self.resume_file and res.success:
                    if res.filename.endswith('.tmp'):
                        res.filename = res.filename[:-4]

                    file_hash = self.hash_map[res.filename]
                    # write last completed file to super simple resume file
                    with open(self.resume_file, 'ab') as rf:
                        rf.write(f'{file_hash}:{res.filename}\n'.encode('utf-8'))

                if res.kill:
                    self.log.debug('Got termination command in FW result handler')
                    break

                if not res.success:
                    # todo make this kill the installation process or at least skip the file and mark it as failed
                    self.log.fatal(f'Writing for {res.filename} failed!')
                if res.release_memory:
                    self.sms.appendleft(res.shm)
                    with shm_cond:
                        shm_cond.notify()

                if res.chunk_guid:
                    self.bytes_written_since_last += res.size
                    # if there's no shared memory we must have read from disk.
                    if not res.shm:
                        self.bytes_read_since_last += res.size
                    self.num_processed_since_last += 1

            except Empty:
                continue
            except Exception as e:
                self.log.warning(f'Exception when trying to read writer result queue: {e!r}')
        self.log.debug('Writer result handler quitting...')

    def run(self):
        if not self.analysis:
            raise ValueError('Did not run analysis before trying to run download!')

        # Subprocess will use its own root logger that logs to a Queue instead
        _root = logging.getLogger()
        _root.setLevel(logging.DEBUG if self.proc_debug else logging.INFO)
        if self.logging_queue:
            _root.handlers = []
            _root.addHandler(QueueHandler(self.logging_queue))

        self.log = logging.getLogger('DLManager')
        self.log.info(f'Download Manager running with process-id: {os.getpid()}')

        try:
            self.run_real()
        except KeyboardInterrupt:
            self.log.warning('Immediate exit requested!')
            self.running = False

            # send conditions to unlock threads if they aren't already
            for cond in self.conditions:
                with cond:
                    cond.notify()

            # make sure threads are dead.
            for t in self.threads:
                t.join(timeout=5.0)
                if t.is_alive():
                    self.log.warning(f'Thread did not terminate! {repr(t)}')

            # clean up all the queues, otherwise this process won't terminate properly
            for name, q in zip(('Download jobs', 'Writer jobs', 'Download results', 'Writer results'),
                               (self.dl_worker_queue, self.writer_queue, self.dl_result_q, self.writer_result_q)):
                self.log.debug(f'Cleaning up queue "{name}"')
                try:
                    while True:
                        _ = q.get_nowait()
                except Empty:
                    q.close()
                    q.join_thread()

    def run_real(self):
        self.shared_memory = SharedMemory(create=True, size=self.max_shared_memory)
        self.log.debug(f'Created shared memory of size: {self.shared_memory.size / 1024 / 1024:.02f} MiB')

        # create the shared memory segments and add them to their respective pools
        for i in range(int(self.shared_memory.size / self.analysis.biggest_chunk)):
            _sms = SharedMemorySegment(offset=i * self.analysis.biggest_chunk,
                                       end=i * self.analysis.biggest_chunk + self.analysis.biggest_chunk)
            self.sms.append(_sms)

        self.log.debug(f'Created {len(self.sms)} shared memory segments.')

        # Create queues
        self.dl_worker_queue = MPQueue(-1)
        self.writer_queue = MPQueue(-1)
        self.dl_result_q = MPQueue(-1)
        self.writer_result_q = MPQueue(-1)

        self.log.info(f'Starting download workers...')
        for i in range(self.max_workers):
            w = DLWorker(f'DLWorker {i + 1}', self.dl_worker_queue, self.dl_result_q,
                         self.shared_memory.name, logging_queue=self.logging_queue,
                         dl_timeout=self.dl_timeout)
            self.children.append(w)
            w.start()

        self.log.info('Starting file writing worker...')
        writer_p = FileWorker(self.writer_queue, self.writer_result_q, self.dl_dir,
                              self.shared_memory.name, self.cache_dir, self.logging_queue)
        self.children.append(writer_p)
        writer_p.start()

        num_chunk_tasks = sum(isinstance(t, ChunkTask) for t in self.tasks)
        num_dl_tasks = len(self.chunks_to_dl)
        num_tasks = len(self.tasks)
        num_shared_memory_segments = len(self.sms)
        self.log.debug(f'Chunks to download: {num_dl_tasks}, File tasks: {num_tasks}, Chunk tasks: {num_chunk_tasks}')

        # active downloader tasks
        self.active_tasks = 0
        processed_chunks = 0
        processed_tasks = 0
        total_dl = 0
        total_write = 0

        # synchronization conditions
        shm_cond = Condition()
        task_cond = Condition()
        self.conditions = [shm_cond, task_cond]

        # start threads
        s_time = time.time()
        self.threads.append(Thread(target=self.download_job_manager, args=(task_cond, shm_cond)))
        self.threads.append(Thread(target=self.dl_results_handler, args=(task_cond,)))
        self.threads.append(Thread(target=self.fw_results_handler, args=(shm_cond,)))

        for t in self.threads:
            t.start()

        last_update = time.time()

        while processed_tasks < num_tasks:
            delta = time.time() - last_update
            if not delta:
                time.sleep(self.update_interval)
                continue

            # update all the things
            processed_chunks += self.num_processed_since_last
            processed_tasks += self.num_tasks_processed_since_last

            total_dl += self.bytes_downloaded_since_last
            total_write += self.bytes_written_since_last

            dl_speed = self.bytes_downloaded_since_last / delta
            dl_unc_speed = self.bytes_decompressed_since_last / delta
            w_speed = self.bytes_written_since_last / delta
            r_speed = self.bytes_read_since_last / delta
            # c_speed = self.num_processed_since_last / delta

            # set temporary counters to 0
            self.bytes_read_since_last = self.bytes_written_since_last = 0
            self.bytes_downloaded_since_last = self.num_processed_since_last = 0
            self.bytes_decompressed_since_last = self.num_tasks_processed_since_last = 0
            last_update = time.time()

            perc = (processed_chunks / num_chunk_tasks) * 100
            runtime = time.time() - s_time
            total_avail = len(self.sms)
            total_used = (num_shared_memory_segments - total_avail) * (self.analysis.biggest_chunk / 1024 / 1024)

            if runtime and processed_chunks:
                rt_hours, runtime = int(runtime // 3600), runtime % 3600
                rt_minutes, rt_seconds = int(runtime // 60), int(runtime % 60)

                average_speed = processed_chunks / runtime
                estimate = (num_chunk_tasks - processed_chunks) / average_speed
                hours, estimate = int(estimate // 3600), estimate % 3600
                minutes, seconds = int(estimate // 60), int(estimate % 60)
            else:
                hours = minutes = seconds = 0
                rt_hours = rt_minutes = rt_seconds = 0

            bar.set_fraction(perc)
            bar.set_text(f'{perc:.02f}% ({processed_chunks}/{num_chunk_tasks}), '
                         f'Elapsed: {rt_hours:02d}:{rt_minutes:02d}:{rt_seconds:02d}, '
                         f'ETA: {hours:02d}:{minutes:02d}:{seconds:02d}, '
                         f'{dl_speed / 1024 / 1024:.02f} MiB/s')

            #self.log.info(f'= Progress: {perc:.02f}% ({processed_chunks}/{num_chunk_tasks}), '
            #              f'Running for {rt_hours:02d}:{rt_minutes:02d}:{rt_seconds:02d}, '
            #              f'ETA: {hours:02d}:{minutes:02d}:{seconds:02d}')
            #self.log.info(f' - Downloaded: {total_dl / 1024 / 1024:.02f} MiB, '
            #              f'Written: {total_write / 1024 / 1024:.02f} MiB')
            #self.log.info(f' - Cache usage: {total_used} MiB, active tasks: {self.active_tasks}')
            #self.log.info(f' + Download\t- {dl_speed / 1024 / 1024:.02f} MiB/s (raw) '
            #              f'/ {dl_unc_speed / 1024 / 1024:.02f} MiB/s (decompressed)')
            #self.log.info(f' + Disk\t- {w_speed / 1024 / 1024:.02f} MiB/s (write) / '
            #              f'{r_speed / 1024 / 1024:.02f} MiB/s (read)')

            # send status update to back to instantiator (if queue exists)
            if self.status_queue:
                try:
                    self.status_queue.put(UIUpdate(
                        progress=perc, download_speed=dl_unc_speed, write_speed=w_speed, read_speed=r_speed,
                        memory_usage=total_used * 1024 * 1024
                    ), timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to send status update to queue: {e!r}')

            time.sleep(self.update_interval)

        for i in range(self.max_workers):
            self.dl_worker_queue.put_nowait(DownloaderTask(kill=True))

        self.log.info('Waiting for installation to finish...')
        self.writer_queue.put_nowait(WriterTask('', kill=True))

        writer_p.join(timeout=10.0)
        if writer_p.exitcode is None:
            self.log.warning(f'Terminating writer process, no exit code!')
            writer_p.terminate()

        # forcibly kill DL workers that are not actually dead yet
        for child in self.children:
            if child.exitcode is None:
                child.terminate()

        # make sure all the threads are dead.
        for t in self.threads:
            t.join(timeout=5.0)
            if t.is_alive():
                self.log.warning(f'Thread did not terminate! {repr(t)}')

        # clean up resume file
        if self.resume_file:
            try:
                os.remove(self.resume_file)
            except OSError as e:
                self.log.warning(f'Failed to remove resume file: {e!r}')

        # close up shared memory
        self.shared_memory.close()
        self.shared_memory.unlink()
        self.shared_memory = None

        self.log.info('All done! Download manager quitting...')
        # finally, exit the process.
        exit(0)
Code Example #10
File: shared_memory.py Project: tangyiyong/mars
 async def delete(self, object_id):
     shm = SharedMemory(name=object_id)
     shm.unlink()
     self._object_ids.remove(object_id)
Code Example #11
File: shared_memory.py Project: tangyiyong/mars
 async def teardown(**kwargs):
     object_ids = kwargs.get('object_ids')
     for object_id in object_ids:
         shm = SharedMemory(name=object_id)
         shm.unlink()
         await asyncio.sleep(0)
Code Example #12
class SpaceColony:
    def __init__(
            self,
            points,
            roots=np.zeros((1, 3)),
            parameters=Param(r=0.04, iD=0.5, kD=0.2, bias=np.zeros(3)),
            trunk_lim=1,
            min_activation=5,
            yeet_condition=5,
            maxsize=100000,
            ncpu=cpu_count(),
            grow_function=(lambda v: normalize(v)),
    ):

        # Static information
        self.par = parameters
        self.ncpu = ncpu
        self.min_activation = min_activation
        self.trunk_lim = trunk_lim
        self.maxsize = maxsize
        self.yeet_condition = yeet_condition
        self.grow_function = grow_function

        self.nroots = len(roots)

        # Dynamic information
        self.age = 0
        self.start = 0
        self.end = len(roots)
        self.done = False
        self.trunk_mode = True
        self.yeet_count = 0

        self.activation = 0
        self.reached_points = 0
        self.stats = []

        self.dirty = True

        # Local dynamics
        self.edges = []
        self.children = [[] for _ in range(maxsize)]
        self.w = []

        # This array is sliced at start:
        self.points = points

        # This is sparta.
        self.lock = Lock()
        A = np.inf * np.ones((self.maxsize, 3), dtype=roots.dtype)

        self.vectors_sm = SharedMemory(create=True, size=A.nbytes)
        self.tree_sm = SharedMemory(create=True, size=A.nbytes)

        self.vectors = as_numpy_arr(A.shape, shared_obj=self.vectors_sm)
        self.vectors[:] = A[:]

        self.nodes = as_numpy_arr(A.shape, self.tree_sm)
        self.nodes[:] = A[:]
        for i in range(len(roots)):
            self.nodes[i] = roots[i]

        # Explicit pool creation for better control
        point_slices = np.array_split(self.points, self.ncpu)
        self.workers = []
        self.pipes = []
        for i in range(self.ncpu):
            parent_pipe, child_pipe = Pipe()
            self.pipes.append(parent_pipe)
            args = self.pack(point_slices[i], child_pipe)
            self.workers.append(Horse(*args))
            self.workers[i].start()

        self.running = True

    def iterate(self, N):
        self.dirty = True
        log.info(f"START: {time()}\n{self.__str__()}")
        for i in range(N):
            self.update_stats()
            if self.done:
                break

            for pipe in self.pipes:
                pipe.send(
                    Batch(True, 1, (self.start, self.end, self.trunk_mode)))

            result_list = [pipe.recv() for pipe in self.pipes]
            res = self.collect(result_list)
            self.grow(res)
            self.age += 1
            self.done_yet()

        log.info(f"DONE: {time()}\n{self.__str__()}")

    def stop(self):
        if not self.running:
            return

        log.info("Horse shutdown.")
        for pipe in self.pipes:
            pipe.send(Batch(False, 1, (None, )))
            pipe.close()

        for w in self.workers:
            w.join(1)
            w.terminate()
        self.pipes = []
        self.workers = []
        self.running = False

    def collect(self, result_list):
        self.activation = 0
        self.reached_points = 0
        result = []
        for res in result_list:
            self.activation += res[0]
            self.reached_points += res[1]
            for i in res[2]:
                if i not in result:
                    result.append(i)
        return result

    def grow(self, res):
        self.start = self.end
        for i in res:
            if self.end >= self.maxsize:
                log.info("Halt condition: node vector full.")
                self.done = True
                return
            self.nodes[self.end] = (
                self.nodes[i] +
                (self.grow_function(self.vectors[i]) + self.par.bias) *
                self.par.r)
            self.children[i].append(self.end)
            self.vectors[i] = np.ones(3) * np.inf
            self.end += 1

    def done_yet(self):
        if self.done:
            return True

        if self.trunk_mode:
            self.trunk_mode = self.activation <= self.trunk_lim
            if self.trunk_mode:
                return False
            else:
                log.info(f"Trunk mode disabled at {self.age} iterations.")

        if self.activation < self.min_activation:
            log.info(f"Halt condition: activation < {self.min_activation}.")
            self.done = True
            return True

        # The yeet condition is basically to inhibit periodic behaviours from growing
        # the structure ad infinitum. It stops iterating if it detects that activation
        # levels are not changing any more. There are some obvious corner cases to this,
        # same numerical activation does not imply that the same set of attractors are
        # active, but in practice this method is fast and works well enough.
        if self.age > self.yeet_condition:
            if np.abs(self.activation - self.stats[self.age - 1].act) < 3:
                self.yeet_count += 1
                if self.yeet_count >= self.yeet_condition:
                    self.end = self.stats[self.age - self.yeet_count].sz

                    for i in range(self.end):
                        self.children[i] = [
                            c for c in self.children[i] if c <= self.end - 1
                        ]

                    self.age -= self.yeet_count
                    self.stats = self.stats[:self.age]
                    log.info(f"Halt condition: yeet count {self.yeet_count}.")
                    self.done = True
                    return True
            else:
                self.yeet_count = 0
                return False

        return False

    # Populate edge table
    def walk(self):
        self.w = np.ones(self.maxsize)
        self.edges = []

        for i in range(self.nroots):
            self._walk(i)

        self.dirty = False

    def _walk(self, i):
        w = self.w[i]
        for j in self.children[i]:
            self.edges.append((i, j))
            w += self._walk(j)**2

        w = np.sqrt(w)
        self.w[i] = w
        return w

    # Use explicit packing/unpacking
    def pack(self, points, pipe):
        return (
            points,
            self.par.iD,
            self.par.kD,
            self.vectors_sm.name,
            self.tree_sm.name,
            self.maxsize,
            pipe,
            self.lock,
        )

    def update_stats(self):
        self.stats.append(Stats(self.end, self.activation,
                                self.reached_points))

    def get_stats(self):
        return self.stats

    def __str__(self):
        nproc = 0
        for w in self.workers:
            nproc += 1 if w.is_alive() else 0

        leaves = 0
        for i in range(self.end):
            if len(self.children[i]) == 0:
                leaves += 1

        return f"{self.end} nodes, {self.age} iterations \n\
                {self.activation}/{len(self.points) - self.reached_points} active points \n\
                Total {len(self.points)} points on {nproc}/{self.ncpu} processes \n\
                avg. branching: {leaves/(self.end+1)} \n\
                {self.par}"

    def __del__(self):
        log.debug("Delete SpaceColony")
        self.stop()
        self.vectors_sm.close()
        self.vectors_sm.unlink()
        self.tree_sm.close()
        self.tree_sm.unlink()
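The `as_numpy_arr` helper is not shown in this excerpt; judging from how it is called above, a thin zero-copy wrapper along these lines would fit (a sketch, not the project's actual implementation):

import numpy as np


def as_numpy_arr(shape, shared_obj, dtype=np.float64):
    # Hypothetical helper: view the SharedMemory block as an ndarray without
    # copying, so the parent and the Horse workers share node/vector state.
    return np.ndarray(shape, dtype=dtype, buffer=shared_obj.buf)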
Code Example #13
class DLManager(Process):
    def __init__(self,
                 download_dir,
                 base_url,
                 cache_dir=None,
                 status_q=None,
                 max_jobs=100,
                 max_failures=5,
                 max_workers=0,
                 update_interval=1.0,
                 max_shared_memory=1024 * 1024 * 1024,
                 resume_file=None):
        super().__init__(name='DLManager')
        self.log = logging.getLogger('DLM')
        self.proc_debug = False

        self.base_url = base_url
        self.dl_dir = download_dir
        self.cache_dir = cache_dir if cache_dir else os.path.join(
            download_dir, '.cache')

        # All the queues!
        self.logging_queue = None
        self.dl_worker_queue = None
        self.writer_queue = None
        self.dl_result_q = None
        self.writer_result_q = None
        self.max_jobs = max_jobs
        self.max_workers = max_workers if max_workers else min(
            cpu_count() * 2, 16)

        # Analysis stuff
        self.analysis = None
        self.tasks = deque()
        self.chunks_to_dl = deque()
        self.chunk_data_list = None

        # shared memory stuff
        self.max_shared_memory = max_shared_memory  # 1 GiB by default
        self.sms = deque()
        self.shared_memory = None

        # Interval for log updates and pushing updates to the queue
        self.update_interval = update_interval
        self.status_queue = status_q  # queue used to relay status info back to GUI/CLI

        # behaviour settings
        self.max_failures = max_failures
        self.resume_file = resume_file

        # cross-thread runtime information
        self.running = True
        self.active_tasks = 0
        self.children = []
        self.threads = []
        self.conditions = []
        # bytes downloaded and decompressed since last report
        self.bytes_downloaded_since_last = 0
        self.bytes_decompressed_since_last = 0
        # bytes written since last report
        self.bytes_written_since_last = 0
        # bytes read since last report
        self.bytes_read_since_last = 0
        # chunks written since last report
        self.num_processed_since_last = 0
        self.num_tasks_processed_since_last = 0

    def download_job_manager(self, task_cond: Condition, shm_cond: Condition):
        while self.chunks_to_dl and self.running:
            while self.active_tasks < self.max_workers * 2 and self.chunks_to_dl:
                try:
                    sms = self.sms.popleft()
                    no_shm = False
                except IndexError:  # no free cache
                    no_shm = True
                    break

                c_guid = self.chunks_to_dl.popleft()
                chunk = self.chunk_data_list.get_chunk_by_guid(c_guid)
                self.log.debug(
                    f'Adding {chunk.guid_num} (active: {self.active_tasks})')
                try:
                    self.dl_worker_queue.put(DownloaderTask(url=self.base_url +
                                                            '/' + chunk.path,
                                                            chunk_guid=c_guid,
                                                            shm=sms),
                                             timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Failed to add to download queue: {e!r}')
                    self.chunks_to_dl.appendleft(c_guid)
                    break

                self.active_tasks += 1
            else:
                # active tasks limit hit, wait for tasks to finish
                with task_cond:
                    self.log.debug('Waiting for download tasks to complete..')
                    task_cond.wait(timeout=1.0)
                    continue

            if no_shm:
                # if we break we ran out of shared memory, so wait for that.
                with shm_cond:
                    self.log.debug('Waiting for more shared memory...')
                    shm_cond.wait(timeout=1.0)

        self.log.info('Download Job Manager quitting...')

    def dl_results_handler(self, task_cond: Condition):
        in_buffer = dict()

        task = self.tasks.popleft()
        current_file = ''

        while task and self.running:
            if isinstance(task,
                          FileTask):  # this wasn't necessarily a good idea...
                try:
                    if task.empty:
                        self.writer_queue.put(WriterTask(task.filename,
                                                         empty=True),
                                              timeout=1.0)
                    elif task.rename:
                        self.writer_queue.put(WriterTask(
                            task.filename,
                            rename=True,
                            delete=task.delete,
                            old_filename=task.temporary_filename),
                                              timeout=1.0)
                    elif task.delete:
                        self.writer_queue.put(WriterTask(task.filename,
                                                         delete=True),
                                              timeout=1.0)
                    elif task.open:
                        self.writer_queue.put(WriterTask(task.filename,
                                                         fopen=True),
                                              timeout=1.0)
                        current_file = task.filename
                    elif task.close:
                        self.writer_queue.put(WriterTask(task.filename,
                                                         close=True),
                                              timeout=1.0)
                except Exception as e:
                    self.tasks.appendleft(task)
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    continue

                try:
                    task = self.tasks.popleft()
                except IndexError:  # finished
                    break
                continue

            while (task.chunk_guid in in_buffer) or task.chunk_file:
                res_shm = None
                if not task.chunk_file:  # not re-using from an old file
                    res_shm = in_buffer[task.chunk_guid].shm

                try:
                    self.writer_queue.put(
                        WriterTask(
                            filename=current_file,
                            shared_memory=res_shm,
                            chunk_offset=task.chunk_offset,
                            chunk_size=task.chunk_size,
                            chunk_guid=task.chunk_guid,
                            release_memory=task.cleanup,
                            old_file=task.chunk_file  # todo on-disk cache
                        ),
                        timeout=1.0)
                except Exception as e:
                    self.log.warning(f'Adding to queue failed: {e!r}')
                    break

                if task.cleanup and not task.chunk_file:
                    del in_buffer[task.chunk_guid]

                try:
                    task = self.tasks.popleft()
                    if isinstance(task, FileTask):
                        break
                except IndexError:  # finished
                    task = None
                    break
            else:  # only enter blocking code if the loop did not break
                try:
                    res = self.dl_result_q.get(timeout=1)
                    self.active_tasks -= 1
                    with task_cond:
                        task_cond.notify()

                    if res.success:
                        in_buffer[res.guid] = res
                        self.bytes_downloaded_since_last += res.compressed_size
                        self.bytes_decompressed_since_last += res.size
                    else:
                        self.log.error(
                            f'Download for {res.guid} failed, retrying...')
                        try:
                            self.dl_worker_queue.put(DownloaderTask(
                                url=res.url, chunk_guid=res.guid, shm=res.shm),
                                                     timeout=1.0)
                            self.active_tasks += 1
                        except Exception as e:
                            self.log.warning(
                                f'Failed adding retry task to queue! {e!r}')
                            # If this failed for whatever reason, put the chunk at the front of the DL list
                            self.chunks_to_dl.appendleft(res.chunk_guid)
                except Empty:
                    pass
                except Exception as e:
                    self.log.warning(
                        f'Unhandled exception when trying to read download result queue: {e!r}'
                    )

        self.log.info('Download result handler quitting...')

    def fw_results_handler(self, shm_cond: Condition):
        while self.running:
            try:
                res = self.writer_result_q.get(timeout=1.0)
                self.num_tasks_processed_since_last += 1

                if res.closed and self.resume_file:
                    # write last completed file to super simple resume file
                    with open(self.resume_file, 'ab') as rf:
                        rf.write(f'{res.filename}\n'.encode('utf-8'))

                if res.kill:
                    self.log.info(
                        'Got termination command in FW result handler')
                    break

                if not res.success:
                    # todo make this kill the installation process or at least skip the file and mark it as failed
                    self.log.fatal(f'Writing for {res.filename} failed!')
                if res.release_memory:
                    self.sms.appendleft(res.shm)
                    with shm_cond:
                        shm_cond.notify()

                if res.chunk_guid:
                    self.bytes_written_since_last += res.size
                    # if there's no shared memory we must have read from disk.
                    if not res.shm:
                        self.bytes_read_since_last += res.size
                    self.num_processed_since_last += 1

            except Empty:
                continue
            except Exception as e:
                self.log.warning(
                    f'Exception when trying to read writer result queue: {e!r}'
                )
        self.log.info('Writer result handler quitting...')

    def run_analysis(self,
                     manifest: Manifest,
                     old_manifest: Manifest = None,
                     patch=True,
                     resume=True,
                     file_prefix_filter=None,
                     file_exclude_filter=None,
                     file_install_tag=None) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary resources required in order to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :param file_install_tag: Only download files with this install tag
        :return: AnalysisResult
        """

        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(
            fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(
            c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(
            f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(
            f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})'
        )

        self.log.debug(f'Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
            try:
                completed_files = set(
                    i.strip() for i in open(self.resume_file).readlines())
                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.debug(
                    f'Skipped {len(completed_files)} files based on resume data!'
                )
            except Exception as e:
                self.log.warning(
                    f'Reading resume file failed: {e!r}, continuing as normal...'
                )

        # Not entirely sure what install tags are used for, only some titles have them.
        # Let's add it for testing anyway.
        if file_install_tag:
            files_to_skip = set(i.filename
                                for i in manifest.file_manifest_list.elements
                                if file_install_tag not in i.install_tags)
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on install tag.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            file_exclude_filter = file_exclude_filter.lower()
            files_to_skip = set(i for i in mc.added | mc.changed
                                if i.lower().startswith(file_exclude_filter))
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on exclude prefix.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            file_prefix_filter = file_prefix_filter.lower()
            files_to_skip = set(
                i for i in mc.added | mc.changed
                if not i.lower().startswith(file_prefix_filter))
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on include prefix.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(
                f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}'
            )
            # correct install size after filtering
            analysis_res.install_size = sum(
                fm.file_size for fm in manifest.file_manifest_list.elements
                if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        for fm in manifest.file_manifest_list.elements:
            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(
                    changed)
                new_file = manifest.file_manifest_list.get_file_by_path(
                    changed)

                existing_chunks = dict()
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[(cp.guid_num, cp.offset, cp.size)] = off
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    if key in existing_chunks:
                        references[cp.guid_num] -= 1
                        re_usable[changed][key] = existing_chunks[key]
                        analysis_res.reuse_size += cp.size

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in sorted(manifest.file_manifest_list.elements,
                                   key=lambda a: a.filename.lower()):
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset,
                                        cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset,
                                                       cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(
                    f' + Reusing {reused} chunks from: {current_file.filename}'
                )
                # open temporary file that will contain download + old file contents
                self.tasks.append(
                    FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(
                    FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(
                    FileTask(current_file.filename,
                             delete=True,
                             rename=True,
                             temporary_filename=current_file.filename +
                             u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(
                    f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB'
                )
                last_cache_size = current_cache_size

        self.log.debug(
            f'Final cache size requirement: {last_cache_size / 1024 / 1024:.02f} MiB.'
        )
        analysis_res.min_memory = last_cache_size + (
            1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            raise MemoryError(
                f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}'
            )

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res

    def run(self):
        if not self.analysis:
            raise ValueError(
                'Did not run analysis before trying to run download!')

        # Subprocess will use its own root logger that logs to a Queue instead
        _root = logging.getLogger()
        _root.setLevel(logging.DEBUG if self.proc_debug else logging.INFO)
        if self.logging_queue:
            _root.handlers = []
            _root.addHandler(QueueHandler(self.logging_queue))

        self.log = logging.getLogger('DLMProc')
        self.log.info(
            f'Download Manager running with process-id: {os.getpid()}')

        try:
            self.run_real()
        except KeyboardInterrupt:
            self.log.warning('Immediate exit requested!')
            self.running = False

            # send conditions to unlock threads if they aren't already
            for cond in self.conditions:
                with cond:
                    cond.notify()

            # make sure threads are dead.
            for t in self.threads:
                t.join(timeout=5.0)
                if t.is_alive():
                    self.log.warning(f'Thread did not terminate! {repr(t)}')

            # clean up all the queues, otherwise this process won't terminate properly
            for name, q in zip(('Download jobs', 'Writer jobs',
                                'Download results', 'Writer results'),
                               (self.dl_worker_queue, self.writer_queue,
                                self.dl_result_q, self.writer_result_q)):
                self.log.debug(f'Cleaning up queue "{name}"')
                try:
                    while True:
                        _ = q.get_nowait()
                except Empty:
                    q.close()
                    q.join_thread()

    def run_real(self):
        self.shared_memory = SharedMemory(create=True,
                                          size=self.max_shared_memory)
        self.log.debug(
            f'Created shared memory of size: {self.shared_memory.size / 1024 / 1024:.02f} MiB'
        )

        # create the shared memory segments and add them to their respective pools
        for i in range(
                int(self.shared_memory.size / self.analysis.biggest_chunk)):
            _sms = SharedMemorySegment(offset=i * self.analysis.biggest_chunk,
                                       end=i * self.analysis.biggest_chunk +
                                       self.analysis.biggest_chunk)
            self.sms.append(_sms)

        self.log.debug(f'Created {len(self.sms)} shared memory segments.')

        # Create queues
        self.dl_worker_queue = MPQueue(-1)
        self.writer_queue = MPQueue(-1)
        self.dl_result_q = MPQueue(-1)
        self.writer_result_q = MPQueue(-1)

        self.log.info(f'Starting download workers...')
        for i in range(self.max_workers):
            w = DLWorker(f'DLWorker {i + 1}',
                         self.dl_worker_queue,
                         self.dl_result_q,
                         self.shared_memory.name,
                         logging_queue=self.logging_queue)
            self.children.append(w)
            w.start()

        self.log.info('Starting file writing worker...')
        writer_p = FileWorker(self.writer_queue, self.writer_result_q,
                              self.dl_dir, self.shared_memory.name,
                              self.cache_dir, self.logging_queue)
        self.children.append(writer_p)
        writer_p.start()

        num_chunk_tasks = sum(isinstance(t, ChunkTask) for t in self.tasks)
        num_dl_tasks = len(self.chunks_to_dl)
        num_tasks = len(self.tasks)
        num_shared_memory_segments = len(self.sms)
        self.log.debug(
            f'Chunks to download: {num_dl_tasks}, File tasks: {num_tasks}, Chunk tasks: {num_chunk_tasks}'
        )

        # active downloader tasks
        self.active_tasks = 0
        processed_chunks = 0
        processed_tasks = 0
        total_dl = 0
        total_write = 0

        # synchronization conditions
        shm_cond = Condition()
        task_cond = Condition()
        self.conditions = [shm_cond, task_cond]

        # start threads
        s_time = time.time()
        self.threads.append(
            Thread(target=self.download_job_manager,
                   args=(task_cond, shm_cond)))
        self.threads.append(
            Thread(target=self.dl_results_handler, args=(task_cond, )))
        self.threads.append(
            Thread(target=self.fw_results_handler, args=(shm_cond, )))

        for t in self.threads:
            t.start()

        last_update = time.time()

        while processed_tasks < num_tasks:
            delta = time.time() - last_update
            if not delta:
                time.sleep(self.update_interval)
                continue

            # update all the things
            processed_chunks += self.num_processed_since_last
            processed_tasks += self.num_tasks_processed_since_last

            total_dl += self.bytes_downloaded_since_last
            total_write += self.bytes_written_since_last

            dl_speed = self.bytes_downloaded_since_last / delta
            dl_unc_speed = self.bytes_decompressed_since_last / delta
            w_speed = self.bytes_written_since_last / delta
            r_speed = self.bytes_read_since_last / delta
            c_speed = self.num_processed_since_last / delta

            # set temporary counters to 0
            self.bytes_read_since_last = self.bytes_written_since_last = 0
            self.bytes_downloaded_since_last = self.num_processed_since_last = 0
            self.bytes_decompressed_since_last = self.num_tasks_processed_since_last = 0
            last_update = time.time()

            perc = (processed_chunks / num_chunk_tasks) * 100
            self.log.info(
                f'\n============== {time.time() - s_time:.01f} seconds since start'
            )
            self.log.info(
                f'Progress: {processed_chunks}/{num_chunk_tasks} ({perc:.02f}%) chunk tasks processed.'
            )
            self.log.info(f'Downloaded: {total_dl / 1024 / 1024:.02f} MiB, '
                          f'Written: {total_write / 1024 / 1024:.02f} MiB')

            # speed meters
            self.log.info('Speeds:')
            self.log.info(
                f' + Download     - {dl_speed / 1024 / 1024:.02f} MiB/s (raw) '
                f'/ {dl_unc_speed / 1024 / 1024:.02f} MiB/s (decompressed)')
            self.log.info(
                f' + Write (disk) - {w_speed / 1024 / 1024:.02f} MiB/s')
            self.log.info(
                f' + Read (disk)  - {r_speed / 1024 / 1024:.02f} MiB/s')
            self.log.info(f' + Tasks        - {c_speed:.02f} Chunks/s')
            self.log.info(f'Active download tasks: {self.active_tasks}')

            # shared memory debugging
            total_avail = len(self.sms)
            total_used = (num_shared_memory_segments - total_avail) * (
                self.analysis.biggest_chunk / 1024 / 1024)
            self.log.info(
                f'Shared memory usage: {total_used:.02f} MiB, available segments: {total_avail}'
            )

            # send status update to back to instantiator (if queue exists)
            if self.status_queue:
                try:
                    self.status_queue.put(UIUpdate(progress=perc,
                                                   download_speed=dl_unc_speed,
                                                   write_speed=w_speed,
                                                   read_speed=r_speed,
                                                   memory_usage=total_used *
                                                   1024 * 1024),
                                          timeout=1.0)
                except Exception as e:
                    self.log.warning(
                        f'Failed to send status update to queue: {e!r}')

            time.sleep(self.update_interval)

        for i in range(self.max_workers):
            self.dl_worker_queue.put_nowait(DownloaderTask(kill=True))

        self.writer_queue.put_nowait(WriterTask('', kill=True))
        self.log.info('Waiting for writer process to finish...')

        writer_p.join(timeout=10.0)
        if writer_p.exitcode is None:
            self.log.warning('Writer process did not terminate in time, terminating it...')
            writer_p.terminate()

        # forcibly kill DL workers that are not actually dead yet
        for child in self.children:
            if child.exitcode is None:
                child.terminate()

        # make sure all the threads are dead.
        for t in self.threads:
            t.join(timeout=5.0)
            if t.is_alive():
                self.log.warning(f'Thread did not terminate! {repr(t)}')

        # clean up resume file
        if self.resume_file:
            try:
                os.remove(self.resume_file)
            except OSError as e:
                self.log.warning(f'Failed to remove resume file: {e!r}')

        # close up shared memory
        self.shared_memory.close()
        self.shared_memory.unlink()
        self.shared_memory = None

        # finally, exit the process.
        exit(0)
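
The run_real() method above shares one large SharedMemory block with every worker and tracks free space as a pool of fixed-size segments (self.sms), so only the block's name plus an offset range ever crosses a process boundary. Below is a minimal sketch of that pattern under assumed names; Segment, make_segment_pool and worker_fill are illustrative stand-ins, not part of the code above.

from dataclasses import dataclass
from multiprocessing.shared_memory import SharedMemory


@dataclass
class Segment:
    # Hypothetical stand-in for SharedMemorySegment: a half-open byte range
    # inside one shared block.
    offset: int
    end: int


def make_segment_pool(total_size: int, chunk_size: int):
    # Allocate one block and carve it into equally sized segments, mirroring
    # the loop over self.analysis.biggest_chunk in run_real().
    shm = SharedMemory(create=True, size=total_size)
    pool = [Segment(i * chunk_size, (i + 1) * chunk_size)
            for i in range(total_size // chunk_size)]
    return shm, pool


def worker_fill(shm_name: str, seg: Segment, data: bytes) -> None:
    # A worker re-attaches by name, writes only into its assigned segment,
    # then detaches; the creating process keeps ownership and unlinks later.
    shm = SharedMemory(name=shm_name)
    shm.buf[seg.offset:seg.offset + len(data)] = data
    shm.close()

Re-attaching by name in each worker is what lets the download and writer processes above exchange chunk data without pickling the buffers through the result queues.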
コード例 #14
0
ファイル: hooks.py プロジェクト: luizalabs/shared-memory-dict
def free_shared_memory(name: str) -> None:
    shared_memory = SharedMemory(MEMORY_NAME.format(name=name))
    shared_memory.close()
    shared_memory.unlink()
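
A minimal usage sketch for the hook above, assuming MEMORY_NAME is a plain format string; the value 'smd_{name}' below is illustrative and not taken from the project.

from multiprocessing.shared_memory import SharedMemory

MEMORY_NAME = 'smd_{name}'  # assumed format; the real constant lives elsewhere


def create_shared_memory(name: str, size: int = 1024) -> SharedMemory:
    # Counterpart to free_shared_memory(): create the named segment that other
    # processes can attach to via the same formatted name.
    return SharedMemory(name=MEMORY_NAME.format(name=name), create=True, size=size)


shm = create_shared_memory('demo')
shm.buf[:5] = b'hello'       # any process that knows the name can attach and read this
shm.close()                  # drop our own handle first...
free_shared_memory('demo')   # ...then the hook removes the segment for good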
コード例 #15
0
ファイル: collect.py プロジェクト: newcanopies/cartographer
def execute_with_strace(
        output_dir: Path = DEFAULT_OUTPUT_DIR,
        excluded_modules: Optional[Set[str]] = DEFAULT_EXCLUDED_MODULES):
    """Strace ansible module invocations.

    This context manager patches Ansible's ActionBase and StrategyBase classes
    to execute modules using strace.

    Parameters
    ----------
    output_dir : Path
        Directory for strace output.
    excluded_modules : Optional[Set[str]]
        Modules that will not be traced.
    """
    # Make output directory if it doesn't already exist
    output_dir.mkdir(exist_ok=True, parents=True)

    # If no excluded modules, make it an empty set
    if excluded_modules is None:
        excluded_modules = frozenset()

    # Clean output directory (remove all subdirectories and files)
    for path in output_dir.glob('*'):
        if path.is_dir():
            shutil.rmtree(path)
        if path.is_file():
            path.unlink()

    # Save references to original functions
    action_base_execute = ActionBase._low_level_execute_command

    # Define custom execute that wraps the ActionBase execute function
    @wraps(action_base_execute)
    def _execute_with_strace(self: ActionBase, cmd: str, *args,
                             **kwargs) -> Dict[str, Any]:
        """Execute commands with strace.

        This inner function modifies module commands to be run with strace
        before delegating to the original execute function. Ansible runs tasks
        through worker processes, which means that invocations of this function
        do not share memory/variables with the main process.
        """
        # Just execute the command if gathering facts
        if self._task.action == 'gather_facts':
            return action_base_execute(self, cmd, *args, **kwargs)

        # Get command parts
        parts = cmd.split()

        # Module metadata. This will be None if module is not traced, and
        # defined if the module is traced.
        metadata = None

        # If there are at least two parts, the command may be running a module
        if len(parts) >= 2:

            # Get the potential executable and module path
            executable = parts[0]
            module = parts[1]

            # Check for python and module
            python_match = R_PYTHON.match(executable)
            module_match = R_MODULE.match(module)

            # If it matches a python module that is not being excluded,
            # modify the command to run strace.
            if (python_match and module_match
                    and module_match.group('module') not in excluded_modules):

                # Get index shared memory, parse index, and increment the
                # value stored in shared memory
                index_shm = SharedMemory(name=INDEX_NAME)
                index = int.from_bytes(bytes=index_shm.buf.tobytes(),
                                       byteorder=sys.byteorder)
                index_shm.buf[:] = (index + 1).to_bytes(
                    byteorder=sys.byteorder, length=INDEX_BYTES)
                index_shm.close()

                # Create module output directory
                module_dir = output_dir / str(index)
                module_dir.mkdir(exist_ok=True, parents=True)

                # Read module source
                with open(module, 'r') as fd:
                    source_lines = fd.readlines()

                # Parse zip data
                zip_data = ''
                for line in source_lines:
                    line = line.strip()
                    if line.startswith(ZIPDATA):
                        zip_data = line[len(ZIPDATA) + 3:-3]
                        break

                # Extract zipped data for output
                with ExitStack() as stack:
                    t_fd = stack.enter_context(tempfile.NamedTemporaryFile())
                    t_fd.write(base64.b64decode(zip_data))
                    t_fd.flush()
                    z_fd = stack.enter_context(zipfile.ZipFile(t_fd.name))
                    z_fd.extractall(module_dir)

                # Copy module for output
                shutil.copy(module, module_dir)

                # Parse module arguments
                module_args = {}
                for line in source_lines:
                    line = line.strip()
                    if line.startswith(ANSIBALLZ_PARAMS):
                        start = len(ANSIBALLZ_PARAMS) + 1
                        try:
                            ansiballz_params_str = (
                                line[start:-1].encode('utf-8').decode(
                                    'unicode_escape', errors='ignore'))
                            ansiballz_params = json.loads(ansiballz_params_str)
                            module_args = {
                                key: value
                                for key, value in
                                ansiballz_params[ANSIBLE_MODULE_ARGS].items()
                                if not key.startswith('_ansible')
                            }
                        except (
                                UnicodeError,
                                JSONDecodeError,
                        ):
                            print('    Error parsing module params.')

                        break

                # Modify command and print info
                original_command = cmd
                cmd = (f'strace -DDD -f -y -yy -X raw -I 2 -o "| awk '
                       f'\'NR>{MAX_ROWS}{{print "\\""TRUNCATED"\\""; exit}}; '
                       f'{{print}}\' > {module_dir / "strace.txt"}" '
                       f'-e trace=!close {cmd}')
                print(f'    Modified Command: {cmd}')
                print(f'    Args: {module_args}')
                sys.stdout.flush()

                # Compute metadata
                metadata = {
                    'name': self._task.name,
                    'action': self._task.action,
                    'module': module_match.group('module'),
                    'index': index,
                    'original_cmd': original_command,
                    'modified_cmd': cmd,
                    'args': module_args,
                }

        # Don't worry about it. It's probably fine.
        self._task.ignore_errors = True

        # Delegate to the execute method.
        # The loader basedir replacement is to make sure the command is
        # executed in the correct working directory.
        loader_basedir = self._loader.get_basedir()
        self._loader.set_basedir(Path.cwd())
        execute_start_s = time()
        result = action_base_execute(self, cmd, *args, **kwargs)
        execute_end_s = time()
        self._loader.set_basedir(loader_basedir)
        execute_duration_s = execute_end_s - execute_start_s

        # Write metadata if the command was traced.
        # module_dir is defined iff metadata is.
        if metadata is not None:

            # Set execution duration and print info
            metadata['duration'] = execute_duration_s
            print(f'    Execution time: {execute_duration_s:.2f}s')

            # Deep copy result to be safe. This prevents us from accidentally
            # overriding anything when we parse stdout and stderr.
            metadata['result'] = copy.deepcopy(result)

            # Parse stdout as JSON if possible
            try:
                metadata['result']['stdout'] = json.loads(result['stdout'])
            except (TypeError, JSONDecodeError):
                pass

            # Parse stderr as JSON if possible
            try:
                metadata['result']['stderr'] = json.loads(result['stderr'])
            except (TypeError, JSONDecodeError):
                pass

            # Add result to metadata before writing
            with open(module_dir / 'metadata.json', 'w') as fd:
                json.dump(metadata, fd)

        # Return result
        return result

    # Replace original functions with custom ones
    ActionBase._low_level_execute_command = _execute_with_strace

    # 32 bit unsigned shared integer. To be used for module execution index
    # (artificial unique identifier for modules as they are executed). Initial
    # value is zero, and the value is incremented every time strace is run.
    index_shm = SharedMemory(
        name=INDEX_NAME,
        size=INDEX_BYTES,
        create=True,
    )
    index_shm.buf[:] = b'\x00\x00\x00\x00'

    # Yield context and then restore the original behavior.
    try:
        yield
    finally:
        ActionBase._low_level_execute_command = action_base_execute
        index_shm.unlink()
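
The INDEX_NAME segment above acts as a shared 32-bit counter: each Ansible worker process attaches by name, reads the current index, and writes back the incremented value, since the workers share no Python state with the main process. A minimal sketch of that counter pattern follows; the INDEX_NAME value and the two helpers are assumptions for illustration, and, like the code above, there is no locking, so it is only safe when increments cannot race.

import sys
from multiprocessing.shared_memory import SharedMemory

INDEX_NAME = 'module_index'  # assumed value; the real constant is defined elsewhere
INDEX_BYTES = 4              # 32-bit unsigned counter


def create_counter() -> SharedMemory:
    # Owner process: allocate the segment and zero the counter.
    shm = SharedMemory(name=INDEX_NAME, size=INDEX_BYTES, create=True)
    shm.buf[:INDEX_BYTES] = (0).to_bytes(INDEX_BYTES, sys.byteorder)
    return shm


def next_index() -> int:
    # Any other process: attach by name, read, increment, write back, detach.
    shm = SharedMemory(name=INDEX_NAME)
    value = int.from_bytes(shm.buf[:INDEX_BYTES], sys.byteorder)
    shm.buf[:INDEX_BYTES] = (value + 1).to_bytes(INDEX_BYTES, sys.byteorder)
    shm.close()
    return value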