def receive_cancel_this_query(self, qi):
    """Receive message: One query was dropped

    Parameters
    ----------
    qi: _actors.cached.query_infos.QueryInfos
    """
    msgs = []

    # Cancel waiting jobs
    jobs_to_kill = [
        job
        for job in self._waiting_jobs
        if job.qi == qi
    ]
    for job in jobs_to_kill:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
        self._waiting_jobs.remove(job)

    # Cancel working jobs
    jobs_to_kill = [
        job
        for job in self._working_jobs
        if job.qi == qi
    ]
    for job in jobs_to_kill:
        msgs += [Msg(self._working_room_address, 'cancel_job', job)]
        self._working_jobs.remove(job)

    if qi in self._prod_infos:
        del self._prod_infos[qi]

    return msgs
def ext_receive_new_query(self, queue_wref, max_queue_size, produce_fps,
                          channel_ids, is_flat, dst_nodata, interpolation,
                          parent_uid, key_in_parent):
    """Receive message sent by something other than an actor, still treated synchronously:
    There is a new query.

    Parameters
    ----------
    queue_wref: weakref.ref of queue.Queue
        Queue returned by the underlying `queue_data` (or behind a `(get|iter)_data`).
    max_queue_size: int
        Max queue size of the queue returned by the underlying `queue_data`
        (or behind a `(get|iter)_data`).
    produce_fps: sequence of Footprint
        Parameter of the underlying `(get|iter|queue)_data`
    channel_ids: sequence of int
        Parameter of the underlying `(get|iter|queue)_data`
    is_flat: bool
        Parameter of the underlying `(get|iter|queue)_data`
    dst_nodata: nbr
        Parameter of the underlying `(get|iter|queue)_data`
    interpolation: str
        Parameter of the underlying `(get|iter|queue)_data`
    parent_uid: None or uuid.UUID4
        uuid of parent raster
        if None: This query comes directly from the user
        else: The id of the parent that issued the query
    key_in_parent: None or object
        identity of this query in the parent query
        if None: This query comes directly from the user
        else: This query was issued by another raster
    """
    msgs = []

    qi = CachedQueryInfos(
        self._raster, produce_fps,
        channel_ids, is_flat, dst_nodata, interpolation,
        max_queue_size,
        parent_uid, key_in_parent,
    )
    self._raster.debug_mngr.event('object_allocated', qi)

    q = _Query(queue_wref)
    self._queries[qi] = q
    msgs += [
        Msg('ProductionGate', 'make_those_arrays', qi),
    ]
    if len(qi.list_of_cache_fp) > 0:
        msgs += [
            Msg('CacheSupervisor', 'make_those_cache_files_available', qi)
        ]

    return msgs
def receive_die(self):
    """Receive message: The raster was killed"""
    assert self._alive
    self._alive = False

    msgs = []
    for job in self._waiting_jobs:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
    for job in self._working_jobs:
        msgs += [Msg(self._working_room_address, 'cancel_job', job)]
    self._waiting_jobs.clear()
    self._working_jobs.clear()

    self._raster = None
    self._back_ds = None
    return msgs
def receive_die(self):
    """Receive message: The raster was killed (collected by the gc or closed by the user)"""
    assert self._alive
    self._alive = False

    msgs = []
    for job in self._waiting_jobs:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
    for job in self._working_jobs:
        msgs += [Msg(self._working_room_address, 'cancel_job', job)]
    self._waiting_jobs.clear()
    self._working_jobs.clear()

    self._raster = None
    return msgs
def _commit_work_result(self, job, result):
    if self._raster.io_pool is None or self._same_address_space:
        assert result is None
    else:
        job.dst_array_slice[:] = result

    dst_array = self._sample_array_per_prod_tile[job.qi][job.prod_idx]
    self._missing_cache_fps_per_prod_tile[job.qi][job.prod_idx].remove(job.cache_fp)

    # Perform fine-grained garbage collection
    if len(self._missing_cache_fps_per_prod_tile[job.qi][job.prod_idx]) == 0:
        # Done reading for that `(qi, prod_idx)`
        del self._missing_cache_fps_per_prod_tile[job.qi][job.prod_idx]
        del self._sample_array_per_prod_tile[job.qi][job.prod_idx]

        if len(self._missing_cache_fps_per_prod_tile[job.qi]) == 0:
            # No longer reading for that `qi`
            del self._missing_cache_fps_per_prod_tile[job.qi]
            del self._sample_array_per_prod_tile[job.qi]

    return [Msg(
        'CacheExtractor', 'sampled_a_cache_file_to_the_array',
        job.qi, job.prod_idx, job.cache_fp, dst_array,
    )]
def receive_sample_those_cache_files_to_an_array(self, qi, prod_idx):
    """Receive message: An array will be needed soon and requires one or more read
    operations. Perform those reads as the cache files become ready.
    """
    msgs = []

    cache_fps = qi.prod[prod_idx].cache_fps
    available_cache_fps = self._path_of_cache_files_ready.keys() & cache_fps
    missing_cache_fps = cache_fps - available_cache_fps

    for cache_fp in available_cache_fps:
        msgs += [Msg(
            'Reader', 'sample_cache_file_to_unique_array',
            qi, prod_idx, cache_fp, self._path_of_cache_files_ready[cache_fp],
        )]
    for cache_fp in missing_cache_fps:
        self._reads_waiting_for_cache_fp[cache_fp][qi].add(prod_idx)

    return msgs
def receive_cache_files_ready(self, path_of_cache_files_ready):
    """Receive message: Cache files are ready; some of them may already be known

    Parameters
    ----------
    path_of_cache_files_ready: dict from Footprint to str
    """
    msgs = []

    new_cache_fps = path_of_cache_files_ready.keys() - self._path_of_cache_files_ready.keys()
    self._path_of_cache_files_ready.update(path_of_cache_files_ready)

    for cache_fp in new_cache_fps:
        # TODO Idea: Send an external message to the facade to expose the set of paths
        # to cache files, guarded by a mutex.
        for qi, prod_idxs in self._reads_waiting_for_cache_fp[cache_fp].items():
            for prod_idx in prod_idxs:
                msgs += [Msg(
                    'Reader', 'sample_cache_file_to_unique_array',
                    qi, prod_idx, cache_fp, self._path_of_cache_files_ready[cache_fp],
                )]
        del self._reads_waiting_for_cache_fp[cache_fp]

    return msgs
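# `self._reads_waiting_for_cache_fp` behaves as a two-level mapping,
# Footprint -> query -> set of prod_idx. A sketch (not from the original code)
# of a matching structure, assuming it is built with `collections.defaultdict`,
# which would fit the `[cache_fp][qi].add(prod_idx)` usage seen in
# `receive_sample_those_cache_files_to_an_array`; the keys below are
# hypothetical placeholders:
from collections import defaultdict

reads_waiting_sketch = defaultdict(lambda: defaultdict(set))
reads_waiting_sketch['fp0']['qi0'].add(2)
reads_waiting_sketch['fp0']['qi1'].add(0)
assert reads_waiting_sketch['fp0']['qi0'] == {2}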
def _allow(qi, q):
    msgs = []

    qicc = qi.cache_computation
    max_prod_idx_allowed = q.pulled_count + qi.max_queue_size - 1
    i = q.allowed_count
    while True:
        # `list_of_compute_fp` being sorted by priority, `min_prod_idx` is
        # increasing between loop iterations.
        if i == len(qicc.list_of_compute_fp):
            break
        compute_fp = qicc.list_of_compute_fp[i]
        min_prod_idx = qicc.dict_of_min_prod_idx_per_compute_fp[compute_fp]
        if min_prod_idx > max_prod_idx_allowed:
            break
        msgs += [Msg(
            'ComputationGate2', 'compute_this_array', qi, i,
        )]
        i += 1
    q.allowed_count = i

    return msgs
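# A minimal, self-contained sketch (not part of the original code) of the
# backpressure arithmetic used by `_allow` above. The helper name and the
# plain-int arguments are hypothetical; it only illustrates how the window of
# allowed `prod_idx` slides forward as the user pulls arrays from the queue.
def _allowed_upper_bound_sketch(pulled_count, max_queue_size):
    # The user consumed `pulled_count` arrays and the output queue can hold at
    # most `max_queue_size` more, so a computation whose earliest consumer
    # (`min_prod_idx`) lies beyond this bound can safely wait.
    return pulled_count + max_queue_size - 1

assert _allowed_upper_bound_sketch(pulled_count=0, max_queue_size=5) == 4
assert _allowed_upper_bound_sketch(pulled_count=3, max_queue_size=5) == 7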
def receive_salvage_token(self, token):
    """Receive message: A job is done or cancelled; allow another job to proceed

    Parameters
    ----------
    token: _PoolToken
    """
    assert token in self._all_tokens, 'Received a token that is not owned by this waiting room'
    assert token not in self._tokens, 'Received a token that is already here'
    self._tokens.add(token)

    job_count = self._job_count
    if job_count == 0:
        return []
    else:
        token_count = len(self._tokens)
        assert token_count == 1, (
            'The way this class is designed, this point in the code should only '
            'be reached when token_count == 1'
        )
        job = self._unstore_most_urgent_job()
        return [Msg(job.sender_address, 'token_to_working_room', job, self._tokens.pop())]
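# A sketch (not from the original code) of the token-accounting invariant the
# waiting room maintains. `_PoolTokenSketch` is a hypothetical stand-in built
# as a trivial `int` subclass, matching the "(subclass of int)" note elsewhere
# in this file: at any time every token is either resting in the waiting room
# or lent to a working room, never both and never neither.
_PoolTokenSketch = type('_PoolTokenSketch', (int,), {})

all_tokens = {_PoolTokenSketch(i) for i in range(4)}  # e.g. one per pool worker
resting = set(all_tokens)                             # Available right now
lent = all_tokens - resting                           # Attached to running jobs
assert (resting | lent) == all_tokens and not (resting & lent)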
def receive_infer_cache_file_status(self, cache_fp, path):
    """Receive message: Infer the status of this cache file"""
    msgs = []
    if self._raster.io_pool is None:
        # No `io_pool` provided by user, perform the check right now on this thread.
        work = Work(self, cache_fp, path)
        status = work.func()
        msgs += [Msg('CacheSupervisor', 'inferred_cache_file_status', cache_fp, path, status)]
    else:
        # Enqueue job in the `Pool/WaitingRoom` actor.
        wait = Wait(self, cache_fp, path)
        self._waiting_jobs.add(wait)
        msgs += [Msg(self._waiting_room_address, 'schedule_job', wait)]
    return msgs
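# The branch above is an instance of the "run inline or enqueue" pattern that
# recurs in this file (see also `receive_merge_those_arrays` and
# `receive_write_this_array`): without a user-provided pool the job runs
# synchronously on the scheduler thread, otherwise a `Wait` job is parked in
# the waiting room until a token frees up. A stripped-down sketch with
# hypothetical factory callables:
def run_or_enqueue_sketch(pool, make_work, make_wait, schedule):
    if pool is None:
        # Degenerate case: no pool, compute the result immediately.
        return make_work().func()
    # Normal case: defer to the pool via the waiting room.
    schedule(make_wait())
    return None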
def receive_token_to_working_room(self, job, token):
    """Receive message: Waiting job can proceed to the working room"""
    self._waiting_jobs.remove(job)

    work = self._create_work_job(job.cache_fp, job.array_per_fp)
    self._working_jobs.add(work)
    return [Msg(self._working_room_address, 'launch_job_with_token', work, token)]
def _query_start_collection(self, qi, query):
    assert len(query.cache_fps_checking) == 0
    assert len(query.cache_fps_to_compute) > 0
    cache_fps = [
        fp
        for fp in qi.list_of_cache_fp
        if fp in query.cache_fps_to_compute
    ]
    assert qi.cache_computation is None
    qi.cache_computation = CacheComputationInfos(qi, self._raster, cache_fps)
    self._raster.debug_mngr.event('object_allocated', qi.cache_computation)
    return [
        Msg('/Global/GlobalPrioritiesWatcher', 'a_query_need_those_cache_tiles',
            self._raster.uid, qi, cache_fps),
        Msg('ComputationGate1', 'compute_those_cache_files', qi),
    ]
def receive_token_to_working_room(self, job, token):
    """Receive message: Waiting job can proceed to the working room"""
    self._waiting_jobs.remove(job)

    work = Work(self, job.cache_fp, job.path)
    self._working_jobs.add(work)
    return [Msg(self._working_room_address, 'launch_job_with_token', work, token)]
def queue_data(self, fps, channel_ids, dst_nodata, interpolation, max_queue_size,
               is_flat, parent_uid, key_in_parent):
    q = queue.Queue(max_queue_size)
    self.back_ds.put_message(Msg(
        '/Raster{}/QueriesHandler'.format(self.uid), 'new_query',
        weakref.ref(q), max_queue_size, fps, channel_ids, is_flat, dst_nodata,
        interpolation, parent_uid, key_in_parent,
    ))
    return q
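# A usage sketch (not from the original code) for the queue returned by
# `queue_data`. `r`, `fps` and all parameter values are hypothetical; the
# queue yields one ndarray per requested Footprint, in order, and polling with
# a timeout keeps the consumer responsive while the scheduler works.
import queue

def consume_sketch(r, fps):
    q = r.queue_data(
        fps, channel_ids=[0], dst_nodata=0., interpolation='cv_area',
        max_queue_size=5, is_flat=True, parent_uid=None, key_in_parent=None,
    )
    arrays = []
    for _ in fps:
        while True:
            try:
                arrays.append(q.get(timeout=0.1))
                break
            except queue.Empty:
                pass  # Still being produced, keep polling.
    return arrays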
def receive_inferred_cache_file_status(self, cache_fp, path, status):
    """Receive message: One cache tile was checked

    Parameters
    ----------
    cache_fp: Footprint
    path: str
    status: bool
    """
    msgs = []

    # Assertions
    assert self._cache_fps_status[cache_fp] == _CacheTileStatus.checking
    for query in self._queries.values():
        assert cache_fp not in query.cache_fps_ensured
        assert cache_fp not in query.cache_fps_to_compute

    if status:
        # This cache tile is OK to be read
        # - notify the production pipeline
        self._path_of_cache_fp[cache_fp] = path
        self._cache_fps_status[cache_fp] = _CacheTileStatus.ready
        self._raster.debug_mngr.event(
            'cache_file_update', self._raster.facade_proxy, cache_fp, 'ready')
        msgs += [Msg('CacheExtractor', 'cache_files_ready', {cache_fp: path})]
    else:
        # This cache tile was corrupted and removed
        self._cache_fps_status[cache_fp] = _CacheTileStatus.absent
        del self._path_of_cache_fp[cache_fp]
        self._raster.debug_mngr.event(
            'cache_file_update', self._raster.facade_proxy, cache_fp, 'absent')

    queries_treated = []
    for qi, query in self._queries.items():
        if cache_fp in query.cache_fps_checking:
            query.cache_fps_checking.remove(cache_fp)
            if status:
                query.cache_fps_ensured.add(cache_fp)
            else:
                query.cache_fps_to_compute.add(cache_fp)

            if len(query.cache_fps_checking) == 0:
                # CacheSupervisor is now done working on this query
                queries_treated.append(qi)
                if len(query.cache_fps_to_compute) > 0:
                    # Some tiles need to be computed and none need to be checked,
                    # launch collection right now.
                    msgs += self._query_start_collection(qi, query)

    for qi in queries_treated:
        del self._queries[qi]

    return msgs
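# `_CacheTileStatus` is used above as a small state enum for cache tiles. A
# sketch of a compatible definition; the member list is inferred from the
# states referenced in this file (`checking`, `ready`, `absent`), and the
# values, as well as any additional members, are assumptions.
import enum

class _CacheTileStatusSketch(enum.Enum):
    absent = 0
    checking = 1
    ready = 2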
def ext_receive_kill_raster(self, raster):
    """Receive message sent by something other than an actor, still treated synchronously:
    A raster is closing

    Parameters
    ----------
    raster: _a_recipe_raster.ABackRecipeRaster
    """
    msgs = []
    self._rasters.remove(raster)
    raster.debug_mngr.event('raster_stopped', raster.facade_proxy)

    # Deleting raster's actors *********************************************
    # Deal with QueriesHandler first.
    # TODO: Should the order of 'die' messages be enforced somewhere else?
    msgs += [
        Msg(address, 'die')
        for address in sorted(
            self._actor_addresses_of_raster[raster],
            key=lambda address: 'QueriesHandler' not in address,
        )
    ]
    del self._actor_addresses_of_raster[raster]

    # Deleting pools' actors ***********************************************
    pools = {
        id(pool): pool
        for attr in [
            'computation_pool', 'merge_pool', 'io_pool', 'resample_pool',
        ]
        if hasattr(raster, attr)
        for pool in [getattr(raster, attr)]
        if pool is not None
    }
    for pool_id in pools.keys():
        self._rasters_per_pool[pool_id].remove(raster)
        if len(self._rasters_per_pool[pool_id]) == 0:
            del self._rasters_per_pool[pool_id]
            msgs += [
                Msg(actor_address, 'die')
                for actor_address in self._actor_addresses_of_pool[pool_id]
            ]
            del self._actor_addresses_of_pool[pool_id]

    return msgs
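# The `pools` comprehension above uses `for pool in [getattr(raster, attr)]`
# as a "let binding": it names the attribute's value inside the comprehension
# so the `if pool is not None` filter can reuse it without calling `getattr`
# twice. A minimal sketch of the same idiom with a hypothetical object:
class _CfgSketch:
    io_pool, merge_pool, computation_pool = object(), None, object()

pools_sketch = {
    id(pool): pool
    for attr in ['io_pool', 'merge_pool', 'computation_pool']
    for pool in [getattr(_CfgSketch, attr)]  # Bind once, filter below
    if pool is not None
}
assert len(pools_sketch) == 2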
def receive_die(self):
    """Receive message: The raster was killed"""
    assert self._alive
    self._alive = False

    msgs = []
    for job in self._waiting_jobs:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
    for job in self._working_jobs:
        msgs += [Msg(self._working_room_address, 'cancel_job', job)]
    self._waiting_jobs.clear()
    self._working_jobs.clear()
    self._sample_array_per_prod_tile.clear()
    self._missing_cache_fps_per_prod_tile.clear()

    self._raster = None
    self._back_ds = None
    return msgs
def receive_cancel_job(self, job):
    """Receive message: A job you launched can be discarded. Drop the reference to
    its future.

    Parameters
    ----------
    job: _actors.pool_job.PoolJobWorking
    """
    _, token = self._jobs.pop(job)
    return [Msg('WaitingRoom', 'salvage_token', token)]
def receive_salvage_token(self, token):
    """Receive message: Your WaitingRoom allowed a job, but the job no longer needs
    to be performed.

    Parameters
    ----------
    token: _actors.pool_waiting_room._PoolToken (subclass of int)
    """
    return [Msg('WaitingRoom', 'salvage_token', token)]
def receive_make_this_array(self, qi, prod_idx):
    """Receive message: Start making this array

    Parameters
    ----------
    qi: _actors.cached.query_infos.QueryInfos
    prod_idx: int
    """
    msgs = []

    pi = qi.prod[prod_idx]  # type: CacheProduceInfos
    assert pi.share_area is (len(pi.cache_fps) != 0)
    if pi.share_area:
        # This prod_idx requires some cache file reads (this is the case most of the time)
        msgs += [Msg(
            'CacheExtractor', 'sample_those_cache_files_to_an_array', qi, prod_idx,
        )]

    for resample_fp in pi.resample_fps:
        sample_fp = pi.resample_sample_dep_fp[resample_fp]
        if sample_fp is None:
            # Start the 'resampling' step for the `resample_fp`s that fall fully
            # outside of the raster.
            assert (resample_fp not in pi.resample_cache_deps_fps or
                    len(pi.resample_cache_deps_fps[resample_fp]) == 0)
            msgs += [Msg(
                'Resampler', 'resample_and_accumulate',
                qi, prod_idx, None, resample_fp, None,
            )]

    self._produce_per_query[qi][prod_idx] = _ProdArray(pi)

    return msgs
def receive_merge_those_arrays(self, cache_fp, array_per_fp):
    """Receive message: Merge those arrays into one cache tile array"""
    msgs = []

    assert len(array_per_fp) > 0
    if len(array_per_fp) == 1:
        # A single array covers the cache tile, no merge needed; write it as-is.
        (fp, arr), = array_per_fp.items()
        assert fp.almost_equals(cache_fp)
        msgs += [Msg('Writer', 'write_this_array', cache_fp, arr)]
    elif self._raster.merge_pool is None:
        # No `merge_pool` provided by user, perform the merge right now on this thread.
        work = self._create_work_job(cache_fp, array_per_fp)
        res = work.func()
        res = self._normalize_user_result(cache_fp, res)
        msgs += self._commit_work_result(work, res)
    else:
        # Enqueue job in the `Pool/WaitingRoom` actor.
        wait = Wait(self, cache_fp, array_per_fp)
        self._waiting_jobs.add(wait)
        msgs += [Msg(self._waiting_room_address, 'schedule_job', wait)]

    return msgs
def ext_receive_nothing(self):
    """Receive message sent by something other than an actor, still treated synchronously:
    What's up? Did a job finish? Check all futures.
    """
    msgs = []

    finished_jobs = [
        job
        for job, (future, _) in self._jobs.items()
        if future.ready()
    ]
    for job in finished_jobs:
        future, token = self._jobs.pop(job)
        res = future.get()
        msgs += [
            Msg(job.sender_address, 'job_done', job, res),
            Msg('WaitingRoom', 'salvage_token', token),
        ]

    return msgs
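# The `future.ready()` / `future.get()` calls above match the interface of
# `multiprocessing.pool.AsyncResult`, the object returned by `apply_async` on
# both `multiprocessing.pool.Pool` and `ThreadPool`. A minimal sketch (not
# from the original code) of the same non-blocking poll outside the actor
# framework:
from multiprocessing.pool import ThreadPool

with ThreadPool(2) as pool_sketch:
    futures = [pool_sketch.apply_async(pow, (2, i)) for i in range(4)]
    done = [f for f in futures if f.ready()]  # Non-blocking check, as above
    results = [f.get() for f in done]         # `get` won't block once ready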
def receive_job_done(self, job, result):
    """Receive message: A write operation is complete

    Parameters
    ----------
    job: Work
    result: str
        Path to the written file
    """
    self._working_jobs.remove(job)
    return [Msg('CacheSupervisor', 'cache_file_written', job.cache_fp, result)]
def _cancel_query(self, qi):
    q = self._queries.pop(qi)
    assert q.produced_count != qi.produce_count, "This query finished and can't be cancelled"
    LOGGER.warning('Dropping a query with {}/{} arrays produced.'.format(
        q.produced_count, qi.produce_count,
    ))
    return [
        Msg('/Global/GlobalPrioritiesWatcher', 'cancel_this_query', self._raster.uid, qi),
        Msg('ProductionGate', 'cancel_this_query', qi),
        Msg('Producer', 'cancel_this_query', qi),
        Msg('Resampler', 'cancel_this_query', qi),
        Msg('CacheExtractor', 'cancel_this_query', qi),
        Msg('Reader', 'cancel_this_query', qi),
        Msg('CacheSupervisor', 'cancel_this_query', qi),
        Msg('ComputationGate1', 'cancel_this_query', qi),
        Msg('ComputationGate2', 'cancel_this_query', qi),
        Msg('Computer', 'cancel_this_query', qi),
    ]
def receive_die(self):
    """Receive message: The raster was killed"""
    assert self._alive
    self._alive = False

    msgs = []
    msgs += [
        Msg(self._waiting_room_address, 'unschedule_job', job)
        for jobs in self._waiting_jobs_per_query.values()
        for job in jobs
    ]
    self._waiting_jobs_per_query.clear()
    msgs += [
        Msg(self._working_room_address, 'cancel_job', job)
        for job in self._working_jobs
    ]
    self._working_jobs.clear()

    self._raster = None
    return msgs
def receive_cancel_this_query(self, qi):
    """Receive message: One query was dropped

    Parameters
    ----------
    qi: _actors.cached.query_infos.QueryInfos
    """
    msgs = []

    # Cancel waiting jobs
    jobs_to_kill = [job for job in self._waiting_jobs if job.qi == qi]
    for job in jobs_to_kill:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
        self._waiting_jobs.remove(job)

    # Cancel working jobs
    jobs_to_kill = [job for job in self._working_jobs if job.qi == qi]
    for job in jobs_to_kill:
        msgs += [Msg(self._working_room_address, 'cancel_job', job)]
        self._working_jobs.remove(job)

    # Clean data structures
    if qi in self._sample_array_per_prod_tile:
        del self._sample_array_per_prod_tile[qi]
        del self._missing_cache_fps_per_prod_tile[qi]

    return msgs
def receive_write_this_array(self, cache_fp, array):
    """Receive message: Please write this array to disk

    Parameters
    ----------
    cache_fp: Footprint of shape (Y, X)
    array: ndarray of shape (Y, X, C)
    """
    msgs = []
    if self._raster.io_pool is None:
        # No `io_pool` provided by user, perform write operation right now on this thread.
        work = Work(self, cache_fp, array)
        path = work.func()
        msgs += [Msg('CacheSupervisor', 'cache_file_written', cache_fp, path)]
    else:
        # Enqueue job in the `Pool/WaitingRoom` actor.
        wait = Wait(self, cache_fp, array)
        self._waiting_jobs.add(wait)
        msgs += [Msg(self._waiting_room_address, 'schedule_job', wait)]
    return msgs
def receive_cancel_this_query(self, qi):
    """Receive message: One query was dropped

    Parameters
    ----------
    qi: _actors.cached.query_infos.QueryInfos
    """
    msgs = []
    for job in self._waiting_jobs_per_query[qi]:
        msgs += [Msg(self._waiting_room_address, 'unschedule_job', job)]
    del self._waiting_jobs_per_query[qi]
    return msgs
def receive_token_to_working_room(self, job, token):
    """Receive message: Waiting job can proceed to the working room"""
    self._waiting_jobs.remove(job)

    work = self._create_interpolation_work_job(
        job.qi, job.prod_idx, job.sample_fp, job.resample_fp, job.subsample_array,
    )
    self._working_jobs.add(work)
    return [Msg(self._working_room_address, 'launch_job_with_token', work, token)]
def receive_sampled_a_cache_file_to_the_array(self, qi, prod_idx, cache_fp, array):
    """Receive message: A cache file was read for that output array"""
    return [Msg(
        'Producer', 'sampled_a_cache_file_to_the_array',
        qi, prod_idx, cache_fp, array,
    )]