def __call__(self):
    saw_sentinel = False

    while not self.stop.is_set():
        try:
            data = self.input_queue.get(timeout=timeout_seconds)
        except Queue.Empty:
            continue
        if data is None:
            saw_sentinel = True
            break

        metadata = data['metadata']
        coord_handle = metadata['coord_handle']
        coord = data['coord']
        timing_state = metadata['timing_state']

        start = time.time()

        try:
            self.inflight_mgr.unmark_inflight(coord)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            self.tile_proc_logger.error(
                'Unmarking in-flight error', e, stacktrace, coord)
            continue

        queue_handle, err = _ack_coord_handle(
            coord, coord_handle, self.queue_mapper, self.msg_tracker,
            timing_state, self.tile_proc_logger, self.stats_handler)
        if err is not None:
            continue

        timing = metadata['timing']
        now = time.time()
        timing['ack'] = convert_seconds_to_millis(now - start)

        # time spent waiting on the queue, if the message carried a
        # timestamp when it was enqueued
        time_in_queue = 0
        msg_timestamp = timing_state['msg_timestamp']
        if msg_timestamp:
            time_in_queue = convert_seconds_to_millis(now) - msg_timestamp
        timing['queue'] = time_in_queue

        layers = metadata['layers']
        size = layers['size']

        store_info = metadata['store']

        coord_proc_data = CoordProcessData(
            coord,
            timing,
            size,
            store_info,
        )
        self.tile_proc_logger.log_processed_coord(coord_proc_data)
        self.stats_handler.processed_coord(coord_proc_data)

    if not saw_sentinel:
        _force_empty_queue(self.input_queue)
    self.tile_proc_logger.lifecycle('tile queue writer stopped')
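
# A minimal, self-contained sketch of the sentinel-plus-drain shutdown
# pattern the worker loops in this file rely on: a None payload marks
# end-of-input, and anything still queued after an early stop is
# discarded so upstream put() calls can't block forever. The names
# `_sentinel_worker_sketch` and `_drain_sketch` are illustrative, not
# tilequeue APIs; `_drain_sketch` only mirrors what _force_empty_queue
# is assumed to do.
import Queue


def _sentinel_worker_sketch(input_queue, stop, timeout=5.0):
    saw_sentinel = False
    while not stop.is_set():
        try:
            item = input_queue.get(timeout=timeout)
        except Queue.Empty:
            continue
        if item is None:
            saw_sentinel = True
            break
        # ... handle item ...
    if not saw_sentinel:
        _drain_sketch(input_queue)


def _drain_sketch(q):
    # empty the queue without blocking
    while True:
        try:
            q.get_nowait()
        except Queue.Empty:
            return
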
def _fetch(self, fetch, coord, metadata):
    nominal_zoom = coord.zoom + self.metatile_zoom
    start_zoom = coord.zoom + self.metatile_start_zoom
    unpadded_bounds = coord_to_mercator_bounds(coord)

    start = time.time()
    source_rows = fetch(nominal_zoom, unpadded_bounds)
    metadata['timing']['fetch'] = convert_seconds_to_millis(
        time.time() - start)

    # every tile job that we get from the queue is a "parent" tile
    # together with the four children to cut from it. at zoom 15, this
    # may also include a whole batch of further children below the max
    # zoom.
    cut_coords = list(
        coord_children_subrange(coord, start_zoom, nominal_zoom))

    return dict(
        metadata=metadata,
        coord=coord,
        source_rows=source_rows,
        unpadded_bounds=unpadded_bounds,
        cut_coords=cut_coords,
        nominal_zoom=nominal_zoom,
    )
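
# A rough sketch of what a children-subrange enumeration does; this is
# an assumption about coord_children_subrange's behavior for
# illustration, not its actual implementation. `_SketchCoord` stands in
# for the Coordinate type used above: yield the tile itself and every
# descendant whose zoom falls within [zoom_start, zoom_until].
from collections import namedtuple

_SketchCoord = namedtuple('_SketchCoord', 'zoom column row')


def _children_subrange_sketch(coord, zoom_start, zoom_until):
    if coord.zoom > zoom_until:
        return
    if coord.zoom >= zoom_start:
        yield coord
    # each tile has four children at the next zoom level
    c, r, z = coord.column * 2, coord.row * 2, coord.zoom + 1
    for dx in (0, 1):
        for dy in (0, 1):
            child = _SketchCoord(z, c + dx, r + dy)
            for descendant in _children_subrange_sketch(
                    child, zoom_start, zoom_until):
                yield descendant
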
def __call__(self, stop):
    # ignore ctrl-c interrupts when run from terminal
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    output = OutputQueue(self.output_queue, self.tile_proc_logger, stop)

    saw_sentinel = False
    while not stop.is_set():
        try:
            data = self.input_queue.get(timeout=timeout_seconds)
        except Queue.Empty:
            continue
        if data is None:
            saw_sentinel = True
            break

        coord = data['coord']
        unpadded_bounds = data['unpadded_bounds']
        cut_coords = data['cut_coords']
        nominal_zoom = data['nominal_zoom']
        source_rows = data['source_rows']

        start = time.time()

        try:
            feature_layers = convert_source_data_to_feature_layers(
                source_rows, self.layer_data, unpadded_bounds,
                nominal_zoom)
            formatted_tiles, extra_data = process_coord(
                coord, nominal_zoom, feature_layers,
                self.post_process_data, self.formats, unpadded_bounds,
                cut_coords, self.buffer_cfg, self.output_calc_mapping)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            self.tile_proc_logger.error(
                'Processing error', e, stacktrace, coord)
            self.stats_handler.proc_error()
            continue

        metadata = data['metadata']
        metadata['timing']['process'] = convert_seconds_to_millis(
            time.time() - start)
        metadata['layers'] = extra_data

        data = dict(
            metadata=metadata,
            coord=coord,
            formatted_tiles=formatted_tiles,
        )

        if output(coord, data):
            break

    if not saw_sentinel:
        _force_empty_queue(self.input_queue)
    self.tile_proc_logger.lifecycle('processor stopped')
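
# Sketch of the contract the `if output(coord, data): break` calls in
# this file assume; this is a guess at OutputQueue's behavior for
# illustration, not its real implementation: try to enqueue the data,
# keep re-checking the stop event while the downstream queue is full,
# and return True only when a stop was requested before the item could
# be enqueued.
import Queue


class _SketchOutputQueue(object):

    def __init__(self, output_queue, stop):
        self.output_queue = output_queue
        self.stop = stop

    def __call__(self, coord, data):
        while not self.stop.is_set():
            try:
                self.output_queue.put(data, timeout=1)
                return False  # enqueued; caller keeps processing
            except Queue.Full:
                continue  # queue full: re-check the stop flag and retry
        return True  # stop requested; caller should break its loop
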
def _ack_coord_handle(
        coord, coord_handle, queue_mapper, msg_tracker, timing_state,
        tile_proc_logger, stats_handler):
    """share code for acknowledging a coordinate"""

    # returns tuple of (handle, error), either of which can be None

    track_result = msg_tracker.done(coord_handle)
    queue_handle = track_result.queue_handle
    if not queue_handle:
        return None, None

    tile_queue = queue_mapper.get_queue(queue_handle.queue_id)
    assert tile_queue, \
        'Missing tile_queue: %s' % queue_handle.queue_id

    parent_tile = None
    if track_result.all_done:
        parent_tile = track_result.parent_tile

        try:
            tile_queue.job_done(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            tile_proc_logger.error_job_done(
                'tile_queue.job_done', e, stacktrace,
                coord, parent_tile,
            )
            return queue_handle, e

    if parent_tile is not None:
        # we completed a tile pyramid and should log appropriately
        start_time = timing_state['start']
        stop_time = convert_seconds_to_millis(time.time())
        tile_proc_logger.log_processed_pyramid(
            parent_tile, start_time, stop_time)
        stats_handler.processed_pyramid(
            parent_tile, start_time, stop_time)
    else:
        try:
            tile_queue.job_progress(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            err_details = {"queue_handle": queue_handle.handle}
            if isinstance(e, JobProgressException):
                err_details = e.err_details
            tile_proc_logger.error_job_progress(
                'tile_queue.job_progress', e, stacktrace,
                coord, parent_tile, err_details,
            )
            return queue_handle, e

    return queue_handle, None
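
# Sketch of the per-message bookkeeping _ack_coord_handle leans on.
# This is a hypothetical, stdlib-only stand-in for msg_tracker, not
# tilequeue's implementation: each coordinate from a queue message gets
# its own handle, and done() reports all_done once the last of them is
# acknowledged, which is what gates the single job_done call above.
import threading
from collections import namedtuple

_SketchTrackResult = namedtuple(
    '_SketchTrackResult', 'queue_handle all_done parent_tile')


class _SketchMsgTracker(object):

    def __init__(self):
        self.lock = threading.Lock()
        self.records = {}  # coord handle -> shared per-message record

    def track(self, queue_handle, coords, parent_tile):
        # one shared, mutable record counts down as coords complete
        record = dict(
            queue_handle=queue_handle,
            parent_tile=parent_tile,
            remaining=len(coords),
        )
        handles = [(id(record), i) for i in range(len(coords))]
        with self.lock:
            for handle in handles:
                self.records[handle] = record
        return handles

    def done(self, coord_handle):
        with self.lock:
            record = self.records.pop(coord_handle)
            record['remaining'] -= 1
            return _SketchTrackResult(
                record['queue_handle'],
                record['remaining'] == 0,
                record['parent_tile'],
            )
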
def __call__(self, stop):
    saw_sentinel = False
    queue_output = OutputQueue(
        self.output_queue, self.tile_proc_logger, stop)

    while not stop.is_set():
        try:
            data = self.input_queue.get(timeout=timeout_seconds)
        except Queue.Empty:
            continue
        if data is None:
            saw_sentinel = True
            break

        coord = data['coord']

        start = time.time()
        try:
            async_jobs = self.save_tiles(data['formatted_tiles'])
        except Exception as e:
            # cannot propagate this error - it crashes the thread and
            # blocks up the whole queue!
            stacktrace = format_stacktrace_one_line()
            self.tile_proc_logger.error('Save error', e, stacktrace, coord)
            continue

        async_exc_info = None
        e = None
        n_stored = 0
        n_not_stored = 0
        for async_job in async_jobs:
            try:
                did_store = async_job.get()
                if did_store:
                    n_stored += 1
                else:
                    n_not_stored += 1
            except Exception as e:
                # it's important to wait for all async jobs to complete,
                # but we only keep a reference to the last exception;
                # it's unlikely that we would receive multiple different
                # exceptions when uploading to s3
                async_exc_info = sys.exc_info()

        if async_exc_info:
            stacktrace = format_stacktrace_one_line(async_exc_info)
            self.tile_proc_logger.error(
                'Store error', e, stacktrace, coord)
            continue

        metadata = data['metadata']
        metadata['timing']['s3'] = convert_seconds_to_millis(
            time.time() - start)
        metadata['store'] = dict(
            stored=n_stored,
            not_stored=n_not_stored,
        )

        data = dict(
            coord=coord,
            metadata=metadata,
        )

        if queue_output(coord, data):
            break

    if not saw_sentinel:
        _force_empty_queue(self.input_queue)
    self.tile_proc_logger.lifecycle('s3 storage stopped')
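
# The result-gathering loop above waits on every async job even after a
# failure, so no upload is orphaned mid-flight. A condensed sketch of
# that pattern (`_wait_all_sketch` is illustrative, not a tilequeue
# helper); it works with any objects exposing an AsyncResult-style
# .get(), e.g. the results of multiprocessing.pool.ThreadPool's
# apply_async.
import sys


def _wait_all_sketch(async_jobs):
    results = []
    last_exc_info = None
    for job in async_jobs:
        try:
            results.append(job.get())
        except Exception:
            # keep waiting on the rest; remember one failure to report
            last_exc_info = sys.exc_info()
    return results, last_exc_info
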
def __call__(self):
    while not self.stop.is_set():

        msg_handles = ()

        for queue_id, tile_queue in (
                self.queue_mapper.queues_in_priority_order()):
            try:
                msg_handles = tile_queue.read()
            except Exception as e:
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.error(
                    'Queue read error', e, stacktrace)
                continue
            if msg_handles:
                break

        if not msg_handles:
            continue

        for msg_handle in msg_handles:
            # if asked to stop, break as soon as possible
            if self.stop.is_set():
                break

            now = convert_seconds_to_millis(time.time())
            msg_timestamp = None
            if msg_handle.metadata:
                msg_timestamp = msg_handle.metadata.get('timestamp')
            timing_state = dict(
                msg_timestamp=msg_timestamp,
                start=now,
            )

            coords = self.msg_marshaller.unmarshall(msg_handle.payload)

            # it seems unlikely, but just in case there are no
            # coordinates in the payload, there's nothing to do, so skip
            # to the next payload.
            if not coords:
                continue

            # check for duplicate coordinates - for the message tracking
            # to work, we assume that coordinates are unique, as we use
            # them as keys in a dict. (plus, it doesn't make a lot of
            # sense to render the coordinate twice in the same job
            # anyway).
            coords = list(set(coords))

            parent_tile = self._parent(coords)

            queue_handle = QueueHandle(queue_id, msg_handle.handle)
            coord_handles = self.msg_tracker.track(
                queue_handle, coords, parent_tile)

            all_coords_data = []
            for coord, coord_handle in izip(coords, coord_handles):
                if coord.zoom > self.max_zoom:
                    self._reject_coord(coord, coord_handle, timing_state)
                    continue

                metadata = dict(
                    # the timing is just what will be filled out later
                    timing=dict(
                        fetch=None,
                        process=None,
                        s3=None,
                        ack=None,
                    ),
                    # this is temporary state that is used later on to
                    # determine timing information
                    timing_state=timing_state,
                    coord_handle=coord_handle,
                )
                data = dict(
                    metadata=metadata,
                    coord=coord,
                )

                all_coords_data.append(data)

            # we might have no coordinates if we rejected all the
            # coordinates. in which case, there's nothing to do anyway,
            # as the _reject_coord method will have marked the job as
            # done.
            if all_coords_data:
                coord_input_spec = all_coords_data, parent_tile
                msg = "group of %d tiles below %s" \
                      % (len(all_coords_data), serialize_coord(parent_tile))
                if self.output(msg, coord_input_spec):
                    break

    for _, tile_queue in self.queue_mapper.queues_in_priority_order():
        tile_queue.close()
    self.tile_proc_logger.lifecycle('tile queue reader stopped')
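
# End-to-end, the classes in this file form a thread pipeline (reader ->
# fetch -> processor -> s3 storage -> writer) linked by bounded queues.
# A hypothetical wiring sketch follows; none of these names are
# tilequeue's actual command-layer API, and it assumes each stage is a
# callable taking (input_queue, output_queue) that forwards the None
# sentinel to its output queue before returning.
import Queue
import threading


def _run_pipeline_sketch(stages, queue_size=10):
    queues = [Queue.Queue(maxsize=queue_size)
              for _ in range(len(stages) + 1)]
    threads = []
    for stage, q_in, q_out in zip(stages, queues[:-1], queues[1:]):
        t = threading.Thread(target=stage, args=(q_in, q_out))
        t.start()
        threads.append(t)
    return queues[0], queues[-1], threads


def _shutdown_pipeline_sketch(first_queue, threads):
    # one sentinel ripples down the pipeline as each stage forwards it
    first_queue.put(None)
    for t in threads:
        t.join()
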