def test_read_multiple(self): from mock import MagicMock from tilequeue.tile import deserialize_coord from tilequeue.tile import serialize_coord coord1 = deserialize_coord('1/1/1') coord2 = deserialize_coord('2/2/2') sqs_queue1 = MagicMock() sqs_queue2 = MagicMock() sqs_queue1.name = 'q1' sqs_queue1.get_messages = self._make_get_messages( [serialize_coord(coord1)]) sqs_queue2.name = 'q2' sqs_queue2.get_messages = self._make_get_messages( [serialize_coord(coord2)]) msq = self._make_one( [sqs_queue1, sqs_queue2], _test_q1_q2_get_queue_name_for_zoom, ) coord_msgs = msq.read() self.assertEqual(2, len(coord_msgs)) self.assertEqual(coord1, coord_msgs[0].coord) self.assertEqual(coord2, coord_msgs[1].coord)
def test_tilequeue_intersect_does_not_enqueue_coords(self): from mock import MagicMock from tilequeue.command import tilequeue_intersect from ModestMaps.Core import Coordinate from tilequeue.tile import serialize_coord cfg_mock = MagicMock() cfg_mock.queue_type = 'sqs' periperals_mock = MagicMock() c0 = Coordinate(row=0, column=0, zoom=0) c1 = Coordinate(row=1, column=1, zoom=1) periperals_mock.redis_cache_index = MagicMock( intersect=lambda x, y: ([])) queue_mock = MagicMock() periperals_mock.queue = queue_mock queue_mock.enqueue = self.fake_enqueue queue_mock.enqueue_batch = self.fake_enqueue_batch import os with tempdir() as expired_tiles_location: expected_file = os.path.join(expired_tiles_location, 'expire_list.txt') with open(expected_file, "w+") as fp: fp.write(serialize_coord(c0) + "\n" + serialize_coord(c1)) cfg_mock.intersect_expired_tiles_location = expired_tiles_location cfg_mock.logconfig = None tilequeue_intersect(cfg_mock, periperals_mock) self.assertNotIn(c0, self.enqueued_list) self.assertNotIn(c1, self.enqueued_list)
def enqueue(self, coord): if not self._inflight(coord): payload = serialize_coord(coord) message = RawMessage() message.set_body(payload) self.sqs_queue.write(message) self._add_to_flight(coord)
def async_store(store, tile_data, coord, format, layer): """update cache store with tile_data""" try: store.write_tile(tile_data, coord, format, layer) except: stacktrace = format_stacktrace_one_line() print 'Error storing coord %s with format %s: %s' % ( serialize_coord(coord), format.extension, stacktrace)
def _add_to_tiles_of_interest(self, coord): coord_str = serialize_coord(coord) try: self.redis_cache_index.index_coord(coord) print 'Added to tiles of interest: %s' % coord_str except: print >> sys.stderr, \ 'Error adding to tiles of interest: %s' % coord_str traceback.print_exc(file=sys.stderr)
def _write_batch(self, coords): assert len(coords) <= 10 values = [] msg_tuples = [] for i, coord in enumerate(coords): msg_tuples.append((str(i), serialize_coord(coord), 0)) values.append(coord_marshall_int(coord)) self.sqs_queue.write_batch(msg_tuples) self.redis_client.sadd(self.inflight_key, *values)
def test_job_done_removes_tile_from_in_flight(self): from tilequeue.tile import CoordMessage coord = Coordinate(row=1, column=1, zoom=1) payload = serialize_coord(coord) message = RawMessage() message.set_body(payload) coord_message = CoordMessage(coord, message) self.sqs.job_done(coord_message) from tilequeue.tile import coord_marshall_int exp_value = coord_marshall_int(coord) self.mockRedis.srem.assert_called_once_with(self.sqs.inflight_key, exp_value)
def async_enqueue(sqs_queue, coord): """enqueue a coordinate for offline processing This ensures that when we receive a request for a tile format that hasn't been generated yet, we create the other formats eventually. """ try: sqs_queue.enqueue(coord) except: stacktrace = format_stacktrace_one_line() print 'Error enqueueing coord %s: %s\n' % ( serialize_coord(coord), stacktrace)
def test_read_single(self): from mock import MagicMock from tilequeue.tile import deserialize_coord from tilequeue.tile import serialize_coord sqs_queue = MagicMock() coord = deserialize_coord('1/1/1') sqs_queue.get_messages = self._make_get_messages( [serialize_coord(coord)]) msq = self._make_one([sqs_queue]) coord_msgs = msq.read() self.assertEqual(1, len(coord_msgs)) self.assertEqual(coord, coord_msgs[0].coord)
def enqueue(self, coord): if not coord_is_valid(coord): # TODO log? return coord_int = coord_marshall_int(coord) if not self._inflight(coord): payload = serialize_coord(coord) message = RawMessage() message.set_body(payload) sqs_queue_name = self.get_queue_name_for_zoom(coord.zoom) sqs_queue = self.sqs_queue_for_name.get(sqs_queue_name) assert sqs_queue, 'No queue found for: %s' % sqs_queue_name sqs_queue.write(message) self._add_to_flight(coord_int)
def _write_batch(self, sqs_queue, buf): assert len(buf) <= self.queue_buf_size msg_tuples = [] coord_ints = [] for i, (coord, coord_int) in enumerate(buf): msg_id = str(i) coord_str = serialize_coord(coord) msg_delay = 0 msg_tuple = (msg_id, coord_str, msg_delay) msg_tuples.append(msg_tuple) coord_ints.append(coord_int) sqs_queue.write_batch(msg_tuples) self.redis_client.sadd(self.inflight_key, *coord_ints)
def __call__(self, stop): # ignore ctrl-c interrupts when run from terminal signal.signal(signal.SIGINT, signal.SIG_IGN) saw_sentinel = False while not stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break coord = data['coord'] feature_layers = data['feature_layers'] unpadded_bounds = data['unpadded_bounds'] cut_coords = data['cut_coords'] nominal_zoom = data['nominal_zoom'] start = time.time() try: formatted_tiles, extra_data = process_coord( coord, nominal_zoom, feature_layers, self.post_process_data, self.formats, unpadded_bounds, cut_coords, self.buffer_cfg) except: stacktrace = format_stacktrace_one_line() self.logger.error('Error processing: %s - %s' % (serialize_coord(coord), stacktrace)) continue metadata = data['metadata'] metadata['timing']['process_seconds'] = time.time() - start metadata['layers'] = extra_data data = dict( metadata=metadata, coord=coord, formatted_tiles=formatted_tiles, ) while not _non_blocking_put(self.output_queue, data): if stop.is_set(): break if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('processor stopped')
def __call__(self, stop): # ignore ctrl-c interrupts when run from terminal signal.signal(signal.SIGINT, signal.SIG_IGN) saw_sentinel = False while not stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break coord = data['coord'] feature_layers = data['feature_layers'] unpadded_bounds = data['unpadded_bounds'] cut_coords = data['cut_coords'] start = time.time() try: formatted_tiles, extra_data = process_coord( coord, feature_layers, self.post_process_data, self.formats, unpadded_bounds, cut_coords, self.layers_to_format, self.buffer_cfg) except: stacktrace = format_stacktrace_one_line() self.logger.error('Error processing: %s - %s' % ( serialize_coord(coord), stacktrace)) continue metadata = data['metadata'] metadata['timing']['process_seconds'] = time.time() - start metadata['layers'] = extra_data data = dict( metadata=metadata, coord=coord, formatted_tiles=formatted_tiles, ) while not _non_blocking_put(self.output_queue, data): if stop.is_set(): break if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('processor stopped')
def process_work_data(): while True: work_data = work.get() if work_data is None: break coord = work_data['coord'] format = work_data['format'] key_path = 'osm/all/%s.%s' % ( serialize_coord(coord), format.extension) key = bucket.get_key(key_path) # this shouldn't practically happen if key is None: continue size = key.size add_size(work_data['metro'], coord, format, size)
def process_work_data(): while True: work_data = work.get() if work_data is None: break coord = work_data['coord'] format = work_data['format'] key_path = 'osm/all/%s.%s' % (serialize_coord(coord), format.extension) key = bucket.get_key(key_path) # this shouldn't practically happen if key is None: continue size = key.size add_size(work_data['metro'], coord, format, size)
def job_done(self, coord_message): queue_name = None if coord_message.metadata: queue_name = coord_message.metadata.get('queue_name') assert queue_name, \ 'Missing queue name metadata for coord: %s' % serialize_coord( coord_message.coord) sqs_queue = self.sqs_queue_for_name.get(queue_name) assert sqs_queue, 'Missing queue for: %s' % queue_name coord_int = coord_marshall_int(coord_message.coord) self.redis_client.srem(self.inflight_key, coord_int) sqs_queue.delete_message(coord_message.message_handle)
def test_enqueue_and_enqueue_batch(self): from tilequeue.tile import serialize_coord # Test `.enqueue_batch()`. num_to_enqueue = 3 self.assertEqual( self.queue.enqueue_batch( map(serialize_coord, self.test_tile_objs[:num_to_enqueue])), (num_to_enqueue, 0), 'Return value of `enqueue_batch()` does not match expected') # Test `.enqueue()`. for coord in self.test_tile_objs[num_to_enqueue:]: self.queue.enqueue(serialize_coord(coord)) self.assertEqual(self.tiles_fp.getvalue(), self.tile_coords_str, 'Contents of file do not match expected')
def missing_jobs(missing_bucket, rawr_bucket, date_prefix, region, config, group_by_zoom=10, queue_zoom=7, key_format_type='prefix-hash', tile_coords_generator=None, tile_verifier=None): """ Write and yield file containing a z/x/y coordinate for each job (at the job zoom) corresponding to a missing tile (at the tile zoom) in the bucket. Cleans up the temporary directory after the yielded-to code returns. """ tiles = missing_tiles(missing_bucket, rawr_bucket, date_prefix, region, key_format_type, config, group_by_zoom, tile_coords_generator, tile_verifier) # the rawr tiles of `group_by_zoom` is actually built by AWS batch jobs of # `queue_zoom` so we need to do a zoomTo here to find the corresponding jobs job_coords = {coord.zoomTo(queue_zoom).container() for coord in tiles} if bool(tile_coords_generator): print('[make_rawr_tiles] Going to rebuild %d tiles by (%d jobs)' % (len(tiles), len(job_coords))) else: print('[make_rawr_tiles] Missing %d tiles (%d jobs)' % (len(tiles), len(job_coords))) tmpdir = tempfile.mkdtemp() try: missing_file = os.path.join(tmpdir, 'missing_jobs.txt') with open(missing_file, 'w') as fh: for coord in sorted(job_coords): fh.write(serialize_coord(coord) + '\n') if tile_verifier is not None: tile_verifier.generate_tile_coords_rebuild_paths_rawr( [coord], group_by_zoom) yield missing_file finally: shutil.rmtree(tmpdir)
def async_update_tiles_of_interest(redis_cache_index, coord): """update tiles of interest set The tiles of interest represent all tiles that will get processed on osm diffs. Our policy is to cache tiles up to zoom level 20. As an optimization, because the queries only change up until zoom level 16, ie they are the same for z16+, we enqueue work at z16, and the higher zoom tiles get generated by cutting the z16 tile appropriately. This means that when we receive requests for tiles > z16, we need to also track the corresponding tile at z16, otherwise those tiles would never get regenerated. """ try: if coord.zoom <= 20: redis_cache_index.index_coord(coord) if coord.zoom > 16: coord_at_z16 = coord.zoomTo(16).container() redis_cache_index.index_coord(coord_at_z16) except: stacktrace = format_stacktrace_one_line() print 'Error updating tiles of interest for coord %s: %s\n' % ( serialize_coord(coord), stacktrace)
def __call__(self): while not self.stop.is_set(): msg_handles = () for queue_id, tile_queue in ( self.queue_mapper.queues_in_priority_order()): try: msg_handles = tile_queue.read() except Exception as e: stacktrace = format_stacktrace_one_line() self.tile_proc_logger.error( 'Queue read error', e, stacktrace) continue if msg_handles: break if not msg_handles: continue for msg_handle in msg_handles: # if asked to stop, break as soon as possible if self.stop.is_set(): break now = convert_seconds_to_millis(time.time()) msg_timestamp = None if msg_handle.metadata: msg_timestamp = msg_handle.metadata.get('timestamp') timing_state = dict( msg_timestamp=msg_timestamp, start=now, ) coords = self.msg_marshaller.unmarshall(msg_handle.payload) # it seems unlikely, but just in case there are no coordinates # in the payload, there's nothing to do, so skip to the next # payload. if not coords: continue # check for duplicate coordinates - for the message tracking to # work, we assume that coordinates are unique, as we use them # as keys in a dict. (plus, it doesn't make a lot of sense to # render the coordinate twice in the same job anyway). coords = list(set(coords)) parent_tile = self._parent(coords) queue_handle = QueueHandle(queue_id, msg_handle.handle) coord_handles = self.msg_tracker.track( queue_handle, coords, parent_tile) all_coords_data = [] for coord, coord_handle in izip(coords, coord_handles): if coord.zoom > self.max_zoom: self._reject_coord(coord, coord_handle, timing_state) continue metadata = dict( # the timing is just what will be filled out later timing=dict( fetch=None, process=None, s3=None, ack=None, ), # this is temporary state that is used later on to # determine timing information timing_state=timing_state, coord_handle=coord_handle, ) data = dict( metadata=metadata, coord=coord, ) all_coords_data.append(data) # we might have no coordinates if we rejected all the # coordinates. in which case, there's nothing to do anyway, as # the _reject_coord method will have marked the job as done. if all_coords_data: coord_input_spec = all_coords_data, parent_tile msg = "group of %d tiles below %s" \ % (len(all_coords_data), serialize_coord(parent_tile)) if self.output(msg, coord_input_spec): break for _, tile_queue in self.queue_mapper.queues_in_priority_order(): tile_queue.close() self.tile_proc_logger.lifecycle('tile queue reader stopped')
def marshall(self, coords): return ','.join(serialize_coord(x) for x in coords)
def __call__(self, stop): saw_sentinel = False while not stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break coord = data['coord'] start = time.time() try: fetch_data = self.fetcher(coord) except: exc_type, exc_value, exc_traceback = sys.exc_info() stacktrace = format_stacktrace_one_line( (exc_type, exc_value, exc_traceback)) if isinstance(exc_value, TransactionRollbackError): log_level = logging.WARNING else: log_level = logging.ERROR self.logger.log(log_level, 'Error fetching: %s - %s' % ( serialize_coord(coord), stacktrace)) continue metadata = data['metadata'] metadata['timing']['fetch_seconds'] = time.time() - start # if we are at zoom level 16, it will serve as a metatile # to derive the tiles underneath it cut_coords = None if coord.zoom == 16: cut_coords = [] async_jobs = [] children_until = 20 # ask redis if there are any tiles underneath in the # tiles of interest set rci = self.redis_cache_index async_fn = rci.is_coord_int_in_tiles_of_interest for child in coord_children_range(coord, children_until): zoomed_coord_int = coord_marshall_int(child) async_result = self.io_pool.apply_async( async_fn, (zoomed_coord_int,)) async_jobs.append((child, async_result)) async_exc_info = None for async_job in async_jobs: zoomed_coord, async_result = async_job try: is_coord_in_tiles_of_interest = async_result.get() except: async_exc_info = sys.exc_info() stacktrace = format_stacktrace_one_line(async_exc_info) self.logger.error(stacktrace) else: if is_coord_in_tiles_of_interest: cut_coords.append(zoomed_coord) if async_exc_info: continue data = dict( metadata=metadata, coord=coord, feature_layers=fetch_data['feature_layers'], unpadded_bounds=fetch_data['unpadded_bounds'], cut_coords=cut_coords, ) while not _non_blocking_put(self.output_queue, data): if stop.is_set(): break if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('data fetch stopped')
def __call__(self): saw_sentinel = False while not self.stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break metadata = data['metadata'] coord_message = metadata['coord_message'] coord = data['coord'] start = time.time() try: self.sqs_queue.job_done(coord_message) except: stacktrace = format_stacktrace_one_line() self.logger.error('Error acknowledging: %s - %s' % ( serialize_coord(coord), stacktrace)) continue timing = metadata['timing'] now = time.time() timing['ack_seconds'] = now - start coord_message = metadata['coord_message'] msg_metadata = coord_message.metadata time_in_queue = 0 if msg_metadata: sqs_timestamp_millis = msg_metadata.get('timestamp') if sqs_timestamp_millis is not None: sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0 time_in_queue = now - sqs_timestamp_seconds layers = metadata['layers'] size = layers['size'] size_as_str = repr(size) store_info = metadata['store'] self.logger.info( '%s ' 'data(%.2fs) ' 'proc(%.2fs) ' 's3(%.2fs) ' 'ack(%.2fs) ' 'sqs(%.2fs) ' 'size(%s) ' 'stored(%s) ' 'not_stored(%s)' % ( serialize_coord(coord), timing['fetch_seconds'], timing['process_seconds'], timing['s3_seconds'], timing['ack_seconds'], time_in_queue, size_as_str, store_info['stored'], store_info['not_stored'], )) if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('sqs queue writer stopped')
def test_unmarshall_single(self): from tilequeue.tile import serialize_coord coords = self.msg_marshaller.unmarshall('1/1/1') self.assertEqual(1, len(coords)) self.assertEqual('1/1/1', serialize_coord(coords[0]))
def enqueue(self, coord): with self.lock: payload = serialize_coord(coord) self.fp.write(payload + '\n')
def missing_tiles_split(self, group_by_zoom, queue_zoom, big_jobs, try_generator): # type: (int, int, CoordSet, bool) -> Iterator[MissingTiles] """ To be used in a with-statement. Yields a MissingTiles object, giving information about the tiles which are missing. High zoom jobs are output either at group_by_zoom (RAWR tile granularity) or queue_zoom (usually lower, e.g: 7) depending on whether big_jobs contains a truthy value for the RAWR tile. The big_jobs are looked up at queue_zoom. """ def get_coord(coords_source, coordinate): if isinstance(coords_source, io.IOBase): return deserialize_coord(coordinate) else: return coordinate tmpdir = tempfile.mkdtemp() try: missing_low_file = os.path.join(tmpdir, 'missing.low.txt') missing_high_file = os.path.join(tmpdir, 'missing.high.txt') print( '[%s][make_meta_tiles] Splitting into high and low zoom lists' % (time.ctime())) # contains zooms 0 until group_by_zoom. the jobs between the group # zoom and RAWR zoom are merged into the parent at group zoom. missing_low = CoordSet(max_zoom=queue_zoom) # 7 # contains job coords at either queue_zoom or group_by_zoom only. # queue_zoom is always less than or equal to group_by_zoom? missing_high = CoordSet(min_zoom=queue_zoom, max_zoom=group_by_zoom) coords_src = self.generate_missing_tile_coords( tmpdir, try_generator) for c in coords_src: this_coord = get_coord(coords_src, c) if this_coord.zoom < group_by_zoom: # 10 # in order to not have too many jobs in the queue, we # group the low zoom jobs to the zoom_max (usually 7) if this_coord.zoom > queue_zoom: # 7 this_coord = this_coord.zoomTo(queue_zoom).container() missing_low[this_coord] = True else: # if the group of jobs at queue_zoom would be too big # (according to big_jobs[]) then enqueue the original # coordinate. this is to prevent a "long tail" of huge # job groups. job_coord = this_coord.zoomTo(queue_zoom).container() # 7 if not big_jobs[job_coord]: this_coord = job_coord missing_high[this_coord] = True if isinstance(coords_src, io.IOBase): coords_src.close() is_using_tile_verifier = try_generator and self.tile_verifier is not None with open(missing_low_file, 'w') as fh: for coord in missing_low: fh.write(serialize_coord(coord) + '\n') if is_using_tile_verifier: self.tile_verifier.generate_tile_coords_rebuild_paths_low_zoom( job_coords=[coord], queue_zoom=queue_zoom, group_by_zoom=group_by_zoom) with open(missing_high_file, 'w') as fh: for coord in missing_high: fh.write(serialize_coord(coord) + '\n') if is_using_tile_verifier: self.tile_verifier.generate_tile_coords_rebuild_paths_high_zoom( job_coords=[coord], queue_zoom=queue_zoom, group_by_zoom=group_by_zoom) yield MissingTiles(missing_low_file, missing_high_file) finally: shutil.rmtree(tmpdir)
def marshall(self, coords): assert len(coords) == 1 coord = coords[0] return serialize_coord(coord)
def save_set_to_fp(the_set, fp): for coord_int in sorted(the_set): coord = coord_unmarshall_int(coord_int) fp.write(serialize_coord(coord)) fp.write('\n')
def __call__(self, stop): saw_sentinel = False while not stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break coord = data['coord'] start = time.time() async_jobs = self.save_tiles(data['formatted_tiles']) async_exc_info = None n_stored = 0 n_not_stored = 0 for async_job in async_jobs: try: did_store = async_job.get() if did_store: n_stored += 1 else: n_not_stored += 1 except: # it's important to wait for all async jobs to # complete # but we just keep a reference to the last # exception # it's unlikely that we would receive multiple # different exceptions when uploading to s3 async_exc_info = sys.exc_info() if async_exc_info: stacktrace = format_stacktrace_one_line(async_exc_info) self.logger.error('Error storing: %s - %s' % ( serialize_coord(coord), stacktrace)) continue metadata = data['metadata'] metadata['timing']['s3_seconds'] = time.time() - start metadata['store'] = dict( stored=n_stored, not_stored=n_not_stored, ) data = dict( coord=coord, metadata=metadata, ) while not _non_blocking_put(self.output_queue, data): if stop.is_set(): break if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('s3 storage stopped')
def __call__(self, stop): saw_sentinel = False while not stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break coord = data['coord'] start = time.time() async_jobs = [] for formatted_tile in data['formatted_tiles']: async_result = self.io_pool.apply_async( write_tile_if_changed, ( self.store, formatted_tile['tile'], # important to use the coord from the # formatted tile here, because we could have # cut children tiles that have separate zooms # too formatted_tile['coord'], formatted_tile['format'], formatted_tile['layer'])) async_jobs.append(async_result) async_exc_info = None n_stored = 0 n_not_stored = 0 for async_job in async_jobs: try: did_store = async_job.get() if did_store: n_stored += 1 else: n_not_stored += 1 except: # it's important to wait for all async jobs to # complete # but we just keep a reference to the last # exception # it's unlikely that we would receive multiple # different exceptions when uploading to s3 async_exc_info = sys.exc_info() if async_exc_info: stacktrace = format_stacktrace_one_line(async_exc_info) self.logger.error('Error storing: %s - %s' % ( serialize_coord(coord), stacktrace)) continue metadata = data['metadata'] metadata['timing']['s3_seconds'] = time.time() - start metadata['store'] = dict( stored=n_stored, not_stored=n_not_stored, ) data = dict( coord=coord, metadata=metadata, ) while not _non_blocking_put(self.output_queue, data): if stop.is_set(): break if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('s3 storage stopped')
def __call__(self): saw_sentinel = False while not self.stop.is_set(): try: data = self.input_queue.get(timeout=timeout_seconds) except Queue.Empty: continue if data is None: saw_sentinel = True break metadata = data['metadata'] sqs_handle = metadata['sqs_handle'] coord = data['coord'] coord_message = CoordMessage(coord, sqs_handle) start = time.time() try: self.sqs_queue.job_done(coord_message) except: stacktrace = format_stacktrace_one_line() self.logger.error('Error acknowledging: %s - %s' % ( serialize_coord(coord), stacktrace)) continue timing = metadata['timing'] now = time.time() timing['ack_seconds'] = now - start sqs_timestamp_millis = metadata['timestamp'] sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0 time_in_queue = now - sqs_timestamp_seconds layers = metadata['layers'] size = layers['size'] size_as_str = repr(size) store_info = metadata['store'] self.logger.info( '%s ' 'data(%.2fs) ' 'proc(%.2fs) ' 's3(%.2fs) ' 'ack(%.2fs) ' 'sqs(%.2fs) ' 'size(%s) ' 'stored(%s) ' 'not_stored(%s)' % ( serialize_coord(coord), timing['fetch_seconds'], timing['process_seconds'], timing['s3_seconds'], timing['ack_seconds'], time_in_queue, size_as_str, store_info['stored'], store_info['not_stored'], )) if not saw_sentinel: _force_empty_queue(self.input_queue) self.logger.debug('sqs queue writer stopped')