Example 1
def _ack_coord_handle(
        coord, coord_handle, queue_mapper, msg_tracker, timing_state,
        tile_proc_logger, stats_handler):
    """share code for acknowledging a coordinate"""

    # returns tuple of (handle, error), either of which can be None

    track_result = msg_tracker.done(coord_handle)
    queue_handle = track_result.queue_handle
    if not queue_handle:
        return None, None

    tile_queue = queue_mapper.get_queue(queue_handle.queue_id)
    assert tile_queue, \
        'Missing tile_queue: %s' % queue_handle.queue_id

    parent_tile = None
    if track_result.all_done:
        parent_tile = track_result.parent_tile

        try:
            tile_queue.job_done(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            tile_proc_logger.error_job_done(
                'tile_queue.job_done', e, stacktrace,
                coord, parent_tile,
            )
            return queue_handle, e

        if parent_tile is not None:
            # we completed a tile pyramid and should log appropriately

            start_time = timing_state['start']
            stop_time = convert_seconds_to_millis(time.time())
            tile_proc_logger.log_processed_pyramid(
                parent_tile, start_time, stop_time)
            stats_handler.processed_pyramid(
                parent_tile, start_time, stop_time)
    else:
        try:
            tile_queue.job_progress(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            err_details = {"queue_handle": queue_handle.handle}
            if isinstance(e, JobProgressException):
                err_details = e.err_details
            tile_proc_logger.error_job_progress(
                'tile_queue.job_progress', e, stacktrace,
                coord, parent_tile, err_details,
            )
            return queue_handle, e

    return queue_handle, None
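
Every example on this page routes exceptions through format_stacktrace_one_line, which the excerpts never define. A minimal sketch, assuming the helper flattens the current (or a given) sys.exc_info() triple into a single line suitable for line-oriented logs:

import sys
import traceback


def format_stacktrace_one_line(exc_info=None):
    # hypothetical sketch: collapse the active exception's traceback
    # into one line so it fits single-line log formats
    if exc_info is None:
        exc_info = sys.exc_info()
    exc_type, exc_value, exc_tb = exc_info
    lines = traceback.format_exception(exc_type, exc_value, exc_tb)
    return ' | '.join(
        part.strip() for line in lines for part in line.splitlines())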
Example 2
    def __call__(self, stop):
        saw_sentinel = False
        output = OutputQueue(self.output_queue, self.tile_proc_logger, stop)

        while not stop.is_set():
            try:
                coord_input_spec = self.input_queue.get(
                    timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if coord_input_spec is None:
                saw_sentinel = True
                break

            coord = None
            parent = None
            try:
                all_data, parent = coord_input_spec
                for fetch, data in self.fetcher.fetch_tiles(all_data):
                    metadata = data['metadata']
                    coord = data['coord']
                    if self._fetch_and_output(fetch, coord, metadata, output):
                        break
            except Exception as e:
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.fetch_error(e, stacktrace, coord, parent)
                self.stats_handler.fetch_error()

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)

        self.tile_proc_logger.lifecycle('data fetch stopped')
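
Workers like this one call _force_empty_queue on shutdown whenever they stopped without consuming the None sentinel. A sketch under the assumption that it simply drains the input queue so producers blocked in put() can exit (reusing the Python 2 Queue module these excerpts already rely on):

import Queue  # named `queue` on Python 3


def _force_empty_queue(q):
    # hypothetical sketch: discard whatever is left on the queue so
    # that upstream threads blocked in put() are released at shutdown
    while True:
        try:
            q.get_nowait()
        except Queue.Empty:
            break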
Example 3
    def __call__(self):
        while not self.stop.is_set():
            try:
                msgs = self.sqs_queue.read(
                    max_to_read=self.sqs_msgs_to_read_size)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error(stacktrace)
                continue

            for msg in msgs:
                # if asked to stop, break as soon as possible
                if self.stop.is_set():
                    break

                metadata = dict(
                    timing=dict(
                        fetch_seconds=None,
                        process_seconds=None,
                        s3_seconds=None,
                        ack_seconds=None,
                    ),
                    coord_message=msg,
                )
                data = dict(
                    metadata=metadata,
                    coord=msg.coord,
                )
                while not _non_blocking_put(self.output_queue, data):
                    if self.stop.is_set():
                        break

        self.sqs_queue.close()
        self.logger.debug('sqs queue reader stopped')
Example 4
    def __call__(self, coord, data):
        """
        Send data, associated with coordinate coord, to the queue. While also
        watching for a signal to stop. If the data is too large to send, then
        trap the MemoryError and exit the program.

        Note that `coord` may be a Coordinate instance or a string. It is only
        used for printing out a message if there's a MemoryError, so for
        requests which have no meaningful single coordinate, something else
        can be used.

        Returns True if the "stop signal" has been set and the thread should
        shut down. False if normal operations should continue.
        """

        try:
            while not _non_blocking_put(self.output_queue, data):
                if self.stop.is_set():
                    return True

        except MemoryError as e:
            stacktrace = format_stacktrace_one_line()
            self.tile_proc_logger.error(
                'MemoryError sending to queue', e, stacktrace, coord)
            # memory error might not leave the malloc subsystem in a usable
            # state, so better to exit the whole worker here than crash this
            # thread, which would lock up the whole worker.
            sys.exit(1)

        return False
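
_non_blocking_put, used here and throughout the worker loops, is also not part of the excerpts. A plausible sketch, assuming it is a put with a short timeout that reports success so the caller can re-check its stop event between attempts:

import Queue  # named `queue` on Python 3


def _non_blocking_put(q, data, timeout=1.0):
    # hypothetical sketch: try to enqueue with a short timeout and
    # report whether it worked, so callers can poll their stop event
    try:
        q.put(data, timeout=timeout)
    except Queue.Full:
        return False
    return True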
Example 5
    def __call__(self):
        while not self.stop.is_set():
            try:
                msgs = self.sqs_queue.read(
                    max_to_read=self.sqs_msgs_to_read_size)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error(stacktrace)
                continue

            for msg in msgs:
                # if asked to stop, break as soon as possible
                if self.stop.is_set():
                    break

                metadata = dict(
                    timing=dict(
                        fetch_seconds=None,
                        process_seconds=None,
                        s3_seconds=None,
                        ack_seconds=None,
                    ),
                    sqs_handle=msg.message_handle,
                    timestamp=msg.timestamp
                )
                data = dict(
                    metadata=metadata,
                    coord=msg.coord,
                )
                while not _non_blocking_put(self.output_queue, data):
                    if self.stop.is_set():
                        break

        self.sqs_queue.close()
        self.logger.debug('sqs queue reader stopped')
Example 6
    def __call__(self):
        saw_sentinel = False
        while not self.stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            metadata = data['metadata']
            coord_handle = metadata['coord_handle']
            coord = data['coord']
            timing_state = metadata['timing_state']

            start = time.time()

            try:
                self.inflight_mgr.unmark_inflight(coord)
            except Exception as e:
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.error(
                    'Unmarking in-flight error', e, stacktrace, coord)
                continue

            queue_handle, err = _ack_coord_handle(
                coord, coord_handle, self.queue_mapper, self.msg_tracker,
                timing_state, self.tile_proc_logger, self.stats_handler)
            if err is not None:
                continue

            timing = metadata['timing']
            now = time.time()
            timing['ack'] = convert_seconds_to_millis(now - start)

            time_in_queue = 0
            msg_timestamp = timing_state['msg_timestamp']
            if msg_timestamp:
                time_in_queue = convert_seconds_to_millis(now) - msg_timestamp
            timing['queue'] = time_in_queue

            layers = metadata['layers']
            size = layers['size']

            store_info = metadata['store']

            coord_proc_data = CoordProcessData(
                coord,
                timing,
                size,
                store_info,
            )
            self.tile_proc_logger.log_processed_coord(coord_proc_data)
            self.stats_handler.processed_coord(coord_proc_data)

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('tile queue writer stopped')
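
convert_seconds_to_millis, which supplies every timing field in this writer, is presumably a plain unit conversion; a one-line sketch:

def convert_seconds_to_millis(seconds):
    # hypothetical sketch: timings are reported as whole milliseconds
    return int(seconds * 1000)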
Example 7
def async_store(store, tile_data, coord, format, layer):
    """update cache store with tile_data"""
    try:
        store.write_tile(tile_data, coord, format, layer)
    except Exception:
        stacktrace = format_stacktrace_one_line()
        print 'Error storing coord %s with format %s: %s' % (
            serialize_coord(coord), format.extension, stacktrace)
Example 8
    def __call__(self, stop):
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        output = OutputQueue(self.output_queue, self.tile_proc_logger, stop)

        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']
            unpadded_bounds = data['unpadded_bounds']
            cut_coords = data['cut_coords']
            nominal_zoom = data['nominal_zoom']
            source_rows = data['source_rows']

            start = time.time()

            try:
                feature_layers = convert_source_data_to_feature_layers(
                    source_rows, self.layer_data, unpadded_bounds,
                    nominal_zoom)
                formatted_tiles, extra_data = process_coord(
                    coord, nominal_zoom, feature_layers,
                    self.post_process_data, self.formats, unpadded_bounds,
                    cut_coords, self.buffer_cfg, self.output_calc_mapping)
            except Exception as e:
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.error(
                    'Processing error', e, stacktrace, coord)
                self.stats_handler.proc_error()
                continue

            metadata = data['metadata']
            metadata['timing']['process'] = convert_seconds_to_millis(
                time.time() - start)
            metadata['layers'] = extra_data

            data = dict(
                metadata=metadata,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            if output(coord, data):
                break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('processor stopped')
Example 9
    def _log_exception(self, msg, exception, coord):
        stacktrace = format_stacktrace_one_line()
        json_obj = dict(
            coord=make_coord_dict(coord),
            type=log_level_name(LogLevel.ERROR),
            msg=msg,
            exception=str(exception),
            stacktrace=stacktrace,
        )
        json_str = json.dumps(json_obj)
        self.logger.error(json_str)
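
For illustration only: assuming make_coord_dict expands a coordinate into zoom/column/row parts and log_level_name yields a lowercase level name, the emitted record would be a single JSON line along these lines (all values hypothetical):

{"coord": {"z": 16, "x": 19295, "y": 24641}, "type": "error", "msg": "...", "exception": "...", "stacktrace": "..."}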
Example 10
def async_enqueue(sqs_queue, coord):
    """enqueue a coordinate for offline processing

    This ensures that when we receive a request for a tile format that
    hasn't been generated yet, we create the other formats eventually.
    """
    try:
        sqs_queue.enqueue(coord)
    except Exception:
        stacktrace = format_stacktrace_one_line()
        print 'Error enqueueing coord %s: %s\n' % (
            serialize_coord(coord), stacktrace)
Example 11
    def __call__(self, environ, start_response):
        request = Request(environ)
        try:
            response = self.handle_request(request)
        except:
            if self.propagate_errors:
                raise
            stacktrace = format_stacktrace_one_line()
            print 'Error handling request for %s: %s' % (
                request.path, stacktrace)
            response = Response(
                'Internal Server Error', status=500, mimetype='text/plain')
        return response(environ, start_response)
Example 12
    def __call__(self, environ, start_response):
        request = Request(environ)
        try:
            response = self.handle_request(request)
        except:
            if self.propagate_errors:
                raise
            stacktrace = format_stacktrace_one_line()
            print 'Error handling request for %s: %s' % (
                request.path, stacktrace)
            response = self.create_response(
                request, 500, 'Internal Server Error', 'text/plain')
        return response(environ, start_response)
Example 13
    def error(self, exception, parent, coord):
        stacktrace = format_stacktrace_one_line()
        json_obj = dict(
            type=log_level_name(LogLevel.ERROR),
            category=log_category_name(LogCategory.RAWR_TILE),
            exception=str(exception),
            stacktrace=stacktrace,
            coord=make_coord_dict(coord),
            parent=make_coord_dict(parent),
            run_id=self.run_id,
        )
        json_str = json.dumps(json_obj)
        self.logger.error(json_str)
Example 14
    def _log_exception(self, msg, exception, parent, coord):
        stacktrace = format_stacktrace_one_line()
        json_obj = dict(
            type=log_level_name(LogLevel.ERROR),
            category=log_category_name(LogCategory.META_TILE_LOW_ZOOM),
            msg=msg,
            exception=str(exception),
            stacktrace=stacktrace,
            run_id=self.run_id,
            parent=make_coord_dict(parent),
            coord=make_coord_dict(coord),
        )
        json_str = json.dumps(json_obj)
        self.logger.error(json_str)
Example 15
    def __call__(self, stop):
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']
            feature_layers = data['feature_layers']
            unpadded_bounds = data['unpadded_bounds']
            cut_coords = data['cut_coords']
            nominal_zoom = data['nominal_zoom']

            start = time.time()

            try:
                formatted_tiles, extra_data = process_coord(
                    coord, nominal_zoom, feature_layers,
                    self.post_process_data, self.formats, unpadded_bounds,
                    cut_coords, self.buffer_cfg)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error processing: %s - %s' %
                                  (serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['process_seconds'] = time.time() - start
            metadata['layers'] = extra_data

            data = dict(
                metadata=metadata,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('processor stopped')
Example 16
    def __call__(self, stop):
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']
            feature_layers = data['feature_layers']
            unpadded_bounds = data['unpadded_bounds']
            cut_coords = data['cut_coords']

            start = time.time()

            try:
                formatted_tiles, extra_data = process_coord(
                    coord, feature_layers, self.post_process_data,
                    self.formats, unpadded_bounds, cut_coords,
                    self.layers_to_format, self.buffer_cfg)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error processing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['process_seconds'] = time.time() - start
            metadata['layers'] = extra_data

            data = dict(
                metadata=metadata,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('processor stopped')
Example 17
def async_update_tiles_of_interest(redis_cache_index, coord):
    """update tiles of interest set

    The tiles of interest represent all tiles that will get processed
    on osm diffs. Our policy is to cache tiles up to zoom level 20. As
    an optimization, because the queries only change up until zoom
    level 16, i.e. they are the same for z16+, we enqueue work at z16,
    and the higher zoom tiles get generated by cutting the z16 tile
    appropriately. This means that when we receive requests for tiles
    > z16, we need to also track the corresponding tile at z16,
    otherwise those tiles would never get regenerated.
    """
    try:
        if coord.zoom <= 20:
            redis_cache_index.index_coord(coord)
        if coord.zoom > 16:
            coord_at_z16 = coord.zoomTo(16).container()
            redis_cache_index.index_coord(coord_at_z16)
    except Exception:
        stacktrace = format_stacktrace_one_line()
        print 'Error updating tiles of interest for coord %s: %s\n' % (
            serialize_coord(coord), stacktrace)
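
The z16 fallback described in the docstring is easiest to see with a concrete coordinate. Assuming the ModestMaps Coordinate type these codebases commonly use for tiles (row, column, zoom, with zoomTo and container methods):

from ModestMaps.Core import Coordinate

coord = Coordinate(77000, 41000, 18)  # row, column, zoom
coord_at_z16 = coord.zoomTo(16).container()  # row/column divided by 2 ** 2
assert (coord_at_z16.row, coord_at_z16.column, coord_at_z16.zoom) == \
    (19250, 10250, 16)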
Example 18
    def log_exception(self, exception, msg, parent_coord=None):
        stacktrace = format_stacktrace_one_line()
        self.rawr_proc_logger.error(msg, exception, stacktrace, parent_coord)
Example 19
    def __call__(self):
        saw_sentinel = False
        while not self.stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            metadata = data['metadata']
            sqs_handle = metadata['sqs_handle']
            coord = data['coord']
            coord_message = CoordMessage(coord, sqs_handle)

            start = time.time()
            try:
                self.sqs_queue.job_done(coord_message)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error acknowledging: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            timing = metadata['timing']
            now = time.time()
            timing['ack_seconds'] = now - start

            sqs_timestamp_millis = metadata['timestamp']
            sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0
            time_in_queue = now - sqs_timestamp_seconds

            layers = metadata['layers']
            size = layers['size']
            size_as_str = repr(size)

            store_info = metadata['store']

            self.logger.info(
                '%s '
                'data(%.2fs) '
                'proc(%.2fs) '
                's3(%.2fs) '
                'ack(%.2fs) '
                'sqs(%.2fs) '
                'size(%s) '
                'stored(%s) '
                'not_stored(%s)' % (
                    serialize_coord(coord),
                    timing['fetch_seconds'],
                    timing['process_seconds'],
                    timing['s3_seconds'],
                    timing['ack_seconds'],
                    time_in_queue,
                    size_as_str,
                    store_info['stored'],
                    store_info['not_stored'],
                ))

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('sqs queue writer stopped')
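
For reference, one line from the logger.info call above might render like this (assuming serialize_coord prints coordinates as zoom/column/row; all values hypothetical):

16/19295/24641 data(0.52s) proc(1.20s) s3(0.31s) ack(0.05s) sqs(12.40s) size({'all': 123456}) stored(8) not_stored(0)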
Example 20
    def __call__(self, stop):
        saw_sentinel = False

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            async_jobs = []
            for formatted_tile in data['formatted_tiles']:

                async_result = self.io_pool.apply_async(
                    write_tile_if_changed, (
                        self.store,
                        formatted_tile['tile'],
                        # important to use the coord from the
                        # formatted tile here, because we could have
                        # cut children tiles that have separate zooms
                        # too
                        formatted_tile['coord'],
                        formatted_tile['format'],
                        formatted_tile['layer']))
                async_jobs.append(async_result)

            async_exc_info = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except Exception:
                    # it's important to wait for all async jobs to
                    # complete, but we just keep a reference to the last
                    # exception; it's unlikely that we would receive
                    # multiple different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.logger.error('Error storing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['s3_seconds'] = time.time() - start
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('s3 storage stopped')
Example 21
    def __call__(self, stop):
        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()

            try:
                fetch_data = self.fetcher(coord)
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                stacktrace = format_stacktrace_one_line(
                    (exc_type, exc_value, exc_traceback))
                if isinstance(exc_value, TransactionRollbackError):
                    log_level = logging.WARNING
                else:
                    log_level = logging.ERROR
                self.logger.log(log_level, 'Error fetching: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['fetch_seconds'] = time.time() - start

            # if we are at zoom level 16, it will serve as a metatile
            # to derive the tiles underneath it
            cut_coords = None
            if coord.zoom == 16:
                cut_coords = []
                async_jobs = []
                children_until = 20
                # ask redis if there are any tiles underneath in the
                # tiles of interest set
                rci = self.redis_cache_index
                async_fn = rci.is_coord_int_in_tiles_of_interest

                for child in coord_children_range(coord, children_until):
                    zoomed_coord_int = coord_marshall_int(child)
                    async_result = self.io_pool.apply_async(
                        async_fn, (zoomed_coord_int,))
                    async_jobs.append((child, async_result))

                async_exc_info = None
                for async_job in async_jobs:
                    zoomed_coord, async_result = async_job
                    try:
                        is_coord_in_tiles_of_interest = async_result.get()
                    except Exception:
                        async_exc_info = sys.exc_info()
                        stacktrace = format_stacktrace_one_line(async_exc_info)
                        self.logger.error(stacktrace)
                    else:
                        if is_coord_in_tiles_of_interest:
                            cut_coords.append(zoomed_coord)
                if async_exc_info:
                    continue

            data = dict(
                metadata=metadata,
                coord=coord,
                feature_layers=fetch_data['feature_layers'],
                unpadded_bounds=fetch_data['unpadded_bounds'],
                cut_coords=cut_coords,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('data fetch stopped')
Example 22
    def __call__(self, stop):
        saw_sentinel = False

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            async_jobs = self.save_tiles(data['formatted_tiles'])

            async_exc_info = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except Exception:
                    # it's important to wait for all async jobs to
                    # complete, but we just keep a reference to the last
                    # exception; it's unlikely that we would receive
                    # multiple different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.logger.error('Error storing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['s3_seconds'] = time.time() - start
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('s3 storage stopped')
Example 23
    def __call__(self):
        saw_sentinel = False
        while not self.stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            metadata = data['metadata']
            coord_message = metadata['coord_message']
            coord = data['coord']

            start = time.time()
            try:
                self.sqs_queue.job_done(coord_message)
            except Exception:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error acknowledging: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            timing = metadata['timing']
            now = time.time()
            timing['ack_seconds'] = now - start

            coord_message = metadata['coord_message']
            msg_metadata = coord_message.metadata
            time_in_queue = 0
            if msg_metadata:
                sqs_timestamp_millis = msg_metadata.get('timestamp')
                if sqs_timestamp_millis is not None:
                    sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0
                    time_in_queue = now - sqs_timestamp_seconds

            layers = metadata['layers']
            size = layers['size']
            size_as_str = repr(size)

            store_info = metadata['store']

            self.logger.info(
                '%s '
                'data(%.2fs) '
                'proc(%.2fs) '
                's3(%.2fs) '
                'ack(%.2fs) '
                'sqs(%.2fs) '
                'size(%s) '
                'stored(%s) '
                'not_stored(%s)' % (
                    serialize_coord(coord),
                    timing['fetch_seconds'],
                    timing['process_seconds'],
                    timing['s3_seconds'],
                    timing['ack_seconds'],
                    time_in_queue,
                    size_as_str,
                    store_info['stored'],
                    store_info['not_stored'],
                ))

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('sqs queue writer stopped')
Example 24
    def __call__(self, stop):
        saw_sentinel = False

        queue_output = OutputQueue(
            self.output_queue, self.tile_proc_logger, stop)

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            try:
                async_jobs = self.save_tiles(data['formatted_tiles'])

            except Exception as e:
                # cannot propagate this error - it crashes the thread and
                # blocks up the whole queue!
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.error('Save error', e, stacktrace, coord)
                continue

            async_exc_info = None
            e = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except Exception as e:
                    # it's important to wait for all async jobs to
                    # complete, but we just keep a reference to the last
                    # exception; it's unlikely that we would receive
                    # multiple different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.tile_proc_logger.error(
                    'Store error', e, stacktrace, coord)
                continue

            metadata = data['metadata']
            metadata['timing']['s3'] = convert_seconds_to_millis(
                time.time() - start)
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            if queue_output(coord, data):
                break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('s3 storage stopped')
Example 25
    def __call__(self):
        while not self.stop.is_set():

            msg_handles = ()

            for queue_id, tile_queue in (
                    self.queue_mapper.queues_in_priority_order()):
                try:
                    msg_handles = tile_queue.read()
                except Exception as e:
                    stacktrace = format_stacktrace_one_line()
                    self.tile_proc_logger.error(
                        'Queue read error', e, stacktrace)
                    continue
                if msg_handles:
                    break

            if not msg_handles:
                continue

            for msg_handle in msg_handles:
                # if asked to stop, break as soon as possible
                if self.stop.is_set():
                    break

                now = convert_seconds_to_millis(time.time())
                msg_timestamp = None
                if msg_handle.metadata:
                    msg_timestamp = msg_handle.metadata.get('timestamp')
                timing_state = dict(
                    msg_timestamp=msg_timestamp,
                    start=now,
                )

                coords = self.msg_marshaller.unmarshall(msg_handle.payload)
                # it seems unlikely, but just in case there are no coordinates
                # in the payload, there's nothing to do, so skip to the next
                # payload.
                if not coords:
                    continue

                # check for duplicate coordinates - for the message tracking to
                # work, we assume that coordinates are unique, as we use them
                # as keys in a dict. (plus, it doesn't make a lot of sense to
                # render the coordinate twice in the same job anyway).
                coords = list(set(coords))

                parent_tile = self._parent(coords)

                queue_handle = QueueHandle(queue_id, msg_handle.handle)
                coord_handles = self.msg_tracker.track(
                    queue_handle, coords, parent_tile)

                all_coords_data = []
                for coord, coord_handle in izip(coords, coord_handles):
                    if coord.zoom > self.max_zoom:
                        self._reject_coord(coord, coord_handle, timing_state)
                        continue

                    metadata = dict(
                        # the timing is just what will be filled out later
                        timing=dict(
                            fetch=None,
                            process=None,
                            s3=None,
                            ack=None,
                        ),
                        # this is temporary state that is used later on to
                        # determine timing information
                        timing_state=timing_state,
                        coord_handle=coord_handle,
                    )
                    data = dict(
                        metadata=metadata,
                        coord=coord,
                    )

                    all_coords_data.append(data)

                # we might have no coordinates if we rejected all the
                # coordinates. in which case, there's nothing to do anyway, as
                # the _reject_coord method will have marked the job as done.
                if all_coords_data:
                    coord_input_spec = all_coords_data, parent_tile
                    msg = "group of %d tiles below %s" \
                          % (len(all_coords_data),
                             serialize_coord(parent_tile))
                    if self.output(msg, coord_input_spec):
                        break

        for _, tile_queue in self.queue_mapper.queues_in_priority_order():
            tile_queue.close()
        self.tile_proc_logger.lifecycle('tile queue reader stopped')
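
The reader above fans a single queue message out into one job per coordinate, which is why msg_tracker.track returns one coord_handle per coordinate and why Example 1 only acknowledges the underlying message once track_result.all_done reports that the whole group finished. A minimal sketch of the handle record, assuming it simply pairs a queue id with the broker's message handle:

from collections import namedtuple

# hypothetical sketch: one QueueHandle per message read from a tile
# queue; each coordinate in that message gets its own coord_handle,
# which the tracker maps back to this record
QueueHandle = namedtuple('QueueHandle', 'queue_id handle')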