예제 #1
0
    def __call__(self):
        """Drain the input queue, acknowledging each completed tile.

        Runs until asked to stop or a sentinel (None) item is seen. Each
        item has its coordinate unmarked as in-flight, its originating
        queue message acknowledged, and its timing recorded and reported.
        """
        got_sentinel = False
        while not self.stop.is_set():
            try:
                job = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if job is None:
                got_sentinel = True
                break

            meta = job['metadata']
            coord = job['coord']
            coord_handle = meta['coord_handle']
            timing_state = meta['timing_state']

            ack_started = time.time()

            # clear the in-flight marker first; on failure, skip the ack
            # so the message isn't lost.
            try:
                self.inflight_mgr.unmark_inflight(coord)
            except Exception as exc:
                trace = format_stacktrace_one_line()
                self.tile_proc_logger.error(
                    'Unmarking in-flight error', exc, trace, coord)
                continue

            queue_handle, err = _ack_coord_handle(
                coord, coord_handle, self.queue_mapper, self.msg_tracker,
                timing_state, self.tile_proc_logger, self.stats_handler)
            if err is not None:
                continue

            timing = meta['timing']
            finished_at = time.time()
            timing['ack'] = convert_seconds_to_millis(
                finished_at - ack_started)

            # queue latency: time between the message's own timestamp
            # (when present) and now, both in millis.
            msg_timestamp = timing_state['msg_timestamp']
            timing['queue'] = (
                convert_seconds_to_millis(finished_at) - msg_timestamp
                if msg_timestamp else 0)

            coord_proc_data = CoordProcessData(
                coord,
                timing,
                meta['layers']['size'],
                meta['store'],
            )
            self.tile_proc_logger.log_processed_coord(coord_proc_data)
            self.stats_handler.processed_coord(coord_proc_data)

        # stopping without a sentinel: drain the queue so producers
        # blocked on a full pipe can exit too.
        if not got_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('tile queue writer stopped')
예제 #2
0
    def _fetch(self, fetch, coord, metadata):
        """Fetch source data for one parent coordinate.

        Records the fetch duration into metadata['timing'] and returns a
        dict bundling everything the processing stage needs.
        """
        zoom_nominal = coord.zoom + self.metatile_zoom
        zoom_start = coord.zoom + self.metatile_start_zoom
        bounds = coord_to_mercator_bounds(coord)

        fetch_began = time.time()
        rows = fetch(zoom_nominal, bounds)
        metadata['timing']['fetch'] = convert_seconds_to_millis(
            time.time() - fetch_began)

        # every tile job that we get from the queue is a "parent" tile
        # and its four children to cut from it. at zoom 15, this may
        # also include a whole bunch of other children below the max
        # zoom.
        children = list(
            coord_children_subrange(coord, zoom_start, zoom_nominal))

        return dict(
            metadata=metadata,
            coord=coord,
            source_rows=rows,
            unpadded_bounds=bounds,
            cut_coords=children,
            nominal_zoom=zoom_nominal,
        )
예제 #3
0
    def __call__(self, stop):
        """Convert fetched source rows into formatted tiles.

        Consumes jobs from the input queue until *stop* is set or a
        sentinel (None) arrives; successful results are pushed onto the
        output queue, failures are logged and the tile skipped.
        """
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        output = OutputQueue(self.output_queue, self.tile_proc_logger, stop)

        got_sentinel = False
        while not stop.is_set():
            try:
                job = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if job is None:
                got_sentinel = True
                break

            coord = job['coord']
            bounds = job['unpadded_bounds']
            nominal_zoom = job['nominal_zoom']

            proc_began = time.time()

            # a processing error must not kill the worker; log it and
            # move on to the next job.
            try:
                feature_layers = convert_source_data_to_feature_layers(
                    job['source_rows'], self.layer_data, bounds,
                    nominal_zoom)
                formatted_tiles, extra_data = process_coord(
                    coord, nominal_zoom, feature_layers,
                    self.post_process_data, self.formats, bounds,
                    job['cut_coords'], self.buffer_cfg,
                    self.output_calc_mapping)
            except Exception as exc:
                trace = format_stacktrace_one_line()
                self.tile_proc_logger.error(
                    'Processing error', exc, trace, coord)
                self.stats_handler.proc_error()
                continue

            meta = job['metadata']
            meta['timing']['process'] = convert_seconds_to_millis(
                time.time() - proc_began)
            meta['layers'] = extra_data

            result = dict(
                metadata=meta,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            # a truthy return means we were asked to stop while writing
            if output(coord, result):
                break

        if not got_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('processor stopped')
예제 #4
0
def _ack_coord_handle(
        coord, coord_handle, queue_mapper, msg_tracker, timing_state,
        tile_proc_logger, stats_handler):
    """share code for acknowledging a coordinate"""

    # returns tuple of (handle, error), either of which can be None

    track_result = msg_tracker.done(coord_handle)
    queue_handle = track_result.queue_handle
    if not queue_handle:
        return None, None

    tile_queue = queue_mapper.get_queue(queue_handle.queue_id)
    assert tile_queue, \
        'Missing tile_queue: %s' % queue_handle.queue_id

    parent_tile = None
    if track_result.all_done:
        parent_tile = track_result.parent_tile

        try:
            tile_queue.job_done(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            tile_proc_logger.error_job_done(
                'tile_queue.job_done', e, stacktrace,
                coord, parent_tile,
            )
            return queue_handle, e

        if parent_tile is not None:
            # we completed a tile pyramid and should log appropriately

            start_time = timing_state['start']
            stop_time = convert_seconds_to_millis(time.time())
            tile_proc_logger.log_processed_pyramid(
                parent_tile, start_time, stop_time)
            stats_handler.processed_pyramid(
                parent_tile, start_time, stop_time)
    else:
        try:
            tile_queue.job_progress(queue_handle.handle)
        except Exception as e:
            stacktrace = format_stacktrace_one_line()
            err_details = {"queue_handle": queue_handle.handle}
            if isinstance(e, JobProgressException):
                err_details = e.err_details
            tile_proc_logger.error_job_progress(
                'tile_queue.job_progress', e, stacktrace,
                coord, parent_tile, err_details,
            )
            return queue_handle, e

    return queue_handle, None
예제 #5
0
    def __call__(self, stop):
        """Store formatted tiles, recording upload timing and counts.

        Consumes items from the input queue until *stop* is set or a
        sentinel (None) arrives. Each batch of formatted tiles is saved,
        all async upload jobs are awaited, and the resulting metadata is
        forwarded to the output queue.
        """
        saw_sentinel = False

        queue_output = OutputQueue(
            self.output_queue, self.tile_proc_logger, stop)

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            try:
                async_jobs = self.save_tiles(data['formatted_tiles'])

            except Exception as e:
                # cannot propagate this error - it crashes the thread and
                # blocks up the whole queue!
                stacktrace = format_stacktrace_one_line()
                self.tile_proc_logger.error('Save error', e, stacktrace, coord)
                continue

            async_exc_info = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except Exception:
                    # it's important to wait for all async jobs to
                    # complete but we just keep a reference to the last
                    # exception it's unlikely that we would receive multiple
                    # different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                # BUGFIX: this previously logged the loop's `except ... as
                # e` variable, which Python 3 unbinds at the end of the
                # handler (PEP 3110), so logging it raised NameError; take
                # the exception object from exc_info instead.
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.tile_proc_logger.error(
                    'Store error', async_exc_info[1], stacktrace, coord)
                continue

            metadata = data['metadata']
            metadata['timing']['s3'] = convert_seconds_to_millis(
                time.time() - start)
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            # a truthy return means we were asked to stop while writing
            if queue_output(coord, data):
                break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.tile_proc_logger.lifecycle('s3 storage stopped')
예제 #6
0
    def __call__(self):
        """Read messages from the tile queues and emit per-coordinate jobs.

        Polls the mapper's queues in priority order until asked to stop,
        unmarshalls each message's coordinates, registers them with the
        message tracker (so later stages can acknowledge the originating
        message), and pushes one job dict per coordinate group to the
        output. All queues are closed on exit.
        """
        while not self.stop.is_set():

            msg_handles = ()

            # take the first non-empty read, checking queues in priority
            # order; a read error is logged and that queue skipped.
            for queue_id, tile_queue in (
                    self.queue_mapper.queues_in_priority_order()):
                try:
                    msg_handles = tile_queue.read()
                except Exception as e:
                    stacktrace = format_stacktrace_one_line()
                    self.tile_proc_logger.error(
                        'Queue read error', e, stacktrace)
                    continue
                if msg_handles:
                    break

            if not msg_handles:
                continue

            for msg_handle in msg_handles:
                # if asked to stop, break as soon as possible
                if self.stop.is_set():
                    break

                # capture receipt time (millis) and the message's own
                # timestamp, if any, for the later queue-latency figure.
                now = convert_seconds_to_millis(time.time())
                msg_timestamp = None
                if msg_handle.metadata:
                    msg_timestamp = msg_handle.metadata.get('timestamp')
                timing_state = dict(
                    msg_timestamp=msg_timestamp,
                    start=now,
                )

                coords = self.msg_marshaller.unmarshall(msg_handle.payload)
                # it seems unlikely, but just in case there are no coordinates
                # in the payload, there's nothing to do, so skip to the next
                # payload.
                if not coords:
                    continue

                # check for duplicate coordinates - for the message tracking to
                # work, we assume that coordinates are unique, as we use them
                # as keys in a dict. (plus, it doesn't make a lot of sense to
                # render the coordinate twice in the same job anyway).
                coords = list(set(coords))

                parent_tile = self._parent(coords)

                queue_handle = QueueHandle(queue_id, msg_handle.handle)
                coord_handles = self.msg_tracker.track(
                    queue_handle, coords, parent_tile)

                all_coords_data = []
                for coord, coord_handle in izip(coords, coord_handles):
                    # coordinates beyond the supported max zoom are
                    # rejected (and marked done) rather than rendered.
                    if coord.zoom > self.max_zoom:
                        self._reject_coord(coord, coord_handle, timing_state)
                        continue

                    metadata = dict(
                        # the timing is just what will be filled out later
                        timing=dict(
                            fetch=None,
                            process=None,
                            s3=None,
                            ack=None,
                        ),
                        # this is temporary state that is used later on to
                        # determine timing information
                        timing_state=timing_state,
                        coord_handle=coord_handle,
                    )
                    data = dict(
                        metadata=metadata,
                        coord=coord,
                    )

                    all_coords_data.append(data)

                # we might have no coordinates if we rejected all the
                # coordinates. in which case, there's nothing to do anyway, as
                # the _reject_coord method will have marked the job as done.
                if all_coords_data:
                    coord_input_spec = all_coords_data, parent_tile
                    msg = "group of %d tiles below %s" \
                          % (len(all_coords_data),
                             serialize_coord(parent_tile))
                    # NOTE(review): a truthy return here breaks only this
                    # inner message loop; the outer while keeps polling
                    # until stop is set, unlike the sibling workers where
                    # the same pattern exits the while - confirm intended.
                    if self.output(msg, coord_input_spec):
                        break

        for _, tile_queue in self.queue_mapper.queues_in_priority_order():
            tile_queue.close()
        self.tile_proc_logger.lifecycle('tile queue reader stopped')