def test_read_multiple(self):
        from mock import MagicMock
        from tilequeue.tile import deserialize_coord
        from tilequeue.tile import serialize_coord

        coord1 = deserialize_coord('1/1/1')
        coord2 = deserialize_coord('2/2/2')
        sqs_queue1 = MagicMock()
        sqs_queue2 = MagicMock()

        sqs_queue1.name = 'q1'
        sqs_queue1.get_messages = self._make_get_messages(
            [serialize_coord(coord1)])
        sqs_queue2.name = 'q2'
        sqs_queue2.get_messages = self._make_get_messages(
            [serialize_coord(coord2)])

        msq = self._make_one(
            [sqs_queue1, sqs_queue2],
            _test_q1_q2_get_queue_name_for_zoom,
        )
        coord_msgs = msq.read()
        self.assertEqual(2, len(coord_msgs))
        self.assertEqual(coord1, coord_msgs[0].coord)
        self.assertEqual(coord2, coord_msgs[1].coord)
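
The two helpers this test relies on are defined on the test case elsewhere in the real suite; below is a minimal, purely illustrative sketch of what they might look like. The message stub, its get_body() shape, and the zoom cut-off in the queue-name function are assumptions, not the project's actual implementation.

class _StubSqsMessage(object):
    """Stand-in for a boto SQS message: only get_body() is needed here."""

    def __init__(self, body):
        self._body = body

    def get_body(self):
        return self._body


def _make_get_messages(payloads):
    # Return a callable mimicking boto's Queue.get_messages(), handing back
    # one stub message per serialized coordinate.
    def get_messages(*args, **kwargs):
        return [_StubSqsMessage(payload) for payload in payloads]
    return get_messages


def _test_q1_q2_get_queue_name_for_zoom(zoom):
    # Route zoom 1 to 'q1' and deeper zooms to 'q2' (assumed split).
    return 'q1' if zoom <= 1 else 'q2'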
Example 2
 def test_tilequeue_intersect_does_not_enqueue_coords(self):
     from mock import MagicMock
     from tilequeue.command import tilequeue_intersect
     from ModestMaps.Core import Coordinate
     from tilequeue.tile import serialize_coord
     cfg_mock = MagicMock()
     cfg_mock.queue_type = 'sqs'
     peripherals_mock = MagicMock()
     c0 = Coordinate(row=0, column=0, zoom=0)
     c1 = Coordinate(row=1, column=1, zoom=1)
     peripherals_mock.redis_cache_index = MagicMock(
         intersect=lambda x, y: ([]))
     queue_mock = MagicMock()
     peripherals_mock.queue = queue_mock
     queue_mock.enqueue = self.fake_enqueue
     queue_mock.enqueue_batch = self.fake_enqueue_batch
     import os
     with tempdir() as expired_tiles_location:
         expected_file = os.path.join(expired_tiles_location,
                                      'expire_list.txt')
         with open(expected_file, "w+") as fp:
             fp.write(serialize_coord(c0) + "\n" + serialize_coord(c1))
         cfg_mock.intersect_expired_tiles_location = expired_tiles_location
         cfg_mock.logconfig = None
         tilequeue_intersect(cfg_mock, peripherals_mock)
     self.assertNotIn(c0, self.enqueued_list)
     self.assertNotIn(c1, self.enqueued_list)
Example 3
 def enqueue(self, coord):
     if not self._inflight(coord):
         payload = serialize_coord(coord)
         message = RawMessage()
         message.set_body(payload)
         self.sqs_queue.write(message)
         self._add_to_flight(coord)
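
For reference, a minimal sketch of the round-trip these examples lean on. The real serialize_coord/deserialize_coord live in tilequeue.tile; this assumes the z/x/y text layout used throughout (e.g. '1/1/1') and omits the library's error handling.

from ModestMaps.Core import Coordinate


def serialize_coord(coord):
    # zoom/column/row, e.g. Coordinate(row=1, column=2, zoom=3) -> '3/2/1'
    return '%d/%d/%d' % (coord.zoom, coord.column, coord.row)


def deserialize_coord(coord_string):
    zoom, column, row = map(int, coord_string.split('/'))
    return Coordinate(row=row, column=column, zoom=zoom)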
Example 4
def async_store(store, tile_data, coord, format, layer):
    """update cache store with tile_data"""
    try:
        store.write_tile(tile_data, coord, format, layer)
    except:
        stacktrace = format_stacktrace_one_line()
        print 'Error storing coord %s with format %s: %s' % (
            serialize_coord(coord), format.extension, stacktrace)
Example 5
 def _add_to_tiles_of_interest(self, coord):
     coord_str = serialize_coord(coord)
     try:
         self.redis_cache_index.index_coord(coord)
         print 'Added to tiles of interest: %s' % coord_str
     except:
         print >> sys.stderr, \
             'Error adding to tiles of interest: %s' % coord_str
         traceback.print_exc(file=sys.stderr)
Example 8
    def _write_batch(self, coords):
        assert len(coords) <= 10
        values = []
        msg_tuples = []

        for i, coord in enumerate(coords):
            msg_tuples.append((str(i), serialize_coord(coord), 0))
            values.append(coord_marshall_int(coord))

        self.sqs_queue.write_batch(msg_tuples)
        self.redis_client.sadd(self.inflight_key, *values)
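
SQS rejects batches larger than 10 messages, which is what the assertion guards. A hedged usage sketch for feeding an arbitrary coordinate list through it; the wrapper name is made up for illustration.

def write_all(queue, coords, batch_size=10):
    # Split coords into SQS-sized chunks before delegating to _write_batch.
    for start in range(0, len(coords), batch_size):
        queue._write_batch(coords[start:start + batch_size])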
Example 9
 def test_job_done_removes_tile_from_in_flight(self):
     from tilequeue.tile import CoordMessage
     coord = Coordinate(row=1, column=1, zoom=1)
     payload = serialize_coord(coord)
     message = RawMessage()
     message.set_body(payload)
     coord_message = CoordMessage(coord, message)
     self.sqs.job_done(coord_message)
     from tilequeue.tile import coord_marshall_int
     exp_value = coord_marshall_int(coord)
     self.mockRedis.srem.assert_called_once_with(self.sqs.inflight_key,
                                                 exp_value)
Example 10
def async_enqueue(sqs_queue, coord):
    """enqueue a coordinate for offline processing

    This ensures that when we receive a request for a tile format that
    hasn't been generated yet, we create the other formats eventually.
    """
    try:
        sqs_queue.enqueue(coord)
    except:
        stacktrace = format_stacktrace_one_line()
        print 'Error enqueueing coord %s: %s\n' % (
            serialize_coord(coord), stacktrace)
Example 13
    def test_read_single(self):
        from mock import MagicMock
        from tilequeue.tile import deserialize_coord
        from tilequeue.tile import serialize_coord

        sqs_queue = MagicMock()
        coord = deserialize_coord('1/1/1')
        sqs_queue.get_messages = self._make_get_messages(
            [serialize_coord(coord)])
        msq = self._make_one([sqs_queue])
        coord_msgs = msq.read()
        self.assertEqual(1, len(coord_msgs))
        self.assertEqual(coord, coord_msgs[0].coord)
Example 14
 def enqueue(self, coord):
     if not coord_is_valid(coord):
         # TODO log?
         return
     coord_int = coord_marshall_int(coord)
     if not self._inflight(coord):
         payload = serialize_coord(coord)
         message = RawMessage()
         message.set_body(payload)
         sqs_queue_name = self.get_queue_name_for_zoom(coord.zoom)
         sqs_queue = self.sqs_queue_for_name.get(sqs_queue_name)
         assert sqs_queue, 'No queue found for: %s' % sqs_queue_name
         sqs_queue.write(message)
         self._add_to_flight(coord_int)
Example 15
    def _write_batch(self, sqs_queue, buf):
        assert len(buf) <= self.queue_buf_size
        msg_tuples = []
        coord_ints = []
        for i, (coord, coord_int) in enumerate(buf):
            msg_id = str(i)
            coord_str = serialize_coord(coord)
            msg_delay = 0
            msg_tuple = (msg_id, coord_str, msg_delay)
            msg_tuples.append(msg_tuple)
            coord_ints.append(coord_int)

        sqs_queue.write_batch(msg_tuples)
        self.redis_client.sadd(self.inflight_key, *coord_ints)
Example 16
    def __call__(self, stop):
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']
            feature_layers = data['feature_layers']
            unpadded_bounds = data['unpadded_bounds']
            cut_coords = data['cut_coords']
            nominal_zoom = data['nominal_zoom']

            start = time.time()

            try:
                formatted_tiles, extra_data = process_coord(
                    coord, nominal_zoom, feature_layers,
                    self.post_process_data, self.formats, unpadded_bounds,
                    cut_coords, self.buffer_cfg)
            except:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error processing: %s - %s' %
                                  (serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['process_seconds'] = time.time() - start
            metadata['layers'] = extra_data

            data = dict(
                metadata=metadata,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('processor stopped')
Example 17
    def __call__(self, stop):
        # ignore ctrl-c interrupts when run from terminal
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']
            feature_layers = data['feature_layers']
            unpadded_bounds = data['unpadded_bounds']
            cut_coords = data['cut_coords']

            start = time.time()

            try:
                formatted_tiles, extra_data = process_coord(
                    coord, feature_layers, self.post_process_data,
                    self.formats, unpadded_bounds, cut_coords,
                    self.layers_to_format, self.buffer_cfg)
            except:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error processing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['process_seconds'] = time.time() - start
            metadata['layers'] = extra_data

            data = dict(
                metadata=metadata,
                coord=coord,
                formatted_tiles=formatted_tiles,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('processor stopped')
Example 18
 def process_work_data():
     while True:
         work_data = work.get()
         if work_data is None:
             break
         coord = work_data['coord']
         format = work_data['format']
         key_path = 'osm/all/%s.%s' % (
             serialize_coord(coord), format.extension)
         key = bucket.get_key(key_path)
         # this shouldn't practically happen
         if key is None:
             continue
         size = key.size
         add_size(work_data['metro'], coord, format, size)
Example 20
    def job_done(self, coord_message):
        queue_name = None
        if coord_message.metadata:
            queue_name = coord_message.metadata.get('queue_name')
        assert queue_name, \
            'Missing queue name metadata for coord: %s' % serialize_coord(
                coord_message.coord)

        sqs_queue = self.sqs_queue_for_name.get(queue_name)
        assert sqs_queue, 'Missing queue for: %s' % queue_name

        coord_int = coord_marshall_int(coord_message.coord)

        self.redis_client.srem(self.inflight_key, coord_int)
        sqs_queue.delete_message(coord_message.message_handle)
Example 21
    def test_enqueue_and_enqueue_batch(self):
        from tilequeue.tile import serialize_coord
        # Test `.enqueue_batch()`.
        num_to_enqueue = 3
        self.assertEqual(
            self.queue.enqueue_batch(
                map(serialize_coord, self.test_tile_objs[:num_to_enqueue])),
            (num_to_enqueue, 0),
            'Return value of `enqueue_batch()` does not match expected')

        # Test `.enqueue()`.
        for coord in self.test_tile_objs[num_to_enqueue:]:
            self.queue.enqueue(serialize_coord(coord))

        self.assertEqual(self.tiles_fp.getvalue(), self.tile_coords_str,
                         'Contents of file do not match expected')
Example 22
def missing_jobs(missing_bucket,
                 rawr_bucket,
                 date_prefix,
                 region,
                 config,
                 group_by_zoom=10,
                 queue_zoom=7,
                 key_format_type='prefix-hash',
                 tile_coords_generator=None,
                 tile_verifier=None):
    """
    Write and yield file containing a z/x/y coordinate for each job (at the
    job zoom) corresponding to a missing tile (at the tile zoom) in the bucket.

    Cleans up the temporary directory after the yielded-to code returns.
    """

    tiles = missing_tiles(missing_bucket, rawr_bucket, date_prefix, region,
                          key_format_type, config, group_by_zoom,
                          tile_coords_generator, tile_verifier)

    # the RAWR tiles at `group_by_zoom` are actually built by AWS Batch jobs at
    # `queue_zoom`, so we need to do a zoomTo here to find the corresponding jobs
    job_coords = {coord.zoomTo(queue_zoom).container() for coord in tiles}

    if bool(tile_coords_generator):
        print('[make_rawr_tiles] Going to rebuild %d tiles by (%d jobs)' %
              (len(tiles), len(job_coords)))
    else:
        print('[make_rawr_tiles] Missing %d tiles (%d jobs)' %
              (len(tiles), len(job_coords)))

    tmpdir = tempfile.mkdtemp()
    try:
        missing_file = os.path.join(tmpdir, 'missing_jobs.txt')

        with open(missing_file, 'w') as fh:
            for coord in sorted(job_coords):
                fh.write(serialize_coord(coord) + '\n')
                if tile_verifier is not None:
                    tile_verifier.generate_tile_coords_rebuild_paths_rawr(
                        [coord], group_by_zoom)

        yield missing_file

    finally:
        shutil.rmtree(tmpdir)
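
A hedged usage sketch: the cleanup-after-yield behaviour implies missing_jobs is consumed as a context manager (assuming it is wrapped with contextlib.contextmanager, which is not shown here). The bucket names, date prefix, and region below are placeholders.

with missing_jobs('missing-bucket', 'rawr-bucket', '20200101',
                  'us-east-1', config=None) as missing_file:
    with open(missing_file) as fh:
        job_coords = [line.strip() for line in fh]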
Example 23
def async_update_tiles_of_interest(redis_cache_index, coord):
    """update tiles of interest set

    The tiles of interest represent all tiles that will get processed
    on osm diffs. Our policy is to cache tiles up to zoom level 20. As
    an optimization, because the queries only change up until zoom
    level 16, i.e. they are the same for z16+, we enqueue work at z16,
    and the higher zoom tiles get generated by cutting the z16 tile
    appropriately. This means that when we receive requests for tiles
    > z16, we need to also track the corresponding tile at z16,
    otherwise those tiles would never get regenerated.
    """
    try:
        if coord.zoom <= 20:
            redis_cache_index.index_coord(coord)
        if coord.zoom > 16:
            coord_at_z16 = coord.zoomTo(16).container()
            redis_cache_index.index_coord(coord_at_z16)
    except:
        stacktrace = format_stacktrace_one_line()
        print 'Error updating tiles of interest for coord %s: %s\n' % (
            serialize_coord(coord), stacktrace)
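
A tiny worked example of the policy described in the docstring: a z18 request is indexed directly and also contributes its z16 ancestor, so the tile it will later be cut from stays in the tiles of interest. zoomTo and container come from ModestMaps, as in the tests above.

from ModestMaps.Core import Coordinate

coord = Coordinate(row=2000, column=1000, zoom=18)
coord_at_z16 = coord.zoomTo(16).container()
# coord_at_z16 is now Coordinate(row=500, column=250, zoom=16)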
Example 25
    def __call__(self):
        while not self.stop.is_set():

            msg_handles = ()

            for queue_id, tile_queue in (
                    self.queue_mapper.queues_in_priority_order()):
                try:
                    msg_handles = tile_queue.read()
                except Exception as e:
                    stacktrace = format_stacktrace_one_line()
                    self.tile_proc_logger.error(
                        'Queue read error', e, stacktrace)
                    continue
                if msg_handles:
                    break

            if not msg_handles:
                continue

            for msg_handle in msg_handles:
                # if asked to stop, break as soon as possible
                if self.stop.is_set():
                    break

                now = convert_seconds_to_millis(time.time())
                msg_timestamp = None
                if msg_handle.metadata:
                    msg_timestamp = msg_handle.metadata.get('timestamp')
                timing_state = dict(
                    msg_timestamp=msg_timestamp,
                    start=now,
                )

                coords = self.msg_marshaller.unmarshall(msg_handle.payload)
                # it seems unlikely, but just in case there are no coordinates
                # in the payload, there's nothing to do, so skip to the next
                # payload.
                if not coords:
                    continue

                # check for duplicate coordinates - for the message tracking to
                # work, we assume that coordinates are unique, as we use them
                # as keys in a dict. (plus, it doesn't make a lot of sense to
                # render the coordinate twice in the same job anyway).
                coords = list(set(coords))

                parent_tile = self._parent(coords)

                queue_handle = QueueHandle(queue_id, msg_handle.handle)
                coord_handles = self.msg_tracker.track(
                    queue_handle, coords, parent_tile)

                all_coords_data = []
                for coord, coord_handle in izip(coords, coord_handles):
                    if coord.zoom > self.max_zoom:
                        self._reject_coord(coord, coord_handle, timing_state)
                        continue

                    metadata = dict(
                        # the timing is just what will be filled out later
                        timing=dict(
                            fetch=None,
                            process=None,
                            s3=None,
                            ack=None,
                        ),
                        # this is temporary state that is used later on to
                        # determine timing information
                        timing_state=timing_state,
                        coord_handle=coord_handle,
                    )
                    data = dict(
                        metadata=metadata,
                        coord=coord,
                    )

                    all_coords_data.append(data)

                # we might have no coordinates if we rejected all the
                # coordinates. in which case, there's nothing to do anyway, as
                # the _reject_coord method will have marked the job as done.
                if all_coords_data:
                    coord_input_spec = all_coords_data, parent_tile
                    msg = "group of %d tiles below %s" \
                          % (len(all_coords_data),
                             serialize_coord(parent_tile))
                    if self.output(msg, coord_input_spec):
                        break

        for _, tile_queue in self.queue_mapper.queues_in_priority_order():
            tile_queue.close()
        self.tile_proc_logger.lifecycle('tile queue reader stopped')
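
The _parent helper is not shown in this example; below is a purely illustrative sketch of what it appears to compute, i.e. the lowest-zoom tile containing every coordinate in the message (the real implementation may differ).

def _parent(coords):
    # Zoom the deepest coordinates out one level at a time until they all
    # collapse onto a single ancestor tile.
    coords = list(coords)
    while len(set((c.zoom, c.column, c.row) for c in coords)) > 1:
        deepest = max(c.zoom for c in coords)
        coords = [c.zoomTo(c.zoom - 1).container() if c.zoom == deepest
                  else c for c in coords]
    return coords[0]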
Example 26
 def marshall(self, coords):
     return ','.join(serialize_coord(x) for x in coords)
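
A matching unmarshall sketch, assuming the comma-separated payload produced by marshall above and a deserialize_coord like the one in tilequeue.tile.

from tilequeue.tile import deserialize_coord


def unmarshall(payload):
    return [deserialize_coord(part) for part in payload.split(',')]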
Example 27
    def __call__(self, stop):
        saw_sentinel = False
        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()

            try:
                fetch_data = self.fetcher(coord)
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                stacktrace = format_stacktrace_one_line(
                    (exc_type, exc_value, exc_traceback))
                if isinstance(exc_value, TransactionRollbackError):
                    log_level = logging.WARNING
                else:
                    log_level = logging.ERROR
                self.logger.log(log_level, 'Error fetching: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['fetch_seconds'] = time.time() - start

            # if we are at zoom level 16, it will serve as a metatile
            # to derive the tiles underneath it
            cut_coords = None
            if coord.zoom == 16:
                cut_coords = []
                async_jobs = []
                children_until = 20
                # ask redis if there are any tiles underneath in the
                # tiles of interest set
                rci = self.redis_cache_index
                async_fn = rci.is_coord_int_in_tiles_of_interest

                for child in coord_children_range(coord, children_until):
                    zoomed_coord_int = coord_marshall_int(child)
                    async_result = self.io_pool.apply_async(
                        async_fn, (zoomed_coord_int,))
                    async_jobs.append((child, async_result))

                async_exc_info = None
                for async_job in async_jobs:
                    zoomed_coord, async_result = async_job
                    try:
                        is_coord_in_tiles_of_interest = async_result.get()
                    except:
                        async_exc_info = sys.exc_info()
                        stacktrace = format_stacktrace_one_line(async_exc_info)
                        self.logger.error(stacktrace)
                    else:
                        if is_coord_in_tiles_of_interest:
                            cut_coords.append(zoomed_coord)
                if async_exc_info:
                    continue

            data = dict(
                metadata=metadata,
                coord=coord,
                feature_layers=fetch_data['feature_layers'],
                unpadded_bounds=fetch_data['unpadded_bounds'],
                cut_coords=cut_coords,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('data fetch stopped')
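
coord_children_range comes from tilequeue.tile; here is a hedged sketch of the behaviour the z16 loop above relies on, yielding every descendant of coord from one zoom below it down to and including zoom_until.

from ModestMaps.Core import Coordinate


def coord_children_range(coord, zoom_until):
    for zoom in range(coord.zoom + 1, zoom_until + 1):
        scale = 2 ** (zoom - coord.zoom)
        for row_offset in range(scale):
            for col_offset in range(scale):
                yield Coordinate(row=coord.row * scale + row_offset,
                                 column=coord.column * scale + col_offset,
                                 zoom=zoom)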
Example 28
    def __call__(self):
        saw_sentinel = False
        while not self.stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            metadata = data['metadata']
            coord_message = metadata['coord_message']
            coord = data['coord']

            start = time.time()
            try:
                self.sqs_queue.job_done(coord_message)
            except:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error acknowledging: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            timing = metadata['timing']
            now = time.time()
            timing['ack_seconds'] = now - start

            coord_message = metadata['coord_message']
            msg_metadata = coord_message.metadata
            time_in_queue = 0
            if msg_metadata:
                sqs_timestamp_millis = msg_metadata.get('timestamp')
                if sqs_timestamp_millis is not None:
                    sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0
                    time_in_queue = now - sqs_timestamp_seconds

            layers = metadata['layers']
            size = layers['size']
            size_as_str = repr(size)

            store_info = metadata['store']

            self.logger.info(
                '%s '
                'data(%.2fs) '
                'proc(%.2fs) '
                's3(%.2fs) '
                'ack(%.2fs) '
                'sqs(%.2fs) '
                'size(%s) '
                'stored(%s) '
                'not_stored(%s)' % (
                    serialize_coord(coord),
                    timing['fetch_seconds'],
                    timing['process_seconds'],
                    timing['s3_seconds'],
                    timing['ack_seconds'],
                    time_in_queue,
                    size_as_str,
                    store_info['stored'],
                    store_info['not_stored'],
                ))

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('sqs queue writer stopped')
Example 29
 def test_unmarshall_single(self):
     from tilequeue.tile import serialize_coord
     coords = self.msg_marshaller.unmarshall('1/1/1')
     self.assertEqual(1, len(coords))
     self.assertEqual('1/1/1', serialize_coord(coords[0]))
Example 31
 def enqueue(self, coord):
     with self.lock:
         payload = serialize_coord(coord)
         self.fp.write(payload + '\n')
Example 32
    def missing_tiles_split(self, group_by_zoom, queue_zoom, big_jobs,
                            try_generator):
        # type: (int, int, CoordSet, bool) -> Iterator[MissingTiles]
        """
        To be used in a with-statement. Yields a MissingTiles object, giving
        information about the tiles which are missing.

        High zoom jobs are output either at group_by_zoom (RAWR tile granularity)
        or queue_zoom (usually lower, e.g: 7) depending on whether big_jobs
        contains a truthy value for the RAWR tile. The big_jobs are looked up
        at queue_zoom.
        """
        def get_coord(coords_source, coordinate):
            if isinstance(coords_source, io.IOBase):
                return deserialize_coord(coordinate)
            else:
                return coordinate

        tmpdir = tempfile.mkdtemp()
        try:
            missing_low_file = os.path.join(tmpdir, 'missing.low.txt')
            missing_high_file = os.path.join(tmpdir, 'missing.high.txt')

            print(
                '[%s][make_meta_tiles] Splitting into high and low zoom lists'
                % (time.ctime()))

            # contains zooms 0 until group_by_zoom. the jobs between the group
            # zoom and RAWR zoom are merged into the parent at group zoom.
            missing_low = CoordSet(max_zoom=queue_zoom)  # 7

            # contains job coords at either queue_zoom or group_by_zoom only.
            # queue_zoom is always less than or equal to group_by_zoom?
            missing_high = CoordSet(min_zoom=queue_zoom,
                                    max_zoom=group_by_zoom)

            coords_src = self.generate_missing_tile_coords(
                tmpdir, try_generator)

            for c in coords_src:
                this_coord = get_coord(coords_src, c)
                if this_coord.zoom < group_by_zoom:  # 10
                    # in order to not have too many jobs in the queue, we
                    # group the low zoom jobs to the zoom_max (usually 7)
                    if this_coord.zoom > queue_zoom:  # 7
                        this_coord = this_coord.zoomTo(queue_zoom).container()
                    missing_low[this_coord] = True
                else:
                    # if the group of jobs at queue_zoom would be too big
                    # (according to big_jobs[]) then enqueue the original
                    # coordinate. this is to prevent a "long tail" of huge
                    # job groups.
                    job_coord = this_coord.zoomTo(queue_zoom).container()  # 7
                    if not big_jobs[job_coord]:
                        this_coord = job_coord
                    missing_high[this_coord] = True

            if isinstance(coords_src, io.IOBase):
                coords_src.close()

            is_using_tile_verifier = try_generator and self.tile_verifier is not None

            with open(missing_low_file, 'w') as fh:
                for coord in missing_low:
                    fh.write(serialize_coord(coord) + '\n')
                    if is_using_tile_verifier:
                        self.tile_verifier.generate_tile_coords_rebuild_paths_low_zoom(
                            job_coords=[coord],
                            queue_zoom=queue_zoom,
                            group_by_zoom=group_by_zoom)

            with open(missing_high_file, 'w') as fh:
                for coord in missing_high:
                    fh.write(serialize_coord(coord) + '\n')
                    if is_using_tile_verifier:
                        self.tile_verifier.generate_tile_coords_rebuild_paths_high_zoom(
                            job_coords=[coord],
                            queue_zoom=queue_zoom,
                            group_by_zoom=group_by_zoom)

            yield MissingTiles(missing_low_file, missing_high_file)

        finally:
            shutil.rmtree(tmpdir)
Example 33
 def marshall(self, coords):
     assert len(coords) == 1
     coord = coords[0]
     return serialize_coord(coord)
Example 34
File: file.py Project: kjartab/map
 def enqueue(self, coord):
     with self.lock:
         payload = serialize_coord(coord)
         self.fp.write(payload + '\n')
Example 35
def save_set_to_fp(the_set, fp):
    for coord_int in sorted(the_set):
        coord = coord_unmarshall_int(coord_int)
        fp.write(serialize_coord(coord))
        fp.write('\n')
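
A short usage sketch for save_set_to_fp. coord_marshall_int and deserialize_coord are the tilequeue.tile helpers used in the other examples; the temporary file is just for illustration.

import tempfile

from tilequeue.tile import coord_marshall_int, deserialize_coord

the_set = set(coord_marshall_int(deserialize_coord(c))
              for c in ('1/1/1', '2/2/2'))
with tempfile.TemporaryFile(mode='w+') as fp:
    save_set_to_fp(the_set, fp)
    fp.seek(0)
    print(fp.read())  # one z/x/y line per coordinate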
Example 36
    def __call__(self, stop):
        saw_sentinel = False

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            async_jobs = self.save_tiles(data['formatted_tiles'])

            async_exc_info = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except:
                    # it's important to wait for all async jobs to
                    # complete
                    # but we just keep a reference to the last
                    # exception
                    # it's unlikely that we would receive multiple
                    # different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.logger.error('Error storing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['s3_seconds'] = time.time() - start
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('s3 storage stopped')
Example 37
    def __call__(self, stop):
        saw_sentinel = False

        while not stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            coord = data['coord']

            start = time.time()
            async_jobs = []
            for formatted_tile in data['formatted_tiles']:

                async_result = self.io_pool.apply_async(
                    write_tile_if_changed, (
                        self.store,
                        formatted_tile['tile'],
                        # important to use the coord from the
                        # formatted tile here, because we could have
                        # cut children tiles that have separate zooms
                        # too
                        formatted_tile['coord'],
                        formatted_tile['format'],
                        formatted_tile['layer']))
                async_jobs.append(async_result)

            async_exc_info = None
            n_stored = 0
            n_not_stored = 0
            for async_job in async_jobs:
                try:
                    did_store = async_job.get()
                    if did_store:
                        n_stored += 1
                    else:
                        n_not_stored += 1
                except:
                    # it's important to wait for all async jobs to
                    # complete
                    # but we just keep a reference to the last
                    # exception
                    # it's unlikely that we would receive multiple
                    # different exceptions when uploading to s3
                    async_exc_info = sys.exc_info()

            if async_exc_info:
                stacktrace = format_stacktrace_one_line(async_exc_info)
                self.logger.error('Error storing: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            metadata = data['metadata']
            metadata['timing']['s3_seconds'] = time.time() - start
            metadata['store'] = dict(
                stored=n_stored,
                not_stored=n_not_stored,
            )

            data = dict(
                coord=coord,
                metadata=metadata,
            )

            while not _non_blocking_put(self.output_queue, data):
                if stop.is_set():
                    break

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('s3 storage stopped')
Example 38
    def __call__(self):
        saw_sentinel = False
        while not self.stop.is_set():
            try:
                data = self.input_queue.get(timeout=timeout_seconds)
            except Queue.Empty:
                continue
            if data is None:
                saw_sentinel = True
                break

            metadata = data['metadata']
            sqs_handle = metadata['sqs_handle']
            coord = data['coord']
            coord_message = CoordMessage(coord, sqs_handle)

            start = time.time()
            try:
                self.sqs_queue.job_done(coord_message)
            except:
                stacktrace = format_stacktrace_one_line()
                self.logger.error('Error acknowledging: %s - %s' % (
                    serialize_coord(coord), stacktrace))
                continue

            timing = metadata['timing']
            now = time.time()
            timing['ack_seconds'] = now - start

            sqs_timestamp_millis = metadata['timestamp']
            sqs_timestamp_seconds = sqs_timestamp_millis / 1000.0
            time_in_queue = now - sqs_timestamp_seconds

            layers = metadata['layers']
            size = layers['size']
            size_as_str = repr(size)

            store_info = metadata['store']

            self.logger.info(
                '%s '
                'data(%.2fs) '
                'proc(%.2fs) '
                's3(%.2fs) '
                'ack(%.2fs) '
                'sqs(%.2fs) '
                'size(%s) '
                'stored(%s) '
                'not_stored(%s)' % (
                    serialize_coord(coord),
                    timing['fetch_seconds'],
                    timing['process_seconds'],
                    timing['s3_seconds'],
                    timing['ack_seconds'],
                    time_in_queue,
                    size_as_str,
                    store_info['stored'],
                    store_info['not_stored'],
                ))

        if not saw_sentinel:
            _force_empty_queue(self.input_queue)
        self.logger.debug('sqs queue writer stopped')