def post(self):
  # Do not run for more than 9 minutes. Exceeding the 10 minute hard limit
  # causes a 500.
  time_to_stop = time.time() + 9 * 60
  data = json.loads(self.request.body)
  start = utils.parse_datetime(data['start'])
  end = utils.parse_datetime(data['end'])
  logging.info('Deleting between %s and %s', start, end)
  triggered = 0
  total = 0
  q = model.ContentEntry.query(
      model.ContentEntry.expiration_ts >= start,
      model.ContentEntry.expiration_ts < end)
  cursor = None
  more = True
  while more and time.time() < time_to_stop:
    # Since this query does not fetch the ContentEntry entities themselves,
    # we cannot easily compute the size of the data deleted.
    keys, cursor, more = q.fetch_page(
        500, start_cursor=cursor, keys_only=True)
    if not keys:
      break
    total += len(keys)
    data = utils.encode_to_json([k.string_id() for k in keys])
    if utils.enqueue_task(
        '/internal/taskqueue/cleanup/expired', 'cleanup-expired',
        payload=data):
      triggered += 1
    else:
      logging.warning('Failed to trigger task')
  logging.info('Triggered %d tasks for %d entries', triggered, total)
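# A minimal sketch (not part of the original module) of the task queue worker
# that would receive the payload enqueued above at
# '/internal/taskqueue/cleanup/expired'. The handler class name, the key
# reconstruction via ndb.Key and the assumption that ContentEntry entities are
# root entities are illustrative only; the real worker may differ. It relies
# on the module-level imports already used above (json, logging, ndb, model,
# webapp2).
class InternalCleanupExpiredWorker(webapp2.RequestHandler):
  def post(self):
    # The body is the JSON list of ContentEntry string ids built by the cron
    # handler above.
    key_ids = json.loads(self.request.body)
    keys = [ndb.Key(model.ContentEntry, key_id) for key_id in key_ids]
    ndb.delete_multi(keys)
    logging.info('Deleted %d expired ContentEntry entities', len(keys))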
def get_json(request, response, handler, resolution, description, order):
  """Returns the statistic data as a Google Visualization compatible reply.

  The return can be either JSON or JSONP, depending on whether the header
  'X-DataSource-Auth' is set in the request.

  Note that this is not real JSON, as explained in
  developers.google.com/chart/interactive/docs/dev/implementing_data_source

  Exposes the data in the format described at
  https://developers.google.com/chart/interactive/docs/reference#dataparam
  and
  https://developers.google.com/chart/interactive/docs/querylanguage

  Arguments:
  - request: A webapp2.Request.
  - response: A webapp2.Response.
  - handler: A StatisticsFramework.
  - resolution: One of 'days', 'hours' or 'minutes'.
  - description: Dict describing the columns.
  - order: List describing the order to use for the columns.

  Raises:
    ValueError if a 400 should be returned.
  """
  tqx_args = process_tqx(request.params.get('tqx', ''))
  duration = utils.get_request_as_int(request, 'duration', 120, 1, 256)
  now = None
  now_text = request.params.get('now')
  if now_text:
    now = utils.parse_datetime(now_text)
  table = stats_framework.get_stats(handler, resolution, now, duration, True)
  return get_json_raw(request, response, table, description, order, tqx_args)
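# A hypothetical caller (not in the original module) showing the expected
# shape of the 'description' and 'order' arguments: 'description' maps column
# ids to (type, label) pairs in the Google Visualization style, and 'order'
# lists the column ids in display order. The column names and the
# 'stats_handler' instance are made up for illustration.
_EXAMPLE_DESCRIPTION = {
    'key': ('datetime', 'Time'),
    'requests': ('number', 'Total requests'),
    'failures': ('number', 'Failures'),
}
_EXAMPLE_ORDER = ('key', 'requests', 'failures')

# Example use inside a webapp2 handler:
#   def get(self):
#     get_json(self.request, self.response, stats_handler, 'hours',
#              _EXAMPLE_DESCRIPTION, _EXAMPLE_ORDER)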
@ndb.tasklet
def rebuild_task_cache_async(payload):
  """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check for the ones which can run
  the task.

  Warning: There's a race condition, where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  bots that can run the task, this can take a long time to store all the
  entities on a new kind of request. As such, it must be called in the
  backend.

  Arguments:
  - payload: dict as created in assert_task_async() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _EXTEND_VALIDITY for how long this
      cache is valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
  data = json.loads(payload)
  logging.debug('rebuild_task_cache(%s)', data)
  task_dimensions = data[u'dimensions']
  task_dimensions_hash = int(data[u'dimensions_hash'])
  valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
  task_dimensions_key = _get_task_dimensions_key(task_dimensions_hash,
                                                 task_dimensions)
  now = utils.utcnow()

  expanded_task_dimensions_flats = expand_dimensions_to_flats(task_dimensions)

  try:
    yield [
        _refresh_all_BotTaskDimensions_async(now, valid_until_ts, df,
                                             task_dimensions_hash)
        for df in expanded_task_dimensions_flats
    ]
    # Done updating, now store the entity. Must use a transaction as there
    # could be other dimensions set in the entity.
    yield _refresh_TaskDimensions_async(now, valid_until_ts,
                                        expanded_task_dimensions_flats,
                                        task_dimensions_key)
  finally:
    # Any of the calls above could throw. Log how far along we processed.
    duration = (utils.utcnow() - now).total_seconds()
    logging.debug(
        'rebuild_task_cache(%d) in %.3fs\n%s\ndimensions_flat size=%d',
        task_dimensions_hash, duration, task_dimensions,
        len(expanded_task_dimensions_flats))
  raise ndb.Return(True)
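# A minimal sketch (not part of the original module) of how the payload
# decoded above could be produced and the tasklet driven synchronously. Per
# the docstring, the payload is normally built by assert_task_async() and
# _EXTEND_VALIDITY is a module-level timedelta; both are assumptions here, as
# is utils.encode_to_json() serializing the datetime to a string that
# utils.parse_datetime() accepts.
def _example_rebuild(dimensions, dimensions_hash):
  payload = utils.encode_to_json({
      'dimensions': dimensions,
      'dimensions_hash': dimensions_hash,
      'valid_until_ts': utils.utcnow() + _EXTEND_VALIDITY,
  })
  # get_result() blocks until the tasklet and all its sub-futures complete.
  return rebuild_task_cache_async(payload).get_result()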
def rebuild_task_cache(payload):
  """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check for the ones which can run
  the task.

  Warning: There's a race condition, where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  bots that can run the task, this can take a long time to store all the
  entities on a new kind of request. As such, it must be called in the
  backend.

  Arguments:
  - payload: dict as created in assert_task() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _ADVANCE for how long this cache is
      valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
  data = json.loads(payload)
  logging.debug('rebuild_task_cache(%s)', data)
  dimensions = data[u'dimensions']
  dimensions_hash = int(data[u'dimensions_hash'])
  valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
  dimensions_flat = []
  for k, values in dimensions.iteritems():
    for v in values:
      dimensions_flat.append(u'%s:%s' % (k, v))
  dimensions_flat.sort()

  now = utils.utcnow()
  updated = 0
  viable = 0
  try:
    pending = set()
    for bot_task_key in _yield_BotTaskDimensions_keys(
        dimensions_hash, dimensions_flat):
      viable += 1
      future = _refresh_BotTaskDimensions(
          bot_task_key, dimensions_flat, now, valid_until_ts)
      pending.add(future)
      updated += sum(1 for i in _cap_futures(pending) if i)
    updated += sum(1 for i in _flush_futures(pending) if i)

    # Done updating, now store the entity. Must use a transaction as there
    # could be other dimensions set in the entity.
    task_dims_key = _get_task_dims_key(dimensions_hash, dimensions)

    def run():
      obj = task_dims_key.get()
      if not obj:
        obj = TaskDimensions(key=task_dims_key)
      if obj.assert_request(now, valid_until_ts, dimensions_flat):
        obj.put()
      return obj

    try:
      # Retry often. This transaction tends to fail frequently, and this is
      # running from a task queue so it's fine if it takes more time; success
      # is more important.
      datastore_utils.transaction(run, retries=4)
    except datastore_utils.CommitError as e:
      # Still log an error but no need for a stack trace in the logs. It is
      # important to surface that the call failed so the task queue is retried
      # later.
      logging.error('Failed updating TaskDimensions: %s', e)
      return False
  finally:
    # Any of the _refresh_BotTaskDimensions() calls above could throw. Still
    # log how far we went.
    logging.debug(
        'rebuild_task_cache(%d) in %.3fs. viable bots: %d; bots updated: %d\n'
        '%s',
        dimensions_hash, (utils.utcnow() - now).total_seconds(), viable,
        updated, '\n'.join(' ' + d for d in dimensions_flat))
  return True
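# A minimal sketch (not in the original module) of the future-throttling
# pattern used above: keep a bounded number of ndb futures in flight,
# harvesting completed ones while more work is queued, then drain the rest.
# _cap_futures()/_flush_futures() are defined elsewhere in the module; the
# bodies below only illustrate the shape they are assumed to have, with the
# 'limit' value being a placeholder.
def _cap_futures_sketch(futures, limit=50):
  # Once more than 'limit' futures are pending, block until at least one
  # completes and return the completed results; otherwise return nothing so
  # the caller keeps queuing.
  out = []
  while len(futures) > limit:
    ndb.Future.wait_any(futures)
    done = [f for f in futures if f.done()]
    futures.difference_update(done)
    out.extend(f.get_result() for f in done)
  return out

def _flush_futures_sketch(futures):
  # Drains whatever is still pending once the producing loop is finished.
  return [f.get_result() for f in futures]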
def rebuild_task_cache(payload):
  """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check for the ones which can run
  the task.

  Warning: There's a race condition, where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  bots that can run the task, this can take a long time to store all the
  entities on a new kind of request. As such, it must be called in the
  backend.

  Arguments:
  - payload: dict as created in assert_task() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _EXTEND_VALIDITY for how long this
      cache is valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
  data = json.loads(payload)
  logging.debug('rebuild_task_cache(%s)', data)
  dimensions = data[u'dimensions']
  dimensions_hash = int(data[u'dimensions_hash'])
  valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
  dimensions_flat = []
  for k, values in dimensions.iteritems():
    for v in values:
      dimensions_flat.append(u'%s:%s' % (k, v))
  dimensions_flat.sort()

  now = utils.utcnow()
  # Number of BotTaskDimensions entities that were created/updated in the DB.
  updated = 0
  # Number of BotTaskDimensions entities that matched this task queue.
  viable = 0
  try:
    pending = []
    for bot_task_key in _yield_BotTaskDimensions_keys(
        dimensions_hash, dimensions_flat):
      viable += 1
      future = _refresh_BotTaskDimensions(
          bot_task_key, dimensions_flat, now, valid_until_ts)
      pending.append(future)
      done, pending = _cap_futures(pending)
      updated += sum(1 for i in done if i)
    updated += sum(1 for i in _flush_futures(pending) if i)
    # The main reason for this log entry is to confirm the timing of the first
    # part (updating BotTaskDimensions) versus the second part (updating
    # TaskDimensions).
    logging.debug('Updated %d BotTaskDimensions', updated)

    # Done updating, now store the entity. Must use a transaction as there
    # could be other dimensions set in the entity.
    task_dims_key = _get_task_dims_key(dimensions_hash, dimensions)

    # First do a dry run. If the dry run passes, skip the transaction.
    #
    # The rationale is that there can be concurrent triggers of this task
    # queue (rebuild-cache) when there are concurrent task creations. The dry
    # run costs little overhead and if it passes, it saves transaction
    # contention.
    #
    # The transaction contention can be problematic on pools with a high
    # cardinality of the dimension sets.
    obj = task_dims_key.get()
    if not obj or obj.assert_request(now, valid_until_ts, dimensions_flat):
      def _run():
        action = None
        obj = task_dims_key.get()
        if not obj:
          obj = TaskDimensions(key=task_dims_key)
          action = 'created'
        if obj.assert_request(now, valid_until_ts, dimensions_flat):
          if not action:
            action = 'updated'
          if not obj.sets:
            obj.key.delete()
            return 'deleted'
          obj.put()
        return action

      # Do an adhoc transaction instead of using datastore_utils.transaction().
      # This is because for some pools, the transaction rate may be so high
      # that it's impossible to get good performance on the entity group.
      #
      # In practice the odds of a conflict are ~nil, because it can only
      # conflict if a TaskDimensions.set has more than one item and this
      # happens when there's a hash conflict (odds 2^31) plus two concurrent
      # tasks running simultaneously (over the _EXTEND_VALIDITY period), so we
      # can do it in a more adhoc way.
      key = '%s:%s' % (
          task_dims_key.parent().string_id(), task_dims_key.string_id())
      if not memcache.add(key, True, time=60, namespace='task_queues_tx'):
        # add() returns True if the entry was added, False otherwise. That's
        # perfect.
        logging.warning('Failed taking pseudo-lock for %s; reenqueuing', key)
        return False
      try:
        action = _run()
      finally:
        memcache.delete(key, namespace='task_queues_tx')

      # Keeping this dead code for now, in case we find a solution for the
      # transaction rate issue.
      # try:
      #   action = datastore_utils.transaction(_run, retries=4)
      # except datastore_utils.CommitError as e:
      #   # Still log an error but no need for a stack trace in the logs. It
      #   # is important to surface that the call failed so the task queue is
      #   # retried later.
      #   logging.warning('Failed updating TaskDimensions: %s; reenqueuing', e)
      #   return False

      if action:
        # Only log at info level when something was done. This helps with
        # quickly scanning the logs.
        logging.info('Did %s', action)
      else:
        logging.debug('Did nothing')
    else:
      logging.debug('Skipped transaction!')
  finally:
    # Any of the _refresh_BotTaskDimensions() calls above could throw. Still
    # log how far we went.
    msg = (
        'rebuild_task_cache(%d) in %.3fs. viable bots: %d; bots updated: %d\n'
        '%s')
    dims = '\n'.join(' ' + d for d in dimensions_flat)
    duration = (utils.utcnow() - now).total_seconds()
    # Only log at info level when something was done. This helps with quickly
    # scanning the logs.
    if updated:
      logging.info(msg, dimensions_hash, duration, viable, updated, dims)
    else:
      logging.debug(msg, dimensions_hash, duration, viable, updated, dims)
  return True
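# A minimal sketch (not part of the original module) of the memcache
# pseudo-lock pattern used above, shown in isolation. The 60s expiration and
# the 'task_queues_tx' namespace are taken from the code above; the helper
# name and the work being guarded are placeholders. memcache.add() only
# succeeds for the first caller, which is what makes it usable as a
# best-effort mutex.
def _with_pseudo_lock(lock_key, work_fn):
  if not memcache.add(lock_key, True, time=60, namespace='task_queues_tx'):
    # Another worker holds the lock; signal the caller to re-enqueue.
    return None
  try:
    return work_fn()
  finally:
    # Release early so a retry does not have to wait for the 60s expiration.
    memcache.delete(lock_key, namespace='task_queues_tx')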