def test_simple_group_by():
    results = []

    def simple_handler():
        results.append(1)

    batcher = PrioritizedBatcher()
    batcher.start()
    batcher.add('test', simple_handler, group_by='group1')
    batcher.add('test', simple_handler, group_by='group2')
    batcher.add('test', simple_handler, group_by='group1')
    batcher.commit()

    assert len(results) == 2
def test_priorities():
    results = []

    def simple_handler(value):
        def handler():
            results.append(value)
        return handler

    batcher = PrioritizedBatcher(priorities={'test1': 2, 'test2': 1})
    batcher.start()
    batcher.add('test1', simple_handler(1))
    batcher.add('test2', simple_handler(2))
    batcher.add('test1', simple_handler(3))
    batcher.commit()

    # Channel 'test2' has the lower priority value, so its handler runs
    # first; handlers within 'test1' keep their insertion order.
    assert results == [2, 1, 3]
def test_batch():
    results = []

    def simple_handler():
        results.append(1)

    batcher = PrioritizedBatcher()
    assert not batcher.is_started
    batcher.start()
    assert batcher.is_started
    batcher.add('test', simple_handler)
    batcher.add('test', simple_handler)
    batcher.add('test', simple_handler)
    batcher.commit()

    assert len(results) == 3
    assert not batcher.is_started
async def run(self):
    """Run the main listener run loop.

    Doesn't return until :meth:`terminate` is called.
    """
    logger.info(__(
        "Starting Resolwe listener on channel '{}'.",
        state.MANAGER_EXECUTOR_CHANNELS.queue
    ))

    while not self._should_stop:
        await self.push_stats()

        ret = await self._call_redis(aioredis.Redis.blpop,
                                     state.MANAGER_EXECUTOR_CHANNELS.queue,
                                     timeout=1)
        if ret is None:
            self.load_avg.add(0)
            continue

        remaining = await self._call_redis(aioredis.Redis.llen,
                                           state.MANAGER_EXECUTOR_CHANNELS.queue)
        self.load_avg.add(remaining + 1)
        self.check_critical_load()

        _, item = ret
        try:
            item = item.decode('utf-8')
            logger.debug(__("Got command from executor: {}", item))
            obj = json.loads(item)
        except json.JSONDecodeError:
            logger.error(__(
                "Undecodable command packet:\n\n{}",
                traceback.format_exc()
            ))
            continue

        command = obj.get(ExecutorProtocol.COMMAND, None)
        if command is None:
            continue

        service_start = time.perf_counter()

        handler = getattr(self, 'handle_' + command, None)
        if handler:
            try:
                with PrioritizedBatcher.global_instance():
                    await database_sync_to_async(handler)(obj)
            except Exception:  # pylint: disable=broad-except
                logger.error(__(
                    "Executor command handling error:\n\n{}",
                    traceback.format_exc()
                ))
        else:
            logger.error(
                __("Unknown executor command '{}'.", command),
                extra={'decoded_packet': obj}
            )

        # We do want to measure wall-clock time elapsed, because
        # system load will impact event handling performance. On
        # a lagging system, good internal performance is meaningless.
        service_end = time.perf_counter()
        self.service_time.update(service_end - service_start)

    logger.info(__(
        "Stopping Resolwe listener on channel '{}'.",
        state.MANAGER_EXECUTOR_CHANNELS.queue
    ))
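# A hedged sketch of the kind of packet the loop above decodes: the executor
# pushes a JSON object onto MANAGER_EXECUTOR_CHANNELS.queue whose 'command'
# key (ExecutorProtocol.COMMAND) selects a handle_<command> method. Field
# names follow the handle_update docstring further down; the concrete values
# here are made up for illustration.
import json

packet = json.dumps({
    'command': 'update',            # dispatched to handle_update()
    'data_id': 42,                  # hypothetical Data primary key
    'changeset': {'status': 'OK'},  # hypothetical fields to change
})
# An executor-side push of this string onto the Redis queue is what the
# blpop call in the loop above would pick up.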
def test_context_manager():
    results = []

    def simple_handler():
        results.append(1)

    with PrioritizedBatcher() as batcher:
        batcher.add('test', simple_handler)

    assert len(results) == 1
def notify_observers(table, kind, primary_key=None):
    """Transmit ORM table change notification.

    :param table: Name of the table that has changed
    :param kind: Change type
    :param primary_key: Primary key of the affected instance
    """
    if IN_MIGRATIONS:
        return

    # Don't propagate events when there are no observers to receive them.
    if not Observer.objects.filter(dependencies__table=table).exists():
        return

    def handler():
        """Send a notification to the given channel."""
        try:
            async_to_sync(get_channel_layer().send)(
                CHANNEL_MAIN,
                {
                    'type': TYPE_ORM_NOTIFY,
                    'table': table,
                    'kind': kind,
                    'primary_key': str(primary_key),
                },
            )
        except ChannelFull:
            logger.exception("Unable to notify workers.")

    batcher = PrioritizedBatcher.global_instance()
    if batcher.is_started:
        # If a batch is open, queue the send via the batcher.
        batcher.add(
            'rest_framework_reactive',
            handler,
            group_by=(table, kind, primary_key),
        )
    else:
        # If no batch is open, invoke immediately.
        handler()
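# A hedged usage sketch of the batching path above: callers open a batch
# around a block of ORM writes so repeated notifications for the same
# (table, kind, primary_key) collapse into a single channel send. The
# `instance` object and the signal wiring that invokes notify_observers()
# are hypothetical here.
with PrioritizedBatcher.global_instance():
    instance.save()  # a signal handler calls notify_observers(...) -> queued
    instance.save()  # same (table, kind, primary_key) -> coalesced by group_by
# On batch commit, one message is sent to CHANNEL_MAIN instead of two.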
def build(self, obj=None, queryset=None, push=True):
    """Build indexes."""
    if obj is not None and queryset is not None:
        raise ValueError(
            "Only one of 'obj' and 'queryset' parameters can be passed to the build method."
        )

    if obj is not None:
        if self.queryset.model != obj._meta.model:  # pylint: disable=protected-access
            logger.debug(
                "Object type mismatch, skipping build of '%s' Elasticsearch index.",
                self.__class__.__name__
            )
            return

        if not self.queryset.filter(pk=self.get_object_id(obj)).exists():
            logger.debug(
                "Object not in predefined queryset, skipping build of '%s' Elasticsearch index.",
                self.__class__.__name__
            )
            return
    elif queryset is not None:
        if self.queryset.model != queryset.model:
            logger.debug(
                "Queryset type mismatch, skipping build of '%s' Elasticsearch index.",
                self.__class__.__name__
            )
            return

    FULL_REBUILD = 'full'  # pylint: disable=invalid-name

    def handler(agg=None):
        """Index build handler."""
        if agg == FULL_REBUILD:
            queryset = self.queryset.all()
        else:
            queryset = self.queryset.none().union(*agg)

        self._build(queryset=queryset, push=push)

    def aggregator(agg=None):
        """Index build aggregator."""
        if agg == FULL_REBUILD:
            # A full rebuild is required, ignore any other builds.
            pass
        else:
            if agg is None:
                agg = []

            if obj is not None:
                # Build of a single object.
                agg.append(self.queryset.filter(pk=obj.pk))
            elif queryset is not None:
                # Build of multiple objects.
                agg.append(queryset)
            else:
                # Full rebuild, ignore any other builds.
                agg = FULL_REBUILD

        return agg

    batcher = PrioritizedBatcher.global_instance()
    if batcher.is_started:
        batcher.add('resolwe.elastic', handler,
                    group_by=(self._index_name, push), aggregator=aggregator)
    else:
        self._build(obj=obj, queryset=queryset, push=push)
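# To illustrate the aggregator contract above: within one open batch,
# successive build() calls that share the (index_name, push) group key fold
# their querysets into a single list, which handler() unions into one
# _build() call at commit time, and a parameterless build() collapses
# everything into a full rebuild. A hedged sketch; `index`, `sample`, and
# `samples` are hypothetical.
with PrioritizedBatcher.global_instance():
    index.build(obj=sample)        # agg == [queryset filtered to sample.pk]
    index.build(queryset=samples)  # agg == [..., samples]
    index.build()                  # agg becomes FULL_REBUILD
# On commit, handler(FULL_REBUILD) runs once and rebuilds the whole index.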
def test_exception_in_handler():
    results = []

    def simple_handler():
        results.append(1)

    def exception_handler():
        raise Exception

    batcher = PrioritizedBatcher()
    batcher.start()
    batcher.add('test', simple_handler)
    batcher.add('test', exception_handler)
    batcher.add('test', simple_handler)
    batcher.commit()

    # The failing handler is skipped; the remaining handlers still run.
    assert len(results) == 2
def test_aggregation():
    results = []

    def simple_handler(agg):
        results.append(agg)

    def aggregate(value):
        def aggregator(state=None):
            if state is None:
                state = []
            state.append(value)
            return state
        return aggregator

    batcher = PrioritizedBatcher()
    batcher.start()
    batcher.add('test', simple_handler, group_by='group1', aggregator=aggregate(1))
    batcher.add('test', simple_handler, group_by='group2', aggregator=aggregate(2))
    batcher.add('test', simple_handler, group_by='group1', aggregator=aggregate(3))
    batcher.commit()

    assert results == [[1, 3], [2]]
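# Taken together, the tests above pin down the batcher contract: lower
# priority values commit first (unprioritized channels defaulting to 0),
# group_by deduplicates handlers within a channel, aggregators fold per-group
# state that is passed to the handler on commit, and a handler exception
# aborts only that handler. A minimal sketch consistent with those
# assertions follows; it is NOT the actual Resolwe implementation, and all
# internals are assumptions inferred from the tests.
import collections
import logging

logger = logging.getLogger(__name__)

_NO_STATE = object()  # sentinel: entry was added without an aggregator


class PrioritizedBatcher:
    """Sketch of the batcher contract exercised by the tests above."""

    _instance = None

    def __init__(self, priorities=None):
        self.priorities = priorities or {}
        self._batch = None

    @classmethod
    def global_instance(cls):
        """Return a process-wide singleton instance."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @property
    def is_started(self):
        return self._batch is not None

    def start(self):
        """Open a new batch; subsequent add() calls are queued."""
        self._batch = collections.OrderedDict()

    def add(self, channel, handler, group_by=None, aggregator=None):
        """Queue a handler, coalescing entries that share (channel, group_by)."""
        # Entries without group_by get a unique key, so they never coalesce.
        key = (channel, group_by) if group_by is not None else (channel, object())
        if aggregator is not None:
            previous = self._batch[key][2] if key in self._batch else None
            state = aggregator(previous)
        else:
            state = _NO_STATE
        self._batch[key] = (channel, handler, state)

    def commit(self):
        """Run queued handlers ordered by channel priority, then close the batch."""
        # sorted() is stable, so insertion order is kept within one priority.
        entries = sorted(
            self._batch.values(),
            key=lambda entry: self.priorities.get(entry[0], 0),
        )
        self._batch = None
        for _, handler, state in entries:
            try:
                handler() if state is _NO_STATE else handler(state)
            except Exception:  # one failing handler must not abort the batch
                logger.exception("Error in batched handler.")

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *exc_info):
        self.commit()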
def process_command(self, message: Message) -> Response:
    """Process a single command from the peer.

    This method runs wrapped in database_sync_to_async, so it is safe to
    perform Django ORM operations inside it. Exceptions are handled one
    level up and an error response is sent in that case.
    """
    # This worker must be in status processing or preparing.
    # All messages from workers not in one of these statuses will be
    # discarded and an error will be returned.
    if self.worker.status not in [
        Worker.STATUS_PROCESSING,
        Worker.STATUS_PREPARING,
    ]:
        self._log_error(
            f"Wrong worker status: {self.worker.status} for peer with id {self.data_id}."
        )
        return message.respond_error(
            f"Wrong worker status: {self.worker.status}")

    command_name = message.command_name
    handler_name = f"handle_{command_name}"
    handler = plugin_manager.get_handler(command_name)
    if not handler:
        error = f"No command handler for '{command_name}'."
        self._log_error(error, save_to_data_object=False)
        return message.respond_error(error)

    # Read the sequence number and refresh the data object if it differs.
    if self.expected_sequence_number != message.sequence_number:
        try:
            self.data.refresh_from_db()
            self.worker.refresh_from_db()
        except Exception:
            self._log_exception("Unable to refresh data object")
            return message.respond_error(
                "Unable to refresh the data object")

    if self.worker.status != Worker.STATUS_PROCESSING:
        self.worker.status = Worker.STATUS_PROCESSING
        self.worker.save(update_fields=["status"])

    if self.data.started is None:
        self.data.started = now()
        self.data.save(update_fields=["started"])

    self.expected_sequence_number = message.sequence_number + 1
    try:
        with PrioritizedBatcher.global_instance():
            result = handler(message, self)
        # Set the status of the response to ERROR when the data object
        # status is Data.STATUS_ERROR. Such a response triggers the
        # terminate procedure in the processing container and stops
        # processing.
        if self.data.status == Data.STATUS_ERROR:
            result.type_data = ResponseStatus.ERROR.value
        return result
    except ValidationError as err:
        error = (
            f"Validation error when saving Data object of process "
            f"'{self.data.process.slug}' ({handler_name}): {err}"
        )
        self._log_exception(error)
        return message.respond_error("Validation error")
    except Exception as err:
        error = f"Error in command handler '{handler_name}': {err}"
        self._log_exception(error)
        return message.respond_error(
            f"Error in command handler '{handler_name}'")
def handle_update(self, obj, internal_call=False):
    """Handle an incoming ``Data`` object update request.

    :param obj: The Channels message object. Command object format:

        .. code-block:: none

            {
                'command': 'update',
                'data_id': [id of the :class:`~resolwe.flow.models.Data`
                    object this command changes],
                'changeset': {
                    [keys to be changed]
                }
            }

    :param internal_call: If ``True``, this is an internal delegate call,
        so a reply to the executor won't be sent.
    """
    data_id = obj[ExecutorProtocol.DATA_ID]
    changeset = obj[ExecutorProtocol.UPDATE_CHANGESET]
    if not internal_call:
        logger.debug(
            __("Handling update for Data with id {} (handle_update).", data_id),
            extra={
                'data_id': data_id,
                'packet': obj,
            }
        )

    try:
        d = Data.objects.get(pk=data_id)
    except Data.DoesNotExist:
        logger.warning(
            "Data object does not exist (handle_update).",
            extra={
                'data_id': data_id,
            }
        )
        if not internal_call:
            async_to_sync(self._send_reply)(obj, {
                ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR,
            })

        async_to_sync(consumer.send_event)({
            WorkerProtocol.COMMAND: WorkerProtocol.ABORT,
            WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID],
            WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get(
                    'NAME', 'resolwe.flow.executors.local'),
            },
        })
        return

    if changeset.get('status', None) == Data.STATUS_ERROR:
        logger.error(
            __("Error occurred while running process '{}' (handle_update).", d.process.slug),
            extra={
                'data_id': data_id,
                'api_url': '{}{}'.format(
                    getattr(settings, 'RESOLWE_HOST_URL', ''),
                    reverse('resolwe-api:data-detail', kwargs={'pk': data_id})
                ),
            }
        )

    if d.status == Data.STATUS_ERROR:
        # Once a Data object is in error, it stays in error.
        changeset['status'] = Data.STATUS_ERROR

    if not d.started:
        changeset['started'] = now()
    changeset['modified'] = now()

    for key, val in changeset.items():
        if key in ['process_error', 'process_warning', 'process_info']:
            # Trim process_* fields to not exceed the max length of the database field.
            for i, entry in enumerate(val):
                max_length = Data._meta.get_field(key).base_field.max_length  # pylint: disable=protected-access
                if len(entry) > max_length:
                    val[i] = entry[:max_length - 3] + '...'

            getattr(d, key).extend(val)
        elif key != 'output':
            setattr(d, key, val)

    if 'output' in changeset:
        if not isinstance(d.output, dict):
            d.output = {}
        for key, val in changeset['output'].items():
            dict_dot(d.output, key, val)

    try:
        with PrioritizedBatcher.global_instance():
            d.save(update_fields=list(changeset.keys()))
    except ValidationError as exc:
        logger.error(
            __("Validation error when saving Data object of process '{}' (handle_update):\n\n{}",
               d.process.slug, traceback.format_exc()),
            extra={'data_id': data_id}
        )

        d.refresh_from_db()
        d.process_error.append(exc.message)
        d.status = Data.STATUS_ERROR
        try:
            with PrioritizedBatcher.global_instance():
                d.save(update_fields=['process_error', 'status'])
        except Exception:  # pylint: disable=broad-except
            pass
    except Exception:  # pylint: disable=broad-except
        logger.error(
            __("Error when saving Data object of process '{}' (handle_update):\n\n{}",
               d.process.slug, traceback.format_exc()),
            extra={'data_id': data_id}
        )

    if not internal_call:
        async_to_sync(self._send_reply)(obj, {
            ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK,
        })