def process_data_object(data: Data):
    """Move one resolving data object to its next state.

    The row is fetched again under a row lock. The object is left untouched
    when it is no longer resolving or when its dependencies are not done yet.
    It is marked as failed when a dependency has failed or when the execution
    engine raises. Otherwise it is saved (as waiting, unless the engine has
    already marked it as done) and, for every process language other than
    "workflow", handed to ``self._data_execute`` once the transaction commits.

    :param data: the data object to process; only its primary key is used
        here, the instance itself is reloaded with ``select_for_update``.
    """

    def flag_worker_error(obj):
        """Mark the worker attached to ``obj`` (if any) as failed while preparing."""
        if hasattr(obj, "worker"):
            obj.worker.status = Worker.STATUS_ERROR_PREPARING
            obj.worker.save(update_fields=["status"])

    # Only this single row is locked, and the transaction is kept as short as
    # possible: that reduces contention, avoids deadlocks and lets managers
    # running in parallel work on different objects at the same time.
    locked = Data.objects.select_for_update().get(pk=data.pk)
    if locked.status != Data.STATUS_RESOLVING:
        # Somebody else may have processed the object while we were waiting
        # for the lock; there is nothing left to do then.
        return

    upstream_status = dependency_status(locked)
    if upstream_status == Data.STATUS_ERROR:
        locked.status = Data.STATUS_ERROR
        locked.process_error.append("One or more inputs have status ERROR")
        locked.process_rc = 1
        locked.save()
        flag_worker_error(locked)
        return
    if upstream_status != Data.STATUS_DONE:
        # Dependencies are still being computed.
        return

    needs_executor = False
    if locked.process.run:
        try:
            # Verify that the execution engine exists. Workflows are
            # evaluated right here, everything else goes to the executor.
            language = locked.process.run.get("language", None)
            engine = self.get_execution_engine(language)
            needs_executor = language != "workflow"
            if needs_executor:
                # Record the resources allocated to the process.
                limits = locked.process.get_resource_limits()
                locked.process_memory = limits["memory"]
                locked.process_cores = limits["cores"]
            else:
                engine.evaluate(locked)
        except (ExecutionError, InvalidEngineError) as exc:
            locked.status = Data.STATUS_ERROR
            locked.process_error.append(
                "Error in process script: {}".format(exc)
            )
            locked.save()
            flag_worker_error(locked)
            return

    if locked.status != Data.STATUS_DONE:
        # The execution engine may already have marked the object as done;
        # in that case the status must not be reverted to waiting.
        locked.status = Data.STATUS_WAITING
    locked.save(render_name=True)

    # Start the executor only after the transaction has committed, and only
    # for objects that were not already evaluated above.
    if needs_executor:
        # The object is bound as a default argument so that the callback keeps
        # this exact instance even though the caller iterates over many.
        transaction.on_commit(lambda obj=locked: self._data_execute(obj))
def process_data_object(data: Data):
    """Move one resolving data object to its next state.

    The row is fetched again under a row lock. The object is left untouched
    when it is no longer resolving or when its dependencies are not done yet.
    It is marked as failed when a dependency has failed or when the execution
    engine raises. Otherwise it is saved (as waiting, unless the engine has
    already marked it as done) and ``self._data_execute`` is scheduled, with
    the evaluated program, for when the transaction commits.

    :param data: the data object to process; only its primary key is used
        here, the instance itself is reloaded with ``select_for_update``.
    """

    def flag_worker_error(obj):
        """Mark the worker attached to ``obj`` (if any) as failed while preparing."""
        if hasattr(obj, "worker"):
            obj.worker.status = Worker.STATUS_ERROR_PREPARING
            obj.worker.save(update_fields=["status"])

    # Only this single row is locked, and the transaction is kept as short as
    # possible: that reduces contention, avoids deadlocks and lets managers
    # running in parallel work on different objects at the same time.
    locked = Data.objects.select_for_update().get(pk=data.pk)
    if locked.status != Data.STATUS_RESOLVING:
        # Somebody else may have processed the object while we were waiting
        # for the lock; there is nothing left to do then.
        return

    upstream_status = dependency_status(locked)
    if upstream_status == Data.STATUS_ERROR:
        locked.status = Data.STATUS_ERROR
        locked.process_error.append("One or more inputs have status ERROR")
        locked.process_rc = 1
        locked.save()
        flag_worker_error(locked)
        return
    if upstream_status != Data.STATUS_DONE:
        # Dependencies are still being computed.
        return

    # Without a run section there is nothing to execute. The program is an
    # empty string rather than None because, per the original note, a None
    # program would leave the process stuck in the waiting state.
    program = ""
    if locked.process.run:
        try:
            language = locked.process.run.get("language", None)
            engine = self.get_execution_engine(language)
            # Evaluation may spawn additional data objects and issue further
            # database queries. The only explicit lock taken here is the one
            # on the object being processed; if an execution engine ever
            # introduces more locks, its queries must be audited for
            # possible deadlocks.
            program = engine.evaluate(locked)
        except (ExecutionError, InvalidEngineError) as exc:
            locked.status = Data.STATUS_ERROR
            locked.process_error.append(
                "Error in process script: {}".format(exc)
            )
            locked.save()
            flag_worker_error(locked)
            return

        # Record the resources allocated to the process.
        limits = locked.process.get_resource_limits()
        locked.process_memory = limits["memory"]
        locked.process_cores = limits["cores"]

    if locked.status != Data.STATUS_DONE:
        # The execution engine may already have marked the object as done;
        # in that case the status must not be reverted to waiting.
        locked.status = Data.STATUS_WAITING
    locked.save(render_name=True)

    # Start execution only after the transaction has committed. The values
    # are bound as default arguments so that the callback keeps exactly this
    # object and program even though the caller iterates over many.
    transaction.on_commit(
        lambda obj=locked, prog=program: self._data_execute(obj, prog)
    )