Example #1
0
        def process_data_object(data: Data):
            """Advance one resolving data object to its next state.

            The object is re-read under ``select_for_update`` and skipped unless it is
            still in ``STATUS_RESOLVING``. Based on the status of its dependencies it is
            either marked as failed, left untouched, or moved to ``STATUS_WAITING``
            (unless the execution engine has already marked it as done). A process whose
            language is ``"workflow"`` is evaluated in place; any other process with a
            run section is handed to ``self._data_execute`` once the transaction commits.
            """

            def flag_worker_failure(obj):
                """Switch the attached worker, if any, to STATUS_ERROR_PREPARING."""
                if hasattr(obj, "worker"):
                    obj.worker.status = Worker.STATUS_ERROR_PREPARING
                    obj.worker.save(update_fields=["status"])

            # Take the lock on this single object only. Keeping the transaction short and
            # locking one object at a time (instead of every resolving object) reduces
            # contention, avoids deadlocks and lets parallel managers work on other objects.
            data = Data.objects.select_for_update().get(pk=data.pk)
            if data.status != Data.STATUS_RESOLVING:
                # Somebody else may have processed the object while we waited for the lock.
                return

            inputs_state = dependency_status(data)
            if inputs_state == Data.STATUS_ERROR:
                data.status = Data.STATUS_ERROR
                data.process_error.append("One or more inputs have status ERROR")
                data.process_rc = 1
                data.save()
                flag_worker_failure(data)
                return
            if inputs_state != Data.STATUS_DONE:
                # Dependencies are neither failed nor done: leave the object as it is.
                return

            needs_executor = False
            run_section = data.process.run
            if run_section:
                try:
                    # Resolving the engine doubles as a check that it is valid.
                    language = run_section.get("language", None)
                    engine = self.get_execution_engine(language)
                    needs_executor = language != "workflow"
                    if needs_executor:
                        # Record the resources allocated to the process.
                        limits = data.process.get_resource_limits()
                        data.process_memory = limits["memory"]
                        data.process_cores = limits["cores"]
                    else:
                        # Workflows are evaluated right here instead of in an executor.
                        engine.evaluate(data)
                except (ExecutionError, InvalidEngineError) as exc:
                    data.status = Data.STATUS_ERROR
                    data.process_error.append(
                        "Error in process script: {}".format(exc)
                    )
                    data.save()
                    flag_worker_failure(data)
                    return

            if data.status != Data.STATUS_DONE:
                # The execution engine may already have marked the object as done; only
                # objects that are not done yet are moved to STATUS_WAITING.
                data.status = Data.STATUS_WAITING
            data.save(render_name=True)

            # Hand the object over for execution only after the transaction commits, and
            # only when it was not already evaluated above.
            if needs_executor:
                # The default argument pins the current object, since the surrounding
                # loop rebinds the name.
                transaction.on_commit(lambda obj=data: self._data_execute(obj))
Example #2
0
        def process_data_object(data: Data):
            """Advance one resolving data object to its next state.

            The object is re-read under ``select_for_update`` and skipped unless it is
            still in ``STATUS_RESOLVING``. Based on the status of its dependencies it is
            either marked as failed, left untouched, or moved to ``STATUS_WAITING``
            (unless the execution engine has already marked it as done). The result of
            the engine's ``evaluate`` call (or an empty string when the process has no
            run section) is passed to ``self._data_execute`` once the transaction commits.
            """

            def flag_worker_failure(obj):
                """Switch the attached worker, if any, to STATUS_ERROR_PREPARING."""
                if hasattr(obj, "worker"):
                    obj.worker.status = Worker.STATUS_ERROR_PREPARING
                    obj.worker.save(update_fields=["status"])

            # Take the lock on this single object only. Keeping the transaction short and
            # locking one object at a time (instead of every resolving object) reduces
            # contention, avoids deadlocks and lets parallel managers work on other objects.
            data = Data.objects.select_for_update().get(pk=data.pk)
            if data.status != Data.STATUS_RESOLVING:
                # Somebody else may have processed the object while we waited for the lock.
                return

            inputs_state = dependency_status(data)
            if inputs_state == Data.STATUS_ERROR:
                data.status = Data.STATUS_ERROR
                data.process_error.append("One or more inputs have status ERROR")
                data.process_rc = 1
                data.save()
                flag_worker_failure(data)
                return
            if inputs_state != Data.STATUS_DONE:
                # Dependencies are neither failed nor done: leave the object as it is.
                return

            run_section = data.process.run
            if not run_section:
                # Without a run section there is nothing to execute. The program must
                # still not be None, otherwise the process would be stuck in the
                # waiting state.
                program = ""
            else:
                try:
                    language = run_section.get("language", None)
                    engine = self.get_execution_engine(language)
                    # Evaluation may spawn further data objects and issue other database
                    # queries. Only the object being processed is locked explicitly, so
                    # the queries of every execution engine must be audited for
                    # deadlocks if additional locks are ever introduced.
                    program = engine.evaluate(data)
                except (ExecutionError, InvalidEngineError) as exc:
                    data.status = Data.STATUS_ERROR
                    data.process_error.append(
                        "Error in process script: {}".format(exc))
                    data.save()
                    flag_worker_failure(data)
                    return
                else:
                    # Record the resources allocated to the process.
                    limits = data.process.get_resource_limits()
                    data.process_memory = limits["memory"]
                    data.process_cores = limits["cores"]

            if data.status != Data.STATUS_DONE:
                # The execution engine may already have marked the object as done; only
                # objects that are not done yet are moved to STATUS_WAITING.
                data.status = Data.STATUS_WAITING
            data.save(render_name=True)

            # Hand the object over for execution only after the transaction commits.
            # Default arguments pin the current values, since the surrounding loop
            # rebinds the names.
            transaction.on_commit(
                lambda obj=data, prog=program: self._data_execute(obj, prog))