def run(self, data: Data, argv: List):
    """Select a concrete connector and run the process through it.

    :param data: The :class:`~resolwe.flow.models.Data` object that is
        to be run.
    :param argv: The argument vector used to spawn the executor.
    """
    process_scheduling = self.scheduling_class_map[data.process.scheduling_class]
    if "DISPATCHER_MAPPING" in getattr(settings, "FLOW_MANAGER", {}):
        class_name = settings.FLOW_MANAGER["DISPATCHER_MAPPING"][process_scheduling]
    else:
        class_name = getattr(settings, "FLOW_MANAGER", {}).get("NAME", DEFAULT_CONNECTOR)

    data.scheduled = now()
    data.save(update_fields=["scheduled"])

    workload_class = class_name.rsplit(".", maxsplit=1)[1]
    host, port, protocol = self._get_listener_settings(data, workload_class)
    argv[-1] += " {} {} {}".format(host, port, protocol)

    return self.connectors[class_name].submit(data, argv)
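For orientation, a minimal sketch of the FLOW_MANAGER setting that run() consults; the connector paths and scheduling-class keys below are illustrative assumptions, not values taken from any settings module:

# Hypothetical settings sketch: run() above reads DISPATCHER_MAPPING (keyed by
# scheduling class) if present, otherwise falls back to NAME. The dotted paths
# and keys are assumptions for illustration only.
FLOW_MANAGER = {
    # Used when no DISPATCHER_MAPPING is defined.
    "NAME": "resolwe.flow.managers.workload_connectors.local",
    # Optional: route each scheduling class to its own connector.
    "DISPATCHER_MAPPING": {
        "interactive": "resolwe.flow.managers.workload_connectors.local",
        "batch": "resolwe.flow.managers.workload_connectors.slurm",
    },
}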
def _data_execute(self, data: Data):
    """Execute the Data object.

    The activities carried out here include target directory preparation,
    executor copying, setting serialization and actual execution of the
    object.

    :param data: The :class:`~resolwe.flow.models.Data` object to execute.
    """
    logger.debug(__("Manager preparing Data with id {} for processing.", data.id))

    # Prepare the executor's environment.
    try:
        self._prepare_data_dir(data)
        executor_module = ".{}".format(
            getattr(settings, "FLOW_EXECUTOR", {})
            .get("NAME", "resolwe.flow.executors.local")
            .rpartition(".executors.")[-1]
        )
        self._lock_inputs_local_storage_locations(data)
        argv = [
            "/bin/bash",
            "-c",
            getattr(settings, "FLOW_EXECUTOR", {}).get("PYTHON", "/usr/bin/env python")
            + " -m executors "
            + executor_module
            + " {}".format(data.pk),
        ]
        self.executor.prepare_for_execution(data)
    except PermissionDenied as error:
        data.status = Data.STATUS_ERROR
        data.process_error.append("Permission denied for process: {}".format(error))
        data.save()
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save()
        return
    except OSError as err:
        logger.exception(
            __(
                "OSError occurred while preparing data {} (will skip): {}",
                data.id,
                err,
            )
        )
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save()
        return

    # Hand off to the run() method for execution.
    logger.info(__("Running executor for data with id {}", data.pk))
    self.run(data, argv)
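A small worked example of the string handling above, assuming the default executor name and a Data object with pk 42 (both assumptions for illustration):

# Sketch of how _data_execute() derives the executor module and argv,
# assuming FLOW_EXECUTOR uses the default NAME and data.pk == 42.
executor_name = "resolwe.flow.executors.local"
executor_module = ".{}".format(executor_name.rpartition(".executors.")[-1])
assert executor_module == ".local"

argv = ["/bin/bash", "-c", "/usr/bin/env python -m executors .local 42"]
# run() later appends the listener host, port and protocol to argv[-1], so the
# command the connector finally submits looks roughly like:
#   /usr/bin/env python -m executors .local 42 <host> <port> <protocol>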
def test_checksum_consistency(self):
    process = Process(version='1.0.0', slug='my-process')
    data = Data()

    data.input = {'tss': 0, 'genome': 'HG19'}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(checksum, 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c')

    data.input = {'genome': 'HG19', 'tss': 0}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(checksum, 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c')
def setUp(self):
    super(BackendTest, self).setUp()

    self.p = Process(slug='test-processor',
                     name='Test Process',
                     contributor=self.contributor,
                     type='data:test',
                     version=1)
    self.p.save()

    self.d = Data(slug='test-data',
                  name='Test Data',
                  contributor=self.contributor,
                  process=self.p)
    self.d.save()
def test_checksum_consistency(self):
    process = Process(version="1.0.0", slug="my-process")
    data = Data()

    data.input = {"tss": 0, "genome": "HG19"}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(
        checksum, "ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c"
    )

    data.input = {"genome": "HG19", "tss": 0}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(
        checksum, "ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c"
    )
def test_checksum_consistency(self):
    process = Process(version='1.0.0', slug='my-process')
    data = Data()

    data.input = {'tss': 0, 'genome': 'HG19'}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(
        checksum, 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c')

    data.input = {'genome': 'HG19', 'tss': 0}
    checksum = get_data_checksum(data.input, process.slug, process.version)
    self.assertEqual(
        checksum, 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c')
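All variants of this test assert the same property: the checksum does not depend on the order of input keys. A hypothetical sketch of how such an order-independent checksum can be computed (not the actual get_data_checksum implementation, whose serialization and therefore digest may differ):

import hashlib
import json


def data_checksum_sketch(data_input, process_slug, process_version):
    """Order-independent checksum sketch (illustrative, not Resolwe's code)."""
    # Sorting keys during serialization makes {'tss': 0, 'genome': 'HG19'} and
    # {'genome': 'HG19', 'tss': 0} hash to the same value.
    payload = json.dumps(data_input, sort_keys=True) + process_slug + process_version
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


checksum_a = data_checksum_sketch({'tss': 0, 'genome': 'HG19'}, 'my-process', '1.0.0')
checksum_b = data_checksum_sketch({'genome': 'HG19', 'tss': 0}, 'my-process', '1.0.0')
assert checksum_a == checksum_b  # A 64-character SHA-256 hex digest, as in the tests.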
def submit(self, data: Data, argv):
    """Run process with SLURM.

    For details, see
    :meth:`~resolwe.flow.managers.workload_connectors.base.BaseConnector.submit`.
    """
    limits = data.get_resource_limits()
    logger.debug(
        __(
            "Connector '{}' running for Data with id {} ({}).",
            self.__class__.__module__,
            data.id,
            repr(argv),
        )
    )

    # Compute target partition.
    partition = getattr(settings, "FLOW_SLURM_PARTITION_DEFAULT", None)
    if data.process.slug in getattr(settings, "FLOW_SLURM_PARTITION_OVERRIDES", {}):
        partition = settings.FLOW_SLURM_PARTITION_OVERRIDES[data.process.slug]

    try:
        # Make sure the resulting file is executable on creation.
        runtime_dir = storage_settings.FLOW_VOLUMES["runtime"]["config"]["path"]
        script_path = os.path.join(runtime_dir, "slurm-{}.sh".format(data.pk))
        file_descriptor = os.open(script_path, os.O_WRONLY | os.O_CREAT, mode=0o555)
        with os.fdopen(file_descriptor, "wt") as script:
            script.write("#!/bin/bash\n")
            script.write(
                "#SBATCH --mem={}M\n".format(limits["memory"] + EXECUTOR_MEMORY_OVERHEAD)
            )
            script.write("#SBATCH --cpus-per-task={}\n".format(limits["cores"]))
            if partition:
                script.write("#SBATCH --partition={}\n".format(partition))
            script.write(
                "#SBATCH --output slurm-url-{}-job-%j.out\n".format(data.location.subpath)
            )

            # Render the argument vector into a command line.
            line = " ".join(map(shlex.quote, argv))
            script.write(line + "\n")

        command = ["/usr/bin/env", "sbatch", script_path]
        subprocess.Popen(command, cwd=runtime_dir, stdin=subprocess.DEVNULL).wait()
    except OSError as err:
        logger.error(
            __(
                "OSError occurred while preparing SLURM script for Data {}: {}",
                data.id,
                err,
            )
        )
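To make the generated job script concrete, a sketch of what submit() would write for assumed limits of 4096 MB of memory and 4 cores, with EXECUTOR_MEMORY_OVERHEAD taken as 200 MB and no partition override (all concrete values, including the subpath "123", are assumptions):

import shlex

# Rough illustration of the batch script submit() writes; values are assumed.
argv = ["/bin/bash", "-c", "/usr/bin/env python -m executors .local 123"]
script_lines = [
    "#!/bin/bash",
    "#SBATCH --mem={}M".format(4096 + 200),
    "#SBATCH --cpus-per-task=4",
    "#SBATCH --output slurm-url-123-job-%j.out",
    " ".join(map(shlex.quote, argv)),  # the shell-quoted executor command line
]
print("\n".join(script_lines))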
def test_dtlbash(self):
    self.p.slug = 'test-processor-dtlbash'
    self.p.run = {
        'script': """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"
echo '{"proc.info": "foo"}'
"""
    }
    self.p.save()

    self.d.slug = 'test-data-dtlbash'
    self.d.process = self.p
    self.d.save()
    self.d = Data(id=self.d.id)
def run(self, data: Data, runtime_dir: Path, argv):
    """Select a concrete connector and run the process through it.

    :param data: The :class:`~resolwe.flow.models.Data` object that is
        to be run.
    :param runtime_dir: The directory the executor is run from.
    :param argv: The argument vector used to spawn the executor.
    """
    process_scheduling = self.scheduling_class_map[data.process.scheduling_class]
    if "DISPATCHER_MAPPING" in getattr(settings, "FLOW_MANAGER", {}):
        class_name = settings.FLOW_MANAGER["DISPATCHER_MAPPING"][process_scheduling]
    else:
        class_name = getattr(settings, "FLOW_MANAGER", {}).get("NAME", DEFAULT_CONNECTOR)

    data.scheduled = now()
    data.save(update_fields=["scheduled"])
    return self.connectors[class_name].submit(data, runtime_dir, argv)
class BackendTest(TestCase):

    def setUp(self):
        u = get_user_model().objects.create_superuser('test', '*****@*****.**', 'test')

        self.p = Process(slug='test-processor',
                         name='Test Process',
                         contributor=u,
                         type='data:test',
                         version=1)
        self.p.save()

        self.d = Data(slug='test-data',
                      name='Test Data',
                      contributor=u,
                      process=self.p)
        self.d.save()

    def tearDown(self):
        for data in Data.objects.all():
            data_dir = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(data.id))
            shutil.rmtree(data_dir, ignore_errors=True)

    def test_manager(self):
        manager.communicate(verbosity=0)

    def test_dtlbash(self):
        self.p.slug = 'test-processor-dtlbash'
        self.p.run = {'script': """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"
echo '{"proc.info": "foo"}'
"""}
        self.p.save()

        self.d.slug = 'test-data-dtlbash'
        self.d.process = self.p
        self.d.save()
        self.d = Data(id=self.d.id)
def test_dtlbash(self):
    self.p.slug = 'test-processor-dtlbash'
    self.p.run = {'script': """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"
echo '{"proc.info": "foo"}'
"""}
    self.p.save()

    self.d.slug = 'test-data-dtlbash'
    self.d.process = self.p
    self.d.save()
    self.d = Data(id=self.d.id)
def setUp(self):
    super().setUp()

    self.process = Process(
        output_schema=[
            {'name': 'test_file', 'type': 'basic:file:', 'required': False},
            {'name': 'file_list', 'type': 'list:basic:file:', 'required': False},
        ]
    )
    self.data = Data(
        pk=13,
        process=self.process,
        output={'test_file': {'file': 'test_file.tmp'}},
    )
def setUp(self):
    u = get_user_model().objects.create_superuser('test', '*****@*****.**', 'test')

    self.p = Process(slug='test-processor',
                     name='Test Process',
                     contributor=u,
                     type='data:test',
                     version=1)
    self.p.save()

    self.d = Data(slug='test-data',
                  name='Test Data',
                  contributor=u,
                  process=self.p)
    self.d.save()
class BackendTest(TestCase):

    def setUp(self):
        super(BackendTest, self).setUp()

        self.p = Process(slug='test-processor',
                         name='Test Process',
                         contributor=self.contributor,
                         type='data:test',
                         version=1)
        self.p.save()

        self.d = Data(slug='test-data',
                      name='Test Data',
                      contributor=self.contributor,
                      process=self.p)
        self.d.save()

    def test_manager(self):
        manager.communicate(verbosity=0)

    def test_dtlbash(self):
        self.p.slug = 'test-processor-dtlbash'
        self.p.run = {
            'script': """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"
echo '{"proc.info": "foo"}'
"""
        }
        self.p.save()

        self.d.slug = 'test-data-dtlbash'
        self.d.process = self.p
        self.d.save()
        self.d = Data(id=self.d.id)
def start(self, data: Data, listener_connection: Tuple[str, str, str]):
    """Start process execution.

    Construct a Kubernetes job description and pass it to Kubernetes.
    """
    container_environment = self._prepare_environment(data, listener_connection)
    location_subpath = Path(data.location.subpath)

    # Create the Kubernetes API client every time, otherwise it will
    # eventually time out and raise an API exception.
    try:
        kubernetes.config.load_kube_config()
    except kubernetes.config.config_exception.ConfigException:
        kubernetes.config.load_incluster_config()
    batch_api = kubernetes.client.BatchV1Api()
    core_api = kubernetes.client.CoreV1Api()

    container_name_prefix = (
        getattr(settings, "FLOW_EXECUTOR", {})
        .get("CONTAINER_NAME_PREFIX", "resolwe")
        .replace("_", "-")
        .lower()
    )
    container_name = self._generate_container_name(container_name_prefix, data.pk)

    # Set resource limits.
    requests = dict()
    limits = data.get_resource_limits()

    requests["cpu"] = limits.pop("cores")
    limits["cpu"] = requests["cpu"] + 1
    # Overcommit CPU by 20%.
    requests["cpu"] *= 0.8

    # The memory in the database is stored in megabytes but Kubernetes
    # requires memory in bytes.
    # We request 10% less memory than stored in the database and set the limit
    # at 10% more plus KUBERNETES_MEMORY_HARD_LIMIT_BUFFER. The processes
    # usually require 16GB, 32GB... and since the node usually has 64GB of
    # memory and some of it is consumed by the system processes, only one
    # process that requires 32GB could run on a node instead of 2.
    requests["memory"] = 0.9 * limits["memory"]
    limits["memory"] = 1.1 * limits["memory"] + KUBERNETES_MEMORY_HARD_LIMIT_BUFFER
    limits["memory"] *= 2**20  # 2 ** 20 = mebibyte
    requests["memory"] *= 2**20

    # Get the limits and requests for the communicator container.
    communicator_limits = getattr(
        settings,
        "FLOW_KUBERNETES_COMMUNICATOR_LIMITS",
        {"memory": "256M", "cpu": 0.1},
    )
    communicator_requests = getattr(
        settings,
        "FLOW_KUBERNETES_COMMUNICATOR_REQUESTS",
        {"memory": "256M", "cpu": 0.1},
    )

    resources = data.process.requirements.get("resources", {})
    network = "bridge"
    use_host_network = False
    if "network" in resources:
        # Configure Docker network mode for the container (if specified).
        # By default, current Docker versions use the 'bridge' mode which
        # creates a network stack on the default Docker bridge.
        network = getattr(settings, "FLOW_EXECUTOR", {}).get("NETWORK", "")
        use_host_network = network == "host"

    # Generate and set seccomp policy to limit syscalls.
    security_context = {
        "runAsUser": os.getuid(),
        "runAsGroup": os.getgid(),
        "allowPrivilegeEscalation": False,
        "privileged": False,
        "capabilities": {"drop": ["ALL"]},
    }

    annotations = dict()

    # Do not evict job from node.
    annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"

    if not getattr(settings, "FLOW_DOCKER_DISABLE_SECCOMP", False):
        # The path is a relative path in the kubelet root directory:
        # <seccomp_root>/<path>, where <seccomp_root> is defined via the
        # --seccomp-profile-root flag on the Kubelet. If the
        # --seccomp-profile-root flag is not defined, the default path will
        # be used, which is <root-dir>/seccomp where <root-dir> is
        # specified by the --root-dir flag.
        # https://kubernetes.io/docs/concepts/policy/pod-security-policy/
        #
        # The file is transferred to kubelets with a daemonset? Currently I
        # mount my /tmp directory to the /seccomp directory in minikube.
        annotations["seccomp.security.alpha.kubernetes.io/pod"] = "runtime/default"

    mapper = getattr(settings, "FLOW_CONTAINER_IMAGE_MAP", {})
    communicator_image = getattr(
        settings,
        "FLOW_DOCKER_COMMUNICATOR_IMAGE",
        "public.ecr.aws/s4q6j6e8/resolwe/com:latest",
    )
    communicator_image = self._image_mapper(communicator_image, mapper)

    requirements = data.process.requirements.get("executor", {}).get("docker", {})
    processing_container_image = str(
        requirements.get(
            "image",
            getattr(
                settings,
                "FLOW_DOCKER_DEFAULT_PROCESSING_CONTAINER_IMAGE",
                "public.ecr.aws/s4q6j6e8/resolwe/base:ubuntu-20.04",
            ),
        ),
    )
    processing_container_image = self._image_mapper(processing_container_image, mapper)

    affinity = {}
    kubernetes_affinity = getattr(settings, "FLOW_KUBERNETES_AFFINITY", None)
    if kubernetes_affinity:
        affinity = {
            "nodeAffinity": {
                "requiredDuringSchedulingIgnoredDuringExecution": {
                    "nodeSelectorTerms": [
                        {
                            "matchExpressions": [
                                {
                                    "key": "nodegroup",
                                    "operator": "In",
                                    "values": [kubernetes_affinity],
                                }
                            ]
                        }
                    ]
                }
            }
        }

    job_type = dict(Process.SCHEDULING_CLASS_CHOICES)[data.process.scheduling_class]
    job_description = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"name": sanitize_kubernetes_label(container_name)},
        "spec": {
            # Keep finished pods around for five minutes. If the job is not
            # deleted, its PVC claim persists and it causes the PV to stay
            # around.
            # This can be changed by running a cron job that periodically
            # checks for PVCs that can be deleted.
            "ttlSecondsAfterFinished": 300,
            "template": {
                "metadata": {
                    "name": sanitize_kubernetes_label(container_name),
                    "labels": {
                        "app": "resolwe",
                        "data_id": str(data.pk),
                        "process": sanitize_kubernetes_label(data.process.slug),
                        "job_type": sanitize_kubernetes_label(job_type),
                    },
                    "annotations": annotations,
                },
                "spec": {
                    "affinity": affinity,
                    "hostNetwork": use_host_network,
                    "volumes": self._volumes(data.id, location_subpath, core_api),
                    "initContainers": [
                        {
                            "name": sanitize_kubernetes_label(f"{container_name}-init"),
                            "image": communicator_image,
                            "imagePullPolicy": "Always",
                            "workingDir": "/",
                            "command": ["/usr/local/bin/python3"],
                            "args": ["-m", "executors.init_container"],
                            "securityContext": {"privileged": True},
                            "volumeMounts": self._init_container_mountpoints(),
                            "env": container_environment,
                        },
                    ],
                    "containers": [
                        {
                            "name": sanitize_kubernetes_label(container_name),
                            "image": processing_container_image,
                            "resources": {"limits": limits, "requests": requests},
                            "securityContext": security_context,
                            "env": container_environment,
                            "workingDir": os.fspath(constants.PROCESSING_VOLUME),
                            "imagePullPolicy": "Always",
                            "command": ["/usr/bin/python3"],
                            "args": ["/processing.py"],
                            "volumeMounts": self._processing_mountpoints(
                                location_subpath,
                                data.process.run.get("language", None),
                            ),
                        },
                        {
                            "name": sanitize_kubernetes_label(
                                f"{container_name}-communicator"
                            ),
                            "image": communicator_image,
                            "imagePullPolicy": "Always",
                            "resources": {
                                "limits": communicator_limits,
                                "requests": communicator_requests,
                            },
                            "securityContext": security_context,
                            "env": container_environment,
                            "command": ["/usr/local/bin/python3"],
                            "args": ["/startup.py"],
                            "volumeMounts": self._communicator_mountpoints(location_subpath),
                        },
                    ],
                    "restartPolicy": "Never",
                },
            },
            "backoffLimit": 0,
        },
    }
    start_time = time.time()

    processing_name = constants.PROCESSING_VOLUME_NAME
    input_name = constants.INPUTS_VOLUME_NAME
    if self._should_create_pvc(storage_settings.FLOW_VOLUMES[processing_name]):
        claim_name = unique_volume_name(
            storage_settings.FLOW_VOLUMES[processing_name]["config"]["name"],
            data.id,
        )
        claim_size = limits.pop("storage", 200) * (2**30)  # Default 200 gibibytes
        core_api.create_namespaced_persistent_volume_claim(
            body=self._persistent_volume_claim(
                claim_name,
                claim_size,
                storage_settings.FLOW_VOLUMES[processing_name]["config"],
            ),
            namespace=self.kubernetes_namespace,
            _request_timeout=KUBERNETES_TIMEOUT,
        )
    if input_name in storage_settings.FLOW_VOLUMES:
        if self._should_create_pvc(storage_settings.FLOW_VOLUMES[input_name]):
            claim_size = self._data_inputs_size(data)
            claim_name = unique_volume_name(
                storage_settings.FLOW_VOLUMES[input_name]["config"]["name"],
                data.id,
            )
            core_api.create_namespaced_persistent_volume_claim(
                body=self._persistent_volume_claim(
                    claim_name,
                    claim_size,
                    storage_settings.FLOW_VOLUMES[input_name]["config"],
                ),
                namespace=self.kubernetes_namespace,
                _request_timeout=KUBERNETES_TIMEOUT,
            )

    logger.debug(f"Creating namespaced job: {job_description}")
    batch_api.create_namespaced_job(
        body=job_description,
        namespace=self.kubernetes_namespace,
        _request_timeout=KUBERNETES_TIMEOUT,
    )
    end_time = time.time()
    logger.info(
        "It took {:.2f}s to send config to kubernetes".format(end_time - start_time)
    )
def process_data_object(data: Data):
    """Process a single data object."""
    # Lock for update. Note that we want this transaction to be as short as possible in
    # order to reduce contention and avoid deadlocks. This is why we do not lock all
    # resolving objects for update, but instead only lock one object at a time. This
    # allows managers running in parallel to process different objects.
    data = Data.objects.select_for_update().get(pk=data.pk)
    if data.status != Data.STATUS_RESOLVING:
        # The object might have already been processed while waiting for the lock to be
        # obtained. In this case, skip the object.
        return

    dep_status = dependency_status(data)

    if dep_status == Data.STATUS_ERROR:
        data.status = Data.STATUS_ERROR
        data.process_error.append("One or more inputs have status ERROR")
        data.process_rc = 1
        data.save()
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save(update_fields=["status"])
        return

    elif dep_status != Data.STATUS_DONE:
        return

    if data.process.run:
        try:
            execution_engine = data.process.run.get("language", None)
            # Evaluation by the execution engine may spawn additional data objects and
            # perform other queries on the database. Queries of all possible execution
            # engines need to be audited for possibilities of deadlocks in case any
            # additional locks are introduced. Currently, we only take an explicit lock on
            # the currently processing object.
            program = self.get_execution_engine(execution_engine).evaluate(data)
        except (ExecutionError, InvalidEngineError) as error:
            data.status = Data.STATUS_ERROR
            data.process_error.append("Error in process script: {}".format(error))
            data.save()
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save(update_fields=["status"])
            return

        # Set allocated resources:
        resource_limits = data.process.get_resource_limits()
        data.process_memory = resource_limits["memory"]
        data.process_cores = resource_limits["cores"]
    else:
        # If there is no run section, then we should not try to run
        # anything. But the program must not be set to None as then
        # the process will be stuck in waiting state.
        program = ""

    if data.status != Data.STATUS_DONE:
        # The data object may already be marked as done by the execution engine. In this
        # case we must not revert the status to STATUS_WAITING.
        data.status = Data.STATUS_WAITING
        data.save(render_name=True)

        # Actually run the object only if there was nothing wrong with the transaction.
        transaction.on_commit(
            # Make sure the closure gets the right values here, since they're
            # changed in the loop.
            lambda d=data, p=program: self._data_execute(d, p)
        )
def migrate_data(self, data):
    """Migrate data."""
    contributor = self.get_contributor(data[u'author_id'])

    # DESCRIPTOR SCHEMA ############################################
    ds_fields = []
    ds_fields.extend(data.get(u'static_schema', []))
    ds_fields.extend(data.get(u'var_template', []))
    ds_fields.sort(key=lambda d: d[u'name'])
    ds_fields_dumped = json.dumps(ds_fields)

    if ds_fields_dumped in self.descriptor_schema_index:
        descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
    else:
        descriptor_schema = DescriptorSchema(schema=ds_fields)
        descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
        descriptor_schema.contributor = contributor
        descriptor_schema.save()

        self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

    descriptor = {}
    descriptor.update(data.get(u'static', {}))
    descriptor.update(data.get(u'var', {}))

    # PROCESS ######################################################
    if u'processor_version' not in data:
        data[u'processor_version'] = '0.0.0'

    process_slug = self.process_slug(data[u'processor_name'])
    process_version = data[u'processor_version']
    try:
        process = Process.objects.get(slug=process_slug, version=process_version)
    except Process.DoesNotExist:
        latest = Process.objects.filter(slug=process_slug).order_by('-version').first()

        if latest:
            process = Process()
            process.name = latest.name
            process.slug = latest.slug
            process.category = latest.category
            process.description = latest.description
            process.contributor = latest.contributor

            process.version = process_version
            process.type = data[u'type']
            process.output_schema = data[u'output_schema']
            process.input_schema = data.get(u'input_schema', {})
            process.persistence = self.persistence_dict[data[u'persistence']]

            process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

            # XXX
            # process.created =
            # process.modified =

            process.save()

            # copy permissions from latest process
            for user, perms in six.iteritems(get_users_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, user, process)
            for group, perms in six.iteritems(get_groups_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, group, process)
        else:
            # Create dummy processor if there is no other version
            dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
            try:
                process = Process.objects.get(name=dummy_name)
            except Process.DoesNotExist:
                process = Process.objects.create(
                    name=dummy_name,
                    slug='non-existent',
                    contributor=get_user_model().objects.filter(is_superuser=True).first(),
                    type=data[u'type'],
                    category='data:non-existent',
                    run={'script': 'gen-require common\ngen-error "This processor is not intended to be run."'},
                )

    # DATA #########################################################
    new = Data()
    new.name = data.get(u'static', {}).get(u'name', '')
    if len(new.name) > 100:
        self.long_names.append(new.name)
        new.name = new.name[:97] + '...'

    new.status = self.status_dict[data[u'status']]
    new.process = process
    new.contributor = contributor
    new.input = data[u'input'] if u'input' in data else {}
    new.output = data[u'output']
    new.descriptor_schema = descriptor_schema
    new.descriptor = descriptor
    new.checksum = data.get(u'checksum', '')
    # XXX: Django will change this on create
    new.created = data[u'date_created']
    # XXX: Django will change this on save
    new.modified = data[u'date_modified']
    if u'date_start' in data and u'date_finish' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_finish']
    elif u'date_finish' in data:
        new.started = data[u'date_finish']
        new.finished = data[u'date_finish']
    elif u'date_start' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_start']
    else:
        new.started = datetime.fromtimestamp(0)
        new.finished = datetime.fromtimestamp(0)
    new.save()

    for case_id in data[u'case_ids']:
        try:
            collection = Collection.objects.get(pk=self.id_mapping[u'collection'][str(case_id)])
        except KeyError:
            self.missing_collections.add(str(case_id))
            continue

        collection.data.add(new)

    for field_schema, fields, path in iterate_fields(data[u'output'], data[u'output_schema'], ''):
        if 'type' in field_schema and field_schema['type'].startswith('basic:json:'):
            self.storage_index[fields[field_schema['name']]] = {
                'id': new.pk,
                'path': path,
            }

    self.migrate_permissions(new, data)

    self.id_mapping['data'][str(data[u'_id'])] = new.pk

    # DESCRIPTOR SCHEMA PERMISSIONS ################################
    for user in get_users_with_perms(new):
        assign_perm('view_descriptorschema', user, obj=descriptor_schema)

    for group in get_groups_with_perms(new):
        assign_perm('view_descriptorschema', group, obj=descriptor_schema)
def migrate_data(self, data):
    """Migrate data."""
    contributor = self.get_contributor(data["author_id"])

    # DESCRIPTOR SCHEMA ############################################
    ds_fields = []
    ds_fields.extend(data.get("static_schema", []))
    ds_fields.extend(data.get("var_template", []))
    ds_fields.sort(key=lambda d: d["name"])
    ds_fields_dumped = json.dumps(ds_fields)

    if ds_fields_dumped in self.descriptor_schema_index:
        descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
    else:
        descriptor_schema = DescriptorSchema(schema=ds_fields)
        descriptor_schema.name = "data_{}_descriptor".format(data["_id"])
        descriptor_schema.contributor = contributor
        descriptor_schema.save()

        self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

    descriptor = {}
    descriptor.update(data.get("static", {}))
    descriptor.update(data.get("var", {}))

    # PROCESS ######################################################
    if "processor_version" not in data:
        data["processor_version"] = "0.0.0"

    process_slug = self.process_slug(data["processor_name"])
    process_version = data["processor_version"]
    try:
        process = Process.objects.get(slug=process_slug, version=process_version)
    except Process.DoesNotExist:
        latest = Process.objects.filter(slug=process_slug).order_by("-version").first()

        if latest:
            process = Process()
            process.name = latest.name
            process.slug = latest.slug
            process.category = latest.category
            process.description = latest.description
            process.contributor = latest.contributor

            process.version = process_version
            process.type = data["type"]
            process.output_schema = data["output_schema"]
            process.input_schema = data.get("input_schema", {})
            process.persistence = self.persistence_dict[data["persistence"]]

            process.run["script"] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'

            # XXX
            # process.created =
            # process.modified =

            process.save()

            # copy permissions from latest process
            for user, perms in get_users_with_perms(latest, attach_perms=True).items():
                for perm in perms:
                    assign_perm(perm, user, process)
            for group, perms in get_groups_with_perms(latest, attach_perms=True).items():
                for perm in perms:
                    assign_perm(perm, group, process)
        else:
            # Create dummy processor if there is no other version
            dummy_name = "Dummy processor of type {}".format(data["type"])
            try:
                process = Process.objects.get(name=dummy_name)
            except Process.DoesNotExist:
                process = Process.objects.create(
                    name=dummy_name,
                    slug="non-existent",
                    contributor=get_user_model().objects.filter(is_superuser=True).first(),
                    type=data["type"],
                    category="data:non-existent",
                    run={"script": 'gen-require common\ngen-error "This processor is not intended to be run."'},
                )

    # DATA #########################################################
    new = Data()
    new.name = data.get("static", {}).get("name", "")
    if len(new.name) > 100:
        self.long_names.append(new.name)
        new.name = new.name[:97] + "..."

    new.status = self.status_dict[data["status"]]
    new.process = process
    new.contributor = contributor
    new.input = data["input"] if "input" in data else {}
    new.output = data["output"]
    new.descriptor_schema = descriptor_schema
    new.descriptor = descriptor
    new.checksum = data.get("checksum", "")
    # XXX: Django will change this on create
    new.created = data["date_created"]
    # XXX: Django will change this on save
    new.modified = data["date_modified"]
    if "date_start" in data and "date_finish" in data:
        new.started = data["date_start"]
        new.finished = data["date_finish"]
    elif "date_finish" in data:
        new.started = data["date_finish"]
        new.finished = data["date_finish"]
    elif "date_start" in data:
        new.started = data["date_start"]
        new.finished = data["date_start"]
    else:
        new.started = datetime.fromtimestamp(0)
        new.finished = datetime.fromtimestamp(0)
    new.save()

    for case_id in data["case_ids"]:
        try:
            collection = Collection.objects.get(pk=self.id_mapping["collection"][str(case_id)])
        except KeyError:
            self.missing_collections.add(str(case_id))
            continue

        collection.data.add(new)

    for field_schema, fields, path in iterate_fields(data["output"], data["output_schema"], ""):
        if "type" in field_schema and field_schema["type"].startswith("basic:json:"):
            self.storage_index[fields[field_schema["name"]]] = {"id": new.pk, "path": path}

    self.migrate_permissions(new, data)

    self.id_mapping["data"][str(data["_id"])] = new.pk

    # DESCRIPTOR SCHEMA PERMISSIONS ################################
    for user in get_users_with_perms(new):
        assign_perm("view_descriptorschema", user, obj=descriptor_schema)

    for group in get_groups_with_perms(new):
        assign_perm("view_descriptorschema", group, obj=descriptor_schema)
def process_data_object(data: Data):
    """Process a single data object."""
    # Lock for update. Note that we want this transaction to be as short as possible in
    # order to reduce contention and avoid deadlocks. This is why we do not lock all
    # resolving objects for update, but instead only lock one object at a time. This
    # allows managers running in parallel to process different objects.
    data = Data.objects.select_for_update().get(pk=data.pk)
    if data.status != Data.STATUS_RESOLVING:
        # The object might have already been processed while waiting for the lock to be
        # obtained. In this case, skip the object.
        return

    dep_status = dependency_status(data)

    if dep_status == Data.STATUS_ERROR:
        data.status = Data.STATUS_ERROR
        data.process_error.append("One or more inputs have status ERROR")
        data.process_rc = 1
        data.save()
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save(update_fields=["status"])
        return

    elif dep_status != Data.STATUS_DONE:
        return

    run_in_executor = False
    if data.process.run:
        try:
            # Check if the execution engine is sound and evaluate the workflow.
            execution_engine_name = data.process.run.get("language", None)
            execution_engine = self.get_execution_engine(execution_engine_name)
            run_in_executor = execution_engine_name != "workflow"
            if not run_in_executor:
                execution_engine.evaluate(data)
            else:
                # Set allocated resources.
                resource_limits = data.process.get_resource_limits()
                data.process_memory = resource_limits["memory"]
                data.process_cores = resource_limits["cores"]
        except (ExecutionError, InvalidEngineError) as error:
            data.status = Data.STATUS_ERROR
            data.process_error.append("Error in process script: {}".format(error))
            data.save()
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save(update_fields=["status"])
            return

    if data.status != Data.STATUS_DONE:
        # The data object may already be marked as done by the execution engine. In this
        # case we must not revert the status to STATUS_WAITING.
        data.status = Data.STATUS_WAITING
        data.save(render_name=True)

        # Actually run the object only if there was nothing wrong with the
        # transaction and it was not already evaluated.
        if run_in_executor:
            transaction.on_commit(
                # Make sure the closure gets the right values here, since they're
                # changed in the loop.
                lambda d=data: self._data_execute(d)
            )
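The lambda d=data: ... passed to transaction.on_commit() binds the current object at definition time; a standalone illustration of why the default argument is needed when callbacks are created in a loop:

# Standalone illustration of the default-argument trick used with
# transaction.on_commit() above: without it, every callback would see the
# loop variable's final value (late binding).
callbacks_late = [lambda: item for item in (1, 2, 3)]        # late binding
callbacks_bound = [lambda i=item: i for item in (1, 2, 3)]   # bound at definition

assert [cb() for cb in callbacks_late] == [3, 3, 3]
assert [cb() for cb in callbacks_bound] == [1, 2, 3]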
def _data_execute(self, data: Data, program: str):
    """Execute the Data object.

    The activities carried out here include target directory preparation,
    executor copying, setting serialization and actual execution of the
    object.

    :param data: The :class:`~resolwe.flow.models.Data` object to execute.
    :param program: The process text the manager got out of execution
        engine evaluation.
    """
    # Notify the dispatcher if there is nothing to do so it can check whether
    # conditions for raising the runtime barrier are fulfilled.
    if not program:
        return

    logger.debug(__("Manager preparing Data with id {} for processing.", data.id))

    # Prepare the executor's environment.
    try:
        executor_env_vars = self.get_executor().get_environment_variables()
        program = self._include_environment_variables(program, executor_env_vars)
        data_dir = self._prepare_data_dir(data)
        executor_module, runtime_dir = self._prepare_executor(data)
        self._prepare_storage_connectors(runtime_dir)
        self._lock_inputs_local_storage_locations(data)

        # Execute execution engine specific runtime preparation.
        execution_engine = data.process.run.get("language", None)
        volume_maps = self.get_execution_engine(execution_engine).prepare_runtime(
            runtime_dir, data
        )

        self._prepare_context(data, data_dir, runtime_dir, RUNTIME_VOLUME_MAPS=volume_maps)
        self._prepare_script(runtime_dir, program)

        argv = [
            "/bin/bash",
            "-c",
            getattr(settings, "FLOW_EXECUTOR", {}).get("PYTHON", "/usr/bin/env python")
            + " -m executors "
            + executor_module,
        ]
    except PermissionDenied as error:
        data.status = Data.STATUS_ERROR
        data.process_error.append("Permission denied for process: {}".format(error))
        data.save()
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save()
        return
    except OSError as err:
        logger.error(
            __(
                "OSError occurred while preparing data {} (will skip): {}",
                data.id,
                err,
            )
        )
        if hasattr(data, "worker"):
            data.worker.status = Worker.STATUS_ERROR_PREPARING
            data.worker.save()
        return

    # Hand off to the run() method for execution.
    logger.info(__("Running {}", runtime_dir))
    self.run(data, runtime_dir, argv)
def migrate_data(self, data):
    """Migrate data."""
    contributor = self.get_contributor(data[u'author_id'])

    # DESCRIPTOR SCHEMA ############################################
    ds_fields = []
    ds_fields.extend(data.get(u'static_schema', []))
    ds_fields.extend(data.get(u'var_template', []))
    ds_fields.sort(key=lambda d: d[u'name'])
    ds_fields_dumped = json.dumps(ds_fields)

    if ds_fields_dumped in self.descriptor_schema_index:
        descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
    else:
        descriptor_schema = DescriptorSchema(schema=ds_fields)
        descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
        descriptor_schema.contributor = contributor
        descriptor_schema.save()

        self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

    descriptor = {}
    descriptor.update(data.get(u'static', {}))
    descriptor.update(data.get(u'var', {}))

    # PROCESS ######################################################
    if u'processor_version' not in data:
        data[u'processor_version'] = '0.0.0'

    process_slug = self.process_slug(data[u'processor_name'])
    process_version = data[u'processor_version']
    try:
        process = Process.objects.get(slug=process_slug, version=process_version)
    except Process.DoesNotExist:
        latest = Process.objects.filter(slug=process_slug).order_by('-version').first()

        if latest:
            process = Process()
            process.name = latest.name
            process.slug = latest.slug
            process.category = latest.category
            process.description = latest.description
            process.contributor = latest.contributor

            process.version = process_version
            process.type = data[u'type']
            process.output_schema = data[u'output_schema']
            process.input_schema = data.get(u'input_schema', {})
            process.persistence = self.persistence_dict[data[u'persistence']]

            process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

            # XXX
            # process.created =
            # process.modified =

            process.save()

            # copy permissions from latest process
            for user, perms in six.iteritems(get_users_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, user, process)
            for group, perms in six.iteritems(get_groups_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, group, process)
        else:
            # Create dummy processor if there is no other version
            dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
            try:
                process = Process.objects.get(name=dummy_name)
            except Process.DoesNotExist:
                process = Process.objects.create(
                    name=dummy_name,
                    slug='non-existent',
                    contributor=get_user_model().objects.filter(is_superuser=True).first(),
                    type=data[u'type'],
                    category='data:non-existent',
                    run={'script': 'gen-require common\ngen-error "This processor is not intended to be run."'},
                )

    # DATA #########################################################
    new = Data()
    new.name = data.get(u'static', {}).get(u'name', '')
    if len(new.name) > 100:
        self.long_names.append(new.name)
        new.name = new.name[:97] + '...'

    new.status = self.status_dict[data[u'status']]
    new.process = process
    new.contributor = contributor
    new.input = data[u'input'] if u'input' in data else {}
    new.output = data[u'output']
    new.descriptor_schema = descriptor_schema
    new.descriptor = descriptor
    new.checksum = data.get(u'checksum', '')
    # XXX: Django will change this on create
    new.created = data[u'date_created']
    # XXX: Django will change this on save
    new.modified = data[u'date_modified']
    if u'date_start' in data and u'date_finish' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_finish']
    elif u'date_finish' in data:
        new.started = data[u'date_finish']
        new.finished = data[u'date_finish']
    elif u'date_start' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_start']
    else:
        new.started = datetime.fromtimestamp(0)
        new.finished = datetime.fromtimestamp(0)
    new.save()

    for case_id in data[u'case_ids']:
        try:
            collection = Collection.objects.get(pk=self.id_mapping[u'collection'][str(case_id)])
        except KeyError:
            self.missing_collections.add(str(case_id))
            continue

        collection.data.add(new)

    for field_schema, fields, path in iterate_fields(data[u'output'], data[u'output_schema'], ''):
        if 'type' in field_schema and field_schema['type'].startswith('basic:json:'):
            self.storage_index[fields[field_schema['name']]] = {
                'id': new.pk,
                'path': path,
            }

    self.migrate_permissions(new, data)

    self.id_mapping['data'][str(data[u'_id'])] = new.pk

    # DESCRIPTOR SCHEMA PERMISSIONS ################################
    for user in get_users_with_perms(new):
        assign_perm('view_descriptorschema', user, obj=descriptor_schema)

    for group in get_groups_with_perms(new):
        assign_perm('view_descriptorschema', group, obj=descriptor_schema)