def test_referenced_storage(self):
    """Output referencing a ``Storage`` must point at an existing object."""
    process = Process.objects.create(
        name="Test process",
        contributor=self.user,
        output_schema=[
            {"name": "big_result", "type": "basic:json:", "required": True}
        ],
    )
    data_object = Data.objects.create(
        name="Test data",
        contributor=self.user,
        process=process,
    )
    data_object.status = Data.STATUS_DONE

    # `Data` object with referenced non-existing `Storage`.
    data_object.output = {"big_result": 245}
    with self.assertRaisesRegex(ValidationError, "`Storage` object does not exist"):
        data_object.save()
        validate_data_object(data_object)

    # Create a real `Storage` and reference its primary key instead.
    data_object.storages.create(
        name="storage", contributor=self.user, json={"value": 42}
    )
    self.assertEqual(data_object.storages.count(), 1)
    data_object.output = {"big_result": data_object.storages.first().id}
    data_object.save()
    validate_data_object(data_object)
def handle_finish(self, message: Message[Dict], manager: "Processor") -> Response[str]:
    """Handle an incoming ``Data`` finished processing request.

    :param message: the finish message; ``message_data`` may carry the
        process return code under ``"rc"`` (a missing value defaults to 1,
        i.e. failure) and an optional ``"error"`` description.
    :param manager: the processor whose ``data`` object is being finalized.
    :returns: an ``OK`` response.
    """
    # Missing return code is treated as failure (defaults to 1).
    process_rc = int(message.message_data.get("rc", 1))
    changeset = {
        "process_progress": 100,
        "finished": now(),
    }
    if process_rc != 0:
        changeset["process_rc"] = process_rc
        # Transition to ERROR and record the error message only if the
        # object is not already in ERROR status (keep the first error).
        if manager.data.status != Data.STATUS_ERROR:
            changeset["status"] = Data.STATUS_ERROR
            manager.data.process_error.append(
                message.message_data.get("error", "Process return code is not 0")
            )
            changeset["process_error"] = manager.data.process_error
    elif manager.data.status != Data.STATUS_ERROR:
        # Successful run: mark DONE and record the aggregated size of the
        # files in the data location (0 when there are no files).
        changeset["status"] = Data.STATUS_DONE
        changeset["size"] = manager.data.location.files.aggregate(
            size=Coalesce(Sum("size"), 0)
        ).get("size")
    manager._update_data(changeset)

    local_location = manager.data.location.default_storage_location
    local_location.status = StorageLocation.STATUS_DONE
    local_location.save()

    # Only validate objects with DONE status. Validating objects in ERROR
    # status will only cause unnecessary errors to be displayed.
    if manager.data.status == Data.STATUS_DONE:
        validate_data_object(manager.data)
    return message.respond_ok("OK")
def test_delete_input(self):
    """Re-saving a ``Data`` object whose input was deleted must validate."""
    referenced_process = Process.objects.create(
        name="Referenced process",
        contributor=self.user,
        type="data:referenced:object:",
    )
    consumer_process = Process.objects.create(
        name="Test process",
        contributor=self.user,
        input_schema=[
            {"name": "data_object", "type": "data:referenced:object:"}
        ],
    )
    referenced = Data.objects.create(
        name="Referenced object",
        contributor=self.user,
        process=referenced_process,
    )
    consumer = Data.objects.create(
        name="Test data",
        contributor=self.user,
        process=consumer_process,
        input={"data_object": referenced.pk},
    )
    validate_data_object(referenced)
    validate_data_object(consumer)

    # Delete the referenced input; the consumer can still be saved and
    # validated when missing data objects are explicitly skipped.
    referenced.delete()
    consumer.name = "New name"
    consumer.save()
    validate_data_object(consumer, skip_missing_data=True)
def test_validating_data_object(self):
    """Different validations are performed depending on status."""
    proc = Process.objects.create(
        name="Test process",
        contributor=self.user,
        input_schema=[
            {"name": "value", "type": "basic:integer:", "required": True}
        ],
        output_schema=[
            {"name": "result", "type": "basic:string:", "required": True}
        ],
    )
    data = {
        "name": "Test data",
        "contributor": self.user,
        "process": proc,
    }

    # A missing required input is reported both by field name and by the
    # generic "required fields" message.
    with self.assertRaisesRegex(ValidationError, '"value" not given'):
        validate_data_object(Data.objects.create(input={}, **data))
    with self.assertRaisesRegex(ValidationError, "Required fields .* not given"):
        validate_data_object(Data.objects.create(input={}, **data))

    # Output schema is enforced only once the object is marked DONE.
    d = Data.objects.create(input={"value": 42}, **data)
    d.status = Data.STATUS_DONE
    with self.assertRaisesRegex(ValidationError, '"result" not given'):
        d.save()
        validate_data_object(d)
    d.output = {"result": "forty-two"}
    d.save()
    validate_data_object(d)
def test_referenced_data(self):
    """``data:`` inputs are validated against the process input schema."""
    referenced_process = Process.objects.create(
        name="Referenced process",
        contributor=self.user,
        type="data:referenced:object:",
    )
    test_process = Process.objects.create(
        name="Test process",
        contributor=self.user,
        input_schema=[
            {"name": "data_object", "type": "data:referenced:object:"}
        ],
    )
    d = Data.objects.create(
        name="Referenced object",
        contributor=self.user,
        process=referenced_process,
    )
    validate_data_object(d)

    data = {
        "name": "Test data",
        "contributor": self.user,
        "process": test_process,
        "input": {"data_object": d.pk},
    }
    validate_data_object(Data.objects.create(**data))

    # A less specific (parent) type also matches.
    test_process.input_schema = [
        {"name": "data_object", "type": "data:referenced:"}
    ]
    validate_data_object(Data.objects.create(**data))

    # A mismatched type is rejected.
    test_process.input_schema = [
        {"name": "data_object", "type": "data:wrong:type:"}
    ]
    with self.assertRaisesRegex(ValidationError, "Data object of type .* is required"):
        validate_data_object(Data.objects.create(**data))

    # Non-existing referenced `Data` object on create.
    data["input"] = {"data_object": 631}
    with self.assertRaisesRegex(ValidationError, "`Data` object does not exist"):
        Data.objects.create(**data)

    # Non-existing `Data` object when status is set to `DONE` should not
    # raise exception. First make sure the object is created.
    test_process.input_schema = [
        {"name": "data_object", "type": "data:referenced:"}
    ]
    data["input"] = {"data_object": d.pk}
    d = Data.objects.create(**data)
    data["input"] = {"data_object": 631}
    # Now validate with missing input.
    validate_data_object(d, skip_missing_data=True)
def handle_referenced_files(self, obj):
    """Store a list of files and directories produced by the worker.

    :param obj: The Channels message object. Command object format:

        .. code-block:: none

            {
                'command': 'referenced_data',
                'data_id': [id of the :class:`~resolwe.flow.models.Data`
                    object],
                'referenced_files': list of referenced file names relative
                    to the DATA_DIR.
                'referenced_dirs': list of referenced directory paths
                    relative to the DATA_DIR.
            }
    """
    data_id = obj[ExecutorProtocol.DATA_ID]
    logger.debug(
        __(
            "Handling referenced files for Data with id {} (handle_referenced_files).",
            data_id,
        ),
        extra={"data_id": data_id, "packet": obj},
    )
    try:
        data = Data.objects.get(pk=data_id)
    except Data.DoesNotExist:
        # The referenced Data object is gone; abort the whole command.
        logger.error(
            "Data object does not exist (handle_referenced_files).",
            extra={"data_id": data_id},
        )
        self._abort_processing(obj)
        return

    file_storage = data.location
    # At this point default_storage_location will always be local.
    local_location = file_storage.default_storage_location
    with transaction.atomic():
        try:
            referenced_paths = [
                ReferencedPath(**object_)
                for object_ in obj[ExecutorProtocol.REFERENCED_FILES]
            ]
            # Remove files previously referenced by updating data.output.
            ReferencedPath.objects.filter(
                storage_locations=local_location
            ).delete()
            ReferencedPath.objects.bulk_create(referenced_paths)
            local_location.files.add(*referenced_paths)
            local_location.status = StorageLocation.STATUS_DONE
            local_location.save()
        except Exception:
            logger.exception(
                "Exception while saving referenced files (handle_referenced_files).",
                extra={"data_id": data_id},
            )
            self._abort_processing(obj)
            return

    try:
        # Set Data status to DONE and perform validation only if it is
        # still in status PROCESSING.
        if data.status == Data.STATUS_PROCESSING:
            data.status = Data.STATUS_DONE
            data.save()
            # Inputs have been validated when data object was created.
            # Locks on their storage locations assure that the data
            # necessary for computation is available even if input
            # Data object was deleted. So it is not necessary to check
            # if input data objects still exist at this point.
            validate_data_object(data, skip_missing_data=True)
    except ValidationError as exc:
        logger.error(
            __(
                "Validation error when saving Data object of process '{}' (handle_referenced_files):\n\n{}",
                data.process.slug,
                traceback.format_exc(),
            ),
            extra={"data_id": data_id},
        )
        # Re-read the object so the error is appended to fresh state, then
        # best-effort save only the fields we changed.
        data.refresh_from_db()
        data.process_error.append(exc.message)
        data.status = Data.STATUS_ERROR
        with suppress(Exception):
            data.save(update_fields=["process_error", "status"])
    except Exception:
        logger.exception(
            "Exception while setting data status to DONE (handle_referenced_files).",
            extra={"data_id": data_id},
        )
        self._abort_processing(obj)
        return

    async_to_sync(self._send_reply)(
        obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK}
    )