def test_preserve_container(self):
    """Building a preservation workflow for a container IP must succeed."""
    # Paths required by the preservation workflow.
    for entity in ('ingest_reception', 'temp'):
        Path.objects.create(entity=entity, value=entity)

    # Policy backed by an enabled cache storage method/target pair.
    cache_method = StorageMethod.objects.create()
    cache_target = StorageTarget.objects.create(name='cache target')
    StorageMethodTargetRelation.objects.create(
        storage_method=cache_method,
        storage_target=cache_target,
        status=STORAGE_TARGET_STATUS_ENABLED)
    policy = StoragePolicy.objects.create(
        cache_storage=cache_method,
        ingest_path=Path.objects.create(),
    )

    aic = InformationPackage.objects.create()
    ip = InformationPackage.objects.create(aic=aic, policy=policy)

    # A second enabled storage method/target pair for preservation itself.
    method = StorageMethod.objects.create()
    target = StorageTarget.objects.create()
    StorageMethodTargetRelation.objects.create(
        storage_method=method,
        storage_target=target,
        status=STORAGE_TARGET_STATUS_ENABLED)

    # Building the workflow from the IP's spec must not raise.
    create_workflow(ip.create_preservation_workflow(), ip)
def test_tasks(self):
    """A flat spec of two tasks yields one step containing both, in order."""
    task_specs = [
        {
            "name": "foo.bar.task",
            "label": "Foo Bar Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'},
        },
        {
            "name": "foo.bar.task2",
            "label": "Foo Bar Task2",
            "args": [3, 2, 1],
            "params": {'b': 'a'},
        },
    ]
    create_workflow(task_specs)

    # Exactly one wrapping step, two tasks total.
    self.assertEqual(ProcessStep.objects.count(), 1)
    self.assertEqual(ProcessTask.objects.count(), 2)

    step = ProcessStep.objects.get()
    self.assertEqual(step.tasks.count(), 2)
    # Task ordering follows the spec order.
    self.assertEqual(step.tasks.earliest('processstep_pos').name, task_specs[0]['name'])
    self.assertEqual(step.tasks.latest('processstep_pos').name, task_specs[1]['name'])
    # No on-error handlers were specified.
    self.assertEqual(step.on_error.count(), 0)
def run(self):
    """Poll every configured workflow backend and start a workflow per found IP."""
    project = settings.PROJECT_SHORTNAME
    for poller_name, poller_conf in getattr(settings, 'ESSARCH_WORKFLOW_POLLERS', {}).items():
        backend = get_backend(poller_name)
        path = poller_conf['path']
        sa = poller_conf.get('sa')
        context = {
            'WORKFLOW_POLLER': poller_name,
            'WORKFLOW_POLL_PATH': path,
        }
        for ip in backend.poll(path, sa):
            profile = ip.submission_agreement.profile_workflow
            try:
                spec = profile.specification[project]
            except KeyError:
                # Profile exists but has no spec for this project: skip the IP.
                self.logger.debug(u'No workflow specified in {} for current project {}'.format(profile, project))
                continue
            except AttributeError:
                # Re-raise unless the SA simply has no workflow profile.
                if profile is not None:
                    raise
                self.logger.debug(u'No workflow profile in SA')
                continue
            workflow = create_workflow(
                spec['tasks'],
                ip=ip,
                name=spec.get('name', ''),
                on_error=spec.get('on_error'),
                context=context,
            )
            workflow.run()
def get_workflows():
    """Yield a workflow for each IP discovered by the configured pollers.

    Reads ``ESSARCH_WORKFLOW_POLLERS`` from settings; for every poller,
    polls its path and builds a workflow from the IP's submission-agreement
    workflow profile. IPs whose SA has no workflow profile are skipped.

    Yields:
        The workflow object returned by ``create_workflow`` for each IP.
    """
    import logging  # local import: keeps this fix self-contained
    logger = logging.getLogger(__name__)

    pollers = getattr(settings, 'ESSARCH_WORKFLOW_POLLERS', {})
    for name, poller in pollers.items():
        backend = get_backend(name)
        poll_path = poller['path']
        poll_sa = poller.get('sa')
        context = {
            'WORKFLOW_POLLER': name,
            'WORKFLOW_POLL_PATH': poll_path,
        }
        for ip in backend.poll(poll_path, poll_sa):
            profile = ip.submission_agreement.profile_workflow
            try:
                spec = profile.specification
            except AttributeError:
                if profile is None:
                    # BUG FIX: this is a free function, so the original
                    # `self.logger` raised NameError whenever this path ran.
                    logger.debug('No workflow profile in SA')
                    continue
                raise
            yield create_workflow(
                spec['tasks'],
                ip=ip,
                name=spec.get('name', ''),
                on_error=spec.get('on_error'),
                context=context,
            )
def preserve_new_generation(aip_profile, aip_profile_data, dstdir, ip, mets_path, new_ip, policy):
    """Run the preservation workflow for *new_ip* eagerly.

    Only ``new_ip`` is used here; the remaining parameters are kept for
    signature compatibility with existing callers.
    """
    spec = new_ip.create_preservation_workflow()
    create_workflow(spec, new_ip, name='Preserve Information Package', eager=True).run()
def test_preserve_container(self):
    """Building a preservation workflow via an SA-bound policy must succeed."""
    # Paths required by the preservation workflow.
    for entity in ('ingest_reception', 'temp'):
        Path.objects.create(entity=entity, value=entity)

    # Policy reaches the IP through its submission agreement here.
    policy = StoragePolicy.objects.create(ingest_path=Path.objects.create())
    sa = SubmissionAgreement.objects.create(policy=policy)
    aic = InformationPackage.objects.create()
    ip = InformationPackage.objects.create(aic=aic, submission_agreement=sa)

    # An enabled storage method/target pair for preservation.
    method = StorageMethod.objects.create()
    target = StorageTarget.objects.create()
    StorageMethodTargetRelation.objects.create(
        storage_method=method,
        storage_target=target,
        status=STORAGE_TARGET_STATUS_ENABLED,
    )

    # Building the workflow from the IP's spec must not raise.
    create_workflow(ip.create_preservation_workflow(), ip)
def test_steps(self):
    """A nested spec creates a root step with one child step holding both tasks."""
    child_specs = [
        {
            "name": "foo.bar.task",
            "label": "Foo Bar Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'},
        },
        {
            "name": "foo.bar.task2",
            "label": "Foo Bar Task2",
            "args": [3, 2, 1],
            "params": {'b': 'a'},
        },
    ]
    spec = [{"step": True, "name": "My step", "children": child_specs}]
    root = create_workflow(spec)

    # Root wrapper + declared step; two tasks total.
    self.assertEqual(ProcessStep.objects.count(), 2)
    self.assertEqual(ProcessTask.objects.count(), 2)
    # Tasks hang off the child step, never the root.
    self.assertEqual(root.tasks.count(), 0)
    self.assertEqual(root.child_steps.count(), 1)
    self.assertEqual(root.on_error.count(), 0)

    child = root.child_steps.get()
    self.assertEqual(child.tasks.count(), 2)
    self.assertEqual(child.child_steps.count(), 0)
    # Task ordering follows spec order.
    self.assertEqual(child.tasks.earliest('processstep_pos').name, child_specs[0]['name'])
    self.assertEqual(child.tasks.latest('processstep_pos').name, child_specs[1]['name'])
    self.assertEqual(child.on_error.count(), 0)
def test_on_error_step(self):
    """An "on_error" entry on a step attaches a handler task to that step."""
    spec = [{
        "step": True,
        "name": "My step",
        "on_error": [{
            "name": "on_error.task",
            "label": "On-error Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'},
        }],
        "children": [{
            "name": "foo.bar.task",
            "label": "Foo Bar Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'},
        }],
    }]
    root = create_workflow(spec)

    # Root wrapper + declared step; child task + on-error task.
    self.assertEqual(ProcessStep.objects.count(), 2)
    self.assertEqual(ProcessTask.objects.count(), 2)
    self.assertEqual(root.tasks.count(), 0)
    self.assertEqual(root.child_steps.count(), 1)
    # The handler belongs to the declared step, not the root.
    self.assertEqual(root.on_error.count(), 0)

    child = root.child_steps.get()
    self.assertEqual(child.name, spec[0]['name'])
    self.assertEqual(child.tasks.count(), 1)
    self.assertEqual(child.on_error.count(), 1)
    self.assertEqual(child.on_error.get().name, spec[0]['on_error'][0]['name'])
def test_references(self):
    """Later tasks can consume earlier tasks' results through references."""
    add = "ESSArch_Core.WorkflowEngine.tests.tasks.Add"
    spec = [
        {"name": add, "reference": "first", "args": [1, 2]},
        {"name": add, "reference": "second", "args": [3, 4]},
        # Final task adds the two referenced results: (1+2) + (3+4) == 10.
        {"name": add, "result_params": {'x': 'first', 'y': 'second'}},
    ]
    self.assertEqual(create_workflow(spec).run().get(), 10)
def test_on_error_task(self):
    """An "on_error" entry on a task attaches the handler to that task."""
    spec = [{
        "name": "foo.bar.task",
        "label": "Foo Bar Task",
        "args": [1, 2, 3],
        "params": {'a': 'b'},
        "on_error": [{
            "name": "foo.bar.task2",
            "label": "Foo Bar Task2",
            "args": [3, 2, 1],
            "params": {'b': 'a'},
        }],
    }]
    root = create_workflow(spec)

    # One wrapping step; main task + its on-error handler.
    self.assertEqual(ProcessStep.objects.count(), 1)
    self.assertEqual(ProcessTask.objects.count(), 2)
    self.assertEqual(root.tasks.count(), 1)
    self.assertEqual(root.child_steps.count(), 0)
    # The handler hangs off the task, not the step.
    self.assertEqual(root.on_error.count(), 0)

    task = root.tasks.get()
    self.assertEqual(task.name, spec[0]['name'])
    self.assertEqual(task.on_error.count(), 1)
    self.assertEqual(task.on_error.get().name, spec[0]['on_error'][0]['name'])
def preserve_new_generation(new_ip):
    """Regenerate metadata for *new_ip*, validate it, and run preservation.

    Removes any previously generated content-mets, events and (optionally)
    premis files, then builds and eagerly runs a workflow that regenerates
    them, validates the result, and appends the IP's own preservation steps.
    """
    # Whether a premis file should be (re)generated for this IP.
    generate_premis = new_ip.profile_locked('preservation_metadata')
    # True when the IP structure has a "representations" destination.
    has_representations = find_destination(
        "representations",
        new_ip.get_structure(),
        new_ip.object_path,
    )[1] is not None
    # remove existing premis and mets paths:
    mets_path = os.path.join(new_ip.object_path, new_ip.get_content_mets_file_path())
    try:
        os.remove(mets_path)
    except FileNotFoundError:
        pass  # nothing to remove on first generation
    events_file = os.path.join(new_ip.object_path, new_ip.get_events_file_path())
    try:
        os.remove(events_file)
    except FileNotFoundError:
        pass
    if generate_premis:
        # Resolve the premis path from the profile spec before deleting it.
        premis_profile_data = new_ip.get_profile_data('preservation_metadata')
        data = fill_specification_data(premis_profile_data, ip=new_ip)
        premis_path = parseContent(new_ip.get_premis_file_path(), data)
        full_premis_path = os.path.join(new_ip.object_path, premis_path)
        try:
            os.remove(full_premis_path)
        except FileNotFoundError:
            pass
    workflow = [
        {
            "step": True,
            "name": "Generate AIP",
            "children": [
                {
                    "name": "ESSArch_Core.ip.tasks.DownloadSchemas",
                    "label": "Download Schemas",
                },
                {
                    "step": True,
                    "name": "Create Log File",
                    "children": [
                        {
                            "name": "ESSArch_Core.ip.tasks.GenerateEventsXML",
                            "label": "Generate events xml file",
                        },
                        {
                            "name": "ESSArch_Core.tasks.AppendEvents",
                            "label": "Add events to xml file",
                        },
                        {
                            "name": "ESSArch_Core.ip.tasks.AddPremisIPObjectElementToEventsFile",
                            "label": "Add premis IP object to xml file",
                        },
                    ]
                },
                {
                    "name": "ESSArch_Core.ip.tasks.GenerateContentMetadata",
                    "label": "Generate contentmetadata",
                },
            ]
        },
        {
            "step": True,
            "name": "Validate AIP",
            "children": [{
                "name": "ESSArch_Core.tasks.ValidateXMLFile",
                "label": "Validate content-mets",
                "params": {
                    "xml_filename": "{{_CONTENT_METS_PATH}}",
                }
            }, {
                # Only executed when a premis file was generated.
                "name": "ESSArch_Core.tasks.ValidateXMLFile",
                "if": generate_premis,
                "label": "Validate premis",
                "params": {
                    "xml_filename": "{{_PREMIS_PATH}}",
                }
            }, {
                "name": "ESSArch_Core.tasks.ValidateLogicalPhysicalRepresentation",
                "label": "Diff-check against content-mets",
                "args": ["{{_OBJPATH}}", "{{_CONTENT_METS_PATH}}"],
            }, {
                "name": "ESSArch_Core.tasks.CompareXMLFiles",
                "if": generate_premis,
                "label": "Compare premis and content-mets",
                "args": ["{{_PREMIS_PATH}}", "{{_CONTENT_METS_PATH}}"],
                "params": {'recursive': False},
            }, {
                "name": "ESSArch_Core.tasks.CompareRepresentationXMLFiles",
                "if": has_representations and generate_premis,
                "label": "Compare representation premis and mets",
            }]
        },
        {
            "name": "ESSArch_Core.tasks.UpdateIPSizeAndCount",
            "label": "Update IP size and file count",
        },
    ]
    # Append the IP's own preservation steps and run everything eagerly.
    workflow += new_ip.create_preservation_workflow()
    workflow = create_workflow(workflow, new_ip, name='Preserve Information Package', eager=True)
    workflow.run()
def test_steps(self):
    """Sequential and parallel sibling steps are both created with their tasks.

    Verifies the root wrapper holds two child steps (one sequential, one
    parallel), each containing its own two tasks in spec order.
    """
    spec = [{
        "step": True,
        "name": "My step",
        "children": [{
            "name": "ESSArch_Core.WorkflowEngine.tests.tasks.First",
            "label": "Foo Bar Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'}
        }, {
            "name": "ESSArch_Core.WorkflowEngine.tests.tasks.Second",
            "label": "Foo Bar Task2",
            "args": [3, 2, 1],
            "params": {'b': 'a'}
        }]
    }, {
        "step": True,
        "name": "Parallel step",
        "parallel": True,
        "children": [{
            "name": "ESSArch_Core.WorkflowEngine.tests.tasks.First",
            "label": "Parallel Foo Bar Task",
            "args": [1, 2, 3],
            "params": {'a': 'b'}
        }, {
            "name": "ESSArch_Core.WorkflowEngine.tests.tasks.Second",
            "label": "Parallel Foo Bar Task2",
            "args": [3, 2, 1],
            "params": {'b': 'a'}
        }]
    }]
    root_step = create_workflow(spec)
    self.assertEqual(ProcessStep.objects.count(), 3)
    self.assertEqual(ProcessTask.objects.count(), 4)
    self.assertEqual(root_step.tasks.count(), 0)
    self.assertEqual(root_step.child_steps.count(), 2)
    self.assertEqual(root_step.on_error.count(), 0)

    child_step = root_step.child_steps.get(parallel=False)
    self.assertEqual(child_step.tasks.count(), 2)
    self.assertEqual(child_step.child_steps.count(), 0)
    self.assertEqual(
        child_step.tasks.earliest('processstep_pos').name,
        spec[0]['children'][0]['name'])
    self.assertEqual(
        child_step.tasks.latest('processstep_pos').name,
        spec[0]['children'][1]['name'])
    self.assertEqual(child_step.on_error.count(), 0)

    parallel_child_step = root_step.child_steps.get(parallel=True)
    self.assertEqual(parallel_child_step.tasks.count(), 2)
    self.assertEqual(parallel_child_step.child_steps.count(), 0)
    # BUG FIX: these assertions previously compared against spec[0] (the
    # sequential step) and only passed because the task names coincide;
    # the parallel step's tasks come from spec[1].
    self.assertEqual(
        parallel_child_step.tasks.earliest('processstep_pos').name,
        spec[1]['children'][0]['name'])
    self.assertEqual(
        parallel_child_step.tasks.latest('processstep_pos').name,
        spec[1]['children'][1]['name'])
    self.assertEqual(parallel_child_step.on_error.count(), 0)
def test_empty_child_steps_are_removed(self):
    """Steps that end up with no tasks or non-empty children are pruned.

    Builds a tree where step_ba is empty, step_ca is empty, and step_c
    contains only the empty step_ca — all three must be deleted, while
    steps that (transitively) contain tasks survive.
    """
    spec = [{
        "step": True,
        "name": "My step",
        "children": [
            {
                "name": "ESSArch_Core.WorkflowEngine.tests.tasks.First",
                "label": "Foo Bar Task",
                "args": [1, 2, 3],
                "params": {'a': 'b'}
            },
            {
                # step_a survives: it contains step_aa which holds a task.
                "step": True,
                "name": "step_a",
                "children": [{
                    "step": True,
                    "name": "step_aa",
                    "children": [
                        {
                            "name": "ESSArch_Core.WorkflowEngine.tests.tasks.First",
                            "label": "Foo Bar Task 2",
                            "args": [1, 2, 3],
                            "params": {'a': 'b'}
                        },
                    ]
                }]
            },
            {
                # step_b survives (has a direct task); its empty child
                # step_ba must be removed.
                "step": True,
                "name": "step_b",
                "children": [
                    {
                        "step": True,
                        "name": "step_ba",
                        "children": []
                    },
                    {
                        "name": "ESSArch_Core.WorkflowEngine.tests.tasks.First",
                        "label": "Foo Bar Task 2",
                        "args": [1, 2, 3],
                        "params": {'a': 'b'}
                    },
                ]
            },
            {
                # step_c only contains the empty step_ca, so both go.
                "step": True,
                "name": "step_c",
                "children": [
                    {
                        "step": True,
                        "name": "step_ca",
                        "children": []
                    },
                ]
            },
        ]
    }]
    create_workflow(spec)
    # verify that only step_ba, step_c and step_ca has been deleted
    self.assertEqual(ProcessStep.objects.count(), 5)
    self.assertEqual(ProcessTask.objects.count(), 3)
    self.assertFalse(ProcessStep.objects.filter(name="step_ba").exists())
    self.assertFalse(ProcessStep.objects.filter(name="step_c").exists())
    self.assertFalse(ProcessStep.objects.filter(name="step_ca").exists())
def submit(self, request, pk=None):
    """
    Submits the specified information package.

    Validates the IP's state and profile, then builds and runs a workflow
    that generates the package mets, optionally validates it, submits the
    SIP, marks the IP as submitted and optionally emails the recipient.

    Args:
        request: The DRF request; may carry 'validators', and must carry
            'subject'/'body' when the IP has an email recipient.
        pk: The primary key (id) of the information package to submit

    Returns:
        A DRF Response; 400 on state/profile errors, otherwise a
        'submitting ip' status payload.
    """
    ip = self.get_object_for_update()

    # Reject concurrent operations on the same IP.
    if ip.is_locked():
        raise Conflict('Information package is locked')

    # Only IPs that have finished creation may be submitted.
    if ip.state != "Created":
        return Response(
            "The IP (%s) is in the state '%s' but should be 'Created'" % (pk, ip.state),
            status=status.HTTP_400_BAD_REQUEST)

    ip.state = 'Submitting'
    ip.save()

    # A submit description profile is mandatory for submission.
    sd_profile = ip.get_profile('submit_description')
    if sd_profile is None:
        return Response("The IP (%s) has no submit description profile" % pk,
                        status=status.HTTP_400_BAD_REQUEST)

    email_subject = None
    email_body = None
    recipient = ip.get_email_recipient()
    if recipient:
        # When an email will be sent, subject and body are required inputs.
        for arg in ['subject', 'body']:
            if arg not in request.data:
                raise exceptions.ParseError('%s parameter missing' % arg)
        email_subject = request.data['subject']
        email_body = request.data['body']

    # Optional validation toggles from the caller.
    validators = request.data.get('validators', {})
    validate_xml_file = validators.get('validate_xml_file', False)
    validate_logical_physical_representation = validators.get(
        'validate_logical_physical_representation', False)

    workflow_spec = [
        {
            "name": "ESSArch_Core.ip.tasks.GeneratePackageMets",
            "label": "Generate package-mets",
        },
        {
            "name": "ESSArch_Core.tasks.ValidateXMLFile",
            "if": validate_xml_file,
            "label": "Validate package-mets",
            "params": {
                "xml_filename": "{{_PACKAGE_METS_PATH}}",
            }
        },
        {
            "name": "ESSArch_Core.tasks.ValidateLogicalPhysicalRepresentation",
            "if": validate_logical_physical_representation,
            "label": "Diff-check against package-mets",
            "args": ["{{_OBJPATH}}", "{{_PACKAGE_METS_PATH}}"],
        },
        {
            "name": "preingest.tasks.SubmitSIP",
            "label": "Submit SIP",
        },
        {
            "name": "ESSArch_Core.tasks.UpdateIPStatus",
            "label": "Set status to submitted",
            "args": ["Submitted"],
        },
        {
            # Only runs when the IP has an email recipient configured.
            "name": "ESSArch_Core.tasks.SendEmail",
            "if": recipient,
            "label": "Send email",
            "params": {
                "subject": email_subject,
                "body": email_body,
                "recipients": [recipient],
                "attachments": [
                    "{{_PACKAGE_METS_PATH}}",
                ],
            }
        },
    ]

    workflow = create_workflow(workflow_spec, ip)
    workflow.name = "Submit SIP"
    workflow.information_package = ip
    workflow.save()
    workflow.run()
    return Response({'status': 'submitting ip'})
def create_ip(self, request, pk=None):
    """
    Creates the specified information package.

    Validates the IP's state, then builds and runs a workflow that
    optionally converts files, generates log/premis/content-mets metadata,
    optionally validates it, packs the IP into a container and marks it
    as created.

    Args:
        request: The DRF request; may carry 'file_conversion' and
            'validators' toggles.
        pk: The primary key (id) of the information package to create

    Returns:
        A DRF Response with a 'creating ip' status payload.

    Raises:
        Conflict: If the IP is locked.
        exceptions.ParseError: If the IP is not in the 'Uploaded' state.
    """
    ip = self.get_object_for_update()

    # Reject concurrent operations on the same IP.
    if ip.is_locked():
        raise Conflict('Information package is locked')

    # Only fully uploaded IPs may be created.
    if ip.state != "Uploaded":
        raise exceptions.ParseError(
            "The IP (%s) is in the state '%s' but should be 'Uploaded'" % (pk, ip.state))

    ip.state = 'Creating'
    ip.save()

    # Premis is generated only when its profile is locked on the IP.
    generate_premis = ip.profile_locked('preservation_metadata')

    # Optional file conversion (word documents to pdf).
    convert_files = request.data.get('file_conversion', False)
    file_format_map = {'doc': 'pdf', 'docx': 'pdf'}

    # Optional validation toggles from the caller.
    validators = request.data.get('validators', {})
    validate_xml_file = validators.get('validate_xml_file', False)
    validate_logical_physical_representation = validators.get(
        'validate_logical_physical_representation', False)

    workflow_spec = [
        {
            "name": "ESSArch_Core.tasks.ConvertFile",
            "if": convert_files,
            "label": "Convert Files",
            "args": ["{{_OBJPATH}}", file_format_map]
        },
        {
            "name": "ESSArch_Core.ip.tasks.DownloadSchemas",
            "label": "Download Schemas",
        },
        {
            "step": True,
            "name": "Create Log File",
            "children": [
                {
                    "name": "ESSArch_Core.ip.tasks.GenerateEventsXML",
                    "label": "Generate events xml file",
                },
                {
                    "name": "ESSArch_Core.tasks.AppendEvents",
                    "label": "Add events to xml file",
                },
                {
                    "name": "ESSArch_Core.ip.tasks.AddPremisIPObjectElementToEventsFile",
                    "label": "Add premis IP object to xml file",
                },
            ]
        },
        {
            "name": "ESSArch_Core.ip.tasks.GeneratePremis",
            "if": generate_premis,
            "label": "Generate premis",
        },
        {
            "name": "ESSArch_Core.ip.tasks.GenerateContentMets",
            "label": "Generate content-mets",
        },
        {
            # Skip the whole validation step when nothing was requested.
            "step": True,
            "name": "Validation",
            "if": any([
                validate_xml_file,
                validate_logical_physical_representation
            ]),
            "children": [{
                "name": "ESSArch_Core.tasks.ValidateXMLFile",
                "if": validate_xml_file,
                "label": "Validate content-mets",
                "params": {
                    "xml_filename": "{{_CONTENT_METS_PATH}}",
                }
            }, {
                # Premis validation needs both the file and the toggle.
                "name": "ESSArch_Core.tasks.ValidateXMLFile",
                "if": generate_premis and validate_xml_file,
                "label": "Validate premis",
                "params": {
                    "xml_filename": "{{_PREMIS_PATH}}",
                }
            }, {
                "name": "ESSArch_Core.tasks.ValidateLogicalPhysicalRepresentation",
                "if": validate_logical_physical_representation,
                "label": "Diff-check against content-mets",
                "args": ["{{_OBJPATH}}", "{{_CONTENT_METS_PATH}}"],
            }, {
                "name": "ESSArch_Core.tasks.CompareXMLFiles",
                "if": generate_premis,
                "label": "Compare premis and content-mets",
                "args": ["{{_PREMIS_PATH}}", "{{_CONTENT_METS_PATH}}"],
            }]
        },
        {
            "name": "ESSArch_Core.ip.tasks.CreateContainer",
            "label": "Create container",
        },
        {
            "name": "ESSArch_Core.tasks.UpdateIPStatus",
            "label": "Set status to created",
            "args": ["Created"],
        },
    ]

    workflow = create_workflow(workflow_spec, ip)
    workflow.name = "Create SIP"
    workflow.information_package = ip
    workflow.save()
    workflow.run()
    return Response({'status': 'creating ip'})