def generate_events_xml(ip):
    """Generate the events XML file of an information package.

    The file is written to ``ip.get_events_file_path()`` relative to
    ``ip.object_path``, using the event specification as template and the
    specification data filled in for ``ip``.

    Args:
        ip: The information package to generate the events file for
    """
    target = os.path.join(ip.object_path, ip.get_events_file_path())
    event_file = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate(
        {target: event_file},
        algorithm=checksum_algorithm,
    )
def run(self, verify=True):
    """Download every schema listed in the package's specifications.

    The specifications considered are the one of the package's own profile,
    the event specification and, when the package has a
    ``preservation_metadata`` profile relation, that profile's
    specification. Each URL under ``-schemasToPreserve`` is streamed to a
    file named after the URL's basename.

    Args:
        verify: Passed on to ``requests.get`` as its ``verify`` argument

    Raises:
        Exception: Whatever the failed download raised, after the partially
            written file has been removed
        OSError: If removing the partially written file fails for any
            reason other than the file not existing
    """
    ip = self.get_information_package()
    ip_profile_type = ip.get_package_type_display().lower()
    ip_profile = ip.get_profile_rel(ip_profile_type).profile
    structure = ip.get_structure()
    rootdir = ip.object_path

    specifications = [ip_profile.specification, get_event_spec()]
    premis_profile_rel = ip.get_profile_rel('preservation_metadata')
    if premis_profile_rel is not None:
        specifications.append(premis_profile_rel.profile.specification)

    def remove_partial_download(path):
        # Cleanup lives in its own function on purpose: the OSError it may
        # handle internally must not replace the download error that the
        # caller re-raises with a bare ``raise`` (on Python 2 the handled
        # exception state is tracked per frame).
        try:
            self.logger.debug(
                u'Deleting downloaded file if it exists: {}'.format(path))
            os.remove(path)
        except OSError as e:
            # A missing file simply means nothing was written yet.
            if e.errno != errno.ENOENT:
                self.logger.exception(
                    u'Failed to delete downloaded file: {}'.format(path))
                raise
        else:
            self.logger.info(u'Deleted downloaded file: {}'.format(path))

    self.logger.debug(u'Downloading schemas')
    found_schema = False

    for spec in specifications:
        schema_preserve_loc = spec.get(
            '-schemaPreservationLocation', 'xsd_files')
        if schema_preserve_loc and structure:
            reldir, _ = find_destination(schema_preserve_loc, structure)
            dirname = os.path.join(rootdir, reldir)
        else:
            # No location or no structure: store next to the package root.
            dirname = rootdir

        for schema in spec.get('-schemasToPreserve', []):
            found_schema = True
            dst = os.path.join(dirname, os.path.basename(schema))
            self.logger.info(
                u'Downloading schema from {} to {}'.format(schema, dst))
            try:
                r = requests.get(schema, stream=True, verify=verify)
                r.raise_for_status()
                with open(dst, 'wb') as f:
                    for chunk in r:
                        f.write(chunk)
            except Exception:
                self.logger.exception(
                    u'Download of schema failed: {}'.format(schema))
                remove_partial_download(dst)
                raise
            else:
                self.logger.info(u'Downloaded schema to {}'.format(dst))

    # Previously this message hung off a ``for ... else`` without any
    # ``break`` and was therefore logged on every run, even after schemas
    # had been downloaded.
    if not found_schema:
        self.logger.info(u'No schemas to download')
def run(self):
    """Generate the events XML file of this task's information package.

    The file is written to the package's events file path below its object
    path, from the event specification and the specification data filled in
    for the package.
    """
    package = self.get_information_package()
    destination = os.path.join(
        package.object_path, package.get_events_file_path())

    spec = get_event_spec()
    data = fill_specification_data(ip=package)
    checksum_algorithm = package.get_checksum_algorithm()

    XMLGenerator().generate(
        {destination: {'spec': spec, 'data': data}},
        algorithm=checksum_algorithm,
    )
def download_schemas(ip, logger, verify):
    """Download all schemas referenced by the specifications of ``ip``.

    Covers the specification of the package's own profile, the event
    specification and, if the package has a ``preservation_metadata``
    profile relation, that profile's specification.

    Args:
        ip: The information package whose schemas are downloaded
        logger: Handed to ``download_schema`` for every schema
        verify: Handed to ``download_schema`` for every schema
    """
    profile_type = ip.get_package_type_display().lower()
    profile = ip.get_profile_rel(profile_type).profile
    structure = ip.get_structure()
    package_root = ip.object_path

    specs = [profile.specification, get_event_spec()]
    premis_rel = ip.get_profile_rel('preservation_metadata')
    if premis_rel is not None:
        specs.append(premis_rel.profile.specification)

    for spec in specs:
        location = spec.get('-schemaPreservationLocation', 'xsd_files')

        if not (location and structure):
            # Nothing to resolve the location against: use the package root.
            target_dir = package_root
        else:
            relative_dir, _name = find_destination(location, structure)
            target_dir = os.path.join(package_root, relative_dir)

        for schema_url in spec.get('-schemasToPreserve', []):
            download_schema(target_dir, logger, schema_url, verify)
def create_ip(self, request, pk=None):
    """
    Creates the specified information package

    Builds a "Create SIP" process step with child steps that update the
    package status, generate the metadata files, optionally validate them,
    create the events file and pack everything into a ZIP or TAR
    container, and then runs that step.

    Args:
        request: The request; ``request.data`` may contain a ``validators``
            dict with the optional flags ``validate_xml_file``,
            ``validate_file_format``, ``validate_integrity`` and
            ``validate_logical_physical_representation``
        pk: The primary key (id) of the information package to create

    Returns:
        A Response with the body ``{'status': 'creating ip'}``

    Raises:
        ValueError: If the information package is not in the state
            "Uploaded"
    """
    ip = self.get_object()
    sa = ip.SubmissionAgreement
    agent = request.user

    # Guard: the package must be in the "Uploaded" state.
    if ip.State != "Uploaded":
        raise ValueError(
            "The IP (%s) is in the state '%s' but should be 'Uploaded'" %
            (pk, ip.State))

    # Optional validation switches supplied by the client; every flag
    # defaults to off.
    validators = request.data.get('validators', {})
    validate_xml_file = validators.get('validate_xml_file', False)
    validate_file_format = validators.get('validate_file_format', False)
    validate_integrity = validators.get('validate_integrity', False)
    validate_logical_physical_representation = validators.get(
        'validate_logical_physical_representation', False)

    container_format = ip.get_container_format()

    # Root step; its child steps are attached at the end of the method.
    main_step = ProcessStep.objects.create(name="Create SIP", )

    # Child step 0: set the package status to "Creating".
    t0 = ProcessTask.objects.create(
        name="preingest.tasks.UpdateIPStatus",
        params={
            "ip": ip,
            "status": "Creating",
        },
        processstep_pos=0,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    )
    start_create_sip_step = ProcessStep.objects.create(
        name="Update IP Status", parent_step_pos=0)
    start_create_sip_step.tasks.add(t0)

    # Record an event of type 10200 for this package with the requesting
    # user as agent.
    event_type = EventType.objects.get(eventType=10200)
    create_event(event_type, 0, "Created SIP", get_versions()['version'],
                 agent, ip=ip)

    # <prepare>/<pk> is the directory that is parsed and packed,
    # <reception>/<pk> is the container path without its extension.
    prepare_path = Path.objects.get(entity="path_preingest_prepare").value
    reception_path = Path.objects.get(
        entity="path_preingest_reception").value
    ip_prepare_path = os.path.join(prepare_path, str(ip.pk))
    ip_reception_path = os.path.join(reception_path, str(ip.pk))
    events_path = os.path.join(ip_prepare_path, "ipevents.xml")

    # The SIP profile supplies both the directory structure and the data
    # handed to the first GenerateXML task.
    structure = ip.get_profile('sip').structure
    info = ip.get_profile('sip').fill_specification_data(sa, ip)

    # ensure premis is created before mets
    # (the OrderedDict keeps the insertion order of the files)
    filesToCreate = OrderedDict()

    # premis_path is only bound when the preservation_metadata profile is
    # locked; it is read again below under the same condition.
    if ip.profile_locked('preservation_metadata'):
        premis_profile = ip.get_profile('preservation_metadata')
        premis_dir, premis_name = find_destination(
            "preservation_description_file", structure)
        premis_path = os.path.join(ip.ObjectPath, premis_dir, premis_name)
        filesToCreate[premis_path] = premis_profile.specification

    mets_dir, mets_name = find_destination("mets_file", structure)
    mets_path = os.path.join(ip.ObjectPath, mets_dir, mets_name)
    filesToCreate[mets_path] = ip.get_profile('sip').specification

    # Child step 1: one DownloadSchemas task per file to create, followed
    # by a single GenerateXML task for all of them.
    generate_xml_step = ProcessStep.objects.create(name="Generate XML",
                                                   parent_step_pos=1)

    for fname, template in filesToCreate.iteritems():
        dirname = os.path.dirname(fname)
        t = ProcessTask.objects.create(
            name="ESSArch_Core.tasks.DownloadSchemas",
            params={
                "template": template,
                "dirname": dirname,
                "structure": structure,
                "root": ip.ObjectPath,
            },
            processstep_pos=1,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )
        generate_xml_step.tasks.add(t)

    t = ProcessTask.objects.create(
        name="preingest.tasks.GenerateXML",
        params={
            "info": info,
            "filesToCreate": filesToCreate,
            "folderToParse": ip_prepare_path,
            "algorithm": ip.get_checksum_algorithm(),
        },
        processstep_pos=3,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    )
    generate_xml_step.tasks.add(t)

    # Child step 2 (optional): only created when at least one validator
    # flag is truthy. It is linked to main_step through parent_step, not
    # through the child_steps.add() call at the end of the method.
    if any(validators.itervalues()):
        validate_step = ProcessStep.objects.create(
            name="Validation",
            parent_step=main_step,
            parent_step_pos=2,
        )

        if validate_xml_file:
            validate_step.tasks.add(
                ProcessTask.objects.create(
                    name="preingest.tasks.ValidateXMLFile",
                    params={
                        "xml_filename": mets_path,
                    },
                    processstep_pos=1,
                    log=EventIP,
                    information_package=ip,
                    responsible=self.request.user,
                ))

            # The premis file only exists when the profile is locked.
            if ip.profile_locked("preservation_metadata"):
                validate_step.tasks.add(
                    ProcessTask.objects.create(
                        name="preingest.tasks.ValidateXMLFile",
                        params={
                            "xml_filename": premis_path,
                        },
                        processstep_pos=2,
                        log=EventIP,
                        information_package=ip,
                        responsible=self.request.user,
                    ))

        if validate_logical_physical_representation:
            validate_step.tasks.add(
                ProcessTask.objects.create(
                    name=
                    "preingest.tasks.ValidateLogicalPhysicalRepresentation",
                    params={
                        "dirname": ip.ObjectPath,
                        "xmlfile": mets_path,
                    },
                    processstep_pos=3,
                    log=EventIP,
                    information_package=ip,
                    responsible=self.request.user,
                ))

        # Added whenever the validation step exists; the file format and
        # integrity flags are forwarded to the task as parameters.
        validate_step.tasks.add(
            ProcessTask.objects.create(
                name="preingest.tasks.ValidateFiles",
                params={
                    "ip": ip,
                    "xmlfile": mets_path,
                    "validate_fileformat": validate_file_format,
                    "validate_integrity": validate_integrity,
                },
                processstep_pos=4,
                log=EventIP,
                information_package=ip,
                responsible=self.request.user,
            ))

        validate_step.save()

    # From here on `info` and `filesToCreate` are rebound to new objects
    # for the events file; the GenerateXML task created above keeps the
    # objects it was given.
    info = {"_OBJID": str(ip.pk), "_OBJLABEL": ip.Label}

    filesToCreate = OrderedDict()
    filesToCreate[events_path] = get_event_spec()

    # Child step 3: create the events file, pack the container and finish
    # the package.
    create_sip_step = ProcessStep.objects.create(name="Create SIP",
                                                 parent_step_pos=3)

    for fname, template in filesToCreate.iteritems():
        dirname = os.path.dirname(fname)
        # Position -1 is lower than GenerateXML's position 0 below --
        # presumably so the schemas are fetched first; verify the ordering
        # rule of ProcessStep.
        create_sip_step.tasks.add(
            ProcessTask.objects.create(
                name="ESSArch_Core.tasks.DownloadSchemas",
                params={
                    "template": template,
                    "dirname": dirname,
                    "structure": structure,
                    "root": ip.ObjectPath,
                },
                processstep_pos=-1,
                log=EventIP,
                information_package=ip,
                responsible=self.request.user,
            ))

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.GenerateXML",
            params={
                "info": info,
                "filesToCreate": filesToCreate,
                "algorithm": ip.get_checksum_algorithm(),
            },
            processstep_pos=0,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.AppendEvents",
            params={
                "filename": events_path,
            },
            processstep_pos=1,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    # Template of a premis "object" element describing the container
    # file; the InsertXML task below receives it together with `info`.
    spec = {
        "-name": "object",
        "-namespace": "premis",
        "-children": [{
            "-name": "objectIdentifier",
            "-namespace": "premis",
            "-children": [{
                "-name": "objectIdentifierType",
                "-namespace": "premis",
                "#content": [{
                    "var": "FIDType"
                }],
                "-children": []
            }, {
                "-name": "objectIdentifierValue",
                "-namespace": "premis",
                "#content": [{
                    "var": "FID"
                }],
                "-children": []
            }]
        }, {
            "-name": "objectCharacteristics",
            "-namespace": "premis",
            "-children": [{
                "-name": "format",
                "-namespace": "premis",
                "-children": [{
                    "-name": "formatDesignation",
                    "-namespace": "premis",
                    "-children": [{
                        "-name": "formatName",
                        "-namespace": "premis",
                        "#content": [{
                            "var": "FFormatName"
                        }],
                        "-children": []
                    }]
                }]
            }]
        }, {
            "-name": "storage",
            "-namespace": "premis",
            "-children": [{
                "-name": "contentLocation",
                "-namespace": "premis",
                "-children": [{
                    "-name": "contentLocationType",
                    "-namespace": "premis",
                    "#content": [{
                        "var": "FLocationType"
                    }],
                    "-children": []
                }, {
                    "-name": "contentLocationValue",
                    "-namespace": "premis",
                    "#content": [{
                        "text":
                        "file:///%s.%s" % (ip.pk, container_format.lower())
                    }],
                    "-children": []
                }]
            }]
        }],
        "-attr": [{
            "-name": "type",
            '-namespace': 'xsi',
            "-req": "1",
            "#content": [{
                "text": "premis:file"
            }]
        }],
    }

    # Values for the {"var": ...} placeholders of `spec`.
    # NOTE(review): 'FName' is not referenced by any placeholder above.
    info = {
        'FIDType': "UUID",
        'FID': ip.ObjectIdentifierValue,
        'FFormatName': container_format.upper(),
        'FLocationType': 'URI',
        'FName': ip.ObjectPath,
    }

    # NOTE(review): unlike every other task in this method, this one is
    # created without log=EventIP -- confirm that this is intended.
    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="ESSArch_Core.tasks.InsertXML",
            params={
                "filename": events_path,
                "elementToAppendTo": "premis",
                "spec": spec,
                "info": info,
                "index": 0
            },
            processstep_pos=2,
            information_package=ip,
            responsible=self.request.user,
        ))

    if validate_xml_file:
        create_sip_step.tasks.add(
            ProcessTask.objects.create(
                name="preingest.tasks.ValidateXMLFile",
                params={
                    "xml_filename": events_path,
                },
                processstep_pos=3,
                log=EventIP,
                information_package=ip,
                responsible=self.request.user,
            ))

    # Every container format other than "zip" is packed as TAR.
    if container_format.lower() == 'zip':
        zipname = os.path.join(ip_reception_path) + '.zip'
        container_task = ProcessTask.objects.create(
            name="preingest.tasks.CreateZIP",
            params={
                "dirname": ip_prepare_path,
                "zipname": zipname,
            },
            processstep_pos=4,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )
    else:
        tarname = os.path.join(ip_reception_path) + '.tar'
        container_task = ProcessTask.objects.create(
            name="preingest.tasks.CreateTAR",
            params={
                "dirname": ip_prepare_path,
                "tarname": tarname,
            },
            processstep_pos=4,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )

    create_sip_step.tasks.add(container_task)

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.DeleteFiles",
            params={"path": ip.ObjectPath},
            processstep_pos=45,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    # result_params maps "path" to the primary key of the container task
    # -- presumably resolved to that task's result when the step runs;
    # verify against ProcessTask.
    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.UpdateIPPath",
            params={
                "ip": ip,
            },
            result_params={"path": container_task.pk},
            processstep_pos=50,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.UpdateIPStatus",
            params={
                "ip": ip,
                "status": "Created",
            },
            processstep_pos=60,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    create_sip_step.save()

    # Attach the child steps, bind the root step to the package and run it.
    main_step.child_steps.add(start_create_sip_step, generate_xml_step,
                              create_sip_step)
    main_step.information_package = ip
    main_step.save()
    main_step.run()

    return Response({'status': 'creating ip'})
def create_ip(self, request, pk=None):
    """
    Creates the specified information package

    Builds a "Create SIP" process step with child steps that update the
    package status, generate the metadata files, optionally validate them,
    create the events file and pack everything into a ZIP or TAR
    container, and then runs that step.

    Args:
        request: The request; ``request.data`` may contain a ``validators``
            dict with the optional flags ``validate_xml_file``,
            ``validate_file_format``, ``validate_integrity`` and
            ``validate_logical_physical_representation``
        pk: The primary key (id) of the information package to create

    Returns:
        A Response with the body ``{'status': 'creating ip'}``

    Raises:
        ValueError: If the information package is not in the state
            "Uploaded"
    """

    ip = self.get_object()
    sa = ip.SubmissionAgreement
    agent = request.user

    # Guard: the package must be in the "Uploaded" state.
    if ip.State != "Uploaded":
        raise ValueError(
            "The IP (%s) is in the state '%s' but should be 'Uploaded'" % (pk, ip.State)
        )

    # Optional validation switches supplied by the client; every flag
    # defaults to off.
    validators = request.data.get('validators', {})

    validate_xml_file = validators.get('validate_xml_file', False)
    validate_file_format = validators.get('validate_file_format', False)
    validate_integrity = validators.get('validate_integrity', False)
    validate_logical_physical_representation = validators.get('validate_logical_physical_representation', False)

    container_format = ip.get_container_format()

    # Root step; its child steps are attached at the end of the method.
    main_step = ProcessStep.objects.create(
        name="Create SIP",
    )

    # Child step 0: set the package status to "Creating".
    t0 = ProcessTask.objects.create(
        name="preingest.tasks.UpdateIPStatus",
        params={
            "ip": ip,
            "status": "Creating",
        },
        processstep_pos=0,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    )
    start_create_sip_step = ProcessStep.objects.create(
        name="Update IP Status",
        parent_step_pos=0
    )

    start_create_sip_step.tasks.add(t0)

    # Record an event of type 10200 for this package with the requesting
    # user as agent.
    event_type = EventType.objects.get(eventType=10200)

    create_event(event_type, 0, "Created SIP", get_versions()['version'], agent, ip=ip)

    # <prepare>/<pk> is the directory that is parsed and packed,
    # <reception>/<pk> is the container path without its extension.
    prepare_path = Path.objects.get(
        entity="path_preingest_prepare"
    ).value

    reception_path = Path.objects.get(
        entity="path_preingest_reception"
    ).value

    ip_prepare_path = os.path.join(prepare_path, str(ip.pk))
    ip_reception_path = os.path.join(reception_path, str(ip.pk))
    events_path = os.path.join(ip_prepare_path, "ipevents.xml")

    # The SIP profile supplies both the directory structure and the data
    # handed to the first GenerateXML task.
    structure = ip.get_profile('sip').structure

    info = ip.get_profile('sip').fill_specification_data(sa, ip)

    # ensure premis is created before mets
    # (the OrderedDict keeps the insertion order of the files)
    filesToCreate = OrderedDict()

    # premis_path is only bound when the preservation_metadata profile is
    # locked; it is read again below under the same condition.
    if ip.profile_locked('preservation_metadata'):
        premis_profile = ip.get_profile('preservation_metadata')
        premis_dir, premis_name = find_destination("preservation_description_file", structure)
        premis_path = os.path.join(ip.ObjectPath, premis_dir, premis_name)
        filesToCreate[premis_path] = premis_profile.specification

    mets_dir, mets_name = find_destination("mets_file", structure)
    mets_path = os.path.join(ip.ObjectPath, mets_dir, mets_name)
    filesToCreate[mets_path] = ip.get_profile('sip').specification

    # Child step 1: one DownloadSchemas task per file to create, followed
    # by a single GenerateXML task for all of them.
    generate_xml_step = ProcessStep.objects.create(
        name="Generate XML",
        parent_step_pos=1
    )

    for fname, template in filesToCreate.iteritems():
        dirname = os.path.dirname(fname)
        t = ProcessTask.objects.create(
            name="ESSArch_Core.tasks.DownloadSchemas",
            params={
                "template": template,
                "dirname": dirname,
                "structure": structure,
                "root": ip.ObjectPath,
            },
            processstep_pos=1,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )

        generate_xml_step.tasks.add(t)

    t = ProcessTask.objects.create(
        name="preingest.tasks.GenerateXML",
        params={
            "info": info,
            "filesToCreate": filesToCreate,
            "folderToParse": ip_prepare_path,
            "algorithm": ip.get_checksum_algorithm(),
        },
        processstep_pos=3,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    )

    generate_xml_step.tasks.add(t)

    # Child step 2 (optional): only created when at least one validator
    # flag is truthy. It is linked to main_step through parent_step, not
    # through the child_steps.add() call at the end of the method.
    if any(validators.itervalues()):
        validate_step = ProcessStep.objects.create(
            name="Validation", parent_step=main_step,
            parent_step_pos=2,
        )

        if validate_xml_file:
            validate_step.tasks.add(
                ProcessTask.objects.create(
                    name="preingest.tasks.ValidateXMLFile",
                    params={
                        "xml_filename": mets_path,
                    },
                    processstep_pos=1,
                    log=EventIP,
                    information_package=ip,
                    responsible=self.request.user,
                )
            )

            # The premis file only exists when the profile is locked.
            if ip.profile_locked("preservation_metadata"):
                validate_step.tasks.add(
                    ProcessTask.objects.create(
                        name="preingest.tasks.ValidateXMLFile",
                        params={
                            "xml_filename": premis_path,
                        },
                        processstep_pos=2,
                        log=EventIP,
                        information_package=ip,
                        responsible=self.request.user,
                    )
                )

        if validate_logical_physical_representation:
            validate_step.tasks.add(
                ProcessTask.objects.create(
                    name="preingest.tasks.ValidateLogicalPhysicalRepresentation",
                    params={
                        "dirname": ip.ObjectPath,
                        "xmlfile": mets_path,
                    },
                    processstep_pos=3,
                    log=EventIP,
                    information_package=ip,
                    responsible=self.request.user,
                )
            )

        # Added whenever the validation step exists; the file format and
        # integrity flags are forwarded to the task as parameters.
        validate_step.tasks.add(
            ProcessTask.objects.create(
                name="preingest.tasks.ValidateFiles",
                params={
                    "ip": ip,
                    "xmlfile": mets_path,
                    "validate_fileformat": validate_file_format,
                    "validate_integrity": validate_integrity,
                },
                processstep_pos=4,
                log=EventIP,
                information_package=ip,
                responsible=self.request.user,
            )
        )

        validate_step.save()

    # From here on `info` and `filesToCreate` are rebound to new objects
    # for the events file; the GenerateXML task created above keeps the
    # objects it was given.
    info = {
        "_OBJID": str(ip.pk),
        "_OBJLABEL": ip.Label
    }

    filesToCreate = OrderedDict()
    filesToCreate[events_path] = get_event_spec()

    # Child step 3: create the events file, pack the container and finish
    # the package.
    create_sip_step = ProcessStep.objects.create(
        name="Create SIP",
        parent_step_pos=3
    )

    for fname, template in filesToCreate.iteritems():
        dirname = os.path.dirname(fname)
        # Position -1 is lower than GenerateXML's position 0 below --
        # presumably so the schemas are fetched first; verify the ordering
        # rule of ProcessStep.
        create_sip_step.tasks.add(ProcessTask.objects.create(
            name="ESSArch_Core.tasks.DownloadSchemas",
            params={
                "template": template,
                "dirname": dirname,
                "structure": structure,
                "root": ip.ObjectPath,
            },
            processstep_pos=-1,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        ))

    create_sip_step.tasks.add(ProcessTask.objects.create(
        name="preingest.tasks.GenerateXML",
        params={
            "info": info,
            "filesToCreate": filesToCreate,
            "algorithm": ip.get_checksum_algorithm(),
        },
        processstep_pos=0,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    ))

    create_sip_step.tasks.add(ProcessTask.objects.create(
        name="preingest.tasks.AppendEvents",
        params={
            "filename": events_path,
        },
        processstep_pos=1,
        log=EventIP,
        information_package=ip,
        responsible=self.request.user,
    ))

    # Template of a premis "object" element describing the container
    # file; the InsertXML task below receives it together with `info`.
    spec = {
        "-name": "object",
        "-namespace": "premis",
        "-children": [
            {
                "-name": "objectIdentifier",
                "-namespace": "premis",
                "-children": [
                    {
                        "-name": "objectIdentifierType",
                        "-namespace": "premis",
                        "#content": [{"var": "FIDType"}],
                        "-children": []
                    },
                    {
                        "-name": "objectIdentifierValue",
                        "-namespace": "premis",
                        "#content": [{"var": "FID"}],
                        "-children": []
                    }
                ]
            },
            {
                "-name": "objectCharacteristics",
                "-namespace": "premis",
                "-children": [
                    {
                        "-name": "format",
                        "-namespace": "premis",
                        "-children": [
                            {
                                "-name": "formatDesignation",
                                "-namespace": "premis",
                                "-children": [
                                    {
                                        "-name": "formatName",
                                        "-namespace": "premis",
                                        "#content": [{"var": "FFormatName"}],
                                        "-children": []
                                    }
                                ]
                            }
                        ]
                    }
                ]
            },
            {
                "-name": "storage",
                "-namespace": "premis",
                "-children": [
                    {
                        "-name": "contentLocation",
                        "-namespace": "premis",
                        "-children": [
                            {
                                "-name": "contentLocationType",
                                "-namespace": "premis",
                                "#content": [{"var": "FLocationType"}],
                                "-children": []
                            },
                            {
                                "-name": "contentLocationValue",
                                "-namespace": "premis",
                                "#content": [{"text": "file:///%s.%s" % (ip.pk, container_format.lower())}],
                                "-children": []
                            }
                        ]
                    }
                ]
            }
        ],
        "-attr": [
            {
                "-name": "type",
                '-namespace': 'xsi',
                "-req": "1",
                "#content": [{"text": "premis:file"}]
            }
        ],
    }

    # Values for the {"var": ...} placeholders of `spec`.
    # NOTE(review): 'FName' is not referenced by any placeholder above.
    info = {
        'FIDType': "UUID",
        'FID': ip.ObjectIdentifierValue,
        'FFormatName': container_format.upper(),
        'FLocationType': 'URI',
        'FName': ip.ObjectPath,
    }

    # NOTE(review): unlike every other task in this method, this one is
    # created without log=EventIP -- confirm that this is intended.
    create_sip_step.tasks.add(ProcessTask.objects.create(
        name="ESSArch_Core.tasks.InsertXML",
        params={
            "filename": events_path,
            "elementToAppendTo": "premis",
            "spec": spec,
            "info": info,
            "index": 0
        },
        processstep_pos=2,
        information_package=ip,
        responsible=self.request.user,
    ))

    if validate_xml_file:
        create_sip_step.tasks.add(
            ProcessTask.objects.create(
                name="preingest.tasks.ValidateXMLFile",
                params={
                    "xml_filename": events_path,
                },
                processstep_pos=3,
                log=EventIP,
                information_package=ip,
                responsible=self.request.user,
            )
        )

    # Every container format other than "zip" is packed as TAR.
    if container_format.lower() == 'zip':
        zipname = os.path.join(ip_reception_path) + '.zip'
        container_task = ProcessTask.objects.create(
            name="preingest.tasks.CreateZIP",
            params={
                "dirname": ip_prepare_path,
                "zipname": zipname,
            },
            processstep_pos=4,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )

    else:
        tarname = os.path.join(ip_reception_path) + '.tar'
        container_task = ProcessTask.objects.create(
            name="preingest.tasks.CreateTAR",
            params={
                "dirname": ip_prepare_path,
                "tarname": tarname,
            },
            processstep_pos=4,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )

    create_sip_step.tasks.add(container_task)

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.DeleteFiles",
            params={
                "path": ip.ObjectPath
            },
            processstep_pos=45,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )
    )

    # result_params maps "path" to the primary key of the container task
    # -- presumably resolved to that task's result when the step runs;
    # verify against ProcessTask.
    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.UpdateIPPath",
            params={
                "ip": ip,
            },
            result_params={
                "path": container_task.pk
            },
            processstep_pos=50,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )
    )

    create_sip_step.tasks.add(
        ProcessTask.objects.create(
            name="preingest.tasks.UpdateIPStatus",
            params={
                "ip": ip,
                "status": "Created",
            },
            processstep_pos=60,
            log=EventIP,
            information_package=ip,
            responsible=self.request.user,
        )
    )

    create_sip_step.save()

    # Attach the child steps, bind the root step to the package and run it.
    main_step.child_steps.add(
        start_create_sip_step, generate_xml_step, create_sip_step
    )
    main_step.information_package = ip
    main_step.save()
    main_step.run()

    return Response({'status': 'creating ip'})
def transfer(self, request, pk=None):
    """
    Transfers the specified information package

    Builds a "Transfer SIP" process step that updates the package status,
    transfers the SIP, generates an events file named ``<pk>_ipevents.xml``
    in the gate reception directory, appends the package's events to it,
    inserts a premis object describing the TAR container, and then runs
    the step.

    Args:
        pk: The primary key (id) of the information package to transfer

    Returns:
        A Response with the body "IP Transferred"
    """
    ip = self.get_object()
    reception_dir = Path.objects.get(entity="path_gate_reception").value

    events_info = {"_OBJID": str(pk), "_OBJLABEL": ip.Label}
    events_path = os.path.join(reception_dir, "%s_ipevents.xml" % pk)
    events_files = {events_path: get_event_spec()}

    step = ProcessStep.objects.create(
        name="Transfer SIP",
        information_package=ip,
    )

    def enqueue(name, params, position, **extra):
        # Create a task owned by the requesting user and attach it to the
        # transfer step.
        step.tasks.add(ProcessTask.objects.create(
            name=name,
            params=params,
            processstep_pos=position,
            information_package=ip,
            responsible=self.request.user,
            **extra
        ))

    def premis_branch(name, children):
        # A premis element that only holds child elements.
        return {
            "-name": name,
            "-namespace": "premis",
            "-children": children,
        }

    def premis_leaf(name, content):
        # A premis element with a single content entry and no children.
        return {
            "-name": name,
            "-namespace": "premis",
            "#content": [content],
            "-children": [],
        }

    enqueue(
        "preingest.tasks.UpdateIPStatus",
        {"ip": ip, "status": "Transferring"},
        10,
        log=EventIP,
    )
    enqueue(
        "preingest.tasks.TransferSIP",
        {"ip": ip},
        15,
        log=EventIP,
    )
    enqueue(
        "preingest.tasks.GenerateXML",
        {"info": events_info, "filesToCreate": events_files},
        20,
        log=EventIP,
    )
    enqueue(
        "preingest.tasks.AppendEvents",
        {"filename": events_path, "events": ip.events.all()},
        30,
        log=EventIP,
    )

    # Template of the premis "object" element describing the container.
    identifier = premis_branch("objectIdentifier", [
        premis_leaf("objectIdentifierType", {"var": "FIDType"}),
        premis_leaf("objectIdentifierValue", {"var": "FID"}),
    ])
    characteristics = premis_branch("objectCharacteristics", [
        premis_branch("format", [
            premis_branch("formatDesignation", [
                premis_leaf("formatName", {"var": "FFormatName"}),
            ]),
        ]),
    ])
    storage = premis_branch("storage", [
        premis_branch("contentLocation", [
            premis_leaf("contentLocationType", {"var": "FLocationType"}),
            premis_leaf(
                "contentLocationValue",
                {"text": "file:///%s.tar" % pk},
            ),
        ]),
    ])

    spec = premis_branch("object", [identifier, characteristics, storage])
    spec["-attr"] = [
        {
            "-name": "type",
            '-namespace': 'xsi',
            "-req": "1",
            "#content": [{"text": "premis:file"}],
        },
    ]

    object_info = {
        'FIDType': "UUID",
        'FID': "%s" % str(ip.pk),
        'FFormatName': 'TAR',
        'FLocationType': 'URI',
        'FName': ip.ObjectPath,
    }

    # This task is created without a ``log`` argument, as before.
    enqueue(
        "ESSArch_Core.tasks.InsertXML",
        {
            "filename": events_path,
            "elementToAppendTo": "premis",
            "spec": spec,
            "info": object_info,
            "index": 0,
        },
        40,
    )
    enqueue(
        "preingest.tasks.UpdateIPStatus",
        {"ip": ip, "status": "Transferred"},
        50,
        log=EventIP,
    )

    step.run()

    return Response("IP Transferred")