def populate_sys_meta(pid, format_id, size, md5, name, rights_holder):
    """
    Build a system metadata document describing a single object.

    The pid is first passed through ``check_pid`` and the returned value is
    what ends up as the document identifier. The access policy is whatever
    ``generate_public_access_policy`` returns.

    :param pid: The pid of the system metadata document
    :param format_id: The format of the document being described
    :param size: The size of the document that is being described
    :param md5: The md5 hash of the document being described
    :param name: The name of the file
    :param rights_holder: The owner of this object
    :type pid: str
    :type format_id: str
    :type size: int
    :type md5: str
    :type name: str
    :type rights_holder: str
    :return: The populated system metadata document
    """
    checked_pid = check_pid(pid)

    # Build the checksum element up front; the algorithm is always MD5 here.
    md5_checksum = dataoneTypes.checksum(str(md5))
    md5_checksum.algorithm = 'MD5'

    document = dataoneTypes.systemMetadata()
    document.identifier = checked_pid
    document.formatId = format_id
    document.size = size
    document.rightsHolder = rights_holder
    document.checksum = md5_checksum
    document.accessPolicy = generate_public_access_policy()
    document.fileName = name
    return document
def generate_system_metadata(self,
                             pid: str,
                             name: str,
                             format_id: str,
                             size: int,
                             md5: str,
                             rights_holder: str) -> SystemMetadata:
    """
    Create the system metadata document for an object.

    ``rights_holder`` is recorded both as the submitter and as the rights
    holder. The access policy comes from ``self.get_access_policy()``.

    :param pid: The pid that the object will have
    :param name: The name of the object being described
    :param format_id: The format of the object (e.g text/csv)
    :param size: The size of the file
    :param md5: The md5 of the file
    :param rights_holder: The owner of this object
    :return: The metadata describing the object
    """
    # Build the checksum element up front; the algorithm is always MD5 here.
    md5_checksum = dataoneTypes.checksum(str(md5))
    md5_checksum.algorithm = 'MD5'

    document = dataoneTypes.systemMetadata()
    document.identifier = pid
    document.formatId = format_id
    document.size = size
    document.submitter = rights_holder
    document.rightsHolder = rights_holder
    document.checksum = md5_checksum
    document.accessPolicy = self.get_access_policy()
    document.fileName = name
    return document
def update_sysmeta(sysmeta: SystemMetadata,
                   bytes_to_upload: Union[str, bytes],
                   new_pid):
    """
    Re-point an existing system metadata document at a different object.

    The idea is that the DataONE server sets various fields on the system
    metadata (the authoritative member node, for example), and those fields
    are wanted when obsoleting an object. Fields such as the checksum and
    the size differ for the new object, so they are recomputed here. The
    identifier is replaced with ``new_pid`` and ``obsoletes`` is cleared.

    :param sysmeta: The system metadata document (modified in place)
    :param bytes_to_upload: The bytes that are being uploaded to DataONE;
        a ``str`` is encoded as UTF-8 first
    :param new_pid: The pid of the object representing the bytes
    :return: The same ``sysmeta`` object that was passed in
    :raises ValueError: If ``bytes_to_upload`` is neither ``bytes`` nor ``str``
    """
    if isinstance(bytes_to_upload, bytes):
        payload = bytes_to_upload
    elif isinstance(bytes_to_upload, str):
        payload = bytes_to_upload.encode("utf-8")
    else:
        raise ValueError(
            'Unable to convert the data object with pid {} to bytes'.format(
                new_pid))

    payload_size = len(payload)
    digest = md5(payload).hexdigest()

    sysmeta.identifier = str(new_pid)
    sysmeta.size = payload_size
    sysmeta.checksum = dataoneTypes.checksum(str(digest))
    sysmeta.checksum.algorithm = 'MD5'
    sysmeta.obsoletes = None
    return sysmeta
def test_1010(self):
    """Serialization: Checksum -> XML -> Checksum"""
    original = dataoneTypes.checksum('1' * 32)
    original.algorithm = 'MD5'

    # Serialize to UTF-8 XML and parse it straight back.
    round_tripped = dataoneTypes.CreateFromDocument(original.toxml('utf-8'))

    assert original.value() == round_tripped.value()
    assert original.algorithm == round_tripped.algorithm
def test_130(self):
    """Serialization: Checksum -> XML -> Checksum. """
    original = dataoneTypes.checksum('1' * 32)
    original.algorithm = 'MD5'

    # Serialize with the default toxml() arguments and parse it back.
    round_tripped = dataoneTypes.CreateFromDocument(original.toxml())

    self.assertEqual(original.value(), round_tripped.value())
    self.assertEqual(original.algorithm, round_tripped.algorithm)
def generate_sys_meta(pid, format_id, size, md5, now):
    """
    Build a system metadata document owned by ``SYSMETA_RIGHTSHOLDER``.

    :param pid: The identifier to store on the document
    :param format_id: The format id to store on the document
    :param size: The size to store on the document
    :param md5: The checksum value; it is stored with algorithm ``MD5``
    :param now: Value used for both ``dateUploaded`` and
        ``dateSysMetadataModified``
    :return: The populated system metadata document
    """
    # Build the checksum element up front; the algorithm is always MD5 here.
    md5_checksum = dataoneTypes.checksum(md5)
    md5_checksum.algorithm = 'MD5'

    document = dataoneTypes.systemMetadata()
    document.identifier = pid
    document.formatId = format_id
    document.size = size
    document.rightsHolder = SYSMETA_RIGHTSHOLDER
    document.checksum = md5_checksum
    document.dateUploaded = now
    document.dateSysMetadataModified = now
    document.accessPolicy = generate_public_access_policy()
    return document
def create_system_metadata(self, operation):
    """
    Create the system metadata object for the science file of an operation.

    Reads ``science-file``, ``identifier`` and ``format-id`` from
    ``operation['parameters']``, checks that the science file exists, then
    hands the file's size and checksum to ``self._create_pyxb_object``.

    :param operation: Mapping with a ``'parameters'`` entry holding the
        keys listed above
    :return: Whatever ``self._create_pyxb_object`` returns
    """
    #import pyxb
    #pyxb.RequireValidWhenGenerating(False)
    parameters = operation['parameters']
    science_file = parameters['science-file']
    cli_util.assert_file_exists(science_file)

    pid = parameters['identifier']
    format_id = parameters['format-id']
    file_size = self._get_file_size(science_file)
    file_checksum = self._get_file_checksum(science_file)
    checksum = dataoneTypes.checksum(file_checksum)

    return self._create_pyxb_object(
        operation, pid, format_id, file_size, checksum)
def test_generate_system_metadata():
    """
    Check that ``DataONEMetadata.generate_system_metadata`` copies every
    argument onto the matching field of the generated document.

    DataONE format lookups are served by the ``mock_dataone_formats`` handler
    for the duration of the test.
    """
    from d1_common.types import dataoneTypes

    with httmock.HTTMock(mock_dataone_formats):
        node = "https://cn-stage-2.test.dataone.org/cn/"
        metadata = DataONEMetadata(node)

        pid = "urn:uuid:3c5d3c8d-b6c2-4dff-ac28-9f2e60a157a1"
        name = "run-local.sh"
        format_id = "application/octet-stream"
        size = 1338
        # A well-formed 32-character MD5 hex digest. The previous fixture had
        # a stray comma inside the string literal.
        md5 = "50321b197d014a1f3d7a3adf99277919"
        rights_holder = "http://orcid.org/0000-0002-1756-2128"

        sys_meta = metadata.generate_system_metadata(
            pid, name, format_id, size, md5, rights_holder)

        assert sys_meta.identifier.value() == pid
        assert sys_meta.formatId == format_id
        assert sys_meta.size == size
        # The rights holder is expected to be recorded as the submitter too.
        assert sys_meta.submitter.value() == rights_holder
        assert sys_meta.rightsHolder.value() == rights_holder
        assert sys_meta.checksum.value() == dataoneTypes.checksum(md5).value()
        assert sys_meta.checksum.algorithm == 'MD5'
        assert sys_meta.fileName == name