def populate_sys_meta(pid, format_id, size, md5, name, rights_holder):
    """
    Build a system metadata document describing a single object.

    The pid is passed through ``check_pid`` before it is used, the checksum
    is recorded with the ``MD5`` algorithm, and the access policy is whatever
    ``generate_public_access_policy()`` returns.

    :param pid: The pid of the system metadata document
    :param format_id: The format of the document being described
    :param size: The size of the document that is being described
    :param md5: The md5 hash of the document being described
    :param name: The name of the file
    :param rights_holder: The owner of this object
    :type pid: str
    :type format_id: str
    :type size: int
    :type md5: str
    :type name: str
    :type rights_holder: str
    :return: The populated system metadata document
    """

    checked_pid = check_pid(pid)

    document = dataoneTypes.systemMetadata()
    document.identifier = checked_pid
    document.formatId = format_id
    document.size = size
    document.rightsHolder = rights_holder

    # Build the checksum element completely (value + algorithm) before
    # attaching it to the document.
    md5_checksum = dataoneTypes.checksum(str(md5))
    md5_checksum.algorithm = 'MD5'
    document.checksum = md5_checksum

    document.accessPolicy = generate_public_access_policy()
    document.fileName = name
    return document
Beispiel #2
0
    def generate_system_metadata(self, pid: str, name: str, format_id: str,
                                 size: int, md5: str, rights_holder: str) -> SystemMetadata:
        """
        Create the system metadata document for an object.

        ``rights_holder`` is recorded both as the submitter and as the rights
        holder. The checksum is stored with the ``MD5`` algorithm and the
        access policy is taken from ``self.get_access_policy()``.

        :param pid: The pid that the object will have
        :param name: The name of the object being described
        :param format_id: The format of the object (e.g text/csv)
        :param size: The size of the file
        :param md5: The md5 of the file
        :param rights_holder: The owner of this object
        :return: The metadata describing the object
        """

        document = dataoneTypes.systemMetadata()
        document.identifier = pid
        document.formatId = format_id
        document.size = size

        # The same subject fills both ownership-related fields.
        document.submitter = rights_holder
        document.rightsHolder = rights_holder

        # Build the checksum element completely before attaching it.
        md5_checksum = dataoneTypes.checksum(str(md5))
        md5_checksum.algorithm = 'MD5'
        document.checksum = md5_checksum

        document.accessPolicy = self.get_access_policy()
        document.fileName = name
        return document
Beispiel #3
0
 def update_sysmeta(sysmeta: SystemMetadata,
                    bytes_to_upload: Union[str, bytes], new_pid):
     """
     Re-point an existing system metadata document at a different object.

     The DataONE server sets various fields on the system metadata
     (authoritativeMemberNode, for example), and those are wanted when
     obsoleting an object. The identifier, size and checksum describe the
     old object, so they are overwritten here; ``obsoletes`` is cleared.

     :param sysmeta: The system metadata document; it is modified in place
     :param bytes_to_upload: The bytes that are being uploaded to DataONE.
         A ``str`` is encoded as UTF-8 first.
     :param new_pid: The pid of the object representing the bytes
     :return: The same ``sysmeta`` object that was passed in
     :raises ValueError: If ``bytes_to_upload`` is neither ``bytes`` nor ``str``
     """
     if isinstance(bytes_to_upload, bytes):
         payload = bytes_to_upload
     elif isinstance(bytes_to_upload, str):
         payload = bytes_to_upload.encode("utf-8")
     else:
         raise ValueError(
             'Unable to convert the data object with pid {} to bytes'.
             format(new_pid))

     # Size and digest are both derived from the encoded payload.
     payload_size = len(payload)
     hex_digest = md5(payload).hexdigest()

     sysmeta.identifier = str(new_pid)
     sysmeta.size = payload_size

     md5_checksum = dataoneTypes.checksum(str(hex_digest))
     md5_checksum.algorithm = 'MD5'
     sysmeta.checksum = md5_checksum

     sysmeta.obsoletes = None
     return sysmeta
Beispiel #4
0
 def test_1010(self):
     """Serialization: a Checksum survives the round trip Checksum -> XML -> Checksum."""
     original = dataoneTypes.checksum('1' * 32)
     original.algorithm = 'MD5'

     # Serialize with an explicit 'utf-8' argument, then parse the document back.
     restored = dataoneTypes.CreateFromDocument(original.toxml('utf-8'))

     assert original.value() == restored.value()
     assert original.algorithm == restored.algorithm
Beispiel #5
0
 def test_130(self):
   """Serialization: a Checksum survives the round trip Checksum -> XML -> Checksum."""
   original = dataoneTypes.checksum('1' * 32)
   original.algorithm = 'MD5'

   # Serialize with toxml() defaults, then parse the document back.
   restored = dataoneTypes.CreateFromDocument(original.toxml())

   self.assertEqual(original.value(), restored.value())
   self.assertEqual(original.algorithm, restored.algorithm)
Beispiel #6
0
def generate_sys_meta(pid, format_id, size, md5, now):
    """
    Build a system metadata document owned by ``SYSMETA_RIGHTSHOLDER``.

    :param pid: The identifier to record on the document
    :param format_id: The format id of the described object
    :param size: The size of the described object
    :param md5: The md5 value stored as the checksum (passed through as given)
    :param now: Timestamp used for both ``dateUploaded`` and
        ``dateSysMetadataModified``
    :return: The populated system metadata document
    """
    document = dataoneTypes.systemMetadata()
    document.identifier = pid
    document.formatId = format_id
    document.size = size
    document.rightsHolder = SYSMETA_RIGHTSHOLDER

    # Build the checksum element completely before attaching it.
    md5_checksum = dataoneTypes.checksum(md5)
    md5_checksum.algorithm = 'MD5'
    document.checksum = md5_checksum

    # Upload and last-modified timestamps start out identical.
    document.dateUploaded = now
    document.dateSysMetadataModified = now

    document.accessPolicy = generate_public_access_policy()
    return document
Beispiel #7
0
 def create_system_metadata(self, operation):
     """
     Create the system metadata object for the science file of an operation.

     Reads ``science-file``, ``identifier`` and ``format-id`` from
     ``operation['parameters']``, checks that the science file exists,
     determines its size and checksum via the ``_get_file_size`` and
     ``_get_file_checksum`` helpers, and hands everything to
     ``_create_pyxb_object``.

     :param operation: Mapping with a ``'parameters'`` entry holding the keys
         listed above
     :return: Whatever ``self._create_pyxb_object`` returns
     """
     # NOTE(review): disabled PyXB validation toggle carried over unchanged.
     #import pyxb
     #pyxb.RequireValidWhenGenerating(False)
     parameters = operation['parameters']
     science_file = parameters['science-file']
     cli_util.assert_file_exists(science_file)

     pid = parameters['identifier']
     format_id = parameters['format-id']
     file_size = self._get_file_size(science_file)
     file_checksum = self._get_file_checksum(science_file)
     checksum = dataoneTypes.checksum(file_checksum)
     return self._create_pyxb_object(operation, pid, format_id, file_size,
                                     checksum)
def test_generate_system_metadata():
    """
    Check that ``DataONEMetadata.generate_system_metadata`` copies each of its
    arguments onto the matching field of the returned system metadata object
    and tags the checksum with the ``MD5`` algorithm.
    """
    from d1_common.types import dataoneTypes

    # Runs inside the HTTMock context with the `mock_dataone_formats` handler;
    # presumably this intercepts the DataONE format lookups -- confirm against
    # the handler's definition.
    with httmock.HTTMock(mock_dataone_formats):
        node = "https://cn-stage-2.test.dataone.org/cn/"
        metadata = DataONEMetadata(node)
        pid = "urn:uuid:3c5d3c8d-b6c2-4dff-ac28-9f2e60a157a1"
        name = "run-local.sh"
        format_id = "application/octet-stream"
        size = 1338
        # A well-formed MD5 hex digest is exactly 32 hex characters; the
        # fixture previously carried a stray trailing comma inside the string.
        md5 = "50321b197d014a1f3d7a3adf99277919"
        rights_holder = "http://orcid.org/0000-0002-1756-2128"
        sys_meta = metadata.generate_system_metadata(pid, name, format_id, size, md5,
                                                     rights_holder)

        assert sys_meta.identifier.value() == pid
        assert sys_meta.formatId == format_id
        assert sys_meta.size == size
        # The rights holder is expected in both ownership-related fields.
        assert sys_meta.submitter.value() == rights_holder
        assert sys_meta.rightsHolder.value() == rights_holder
        assert sys_meta.checksum.value() == dataoneTypes.checksum(md5).value()
        assert sys_meta.checksum.algorithm == 'MD5'
        assert sys_meta.fileName == name