def _process_single_fields(self, item: dict):
        """ Copies the single-valued fields of a Pure record into self.data.

            Every (RDM field name, Pure json path) pair listed below is handed
            to self._add_field. Access right and language are read separately
            because they go through a conversion method before being stored. """

        # (RDM field name, PURE json path)
        single_fields = (
            ('title',             ['title']),
            ('uuid',              ['uuid']),
            ('pureId',            ['pureId']),
            ('publicationDate',   ['publicationStatuses', 0, 'publicationDate', 'year']),
            ('createdDate',       ['info', 'createdDate']),
            ('pages',             ['info', 'pages']),
            ('volume',            ['info', 'volume']),
            ('journalTitle',      ['info', 'journalAssociation', 'title', 'value']),
            ('journalNumber',     ['info', 'journalNumber']),
            ('metadataModifBy',   ['info', 'modifiedBy']),
            ('metadataModifDate', ['info', 'modifiedDate']),
            ('pure_link',         ['info', 'portalUrl']),
            ('recordType',        ['types', 0, 'value']),
            ('category',          ['categories', 0, 'value']),
            ('peerReview',        ['peerReview']),
            ('publicationStatus', ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']),
            ('numberOfAuthors',   ['totalNumberOfAuthors']),
            ('workflow',          ['workflows', 0, 'value']),
            ('confidential',      ['confidential']),
            ('publisherName',     ['publisher', 'names', 0, 'value']),
            ('abstract',          ['abstracts', 0, 'value']),
            ('managingOrganisationalUnit_name',       ['managingOrganisationalUnit', 'names', 0, 'value']),
            ('managingOrganisationalUnit_uuid',       ['managingOrganisationalUnit', 'uuid']),
            ('managingOrganisationalUnit_externalId', ['managingOrganisationalUnit', 'externalId']),
        )
        for rdm_field, pure_path in single_fields:
            self._add_field(item, rdm_field, pure_path)

        # Access right (always stored, after conversion)
        pure_access = get_value(item, ['openAccessPermissions', 0, 'value'])
        self.data['access_right'] = self._accessright_conversion(pure_access)

        # Language (always stored, after conversion)
        pure_language = get_value(item, ['languages', 0, 'value'])
        self.data['language'] = self._language_conversion(pure_language)
    def _get_contributor_name(self, item: object):
        """ Stores the contributor name in self.sub_data['name'] as 'last name, first name'.

            item: person association read through get_value
                  (paths ['name', 'firstName'] and ['name', 'lastName'])

            Whichever part of the name get_value does not provide is replaced
            by a placeholder text. """
        first_name = get_value(item, ['name', 'firstName'])
        last_name  = get_value(item, ['name', 'lastName'])

        if not first_name:
            first_name = '(first name not specified)'
        if not last_name:
            # The placeholder must go into last_name: assigning it to
            # first_name would discard the real first name and leave
            # last_name empty in the formatted string below
            last_name = '(last name not specified)'

        self.sub_data['name'] = f'{last_name}, {first_name}'
    def _populate_xml(self, item, name_space):
        """ Appends a <dataset> element describing `item` to self.root.

            item:       record metadata, read through get_value
            name_space: dict holding the 'dataset' and 'commons' xml namespaces

            Returns False, leaving self.root untouched, when the record has no
            title (mandatory field). Otherwise nothing is returned. """

        # Title                     (mandatory field)
        # Checked before the dataset element is created, so that a record
        # without title does not leave an empty <dataset> in self.root
        title = get_value(item, ['title'])
        if not title:
            return False

        # Dataset element
        body = ET.SubElement(self.root, "{%s}dataset" % name_space['dataset'])
        body.set('type', 'dataset')

        self._sub_element(body, name_space['dataset'], 'title').text = title

        # Managing organisation     (mandatory field)
        organisational_unit = self._sub_element(body, name_space['dataset'],
                                                'managingOrganisation')
        self._add_attribute(item, organisational_unit, 'lookupId',
                            ['managingOrganisationalUnit_externalId'])

        # Persons                   (mandatory field)
        self._add_persons(body, name_space, item)

        # Available date            (mandatory field)
        date = self._sub_element(body, name_space['dataset'], 'availableDate')
        sub_date = self._sub_element(date, name_space['commons'], 'year')
        sub_date.text = get_value(item, ['publication_date'])

        # Publisher                 (mandatory field)
        publisher = self._sub_element(body, name_space['dataset'],
                                      'publisher')  # REVIEW!!!!
        publisher.set(
            'lookupId',
            '45d22915-6545-4428-896a-8b8046191d5d')  # Data not in rdm
        self._sub_element(publisher, name_space['dataset'],
                          'name').text = 'Test publisher'  # Data not in rdm
        self._sub_element(publisher, name_space['dataset'],
                          'type').text = 'publisher'  # Data not in rdm

        # Description               (optional, taken from the record abstract)
        # The abstract is no longer overwritten by a hard-coded test text
        abstract = get_value(item, ['abstract'])
        if abstract:
            descriptions = self._sub_element(body, name_space['dataset'],
                                             'descriptions')
            description = self._sub_element(descriptions,
                                            name_space['dataset'],
                                            'description')
            description.set('type', 'datasetdescription')
            description.text = abstract

        # Links
        self._add_links(body, name_space)

        # Organisations
        self._add_organisations(body, name_space, item)
    def get_files_data(self, item: dict):
        """ Collects the metadata of one Pure file entry and downloads the file.

            Returns False when `item` has no 'file' entry, or when that entry
            lacks 'fileURL' or 'fileName'. Otherwise the file metadata is
            appended to self.data['versionFiles'], the file is fetched with
            get_pure_file and the response is passed on to
            self._process_file_download_response. """

        if not ('file' in item
                and 'fileURL' in item['file']
                and 'fileName' in item['file']):
            return False

        pure_file_size = get_value(item, ['file', 'size'])
        file_name      = get_value(item, ['file', 'fileName'])
        file_url       = get_value(item, ['file', 'fileURL'])

        # Becomes [True, <review value>] when the file is already known to RDM
        self.pure_rdm_file_match = []

        # Unless a matching RDM file says otherwise, the file is not reviewed
        reviewed = False

        # Looks for an RDM file having the same size and name as the Pure file
        for known_file in self.rdm_file_review:
            known_size   = str(known_file['size'])
            known_review = known_file['review']

            if not (pure_file_size == known_size and file_name == known_file['name']):
                continue

            # Match found: the new upload inherits the review value from RDM
            self.pure_rdm_file_match.extend([True, known_review])
            reviewed = known_review
            break

        self.sub_data = {'internalReview': reviewed}

        # (RDM field name, PURE json path)
        file_fields = (
            ('name',            ['file', 'fileName']),
            ('size',            ['file', 'size']),
            ('mimeType',        ['file', 'mimeType']),
            ('digest',          ['file', 'digest']),
            ('digestAlgorithm', ['file', 'digestAlgorithm']),
            ('createdBy',       ['creator']),
            ('createdDate',     ['created']),
            ('versionType',     ['versionTypes', 0, 'value']),
            ('licenseType',     ['licenseTypes', 0, 'value']),
        )
        for rdm_field, pure_path in file_fields:
            self._add_subdata(item, rdm_field, pure_path)

        # Access type
        access_type = get_value(item, ['accessTypes', 0, 'value'])
        self.sub_data['accessType'] = self._accessright_conversion(access_type)

        # The collected file metadata becomes part of the record data
        self.data['versionFiles'].append(self.sub_data)

        # Downloads the file from Pure and hands the response over for processing
        download_response = get_pure_file(self, file_url, file_name)
        self._process_file_download_response(download_response, file_name)
    def _add_field(self, item: list, rdm_field: str, path: list):
        """ Stores in self.data[rdm_field] the value found at `path` in `item`.
            Nothing is stored when get_value gives back a falsy value. """
        found = get_value(item, path)
        if not found:
            return
        self.data[rdm_field] = found
    def _process_person_associations(self):
        """ Builds self.data['contributors'] from the record's personAssociations.

            Does nothing when self.item has no 'personAssociations'. For each
            association the contributor fields are gathered in self.sub_data,
            and the matching user id (if any) is added to self.data['owners']. """

        if 'personAssociations' not in self.item:
            return

        self.data['contributors'] = []

        # Lines of the user ids match file, read once for all contributors
        user_id_lines = file_read_lines('user_ids_match')

        # (RDM field name, PURE json path)
        # 'uuid' appears twice: it is read from 'person' and from 'externalPerson'
        contributor_fields = (
            ('uuid',                   ['person', 'uuid']),
            ('externalId',             ['person', 'externalId']),
            ('authorCollaboratorName', ['authorCollaboration', 'names', 0, 'value']),
            ('personRole',             ['personRoles', 0, 'value']),
            ('organisationalUnit',     ['organisationalUnits', 0, 'names', 0, 'value']),
            ('type_p',                 ['externalPerson', 'types', 0, 'value']),
            ('uuid',                   ['externalPerson', 'uuid']),
        )

        for association in self.item['personAssociations']:

            self.sub_data = {}
            self._get_contributor_name(association)

            for rdm_field, pure_path in contributor_fields:
                self._add_subdata(association, rdm_field, pure_path)

            # Looks up the contributor's user id by external id
            external_id = get_value(association, ['person', 'externalId'])
            owner = self.general_functions.get_userid_from_list_by_externalid(external_id, user_id_lines)

            if owner:
                owner_id = int(owner)
                if owner_id not in self.data['owners']:
                    self.data['owners'].append(owner_id)

            # ORCID
            self._process_contributor_orcid()

            self.data['contributors'].append(self.sub_data)
    def _add_persons(self, body, name_space, item):
        """ Adds a 'persons' element to `body`, holding one 'person' element
            for every entry of item['contributors'] """
        dataset_ns = name_space['dataset']
        persons_element = self._sub_element(body, dataset_ns, 'persons')

        for contributor in item['contributors']:
            person_element = self._sub_element(persons_element, dataset_ns, 'person')
            person_element.set('contactPerson', 'true')
            self._add_attribute(contributor, person_element, 'id', ['uuid'])

            # Nested person element carrying the external id
            lookup_element = self._sub_element(person_element, dataset_ns, 'person')
            self._add_attribute(contributor, lookup_element, 'lookupId', ['externalId'])

            # Role
            role_element = self._sub_element(person_element, dataset_ns, 'role')
            role_element.text = get_value(contributor, ['personRole'])

            # Name
            name_element = self._sub_element(person_element, dataset_ns, 'name')
            name_element.text = get_value(contributor, ['name'])
 def _add_links(self, body, name_space):
     """ Adds relative links for RDM files and api.

         body:       xml element receiving the 'links' element
         name_space: dict holding the 'dataset' xml namespace

         The links are read from self.full_item (['links', 'files'] and
         ['links', 'self']). Nothing is added when neither is available. """
     link_files = get_value(self.full_item, ['links', 'files'])
     link_self = get_value(self.full_item, ['links', 'self'])
     recid = get_value(self.full_item, ['id'])

     if not (link_files or link_self):
         return

     dataset_ns = name_space['dataset']
     links = self._sub_element(body, dataset_ns, 'links')

     # (url, description) of every link that might be added, in output order
     candidates = (
         (link_files, 'Link to record files'),
         (link_self, 'Link to record API'),
     )
     for url, description in candidates:
         if not url:
             continue
         link = self._sub_element(links, dataset_ns, 'link')
         # The id is set only when available, as done in _add_attribute:
         # a missing value must not end up as an xml attribute
         if recid:
             link.set('id', recid)  # REVIEW - which id?
         self._sub_element(link, dataset_ns, 'url').text = url
         self._sub_element(link, dataset_ns, 'description').text = description
    def _process_organisational_units(self):
        """ Stores the record's organisational units in self.data.

            For every unit its name, uuid and externalId are saved, the
            externalId is added to the group restrictions and the
            corresponding group is created through self.groups.
            Does nothing when self.item has no 'organisationalUnits'. """
        if 'organisationalUnits' not in self.item:
            return

        self.data['organisationalUnits'] = []
        self.data['groupRestrictions']   = []

        for unit in self.item['organisationalUnits']:
            unit_name        = get_value(unit, ['names', 0, 'value'])
            unit_uuid        = get_value(unit, ['uuid'])
            unit_external_id = get_value(unit, ['externalId'])

            self.data['organisationalUnits'].append({
                'name':       unit_name,
                'uuid':       unit_uuid,
                'externalId': unit_external_id,
            })

            # The organisational unit becomes a group owner of the record
            self.data['groupRestrictions'].append(unit_external_id)

            # Creates the group
            self.groups.rdm_create_group(unit_external_id, unit_name)
    def _add_organisations(self, body, name_space, item):
        """ Adds an 'organisations' element to `body`, holding one
            'organisation' element for every entry of item['organisationalUnits'] """
        dataset_ns = name_space['dataset']
        organisations_element = self._sub_element(body, dataset_ns, 'organisations')

        for unit in item['organisationalUnits']:

            # Pure dataset documentation:
            # Can be both an internal and external organisation, use origin to enforce either internal or external.
            # If the organisation is an internal organisation in Pure, then the lookupId attribute must be used.
            # If the organisation is an external organisation and id is given, matching will be done on the id;
            # if not found, matching will be done on name; if still not found, then an external
            # organisation with the specified id and organisation will be created.

            organisation_element = self._sub_element(organisations_element,
                                                     dataset_ns, 'organisation')
            self._add_attribute(unit, organisation_element, 'lookupId', ['externalId'])

            name_element = self._sub_element(organisation_element, dataset_ns, 'name')
            name_element.text = get_value(unit, ['name'])
 def _add_text(self, item: object, sub_element: object, path):
     """ Reads the value at `path` in the rdm response and sets it as text of the given xml element """
     text = get_value(item, path)
     sub_element.text = text
 def _add_attribute(self, item: object, sub_element, attribute: str,
                    value_path: list):
     """ Reads the value at `value_path` in the rdm response and sets it as
         attribute of the given xml element. Falsy values are not set. """
     found = get_value(item, value_path)
     if not found:
         return
     sub_element.set(attribute, found)
 def _add_subdata(self, item: list, rdm_field: str, path: list):
     """ Stores in self.sub_data[rdm_field] the value found at `path` in `item`.
         Nothing is stored when get_value gives back a falsy value. """
     found = get_value(item, path)
     if not found:
         return
     self.sub_data[rdm_field] = found