Beispiel #1
0
 def __init__(self):
     """ Instantiates the helper objects used by the methods of this class """
     # One instance of each helper is created and kept on the object
     self.rdm_requests = Requests()
     self.rdm_db = RdmDatabase()
     self.report = Reports()
     self.rdm_add_record = RdmAddRecord()
     self.general_functions = GeneralFunctions()
     # NOTE(review): presumably the report targets passed to self.report.add - confirm against usage
     self.report_files = ['console', 'owners']
Beispiel #2
0
def rdm_add_file(file_name: str, recid: str):
    """ Puts a file from the temporary files folder to the RDM record 'recid'.

        Returns True (and removes the local file) when RDM answers with a
        status code below 300, otherwise reports the response content and
        returns False. """
    requests_client = Requests()
    report = Reports()

    local_path = f"{temporary_files_name['base_path']}/{file_name}"

    # Send the file to RDM and log the outcome
    response = requests_client.put_file(local_path, recid)
    report.add(f'\tRDM put file @ {response} @ {file_name}')

    upload_failed = response.status_code >= 300
    if upload_failed:
        report.add(response.content)
        return False

    # Successful upload: the temporary copy is not needed anymore
    os.remove(local_path)
    return True
Beispiel #3
0
 def __init__(self):
     """ Instantiates the helper objects used by the methods of this class """
     # One instance of each helper is created and kept on the object
     self.report = Reports()
     self.rdm_requests = Requests()
     self.general_functions = GeneralFunctions()
Beispiel #4
0
class Versioning:
    def __init__(self):
        """ Instantiates the helpers used to report, query RDM and update records """
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()

    def get_uuid_version(self, uuid):
        """ Gives the version to use for a new record and old versions of the same uuid """

        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)

        message = f'\tRDM metadata version  - {response} - '

        total_recids = resp_json['hits']['total']
        all_metadata_versions = []

        if total_recids == 0:
            # If there are no records with the same uuid means it is the first one (version 1)
            new_version = 1
            self.report.add(
                f'{message}Record NOT found    - Metadata version: 1')
            return [new_version, all_metadata_versions]

        new_version = None

        # Iterates over all records in response
        for item in resp_json['hits']['hits']:
            rdm_metadata = item['metadata']

            # If a record has a differnt uuid than it will be ignored
            if uuid != rdm_metadata['uuid']:
                self.report.add(
                    f" VERSIONING - Different uuid {rdm_metadata['uuid']}")
                continue

            # Get the latest version
            if 'metadataVersion' in rdm_metadata and not new_version:
                new_version = rdm_metadata['metadataVersion'] + 1

            # Add data to listed versions (old versions)
            recid = item['id']
            creation_date = item['created'].split('T')[0]
            version = str(rdm_metadata['metadataVersion'])
            all_metadata_versions.append([recid, version, creation_date])

        # In case the record has no metadataVersion
        if not new_version:
            message += f'Vers. not specified - New metadata version: 1'
            new_version = 1
        else:
            count_old_versions = add_spaces(len(all_metadata_versions))
            message += f'Older versions{count_old_versions} - New version: {new_version}'

        self.report.add(message)

        return [new_version, all_metadata_versions]

    def update_all_uuid_versions(self, uuid):
        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)
        total_recids = resp_json['hits']['total']

        if total_recids == 0:
            self.report.add('There are no records with this uuid')
            return

        all_metadata_versions = []
        for item in resp_json['hits']['hits']:
            # Add data to listed versions
            recid = item['id']
            creation_date = item['created'].split('T')[0]
            version = str(item['metadata']['metadataVersion'])
            all_metadata_versions.append([recid, version, creation_date])

        self.report.add(f'\tUpdate uuid versions')

        for item in resp_json['hits']['hits']:

            recid = item['id']
            item = item['metadata']

            if item['metadataOtherVersions'] == all_metadata_versions:
                self.report.add(f'\tRecord update @ Up to date @ {recid}')
                continue

            item['metadataOtherVersions'] = all_metadata_versions

            # Update record
            self.general_functions.update_rdm_record(recid, item)
 def __init__(self):
     """ Instantiates the helpers used to send requests to RDM and to report """
     self.rdm_requests = Requests()
     self.report = Reports()
class Delete:
    def __init__(self):
        """ Instantiates the helpers used to send requests to RDM and to report """
        self.rdm_requests = Requests()
        self.report = Reports()

    def record(self, recid: str):
        """ Deletes record from RDM """

        # NOTE: the user ACCOUNT related to the used TOKEN must be ADMIN

        # Delete record request
        response = self.rdm_requests.delete_metadata(recid)

        report = f'\tRDM delete record @ {response} @ Deleted recid:        {recid}'
        self.report.add(report)

        # 410 -> "PID has been deleted"
        if response.status_code >= 300 and response.status_code != 410:
            return response

        # Remove deleted recid from to_delete.txt
        self._remove_recid_from_delete_list(recid)

        # remove record from all_rdm_records.txt
        self._remove_recid_from_records_list(recid)

        return response

    def _set_counters_and_title(func):
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'],
                                     ["DELETE FROM LIST"])
            self.counters = {'total': 0, 'success': 0, 'error': 0}
            # Decorated function
            func(self)

            report = f"\nTotal: {self.counters['total']} @ Success: {self.counters['success']} @ Error: {self.counters['error']}"
            self.report.add(report)

        return _wrapper

    @_set_counters_and_title
    def from_list(self):
        """ Deletes all recids that are listed into data/to_delete.txt """

        recids = self._read_file_recids()
        if not recids:
            return

        for recid in recids:

            recid = recid.strip('\n')

            # Ignore empty lines
            if len(recid) == 0:
                continue

            self.counters['total'] += 1

            if len(recid) != 11:
                self.report.add(f'\n{recid} -> Wrong recid lenght! \n')
                continue

            # -- REQUEST --
            response = self.record(recid)

            # 410 -> "PID has been deleted"
            if response.status_code < 300 or response.status_code == 410:
                self.counters['success'] += 1
            else:
                self.counters['error'] += 1

    def all_records(self):
        """ Sends a delete request for every record listed in the
            'all_rdm_records' data file.

            The recid is the second space separated field of each line.
            Lines with less than two fields (e.g. empty lines) are skipped. """
        # The whole file is read and closed before deleting: self.record()
        # rewrites this same file through _remove_recid_from_records_list
        with open(data_files_name['all_rdm_records']) as records_file:
            file_data = records_file.readlines()

        for line in file_data:
            fields = line.strip('\n').split(' ')
            if len(fields) < 2:
                continue
            self.record(fields[1])

    def _read_file_recids(self):
        """ Reads all the lines of the 'delete_recid_list' data file (to_delete.txt).

            Returns the list of lines (newline included) or False, after
            reporting it, when the file has no lines. """
        file_name = data_files_name['delete_recid_list']
        # Context manager: the file is closed as soon as it is read
        with open(file_name, 'r') as recids_file:
            recids = recids_file.readlines()

        if not recids:
            self.report.add('\nNothing to delete.\n')
            return False
        return recids

    def _remove_recid_from_delete_list(self, recid):
        """ Rewrites the 'delete_recid_list' data file leaving out every line
            that, without newline characters at its ends, is equal to recid """
        list_key = 'delete_recid_list'
        rows = file_read_lines(list_key)
        with open(data_files_name[list_key], "w") as out_file:
            out_file.writelines(
                row for row in rows if row.strip("\n") != recid)

    def _remove_recid_from_records_list(self, recid):
        """ Rewrites the 'all_rdm_records' data file leaving out the lines whose
            second space separated field is equal to recid.

            Lines with less than two fields are kept as they are. """
        file_name = 'all_rdm_records'
        lines = file_read_lines(file_name)

        # The lines to keep are selected BEFORE the file is opened for writing
        # (which truncates it), so that an unexpected line can not leave the
        # file half written
        kept_lines = []
        for line in lines:
            fields = line.strip("\n").split(' ')
            if len(fields) > 1 and fields[1] == recid:
                continue
            kept_lines.append(line)

        with open(data_files_name[file_name], "w") as f:
            f.writelines(kept_lines)
Beispiel #7
0
class RdmOwners:
    def __init__(self):
        """ Instantiates the helper objects used by the methods of this class """
        self.rdm_requests = Requests()
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()
        self.general_functions = GeneralFunctions()
        # Passed as second argument to self.report.add by several methods
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        def _wrapper(self, identifier):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['OWNERS CHECK'])
            self.global_counters = initialize_counters()

            # Decorated function
            func(self, identifier)

        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str):
        """ Gets from pure all the records related to a certain user (based on orcid or externalId),
            afterwards it modifies/create RDM records accordingly.

            identifier: name of the Pure person field used to find the user.
                        'externalId' selects the external id value, any other
                        value keeps the orcid value.

            Returns None when the RDM user id is not found or when all the pages
            have been processed, False when the Pure user uuid or a Pure page can
            not be retrieved, True when the user has no records. """

        # NOTE(review): the identifier values are hard coded (marked TEMPORARY)
        identifier_value = '0000-0002-4154-6945'  # TEMPORARY
        if identifier == 'externalId':  # TEMPORARY
            # identifier_value = '3261'                 # TEMPORARY
            identifier_value = '30'  # TEMPORARY

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:
            return

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier,
                                                       identifier_value)
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)

        next_page = True
        page = 1
        # Counters of the actions taken on the user's records
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request: one page of the research outputs of the user
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons',
                                         f'{self.user_uuid}/research-outputs',
                                         params)
            if response.status_code >= 300:
                return False

            # Initial response processing and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid  @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(
                    uuid, self.global_counters)

                # Record NOT in RDM, create it
                if recid == False:
                    self._create_rdm_record(item)
                    continue

                # Gets record metadata from RDM and checks if the user is already a record owner
                self._process_record_owners(recid)

            page += 1

        # Reached only when all the pages have been processed
        self._final_report()

    def _process_record_owners(self, recid):
        """ Gets record metadata from RDM and checks if the user is already a record owner """

        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']

        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"
        )

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the recod owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """ Adds the current logged in user as record owner """

        data['owners'].append(self.user_id)

        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"
        )

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """ If a record of the processed user is not in RDM creates it """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        # Final report
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

    def _process_response(self, response: object, page: int):
        """ Checks if there are records to process """

        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

        if page == 1 and total_items == 0:
            self.report.add('\nThe user has no records @ End task\n')
            return False

        self.report.add(f'\nPag {page} - Get person records    - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """ Searches in Pure the person whose field 'key_name' is equal to
            'key_value' and returns the person uuid.

            Returns False, after reporting it, when the Pure request fails, when
            the uuid found is not 36 characters long or when no person matches. """

        # If the uuid is not found in the first x items then it will continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            }
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            for item in record_json['items']:

                # A person without the field can not be the searched one
                # (indexing it directly would raise KeyError)
                if key_name not in item or item[key_name] != key_value:
                    continue

                first_name = item['name']['firstName']
                last_name = item['name']['lastName']
                uuid = item['uuid']

                self.report.add(
                    f'Name:    {first_name} {last_name}\nUuid:    {uuid}',
                    self.report_files)

                if len(uuid) != 36:
                    self.report.add(
                        '\n- Warning! Incorrect user_uuid length -\n',
                        self.report_files)
                    return False
                return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
        return False

    #   ---         ---         ---
    def _get_user_id_from_rdm(self):
        """ Gets the ID and IP of the logged in user """

        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False

        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False

        self.report.add(
            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
            self.report_files)

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """ Add user to user_ids_match.txt, where are specified:
            rdm_user_id, user_uuid and user_external_id.

            Nothing is written when _check_user_ids_match finds the same three
            ids already listed. """
        file_name = data_files_name['user_ids_match']

        needs_to_add = self._check_user_ids_match('user_ids_match',
                                                  external_id)
        if not needs_to_add:
            return

        # Context manager: the line is flushed and the file closed right away
        with open(file_name, 'a') as ids_file:
            ids_file.write(f'{self.user_id} {self.user_uuid} {external_id}\n')

        report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
        self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """ Checks if the user still needs to be added to the ids match list.

            Returns False (after reporting it) when a line of the file holds
            exactly 'user_id user_uuid external_id', True otherwise.
            Lines with a different number of fields (e.g. empty lines) never
            match and are ignored. """
        expected = [str(self.user_id), self.user_uuid, external_id]

        for line in file_read_lines(file_name):
            # Comparing the whole split line makes a separate check of the
            # single ids unnecessary and can not fail on short lines
            if line.split('\n')[0].split(' ') == expected:
                self.report.add('Ids list:   user in list',
                                self.report_files)
                return False
        return True

    def _initalizing_method(func):
        def _wrapper(self):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['RECORDS OWNER'])

            # Empty file rdm_reocrds_owner.txt
            file_owner = data_files_name['rdm_record_owners']
            open(file_owner, 'w').close()

            # Decorated function
            func(self)

        return _wrapper

    @_initalizing_method
    def get_rdm_record_owners(self):
        """ Gets all records from RDM and counts how many records belong to each user.
            It also updates the content of all_rdm_records.txt

            The records are requested page by page until the response has no
            'next' link or a request fails. For every page the lines
            'uuid - recid - owners' are appended to the 'rdm_record_owners'
            data file. """

        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        # 'uuid recid' lines of all the pages, joined only once at the end
        all_records = []
        next_page = True

        while next_page:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)

            self.report.add(f'\n{response}\n')

            if response.status_code >= 300:
                self.report.add(response.content)
                break

            resp_json = json.loads(response.content)
            page_lines = []

            for item in resp_json['hits']['hits']:
                count += 1

                metadata = item['metadata']
                uuid = metadata['uuid']
                recid = metadata['recid']
                owners = metadata['owners']

                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                page_lines.append(f'{line}\n')

                all_records.append(f'{uuid} {recid}\n')

                for owner in owners:
                    count_records_per_owner[owner] = \
                        count_records_per_owner.get(owner, 0) + 1

            self.report.add(f'\nPag {pag} - Records {count}\n')

            # Context manager: the file is closed after every page
            with open(data_files_name['rdm_record_owners'], 'a') as owners_file:
                owners_file.writelines(page_lines)

            next_page = 'next' in resp_json['links']

            pag += 1

        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)

        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(''.join(all_records))

    def _count_records_per_owner(self, count_records_per_owner):
        """ Reports a two columns table with each owner and the number of its
            records, both formatted with add_spaces """
        self.report.add('Owner  Records')

        for owner, total in count_records_per_owner.items():
            records_column = add_spaces(total)
            owner_column = add_spaces(owner)
            self.report.add(f'{owner_column}    {records_column}')

    def _update_all_rdm_records_file(self, all_records_list):
        """ Replaces the content of the 'all_rdm_records' data file with the
            given text (one 'uuid recid' line per record) """
        file_all_records_list = data_files_name['all_rdm_records']

        # 'w' mode empties the file before writing, the context manager
        # closes it when done
        with open(file_all_records_list, 'w') as records_file:
            records_file.write(all_records_list)
class ImportRecords:
    def __init__(self):
        """ Instantiates the helpers and sets the path of the xml file to write """
        self.rdm_requests = Requests()
        self.report = Reports()
        # NOTE(review): absolute, machine specific path - presumably a development leftover, confirm
        self.file_name = "/home/bootcamp/src/pure_sync_rdm/synchronizer/data/temporary_files/test.xml"

    def run_import(self):

        # Report title
        self.report.add_template(['console'], ['general', 'title'],
                                 ['PURE IMPORT'])

        page = 1
        page_size = 20
        next_page = True

        # Get RDM records by page
        while next_page:

            data = self._get_rdm_records_metadata(page, page_size)

            if not data:
                self.report.add("\n\tEnd task\n")
                return

            self._create_xml(data)

            page += 1

    def _check_uuid(self, item):
        """ If a uuid is specified in the RDM record means that it was imported
            from Pure. In this case, the record will be ignored """
        if 'uuid' in item:
            self.report.add(f"{self.report_base} Already in Pure")
            return False
        return True

    def _check_date(self, item):
        """ Returns True when item['created'] is greater than current_date().
            Otherwise reports the creation date as too old and returns False """
        created = item['created']
        if created > current_date():
            return True

        # Only the date part (before 'T') is reported
        date = created.split('T')[0]
        self.report.add(f"{self.report_base} Too old: {date}")
        return False

    def _create_xml(self, data):
        """ Creates the xml file that will be imported in pure """

        name_space = {
            'dataset': 'v1.dataset.pure.atira.dk',
            'commons': 'v3.commons.pure.atira.dk',
        }

        ET.register_namespace('v1', name_space['dataset'])
        ET.register_namespace('v3', name_space['commons'])

        # Build a tree structure
        self.root = ET.Element("{%s}datasets" % name_space['dataset'])

        count = 0

        for item in data:

            count += 1
            self.full_item = item
            self.report_base = f"{add_spaces(count)} - {item['id']} -"
            item_metadata = item['metadata']

            # # Checks if the record was created today
            # if not self._check_date(item):
            #     self.report.add("\n\tEnd task\n")
            #     next_page = False
            #     break

            # # If the rdm record has a uuid means that it was imported from pure
            # if not self._check_uuid(item_metadata):
            #     continue

            self.report.add(f"{self.report_base} Adding")

            # Adds fields to the created xml element
            self._populate_xml(item_metadata, name_space)

        self._parse_xml()

    def _populate_xml(self, item, name_space):
        """ Adds to self.root a dataset element filled with the metadata of one
            RDM record.

            item:       metadata of the RDM record
            name_space: dict with the 'dataset' and 'commons' namespaces

            Returns False, leaving the tree untouched, when the record has no
            title (mandatory field). Returns None otherwise. """
        dataset_ns = name_space['dataset']

        # Title                     (mandatory field)
        # Checked before creating the element, so that a record without title
        # does not leave an empty dataset in the tree
        title = get_value(item, ['title'])
        if not title:
            return False

        # Dataset element
        body = ET.SubElement(self.root, "{%s}dataset" % dataset_ns)
        body.set('type', 'dataset')
        self._sub_element(body, dataset_ns, 'title').text = title

        # Managing organisation     (mandatory field)
        organisational_unit = self._sub_element(body, dataset_ns,
                                                'managingOrganisation')
        self._add_attribute(item, organisational_unit, 'lookupId',
                            ['managingOrganisationalUnit_externalId'])

        # Persons                   (mandatory field)
        self._add_persons(body, name_space, item)

        # Available date            (mandatory field)
        date = self._sub_element(body, dataset_ns, 'availableDate')
        sub_date = self._sub_element(date, name_space['commons'], 'year')
        sub_date.text = get_value(item, ['publication_date'])

        # Publisher                 (mandatory field)
        publisher = self._sub_element(body, dataset_ns,
                                      'publisher')  # REVIEW!!!!
        publisher.set(
            'lookupId',
            '45d22915-6545-4428-896a-8b8046191d5d')  # Data not in rdm
        self._sub_element(publisher, dataset_ns,
                          'name').text = 'Test publisher'  # Data not in rdm
        self._sub_element(publisher, dataset_ns,
                          'type').text = 'publisher'  # Data not in rdm

        # Description
        # The record abstract is used when available, the placeholder text
        # only when the record has no abstract
        description_text = get_value(item, ['abstract']) or 'test description'
        descriptions = self._sub_element(body, dataset_ns, 'descriptions')
        description = self._sub_element(descriptions, dataset_ns,
                                        'description')
        description.set('type', 'datasetdescription')
        description.text = description_text

        # Links
        self._add_links(body, name_space)

        # Organisations
        self._add_organisations(body, name_space, item)

        # FIELDS THAT ARE NOT IN DATASET XSD - NEEDS REVIEW:
        # language                  ['languages', 0, 'value']
        # organisationalUnits       ['personAssociations' ...]
        # peerReview                ['peerReview']
        # createdDate               ['info', 'createdDate']
        # publicationDate           ['publicationStatuses', 0, 'publicationDate', 'year']
        # publicationStatus         ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']
        # recordType                ['types', 0, 'value']
        # workflow                  ['workflows', 0, 'value']
        # pages                     ['info','pages']
        # volume                    ['info','volume']
        # journalTitle              ['info', 'journalAssociation', 'title', 'value']
        # journalNumber             ['info', 'journalNumber']

        # PURE RESPONSE
        # cvc-complex-type.2.4.b: The content of element 'v1:dataset' is not complete.
        # One of '{
        # "v1.dataset.pure.atira.dk":translatedTitles,
        # "v1.dataset.pure.atira.dk":description,
        # "v1.dataset.pure.atira.dk":ids,
        # "v1.dataset.pure.atira.dk":additionalDescriptions,
        # "v1.dataset.pure.atira.dk":temporalCoverage,
        # "v1.dataset.pure.atira.dk":productionDate,
        # "v1.dataset.pure.atira.dk":geoLocation,
        # "v1.dataset.pure.atira.dk":organisations,
        # "v1.dataset.pure.atira.dk":DOI,
        # "v1.dataset.pure.atira.dk":physicalDatas,
        # "v1.dataset.pure.atira.dk":publisher,
        # "v1.dataset.pure.atira.dk":openAccess,
        # "v1.dataset.pure.atira.dk":embargoPeriod,
        # "v1.dataset.pure.atira.dk":constraints,
        # "v1.dataset.pure.atira.dk":keywords,
        # "v1.dataset.pure.atira.dk":links,
        # "v1.dataset.pure.atira.dk":documents,
        # "v1.dataset.pure.atira.dk":relatedProjects,
        # "v1.dataset.pure.atira.dk":relatedEquipments,
        # "v1.dataset.pure.atira.dk":relatedStudentThesis,
        # "v1.dataset.pure.atira.dk":relatedPublications,
        # "v1.dataset.pure.atira.dk":relatedActivities,
        # "v1.dataset.pure.atira.dk":relatedDatasets,
        # "v1.dataset.pure.atira.dk":visibility,
        # "v1.dataset.pure.atira.dk":workflow
        # }' is expected.

    def _add_organisations(self, body, name_space, item):
        organisations = self._sub_element(body, name_space['dataset'],
                                          'organisations')

        for unit_data in item['organisationalUnits']:

            # Pure dataset documentation:
            # Can be both an internal and external organisation, use origin to enforce either internal or external.
            # If the organisation is an internal organisation in Pure, then the lookupId attribute must be used.
            # If the organisation is an external organisation and id is given matching will be done on the id,
            # if not found mathching will be done on name, if still not found then an external
            # organisation with the specified id and organisation will be created.

            organisation = self._sub_element(organisations,
                                             name_space['dataset'],
                                             'organisation')
            self._add_attribute(unit_data, organisation, 'lookupId',
                                ['externalId'])
            name = self._sub_element(organisation, name_space['dataset'],
                                     'name')
            name.text = get_value(unit_data, ['name'])

    def _add_persons(self, body, name_space, item):
        persons = self._sub_element(body, name_space['dataset'], 'persons')

        for person_data in item['contributors']:
            person = self._sub_element(persons, name_space['dataset'],
                                       'person')
            person.set('contactPerson', 'true')
            self._add_attribute(person_data, person, 'id', ['uuid'])
            # External id
            person_id = self._sub_element(person, name_space['dataset'],
                                          'person')
            self._add_attribute(person_data, person_id, 'lookupId',
                                ['externalId'])
            # Role
            role = self._sub_element(person, name_space['dataset'], 'role')
            role.text = get_value(person_data, ['personRole'])
            # Name
            name = self._sub_element(person, name_space['dataset'], 'name')
            name.text = get_value(person_data, ['name'])

    def _add_links(self, body, name_space):
        """ Adds to body the 'links' element with the links to the files and to
            the api of the record in self.full_item.
            Nothing is added when the record has none of the two links """
        dataset_ns = name_space['dataset']

        link_files = get_value(self.full_item, ['links', 'files'])
        link_self = get_value(self.full_item, ['links', 'self'])
        recid = get_value(self.full_item, ['id'])

        if not (link_files or link_self):
            return

        links = self._sub_element(body, dataset_ns, 'links')

        # Files link first, then self (api) link
        targets = (
            (link_files, 'Link to record files'),
            (link_self, 'Link to record API'),
        )
        for target_url, description in targets:
            if not target_url:
                continue
            link = self._sub_element(links, dataset_ns, 'link')
            link.set('id', recid)  # REVIEW - which id?
            self._sub_element(link, dataset_ns, 'url').text = target_url
            self._sub_element(link, dataset_ns,
                              'description').text = description

    def _parse_xml(self):
        # Wrap it in an ElementTree instance and save as XML
        xml_str = minidom.parseString(ET.tostring(
            self.root)).toprettyxml(indent="   ")
        open(self.file_name, "w").write(xml_str)

    def _sub_element(self, element, namespace: str, sub_element_name: str):
        """ Adds the the xml a sub element """
        return ET.SubElement(element, "{%s}%s" % (namespace, sub_element_name))

    def _add_attribute(self, item: object, sub_element, attribute: str,
                       value_path: list):
        """ Reads from item the value at value_path and, when it is not empty,
            sets it as attribute of the given xml element """
        value = get_value(item, value_path)
        if not value:
            return
        sub_element.set(attribute, value)

    def _add_text(self, item: object, sub_element: object, path):
        """ Reads from item the value at path and sets it as text of the given
            xml element """
        text = get_value(item, path)
        sub_element.text = text

    def _get_rdm_records_metadata(self, page: int, page_size: int):
        """ Requests to rdm records metadata by page """

        params = {'sort': 'mostrecent', 'size': page_size, 'page': page}
        response = self.rdm_requests.get_metadata(params)

        if response.status_code >= 300:
            return False
        # Load response
        json_data = json.loads(response.content)['hits']['hits']

        # Checks if any record is listed
        if not json_data:
            return False

        self.report.add_template(['console'], ['pages', 'page_and_size'],
                                 [page, page_size])
        self.report.add('')  # adds empty line

        return json_data
Beispiel #9
0
class GeneralFunctions:
    """ Helper operations on RDM records: recid lookup, owner lookup and metadata update """

    def __init__(self):
        self.rdm_requests = Requests()
        self.reports = Reports()
        self.delete = Delete()

    def get_recid(self, uuid: str, global_counters: object):
        """
        Queries RDM for the records holding the given uuid.

        1 - reports the first hit (the response is expected to be sorted
            with the most recent record first) and returns its recid
        2 - deletes every further hit as a duplicate, unless versioning is
            running, counting each outcome in global_counters['delete']

        Returns False when RDM reports no record for the uuid, or when the
        reported total is non-zero but the hit list is empty.
        """

        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)

        total_recids = resp_json['hits']['total']
        if total_recids == 0:
            # No record with this uuid in RDM
            return False

        # Stays False if the hit list turns out to be empty; without this
        # default the final return raised UnboundLocalError in that case
        newest_recid = False

        for position, item in enumerate(resp_json['hits']['hits']):
            recid = item['metadata']['recid']

            if position == 0:
                # TODO: the api url and the landing page url
                # ({rdm_host_url}records/{recid}) are to be transmitted to Pure
                # once the record is successfully added in RDM
                api_url = f'{rdm_host_url}api/records/{recid}'
                newest_recid = recid

                report = f'\tRDM get recid @ {response} @ Total: {add_spaces(total_recids)} @ {api_url}'
                self.reports.add(report)
                continue

            # If versioning is running then it is not necessary to delete older versions of the record
            if versioning_running:
                continue

            # Duplicate records are deleted
            deleted = self.delete.record(recid)
            outcome = 'success' if deleted else 'error'
            global_counters['delete'][outcome] += 1

        return newest_recid

    #   ---         ---         ---
    def get_userid_from_list_by_externalid(self, external_id: str,
                                           file_data: list):
        """ Searches file_data for the line whose third space-separated field equals
            external_id; reports the match and returns the first field of that line.
            Returns None when no line matches. """
        for line in file_data:
            fields = line.split('\n')[0].split(' ')

            # Blank or truncated lines have no third field to compare
            # (indexing them used to raise IndexError)
            if len(fields) < 3:
                continue

            if external_id == fields[2]:
                user_id = fields[0]
                user_id_spaces = add_spaces(user_id)

                report = f'\tRDM owner list @@ User id:     {user_id_spaces} @ externalId: {external_id}'
                self.reports.add(report)

                return user_id

        return None

    #   ---         ---         ---
    def update_rdm_record(self, recid: str, data: object):
        """ Puts the given metadata to the RDM record, reports the outcome and returns the response """

        response = self.rdm_requests.put_metadata(recid, data)

        url = f'{rdm_host_url}api/records/{recid}'
        self.reports.add(f'\tRecord update @ {response} @ {url}')

        return response
 def __init__(self):
     """ Creates the request and report helpers and sets self.file_name (enclosing class not visible in this excerpt) """
     self.rdm_requests = Requests()
     self.report = Reports()
     # NOTE(review): hard-coded absolute path to an xml file - presumably the file written by this class; confirm and make configurable
     self.file_name = "/home/bootcamp/src/pure_sync_rdm/synchronizer/data/temporary_files/test.xml"
Beispiel #11
0
 def __init__(self):
     """ Creates the request, report and delete helpers (enclosing class not visible in this excerpt) """
     self.rdm_requests = Requests()
     self.reports = Reports()
     self.delete = Delete()
 def __init__(self):
     """ Creates the request, report, group, general-function and versioning helpers
         (enclosing class not visible in this excerpt) """
     self.rdm_requests       = Requests()
     self.report             = Reports()
     self.groups             = RdmGroups()
     self.general_functions  = GeneralFunctions()
     self.versioning         = Versioning()
class RdmAddRecord:

    def __init__(self):
        """ Instantiates the helpers used by the record submission methods """
        self.rdm_requests       = Requests()            # RDM http requests
        self.report             = Reports()             # report messages
        self.groups             = RdmGroups()           # used to create a group per organisational unit
        self.general_functions  = GeneralFunctions()    # recid and owner look-ups
        self.versioning         = Versioning()          # metadata versions of a uuid
        

    def push_record_by_uuid(self, global_counters: dict, uuid: str):
        """ Fetches from Pure the metadata of the given uuid and hands it to create_invenio_data.
            Returns False when no metadata is obtained """
        pure_record = get_pure_record_metadata_by_uuid(uuid)
        if pure_record:
            return self.create_invenio_data(global_counters, pure_record)
        return False


    def _set_initial_variables(func):
        def _wrapper(self, global_counters, item) :
    
            self.global_counters = global_counters
            self.global_counters['total'] += 1      

            self.uuid = item['uuid']
            self.item = item
            self.data = {}
            # Stores the name of the record files
            # Necessary because we need first to create the record and then to put the files
            self.record_files = []      

            # Decorated function
            func(self, global_counters, item)

        return _wrapper

    @_set_initial_variables
    def create_invenio_data(self, global_counters: dict, item: dict):
        """ Builds the RDM json out of a Pure record and submits it to RDM.
            The steps fill self.data one after the other, so their order matters """

        # Versioning, then record owners
        self._check_record_version()
        self._check_record_owners()

        # Restrictions      (TO REVIEW)
        self.data['appliedRestrictions'] = ['owners', 'groups', 'ip_single', 'ip_range']
        self.data['_access'] = {'metadata_restricted': False, 'files_restricted': False}

        # Single fields, then electronic versions (files)
        self._process_single_fields(item)
        self._process_electronic_versions()

        # Additional files (skipped when the record has none)
        for additional_file in item.get('additionalFiles', ()):
            self.get_files_data(additional_file)

        # Person associations and organisational units
        self._process_person_associations()
        self._process_organisational_units()

        # Validates the restrictions set above
        self._applied_restrictions_check()

        # From here on self.data is the serialised json string
        self.data = json.dumps(self.data)

        # Post request to RDM
        self._post_metadata()

        # Refreshes the versioning data of every record sharing this uuid
        self._update_all_uuid_versions()


    def _versioning_required(func):
        """ Decorator: the decorated method is executed only while versioning_running is truthy.
            The wrapper itself always returns None """
        def _wrapper(self):
            if versioning_running:
                func(self)
        return _wrapper

    @_versioning_required
    def _check_record_version(self):
        """ Asks the versioning helper for the version data of this uuid and stores it in self.data """
        version_info = self.versioning.get_uuid_version(self.uuid)
        if not version_info:
            return

        # [0] -> version of the new record, [1] -> other versions of the same uuid
        self.data['metadataVersion'] = version_info[0]
        self.data['metadataOtherVersions'] = version_info[1]

    @_versioning_required
    def _update_all_uuid_versions(self):
        """ Delegates to the versioning helper the update of all records sharing this uuid """
        record_uuid = self.uuid
        self.versioning.update_all_uuid_versions(record_uuid)



    def _check_record_owners(self):
        """ Removes duplicate owners """
        if 'owners' in self.item:
            self.data['owners'] = list(set(self.item['owners']))        
        else:
            self.data['owners'] = list(set([1]))



    def _process_single_fields(self, item: dict):
        """ Copies the one-to-one fields of the Pure record into self.data,
            then converts access right and language to their RDM values """
        field_paths = (
            # RDM field name                        # PURE json path
            ('title',                                 ['title']),
            ('uuid',                                  ['uuid']),
            ('pureId',                                ['pureId']),
            ('publicationDate',                       ['publicationStatuses', 0, 'publicationDate', 'year']),
            ('createdDate',                           ['info', 'createdDate']),
            ('pages',                                 ['info', 'pages']),
            ('volume',                                ['info', 'volume']),
            ('journalTitle',                          ['info', 'journalAssociation', 'title', 'value']),
            ('journalNumber',                         ['info', 'journalNumber']),
            ('metadataModifBy',                       ['info', 'modifiedBy']),
            ('metadataModifDate',                     ['info', 'modifiedDate']),
            ('pure_link',                             ['info', 'portalUrl']),
            ('recordType',                            ['types', 0, 'value']),
            ('category',                              ['categories', 0, 'value']),
            ('peerReview',                            ['peerReview']),
            ('publicationStatus',                     ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']),
            ('numberOfAuthors',                       ['totalNumberOfAuthors']),
            ('workflow',                              ['workflows', 0, 'value']),
            ('confidential',                          ['confidential']),
            ('publisherName',                         ['publisher', 'names', 0, 'value']),
            ('abstract',                              ['abstracts', 0, 'value']),
            ('managingOrganisationalUnit_name',       ['managingOrganisationalUnit', 'names', 0, 'value']),
            ('managingOrganisationalUnit_uuid',       ['managingOrganisationalUnit', 'uuid']),
            ('managingOrganisationalUnit_externalId', ['managingOrganisationalUnit', 'externalId']),
        )
        for rdm_field, pure_path in field_paths:
            self._add_field(item, rdm_field, pure_path)

        # Access right
        pure_access = get_value(item, ['openAccessPermissions', 0, 'value'])
        self.data['access_right'] = self._accessright_conversion(pure_access)

        # Language
        pure_language = get_value(item, ['languages', 0, 'value'])
        self.data['language'] = self._language_conversion(pure_language)



    def _process_electronic_versions(self):
        """ Data relative to files """

        self.data['versionFiles'] = []
        self.rdm_file_review = []

        if 'electronicVersions' in self.item or 'additionalFiles' in self.item:
            # Checks if the file has been already uploaded to RDM and if it has been internally reviewed
            self._get_rdm_file_review()

        if 'electronicVersions' in self.item:
            for i in self.item['electronicVersions']:
                self.get_files_data(i)
    


    def _process_person_associations(self):
        """ Process data ralative to the record contributors """

        if 'personAssociations' not in self.item:
            return
            
        self.data['contributors'] = []

        file_data = file_read_lines('user_ids_match')

        for item in self.item['personAssociations']:

            self.sub_data = {}
            self._get_contributor_name(item)

            self._add_subdata(item, 'uuid',                   ['person', 'uuid'])
            self._add_subdata(item, 'externalId',             ['person', 'externalId'])
            self._add_subdata(item, 'authorCollaboratorName', ['authorCollaboration', 'names', 0, 'value'])   
            self._add_subdata(item, 'personRole',             ['personRoles', 0, 'value'])    
            self._add_subdata(item, 'organisationalUnit',     ['organisationalUnits', 0, 'names', 0, 'value'])
            self._add_subdata(item, 'type_p',                 ['externalPerson', 'types', 0, 'value'])
            self._add_subdata(item, 'uuid',                   ['externalPerson', 'uuid'])
            
            # Checks if the record owner is available in user_ids_match.txt
            person_external_id = get_value(item, ['person', 'externalId'])
            owner = self.general_functions.get_userid_from_list_by_externalid(person_external_id, file_data)
                
            if owner and int(owner) not in self.data['owners']:
                self.data['owners'].append(int(owner))

            # ORCID
            self._process_contributor_orcid()

            self.data['contributors'].append(self.sub_data)

        

    def _get_contributor_name(self, item: object):
        """ Stores the contributor name in self.sub_data['name'] as '<last name>, <first name>'.
            A missing first or last name is replaced by a placeholder text """
        first_name = get_value(item, ['name', 'firstName'])
        last_name  = get_value(item, ['name', 'lastName'])

        if not first_name:
            first_name = '(first name not specified)'
        if not last_name:
            # The placeholder belongs to last_name: it used to be assigned to
            # first_name, which lost the real first name and left last_name empty
            last_name = '(last name not specified)'

        self.sub_data['name'] = f'{last_name}, {first_name}'


    def _process_contributor_orcid(self):
        if 'uuid' in self.sub_data:
            person_uuid = self.sub_data['uuid']
            person_name = self.sub_data['name']
            
            # External persons are not present in 'persons' Pure API endpoint
            if 'type_p' in self.sub_data and self.sub_data['type_p'] == 'External person':
                report = f'\tPure get orcid @@ External person @ {person_uuid} @ {person_name}'
                self.report.add(report)
            else:
                orcid = self._get_orcid(person_uuid, person_name)
                if orcid:
                    self.sub_data['orcid'] = orcid


    def _process_organisational_units(self):
        """ Process the metadata relative to the organisational units """
        if 'organisationalUnits' in self.item:
            self.data['organisationalUnits'] = []
            self.data['groupRestrictions']   = []

            for i in self.item['organisationalUnits']:
                sub_data = {}

                organisational_unit_name       = get_value(i, ['names', 0, 'value'])
                organisational_unit_uuid       = get_value(i, ['uuid'])
                organisational_unit_externalId = get_value(i, ['externalId'])

                sub_data['name']        = organisational_unit_name
                sub_data['uuid']        = organisational_unit_uuid
                sub_data['externalId']  = organisational_unit_externalId

                self.data['organisationalUnits'].append(sub_data)

                # Adding organisational unit as group owner
                self.data['groupRestrictions'].append(organisational_unit_externalId)

                # Create group
                self.groups.rdm_create_group(organisational_unit_externalId, organisational_unit_name)


    def _applied_restrictions_check(self):
        """ Checks if the restrictions applied to the record are valid.
            e.g. ['groups', 'owners', 'ip_range', 'ip_single'] """
        
        if not 'appliedRestrictions' in self.data:
            return False

        for i in self.data['appliedRestrictions']:
            if i not in possible_record_restrictions:
                report = f"Warning: the value '{i}' is not amont the accepted restrictions\n"
                self.report.add(report)
        return True



    def _post_metadata(self):
        """ Submits the created json to RDM and then uploads the record files.

        Returns False when the metadata post fails or the recid can not be
        retrieved. Otherwise returns the outcome of
        _metadata_and_file_submission_check: True only when the metadata and
        every file were transmitted.
        """

        uuid = self.item['uuid']
        success_check = {'metadata': False, 'file': False}

        # POST REQUEST metadata
        response = self.rdm_requests.post_metadata(self.data)

        # Process response
        if not self._process_post_response(response, uuid):
            return False

        success_check['metadata'] = True

        # After pushing a record's metadata to RDM it takes about one second to be able to get its recid
        time.sleep(1)

        # Gets recid from RDM
        recid = self.general_functions.get_recid(uuid, self.global_counters)
        if not recid:
            return False

        # add record to all_rdm_records.txt (handle closed by the context manager)
        with open(data_files_name['all_rdm_records'], "a") as records_file:
            records_file.write(f'{uuid} {recid}\n')

        # Submit record FILES
        # NOTE: the e-mail asking to remove the record from Pure after a
        # successful upload is currently not sent
        all_files_uploaded = True
        for file_name in self.record_files:
            uploaded = rdm_add_file(file_name, recid)
            # Updates the file counters
            self._process_file_response(uploaded, success_check)
            if not uploaded:
                all_files_uploaded = False

        # True when there is no file to upload; False as soon as one upload
        # failed (a single success used to hide the failure of other files)
        success_check['file'] = all_files_uploaded

        # Checks if both metadata and files were correctly transmitted
        return self._metadata_and_file_submission_check(success_check)


    def _process_post_response(self, response: object, uuid: str):
    
        # Count http responses
        self._http_response_counter(response.status_code)

        self.report.add(f"\tRDM post metadata @ {response} @ Uuid:                 {uuid}")

        if response.status_code >= 300:
            self.global_counters['metadata']['error'] += 1
            return False

        self.global_counters['metadata']['success'] += 1
        return True


    def _process_file_response(self, response: object, success_check: object):
        if response:
            self.global_counters['file']['success'] += 1
            success_check['file'] = True

        else:
            self.global_counters['file']['error'] += 1


    def _remove_uuid_from_list(self, uuid: str, file_name: str):
        """ Rewrites the given file leaving out every line that consists of the given uuid """

        check_if_file_exists(file_name)

        with open(file_name, "r") as source:
            kept_lines = [line for line in source.readlines() if line.strip("\n") != uuid]

        with open(file_name, "w") as target:
            target.writelines(kept_lines)



    def _add_field(self, item: list, rdm_field: str, path: list):
        """ Stores in self.data[rdm_field] the value found at path in item; falsy values are skipped """

        field_value = get_value(item, path)
        if not field_value:
            return
        self.data[rdm_field] = field_value



    def _accessright_conversion(self, pure_value: str):
        """ Maps a Pure access right to its RDM value through accessright_pure_to_rdm.
            An unknown value is reported and False is returned """

        if pure_value not in accessright_pure_to_rdm:
            self.report.add('\n--- new access_right ---> not in accessright_pure_to_rdmk array\n\n')
            return False

        return accessright_pure_to_rdm[pure_value]


    def _language_conversion(self, pure_language: str):
        """ Converts from pure full language name to iso6393 (3 characters) """

        if pure_language == 'Undefined/Unknown':
            return False
        
        # Read iso6393 json file
        resp_json = json.load(open(iso6393_file_name, 'r'))

        for i in resp_json:
            if i['name'] == pure_language:
                return i['iso6393']

        # in case there is no match (e.g. spelling mistake in Pure) ignore field
        return False



    def _get_rdm_file_review(self):
        """ When a record is updated in Pure, there will be a check if the new file from Pure is the same as the old file in RDM.
        To do so it makes a comparison on the file size.
        If the size is not the same, then it will be uploaded to RDM and a new internal review will be required. """

        # Get from RDM file size and internalReview
        params = {'sort': 'mostrecent', 'size': '100', 'page': '1', 'q': self.uuid}
        response = self.rdm_requests.get_metadata(params)

        if response.status_code >= 300:
            self.report.add(f'\nget_rdm_file_size @ {self.uuid} @ {response}')
            return False

        # Load response
        resp_json = json.loads(response.content)

        total_recids = resp_json['hits']['total']
        if total_recids == 0:
            return False

        record = resp_json['hits']['hits'][0]['metadata']  # [0] because they are ordered, therefore it is the most recent

        if 'versionFiles' in record:
            for file in record['versionFiles']:
                if 'size' in file and 'internalReview' in file and 'name' in file:
                    file_size   = file['size']
                    file_review = file['internalReview']
                    file_name   = file['name']
                    self.rdm_file_review.append({'size': file_size, 'review': file_review, 'name': file_name})
        return



    def get_files_data(self, item: dict):
        """ Gets metadata information from electronicVersions and additionalFiles files.
            It also downloads the relative files. The Metadata without file will be ignored """

        if 'file' not in item:
            return False
        elif 'fileURL' not in item['file'] or 'fileName' not in item['file']:
            return False

        internal_review = False     # Default value

        pure_file_size  = get_value(item, ['file', 'size'])
        file_name       = get_value(item, ['file', 'fileName'])
        file_url        = get_value(item, ['file', 'fileURL'])

        self.pure_rdm_file_match = []

        # Checks if pure_file_size and file_name are the same as any of the files in RDM with the same uuid
        for rdm_file in self.rdm_file_review:

            rdm_file_size   = str(rdm_file['size'])
            rdm_review      = rdm_file['review']

            if pure_file_size == rdm_file_size and file_name == rdm_file['name']:
                self.pure_rdm_file_match.append(True)            # Do the old and new file match?
                self.pure_rdm_file_match.append(rdm_review)      # Was the old file reviewed?
                internal_review = rdm_review       # The new uploaded file will have the same review value as in RDM
                break

        self.sub_data = {}
        self.sub_data['internalReview'] = internal_review

        self._add_subdata(item, 'name',            ['file', 'fileName'])
        self._add_subdata(item, 'size',            ['file', 'size'])
        self._add_subdata(item, 'mimeType',        ['file', 'mimeType'])
        self._add_subdata(item, 'digest',          ['file', 'digest'])
        self._add_subdata(item, 'digestAlgorithm', ['file', 'digestAlgorithm'])
        self._add_subdata(item, 'createdBy',       ['creator'])
        self._add_subdata(item, 'createdDate',     ['created'])
        self._add_subdata(item, 'versionType',     ['versionTypes', 0, 'value'])
        self._add_subdata(item, 'licenseType',     ['licenseTypes', 0, 'value'])

        # Access type
        value = get_value(item, ['accessTypes', 0, 'value'])
        self.sub_data['accessType'] = self._accessright_conversion(value)

        # Append to sub_data to .data
        self.data['versionFiles'].append(self.sub_data)

        # Download file from Pure
        response = get_pure_file(self, file_url, file_name)
        # Checks if the file is already in RDM, and if it has already been reviewed
        self._process_file_download_response(response, file_name)
        


    def _add_subdata(self, item: list, rdm_field: str, path: list):
        """ Stores in self.sub_data[rdm_field] the value found at path in item; falsy values are skipped """
        field_value = get_value(item, path)
        if not field_value:
            return
        self.sub_data[rdm_field] = field_value



    def _process_file_download_response(self, response, file_name):
        """ Reports the file download together with the match / review state of the file in RDM
            and queues the file name in self.record_files """
        match = self.pure_rdm_file_match

        if not match:
            # If the file is not in RDM
            match_review = 'File not in RDM    '
        elif match[0] == False:
            # If the file in pure is different from the one in RDM
            # (equality with False kept on purpose, it is not a plain truthiness test)
            match_review = 'Match: F, Review: -'
        elif match[1]:
            # Same file, already reviewed by internal staff
            match_review = 'Match: T, Review: T'
        else:
            # Same file, not reviewed yet
            match_review = 'Match: T, Review: F'

        file_name_report = shorten_file_name(file_name)
        self.report.add(f'\tPure get file @ {response} @ {match_review} @ {file_name_report}')

        self.record_files.append(file_name)



    def _get_orcid(self, person_uuid: str, name: str):
        """ Requests a person to Pure and returns its orcid.
            Returns False on a request error or when the person has no orcid; every outcome is reported """
        # Pure request
        response = get_pure_metadata('persons', person_uuid, {}, False)

        prefix = f'\tPure get orcid @ {response} @'

        # Error
        if response.status_code >= 300:
            self.report.add(f'{prefix} Error: {response.content}')
            return False

        person = json.loads(response.content)

        # Not found
        if 'orcid' not in person:
            self.report.add(f'{prefix} Orcid not found @ {person_uuid} @ {name}')
            return False

        # Read orcid
        orcid = person['orcid']
        self.report.add(f'{prefix} {orcid} @ {person_uuid} @ {name}')
        return orcid



    def _metadata_and_file_submission_check(self, success_check: dict):
        """ Checks if both metadata and files were correctly transmitted.

        On success the uuid is removed from the transfer list file and True
        is returned. Otherwise the uuid is appended to that file, so that it
        gets re-transmitted, and False is returned.
        """
        transfer_list = data_files_name['transfer_uuid_list']

        if success_check['metadata'] and success_check['file']:
            # Remove uuid from to_transmit.txt
            self._remove_uuid_from_list(self.uuid, transfer_list)
            return True

        # Add uuid to to_transmit.txt to be re-transmitted
        # (the context manager closes the handle, which used to be left open)
        with open(transfer_list, "a") as transfer_file:
            transfer_file.write(f'{self.uuid}\n')
        return False



    def _http_response_counter(self, status_code: int):
        """ According to the given http status code 
            creates a new object element or increaes an existing one  """
        if status_code not in self.global_counters['http_responses']:
            self.global_counters['http_responses'][status_code] = 0
        self.global_counters['http_responses'][status_code] += 1
Beispiel #14
0
 def __init__(self):
     """ Creates the database, report, request and general-function helpers
         (enclosing class not visible in this excerpt) """
     self.rdm_db = RdmDatabase()
     self.report = Reports()
     self.rdm_requests = Requests()
     self.general_functions = GeneralFunctions()
     # NOTE(review): presumably the report targets the messages are written to - confirm against Reports
     self.report_files = ['console', 'groups']
Beispiel #15
0
class RdmGroups:
    def __init__(self):
        """ Creates the helper objects used by the group split / merge tasks """
        # Queried through select_query for roles, users and user-role links
        self.rdm_db = RdmDatabase()
        # Receives every report line (add / add_template)
        self.report = Reports()
        # Used to fetch record metadata (get_metadata_by_query)
        self.rdm_requests = Requests()
        # Used to update modified records (update_rdm_record)
        self.general_functions = GeneralFunctions()
        # Passed along with most report calls of this class
        self.report_files = ['console', 'groups']

    def _general_report_and_variables(func):
        def _wrapper(self, old_group_externalId, new_groups_externalIds):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP SPLIT'])
            self.report.add(
                f'\nOld group: {old_group_externalId} @ New groups: {new_groups_externalIds}\n',
                self.report_files)

            # Get name and uuid of new groups
            self.new_groups_data = []

            # Decorated function
            func(self, old_group_externalId, new_groups_externalIds)

        return _wrapper

    @_general_report_and_variables
    def rdm_group_split(self, old_group_externalId: str,
                        new_groups_externalIds: list):
        """ 
        1 - Create new groups
        2 - Add users to new groups
        3 - Remove users from old group
        4 - Delete old group
        5 - Modify RDM record: 
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits
        """
        for externalId in new_groups_externalIds:
            # Get group information
            group_name = self._get_pure_group_metadata(externalId)
            if not group_name:
                return False

            # Create new group
            response = self.rdm_create_group(externalId, group_name)

        # Get old group id
        old_group_id = self._get_rdm_group_id(old_group_externalId)

        # Removes users from old group and adds to new groups
        self._rdm_split_users_from_old_to_new_group(old_group_id,
                                                    old_group_externalId,
                                                    new_groups_externalIds)

        # Modify all related records
        self._rdm_split_modify_record(old_group_externalId,
                                      new_groups_externalIds)

    def _general_report_and_variables(func):
        def _wrapper(self, old_groups_externalId, new_group_externalId):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP MERGE'])
            report = f'\nOld groups: {old_groups_externalId} @ New group: {new_group_externalId}\n'
            self.report.add(report, self.report_files)

            # Get new group information
            self.new_groups_data = []

            # Decorated function
            func(self, old_groups_externalId, new_group_externalId)

        return _wrapper

    @_general_report_and_variables
    def rdm_group_merge(self, old_groups_externalId: list,
                        new_group_externalId: str):
        """ 
        1 - Create new group
        2 - Remove users from old groups
        3 - Add users to new group
        4 - Delete old groups
        5 - Modify RDM records: 
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits
        """
        group_name = self._get_pure_group_metadata(new_group_externalId)
        if not group_name:
            return False

        # Create new group
        response = self.rdm_create_group(new_group_externalId, group_name)

        # Adds users to new group and removes them from the old ones
        self._merge_users_from_old_to_new_group(old_groups_externalId,
                                                new_group_externalId)

        # Modify all related records
        self._rdm_merge_modify_records(old_groups_externalId,
                                       self.new_groups_data[0],
                                       new_group_externalId)

    def _get_rdm_group_id(self, externalId: str):
        """ Reads id and description of the role named externalId from
        accounts_role, reports both and returns the id """
        rows = self.rdm_db.select_query('id, description', 'accounts_role',
                                        {'name': f"'{externalId}'"})

        # Only the first matching row is used
        first_row = rows[0]
        group_id, group_name = first_row[0], first_row[1]

        self.report.add(
            f'\tOld group info @ ExtId: {add_spaces(externalId)} @ RDM id: {add_spaces(group_id)} @ {group_name}',
            self.report_files)
        return group_id

    def _rdm_split_modify_record(self, old_group_externalId: str,
                                 new_groups_externalIds: list):
        """ Replaces the old group by the new groups in the metadata of
        every record returned by the RDM query for old_group_externalId.

        For each record it updates organisationalUnits, groupRestrictions
        and (when it points to the old group) the managing organisational
        unit, then sends the record to update_rdm_record.
        Always returns True. """

        # Get from RDM all old group's records
        # NOTE(review): only the hits contained in this single response are
        # processed - confirm whether the query result can be paginated
        response = self.rdm_requests.get_metadata_by_query(
            old_group_externalId)

        resp_json = json.loads(response.content)
        total_items = resp_json['hits']['total']

        report = f"\tModify old g. records @ ExtId: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
        self.report.add(report, self.report_files)

        if total_items == 0:
            self.report.add('\tNothing to modify @ End', self.report_files)
            return True

        # Iterates over all old group records
        for hit in resp_json['hits']['hits']:
            item = hit['metadata']

            # Removes old organisationalUnit from organisationalUnits.
            # A new list is built because removing elements from the list
            # being iterated makes the loop skip the following element.
            item['organisationalUnits'] = [
                unit for unit in item['organisationalUnits']
                if unit['externalId'] != old_group_externalId
            ]

            # Adds new organisationalUnits
            item['organisationalUnits'].extend(self.new_groups_data)

            # Change group restrictions
            if old_group_externalId in item['groupRestrictions']:
                item['groupRestrictions'].remove(old_group_externalId)
            for externalId in new_groups_externalIds:
                # Same duplicate check as in _process_group_restrictions
                if externalId not in item['groupRestrictions']:
                    item['groupRestrictions'].append(externalId)

            # Change managingOrganisationalUnit
            item = self._process_managing_organisational_unit(
                item, old_group_externalId)

            # Update record
            self.general_functions.update_rdm_record(item['recid'], item)

        return True

    def _process_managing_organisational_unit(self, item: object,
                                              old_group_externalId: str):
        if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
            item['managingOrganisationalUnit_name'] = self.new_groups_data[0][
                'name']
            item['managingOrganisationalUnit_uuid'] = self.new_groups_data[0][
                'uuid']
            item[
                'managingOrganisationalUnit_externalId'] = self.new_groups_data[
                    0]['externalId']
        return item

    def _rdm_split_users_from_old_to_new_group(self, old_group_id: str,
                                               old_group_externalId: str,
                                               new_groups_externalIds: list):

        # Get all users in old group
        response = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                            {'role_id': old_group_id})

        report = 'Old group @@ Num. of users:  '
        if not response:
            self.report.add(f'\t{report} 0', self.report_files)
            return

        self.report.add(f'\t{report} {len(response)}', self.report_files)

        for i in response:
            user_id = i[0]

            # Get user email
            user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                  {'id': user_id})[0][0]

            for new_group_externalId in new_groups_externalIds:
                # Add user to new groups
                self._group_add_user(user_email, new_group_externalId, user_id)

            # Remove user from old group
            response = self._group_remove_user(user_email,
                                               old_group_externalId)

    def _rdm_merge_modify_records(self, old_groups_externalId: list,
                                  new_group_data: dict,
                                  new_group_externalId: str):
        """ Replaces each old group by the new group in the metadata of the
        records returned by the RDM query for that old group.

        For every record it rewrites the organisational units, the group
        restrictions and (when it points to the old group) the managing
        organisational unit, then sends the record to update_rdm_record. """

        # Record key -> key of new_group_data, in assignment order
        managing_fields = (
            ('managingOrganisationalUnit_name', 'name'),
            ('managingOrganisationalUnit_uuid', 'uuid'),
            ('managingOrganisationalUnit_externalId', 'externalId'),
        )

        # Get from RDM all records with old groups
        for old_group_externalId in old_groups_externalId:

            # Only reports when the group is in the database
            self._rdm_check_if_group_exists(old_group_externalId)

            # Get record metadata
            query_response = self.rdm_requests.get_metadata_by_query(
                old_group_externalId)
            query_result = json.loads(query_response.content)
            total_items = query_result['hits']['total']

            self.report.add(
                f"\tModify records @ Group: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}",
                self.report_files)

            if total_items == 0:
                continue

            # Iterates over all old group records
            for hit in query_result['hits']['hits']:

                # Organisational units
                metadata = self._process_organisational_units(
                    hit['metadata'], new_group_data, old_groups_externalId)

                # Group restrictions
                self._process_group_restrictions(metadata,
                                                 old_group_externalId,
                                                 new_group_externalId)

                # Managing Organisational Unit
                if metadata['managingOrganisationalUnit_externalId'] == old_group_externalId:
                    for record_key, group_key in managing_fields:
                        metadata[record_key] = new_group_data[group_key]

                # Update record
                self.general_functions.update_rdm_record(
                    metadata['recid'], metadata)

    def _process_organisational_units(self, item, new_group_data,
                                      old_groups_externalId):
        new_organisationalUnits_data = [new_group_data]

        for i in item['organisationalUnits']:
            if (i['externalId'] in old_groups_externalId
                    or i['externalId'] == new_group_data['externalId']):
                continue

            new_organisationalUnits_data.append(i)

        item['organisationalUnits'] = new_organisationalUnits_data
        return item

    def _process_group_restrictions(self, item, old_group_externalId,
                                    new_group_externalId):
        # Remove old group
        if old_group_externalId in item['groupRestrictions']:
            item['groupRestrictions'].remove(old_group_externalId)
        # Add new group
        if new_group_externalId not in item['groupRestrictions']:
            item['groupRestrictions'].append(new_group_externalId)
        return item

    def _merge_users_from_old_to_new_group(self, old_groups_externalId: list,
                                           new_group_externalId: str):
        # Iterate over old groups
        for old_group_externalId in old_groups_externalId:

            # Get group id
            response = self.rdm_db.select_query(
                'id, description', 'accounts_role',
                {'name': f"'{old_group_externalId}'"})

            if not response:
                self.report.add(
                    '\nWarning @ Old group ({old_groups_externalId}) not in database @ END TASK\n'
                )
                return False

            old_group_id = response[0][0]
            old_group_name = response[0][1]

            # Get all users id that are in this group
            old_group_users = self.rdm_db.select_query(
                'user_id', 'accounts_userrole', {'role_id': old_group_id})

            if not old_group_users:
                old_group_users = []

            report = f"\tOld group @ ExtId:     {add_spaces(old_group_externalId)} @ Num. users:  {add_spaces(len(old_group_users))} @ {old_group_name}"
            self.report.add(report, self.report_files)

            for i in old_group_users:
                user_id = i[0]

                # Get user email
                user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                      {'id': user_id})[0][0]

                # - - Add user to new group - -
                self._group_add_user(user_email, new_group_externalId, user_id)

                # - - Remove user from old group - -
                response = self._group_remove_user(user_email,
                                                   old_group_externalId)

            # Delete old group

    def _get_pure_group_metadata(self, externalId: str):
        """ Get organisationalUnit name and uuid.

        Requests the research outputs of the organisational unit from Pure
        and looks for the unit with the given externalId among the
        organisational units of the first returned item.

        When found, a dict with externalId, uuid and name is appended to
        self.new_groups_data and the name is returned.
        Returns False (after reporting) when the request fails, when no
        item is returned or when the unit is not listed. """

        # PURE REQUEST
        response = get_pure_metadata('organisational-units',
                                     f'{externalId}/research-outputs', {
                                         'page': 1,
                                         'pageSize': 100
                                     })

        report = f'\tNew group info @ ExtId:     {add_spaces(externalId)} @ '

        # Check response
        if response.status_code >= 300:
            report += 'Not in pure - END TASK\n'
            self.report.add(report, self.report_files)
            self.report.add(response.content, self.report_files)
            return False

        # Load json
        items = json.loads(response.content).get('items') or []

        # Without any item there is nothing to index (used to raise)
        units = items[0]['organisationalUnits'] if items else []

        for organisationalUnit in units:
            if organisationalUnit['externalId'] != externalId:
                continue

            organisationalUnit_data = {
                'externalId': externalId,
                'uuid': organisationalUnit['uuid'],
                # First name of the unit
                'name': organisationalUnit['names'][0]['value'],
            }

            report += f"{organisationalUnit_data['uuid']} @ {organisationalUnit_data['name']}"
            self.report.add(report, self.report_files)

            self.new_groups_data.append(organisationalUnit_data)
            return organisationalUnit_data['name']

        # Leave a trace in the report instead of failing silently
        report += 'Organisational unit not found in pure response - END TASK\n'
        self.report.add(report, self.report_files)
        return False

    def _rdm_check_if_group_exists(self, group_externalId: str):
        """ Checks if the group already exists"""

        response = self.rdm_db.select_query('*', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})

        if response:
            report = f'\tNew group check @@ ExtId:        {add_spaces(group_externalId)} @ Already exists'
            self.report.add(report)
            return True
        return False

    def rdm_create_group(self, externalId: str, group_name: str):

        # Checks if the group already exists
        response = self._rdm_check_if_group_exists(externalId)
        if response:
            return True

        group_name = group_name.replace('(', '\(')
        group_name = group_name.replace(')', '\)')
        group_name = group_name.replace(' ', '_')

        # Run command
        command = f'pipenv run invenio roles create {externalId} -d {group_name}'
        response = os.system(command)

        report = f'\tNew group check @@'

        if response != 0:
            self.report.add(f'{report} Error: {response}')
            return False

        self.report.add(f'{report} Group created @ External id: {externalId}')
        return True

    def _rdm_add_user_to_group(self, user_id: int, group_externalId: str,
                               group_name: str):
        """ Adds the user to the group, creating the group first when it is
        not in accounts_role.

        Returns True when the user already belongs to the group or was
        added, False when the group can not be found after trying to
        create it or when the invenio command ends with a non-zero
        status. """
        # Local import keeps the block independent from the module imports
        from shlex import quote

        # Get user's rdm email
        user_email = self.rdm_db.select_query('email', 'accounts_user',
                                              {'id': user_id})[0][0]

        # Get group id
        response = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})

        if not response:
            # If the group does not exist then creates it
            self.rdm_create_group(group_externalId, group_name)
            # Repeats the query to get the group id
            response = self.rdm_db.select_query(
                'id', 'accounts_role', {'name': f"'{group_externalId}'"})

            if not response:
                # Creation failed: report it instead of failing on the index
                self.report.add(
                    f'Warning @ Group {group_externalId} not found after creation')
                return False

        group_id = response[0][0]

        # Checks if match already exists
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        if response:
            report = f'\tRDM user in group @ User id: {add_spaces(user_id)} @@ Already belongs to group {group_externalId} (id {group_id})'
            self.report.add(report)
            return True

        # Adds user to group (values are quoted for the shell)
        command = (f'pipenv run invenio roles add {quote(str(user_email))}'
                   f' {quote(str(group_externalId))}')
        response = os.system(command)
        if response != 0:
            self.report.add(
                f'Warning @ Adding user to group response: {response}')
            return False
        return True

    def _group_add_user(self, user_email: str, new_group_externalId: str,
                        user_id: str):

        # Get group id
        group_id = self.rdm_db.select_query(
            'id', 'accounts_role', {'name': f"'{new_group_externalId}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        if response:
            return True

        command = f'pipenv run invenio roles add {user_email} {new_group_externalId}'
        response = os.system(command)

        report = f'\tAdd user to group @ ExtId:     {add_spaces(new_group_externalId)} @ User id:     {add_spaces(user_id)}'
        if response != 0:
            self.report.add(f'{report} @ Error: {response}', self.report_files)
            return False

        self.report.add(f'{report} @ Success', self.report_files)
        return True

    def _group_remove_user(self, user_email: str, group_name: str):
        """ Removes the user from the group through the invenio command
        line, when accounts_userrole links them.

        Returns True when the user was not in the group or was removed,
        False when the command ends with a non-zero status. Every outcome
        is reported. """
        # Local import keeps the block independent from the module imports
        from shlex import quote

        # Get user id
        user_id = self.rdm_db.select_query('id', 'accounts_user',
                                           {'email': f"'{user_email}'"})[0][0]

        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_name}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        report = f'Remove user fromGroup @ ExtId:     {add_spaces(group_name)} @ User id:     {add_spaces(user_id)}'

        if not response:
            self.report.add(f'\t{report} @ Not in group (already removed)',
                            self.report_files)
            return True

        # Remove user from old group
        # (email and group name are quoted so the shell takes them literally)
        command = (f'pipenv run invenio roles remove {quote(str(user_email))}'
                   f' {quote(str(group_name))}')
        response = os.system(command)

        if response != 0:
            self.report.add(f'\t{report} @ Error: {response}',
                            self.report_files)
            return False

        self.report.add(f'\t{report} @ Success', self.report_files)
        return True