Exemple #1
 def __init__(self):
     """ Instantiates the collaborator objects and sets the report targets """
     # NOTE(review): same statements as RdmOwners.__init__ further down in this file;
     # this looks like a detached preview copy without its class header - confirm
     self.rdm_requests = Requests()
     self.rdm_db = RdmDatabase()
     self.report = Reports()
     self.rdm_add_record = RdmAddRecord()
     self.general_functions = GeneralFunctions()
     # Report targets handed to self.report.add(...) as second argument
     self.report_files = ['console', 'owners']
 def __init__(self):
     """ Instantiates the collaborator objects """
     # NOTE(review): same statements as PureChangesByDate.__init__ right below;
     # this looks like a detached preview copy without its class header - confirm
     self.add_record = RdmAddRecord()
     self.report = Reports()
     self.delete = Delete()
     self.general_functions = GeneralFunctions()
class PureChangesByDate:
    def __init__(self):
        """ Instantiates the collaborator objects used by the methods of this class """
        # Used to push a record to RDM (push_record_by_uuid)
        self.add_record = RdmAddRecord()
        # Used for every report line / template (self.report.add, add_template)
        self.report = Reports()
        # NOTE(review): presumably used to delete records from RDM - no call is visible here
        self.delete = Delete()
        # Used to look up the RDM recid of a uuid (get_recid)
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """ Gets from Pure 'changes' endpoint all records that have been created / updated / deleted 
            and modifies accordingly the relative RDM records """

        # Get date of last update
        missing_updates = self._get_missing_updates()
        missing_updates = ['2020-05-15']  # TEMPORARY !!!!!

        if missing_updates == []:
            self.report.add('\nNothing to update.\n')

        for date_to_update in reversed(missing_updates):

    def _set_counters_and_title(func):
        def _wrapper(self, changes_date: str):

            # Initialize global counters
            self.global_counters = initialize_counters()

            self.report_files = ['console', 'changes']

            self.report.add_template(self.report_files, ['general', 'title'],
            self.report.add(f'\nProcessed date: {changes_date}',

            # Decorated function
            func(self, changes_date)


        return _wrapper

    def _changes_by_date(self, changes_date: str):
        """ Gets from Pure all changes that took place in a certain date """

        reference = changes_date
        page = 1

        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page,

            # If there are no records to process
            if not json_response:
                return True

            # Used to check if there are doubled tasks (e.g. update uuid and delete same uuid)
            self.duplicated_uuid = []


            # Iterates over all records that need to be deleted

            # Create / Add / Update

            # Gets the reference code of the next page
            reference = get_next_page(json_response).split('/')[-1]

            page += 1

    def _records_to_process(self, response: object, page: int,
                            changes_date: str):
        """ Check if there are records in the response from pure """

        # Load response json
        json_response = json.loads(response.content)

        number_records = json_response["count"]

        if number_records == 0:
            # Adds the date to successful_changes.txt

            if page == 1:
                # If there are no changes at all
                self.report.add(f'\n\nNothing to transfer.\n\n',
            return False

        report_line = f'\nPag{add_spaces(page)} @ Pure get changes @ {response} @ Number of items: {add_spaces(number_records)}'
        self.report.add(report_line, self.report_files)

        return json_response

    def _delete_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be deleted """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
            elif item['familySystemName'] != 'ResearchOutput':
            elif item['changeType'] != 'DELETE':

            uuid = item['uuid']
            self.local_counters['delete'] += 1

            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"

            # Gets the record recid
            recid = self.general_functions.get_recid(uuid,

            if recid:
                # Deletes the record from RDM
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1
        return True

    def _update_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be created/updated """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
            elif item['familySystemName'] != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
            elif item['changeType'] == 'DELETE':

            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1

            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type           - {item['changeType']}"

            if item['changeType'] == 'ADD' or item['changeType'] == 'CREATE':
                self.local_counters['create'] += 1

            if item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1

            # Checks if this uuid has already been created / updated / deleted

            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self):
        """ Reading successful_changes.txt gets the dates in 
            which Pure changes have not been processed """

        file_name = data_files_name['successful_changes']

        missing_updates = []
        count = 0
        days_span = 7

        date_today = str(datetime.today().strftime('%Y-%m-%d'))
        date_check = datetime.strptime(date_today, "%Y-%m-%d").date()

        while count < days_span:

            if str(date_check) not in open(file_name, 'r').read():

            date_check = date_check - timedelta(days=1)
            count += 1

        return missing_updates

    def _report_summary(self):

        # Global counters

        arguments = []
        for i in self.local_counters:
        self.report.add_template(self.report_files, ['changes', 'summary'],

    def _initialize_local_counters(self):

        # Incomplete:  when the uuid or changeType are not specified
        # Duplicated:  e.g. when a record has been modified twice in a day
        # Irrelevant:  when familySystemName is not ResearchOutput

        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
Exemple #4
 def __init__(self):
     """ Instantiates the collaborator objects """
     # NOTE(review): same statements as Versioning.__init__ right below;
     # this looks like a detached preview copy without its class header - confirm
     self.report = Reports()
     self.rdm_requests = Requests()
     self.general_functions = GeneralFunctions()
Exemple #5
class Versioning:
    def __init__(self):
        """ Instantiates the collaborator objects used by the methods of this class """
        # Used for every report line (self.report.add)
        self.report = Reports()
        # Used to query RDM for the records of a uuid (get_metadata_by_query)
        self.rdm_requests = Requests()
        # Used to push the modified metadata back to RDM (update_rdm_record)
        self.general_functions = GeneralFunctions()

    def get_uuid_version(self, uuid):
        """ Gives the version to use for a new record and old versions of the same uuid """

        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)

        message = f'\tRDM metadata version  - {response} - '

        total_recids = resp_json['hits']['total']
        all_metadata_versions = []

        if total_recids == 0:
            # If there are no records with the same uuid means it is the first one (version 1)
            new_version = 1
                f'{message}Record NOT found    - Metadata version: 1')
            return [new_version, all_metadata_versions]

        new_version = None

        # Iterates over all records in response
        for item in resp_json['hits']['hits']:
            rdm_metadata = item['metadata']

            # If a record has a differnt uuid than it will be ignored
            if uuid != rdm_metadata['uuid']:
                    f" VERSIONING - Different uuid {rdm_metadata['uuid']}")

            # Get the latest version
            if 'metadataVersion' in rdm_metadata and not new_version:
                new_version = rdm_metadata['metadataVersion'] + 1

            # Add data to listed versions (old versions)
            recid = item['id']
            creation_date = item['created'].split('T')[0]
            version = str(rdm_metadata['metadataVersion'])
            all_metadata_versions.append([recid, version, creation_date])

        # In case the record has no metadataVersion
        if not new_version:
            message += f'Vers. not specified - New metadata version: 1'
            new_version = 1
            count_old_versions = add_spaces(len(all_metadata_versions))
            message += f'Older versions{count_old_versions} - New version: {new_version}'


        return [new_version, all_metadata_versions]

    def update_all_uuid_versions(self, uuid):
        """ Rewrites 'metadataOtherVersions' in every RDM record that shares the given uuid.

            The list of versions ([recid, version, creation date] for each record) is built
            from the RDM query response and pushed to each record whose stored list differs.
            Nothing is returned. """

        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)
        hits = resp_json['hits']['hits']

        if resp_json['hits']['total'] == 0:
            self.report.add('There are no records with this uuid')
            # Nothing to update
            return

        # Add data to listed versions
        all_metadata_versions = [
            [
                hit['id'],
                str(hit['metadata']['metadataVersion']),
                hit['created'].split('T')[0],       # Keeps only the date part
            ]
            for hit in hits
        ]

        self.report.add('\tUpdate uuid versions')

        for hit in hits:

            recid = hit['id']
            metadata = hit['metadata']

            # A record without the field counts as outdated instead of raising KeyError
            if metadata.get('metadataOtherVersions') == all_metadata_versions:
                self.report.add(f'\tRecord update @ Up to date @ {recid}')
                # Already up to date: no need to push it again
                continue

            metadata['metadataOtherVersions'] = all_metadata_versions

            # Update record
            self.general_functions.update_rdm_record(recid, metadata)
Exemple #6
class RdmOwners:
    def __init__(self):
        """ Instantiates the collaborator objects and sets the report targets """
        # Used for the RDM requests (get_metadata_by_recid, get_metadata)
        self.rdm_requests = Requests()
        # Used to query the RDM database (select_query)
        self.rdm_db = RdmDatabase()
        # Used for every report line / template (self.report.add, add_template)
        self.report = Reports()
        # Used to create in RDM the records that are missing (create_invenio_data)
        self.rdm_add_record = RdmAddRecord()
        # Used to look up recids and update records (get_recid, update_rdm_record)
        self.general_functions = GeneralFunctions()
        # Report targets handed to self.report.add(...) as second argument
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        """ Decorator: adds the 'OWNERS CHECK' title to the console report and
            initializes the global counters before running the decorated method.
            The decorated method receives (self, identifier) and its return
            value is handed back to the caller. """
        from functools import wraps

        @wraps(func)    # Keeps name and docstring of the decorated method
        def _wrapper(self, identifier):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['OWNERS CHECK'])
            self.global_counters = initialize_counters()

            # Decorated function
            # NOTE(review): presumably applied to run_owners, which returns True / False;
            # without the 'return' the caller would always get None
            return func(self, identifier)

        return _wrapper

    def run_owners(self, identifier: str):
        """ Gets from pure all the records related to a certain user (based on orcid or externalId),
            afterwards it modifies/create RDM records accordingly. """

        identifier_value = '0000-0002-4154-6945'  # TEMPORARY
        if identifier == 'externalId':  # TEMPORARY
            # identifier_value = '3261'                 # TEMPORARY
            identifier_value = '30'  # TEMPORARY

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier,
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':

        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons',
            if response.status_code >= 300:
                return False

            # Initial response proceses and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid  @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(
                    uuid, self.global_counters)

                # Record NOT in RDM, create it
                if recid == False:

                # Gets record metadata from RDM and checks if the user is already a record owner

            page += 1


    def _process_record_owners(self, recid):
        """ Gets record metadata from RDM and checks if the user is already a record owner """

        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']

            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the recod owners
            self._add_user_as_owner(rdm_json, recid)
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """ Adds the current logged in user as record owner """


            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """ If a record of the processed user is not in RDM creates it """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        # Final report
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)

    def _process_response(self, response: object, page: int):
        """ Checks if there are records to process """

        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

        if page == 1 and total_items == 0:
            self.report.add('\nThe user has no records @ End task\n')
            return False

        self.report.add(f'\nPag {page} - Get person records    - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """ Given the user's external id it return the relative user uuid  """

        # If the uuid is not found in the first x items then it will continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            total_items = record_json['count']

            for item in record_json['items']:

                if item[key_name] == key_value:
                    first_name = item['name']['firstName']
                    lastName = item['name']['lastName']
                    uuid = item['uuid']

                        f'Name:    {first_name} {lastName}\nUuid:    {uuid}',

                    if len(uuid) != 36:
                            '\n- Warning! Incorrect user_uuid length -\n',
                        return False
                    return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add(f'Uuid NOT FOUND - End task\n', self.report_files)
        return False

    #   ---         ---         ---
    def _get_user_id_from_rdm(self):
        """ Gets the ID and IP of the logged in user """

        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
            return False

        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
            return False

            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """ Add user to user_ids_match.txt, where are specified:
            rdm_user_id, user_uuid and user_external_id """
        file_name = data_files_name['user_ids_match']

        needs_to_add = self._check_user_ids_match('user_ids_match',

        if needs_to_add:
                 'a').write(f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):

        lines = file_read_lines(file_name)
        for line in lines:
            line = line.split('\n')[0]
            line = line.split(' ')

            # Checks if at least one of the ids match
            if str(self.user_id) == line[0] or self.user_uuid == line[
                    1] or external_id == line[2]:

                if line == [str(self.user_id), self.user_uuid, external_id]:
                    self.report.add('Ids list:   user in list',
                    return False
        return True

    def _initalizing_method(func):
        """ Decorator: adds the 'RECORDS OWNER' title to the console report and empties
            the file listed under data_files_name['rdm_record_owners'] before running
            the decorated method. The result of the decorated method is returned. """
        from functools import wraps

        @wraps(func)    # Keeps name and docstring of the decorated method
        def _wrapper(self):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['RECORDS OWNER'])

            # Empty the rdm_record_owners file (opening with 'w' truncates it)
            file_owner = data_files_name['rdm_record_owners']
            open(file_owner, 'w').close()

            # Decorated function (the call was missing, so the method never ran)
            return func(self)

        return _wrapper

    def get_rdm_record_owners(self):
        """ Gets all records from RDM and counts how many records belong to each user.
            It also updates the content of all_rdm_records.txt """

        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        all_records_list = ''
        next_page = True

        while next_page == True:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)


            if response.status_code >= 300:

            resp_json = json.loads(response.content)
            data = ''

            for item in resp_json['hits']['hits']:
                count += 1

                uuid = item['metadata']['uuid']
                recid = item['metadata']['recid']
                owners = item['metadata']['owners']

                line = f'{uuid} - {recid} - {owners}'
                data += f'{line}\n'

                all_records_list += f'{uuid} {recid}\n'

                for i in owners:
                    if i not in count_records_per_owner:
                        count_records_per_owner[i] = 0
                    count_records_per_owner[i] += 1

            self.report.add(f'\nPag {str(pag)} - Records {count}\n')

            open(data_files_name['rdm_record_owners'], 'a').write(data)

            if 'next' not in resp_json['links']:
                next_page = False

            pag += 1

        # Counts how many records have each owner

        # Update all_rdm_records.txt file

    def _count_records_per_owner(self, count_records_per_owner):
        """ Adds to the report a two-column table: owner and number of its records """

        self.report.add('Owner  Records')

        for owner, total in count_records_per_owner.items():
            records_column = add_spaces(total)
            owner_column = add_spaces(owner)
            self.report.add(f'{owner_column}    {records_column}')

    def _update_all_rdm_records_file(self, all_records_list):
        """ Replaces the content of the all_rdm_records file with all_records_list.
            all_records_list is the text to write (one 'uuid recid' pair per line
            as built by the caller). """

        file_all_records_list = data_files_name['all_rdm_records']

        # Opening with 'w' already empties the file, and the context manager
        # guarantees the handle is flushed and closed
        with open(file_all_records_list, 'w') as records_file:
            records_file.write(all_records_list)
 def __init__(self):
     """ Instantiates the collaborator objects """
     # NOTE(review): same statements as RdmAddRecord.__init__ right below;
     # this looks like a detached preview copy without its class header - confirm
     self.rdm_requests       = Requests()
     self.report             = Reports()
     self.groups             = RdmGroups()
     self.general_functions  = GeneralFunctions()
     self.versioning         = Versioning()
     self.versioning         = Versioning()
class RdmAddRecord:

    def __init__(self):
        """ Instantiates the collaborator objects used by the methods of this class """
        # Used to post the record metadata to RDM (post_metadata)
        self.rdm_requests       = Requests()
        # Used for every report line (self.report.add)
        self.report             = Reports()
        # Used to create one RDM group per organisational unit (rdm_create_group)
        self.groups             = RdmGroups()
        # Used to look up recids and user ids (get_recid, get_userid_from_list_by_externalid)
        self.general_functions  = GeneralFunctions()
        # Used to get the metadata version of a uuid (get_uuid_version)
        self.versioning         = Versioning()

    def push_record_by_uuid(self, global_counters: dict, uuid: str):
        """ Fetches from Pure the metadata of the given uuid and submits it to RDM.
            Returns False when Pure gives no metadata, otherwise the result
            of create_invenio_data. """
        pure_record = get_pure_record_metadata_by_uuid(uuid)
        if pure_record:
            return self.create_invenio_data(global_counters, pure_record)
        return False

    def _set_initial_variables(func):
        def _wrapper(self, global_counters, item) :
            self.global_counters = global_counters
            self.global_counters['total'] += 1      

            self.uuid = item['uuid']
            self.item = item
            self.data = {}
            # Stores the name of the record files
            # Necessary because we need first to create the record and then to put the files
            self.record_files = []      

            # Decorated function
            func(self, global_counters, item)

        return _wrapper

    def create_invenio_data(self, global_counters: dict, item: dict):
        """ Process the data received from Pure and submits it to RDM """

        # Versioning

        # Record owners

        # self.data['metadataOtherVersions'] = [['1', ''], ['2', '']]
        # self.data['owners'].append(3)     # TEMPORARY

        # Restrictions
        self.data['appliedRestrictions'] = ['owners', 'groups', 'ip_single', 'ip_range']    # TO REVIEW - TO REVIEW
        self.data['_access'] = {'metadata_restricted': False, 'files_restricted': False}    # TO REVIEW - TO REVIEW

        # Process various single fields
        # Electronic Versions (files)

        # Additional Files
        if 'additionalFiles' in item:
            for i in item['additionalFiles']:

        # Person Associations

        # Organisational Units

        # Checks if the restrictions applied to the record are valid

        self.data = json.dumps(self.data)

        # Post request to RDM

        # Updates the versioning data of all records with the same uuid

    def _versioning_required(func):
        def _wrapper(self) :
            if not versioning_running:
        return _wrapper

    def _check_record_version(self):
        """ Checks if there are in RDM other versions of the same uuid """
        # Get metadata version
        response = self.versioning.get_uuid_version(self.uuid)
        if response:
            self.data['metadataVersion']       = response[0]
            self.data['metadataOtherVersions'] = response[1]

    def _update_all_uuid_versions(self):
        """ Updates the versioning data of all records with the same uuid """
        # NOTE(review): no statement is visible in this body, so as shown the method
        # does nothing; presumably it delegates to self.versioning - confirm

    def _check_record_owners(self):
        """ Removes duplicate owners """
        if 'owners' in self.item:
            self.data['owners'] = list(set(self.item['owners']))        
            self.data['owners'] = list(set([1]))

    def _process_single_fields(self, item: dict):
        """ Copies into the RDM data the fields that need no processing, then adds
            the access right and the language after converting their Pure value """

        # (RDM field name, PURE json path)
        single_fields = (
            ('title',                                 ['title']),
            ('uuid',                                  ['uuid']),
            ('pureId',                                ['pureId']),
            ('publicationDate',                       ['publicationStatuses', 0, 'publicationDate', 'year']),
            ('createdDate',                           ['info', 'createdDate']),
            ('pages',                                 ['info', 'pages']),
            ('volume',                                ['info', 'volume']),
            ('journalTitle',                          ['info', 'journalAssociation', 'title', 'value']),
            ('journalNumber',                         ['info', 'journalNumber']),
            ('metadataModifBy',                       ['info', 'modifiedBy']),
            ('metadataModifDate',                     ['info', 'modifiedDate']),
            ('pure_link',                             ['info', 'portalUrl']),
            ('recordType',                            ['types', 0, 'value']),
            ('category',                              ['categories', 0, 'value']),
            ('peerReview',                            ['peerReview']),
            ('publicationStatus',                     ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']),
            ('numberOfAuthors',                       ['totalNumberOfAuthors']),
            ('workflow',                              ['workflows', 0, 'value']),
            ('confidential',                          ['confidential']),
            ('publisherName',                         ['publisher', 'names', 0, 'value']),
            ('abstract',                              ['abstracts', 0, 'value']),
            ('managingOrganisationalUnit_name',       ['managingOrganisationalUnit', 'names', 0, 'value']),
            ('managingOrganisationalUnit_uuid',       ['managingOrganisationalUnit', 'uuid']),
            ('managingOrganisationalUnit_externalId', ['managingOrganisationalUnit', 'externalId']),
        )

        for rdm_field, pure_path in single_fields:
            self._add_field(item, rdm_field, pure_path)

        # Access right
        pure_access = get_value(item, ['openAccessPermissions', 0, 'value'])
        self.data['access_right'] = self._accessright_conversion(pure_access)

        # Language
        pure_language = get_value(item, ['languages', 0, 'value'])
        self.data['language'] = self._language_conversion(pure_language)

    def _process_electronic_versions(self):
        """ Data relative to files """

        self.data['versionFiles'] = []
        self.rdm_file_review = []

        if 'electronicVersions' in self.item or 'additionalFiles' in self.item:
            # Checks if the file has been already uploaded to RDM and if it has been internally reviewed

        if 'electronicVersions' in self.item:
            for i in self.item['electronicVersions']:

    def _process_person_associations(self):
        """ Process data ralative to the record contributors """

        if 'personAssociations' not in self.item:
        self.data['contributors'] = []

        file_data = file_read_lines('user_ids_match')

        for item in self.item['personAssociations']:

            self.sub_data = {}

            self._add_subdata(item, 'uuid',                   ['person', 'uuid'])
            self._add_subdata(item, 'externalId',             ['person', 'externalId'])
            self._add_subdata(item, 'authorCollaboratorName', ['authorCollaboration', 'names', 0, 'value'])   
            self._add_subdata(item, 'personRole',             ['personRoles', 0, 'value'])    
            self._add_subdata(item, 'organisationalUnit',     ['organisationalUnits', 0, 'names', 0, 'value'])
            self._add_subdata(item, 'type_p',                 ['externalPerson', 'types', 0, 'value'])
            self._add_subdata(item, 'uuid',                   ['externalPerson', 'uuid'])
            # Checks if the record owner is available in user_ids_match.txt
            person_external_id = get_value(item, ['person', 'externalId'])
            owner = self.general_functions.get_userid_from_list_by_externalid(person_external_id, file_data)
            if owner and int(owner) not in self.data['owners']:

            # ORCID



    def _get_contributor_name(self, item: object):
        """ Stores in self.sub_data['name'] the contributor name as 'last name, first name'.
            A placeholder text replaces each part of the name that Pure does not give. """
        first_name = get_value(item, ['name', 'firstName'])
        last_name  = get_value(item, ['name', 'lastName'])

        if not first_name:
            first_name = '(first name not specified)'
        if not last_name:
            # The placeholder was assigned to first_name, which left last_name
            # empty and replaced an existing first name
            last_name = '(last name not specified)'

        self.sub_data['name'] = f'{last_name}, {first_name}'

    def _process_contributor_orcid(self):
        if 'uuid' in self.sub_data:
            person_uuid = self.sub_data['uuid']
            person_name = self.sub_data['name']
            # External persons are not present in 'persons' Pure API endpoint
            if 'type_p' in self.sub_data and self.sub_data['type_p'] == 'External person':
                report = f'\tPure get orcid @@ External person @ {person_uuid} @ {person_name}'
                orcid = self._get_orcid(person_uuid, person_name)
                if orcid:
                    self.sub_data['orcid'] = orcid

    def _process_organisational_units(self):
        """ Process the metadata relative to the organisational units.
            For each unit of the Pure item its name, uuid and externalId are added to
            self.data['organisationalUnits'] and the relative RDM group is created. """
        if 'organisationalUnits' not in self.item:
            return

        self.data['organisationalUnits'] = []
        self.data['groupRestrictions']   = []

        for unit in self.item['organisationalUnits']:

            organisational_unit_name       = get_value(unit, ['names', 0, 'value'])
            organisational_unit_uuid       = get_value(unit, ['uuid'])
            organisational_unit_externalId = get_value(unit, ['externalId'])

            # The unit data was built and then dropped, leaving the list always empty
            self.data['organisationalUnits'].append({
                'name':       organisational_unit_name,
                'uuid':       organisational_unit_uuid,
                'externalId': organisational_unit_externalId,
            })

            # Adding organisational unit as group owner
            # NOTE(review): self.data['groupRestrictions'] is never filled in the visible
            # code; presumably the unit externalId has to be appended here - confirm

            # Create group
            self.groups.rdm_create_group(organisational_unit_externalId, organisational_unit_name)

    def _applied_restrictions_check(self):
        """ Checks if the restrictions applied to the record are valid.
            e.g. ['groups', 'owners', 'ip_range', 'ip_single']
            Returns False when 'appliedRestrictions' is missing or holds a value that
            is not in possible_record_restrictions, True otherwise. """
        if 'appliedRestrictions' not in self.data:
            return False

        for restriction in self.data['appliedRestrictions']:
            if restriction not in possible_record_restrictions:
                # The warning was built but never reported, and the check
                # still returned True for a not accepted value
                report = f"Warning: the value '{restriction}' is not amont the accepted restrictions\n"
                self.report.add(report)
                return False
        return True

    def _post_metadata(self):
        """ Submits the created json to RDM, then uploads the record files.
            Returns False when the metadata post fails or the recid can not be found,
            otherwise True only if metadata and files were both transmitted. """

        uuid = self.item['uuid']
        success_check = { 'metadata': False, 'file': False }

        # POST REQUEST metadata
        response = self.rdm_requests.post_metadata(self.data)

        # Process response
        if not self._process_post_response(response, uuid):
            return False

        success_check['metadata'] = True

        # After pushing a record's metadata to RDM it takes about one second to be able to get its recid
        # NOTE(review): no wait is visible here before asking for the recid - confirm

        # Gets recid from RDM
        recid = self.general_functions.get_recid(uuid, self.global_counters)
        if not recid:
            return False

        # add record to all_rdm_records.txt (the context manager closes the file handle)
        with open(data_files_name['all_rdm_records'], "a") as records_file:
            records_file.write(f'{uuid} {recid}\n')

        # Submit record FILES
        for file_name in self.record_files:
            # Submit request
            response = rdm_add_file(file_name, recid)
            # Process response (updates the counters and success_check['file'])
            self._process_file_response(response, success_check)

            # if successful:
                # # Sends email to remove record from Pure
                # send_email(uuid, file_name)

        # A record without files has nothing that could fail to upload
        if not self.record_files:
            success_check['file'] = True

        # Checks if both metadata and files were correctly transmitted
        # (the success path used to end without a return value, i.e. None)
        return success_check['metadata'] and success_check['file']

    def _process_post_response(self, response: object, uuid: str):
        # Count http responses

        self.report.add(f"\tRDM post metadata @ {response} @ Uuid:                 {uuid}")

        if response.status_code >= 300:
            self.global_counters['metadata']['error'] += 1
            return False

        self.global_counters['metadata']['success'] += 1
        return True

    def _process_file_response(self, response: object, success_check: object):
        if response:
            self.global_counters['file']['success'] += 1
            success_check['file'] = True

            self.global_counters['file']['error'] += 1

    def _remove_uuid_from_list(self, uuid: str, file_name: str):
        """ If the given uuid is in the given file then the line will be removed """

        with open(file_name, "r") as f:
            lines = f.readlines()
        with open(file_name, "w") as f:
            for line in lines:
                if line.strip("\n") != uuid:

    def _add_field(self, item: list, rdm_field: str, path: list):
        """ Copies the value found at 'path' inside 'item' into self.data[rdm_field].
            Falsy values are not copied. """
        found = get_value(item, path)
        if not found:
            return
        self.data[rdm_field] = found

    def _accessright_conversion(self, pure_value: str):
        """ Maps a Pure access right to its RDM counterpart using accessright_pure_to_rdm.
            Unknown values are reported and False is returned. """
        if pure_value not in accessright_pure_to_rdm:
            self.report.add('\n--- new access_right ---> not in accessright_pure_to_rdmk array\n\n')
            return False

        return accessright_pure_to_rdm[pure_value]

    def _language_conversion(self, pure_language: str):
        """ Converts from pure full language name to iso6393 (3 characters) """

        if pure_language == 'Undefined/Unknown':
            return False
        # Read iso6393 json file
        resp_json = json.load(open(iso6393_file_name, 'r'))

        for i in resp_json:
            if i['name'] == pure_language:
                return i['iso6393']

        # in case there is no match (e.g. spelling mistake in Pure) ignore field
        return False

    def _get_rdm_file_review(self):
        """ When a record is updated in Pure, there will be a check if the new file from Pure is the same as the old file in RDM.
        To do so it makes a comparison on the file size.
        If the size is not the same, then it will be uploaded to RDM and a new internal review will be required. """

        # Get from RDM file size and internalReview
        params = {'sort': 'mostrecent', 'size': '100', 'page': '1', 'q': self.uuid}
        response = self.rdm_requests.get_metadata(params)

        if response.status_code >= 300:
            self.report.add(f'\nget_rdm_file_size @ {self.uuid} @ {response}')
            return False

        # Load response
        resp_json = json.loads(response.content)

        total_recids = resp_json['hits']['total']
        if total_recids == 0:
            return False

        record = resp_json['hits']['hits'][0]['metadata']  # [0] because they are ordered, therefore it is the most recent

        if 'versionFiles' in record:
            for file in record['versionFiles']:
                if 'size' in file and 'internalReview' in file and 'name' in file:
                    file_size   = file['size']
                    file_review = file['internalReview']
                    file_name   = file['name']
                    self.rdm_file_review.append({'size': file_size, 'review': file_review, 'name': file_name})

    def get_files_data(self, item: dict):
        """ Collects the metadata of one electronicVersions / additionalFiles entry
            and downloads the file it refers to.
            Entries without file, file url or file name are ignored (False is returned).

            NOTE(review): the collected values are left in self.sub_data; nothing in this
            method attaches them to self.data - confirm where that is meant to happen. """

        if 'file' not in item:
            return False
        if not ('fileURL' in item['file'] and 'fileName' in item['file']):
            return False

        pure_file_size = get_value(item, ['file', 'size'])
        file_name      = get_value(item, ['file', 'fileName'])
        file_url       = get_value(item, ['file', 'fileURL'])

        # Default value, used when no RDM file matches
        internal_review = False
        self.pure_rdm_file_match = []

        # Looks among the RDM files with the same uuid for one with the same size and name
        for rdm_file in self.rdm_file_review:
            rdm_file_size = str(rdm_file['size'])
            rdm_review    = rdm_file['review']

            if not (pure_file_size == rdm_file_size and file_name == rdm_file['name']):
                continue

            # [do old and new file match?, was the old file reviewed?]
            self.pure_rdm_file_match.extend([True, rdm_review])
            # The new uploaded file keeps the review value it has in RDM
            internal_review = rdm_review

        self.sub_data = {'internalReview': internal_review}

        # RDM field -> path of the value inside the Pure item
        field_paths = (
            ('name',            ['file', 'fileName']),
            ('size',            ['file', 'size']),
            ('mimeType',        ['file', 'mimeType']),
            ('digest',          ['file', 'digest']),
            ('digestAlgorithm', ['file', 'digestAlgorithm']),
            ('createdBy',       ['creator']),
            ('createdDate',     ['created']),
            ('versionType',     ['versionTypes', 0, 'value']),
            ('licenseType',     ['licenseTypes', 0, 'value']),
        )
        for rdm_field, path in field_paths:
            self._add_subdata(item, rdm_field, path)

        # Access type
        access_type = get_value(item, ['accessTypes', 0, 'value'])
        self.sub_data['accessType'] = self._accessright_conversion(access_type)

        # Download file from Pure
        response = get_pure_file(self, file_url, file_name)
        # Checks if the file is already in RDM, and if it has already been reviewed
        self._process_file_download_response(response, file_name)

    def _add_subdata(self, item: list, rdm_field: str, path: list):
        """ Copies the value found at 'path' inside 'item' into self.sub_data[rdm_field].
            Falsy values are not copied. """
        found = get_value(item, path)
        if not found:
            return
        self.sub_data[rdm_field] = found

    def _process_file_download_response(self, response, file_name):
        """ Checks if the file is already in RDM, and if it has already been reviewed.

        Reads self.pure_rdm_file_match ([] or [match, reviewed]) and reports the
        download response together with the match / review state of the file. """
        # If the file is not in RDM
        if not self.pure_rdm_file_match:
            match_review = 'File not in RDM    '

        # If the file in pure is different from the one in RDM
        elif not self.pure_rdm_file_match[0]:
            match_review = 'Match: F, Review: -'

        # If the file is the same, checks if the one in RDM has been reviewed by internal staff
        # (this branch had lost its 'else', which overwrote the label above and left
        #  match_review undefined for matching files)
        else:
            match_review = 'Match: T, Review: F'
            if self.pure_rdm_file_match[1]:
                match_review = 'Match: T, Review: T'

        file_name_report = shorten_file_name(file_name)

        report = f'\tPure get file @ {response} @ {match_review} @ {file_name_report}'
        self.report.add(report)


    def _get_orcid(self, person_uuid: str, name: str):
        """ Requests the person's metadata from Pure and returns its orcid.
            Returns False when the request fails or the person has no orcid.
            Every outcome is added to the report. """
        response = get_pure_metadata('persons', person_uuid, {}, False)
        message = f'\tPure get orcid @ {response} @'

        # Request error
        if response.status_code >= 300:
            self.report.add(f'{message} Error: {response.content}')
            return False

        person = json.loads(response.content)

        # Person without orcid
        if 'orcid' not in person:
            self.report.add(f'{message} Orcid not found @ {person_uuid} @ {name}')
            return False

        orcid = person['orcid']
        self.report.add(f'{message} {orcid} @ {person_uuid} @ {name}')
        return orcid

    def _metadata_and_file_submission_check(self, success_check: dict):
        """ Checks if both metadata and files were correctly transmitted.

        On success the uuid is removed from the transfer list and True is returned;
        otherwise the uuid is appended to the transfer list, so that the record is
        transmitted again, and False is returned. """
        if success_check['metadata'] and success_check['file']:
            # Remove uuid from to_transmit.txt
            self._remove_uuid_from_list(self.uuid, data_files_name['transfer_uuid_list'])
            return True

        # Add uuid to to_transmit.txt to be re-transmitted
        # (only on failure: this part had lost its 'else' and also ran after a success)
        with open(data_files_name['transfer_uuid_list'], "a") as transfer_list:
            transfer_list.write(f'{self.uuid}\n')
        return False

    def _http_response_counter(self, status_code: int):
        """ According to the given http status code 
            creates a new object element or increaes an existing one  """
        if status_code not in self.global_counters['http_responses']:
            self.global_counters['http_responses'][status_code] = 0
        self.global_counters['http_responses'][status_code] += 1
Exemple #9
 def __init__(self):
     # Creates one instance of each helper used by this object;
     # the helper classes are defined outside of this excerpt.
     self.rdm_db = RdmDatabase()
     self.report = Reports()
     self.rdm_requests = Requests()
     self.general_functions = GeneralFunctions()
     # NOTE(review): presumably the report outputs handed to the self.report calls - confirm
     self.report_files = ['console', 'groups']
Exemple #10
class RdmGroups:
    def __init__(self):
        # Creates one instance of each helper used by the group methods;
        # the helper classes are defined outside of this excerpt.
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()
        # Passed as second argument to self.report.add / first argument to self.report.add_template
        self.report_files = ['console', 'groups']

    def _general_report_and_variables(func):
        def _wrapper(self, old_group_externalId, new_groups_externalIds):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP SPLIT'])
                f'\nOld group: {old_group_externalId} @ New groups: {new_groups_externalIds}\n',

            # Get name and uuid of new groups
            self.new_groups_data = []

            # Decorated function
            func(self, old_group_externalId, new_groups_externalIds)

        return _wrapper

    def rdm_group_split(self, old_group_externalId: str,
                        new_groups_externalIds: list):
        """
        Splits an old group into new groups. Intended steps:

        1 - Create new groups
        2 - Add users to new groups
        3 - Remove users from old group
        4 - Delete old group
        5 - Modify RDM record:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False as soon as the name of one of the new groups can not be
        obtained from Pure.

        NOTE(review): only the group creation and the lookup of the old group id are
        carried out here; the calls for steps 2 to 5 are missing - confirm.
        """
        for externalId in new_groups_externalIds:
            # Get group information
            group_name = self._get_pure_group_metadata(externalId)
            if not group_name:
                return False

            # Create new group
            self.rdm_create_group(externalId, group_name)

        # Get old group id (also reports the old group information)
        old_group_id = self._get_rdm_group_id(old_group_externalId)

        # Removes users from old group and adds to new groups
        # NOTE(review): not implemented here; old_group_id is not used yet

        # Modify all related records
        # NOTE(review): not implemented here

    def _general_report_and_variables(func):
        def _wrapper(self, old_groups_externalId, new_group_externalId):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP MERGE'])
            report = f'\nOld groups: {old_groups_externalId} @ New group: {new_group_externalId}\n'
            self.report.add(report, self.report_files)

            # Get new group information
            self.new_groups_data = []

            # Decorated function
            func(self, old_groups_externalId, new_group_externalId)

        return _wrapper

    def rdm_group_merge(self, old_groups_externalId: list,
                        new_group_externalId: str):
        """
        Merges old groups into one new group. Intended steps:

        1 - Create new group
        2 - Remove users from old groups
        3 - Add users to new group
        4 - Delete old groups
        5 - Modify RDM records:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False when the name of the new group can not be obtained from Pure.

        NOTE(review): only the creation of the new group is carried out here;
        the calls for steps 2 to 5 are missing - confirm.
        """
        group_name = self._get_pure_group_metadata(new_group_externalId)
        if not group_name:
            return False

        # Create new group
        self.rdm_create_group(new_group_externalId, group_name)

        # Adds users to new group and removes them from the old ones
        # NOTE(review): not implemented here

        # Modify all related records
        # NOTE(review): not implemented here

    def _get_rdm_group_id(self, externalId: str):
        """ Reads id and description of the role named 'externalId' from the
            accounts_role table, reports them and returns the id. """
        where = {'name': f"'{externalId}'"}
        first_row = self.rdm_db.select_query('id, description', 'accounts_role', where)[0]

        group_id = first_row[0]
        group_name = first_row[1]

        self.report.add(
            f'\tOld group info @ ExtId: {add_spaces(externalId)} @ RDM id: {add_spaces(group_id)} @ {group_name}',
            self.report_files)
        return group_id

    def _rdm_split_modify_record(self, old_group_externalId: str,
                                 new_groups_externalIds: list):
        # Group split: goes through the RDM records returned for the old group and
        # updates each of them via self.general_functions.update_rdm_record.
        # Returns True (also when there is nothing to modify).
        # NOTE(review): several statements below are truncated (unfinished call,
        # 'if' / 'for' headers without body); the method can not run as it is.

        # Get from RDM all old group's records
        # NOTE(review): the arguments of this call are missing
        response = self.rdm_requests.get_metadata_by_query(

        resp_json = json.loads(response.content)
        total_items = resp_json['hits']['total']

        report = f"\tModify old g. records @ ExtId: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
        self.report.add(report, self.report_files)

        # No record belongs to the old group
        if total_items == 0:
            self.report.add('\tNothing to modify @ End', self.report_files)
            return True

        # Iterates over all old group records
        for item in resp_json['hits']['hits']:
            # Only the metadata part of the hit is processed and sent back
            item = item['metadata']

            # Removes old organisationalUnit from organisationalUnits
            # NOTE(review): the body of the 'if' is missing
            for i in item['organisationalUnits']:
                if i['externalId'] == old_group_externalId:

            # Adds new organisationalUnits
            # NOTE(review): the body of the loop is missing
            for i in self.new_groups_data:

            # Change group restrictions
            # NOTE(review): the bodies of the 'if' and of the loop are missing
            if old_group_externalId in item['groupRestrictions']:
            for i in new_groups_externalIds:

            # Change managingOrganisationalUnit
            item = self._process_managing_organisational_unit(
                item, old_group_externalId)

            # Update record
            recid = item['recid']
            response = self.general_functions.update_rdm_record(recid, item)

        return True

    def _process_managing_organisational_unit(self, item: object,
                                              old_group_externalId: str):
        if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
            item['managingOrganisationalUnit_name'] = self.new_groups_data[0][
            item['managingOrganisationalUnit_uuid'] = self.new_groups_data[0][
                'managingOrganisationalUnit_externalId'] = self.new_groups_data[
        return item

    def _rdm_split_users_from_old_to_new_group(self, old_group_id: str,
                                               old_group_externalId: str,
                                               new_groups_externalIds: list):

        # Get all users in old group
        response = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                            {'role_id': old_group_id})

        report = 'Old group @@ Num. of users:  '
        if not response:
            self.report.add(f'\t{report} 0', self.report_files)

        self.report.add(f'\t{report} {len(response)}', self.report_files)

        for i in response:
            user_id = i[0]

            # Get user email
            user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                  {'id': user_id})[0][0]

            for new_group_externalId in new_groups_externalIds:
                # Add user to new groups
                self._group_add_user(user_email, new_group_externalId, user_id)

            # Remove user from old group
            response = self._group_remove_user(user_email,

    def _rdm_merge_modify_records(self, old_groups_externalId: list,
                                  new_group_data: dict,
                                  new_group_externalId: str):
        # Group merge: for every old group goes through the RDM records returned for
        # it and updates each of them via self.general_functions.update_rdm_record.
        # NOTE(review): several statements below are truncated (unfinished calls and
        # subscripts, 'if' header without body); the method can not run as it is.

        # Get from RDM all records with old groups
        for old_group_externalId in old_groups_externalId:


            # Get record metadata
            # NOTE(review): the arguments of this call are missing
            response = self.rdm_requests.get_metadata_by_query(

            resp_json = json.loads(response.content)
            total_items = resp_json['hits']['total']

            report = f"\tModify records @ Group: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
            self.report.add(report, self.report_files)

            # NOTE(review): the body for the "no records" case is missing
            if total_items == 0:

            # Iterates over all old group records
            for item in resp_json['hits']['hits']:

                # Only the metadata part of the hit is processed and sent back
                item = item['metadata']

                # Organisational units
                item = self._process_organisational_units(
                    item, new_group_data, old_groups_externalId)

                # Group restrictions
                # NOTE(review): the last argument of this call is missing
                self._process_group_restrictions(item, old_group_externalId,

                # Managing Organisational Unit
                # NOTE(review): the keys read from new_group_data are truncated
                if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
                    item['managingOrganisationalUnit_name'] = new_group_data[
                    item['managingOrganisationalUnit_uuid'] = new_group_data[
                        'managingOrganisationalUnit_externalId'] = new_group_data[

                # Update record
                recid = item['recid']
                response = self.general_functions.update_rdm_record(
                    recid, item)

    def _process_organisational_units(self, item, new_group_data,
        new_organisationalUnits_data = [new_group_data]

        for i in item['organisationalUnits']:
            if (i['externalId'] in old_groups_externalId
                    or i['externalId'] == new_group_data['externalId']):


        item['organisationalUnits'] = new_organisationalUnits_data
        return item

    def _process_group_restrictions(self, item, old_group_externalId,
        # Remove old group
        if old_group_externalId in item['groupRestrictions']:
        # Add new group
        if new_group_externalId not in item['groupRestrictions']:
        return item

    def _merge_users_from_old_to_new_group(self, old_groups_externalId: list,
                                           new_group_externalId: str):
        # Iterate over old groups
        for old_group_externalId in old_groups_externalId:

            # Get group id
            response = self.rdm_db.select_query(
                'id, description', 'accounts_role',
                {'name': f"'{old_group_externalId}'"})

            if not response:
                    '\nWarning @ Old group ({old_groups_externalId}) not in database @ END TASK\n'
                return False

            old_group_id = response[0][0]
            old_group_name = response[0][1]

            # Get all users id that are in this group
            old_group_users = self.rdm_db.select_query(
                'user_id', 'accounts_userrole', {'role_id': old_group_id})

            if not old_group_users:
                old_group_users = []

            report = f"\tOld group @ ExtId:     {add_spaces(old_group_externalId)} @ Num. users:  {add_spaces(len(old_group_users))} @ {old_group_name}"
            self.report.add(report, self.report_files)

            for i in old_group_users:
                user_id = i[0]

                # Get user email
                user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                      {'id': user_id})[0][0]

                # - - Add user to new group - -
                self._group_add_user(user_email, new_group_externalId, user_id)

                # - - Remove user from old group - -
                response = self._group_remove_user(user_email,

            # Delete old group

    def _get_pure_group_metadata(self, externalId: str):
        """ Get organisationalUnit name and uuid.

        Requests from Pure the research outputs of the organisational unit and looks
        for the unit with the given externalId among the organisational units of the
        first returned item. The outcome is reported.

        Returns the name of the unit, or False when the request fails or the unit
        is not found.

        NOTE(review): self.new_groups_data is read by other methods but is not filled
        here - confirm where the unit data is meant to be stored. """

        # PURE REQUEST
        response = get_pure_metadata('organisational-units',
                                     f'{externalId}/research-outputs', {
                                         'page': 1,
                                         'pageSize': 100
                                     })

        report = f'\tNew group info @ ExtId:     {add_spaces(externalId)} @ '

        # Check response
        if response.status_code >= 300:
            report += 'Not in pure - END TASK\n'
            self.report.add(report, self.report_files)
            self.report.add(response.content, self.report_files)
            return False

        # Load json
        data = json.loads(response.content)
        data = data['items'][0]['organisationalUnits']

        for organisationalUnit in data:
            if organisationalUnit['externalId'] == externalId:

                organisationalUnit_data = {}
                organisationalUnit_data['externalId'] = externalId
                organisationalUnit_data['uuid'] = organisationalUnit['uuid']
                # Same path used for the unit names of the records (names -> 0 -> value)
                organisationalUnit_data['name'] = organisationalUnit['names'][0]['value']

                report += f"{organisationalUnit_data['uuid']} @ {organisationalUnit_data['name']}"
                self.report.add(report, self.report_files)

                return organisationalUnit_data['name']
        return False

    def _rdm_check_if_group_exists(self, group_externalId: str):
        """ Checks if the group already exists"""

        response = self.rdm_db.select_query('*', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})

        if response:
            report = f'\tNew group check @@ ExtId:        {add_spaces(group_externalId)} @ Already exists'
            return True
        return False

    def rdm_create_group(self, externalId: str, group_name: str):
        """ Creates the RDM group (role) named 'externalId' unless it already exists.

        The group name, with spaces replaced by underscores, is used as description
        of the role. The outcome of the command is reported.

        Returns True when the group already exists or has been created,
        False when the command fails. """
        import shlex  # local import: only needed to quote the command arguments

        # Checks if the group already exists
        if self._rdm_check_if_group_exists(externalId):
            return True

        description = group_name.replace(' ', '_')

        # Run command
        # Both values come from Pure: they are quoted so that parentheses or any other
        # shell metacharacter can neither break nor extend the command.
        command = ('pipenv run invenio roles create '
                   f'{shlex.quote(str(externalId))} -d {shlex.quote(description)}')
        response = os.system(command)

        report = '\tNew group check @@'

        if response != 0:
            self.report.add(f'{report} Error: {response}')
            return False

        self.report.add(f'{report} Group created @ External id: {externalId}')
        return True

    def _rdm_add_user_to_group(self, user_id: int, group_externalId: str,
                               group_name: str):
        """ Adds the RDM user to the group, creating the group first when it does not exist.

        Returns True when the user already belongs to the group or has been added,
        False when the group can not be found / created or the command fails. """
        import shlex  # local import: only needed to quote the command arguments

        # Get user's rdm email
        user_email = self.rdm_db.select_query('email', 'accounts_user',
                                              {'id': user_id})[0][0]

        # Get group id
        group_filter = {'name': f"'{group_externalId}'"}
        response = self.rdm_db.select_query('id', 'accounts_role', group_filter)

        if not response:
            # If the group does not exist then creates it
            self.rdm_create_group(group_externalId, group_name)
            # Repeats the query to get the group id
            response = self.rdm_db.select_query('id', 'accounts_role', group_filter)

        if not response:
            # The group could not be created: there is no id to go on with
            return False

        group_id = response[0][0]

        # Checks if match already exists
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        if response:
            report = f'\tRDM user in group @ User id: {add_spaces(user_id)} @@ Already belongs to group {group_externalId} (id {group_id})'
            # The message used to be built but never handed to the report
            self.report.add(report)
            return True

        # Adds user to group (arguments quoted: they are interpolated in a shell command)
        command = ('pipenv run invenio roles add '
                   f'{shlex.quote(str(user_email))} {shlex.quote(str(group_externalId))}')
        response = os.system(command)
        if response != 0:
            self.report.add(f'Warning @ Creating group response: {response}')
            return False
        return True

    def _group_add_user(self, user_email: str, new_group_externalId: str,
                        user_id: str):

        # Get group id
        group_id = self.rdm_db.select_query(
            'id', 'accounts_role', {'name': f"'{new_group_externalId}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id

        if response:
            return True

        command = f'pipenv run invenio roles add {user_email} {new_group_externalId}'
        response = os.system(command)

        report = f'\tAdd user to group @ ExtId:     {add_spaces(new_group_externalId)} @ User id:     {add_spaces(user_id)}'
        if response != 0:
            self.report.add(f'{report} @ Error: {response}', self.report_files)
            return False

        self.report.add(f'{report} @ Success', self.report_files)
        return True

    def _group_remove_user(self, user_email: str, group_name: str):
        """ Removes the user from the group named 'group_name'.

        Returns True when the user is not in the group or has been removed,
        False when the command fails. Every outcome is reported. """
        import shlex  # local import: only needed to quote the command arguments

        # Get user id
        user_id = self.rdm_db.select_query('id', 'accounts_user',
                                           {'email': f"'{user_email}'"})[0][0]

        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_name}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        report = f'Remove user fromGroup @ ExtId:     {add_spaces(group_name)} @ User id:     {add_spaces(user_id)}'

        if not response:
            self.report.add(f'\t{report} @ Not in group (already removed)',
                            self.report_files)
            return True

        # Remove user from old group (arguments quoted: they are interpolated in a shell command)
        command = ('pipenv run invenio roles remove '
                   f'{shlex.quote(str(user_email))} {shlex.quote(str(group_name))}')
        response = os.system(command)

        if response != 0:
            self.report.add(f'\t{report} @ Error: {response}',
                            self.report_files)
            return False

        self.report.add(f'\t{report} @ Success', self.report_files)
        return True