Esempio n. 1
0
class AddFromUuidList:
    """ Reads a list of record uuids from a txt file and submits them to RDM """

    def __init__(self):
        self.report = Reports()
        self.add_record = RdmAddRecord()

    def _set_counters_and_title(func):
        """ Decorator: reports the task title and resets the global counters
            before running the decorated method """
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'], ['PUSH RECORDS FROM LIST'])
            self.global_counters = initialize_counters()
            # Decorated method (its result is handed back to the caller)
            return func(self)
        return _wrapper

    @_set_counters_and_title
    def add_from_uuid_list(self):
        """ Submits to RDM every uuid listed in the transfer file
            (the path stored in data_files_name['transfer_uuid_list']).

            Returns None. Nothing is pushed when the file is empty. """

        uuids = self._read_file()
        if not uuids:
            return

        for uuid in uuids:
            # Keep only the text before the line break
            uuid = uuid.split('\n')[0]

            # Skip entries rejected by check_uuid_authenticity
            # (according to the original note, a uuid length check)
            if not check_uuid_authenticity(uuid):
                self.report.add('Invalid uuid lenght.')
                continue

            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _read_file(self):
        """ Reads all the lines of the transfer file.

            Returns the list of lines (line breaks included), or False when
            the file is empty. """

        file_name = data_files_name['transfer_uuid_list']

        # The context manager closes the file as soon as it has been read
        with open(file_name, 'r') as uuid_file:
            uuids = uuid_file.readlines()

        if not uuids:
            self.report.add('\nThere is nothing to transfer.\n')
            return False

        return uuids
Esempio n. 2
0
class RunPages:
    """ Transfers Pure 'research-outputs' records to RDM one page at a time """

    def __init__(self):
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()

    def get_pure_by_page(self, page_begin: int, page_end: int, page_size: int):
        """ Gets records from the Pure 'research-outputs' endpoint by page and submits them to RDM.

            Pages from page_begin (included) to page_end (excluded) are processed,
            each one with its own set of global counters and its own summary. """

        for page_number in range(page_begin, page_end):
            self._transfer_page(page_number, page_size)

    def _transfer_page(self, page_number, page_size):
        """ Requests a single page from Pure and pushes all its items to RDM """

        # Every page starts with fresh counters
        self.global_counters = initialize_counters()

        # Report intro
        self.report.add_template(['console'], ['general', 'title'], ['PAGES'])
        self.report.add_template(['console'], ['pages', 'page_and_size'], [page_number, page_size])

        # Pure get request
        query = {'page': page_number, 'pageSize': page_size}
        response = get_pure_metadata('research-outputs', '', query)

        # Items found in the json response
        items = json.loads(response.content)['items']

        # Creates the data to push to RDM
        for item in items:
            # Adds an empty line in the console
            self.report.add('')
            self.rdm_add_record.create_invenio_data(self.global_counters, item)

        self.report_summary(page_number, page_size)

    def report_summary(self, pag, page_size):
        """ Reports the counters collected while processing page 'pag' """
        counters = self.global_counters

        # Global counters summary on the console
        self.report.summary_global_counters(['console'], counters)

        # Single summary line for the page (pages.log according to the original note)
        self.report.pages_single_line(counters, pag, page_size)
Esempio n. 3
0
class PureChangesByDate:
    """ Applies to RDM the record changes listed by the Pure 'changes' endpoint """

    def __init__(self):
        self.add_record = RdmAddRecord()
        self.report = Reports()
        self.delete = Delete()
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """ Gets from Pure 'changes' endpoint all records that have been created / updated / deleted
            and modifies accordingly the relative RDM records.

            Only the dates returned by _get_missing_updates are processed. """

        # Dates that are not yet listed in the successful changes file
        missing_updates = self._get_missing_updates()

        if not missing_updates:
            self.report.add('\nNothing to update.\n')
            return

        # _get_missing_updates lists the most recent date first,
        # the dates are therefore processed starting from the oldest one
        for date_to_update in reversed(missing_updates):
            self._changes_by_date(date_to_update)

    def _set_counters_and_title(func):
        """ Decorator: prepares counters and report header before the decorated
            method and reports the summary after it """
        def _wrapper(self, changes_date: str):

            # Initialize global counters
            self.global_counters = initialize_counters()

            # The summary reads the local counters, which must exist also
            # when the decorated method returns before processing any page
            self._initialize_local_counters()

            self.report_files = ['console', 'changes']

            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['CHANGES'])
            self.report.add(f'\nProcessed date: {changes_date}',
                            self.report_files)

            # Decorated function
            result = func(self, changes_date)

            self._report_summary()

            return result

        return _wrapper

    @_set_counters_and_title
    def _changes_by_date(self, changes_date: str):
        """ Gets from Pure all changes that took place in a certain date.

            Returns False when Pure answers with an error status (>= 300),
            True otherwise. """

        reference = changes_date
        page = 1

        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page,
                                                     changes_date)

            # If there are no records to process
            if not json_response:
                return True

            # Used to check if there are doubled tasks (e.g. update uuid and delete same uuid)
            self.duplicated_uuid = []

            # NOTE(review): the local counters are reset at every page, so the
            # final summary covers only the last processed page - confirm intended
            self._initialize_local_counters()

            # Iterates over all records that need to be deleted
            self._delete_records(json_response)

            # Create / Add / Update
            self._update_records(json_response)

            # Gets the reference code of the next page. A falsy value from
            # get_next_page ends the loop instead of failing on .split
            next_page = get_next_page(json_response)
            reference = next_page.split('/')[-1] if next_page else None

            page += 1

        return True

    def _records_to_process(self, response: object, page: int,
                            changes_date: str):
        """ Check if there are records in the response from pure.

            Returns the parsed json, or False when its 'count' is 0. In the
            latter case the date is appended to the successful changes file. """

        # Load response json
        json_response = json.loads(response.content)

        number_records = json_response["count"]

        if number_records == 0:
            # Adds the date to successful_changes.txt
            with open(data_files_name['successful_changes'],
                      "a") as changes_file:
                changes_file.write(f'{changes_date}\n')

            if page == 1:
                # If there are no changes at all
                self.report.add('\n\nNothing to transfer.\n\n',
                                self.report_files)
            return False

        report_line = f'\nPag{add_spaces(page)} @ Pure get changes @ {response} @ Number of items: {add_spaces(number_records)}'
        self.report.add(report_line, self.report_files)

        return json_response

    def _delete_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be deleted.

            Only ResearchOutput items with changeType DELETE are handled. Always returns True. """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                continue
            # .get: an item without familySystemName is skipped instead of raising KeyError
            elif item.get('familySystemName') != 'ResearchOutput':
                continue
            elif item['changeType'] != 'DELETE':
                continue

            uuid = item['uuid']
            # Remembered so that _update_records ignores the same uuid
            self.duplicated_uuid.append(uuid)
            self.local_counters['delete'] += 1

            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"
            self.report.add(report)

            # Gets the record recid
            recid = self.general_functions.get_recid(uuid,
                                                     self.global_counters)

            if recid:
                # Deletes the record from RDM
                self.delete.record(recid)
            else:
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1
        return True

    def _update_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be created/updated """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
                continue
            # .get: an item without familySystemName is counted as not
            # ResearchOutput instead of raising KeyError
            elif item.get('familySystemName') != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
                continue
            elif item['changeType'] == 'DELETE':
                continue

            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1
                continue

            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type           - {item['changeType']}"
            self.report.add(report)

            if item['changeType'] in ('ADD', 'CREATE'):
                self.local_counters['create'] += 1

            if item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1

            # Remembers that this uuid has already been created / updated / deleted
            self.duplicated_uuid.append(uuid)

            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self):
        """ Reading successful_changes.txt gets the dates in
            which Pure changes have not been processed.

            Checks today and the six previous days. Returns the missing dates
            as 'YYYY-MM-DD' strings, most recent first. """

        file_name = data_files_name['successful_changes']
        check_if_file_exists(file_name)

        days_span = 7

        # The file is read once (and closed) instead of once per checked day
        with open(file_name, 'r') as changes_file:
            processed_dates = changes_file.read()

        missing_updates = []
        date_check = datetime.today().date()

        for _ in range(days_span):

            # str() of a date gives the 'YYYY-MM-DD' format used in the file
            if str(date_check) not in processed_dates:
                missing_updates.append(str(date_check))

            date_check = date_check - timedelta(days=1)

        return missing_updates

    def _report_summary(self):
        """ Reports global and local counters in the current report files """

        # Global counters
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

        # Local counters, in the order in which they are declared
        arguments = [
            add_spaces(value) for value in self.local_counters.values()
        ]
        self.report.add_template(self.report_files, ['changes', 'summary'],
                                 arguments)

    def _initialize_local_counters(self):
        """ Sets all local counters to zero """

        # Incomplete:  when the uuid or changeType are not specified
        # Duplicated:  e.g. when a record has been modified twice in a day
        # Irrelevant:  when familySystemName is not ResearchOutput

        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
        }
class Delete:
    """ Deletes records from RDM and removes them from the local record lists """

    def __init__(self):
        self.rdm_requests = Requests()
        self.report = Reports()

    def record(self, recid: str):
        """ Deletes record from RDM.

            When the request succeeds (or answers 410) the recid is also removed
            from the delete list and from the list of all RDM records.
            Returns the response of the delete request. """

        # NOTE: the user ACCOUNT related to the used TOKEN must be ADMIN

        # Delete record request
        response = self.rdm_requests.delete_metadata(recid)

        report = f'\tRDM delete record @ {response} @ Deleted recid:        {recid}'
        self.report.add(report)

        # 410 -> "PID has been deleted"
        if response.status_code >= 300 and response.status_code != 410:
            return response

        # Remove deleted recid from to_delete.txt
        self._remove_recid_from_delete_list(recid)

        # remove record from all_rdm_records.txt
        self._remove_recid_from_records_list(recid)

        return response

    def _set_counters_and_title(func):
        """ Decorator: reports the title and resets the counters before the
            decorated method, reports the counters after it """
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'],
                                     ["DELETE FROM LIST"])
            self.counters = {'total': 0, 'success': 0, 'error': 0}
            # Decorated function
            result = func(self)

            report = f"\nTotal: {self.counters['total']} @ Success: {self.counters['success']} @ Error: {self.counters['error']}"
            self.report.add(report)

            return result

        return _wrapper

    @_set_counters_and_title
    def from_list(self):
        """ Deletes all recids that are listed into data/to_delete.txt.

            Every non empty line is counted in 'total' and then either in
            'success' or in 'error'. """

        recids = self._read_file_recids()
        if not recids:
            return

        for recid in recids:

            recid = recid.strip('\n')

            # Ignore empty lines
            if not recid:
                continue

            self.counters['total'] += 1

            if len(recid) != 11:
                self.report.add(f'\n{recid} -> Wrong recid lenght! \n')
                # An invalid recid is a failed deletion: without counting it
                # total would differ from success + error
                self.counters['error'] += 1
                continue

            # -- REQUEST --
            response = self.record(recid)

            # 410 -> "PID has been deleted"
            if response.status_code < 300 or response.status_code == 410:
                self.counters['success'] += 1
            else:
                self.counters['error'] += 1

    def all_records(self):
        """ Delete all RDM records listed in the all_rdm_records file.

            The recid is read from the second space separated field of each line. """
        with open(data_files_name['all_rdm_records']) as records_file:
            file_data = records_file.readlines()

        for line in file_data:
            fields = line.split(' ')
            # Blank or malformed lines have no recid field
            if len(fields) < 2:
                continue
            self.record(fields[1].strip('\n'))

    def _read_file_recids(self):
        """ Reads from to_delete.txt all recids to be deleted.

            Returns the list of lines, or False when the file is empty. """
        file_name = data_files_name['delete_recid_list']
        with open(file_name, 'r') as recids_file:
            recids = recids_file.readlines()
        if not recids:
            self.report.add('\nNothing to delete.\n')
            return False
        return recids

    def _remove_recid_from_delete_list(self, recid):
        """ Rewrites the delete list without the lines equal to recid """
        file_name = 'delete_recid_list'
        lines = file_read_lines(file_name)
        kept_lines = [line for line in lines if line.strip("\n") != recid]
        with open(data_files_name[file_name], "w") as f:
            f.writelines(kept_lines)

    def _remove_recid_from_records_list(self, recid):
        """ Rewrites the list of all RDM records without the lines whose
            second field is recid """
        file_name = 'all_rdm_records'
        lines = file_read_lines(file_name)

        # The lines to keep are selected before the file is opened for
        # writing, so that a failure can not leave it half written.
        # Lines without a recid field are kept as they are.
        kept_lines = []
        for line in lines:
            fields = line.strip("\n").split(' ')
            if len(fields) > 1 and fields[1] == recid:
                continue
            kept_lines.append(line)

        with open(data_files_name[file_name], "w") as f:
            f.writelines(kept_lines)
Esempio n. 5
0
class RdmOwners:
    """ Adds the logged in user as owner of his Pure records in RDM and
        reports the owners of all RDM records """

    def __init__(self):
        self.rdm_requests = Requests()
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        """ Decorator: reports the task title and resets the global counters
            before running the decorated method """
        def _wrapper(self, identifier, identifier_value=None):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['OWNERS CHECK'])
            self.global_counters = initialize_counters()

            # Decorated function
            return func(self, identifier, identifier_value)

        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str, identifier_value: str = None):
        """ Gets from pure all the records related to a certain user (based on orcid or externalId),
            afterwards it modifies/create RDM records accordingly.

            identifier:        name of the Pure person field to match (e.g. 'externalId')
            identifier_value:  value to look for. When omitted, the temporary
                               hard-coded values are used

            Returns False on a Pure error or when the user uuid is not found,
            True when the user has no records, None otherwise. """

        if identifier_value is None:
            # TEMPORARY default values, used when the caller gives no value
            identifier_value = '0000-0002-4154-6945'
            if identifier == 'externalId':
                identifier_value = '30'

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:
            return

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier,
                                                       identifier_value)
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)

        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons',
                                         f'{self.user_uuid}/research-outputs',
                                         params)
            if response.status_code >= 300:
                return False

            # Initial response process and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid  @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(
                    uuid, self.global_counters)

                # Record NOT in RDM, create it
                if not recid:
                    self._create_rdm_record(item)
                    continue

                # Gets record metadata from RDM and checks if the user is already a record owner
                self._process_record_owners(recid)

            page += 1

        self._final_report()

    def _process_record_owners(self, recid):
        """ Gets record metadata from RDM and checks if the user is already a record owner """

        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']

        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"
        )

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the record owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """ Adds the current logged in user as record owner.

            'data' is the record metadata, its 'owners' list is modified in place. """

        data['owners'].append(self.user_id)

        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"
        )

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """ If a record of the processed user is not in RDM creates it """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        """ Reports local and global counters at the end of the task """
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

    def _process_response(self, response: object, page: int):
        """ Checks if there are records to process.

            Returns the parsed json, or False when the first page reports
            a 'count' of 0. """

        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

        if page == 1 and total_items == 0:
            self.report.add('\nThe user has no records @ End task\n')
            return False

        self.report.add(f'\nPag {page} - Get person records    - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """ Looks in Pure for the person whose field key_name is equal to
            key_value and returns the relative user uuid.

            Returns False on a Pure error, when the uuid is not 36 characters
            long or when no person matches. """

        # If the uuid is not found in the first x items then it will continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            }
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            for item in record_json['items']:

                # Persons without the requested field are skipped
                # instead of raising KeyError
                if key_name not in item or item[key_name] != key_value:
                    continue

                first_name = item['name']['firstName']
                lastName = item['name']['lastName']
                uuid = item['uuid']

                self.report.add(
                    f'Name:    {first_name} {lastName}\nUuid:    {uuid}',
                    self.report_files)

                if len(uuid) != 36:
                    self.report.add(
                        '\n- Warning! Incorrect user_uuid length -\n',
                        self.report_files)
                    return False
                return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
        return False

    #   ---         ---         ---
    def _get_user_id_from_rdm(self):
        """ Gets the ID and IP of the logged in user.

            Returns the user id, or False unless exactly one row is found
            in the session activity table. """

        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False

        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False

        self.report.add(
            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
            self.report_files)

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """ Add user to user_ids_match.txt, where are specified:
            rdm_user_id, user_uuid and user_external_id """
        file_name = data_files_name['user_ids_match']

        needs_to_add = self._check_user_ids_match('user_ids_match',
                                                  external_id)

        if needs_to_add:
            with open(file_name, 'a') as ids_file:
                ids_file.write(
                    f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """ Returns False when the file already has a line with exactly the
            current user id, user uuid and external id, True otherwise """

        expected = [str(self.user_id), self.user_uuid, external_id]

        for line in file_read_lines(file_name):
            # Comparing the whole line at once never indexes a missing field,
            # so blank or short lines simply do not match
            if line.split('\n')[0].split(' ') == expected:
                self.report.add('Ids list:   user in list',
                                self.report_files)
                return False
        return True

    def _initalizing_method(func):
        """ Decorator: reports the task title and empties the record owners
            file before running the decorated method """
        def _wrapper(self):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['RECORDS OWNER'])

            # Empty file rdm_records_owner.txt
            file_owner = data_files_name['rdm_record_owners']
            with open(file_owner, 'w'):
                pass

            # Decorated function
            return func(self)

        return _wrapper

    @_initalizing_method
    def get_rdm_record_owners(self):
        """ Gets all records from RDM and counts how many records belong to each user.
            It also updates the content of all_rdm_records.txt """

        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        # Lines of all_rdm_records.txt, joined once at the end
        all_records = []
        next_page = True

        while next_page:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)

            self.report.add(f'\n{response}\n')

            if response.status_code >= 300:
                # NOTE(review): after a failed request all_rdm_records.txt is
                # still rewritten below with the records collected so far
                self.report.add(response.content)
                break

            resp_json = json.loads(response.content)
            page_lines = []

            for item in resp_json['hits']['hits']:
                count += 1

                metadata = item['metadata']
                uuid = metadata['uuid']
                recid = metadata['recid']
                owners = metadata['owners']

                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                page_lines.append(f'{line}\n')

                all_records.append(f'{uuid} {recid}\n')

                for owner in owners:
                    count_records_per_owner[owner] = \
                        count_records_per_owner.get(owner, 0) + 1

            self.report.add(f'\nPag {str(pag)} - Records {count}\n')

            with open(data_files_name['rdm_record_owners'],
                      'a') as owners_file:
                owners_file.writelines(page_lines)

            if 'next' not in resp_json['links']:
                next_page = False

            pag += 1

        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)

        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(''.join(all_records))

    def _count_records_per_owner(self, count_records_per_owner):
        """ Reports one line for each owner with the number of his records """

        self.report.add('Owner  Records')

        for key in count_records_per_owner:
            records = add_spaces(count_records_per_owner[key])
            key = add_spaces(key)
            self.report.add(f'{key}    {records}')

    def _update_all_rdm_records_file(self, all_records_list):
        """ Replaces the content of all_rdm_records.txt with all_records_list """

        file_all_records_list = data_files_name['all_rdm_records']
        # Opening with 'w' empties the file before the new content is written
        with open(file_all_records_list, 'w') as records_file:
            records_file.write(all_records_list)
class ImportRecords:
    """ Reads RDM records by page and converts them into an xml file
        that is meant to be imported in Pure """

    def __init__(self):
        self.rdm_requests = Requests()
        self.report = Reports()
        # File in which the generated xml is saved
        self.file_name = "/home/bootcamp/src/pure_sync_rdm/synchronizer/data/temporary_files/test.xml"

    def run_import(self):
        """ Gets RDM records page by page and creates the xml of each page.
            The task ends when a page can not be retrieved or lists no records.

            NOTE: _parse_xml opens self.file_name in 'w' mode for every page,
            therefore the file holds only the last processed page """

        # Report title
        self.report.add_template(['console'], ['general', 'title'],
                                 ['PURE IMPORT'])

        page = 1
        page_size = 20

        # Get RDM records by page
        while True:

            data = self._get_rdm_records_metadata(page, page_size)

            if not data:
                self.report.add("\n\tEnd task\n")
                return

            self._create_xml(data)

            page += 1

    def _check_uuid(self, item):
        """ If a uuid is specified in the RDM record means that it was imported
            from Pure. In this case, the record will be ignored.
            Returns False (and reports it) when 'uuid' is in item, True otherwise """
        if 'uuid' in item:
            self.report.add(f"{self.report_base} Already in Pure")
            return False
        return True

    def _check_date(self, item):
        """ Checks if the record creation date (item['created']) is greater
            than the value returned by current_date().
            Returns True in that case, otherwise reports the record as too old
            and returns False """
        if item['created'] > current_date():
            return True

        date = item['created'].split('T')[0]
        self.report.add(f"{self.report_base} Too old: {date}")
        return False

    def _create_xml(self, data):
        """ Creates the xml file that will be imported in pure.

            data: list of RDM records, each with 'id' and 'metadata' keys """

        name_space = {
            'dataset': 'v1.dataset.pure.atira.dk',
            'commons': 'v3.commons.pure.atira.dk',
        }

        ET.register_namespace('v1', name_space['dataset'])
        ET.register_namespace('v3', name_space['commons'])

        # Build a tree structure
        self.root = ET.Element("{%s}datasets" % name_space['dataset'])

        for count, item in enumerate(data, 1):

            # Kept as attributes because they are read by _add_links and by the reports
            self.full_item = item
            self.report_base = f"{add_spaces(count)} - {item['id']} -"
            item_metadata = item['metadata']

            # # Checks if the record was created today
            # if not self._check_date(item):
            #     self.report.add("\n\tEnd task\n")
            #     next_page = False
            #     break

            # # If the rdm record has a uuid means that it was imported from pure
            # if not self._check_uuid(item_metadata):
            #     continue

            self.report.add(f"{self.report_base} Adding")

            # Adds fields to the created xml element
            self._populate_xml(item_metadata, name_space)

        self._parse_xml()

    def _populate_xml(self, item, name_space):
        """ Adds to the xml root a dataset element filled with the record metadata.

            item:       metadata of the RDM record
            name_space: dict with the 'dataset' and 'commons' namespaces

            Returns False, without adding anything to the tree, when the
            record has no title. Returns None otherwise. """

        # Title                     (mandatory field)
        # It is checked before creating the dataset element, so that a record
        # without title does not leave an empty dataset element in the tree
        title = get_value(item, ['title'])
        if not title:
            return False

        # Dataset element
        body = ET.SubElement(self.root, "{%s}dataset" % name_space['dataset'])
        body.set('type', 'dataset')
        self._sub_element(body, name_space['dataset'], 'title').text = title

        # Managing organisation     (mandatory field)
        organisational_unit = self._sub_element(body, name_space['dataset'],
                                                'managingOrganisation')
        self._add_attribute(item, organisational_unit, 'lookupId',
                            ['managingOrganisationalUnit_externalId'])

        # Persons                   (mandatory field)
        self._add_persons(body, name_space, item)

        # Available date            (mandatory field)
        date = self._sub_element(body, name_space['dataset'], 'availableDate')
        sub_date = self._sub_element(date, name_space['commons'], 'year')
        sub_date.text = get_value(item, ['publication_date'])

        # Publisher                 (mandatory field)
        publisher = self._sub_element(body, name_space['dataset'],
                                      'publisher')  # REVIEW!!!!
        publisher.set(
            'lookupId',
            '45d22915-6545-4428-896a-8b8046191d5d')  # Data not in rdm
        self._sub_element(publisher, name_space['dataset'],
                          'name').text = 'Test publisher'  # Data not in rdm
        self._sub_element(publisher, name_space['dataset'],
                          'type').text = 'publisher'  # Data not in rdm

        # Description
        # The record abstract is used when available, 'test description'
        # remains as placeholder for the records that have none
        value = get_value(item, ['abstract']) or 'test description'
        descriptions = self._sub_element(body, name_space['dataset'],
                                         'descriptions')
        description = self._sub_element(descriptions, name_space['dataset'],
                                        'description')
        description.set('type', 'datasetdescription')
        description.text = value

        # Links
        self._add_links(body, name_space)

        # Organisations
        self._add_organisations(body, name_space, item)

        # FIELDS THAT ARE NOT IN DATASET XSD - NEEDS REVIEW:
        # language                  ['languages', 0, 'value']
        # organisationalUnits       ['personAssociations' ...]
        # peerReview                ['peerReview']
        # createdDate               ['info', 'createdDate']
        # publicationDate           ['publicationStatuses', 0, 'publicationDate', 'year']
        # publicationStatus         ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']
        # recordType                ['types', 0, 'value']
        # workflow                  ['workflows', 0, 'value']
        # pages                     ['info','pages']
        # volume                    ['info','volume']
        # journalTitle              ['info', 'journalAssociation', 'title', 'value']
        # journalNumber             ['info', 'journalNumber']

        # PURE RESPONSE
        # cvc-complex-type.2.4.b: The content of element 'v1:dataset' is not complete.
        # One of '{
        # "v1.dataset.pure.atira.dk":translatedTitles,
        # "v1.dataset.pure.atira.dk":description,
        # "v1.dataset.pure.atira.dk":ids,
        # "v1.dataset.pure.atira.dk":additionalDescriptions,
        # "v1.dataset.pure.atira.dk":temporalCoverage,
        # "v1.dataset.pure.atira.dk":productionDate,
        # "v1.dataset.pure.atira.dk":geoLocation,
        # "v1.dataset.pure.atira.dk":organisations,
        # "v1.dataset.pure.atira.dk":DOI,
        # "v1.dataset.pure.atira.dk":physicalDatas,
        # "v1.dataset.pure.atira.dk":publisher,
        # "v1.dataset.pure.atira.dk":openAccess,
        # "v1.dataset.pure.atira.dk":embargoPeriod,
        # "v1.dataset.pure.atira.dk":constraints,
        # "v1.dataset.pure.atira.dk":keywords,
        # "v1.dataset.pure.atira.dk":links,
        # "v1.dataset.pure.atira.dk":documents,
        # "v1.dataset.pure.atira.dk":relatedProjects,
        # "v1.dataset.pure.atira.dk":relatedEquipments,
        # "v1.dataset.pure.atira.dk":relatedStudentThesis,
        # "v1.dataset.pure.atira.dk":relatedPublications,
        # "v1.dataset.pure.atira.dk":relatedActivities,
        # "v1.dataset.pure.atira.dk":relatedDatasets,
        # "v1.dataset.pure.atira.dk":visibility,
        # "v1.dataset.pure.atira.dk":workflow
        # }' is expected.

    def _add_organisations(self, body, name_space, item):
        """ Adds the 'organisations' element with one 'organisation'
            for each entry of item['organisationalUnits'] """
        organisations = self._sub_element(body, name_space['dataset'],
                                          'organisations')

        for unit_data in item['organisationalUnits']:

            # Pure dataset documentation:
            # Can be both an internal and external organisation, use origin to enforce either internal or external.
            # If the organisation is an internal organisation in Pure, then the lookupId attribute must be used.
            # If the organisation is an external organisation and id is given matching will be done on the id,
            # if not found matching will be done on name, if still not found then an external
            # organisation with the specified id and organisation will be created.

            organisation = self._sub_element(organisations,
                                             name_space['dataset'],
                                             'organisation')
            self._add_attribute(unit_data, organisation, 'lookupId',
                                ['externalId'])
            name = self._sub_element(organisation, name_space['dataset'],
                                     'name')
            name.text = get_value(unit_data, ['name'])

    def _add_persons(self, body, name_space, item):
        """ Adds the 'persons' element with one 'person'
            for each entry of item['contributors'] """
        persons = self._sub_element(body, name_space['dataset'], 'persons')

        for person_data in item['contributors']:
            person = self._sub_element(persons, name_space['dataset'],
                                       'person')
            person.set('contactPerson', 'true')
            self._add_attribute(person_data, person, 'id', ['uuid'])
            # External id
            # NOTE(review): a 'person' element nested in 'person' - confirm against the dataset xsd
            person_id = self._sub_element(person, name_space['dataset'],
                                          'person')
            self._add_attribute(person_data, person_id, 'lookupId',
                                ['externalId'])
            # Role
            role = self._sub_element(person, name_space['dataset'], 'role')
            role.text = get_value(person_data, ['personRole'])
            # Name
            name = self._sub_element(person, name_space['dataset'], 'name')
            name.text = get_value(person_data, ['name'])

    def _add_links(self, body, name_space):
        """ Adds relative links for RDM files and api.
            The values are read from self.full_item; the 'links' element
            is created only when at least one of the two links is available """
        link_files = get_value(self.full_item, ['links', 'files'])
        link_self = get_value(self.full_item, ['links', 'self'])
        recid = get_value(self.full_item, ['id'])

        if not (link_files or link_self):
            return

        links = self._sub_element(body, name_space['dataset'], 'links')
        # Files
        if link_files:
            link = self._sub_element(links, name_space['dataset'], 'link')
            link.set('id', recid)  # REVIEW - which id?
            self._sub_element(link, name_space['dataset'],
                              'url').text = link_files
            self._sub_element(link, name_space['dataset'],
                              'description').text = 'Link to record files'
        # Self
        if link_self:
            link = self._sub_element(links, name_space['dataset'], 'link')
            link.set('id', recid)  # REVIEW - which id?
            self._sub_element(link, name_space['dataset'],
                              'url').text = link_self
            self._sub_element(link, name_space['dataset'],
                              'description').text = 'Link to record API'

    def _parse_xml(self):
        """ Saves self.root as indented xml in self.file_name (the file is overwritten) """
        xml_str = minidom.parseString(ET.tostring(
            self.root)).toprettyxml(indent="   ")
        # The xml declaration produced by toprettyxml names no encoding,
        # which for xml means utf-8: the file is written accordingly.
        # The with statement closes the file also in case of error
        with open(self.file_name, "w", encoding="utf-8") as xml_file:
            xml_file.write(xml_str)

    def _sub_element(self, element, namespace: str, sub_element_name: str):
        """ Adds to the xml a sub element of the given element and returns it """
        return ET.SubElement(element, "{%s}%s" % (namespace, sub_element_name))

    def _add_attribute(self, item: object, sub_element, attribute: str,
                       value_path: list):
        """ Gets from the rdm response a value and adds it as attribute to a given xml element.
            Nothing is added when the value is empty """
        value = get_value(item, value_path)
        if value:
            sub_element.set(attribute, value)

    def _add_text(self, item: object, sub_element: object, path):
        """ Gets from the rdm response a value and adds it as text to a given xml element """
        sub_element.text = get_value(item, path)

    def _get_rdm_records_metadata(self, page: int, page_size: int):
        """ Requests to rdm records metadata by page.
            Returns the list of records, or False when the request
            fails (status code >= 300) or no record is listed """

        params = {'sort': 'mostrecent', 'size': page_size, 'page': page}
        response = self.rdm_requests.get_metadata(params)

        if response.status_code >= 300:
            return False
        # Load response
        json_data = json.loads(response.content)['hits']['hits']

        # Checks if any record is listed
        if not json_data:
            return False

        self.report.add_template(['console'], ['pages', 'page_and_size'],
                                 [page, page_size])
        self.report.add('')  # adds empty line

        return json_data
Esempio n. 7
0
class RdmGroups:
    def __init__(self):
        """ Creates the helpers used by the group operations """
        # List passed to self.report.add / add_template by the group reports
        self.report_files = ['console', 'groups']

        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()

    def _general_report_and_variables(func):
        """ Decorator of rdm_group_split: adds the title and the groups to
            the report and resets self.new_groups_data before running it """
        # Imported here so that the decorator does not depend on the file imports
        from functools import wraps

        @wraps(func)  # keeps name and docstring of the decorated method
        def _wrapper(self, old_group_externalId, new_groups_externalIds):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP SPLIT'])
            self.report.add(
                f'\nOld group: {old_group_externalId} @ New groups: {new_groups_externalIds}\n',
                self.report_files)

            # Filled by _get_pure_group_metadata with the data of the new groups
            self.new_groups_data = []

            # Decorated function: its result is returned to the caller,
            # otherwise the 'return False' of a failed split would be lost
            return func(self, old_group_externalId, new_groups_externalIds)

        return _wrapper

    @_general_report_and_variables
    def rdm_group_split(self, old_group_externalId: str,
                        new_groups_externalIds: list):
        """ 
        1 - Create new groups
        2 - Add users to new groups
        3 - Remove users from old group
        4 - Delete old group
        5 - Modify RDM record: 
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False when the metadata of one of the new groups can not be
        obtained (nothing else is done in that case), None otherwise.
        NOTE: step 4 is not performed by the code of this method.
        """
        for externalId in new_groups_externalIds:
            # Get group information
            group_name = self._get_pure_group_metadata(externalId)
            if not group_name:
                return False

            # Create new group
            self.rdm_create_group(externalId, group_name)

        # Get old group id
        old_group_id = self._get_rdm_group_id(old_group_externalId)

        # Removes users from old group and adds to new groups
        self._rdm_split_users_from_old_to_new_group(old_group_id,
                                                    old_group_externalId,
                                                    new_groups_externalIds)

        # Modify all related records
        self._rdm_split_modify_record(old_group_externalId,
                                      new_groups_externalIds)

    def _general_report_and_variables(func):
        """ Decorator of rdm_group_merge: adds the title and the groups to
            the report and resets self.new_groups_data before running it.
            It reuses the name of the decorator declared for rdm_group_split,
            which at this point is already applied to that method """
        # Imported here so that the decorator does not depend on the file imports
        from functools import wraps

        @wraps(func)  # keeps name and docstring of the decorated method
        def _wrapper(self, old_groups_externalId, new_group_externalId):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP MERGE'])
            report = f'\nOld groups: {old_groups_externalId} @ New group: {new_group_externalId}\n'
            self.report.add(report, self.report_files)

            # Filled by _get_pure_group_metadata with the data of the new group
            self.new_groups_data = []

            # Decorated function: its result is returned to the caller,
            # otherwise the 'return False' of a failed merge would be lost
            return func(self, old_groups_externalId, new_group_externalId)

        return _wrapper

    @_general_report_and_variables
    def rdm_group_merge(self, old_groups_externalId: list,
                        new_group_externalId: str):
        """ 
        1 - Create new group
        2 - Remove users from old groups
        3 - Add users to new group
        4 - Delete old groups
        5 - Modify RDM records: 
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False when the metadata of the new group can not be
        obtained (nothing else is done in that case), None otherwise.
        NOTE: step 4 is not performed by the code of this method.
        """
        group_name = self._get_pure_group_metadata(new_group_externalId)
        if not group_name:
            return False

        # Create new group
        self.rdm_create_group(new_group_externalId, group_name)

        # Adds users to new group and removes them from the old ones
        self._merge_users_from_old_to_new_group(old_groups_externalId,
                                                new_group_externalId)

        # Modify all related records
        # (new_groups_data[0] was added by _get_pure_group_metadata)
        self._rdm_merge_modify_records(old_groups_externalId,
                                       self.new_groups_data[0],
                                       new_group_externalId)

    def _get_rdm_group_id(self, externalId: str):
        """ Reads from accounts_role id and description of the group named
            externalId, adds them to the report and returns the id """
        rows = self.rdm_db.select_query('id, description', 'accounts_role',
                                        {'name': f"'{externalId}'"})

        # Only the first returned row is taken into account
        group_id, group_name = rows[0][0], rows[0][1]

        self.report.add(
            f'\tOld group info @ ExtId: {add_spaces(externalId)} @ RDM id: {add_spaces(group_id)} @ {group_name}',
            self.report_files)
        return group_id

    def _rdm_split_modify_record(self, old_group_externalId: str,
                                 new_groups_externalIds: list):
        """ Updates the records obtained by querying RDM with the old group
            externalId: in each of them organisationalUnits, groupRestrictions
            and (if necessary) managingOrganisationalUnit are changed from
            the old group to the new ones.

            Only the records listed in the obtained response are processed.
            Always returns True """

        # Get from RDM all old group's records
        response = self.rdm_requests.get_metadata_by_query(
            old_group_externalId)

        resp_json = json.loads(response.content)
        total_items = resp_json['hits']['total']

        report = f"\tModify old g. records @ ExtId: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
        self.report.add(report, self.report_files)

        if total_items == 0:
            self.report.add('\tNothing to modify @ End', self.report_files)
            return True

        # Iterates over all old group records
        for hit in resp_json['hits']['hits']:
            item = hit['metadata']

            # Removes old organisationalUnit from organisationalUnits.
            # A new list is built because removing elements from a list while
            # iterating over it skips the element that follows each removal
            item['organisationalUnits'] = [
                unit for unit in item['organisationalUnits']
                if unit['externalId'] != old_group_externalId
            ]

            # Adds new organisationalUnits
            item['organisationalUnits'].extend(self.new_groups_data)

            # Change group restrictions
            if old_group_externalId in item['groupRestrictions']:
                item['groupRestrictions'].remove(old_group_externalId)
            item['groupRestrictions'].extend(new_groups_externalIds)

            # Change managingOrganisationalUnit
            item = self._process_managing_organisational_unit(
                item, old_group_externalId)

            # Update record
            self.general_functions.update_rdm_record(item['recid'], item)

        return True

    def _process_managing_organisational_unit(self, item: object,
                                              old_group_externalId: str):
        if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
            item['managingOrganisationalUnit_name'] = self.new_groups_data[0][
                'name']
            item['managingOrganisationalUnit_uuid'] = self.new_groups_data[0][
                'uuid']
            item[
                'managingOrganisationalUnit_externalId'] = self.new_groups_data[
                    0]['externalId']
        return item

    def _rdm_split_users_from_old_to_new_group(self, old_group_id: str,
                                               old_group_externalId: str,
                                               new_groups_externalIds: list):

        # Get all users in old group
        response = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                            {'role_id': old_group_id})

        report = 'Old group @@ Num. of users:  '
        if not response:
            self.report.add(f'\t{report} 0', self.report_files)
            return

        self.report.add(f'\t{report} {len(response)}', self.report_files)

        for i in response:
            user_id = i[0]

            # Get user email
            user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                  {'id': user_id})[0][0]

            for new_group_externalId in new_groups_externalIds:
                # Add user to new groups
                self._group_add_user(user_email, new_group_externalId, user_id)

            # Remove user from old group
            response = self._group_remove_user(user_email,
                                               old_group_externalId)

    def _rdm_merge_modify_records(self, old_groups_externalId: list,
                                  new_group_data: dict,
                                  new_group_externalId: str):

        # Get from RDM all records with old groups
        for old_group_externalId in old_groups_externalId:

            self._rdm_check_if_group_exists(old_group_externalId)

            # Get record metadata
            response = self.rdm_requests.get_metadata_by_query(
                old_group_externalId)

            resp_json = json.loads(response.content)
            total_items = resp_json['hits']['total']

            report = f"\tModify records @ Group: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
            self.report.add(report, self.report_files)

            if total_items == 0:
                continue

            # Iterates over all old group records
            for item in resp_json['hits']['hits']:

                item = item['metadata']

                # Organisational units
                item = self._process_organisational_units(
                    item, new_group_data, old_groups_externalId)

                # Group restrictions
                self._process_group_restrictions(item, old_group_externalId,
                                                 new_group_externalId)

                # Managing Organisational Unit
                if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
                    item['managingOrganisationalUnit_name'] = new_group_data[
                        'name']
                    item['managingOrganisationalUnit_uuid'] = new_group_data[
                        'uuid']
                    item[
                        'managingOrganisationalUnit_externalId'] = new_group_data[
                            'externalId']

                # Update record
                recid = item['recid']
                response = self.general_functions.update_rdm_record(
                    recid, item)

    def _process_organisational_units(self, item, new_group_data,
                                      old_groups_externalId):
        new_organisationalUnits_data = [new_group_data]

        for i in item['organisationalUnits']:
            if (i['externalId'] in old_groups_externalId
                    or i['externalId'] == new_group_data['externalId']):
                continue

            new_organisationalUnits_data.append(i)

        item['organisationalUnits'] = new_organisationalUnits_data
        return item

    def _process_group_restrictions(self, item, old_group_externalId,
                                    new_group_externalId):
        # Remove old group
        if old_group_externalId in item['groupRestrictions']:
            item['groupRestrictions'].remove(old_group_externalId)
        # Add new group
        if new_group_externalId not in item['groupRestrictions']:
            item['groupRestrictions'].append(new_group_externalId)
        return item

    def _merge_users_from_old_to_new_group(self, old_groups_externalId: list,
                                           new_group_externalId: str):
        # Iterate over old groups
        for old_group_externalId in old_groups_externalId:

            # Get group id
            response = self.rdm_db.select_query(
                'id, description', 'accounts_role',
                {'name': f"'{old_group_externalId}'"})

            if not response:
                self.report.add(
                    '\nWarning @ Old group ({old_groups_externalId}) not in database @ END TASK\n'
                )
                return False

            old_group_id = response[0][0]
            old_group_name = response[0][1]

            # Get all users id that are in this group
            old_group_users = self.rdm_db.select_query(
                'user_id', 'accounts_userrole', {'role_id': old_group_id})

            if not old_group_users:
                old_group_users = []

            report = f"\tOld group @ ExtId:     {add_spaces(old_group_externalId)} @ Num. users:  {add_spaces(len(old_group_users))} @ {old_group_name}"
            self.report.add(report, self.report_files)

            for i in old_group_users:
                user_id = i[0]

                # Get user email
                user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                      {'id': user_id})[0][0]

                # - - Add user to new group - -
                self._group_add_user(user_email, new_group_externalId, user_id)

                # - - Remove user from old group - -
                response = self._group_remove_user(user_email,
                                                   old_group_externalId)

            # Delete old group

    def _get_pure_group_metadata(self, externalId: str):
        """ Get organisationalUnit name and uuid.

            The research outputs of the organisational unit are requested to
            Pure and the unit with the given externalId is searched in their
            'organisationalUnits'. When found, its externalId, uuid and name
            are appended to self.new_groups_data and the name is returned.

            Returns False when the request fails (status code >= 300) or
            when the unit is not found in the response """

        # PURE REQUEST
        response = get_pure_metadata('organisational-units',
                                     f'{externalId}/research-outputs', {
                                         'page': 1,
                                         'pageSize': 100
                                     })

        report = f'\tNew group info @ ExtId:     {add_spaces(externalId)} @ '

        # Check response
        if response.status_code >= 300:
            report += 'Not in pure - END TASK\n'
            self.report.add(report, self.report_files)
            self.report.add(response.content, self.report_files)
            return False

        # Load json
        data = json.loads(response.content)

        # All the returned research outputs are checked, not only the first
        # one: an empty 'items' list simply ends in the 'not found' case
        for research_output in data['items']:
            for organisationalUnit in research_output.get('organisationalUnits', []):
                if organisationalUnit['externalId'] != externalId:
                    continue

                organisationalUnit_data = {
                    'externalId': externalId,
                    'uuid': organisationalUnit['uuid'],
                    'name': organisationalUnit['names'][0]['value'],
                }

                report += f"{organisationalUnit_data['uuid']} @ {organisationalUnit_data['name']}"
                self.report.add(report, self.report_files)

                self.new_groups_data.append(organisationalUnit_data)
                return organisationalUnit_data['name']

        # The callers interrupt the task on False: the reason is reported
        report += 'Not found in pure response - END TASK\n'
        self.report.add(report, self.report_files)
        return False

    def _rdm_check_if_group_exists(self, group_externalId: str):
        """ Checks if the group already exists"""

        response = self.rdm_db.select_query('*', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})

        if response:
            report = f'\tNew group check @@ ExtId:        {add_spaces(group_externalId)} @ Already exists'
            self.report.add(report)
            return True
        return False

    def rdm_create_group(self, externalId: str, group_name: str):
        """ Creates in RDM the group (role) externalId, with group_name as description.

            Returns True when the group already exists or is created,
            False when the command can not be run or ends with an error """
        # Imported here so that the method does not depend on the file imports
        import subprocess

        # Checks if the group already exists
        if self._rdm_check_if_group_exists(externalId):
            return True

        # Spaces are replaced with underscores as before. The command is run
        # without a shell, with one argument per value: parentheses and any
        # other special character of the name reach invenio unchanged and
        # can not alter the command
        description = group_name.replace(' ', '_')
        command = [
            'pipenv', 'run', 'invenio', 'roles', 'create',
            str(externalId), '-d', description
        ]

        report = '\tNew group check @@'

        # Run command
        try:
            response = subprocess.run(command).returncode
        except OSError as error:
            # The command could not be started (e.g. pipenv not available)
            self.report.add(f'{report} Error: {error}')
            return False

        if response != 0:
            self.report.add(f'{report} Error: {response}')
            return False

        self.report.add(f'{report} Group created @ External id: {externalId}')
        return True

    def _rdm_add_user_to_group(self, user_id: int, group_externalId: str,
                               group_name: str):
        """ Adds the user to the group, creating the group when it is not
            in accounts_role.

            Returns True when the user already belongs to the group, False
            when the group is still missing after the attempt to create it,
            None after running the command that adds the user """
        # Imported here so that the method does not depend on the file imports
        import subprocess

        # Get user's rdm email
        user_email = self.rdm_db.select_query('email', 'accounts_user',
                                              {'id': user_id})[0][0]

        # Get group id
        response = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})

        if not response:
            # If the group does not exist then creates it
            self.rdm_create_group(group_externalId, group_name)
            # Repeats the query to get the group id
            response = self.rdm_db.select_query(
                'id', 'accounts_role', {'name': f"'{group_externalId}'"})
            if not response:
                # The group could not be created: there is no id to work with
                return False

        group_id = response[0][0]

        # Checks if match already exists
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        if response:
            report = f'\tRDM user in group @ User id: {add_spaces(user_id)} @@ Already belongs to group {group_externalId} (id {group_id})'
            self.report.add(report)
            return True

        # Adds user to group. The command is run without a shell, with one
        # argument per value, so that email and group can not alter it
        command = [
            'pipenv', 'run', 'invenio', 'roles', 'add',
            str(user_email), str(group_externalId)
        ]
        try:
            response = subprocess.run(command).returncode
        except OSError as error:
            # The command could not be started (e.g. pipenv not available)
            response = error
        if response != 0:
            self.report.add(
                f'Warning @ Adding user to group response: {response}')

    def _group_add_user(self, user_email: str, new_group_externalId: str,
                        user_id: str):

        # Get group id
        group_id = self.rdm_db.select_query(
            'id', 'accounts_role', {'name': f"'{new_group_externalId}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        if response:
            return True

        command = f'pipenv run invenio roles add {user_email} {new_group_externalId}'
        response = os.system(command)

        report = f'\tAdd user to group @ ExtId:     {add_spaces(new_group_externalId)} @ User id:     {add_spaces(user_id)}'
        if response != 0:
            self.report.add(f'{report} @ Error: {response}', self.report_files)
            return False

        self.report.add(f'{report} @ Success', self.report_files)
        return True

    def _group_remove_user(self, user_email: str, group_name: str):
        """ Removes the user from the group group_name.

            Returns True when the user is removed or was not in the group,
            False when the command that removes it fails """
        # Imported here so that the method does not depend on the file imports
        import subprocess

        # Get user id
        user_id = self.rdm_db.select_query('id', 'accounts_user',
                                           {'email': f"'{user_email}'"})[0][0]

        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_name}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        report = f'Remove user fromGroup @ ExtId:     {add_spaces(group_name)} @ User id:     {add_spaces(user_id)}'

        if not response:
            self.report.add(f'\t{report} @ Not in group (already removed)',
                            self.report_files)
            return True

        # Remove user from old group. The command is run without a shell,
        # with one argument per value, so that email and group can not alter it
        command = [
            'pipenv', 'run', 'invenio', 'roles', 'remove',
            str(user_email), str(group_name)
        ]
        try:
            response = subprocess.run(command).returncode
        except OSError as error:
            # The command could not be started (e.g. pipenv not available)
            response = error

        if response != 0:
            self.report.add(f'\t{report} @ Error: {response}',
                            self.report_files)
            return False

        self.report.add(f'\t{report} @ Success', self.report_files)
        return True