class AddFromUuidList:
    """ Reads from a txt file a list of record uuids and submits them to RDM. """

    def __init__(self):
        self.report = Reports()
        self.add_record = RdmAddRecord()

    def _set_counters_and_title(func):
        """ Decorator: prints the task title and resets the global counters
            before running the decorated method. """
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'],
                                     ['PUSH RECORDS FROM LIST'])
            self.global_counters = initialize_counters()
            # Decorated method
            func(self)
        return _wrapper

    @_set_counters_and_title
    def add_from_uuid_list(self):
        """ Submits to RDM all uuids in list (data/to_transfer.txt). """
        uuids = self._read_file()
        if not uuids:
            return
        for uuid in uuids:
            # Drop the trailing newline kept by readlines()
            uuid = uuid.split('\n')[0]
            # Checks if the length of the uuid is correct
            if not check_uuid_authenticity(uuid):
                self.report.add('Invalid uuid length.')
                continue
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _read_file(self):
        """ Reads the uuids to transfer from data/to_transfer.txt.
            Returns the list of lines, or False when there is nothing to do. """
        file_name = data_files_name['transfer_uuid_list']
        # Context manager closes the handle (it was leaked before)
        with open(file_name, 'r') as fh:
            uuids = fh.readlines()
        if not uuids:
            self.report.add('\nThere is nothing to transfer.\n')
            return False
        return uuids
class RunPages:
    """ Transfers Pure research outputs to RDM, page by page. """

    def __init__(self):
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()

    def get_pure_by_page(self, page_begin: int, page_end: int, page_size: int):
        """ Gets records from Pure 'research-outputs' endpoint by page
            and submits them to RDM. """
        for page in range(page_begin, page_end):
            self.global_counters = initialize_counters()
            # Report intro
            self.report.add_template(['console'], ['general', 'title'], ['PAGES'])
            self.report.add_template(['console'], ['pages', 'page_and_size'],
                                     [page, page_size])
            # Pure get request
            response = get_pure_metadata('research-outputs', '',
                                         {'page': page, 'pageSize': page_size})
            # Skip the page on an error response instead of crashing on
            # json.loads (consistent with the status check in the other tasks)
            if response.status_code >= 300:
                self.report.add(response.content)
                continue
            # Load json response
            resp_json = json.loads(response.content)
            # Creates data to push to RDM
            for item in resp_json['items']:
                self.report.add('')  # adds new line in the console
                self.rdm_add_record.create_invenio_data(self.global_counters, item)
            self.report_summary(page, page_size)

    def report_summary(self, pag, page_size):
        """ Writes the per-page summary to the console and to pages.log. """
        # Global counters
        self.report.summary_global_counters(['console'], self.global_counters)
        # Summary pages.log
        self.report.pages_single_line(self.global_counters, pag, page_size)
class PureChangesByDate:
    """ Synchronizes RDM with the Pure 'changes' endpoint: processes records
        that were created / updated / deleted, one day at a time. """

    def __init__(self):
        self.add_record = RdmAddRecord()
        self.report = Reports()
        self.delete = Delete()
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """ Gets from Pure 'changes' endpoint all records that have been created
            / updated / deleted and modifies accordingly the relative RDM records. """
        # Get dates of not-yet-processed updates
        # (a debug override that pinned the list to '2020-05-15' was removed)
        missing_updates = self._get_missing_updates()
        if not missing_updates:
            self.report.add('\nNothing to update.\n')
            return
        # Process the oldest missing date first
        for date_to_update in reversed(missing_updates):
            self._changes_by_date(date_to_update)

    def _set_counters_and_title(func):
        """ Decorator: initializes counters, prints title and date, runs the
            decorated method and then prints the summary. """
        def _wrapper(self, changes_date: str):
            # Initialize global counters
            self.global_counters = initialize_counters()
            self.report_files = ['console', 'changes']
            self.report.add_template(self.report_files, ['general', 'title'], ['CHANGES'])
            self.report.add(f'\nProcessed date: {changes_date}', self.report_files)
            # Decorated function
            func(self, changes_date)
            self._report_summary()
        return _wrapper

    @_set_counters_and_title
    def _changes_by_date(self, changes_date: str):
        """ Gets from Pure all changes that took place in a certain date. """
        reference = changes_date
        page = 1
        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})
            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False
            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page, changes_date)
            # If there are no records to process
            if not json_response:
                return True
            # Used to check if there are doubled tasks
            # (e.g. update uuid and delete same uuid)
            self.duplicated_uuid = []
            self._initialize_local_counters()
            # Iterates over all records that need to be deleted
            self._delete_records(json_response)
            # Create / Add / Update
            self._update_records(json_response)
            # Gets the reference code of the next page.
            # get_next_page returns a falsy value when there is no next page;
            # calling .split() on it would raise AttributeError (bug fix).
            next_page = get_next_page(json_response)
            reference = next_page.split('/')[-1] if next_page else None
            page += 1

    def _records_to_process(self, response: object, page: int, changes_date: str):
        """ Check if there are records in the response from pure.
            Returns the parsed json, or False when there is nothing to process. """
        # Load response json
        json_response = json.loads(response.content)
        number_records = json_response["count"]
        if number_records == 0:
            # Adds the date to successful_changes.txt
            with open(data_files_name['successful_changes'], "a") as fh:
                fh.write(f'{changes_date}\n')
            if page == 1:
                # If there are no changes at all
                self.report.add('\n\nNothing to transfer.\n\n', self.report_files)
            return False
        report_line = (f'\nPag{add_spaces(page)} @ Pure get changes @ {response} @ '
                       f'Number of items: {add_spaces(number_records)}')
        self.report.add(report_line, self.report_files)
        return json_response

    def _delete_records(self, json_response: dict):
        """ Iterates over the Pure response and processes all records
            that need to be deleted. """
        for item in json_response['items']:
            # Skip incomplete items and anything that is not a ResearchOutput deletion
            if 'changeType' not in item or 'uuid' not in item:
                continue
            elif item['familySystemName'] != 'ResearchOutput':
                continue
            elif item['changeType'] != 'DELETE':
                continue
            uuid = item['uuid']
            self.duplicated_uuid.append(uuid)
            self.local_counters['delete'] += 1
            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"
            self.report.add(report)
            # Gets the record recid
            recid = self.general_functions.get_recid(uuid, self.global_counters)
            if recid:
                # Deletes the record from RDM
                self.delete.record(recid)
            else:
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1
        return True

    def _update_records(self, json_response: dict):
        """ Iterates over the Pure response and processes all records
            that need to be created / updated. """
        for item in json_response['items']:
            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
                continue
            elif item['familySystemName'] != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
                continue
            elif item['changeType'] == 'DELETE':
                continue
            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1
                continue
            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type - {item['changeType']}"
            self.report.add(report)
            if item['changeType'] in ('ADD', 'CREATE'):
                self.local_counters['create'] += 1
            if item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1
            # Remember this uuid so a second change on the same day is skipped
            self.duplicated_uuid.append(uuid)
            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self):
        """ Reading successful_changes.txt gets the dates (within the last 7 days)
            in which Pure changes have not been processed. """
        file_name = data_files_name['successful_changes']
        check_if_file_exists(file_name)
        # Read the file once instead of re-opening it for every checked day
        with open(file_name, 'r') as fh:
            processed_dates = fh.read()
        missing_updates = []
        days_span = 7
        date_check = datetime.today().date()
        for _ in range(days_span):
            if str(date_check) not in processed_dates:
                missing_updates.append(str(date_check))
            date_check -= timedelta(days=1)
        return missing_updates

    def _report_summary(self):
        """ Writes global and local counters to the report files. """
        # Global counters
        self.report.summary_global_counters(self.report_files, self.global_counters)
        arguments = [add_spaces(self.local_counters[key]) for key in self.local_counters]
        self.report.add_template(self.report_files, ['changes', 'summary'], arguments)

    def _initialize_local_counters(self):
        """ Resets the per-page counters.
            Incomplete:         the uuid or changeType are not specified
            Duplicated:         e.g. when a record has been modified twice in a day
            not_ResearchOutput: when familySystemName is not ResearchOutput """
        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
        }
class Delete:
    """ Deletes records from RDM and keeps the local record list files in sync. """

    def __init__(self):
        self.rdm_requests = Requests()
        self.report = Reports()

    def record(self, recid: str):
        """ Deletes record from RDM.
            NOTE: the user ACCOUNT related to the used TOKEN must be ADMIN. """
        # Delete record request
        response = self.rdm_requests.delete_metadata(recid)
        report = f'\tRDM delete record @ {response} @ Deleted recid: {recid}'
        self.report.add(report)
        # 410 -> "PID has been deleted"
        if response.status_code >= 300 and response.status_code != 410:
            return response
        # Remove deleted recid from to_delete.txt
        self._remove_recid_from_delete_list(recid)
        # Remove record from all_rdm_records.txt
        self._remove_recid_from_records_list(recid)
        return response

    def _set_counters_and_title(func):
        """ Decorator: prints the title, resets the counters, runs the
            decorated method and then prints the totals. """
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'], ["DELETE FROM LIST"])
            self.counters = {'total': 0, 'success': 0, 'error': 0}
            # Decorated function
            func(self)
            report = (f"\nTotal: {self.counters['total']} @ "
                      f"Success: {self.counters['success']} @ "
                      f"Error: {self.counters['error']}")
            self.report.add(report)
        return _wrapper

    @_set_counters_and_title
    def from_list(self):
        """ Deletes all recids that are listed into data/to_delete.txt. """
        recids = self._read_file_recids()
        if not recids:
            return
        for recid in recids:
            recid = recid.strip('\n')
            # Ignore empty lines
            if len(recid) == 0:
                continue
            self.counters['total'] += 1
            # RDM recids are expected to be 11 characters long
            if len(recid) != 11:
                self.report.add(f'\n{recid} -> Wrong recid length!\n')
                continue
            # -- REQUEST --
            response = self.record(recid)
            # 410 -> "PID has been deleted"
            if response.status_code < 300 or response.status_code == 410:
                self.counters['success'] += 1
            else:
                self.counters['error'] += 1

    def all_records(self):
        """ Delete all RDM records listed in all_rdm_records.txt
            (lines formatted as 'uuid recid'). """
        with open(data_files_name['all_rdm_records']) as fh:
            file_data = fh.readlines()
        for line in file_data:
            recid = line.split(' ')[1].strip('\n')
            self.record(recid)

    def _read_file_recids(self):
        """ Reads from to_delete.txt all recids to be deleted.
            Returns the lines, or False when there is nothing to delete. """
        file_name = data_files_name['delete_recid_list']
        with open(file_name, 'r') as fh:
            recids = fh.readlines()
        if not recids:
            self.report.add('\nNothing to delete.\n')
            return False
        return recids

    def _remove_recid_from_delete_list(self, recid):
        """ Rewrites to_delete.txt keeping every line except the given recid. """
        file_name = 'delete_recid_list'
        lines = file_read_lines(file_name)
        with open(data_files_name[file_name], "w") as f:
            for line in lines:
                if line.strip("\n") != recid:
                    f.write(line)

    def _remove_recid_from_records_list(self, recid):
        """ Rewrites all_rdm_records.txt ('uuid recid' lines) keeping every line
            except the one with the given recid. """
        file_name = 'all_rdm_records'
        lines = file_read_lines(file_name)
        with open(data_files_name[file_name], "w") as f:
            for line in lines:
                if line.strip("\n").split(' ')[1] != recid:
                    f.write(line)
class RdmOwners:
    """ Matches a Pure user with the logged-in RDM user and keeps record
        ownership in sync between the two systems. """

    def __init__(self):
        self.rdm_requests = Requests()
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        """ Decorator: prints the task title and resets the global counters. """
        def _wrapper(self, identifier):
            self.report.add_template(['console'], ['general', 'title'], ['OWNERS CHECK'])
            self.global_counters = initialize_counters()
            # Decorated function
            func(self, identifier)
        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str):
        """ Gets from pure all the records related to a certain user (based on
            orcid or externalId), afterwards it modifies/creates RDM records
            accordingly. """
        # NOTE(review): the identifier value is hard-coded for testing —
        # it should eventually come from the logged-in user's profile.
        identifier_value = '0000-0002-4154-6945'  # TEMPORARY
        if identifier == 'externalId':  # TEMPORARY
            # identifier_value = '3261'  # TEMPORARY
            identifier_value = '30'  # TEMPORARY
        self.report.add(f'\n{identifier}: {identifier_value}\n')
        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:
            return
        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier, identifier_value)
        if not self.user_uuid:
            return False
        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)
        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}
        while next_page:
            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons', f'{self.user_uuid}/research-outputs', params)
            if response.status_code >= 300:
                return False
            # Initial response processing and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True
            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)
            # Iterates over all items in the page
            for item in pure_json['items']:
                uuid = item['uuid']
                title = shorten_file_name(item['title'])
                self.report.add(f"\n\tRecord uuid @ {uuid} @ {title}")
                # Get from RDM the recid
                recid = self.general_functions.get_recid(uuid, self.global_counters)
                # Record NOT in RDM, create it
                if recid is False:
                    self._create_rdm_record(item)
                    continue
                # Checks if the user is already a record owner
                self._process_record_owners(recid)
            page += 1
        self._final_report()

    def _process_record_owners(self, recid):
        """ Gets record metadata from RDM and checks if the user is already
            a record owner. """
        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']
        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}")
        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the record owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """ Adds the current logged in user as record owner. """
        data['owners'].append(self.user_id)
        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}")
        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)
        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """ If a record of the processed user is not in RDM creates it. """
        item['owners'] = [self.user_id]
        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1
        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        """ Writes the local and global counters to the report files. """
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files, self.global_counters)

    def _process_response(self, response: object, page: int):
        """ Checks if there are records to process.
            Returns the parsed json, or False when the user has no records. """
        # Load response json
        resp_json = json.loads(response.content)
        total_items = resp_json['count']
        if page == 1:
            self.report.add(f'Total records: {total_items}')
            if total_items == 0:
                self.report.add('\nThe user has no records @ End task\n')
                return False
        self.report.add(f'\nPag {page} - Get person records - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """ Given the user's external id (or orcid) it returns the relative
            user uuid, or False when it cannot be found. """
        # If the uuid is not found in the first x items then it will continue
        # with the next page
        page = 1
        page_size = 10
        next_page = True
        while next_page:
            params = {'page': page, 'pageSize': page_size, 'q': f'"{key_value}"'}
            response = get_pure_metadata('persons', '', params)
            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False
            record_json = json.loads(response.content)
            for item in record_json['items']:
                if item[key_name] == key_value:
                    first_name = item['name']['firstName']
                    last_name = item['name']['lastName']
                    uuid = item['uuid']
                    self.report.add(f'Name: {first_name} {last_name}\nUuid: {uuid}',
                                    self.report_files)
                    # Pure uuids are 36 characters long
                    if len(uuid) != 36:
                        self.report.add('\n- Warning! Incorrect user_uuid length -\n',
                                        self.report_files)
                        return False
                    return uuid
            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)
            page += 1
        self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
        return False

    # --- --- ---

    def _get_user_id_from_rdm(self):
        """ Gets the ID and IP of the logged in user.
            Returns the user id, or False when zero / multiple sessions exist. """
        table_name = 'accounts_user_session_activity'
        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)
        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False
        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False
        self.report.add(f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
                        self.report_files)
        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """ Add user to user_ids_match.txt, where are specified:
            rdm_user_id, user_uuid and user_external_id. """
        file_name = data_files_name['user_ids_match']
        needs_to_add = self._check_user_ids_match('user_ids_match', external_id)
        if needs_to_add:
            with open(file_name, 'a') as fh:
                fh.write(f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """ Returns True when the (user_id, user_uuid, external_id) triple
            still needs to be added to the ids-match file. """
        for line in file_read_lines(file_name):
            ids = line.split('\n')[0].split(' ')
            # Checks if at least one of the ids match
            if str(self.user_id) == ids[0] or self.user_uuid == ids[1] \
                    or external_id == ids[2]:
                if ids == [str(self.user_id), self.user_uuid, external_id]:
                    self.report.add('Ids list: user in list', self.report_files)
                return False
        return True

    def _initializing_method(func):
        """ Decorator: prints the task title and empties rdm_record_owners.txt.
            (renamed from the misspelled '_initalizing_method') """
        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'], ['RECORDS OWNER'])
            # Empty file rdm_record_owners.txt
            file_owner = data_files_name['rdm_record_owners']
            open(file_owner, 'w').close()
            # Decorated function
            func(self)
        return _wrapper

    @_initializing_method
    def get_rdm_record_owners(self):
        """ Gets all records from RDM and counts how many records belong to each
            user. It also updates the content of all_rdm_records.txt. """
        pag = 1
        pag_size = 250
        count = 0
        count_records_per_owner = {}
        all_records_list = ''
        next_page = True
        while next_page:
            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)
            self.report.add(f'\n{response}\n')
            if response.status_code >= 300:
                self.report.add(response.content)
                break
            resp_json = json.loads(response.content)
            data = ''
            for item in resp_json['hits']['hits']:
                count += 1
                uuid = item['metadata']['uuid']
                recid = item['metadata']['recid']
                owners = item['metadata']['owners']
                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                data += f'{line}\n'
                all_records_list += f'{uuid} {recid}\n'
                for owner in owners:
                    count_records_per_owner[owner] = count_records_per_owner.get(owner, 0) + 1
            self.report.add(f'\nPag {pag} - Records {count}\n')
            with open(data_files_name['rdm_record_owners'], 'a') as fh:
                fh.write(data)
            if 'next' not in resp_json['links']:
                next_page = False
            pag += 1
        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)
        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(all_records_list)

    def _count_records_per_owner(self, count_records_per_owner):
        """ Writes the per-owner record counts to the console report. """
        self.report.add('Owner Records')
        for owner, num_records in count_records_per_owner.items():
            self.report.add(f'{add_spaces(owner)} {add_spaces(num_records)}')

    def _update_all_rdm_records_file(self, all_records_list):
        """ Replaces the content of all_rdm_records.txt. """
        file_all_records_list = data_files_name['all_rdm_records']
        # 'w' mode truncates and writes in one step (was two separate opens)
        with open(file_all_records_list, 'w') as fh:
            fh.write(all_records_list)
class ImportRecords:
    """ Builds an XML file (Pure dataset schema) from RDM records so that
        they can be imported into Pure. """

    def __init__(self):
        self.rdm_requests = Requests()
        self.report = Reports()
        # NOTE(review): hard-coded absolute output path — should come from configuration
        self.file_name = "/home/bootcamp/src/pure_sync_rdm/synchronizer/data/temporary_files/test.xml"

    def run_import(self):
        """ Iterates over RDM records page by page and converts them to Pure xml. """
        # Report title
        self.report.add_template(['console'], ['general', 'title'], ['PURE IMPORT'])
        page = 1
        page_size = 20
        next_page = True
        # Get RDM records by page (loop exits via return when a page is empty)
        while next_page:
            data = self._get_rdm_records_metadata(page, page_size)
            if not data:
                self.report.add("\n\tEnd task\n")
                return
            self._create_xml(data)
            page += 1

    def _check_uuid(self, item):
        """ If a uuid is specified in the RDM record means that it was imported
            from Pure. In this case, the record will be ignored. """
        if 'uuid' in item:
            self.report.add(f"{self.report_base} Already in Pure")
            return False
        return True

    def _check_date(self, item):
        """ Checks if the record was created today. """
        if item['created'] > current_date():
            return True
        date = item['created'].split('T')[0]
        self.report.add(f"{self.report_base} Too old: {date}")
        return False

    def _create_xml(self, data):
        """ Creates the xml file that will be imported in pure. """
        name_space = {
            'dataset': 'v1.dataset.pure.atira.dk',
            'commons': 'v3.commons.pure.atira.dk',
        }
        ET.register_namespace('v1', name_space['dataset'])
        ET.register_namespace('v3', name_space['commons'])
        # Build a tree structure
        self.root = ET.Element("{%s}datasets" % name_space['dataset'])
        count = 0
        for item in data:
            count += 1
            self.full_item = item
            self.report_base = f"{add_spaces(count)} - {item['id']} -"
            item_metadata = item['metadata']
            # # Checks if the record was created today
            # if not self._check_date(item):
            #     self.report.add("\n\tEnd task\n")
            #     next_page = False
            #     break
            # # If the rdm record has a uuid means that it was imported from pure
            # if not self._check_uuid(item_metadata):
            #     continue
            self.report.add(f"{self.report_base} Adding")
            # Adds fields to the created xml element
            self._populate_xml(item_metadata, name_space)
        # Writes the accumulated tree to disk
        self._parse_xml()

    def _populate_xml(self, item, name_space):
        """ Adds one dataset element, with its mandatory fields, to the tree. """
        # Dataset element
        body = ET.SubElement(self.root, "{%s}dataset" % name_space['dataset'])
        body.set('type', 'dataset')
        # Title (mandatory field)
        value = get_value(item, ['title'])
        if not value:
            return False
        self._sub_element(body, name_space['dataset'], 'title').text = value
        # Managing organisation (mandatory field)
        organisational_unit = self._sub_element(body, name_space['dataset'],
                                                'managingOrganisation')
        self._add_attribute(item, organisational_unit, 'lookupId',
                            ['managingOrganisationalUnit_externalId'])
        # Persons (mandatory field)
        self._add_persons(body, name_space, item)
        # Available date (mandatory field)
        date = self._sub_element(body, name_space['dataset'], 'availableDate')
        sub_date = self._sub_element(date, name_space['commons'], 'year')
        sub_date.text = get_value(item, ['publication_date'])
        # Publisher (mandatory field)
        publisher = self._sub_element(body, name_space['dataset'], 'publisher')
        # REVIEW!!!! hard-coded lookupId — data not in rdm
        publisher.set('lookupId', '45d22915-6545-4428-896a-8b8046191d5d')
        self._sub_element(publisher, name_space['dataset'], 'name').text = 'Test publisher'  # Data not in rdm
        self._sub_element(publisher, name_space['dataset'], 'type').text = 'publisher'  # Data not in rdm
        # Description
        value = get_value(item, ['abstract'])
        # NOTE(review): debug override — replaces the real abstract with a
        # fixed string; kept to preserve current behavior, remove when ready
        value = 'test description'
        if value:
            descriptions = self._sub_element(body, name_space['dataset'], 'descriptions')
            description = self._sub_element(descriptions, name_space['dataset'], 'description')
            description.set('type', 'datasetdescription')
            description.text = value
        # Links
        self._add_links(body, name_space)
        # Organisations
        self._add_organisations(body, name_space, item)
        # FIELDS THAT ARE NOT IN DATASET XSD - NEEDS REVIEW:
        # language            ['languages', 0, 'value']
        # organisationalUnits ['personAssociations' ...]
        # peerReview          ['peerReview']
        # createdDate         ['info', 'createdDate']
        # publicationDate     ['publicationStatuses', 0, 'publicationDate', 'year']
        # publicationStatus   ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']
        # recordType          ['types', 0, 'value']
        # workflow            ['workflows', 0, 'value']
        # pages               ['info', 'pages']
        # volume              ['info', 'volume']
        # journalTitle        ['info', 'journalAssociation', 'title', 'value']
        # journalNumber       ['info', 'journalNumber']
        #
        # PURE RESPONSE (schema validation):
        # cvc-complex-type.2.4.b: the content of element 'v1:dataset' is not
        # complete. One of translatedTitles, description, ids,
        # additionalDescriptions, temporalCoverage, productionDate, geoLocation,
        # organisations, DOI, physicalDatas, publisher, openAccess,
        # embargoPeriod, constraints, keywords, links, documents,
        # relatedProjects, relatedEquipments, relatedStudentThesis,
        # relatedPublications, relatedActivities, relatedDatasets, visibility,
        # workflow is expected.

    def _add_organisations(self, body, name_space, item):
        """ Adds the record's organisational units to the dataset element. """
        organisations = self._sub_element(body, name_space['dataset'], 'organisations')
        for unit_data in item['organisationalUnits']:
            # Pure dataset documentation:
            # Can be both an internal and external organisation, use origin to
            # enforce either internal or external.
            # If the organisation is an internal organisation in Pure, then the
            # lookupId attribute must be used.
            # If the organisation is an external organisation and id is given,
            # matching will be done on the id; if not found, matching will be
            # done on name; if still not found then an external organisation
            # with the specified id and organisation will be created.
            organisation = self._sub_element(organisations, name_space['dataset'], 'organisation')
            self._add_attribute(unit_data, organisation, 'lookupId', ['externalId'])
            name = self._sub_element(organisation, name_space['dataset'], 'name')
            name.text = get_value(unit_data, ['name'])

    def _add_persons(self, body, name_space, item):
        """ Adds the record's contributors (mandatory field) to the dataset element. """
        persons = self._sub_element(body, name_space['dataset'], 'persons')
        for person_data in item['contributors']:
            person = self._sub_element(persons, name_space['dataset'], 'person')
            person.set('contactPerson', 'true')
            self._add_attribute(person_data, person, 'id', ['uuid'])
            # External id
            person_id = self._sub_element(person, name_space['dataset'], 'person')
            self._add_attribute(person_data, person_id, 'lookupId', ['externalId'])
            # Role
            role = self._sub_element(person, name_space['dataset'], 'role')
            role.text = get_value(person_data, ['personRole'])
            # Name
            name = self._sub_element(person, name_space['dataset'], 'name')
            name.text = get_value(person_data, ['name'])

    def _add_links(self, body, name_space):
        """ Adds relative links for RDM files and api. """
        link_files = get_value(self.full_item, ['links', 'files'])
        link_self = get_value(self.full_item, ['links', 'self'])
        recid = get_value(self.full_item, ['id'])
        if not (link_files or link_self):
            return
        links = self._sub_element(body, name_space['dataset'], 'links')
        # Files
        if link_files:
            link = self._sub_element(links, name_space['dataset'], 'link')
            link.set('id', recid)  # REVIEW - which id?
            self._sub_element(link, name_space['dataset'], 'url').text = link_files
            self._sub_element(link, name_space['dataset'],
                              'description').text = 'Link to record files'
        # Self
        if link_self:
            link = self._sub_element(links, name_space['dataset'], 'link')
            link.set('id', recid)  # REVIEW - which id?
            # (removed useless chained local 'url' — only .text matters)
            self._sub_element(link, name_space['dataset'], 'url').text = link_self
            self._sub_element(link, name_space['dataset'],
                              'description').text = 'Link to record API'

    def _parse_xml(self):
        """ Pretty-prints the tree and saves it as XML. """
        xml_str = minidom.parseString(ET.tostring(self.root)).toprettyxml(indent="   ")
        # Context manager closes the handle (it was leaked before)
        with open(self.file_name, "w") as fh:
            fh.write(xml_str)

    def _sub_element(self, element, namespace: str, sub_element_name: str):
        """ Adds to the xml a sub element. """
        return ET.SubElement(element, "{%s}%s" % (namespace, sub_element_name))

    def _add_attribute(self, item: object, sub_element, attribute: str, value_path: list):
        """ Gets from the rdm response a value and adds it as attribute
            to a given xml element. """
        value = get_value(item, value_path)
        if value:
            sub_element.set(attribute, value)

    def _add_text(self, item: object, sub_element: object, path):
        """ Gets from the rdm response a value and adds it as text
            to a given xml element. """
        sub_element.text = get_value(item, path)

    def _get_rdm_records_metadata(self, page: int, page_size: int):
        """ Requests to rdm records metadata by page.
            Returns the list of hits, or False when there are none / on error. """
        params = {'sort': 'mostrecent', 'size': page_size, 'page': page}
        response = self.rdm_requests.get_metadata(params)
        if response.status_code >= 300:
            return False
        # Load response
        json_data = json.loads(response.content)['hits']['hits']
        # Checks if any record is listed
        if not json_data:
            return False
        self.report.add_template(['console'], ['pages', 'page_and_size'], [page, page_size])
        self.report.add('')  # adds empty line
        return json_data
class RdmGroups:
    """ Splits and merges RDM user groups (Invenio roles) to mirror
        organisational-unit changes in Pure, updating group membership,
        the RDM roles table and the affected records' metadata. """

    def __init__(self):
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'groups']

    def _general_report_and_variables(func):
        """ Decorator for rdm_group_split: prints the task header and
            resets self.new_groups_data before running the task. """
        def _wrapper(self, old_group_externalId, new_groups_externalIds):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP SPLIT'])
            self.report.add(
                f'\nOld group: {old_group_externalId} @ New groups: {new_groups_externalIds}\n',
                self.report_files)
            # Get name and uuid of new groups
            self.new_groups_data = []
            # Decorated function
            func(self, old_group_externalId, new_groups_externalIds)
        return _wrapper

    @_general_report_and_variables
    def rdm_group_split(self, old_group_externalId: str,
                        new_groups_externalIds: list):
        """
        1 - Create new groups
        2 - Add users to new groups
        3 - Remove users from old group
        4 - Delete old group
        5 - Modify RDM record:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits
        """
        for externalId in new_groups_externalIds:
            # Get group information from Pure; abort if any new group is unknown
            group_name = self._get_pure_group_metadata(externalId)
            if not group_name:
                return False
            # Create new group (no-op if it already exists)
            self.rdm_create_group(externalId, group_name)

        # Get old group id
        old_group_id = self._get_rdm_group_id(old_group_externalId)

        # Removes users from old group and adds to new groups
        self._rdm_split_users_from_old_to_new_group(old_group_id,
                                                    old_group_externalId,
                                                    new_groups_externalIds)

        # Modify all related records
        self._rdm_split_modify_record(old_group_externalId,
                                      new_groups_externalIds)

    # Renamed from a second '_general_report_and_variables' definition,
    # which silently shadowed the split decorator above in the class body.
    def _merge_report_and_variables(func):
        """ Decorator for rdm_group_merge: prints the task header and
            resets self.new_groups_data before running the task. """
        def _wrapper(self, old_groups_externalId, new_group_externalId):
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['GROUP MERGE'])
            report = f'\nOld groups: {old_groups_externalId} @ New group: {new_group_externalId}\n'
            self.report.add(report, self.report_files)
            # Get new group information
            self.new_groups_data = []
            # Decorated function
            func(self, old_groups_externalId, new_group_externalId)
        return _wrapper

    @_merge_report_and_variables
    def rdm_group_merge(self, old_groups_externalId: list,
                        new_group_externalId: str):
        """
        1 - Create new group
        2 - Remove users from old groups
        3 - Add users to new group
        4 - Delete old groups
        5 - Modify RDM records:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits
        """
        group_name = self._get_pure_group_metadata(new_group_externalId)
        if not group_name:
            return False

        # Create new group (no-op if it already exists)
        self.rdm_create_group(new_group_externalId, group_name)

        # Adds users to new group and removes them from the old ones
        self._merge_users_from_old_to_new_group(old_groups_externalId,
                                                new_group_externalId)

        # Modify all related records
        self._rdm_merge_modify_records(old_groups_externalId,
                                       self.new_groups_data[0],
                                       new_group_externalId)

    def _get_rdm_group_id(self, externalId: str):
        """ Looks up the RDM role id (and name, for reporting) of a group
            by its Pure external id. """
        response = self.rdm_db.select_query('id, description', 'accounts_role',
                                            {'name': f"'{externalId}'"})
        group_id = response[0][0]
        group_name = response[0][1]

        report = f'\tOld group info @ ExtId: {add_spaces(externalId)} @ RDM id: {add_spaces(group_id)} @ {group_name}'
        self.report.add(report, self.report_files)

        return group_id

    def _rdm_split_modify_record(self, old_group_externalId: str,
                                 new_groups_externalIds: list):
        """ Rewrites every record of the old group: swaps its
            organisationalUnits / groupRestrictions entries for the new
            groups and fixes the managing organisational unit if needed. """
        # Get from RDM all old group's records
        response = self.rdm_requests.get_metadata_by_query(
            old_group_externalId)
        resp_json = json.loads(response.content)

        total_items = resp_json['hits']['total']
        report = f"\tModify old g. records @ ExtId: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
        self.report.add(report, self.report_files)

        if total_items == 0:
            self.report.add('\tNothing to modify @ End', self.report_files)
            return True

        # Iterates over all old group records
        for item in resp_json['hits']['hits']:
            item = item['metadata']

            # Removes old organisationalUnit from organisationalUnits.
            # (Rebuild the list instead of remove-while-iterating, which
            # could skip entries.)
            item['organisationalUnits'] = [
                i for i in item['organisationalUnits']
                if i['externalId'] != old_group_externalId
            ]

            # Adds new organisationalUnits
            for i in self.new_groups_data:
                item['organisationalUnits'].append(i)

            # Change group restrictions
            if old_group_externalId in item['groupRestrictions']:
                item['groupRestrictions'].remove(old_group_externalId)
            for i in new_groups_externalIds:
                item['groupRestrictions'].append(i)

            # Change managingOrganisationalUnit
            item = self._process_managing_organisational_unit(
                item, old_group_externalId)

            # Update record
            recid = item['recid']
            self.general_functions.update_rdm_record(recid, item)
        return True

    def _process_managing_organisational_unit(self, item: object,
                                              old_group_externalId: str):
        """ If the record is managed by the old group, re-point the managing
            organisational unit fields to the first new group. """
        if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
            item['managingOrganisationalUnit_name'] = self.new_groups_data[0][
                'name']
            item['managingOrganisationalUnit_uuid'] = self.new_groups_data[0][
                'uuid']
            item[
                'managingOrganisationalUnit_externalId'] = self.new_groups_data[
                    0]['externalId']
        return item

    def _rdm_split_users_from_old_to_new_group(self, old_group_id: str,
                                               old_group_externalId: str,
                                               new_groups_externalIds: list):
        """ Moves every user of the old group into each of the new groups,
            then removes them from the old group. """
        # Get all users in old group
        response = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                            {'role_id': old_group_id})

        report = 'Old group @@ Num. of users: '
        if not response:
            self.report.add(f'\t{report} 0', self.report_files)
            return
        self.report.add(f'\t{report} {len(response)}', self.report_files)

        for i in response:
            user_id = i[0]
            # Get user email
            user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                  {'id': user_id})[0][0]
            # Add user to new groups
            for new_group_externalId in new_groups_externalIds:
                self._group_add_user(user_email, new_group_externalId, user_id)

            # Remove user from old group
            self._group_remove_user(user_email, old_group_externalId)

    def _rdm_merge_modify_records(self, old_groups_externalId: list,
                                  new_group_data: dict,
                                  new_group_externalId: str):
        """ Rewrites every record of each old group so that it points to the
            merged group (organisationalUnits, groupRestrictions and the
            managing organisational unit). """
        # Get from RDM all records with old groups
        for old_group_externalId in old_groups_externalId:

            self._rdm_check_if_group_exists(old_group_externalId)

            # Get record metadata
            response = self.rdm_requests.get_metadata_by_query(
                old_group_externalId)
            resp_json = json.loads(response.content)

            total_items = resp_json['hits']['total']
            report = f"\tModify records @ Group: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
            self.report.add(report, self.report_files)

            if total_items == 0:
                continue

            # Iterates over all old group records
            for item in resp_json['hits']['hits']:
                item = item['metadata']

                # Organisational units
                item = self._process_organisational_units(
                    item, new_group_data, old_groups_externalId)

                # Group restrictions
                self._process_group_restrictions(item, old_group_externalId,
                                                 new_group_externalId)

                # Managing Organisational Unit
                if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
                    item['managingOrganisationalUnit_name'] = new_group_data[
                        'name']
                    item['managingOrganisationalUnit_uuid'] = new_group_data[
                        'uuid']
                    item[
                        'managingOrganisationalUnit_externalId'] = new_group_data[
                            'externalId']

                # Update record
                recid = item['recid']
                self.general_functions.update_rdm_record(recid, item)

    def _process_organisational_units(self, item, new_group_data,
                                      old_groups_externalId):
        """ Returns the item with its organisationalUnits replaced by the new
            group plus every unit that is neither an old group nor a
            duplicate of the new one. """
        new_organisationalUnits_data = [new_group_data]
        for i in item['organisationalUnits']:
            if (i['externalId'] in old_groups_externalId
                    or i['externalId'] == new_group_data['externalId']):
                continue
            new_organisationalUnits_data.append(i)
        item['organisationalUnits'] = new_organisationalUnits_data
        return item

    def _process_group_restrictions(self, item, old_group_externalId,
                                    new_group_externalId):
        """ Swaps the old group for the new one in the record's
            groupRestrictions, avoiding duplicates. """
        # Remove old group
        if old_group_externalId in item['groupRestrictions']:
            item['groupRestrictions'].remove(old_group_externalId)
        # Add new group
        if new_group_externalId not in item['groupRestrictions']:
            item['groupRestrictions'].append(new_group_externalId)
        return item

    def _merge_users_from_old_to_new_group(self, old_groups_externalId: list,
                                           new_group_externalId: str):
        """ Adds every user of every old group to the new group and removes
            them from their old group. Aborts (returns False) if an old
            group is missing from the RDM database. """
        # Iterate over old groups
        for old_group_externalId in old_groups_externalId:

            # Get group id
            response = self.rdm_db.select_query(
                'id, description', 'accounts_role',
                {'name': f"'{old_group_externalId}'"})
            if not response:
                # BUGFIX: was a plain string, so the placeholder was printed
                # literally instead of the missing group's external id.
                self.report.add(
                    f'\nWarning @ Old group ({old_group_externalId}) not in database @ END TASK\n'
                )
                return False

            old_group_id = response[0][0]
            old_group_name = response[0][1]

            # Get all users id that are in this group
            old_group_users = self.rdm_db.select_query(
                'user_id', 'accounts_userrole', {'role_id': old_group_id})
            if not old_group_users:
                old_group_users = []

            report = f"\tOld group @ ExtId: {add_spaces(old_group_externalId)} @ Num. users: {add_spaces(len(old_group_users))} @ {old_group_name}"
            self.report.add(report, self.report_files)

            for i in old_group_users:
                user_id = i[0]
                # Get user email
                user_email = self.rdm_db.select_query(
                    'email', 'accounts_user', {'id': user_id})[0][0]

                # - - Add user to new group - -
                self._group_add_user(user_email, new_group_externalId,
                                     user_id)

                # - - Remove user from old group - -
                self._group_remove_user(user_email, old_group_externalId)

            # TODO: Delete old group (not implemented)

    def _get_pure_group_metadata(self, externalId: str):
        """ Get organisationalUnit name and uuid.

            On success appends {externalId, uuid, name} to
            self.new_groups_data and returns the group name; returns False
            when Pure does not know the unit. """
        # PURE REQUEST
        response = get_pure_metadata('organisational-units',
                                     f'{externalId}/research-outputs', {
                                         'page': 1,
                                         'pageSize': 100
                                     })

        report = f'\tNew group info @ ExtId: {add_spaces(externalId)} @ '

        # Check response
        if response.status_code >= 300:
            report += 'Not in pure - END TASK\n'
            self.report.add(report, self.report_files)
            self.report.add(response.content, self.report_files)
            return False

        # Load json
        data = json.loads(response.content)
        data = data['items'][0]['organisationalUnits']

        for organisationalUnit in data:
            if organisationalUnit['externalId'] == externalId:
                organisationalUnit_data = {}
                organisationalUnit_data['externalId'] = externalId
                organisationalUnit_data['uuid'] = organisationalUnit['uuid']
                organisationalUnit_data['name'] = organisationalUnit['names'][
                    0]['value']

                report += f"{organisationalUnit_data['uuid']} @ {organisationalUnit_data['name']}"
                self.report.add(report, self.report_files)

                self.new_groups_data.append(organisationalUnit_data)
                return organisationalUnit_data['name']
        return False

    def _rdm_check_if_group_exists(self, group_externalId: str):
        """ Checks if the group already exists"""
        response = self.rdm_db.select_query('*', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})
        if response:
            report = f'\tNew group check @@ ExtId: {add_spaces(group_externalId)} @ Already exists'
            self.report.add(report)
            return True
        return False

    def rdm_create_group(self, externalId: str, group_name: str):
        """ Creates an RDM role named after the Pure external id via the
            invenio CLI; returns True if it exists or was created. """
        # Checks if the group already exists
        if self._rdm_check_if_group_exists(externalId):
            return True

        # Escape characters the shell would interpret (raw strings:
        # '\(' is an invalid escape sequence in a normal string literal)
        group_name = group_name.replace('(', r'\(')
        group_name = group_name.replace(')', r'\)')
        group_name = group_name.replace(' ', '_')

        # SECURITY NOTE: externalId / group_name are interpolated into a
        # shell command - consider subprocess.run([...], shell=False).
        command = f'pipenv run invenio roles create {externalId} -d {group_name}'
        response = os.system(command)

        report = '\tNew group check @@'
        if response != 0:
            self.report.add(f'{report} Error: {response}')
            return False

        self.report.add(f'{report} Group created @ External id: {externalId}')
        return True

    def _rdm_add_user_to_group(self, user_id: int, group_externalId: str,
                               group_name: str):
        """ Adds a user to a group, creating the group first if it does not
            exist; no-op when the membership is already present. """
        # Get user's rdm email
        user_email = self.rdm_db.select_query('email', 'accounts_user',
                                              {'id': user_id})[0][0]

        # Get group id
        response = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})
        if not response:
            # If the group does not exist then creates it
            self.rdm_create_group(group_externalId, group_name)
            # Repeats the query to get the group id
            response = self.rdm_db.select_query(
                'id', 'accounts_role', {'name': f"'{group_externalId}'"})
        group_id = response[0][0]

        # Checks if match already exists
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })
        if response:
            report = f'\tRDM user in group @ User id: {add_spaces(user_id)} @@ Already belongs to group {group_externalId} (id {group_id})'
            self.report.add(report)
            return True

        # Adds user to group
        # SECURITY NOTE: values interpolated into a shell command.
        command = f'pipenv run invenio roles add {user_email} {group_externalId}'
        response = os.system(command)
        if response != 0:
            self.report.add(f'Warning @ Creating group response: {response}')

    def _group_add_user(self, user_email: str, new_group_externalId: str,
                        user_id: str):
        """ Adds a user to an existing group via the invenio CLI; returns
            True on success or when the user already belongs to it. """
        # Get group id
        group_id = self.rdm_db.select_query(
            'id', 'accounts_role',
            {'name': f"'{new_group_externalId}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })
        if response:
            return True

        # SECURITY NOTE: values interpolated into a shell command.
        command = f'pipenv run invenio roles add {user_email} {new_group_externalId}'
        response = os.system(command)

        report = f'\tAdd user to group @ ExtId: {add_spaces(new_group_externalId)} @ User id: {add_spaces(user_id)}'
        if response != 0:
            self.report.add(f'{report} @ Error: {response}',
                            self.report_files)
            return False
        self.report.add(f'{report} @ Success', self.report_files)
        return True

    def _group_remove_user(self, user_email: str, group_name: str):
        """ Removes a user from a group via the invenio CLI; returns True on
            success or when the user was not a member. """
        # Get user id
        user_id = self.rdm_db.select_query('id', 'accounts_user',
                                           {'email': f"'{user_email}'"})[0][0]
        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_name}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole', {
            'user_id': user_id,
            'role_id': group_id
        })

        report = f'Remove user fromGroup @ ExtId: {add_spaces(group_name)} @ User id: {add_spaces(user_id)}'
        if not response:
            self.report.add(f'\t{report} @ Not in group (already removed)',
                            self.report_files)
            return True

        # Remove user from old group
        # SECURITY NOTE: values interpolated into a shell command.
        command = f'pipenv run invenio roles remove {user_email} {group_name}'
        response = os.system(command)

        if response != 0:
            self.report.add(f'\t{report} @ Error: {response}',
                            self.report_files)
            return False
        self.report.add(f'\t{report} @ Success', self.report_files)
        return True