def __init__(self):
    """Create the helper objects this instance works with and store them as attributes."""
    # Collaborators are built in the same order as before, in case any
    # constructor has side effects.
    self.rdm_requests = Requests()
    self.rdm_db = RdmDatabase()
    self.report = Reports()
    self.rdm_add_record = RdmAddRecord()
    self.general_functions = GeneralFunctions()

    # Report destinations passed to self.report.add / add_template
    self.report_files = ['console', 'owners']
def __init__(self):
    """Create the helper objects this instance works with and store them as attributes."""
    # Collaborators are built in the same order as before, in case any
    # constructor has side effects.
    self.add_record = RdmAddRecord()
    self.report = Reports()
    self.delete = Delete()
    self.general_functions = GeneralFunctions()
class PureChangesByDate:
    """Applies to RDM the record changes listed by Pure's 'changes' endpoint, one date at a time."""

    def __init__(self):
        self.add_record = RdmAddRecord()
        self.report = Reports()
        self.delete = Delete()
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """
        Gets from Pure 'changes' endpoint all records that have been
        created / updated / deleted and modifies accordingly the relative RDM records.
        """
        # Get the dates that still need to be processed
        missing_updates = self._get_missing_updates()
        # NOTE(review): debugging override left in place on purpose; it discards
        # the dates computed above. Remove once the temporary phase is over.
        missing_updates = ['2020-05-15']      # TEMPORARY !!!!!

        if not missing_updates:
            self.report.add('\nNothing to update.\n')
            return

        # _get_missing_updates lists the newest date first; process oldest first
        for date_to_update in reversed(missing_updates):
            self._changes_by_date(date_to_update)
        return

    def _set_counters_and_title(func):
        """Decorator: resets the counters, prints the report title and, at the end, the summary."""
        import functools

        @functools.wraps(func)
        def _wrapper(self, changes_date: str):
            # Initialize global counters
            self.global_counters = initialize_counters()
            # The summary below reads self.local_counters, so it must exist even
            # when the decorated function returns before processing any page.
            self._initialize_local_counters()

            self.report_files = ['console', 'changes']
            self.report.add_template(self.report_files, ['general', 'title'], ['CHANGES'])
            self.report.add(f'\nProcessed date: {changes_date}', self.report_files)

            # Decorated function
            result = func(self, changes_date)

            self._report_summary()
            return result
        return _wrapper

    @_set_counters_and_title
    def _changes_by_date(self, changes_date: str):
        """
        Gets from Pure all changes that took place in a certain date.

        Returns False when a Pure request fails, True when a page without
        records is reached, None when there is no further page to request.
        """
        reference = changes_date
        page = 1

        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page, changes_date)

            # If there are no records to process
            if not json_response:
                return True

            # Used to check if there are doubled tasks
            # (e.g. update uuid and delete same uuid)
            self.duplicated_uuid = []

            # Counters are restarted for every page
            self._initialize_local_counters()

            # Iterates over all records that need to be deleted
            self._delete_records(json_response)

            # Create / Add / Update
            self._update_records(json_response)

            # Gets the reference code of the next page.
            # get_next_page is used as a truthy/falsy flag elsewhere in this file,
            # so a falsy value is treated here as "no further page".
            next_link = get_next_page(json_response)
            reference = next_link.split('/')[-1] if next_link else None
            page += 1

    def _records_to_process(self, response: object, page: int, changes_date: str):
        """
        Check if there are records in the response from pure.

        Returns the decoded json when it holds at least one record, otherwise
        False (after registering the date as successfully processed).
        """
        # Load response json
        json_response = json.loads(response.content)

        number_records = json_response["count"]

        if number_records == 0:
            # Adds the date to successful_changes.txt
            with open(data_files_name['successful_changes'], "a") as changes_file:
                changes_file.write(f'{changes_date}\n')

            if page == 1:
                # If there are no changes at all
                self.report.add(f'\n\nNothing to transfer.\n\n', self.report_files)
            return False

        report_line = f'\nPag{add_spaces(page)} @ Pure get changes @ {response} @ Number of items: {add_spaces(number_records)}'
        self.report.add(report_line, self.report_files)

        return json_response

    def _delete_records(self, json_response: dict):
        """
        Iterates over the Pure response and process all records that need to be deleted.
        Always returns True.
        """
        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                continue
            # .get: an item without familySystemName is simply not a ResearchOutput
            elif item.get('familySystemName') != 'ResearchOutput':
                continue
            elif item['changeType'] != 'DELETE':
                continue

            uuid = item['uuid']
            # Remembered so that _update_records skips this uuid
            self.duplicated_uuid.append(uuid)
            self.local_counters['delete'] += 1

            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"
            self.report.add(report)

            # Gets the record recid
            recid = self.general_functions.get_recid(uuid, self.global_counters)

            if recid:
                # Deletes the record from RDM
                self.delete.record(recid)
            else:
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1
        return True

    def _update_records(self, json_response: dict):
        """
        Iterates over the Pure response and process all records that need to be created/updated.
        """
        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
                continue
            # .get: an item without familySystemName is counted as not_ResearchOutput
            elif item.get('familySystemName') != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
                continue
            elif item['changeType'] == 'DELETE':
                continue

            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1
                continue

            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type - {item['changeType']}"
            self.report.add(report)

            if item['changeType'] == 'ADD' or item['changeType'] == 'CREATE':
                self.local_counters['create'] += 1

            if item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1

            # Checks if this uuid has already been created / updated / deleted
            self.duplicated_uuid.append(uuid)

            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self):
        """
        Reading successful_changes.txt gets the dates (today and the six
        previous days, newest first) in which Pure changes have not been processed.
        """
        file_name = data_files_name['successful_changes']
        check_if_file_exists(file_name)

        # The file does not change during the loop: read it once and close it
        with open(file_name, 'r') as changes_file:
            processed_dates = changes_file.read()

        missing_updates = []
        days_span = 7
        date_check = datetime.today().date()

        for _ in range(days_span):
            if str(date_check) not in processed_dates:
                missing_updates.append(str(date_check))
            date_check = date_check - timedelta(days=1)

        return missing_updates

    def _report_summary(self):
        """Reports the global counters followed by the local (per page) counters."""
        # Global counters
        self.report.summary_global_counters(self.report_files, self.global_counters)

        # Local counters, in the order defined by _initialize_local_counters
        arguments = [add_spaces(value) for value in self.local_counters.values()]
        self.report.add_template(self.report_files, ['changes', 'summary'], arguments)
        return

    def _initialize_local_counters(self):
        """Sets all local counters to zero."""
        # Incomplete:  when the uuid or changeType are not specified
        # Duplicated:  e.g. when a record has been modified twice in a day
        # Irrelevant:  when familySystemName is not ResearchOutput
        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
        }
def __init__(self):
    """Create the helper objects this instance works with and store them as attributes."""
    # Collaborators are built in the same order as before, in case any
    # constructor has side effects.
    self.report = Reports()
    self.rdm_requests = Requests()
    self.general_functions = GeneralFunctions()
class Versioning:
    """Computes and maintains the metadata version information of RDM records sharing a uuid."""

    def __init__(self):
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()

    @staticmethod
    def _version_entry(item: dict):
        """
        Builds the [recid, version, creation_date] entry for one RDM search hit.

        A hit without 'metadataVersion' gets an empty version string instead of
        raising KeyError.
        NOTE(review): confirm that '' is the placeholder consumers of
        metadataOtherVersions expect for records lacking a version.
        """
        recid = item['id']
        # 'created' is a timestamp containing a 'T' separator: keep the date part only
        creation_date = item['created'].split('T')[0]
        version = str(item['metadata'].get('metadataVersion', ''))
        return [recid, version, creation_date]

    def get_uuid_version(self, uuid):
        """
        Gives the version to use for a new record and old versions of the same uuid.

        Returns [new_version, all_metadata_versions], where all_metadata_versions
        is a list of [recid, version, creation_date] entries.
        """
        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)

        message = f'\tRDM metadata version - {response} - '

        total_recids = resp_json['hits']['total']
        all_metadata_versions = []

        if total_recids == 0:
            # If there are no records with the same uuid means it is the first one (version 1)
            new_version = 1
            self.report.add(f'{message}Record NOT found - Metadata version: 1')
            return [new_version, all_metadata_versions]

        new_version = None

        # Iterates over all records in response
        for item in resp_json['hits']['hits']:
            rdm_metadata = item['metadata']

            # If a record has a different uuid than it will be ignored
            if uuid != rdm_metadata['uuid']:
                self.report.add(f" VERSIONING - Different uuid {rdm_metadata['uuid']}")
                continue

            # Get the latest version: only the first hit carrying a version is used
            # (presumably hits come most recent first; verify against the query)
            if 'metadataVersion' in rdm_metadata and not new_version:
                new_version = rdm_metadata['metadataVersion'] + 1

            # Add data to listed versions (old versions)
            all_metadata_versions.append(self._version_entry(item))

        # In case the record has no metadataVersion
        if not new_version:
            message += f'Vers. not specified - New metadata version: 1'
            new_version = 1
        else:
            count_old_versions = add_spaces(len(all_metadata_versions))
            message += f'Older versions{count_old_versions} - New version: {new_version}'

        self.report.add(message)
        return [new_version, all_metadata_versions]

    def update_all_uuid_versions(self, uuid):
        """
        Writes the complete list of versions into 'metadataOtherVersions' of
        every record returned by the uuid query, skipping records already up to date.
        """
        # Request
        response = self.rdm_requests.get_metadata_by_query(uuid)

        resp_json = json.loads(response.content)

        total_recids = resp_json['hits']['total']

        if total_recids == 0:
            self.report.add('There are no records with this uuid')
            return

        hits = resp_json['hits']['hits']

        # Add data to listed versions
        all_metadata_versions = [self._version_entry(item) for item in hits]

        self.report.add(f'\tUpdate uuid versions')

        for item in hits:
            recid = item['id']
            item = item['metadata']

            # .get: a record that never had the field is treated as out of date
            if item.get('metadataOtherVersions') == all_metadata_versions:
                self.report.add(f'\tRecord update @ Up to date @ {recid}')
                continue

            item['metadataOtherVersions'] = all_metadata_versions

            # Update record
            self.general_functions.update_rdm_record(recid, item)
class RdmOwners:
    """Keeps RDM record owners aligned with the records a user has in Pure, and reports owners per record."""

    def __init__(self):
        self.rdm_requests = Requests()
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        """Decorator: prints the task title and resets the global counters."""
        import functools

        @functools.wraps(func)
        def _wrapper(self, identifier):

            self.report.add_template(['console'], ['general', 'title'], ['OWNERS CHECK'])
            self.global_counters = initialize_counters()

            # Decorated function (its result is handed back to the caller)
            return func(self, identifier)

        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str):
        """
        Gets from pure all the records related to a certain user (based on orcid or externalId),
        afterwards it modifies/create RDM records accordingly.

        Returns None when no single logged in user is found or when all pages
        were processed, False on a failed Pure request or unknown user, True
        when the user has no (more) records.
        """
        # NOTE(review): hard-coded identifier values, marked as temporary by the author
        identifier_value = '0000-0002-4154-6945'    # TEMPORARY
        if identifier == 'externalId':              # TEMPORARY
            # identifier_value = '3261'             # TEMPORARY
            identifier_value = '30'                 # TEMPORARY

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:
            return

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier, identifier_value)
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)

        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons', f'{self.user_uuid}/research-outputs', params)
            if response.status_code >= 300:
                return False

            # Initial response process and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(uuid, self.global_counters)

                # Record NOT in RDM, create it (truthiness check, as done by the
                # other callers of get_recid in this file)
                if not recid:
                    self._create_rdm_record(item)
                    continue

                # Gets record metadata from RDM and checks if the user is already a record owner
                self._process_record_owners(recid)

            page += 1

        self._final_report()

    def _process_record_owners(self, recid):
        """
        Gets record metadata from RDM and checks if the user is already a record owner.
        """
        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']

        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"
        )

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the record owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """
        Adds the current logged in user as record owner (mutates data['owners']).
        """
        data['owners'].append(self.user_id)

        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"
        )

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """
        If a record of the processed user is not in RDM creates it.
        """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        """Reports the local counters followed by the global counters."""
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files, self.global_counters)

    def _process_response(self, response: object, page: int):
        """
        Checks if there are records to process.
        Returns the decoded json, or False when the first page reports zero records.
        """
        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

        if page == 1 and total_items == 0:
            self.report.add('\nThe user has no records @ End task\n')
            return False

        self.report.add(f'\nPag {page} - Get person records - {response}')
        return resp_json

    @staticmethod
    def _find_person(items: list, key_name: str, key_value: str):
        """
        Returns the first item whose key_name field equals key_value, or None.
        Items that do not have the field at all are skipped.
        """
        for item in items:
            if item.get(key_name) == key_value:
                return item
        return None

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """
        Given the user's identifier (key_name / key_value) it returns the
        relative user uuid, or False when it can not be determined.
        """
        # If the uuid is not found in the first x items then it will continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            }
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            # 'q' is a search query, so returned persons are not guaranteed to
            # carry the identifier field: _find_person tolerates its absence.
            person = self._find_person(record_json['items'], key_name, key_value)

            if person is not None:
                first_name = person['name']['firstName']
                last_name = person['name']['lastName']
                uuid = person['uuid']

                self.report.add(
                    f'Name: {first_name} {last_name}\nUuid: {uuid}',
                    self.report_files)

                if len(uuid) != 36:
                    self.report.add(
                        '\n- Warning! Incorrect user_uuid length -\n',
                        self.report_files)
                    return False
                return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add(f'Uuid NOT FOUND - End task\n', self.report_files)
        return False

    #   ---         ---         ---
    def _get_user_id_from_rdm(self):
        """
        Gets the ID and IP of the logged in user.
        Returns the user id, or False unless exactly one session row is found.
        """
        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False

        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False

        self.report.add(
            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
            self.report_files)

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """
        Add user to user_ids_match.txt, where are specified:
        rdm_user_id, user_uuid and user_external_id
        """
        file_name = data_files_name['user_ids_match']

        needs_to_add = self._check_user_ids_match('user_ids_match', external_id)

        if needs_to_add:
            with open(file_name, 'a') as ids_file:
                ids_file.write(f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """
        Returns False when the exact (user_id, user_uuid, external_id) line is
        already in the file, True when it still needs to be added.
        """
        expected = [str(self.user_id), self.user_uuid, external_id]

        for line in file_read_lines(file_name):
            fields = line.split('\n')[0].split(' ')

            # Only a full match counts; blank or malformed lines can never
            # match and no longer raise IndexError.
            if fields == expected:
                self.report.add('Ids list: user in list', self.report_files)
                return False
        return True

    def _initalizing_method(func):
        """Decorator: prints the task title and empties the record owners file."""
        import functools

        @functools.wraps(func)
        def _wrapper(self):

            self.report.add_template(['console'], ['general', 'title'], ['RECORDS OWNER'])

            # Empty file rdm_record_owners.txt
            file_owner = data_files_name['rdm_record_owners']
            open(file_owner, 'w').close()

            # Decorated function
            return func(self)

        return _wrapper

    @_initalizing_method
    def get_rdm_record_owners(self):
        """
        Gets all records from RDM and counts how many records belong to each user.
        It also updates the content of all_rdm_records.txt
        """
        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        all_records = []
        next_page = True

        while next_page:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)

            self.report.add(f'\n{response}\n')

            if response.status_code >= 300:
                # NOTE(review): after a failed request the files below are still
                # rewritten with the records collected so far (unchanged behaviour).
                self.report.add(response.content)
                break

            resp_json = json.loads(response.content)
            page_lines = []

            for item in resp_json['hits']['hits']:
                count += 1

                uuid = item['metadata']['uuid']
                recid = item['metadata']['recid']
                owners = item['metadata']['owners']

                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                page_lines.append(f'{line}\n')

                all_records.append(f'{uuid} {recid}\n')

                for owner in owners:
                    count_records_per_owner[owner] = count_records_per_owner.get(owner, 0) + 1

            self.report.add(f'\nPag {str(pag)} - Records {count}\n')

            with open(data_files_name['rdm_record_owners'], 'a') as owners_file:
                owners_file.write(''.join(page_lines))

            if 'next' not in resp_json['links']:
                next_page = False

            pag += 1

        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)

        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(''.join(all_records))

    def _count_records_per_owner(self, count_records_per_owner):
        """Reports one line per owner with the number of records owned."""
        self.report.add('Owner Records')

        for key in count_records_per_owner:
            records = add_spaces(count_records_per_owner[key])
            key = add_spaces(key)
            self.report.add(f'{key} {records}')

    def _update_all_rdm_records_file(self, all_records_list):
        """Replaces the content of all_rdm_records.txt with all_records_list."""
        file_all_records_list = data_files_name['all_rdm_records']
        # Opening in 'w' mode empties the file before the new content is written
        with open(file_all_records_list, 'w') as records_file:
            records_file.write(all_records_list)
def __init__(self):
    """Create the helper objects this instance works with and store them as attributes."""
    # Collaborators are built in the same order as before, in case any
    # constructor has side effects.
    self.rdm_requests = Requests()
    self.report = Reports()
    self.groups = RdmGroups()
    self.general_functions = GeneralFunctions()
    self.versioning = Versioning()
class RdmAddRecord:
    """Converts a Pure research output into RDM metadata and submits it (metadata and files) to RDM."""

    def __init__(self):
        self.rdm_requests = Requests()
        self.report = Reports()
        self.groups = RdmGroups()
        self.general_functions = GeneralFunctions()
        self.versioning = Versioning()

    def push_record_by_uuid(self, global_counters: dict, uuid: str):
        """
        Gets from Pure the metadata of a given uuid and pushes it to RDM.
        Returns False when Pure gives no metadata for the uuid.
        """
        item = get_pure_record_metadata_by_uuid(uuid)
        if not item:
            return False
        return self.create_invenio_data(global_counters, item)

    def _set_initial_variables(func):
        """Decorator: resets the per record state before a record is processed."""
        import functools

        @functools.wraps(func)
        def _wrapper(self, global_counters, item):

            self.global_counters = global_counters
            self.global_counters['total'] += 1

            self.uuid = item['uuid']
            self.item = item
            self.data = {}

            # Stores the name of the record files.
            # Necessary because we need first to create the record and then to put the files
            self.record_files = []

            # Decorated function
            return func(self, global_counters, item)

        return _wrapper

    @_set_initial_variables
    def create_invenio_data(self, global_counters: dict, item: dict):
        """
        Process the data received from Pure and submits it to RDM.
        """
        # Versioning
        self._check_record_version()

        # Record owners
        self._check_record_owners()

        # Restrictions
        self.data['appliedRestrictions'] = ['owners', 'groups', 'ip_single', 'ip_range']    # TO REVIEW
        self.data['_access'] = {'metadata_restricted': False, 'files_restricted': False}   # TO REVIEW

        # Process various single fields
        self._process_single_fields(item)

        # Electronic Versions (files)
        self._process_electronic_versions()

        # Additional Files
        if 'additionalFiles' in item:
            for additional_file in item['additionalFiles']:
                self.get_files_data(additional_file)

        # Person Associations
        self._process_person_associations()

        # Organisational Units
        self._process_organisational_units()

        # Checks if the restrictions applied to the record are valid
        self._applied_restrictions_check()

        # From here on self.data is the json string sent to RDM
        self.data = json.dumps(self.data)

        # Post request to RDM
        self._post_metadata()

        # Updates the versioning data of all records with the same uuid
        self._update_all_uuid_versions()

    def _versioning_required(func):
        """Decorator: the decorated method is skipped while versioning_running is falsy."""
        import functools

        @functools.wraps(func)
        def _wrapper(self):
            if not versioning_running:
                return
            func(self)

        return _wrapper

    @_versioning_required
    def _check_record_version(self):
        """
        Checks if there are in RDM other versions of the same uuid.
        """
        # Get metadata version
        response = self.versioning.get_uuid_version(self.uuid)

        if response:
            self.data['metadataVersion'] = response[0]
            self.data['metadataOtherVersions'] = response[1]

    @_versioning_required
    def _update_all_uuid_versions(self):
        """
        Updates the versioning data of all records with the same uuid.
        """
        self.versioning.update_all_uuid_versions(self.uuid)

    def _check_record_owners(self):
        """
        Sets the record owners removing duplicates; user id 1 is used when the
        item specifies no owners.
        """
        if 'owners' in self.item:
            self.data['owners'] = list(set(self.item['owners']))
        else:
            self.data['owners'] = [1]

    def _process_single_fields(self, item: dict):
        """Copies the plain fields from the Pure item into the RDM data."""
        #   RDM field name                              PURE json path
        single_fields = (
            ('title',                                   ['title']),
            ('uuid',                                    ['uuid']),
            ('pureId',                                  ['pureId']),
            ('publicationDate',                         ['publicationStatuses', 0, 'publicationDate', 'year']),
            ('createdDate',                             ['info', 'createdDate']),
            ('pages',                                   ['info', 'pages']),
            ('volume',                                  ['info', 'volume']),
            ('journalTitle',                            ['info', 'journalAssociation', 'title', 'value']),
            ('journalNumber',                           ['info', 'journalNumber']),
            ('metadataModifBy',                         ['info', 'modifiedBy']),
            ('metadataModifDate',                       ['info', 'modifiedDate']),
            ('pure_link',                               ['info', 'portalUrl']),
            ('recordType',                              ['types', 0, 'value']),
            ('category',                                ['categories', 0, 'value']),
            ('peerReview',                              ['peerReview']),
            ('publicationStatus',                       ['publicationStatuses', 0, 'publicationStatuses', 0, 'value']),
            ('numberOfAuthors',                         ['totalNumberOfAuthors']),
            ('workflow',                                ['workflows', 0, 'value']),
            ('confidential',                            ['confidential']),
            ('publisherName',                           ['publisher', 'names', 0, 'value']),
            ('abstract',                                ['abstracts', 0, 'value']),
            ('managingOrganisationalUnit_name',         ['managingOrganisationalUnit', 'names', 0, 'value']),
            ('managingOrganisationalUnit_uuid',         ['managingOrganisationalUnit', 'uuid']),
            ('managingOrganisationalUnit_externalId',   ['managingOrganisationalUnit', 'externalId']),
        )
        # The order is kept: it defines the key order of the submitted json
        for rdm_field, pure_path in single_fields:
            self._add_field(item, rdm_field, pure_path)

        # Access right
        value = get_value(item, ['openAccessPermissions', 0, 'value'])
        self.data['access_right'] = self._accessright_conversion(value)

        # Language
        value = get_value(item, ['languages', 0, 'value'])
        self.data['language'] = self._language_conversion(value)

    def _process_electronic_versions(self):
        """
        Data relative to files.
        """
        self.data['versionFiles'] = []
        self.rdm_file_review = []

        if 'electronicVersions' in self.item or 'additionalFiles' in self.item:
            # Checks if the file has been already uploaded to RDM and if it has been internally reviewed
            self._get_rdm_file_review()

        if 'electronicVersions' in self.item:
            for electronic_version in self.item['electronicVersions']:
                self.get_files_data(electronic_version)

    def _process_person_associations(self):
        """
        Process data relative to the record contributors.
        """
        if 'personAssociations' not in self.item:
            return

        self.data['contributors'] = []

        file_data = file_read_lines('user_ids_match')

        for item in self.item['personAssociations']:

            self.sub_data = {}

            self._get_contributor_name(item)

            self._add_subdata(item, 'uuid',                   ['person', 'uuid'])
            self._add_subdata(item, 'externalId',             ['person', 'externalId'])
            self._add_subdata(item, 'authorCollaboratorName', ['authorCollaboration', 'names', 0, 'value'])
            self._add_subdata(item, 'personRole',             ['personRoles', 0, 'value'])
            self._add_subdata(item, 'organisationalUnit',     ['organisationalUnits', 0, 'names', 0, 'value'])
            self._add_subdata(item, 'type_p',                 ['externalPerson', 'types', 0, 'value'])
            # For external persons this overrides the uuid set above (only when a value is found)
            self._add_subdata(item, 'uuid',                   ['externalPerson', 'uuid'])

            # Checks if the record owner is available in user_ids_match.txt
            person_external_id = get_value(item, ['person', 'externalId'])
            owner = self.general_functions.get_userid_from_list_by_externalid(person_external_id, file_data)

            if owner and int(owner) not in self.data['owners']:
                self.data['owners'].append(int(owner))

            # ORCID
            self._process_contributor_orcid()

            self.data['contributors'].append(self.sub_data)

    @staticmethod
    def _format_contributor_name(first_name, last_name):
        """Returns 'last name, first name', using a placeholder for each missing part."""
        if not first_name:
            first_name = '(first name not specified)'
        if not last_name:
            last_name = '(last name not specified)'
        return f'{last_name}, {first_name}'

    def _get_contributor_name(self, item: object):
        """Stores the contributor name ('last name, first name') in sub_data."""
        first_name = get_value(item, ['name', 'firstName'])
        last_name = get_value(item, ['name', 'lastName'])

        # A missing last name gets its own placeholder; the first name is no
        # longer overwritten in that case.
        self.sub_data['name'] = self._format_contributor_name(first_name, last_name)

    def _process_contributor_orcid(self):
        """Adds the contributor orcid to sub_data, when the person has a uuid and is not external."""
        if 'uuid' not in self.sub_data:
            return

        person_uuid = self.sub_data['uuid']
        person_name = self.sub_data['name']

        # External persons are not present in 'persons' Pure API endpoint
        if self.sub_data.get('type_p') == 'External person':
            report = f'\tPure get orcid @@ External person @ {person_uuid} @ {person_name}'
            self.report.add(report)
            return

        orcid = self._get_orcid(person_uuid, person_name)
        if orcid:
            self.sub_data['orcid'] = orcid

    def _process_organisational_units(self):
        """
        Process the metadata relative to the organisational units.
        """
        if 'organisationalUnits' not in self.item:
            return

        self.data['organisationalUnits'] = []
        self.data['groupRestrictions'] = []

        for unit in self.item['organisationalUnits']:

            organisational_unit_name = get_value(unit, ['names', 0, 'value'])
            organisational_unit_uuid = get_value(unit, ['uuid'])
            organisational_unit_externalId = get_value(unit, ['externalId'])

            self.data['organisationalUnits'].append({
                'name': organisational_unit_name,
                'uuid': organisational_unit_uuid,
                'externalId': organisational_unit_externalId,
            })

            # Adding organisational unit as group owner
            self.data['groupRestrictions'].append(organisational_unit_externalId)

            # Create group
            self.groups.rdm_create_group(organisational_unit_externalId, organisational_unit_name)

    def _applied_restrictions_check(self):
        """
        Checks if the restrictions applied to the record are valid.
        e.g. ['groups', 'owners', 'ip_range', 'ip_single']
        Invalid values are only reported. Returns False when no restrictions are set.
        """
        if 'appliedRestrictions' not in self.data:
            return False

        for restriction in self.data['appliedRestrictions']:
            if restriction not in possible_record_restrictions:
                report = f"Warning: the value '{restriction}' is not amont the accepted restrictions\n"
                self.report.add(report)
        return True

    def _post_metadata(self):
        """
        Submits the created json to RDM, followed by the record files.
        Returns False when the metadata post fails or the recid can not be
        retrieved, None otherwise.
        """
        uuid = self.item['uuid']

        success_check = {
            'metadata': False,
            'file': False
        }

        # POST REQUEST metadata
        response = self.rdm_requests.post_metadata(self.data)

        # Process response
        if not self._process_post_response(response, uuid):
            return False

        success_check['metadata'] = True

        # After pushing a record's metadata to RDM it takes about one second to be able to get its recid
        time.sleep(1)

        # Gets recid from RDM
        recid = self.general_functions.get_recid(uuid, self.global_counters)
        if not recid:
            return False

        # add record to all_rdm_records.txt
        with open(data_files_name['all_rdm_records'], "a") as records_file:
            records_file.write(f'{uuid} {recid}\n')

        # Submit record FILES
        uploads = []
        for file_name in self.record_files:
            # Submit request
            response = rdm_add_file(file_name, recid)
            # Process response
            uploads.append(self._process_file_response(response, success_check))

        # Files are fine only when every upload succeeded
        # (all([]) is True: a record without files has nothing to fail)
        success_check['file'] = all(uploads)

        # Checks if both metadata and files were correctly transmitted
        self._metadata_and_file_submission_check(success_check)

    def _process_post_response(self, response: object, uuid: str):
        """Counts and reports the metadata post response. Returns True on success."""
        # Count http responses
        self._http_response_counter(response.status_code)

        self.report.add(f"\tRDM post metadata @ {response} @ Uuid: {uuid}")

        if response.status_code >= 300:
            self.global_counters['metadata']['error'] += 1
            return False

        self.global_counters['metadata']['success'] += 1
        return True

    def _process_file_response(self, response: object, success_check: object):
        """
        Counts one file submission as success (truthy response) or error.
        Returns True / False accordingly; on success success_check['file'] is set.
        """
        if response:
            self.global_counters['file']['success'] += 1
            success_check['file'] = True
            return True

        self.global_counters['file']['error'] += 1
        return False

    def _remove_uuid_from_list(self, uuid: str, file_name: str):
        """
        If the given uuid is in the given file then the line will be removed.
        """
        check_if_file_exists(file_name)

        with open(file_name, "r") as f:
            lines = f.readlines()
        with open(file_name, "w") as f:
            f.writelines(line for line in lines if line.strip("\n") != uuid)

    def _add_field(self, item: list, rdm_field: str, path: list):
        """
        Adds the field to the data json (only when a truthy value is found).
        """
        value = get_value(item, path)
        if value:
            self.data[rdm_field] = value
        return

    def _accessright_conversion(self, pure_value: str):
        """
        Converts the Pure access right to the corresponding RDM value.
        Returns False (and reports it) for an unknown Pure value.
        """
        if pure_value in accessright_pure_to_rdm:
            return accessright_pure_to_rdm[pure_value]

        self.report.add('\n--- new access_right ---> not in accessright_pure_to_rdmk array\n\n')
        return False

    def _language_conversion(self, pure_language: str):
        """
        Converts from pure full language name to iso6393 (3 characters).
        Returns False when the language is undefined or has no match.
        """
        if pure_language == 'Undefined/Unknown':
            return False

        # Read iso6393 json file
        with open(iso6393_file_name, 'r') as iso_file:
            languages = json.load(iso_file)

        for language in languages:
            if language['name'] == pure_language:
                return language['iso6393']

        # in case there is no match (e.g. spelling mistake in Pure) ignore field
        return False

    def _get_rdm_file_review(self):
        """
        When a record is updated in Pure, there will be a check if the new file from Pure is the same as the old file in RDM.
        To do so it makes a comparison on the file size.
        If the size is not the same, then it will be uploaded to RDM and a new internal review will be required.
        """
        # Get from RDM file size and internalReview
        params = {'sort': 'mostrecent', 'size': '100', 'page': '1', 'q': self.uuid}
        response = self.rdm_requests.get_metadata(params)

        if response.status_code >= 300:
            self.report.add(f'\nget_rdm_file_size @ {self.uuid} @ {response}')
            return False

        # Load response
        resp_json = json.loads(response.content)

        total_recids = resp_json['hits']['total']
        if total_recids == 0:
            return False

        # [0] because they are ordered, therefore it is the most recent
        record = resp_json['hits']['hits'][0]['metadata']

        for file in record.get('versionFiles', []):
            if 'size' in file and 'internalReview' in file and 'name' in file:
                self.rdm_file_review.append({
                    'size': file['size'],
                    'review': file['internalReview'],
                    'name': file['name'],
                })
        return

    def get_files_data(self, item: dict):
        """
        Gets metadata information from electronicVersions and additionalFiles files.
        It also downloads the relative files. The Metadata without file will be ignored.
        """
        if 'file' not in item:
            return False
        elif 'fileURL' not in item['file'] or 'fileName' not in item['file']:
            return False

        internal_review = False     # Default value

        pure_file_size = get_value(item, ['file', 'size'])
        file_name = get_value(item, ['file', 'fileName'])
        file_url = get_value(item, ['file', 'fileURL'])

        self.pure_rdm_file_match = []

        # Checks if pure_file_size and file_name are the same as any of the files in RDM with the same uuid
        for rdm_file in self.rdm_file_review:

            rdm_review = rdm_file['review']

            # Both sizes are compared as strings, so an int on one side and a
            # str on the other still match.
            if str(pure_file_size) == str(rdm_file['size']) and file_name == rdm_file['name']:
                self.pure_rdm_file_match.append(True)            # Do the old and new file match?
                self.pure_rdm_file_match.append(rdm_review)      # Was the old file reviewed?
                internal_review = rdm_review       # The new uploaded file will have the same review value as in RDM
                break

        self.sub_data = {}

        self.sub_data['internalReview'] = internal_review

        self._add_subdata(item, 'name',            ['file', 'fileName'])
        self._add_subdata(item, 'size',            ['file', 'size'])
        self._add_subdata(item, 'mimeType',        ['file', 'mimeType'])
        self._add_subdata(item, 'digest',          ['file', 'digest'])
        self._add_subdata(item, 'digestAlgorithm', ['file', 'digestAlgorithm'])
        self._add_subdata(item, 'createdBy',       ['creator'])
        self._add_subdata(item, 'createdDate',     ['created'])
        self._add_subdata(item, 'versionType',     ['versionTypes', 0, 'value'])
        self._add_subdata(item, 'licenseType',     ['licenseTypes', 0, 'value'])

        # Access type
        value = get_value(item, ['accessTypes', 0, 'value'])
        self.sub_data['accessType'] = self._accessright_conversion(value)

        # Append sub_data to .data
        self.data['versionFiles'].append(self.sub_data)

        # Download file from Pure
        response = get_pure_file(self, file_url, file_name)

        # Checks if the file is already in RDM, and if it has already been reviewed
        self._process_file_download_response(response, file_name)

    def _add_subdata(self, item: list, rdm_field: str, path: list):
        """
        Adds the field to sub_data (only when a truthy value is found).
        """
        value = get_value(item, path)
        if value:
            self.sub_data[rdm_field] = value

    def _process_file_download_response(self, response, file_name):
        """
        Reports if the file is already in RDM and if it has already been
        reviewed, then queues the file name for submission.
        """
        # If the file is not in RDM
        if len(self.pure_rdm_file_match) == 0:
            match_review = 'File not in RDM '

        # If the file in pure is different from the one in RDM
        elif self.pure_rdm_file_match[0] == False:
            match_review = 'Match: F, Review: -'

        # If the file is the same, checks if the one in RDM has been reviewed by internal staff
        else:
            match_review = 'Match: T, Review: F'
            if self.pure_rdm_file_match[1]:
                match_review = 'Match: T, Review: T'

        file_name_report = shorten_file_name(file_name)

        report = f'\tPure get file @ {response} @ {match_review} @ {file_name_report}'
        self.report.add(report)

        self.record_files.append(file_name)

    def _get_orcid(self, person_uuid: str, name: str):
        """
        Gets from pure a person orcid. Returns False on error or when not found.
        """
        # Pure request
        response = get_pure_metadata('persons', person_uuid, {}, False)

        message = f'\tPure get orcid @ {response} @'

        # Error
        if response.status_code >= 300:
            self.report.add(f'{message} Error: {response.content}')
            return False

        # Load json
        resp_json = json.loads(response.content)

        # Read orcid
        if 'orcid' in resp_json:
            orcid = resp_json['orcid']
            self.report.add(f'{message} {orcid} @ {person_uuid} @ {name}')
            return orcid

        # Not found
        self.report.add(f'{message} Orcid not found @ {person_uuid} @ {name}')
        return False

    def _metadata_and_file_submission_check(self, success_check: dict):
        """
        Checks if both metadata and files were correctly transmitted.
        Returns True when they were, False otherwise.
        """
        if success_check['metadata'] == True and success_check['file'] == True:
            # Remove uuid from to_transmit.txt
            self._remove_uuid_from_list(self.uuid, data_files_name['transfer_uuid_list'])
            return True

        # Add uuid to to_transmit.txt to be re-transmitted
        with open(data_files_name['transfer_uuid_list'], "a") as transfer_file:
            transfer_file.write(f'{self.uuid}\n')
        return False

    def _http_response_counter(self, status_code: int):
        """
        According to the given http status code creates a new object element or increases an existing one.
        """
        http_responses = self.global_counters['http_responses']
        http_responses[status_code] = http_responses.get(status_code, 0) + 1
def __init__(self):
    """Create the helper objects and select the report files.

    Instantiates the database, report, request and general-function helpers
    and directs reports to the 'console' and 'groups' files.
    """
    self.rdm_db = RdmDatabase()
    self.report = Reports()
    self.rdm_requests = Requests()
    self.general_functions = GeneralFunctions()
    self.report_files = ['console', 'groups']
class RdmGroups:
    """Splits and merges RDM groups according to Pure organisational units.

    The public entry points are ``rdm_group_split``, ``rdm_group_merge`` and
    ``rdm_create_group``. Groups are looked up in the 'accounts_role' table,
    memberships in 'accounts_userrole', and changes are applied through the
    ``invenio roles`` command line.
    """

    def __init__(self):
        """Create the helper objects and select the report files."""
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_requests = Requests()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'groups']

    # ------------------------------------------------------------------
    # Split
    # ------------------------------------------------------------------

    def _split_report_and_variables(func):
        """Decorator: report the 'GROUP SPLIT' title and reset new_groups_data."""
        from functools import wraps

        @wraps(func)
        def _wrapper(self, old_group_externalId, new_groups_externalIds):
            self.report.add_template(self.report_files, ['general', 'title'], ['GROUP SPLIT'])
            self.report.add(
                f'\nOld group: {old_group_externalId} @ New groups: {new_groups_externalIds}\n',
                self.report_files)

            # Name and uuid of the new groups are collected here
            self.new_groups_data = []

            # Decorated function (its result is handed back to the caller)
            return func(self, old_group_externalId, new_groups_externalIds)

        return _wrapper

    @_split_report_and_variables
    def rdm_group_split(self, old_group_externalId: str, new_groups_externalIds: list):
        """
        1 - Create new groups
        2 - Add users to new groups
        3 - Remove users from old group
        4 - Delete old group
        5 - Modify RDM record:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False when a new group is not found in Pure or the old group
        is not found in the database, True otherwise.
        """
        for externalId in new_groups_externalIds:
            # Get group information
            group_name = self._get_pure_group_metadata(externalId)
            if not group_name:
                return False

            # Create new group
            self.rdm_create_group(externalId, group_name)

        # Get old group id
        old_group_id = self._get_rdm_group_id(old_group_externalId)
        if old_group_id is None:
            return False

        # Removes users from old group and adds to new groups
        self._rdm_split_users_from_old_to_new_group(old_group_id, old_group_externalId,
                                                    new_groups_externalIds)

        # Modify all related records
        self._rdm_split_modify_record(old_group_externalId, new_groups_externalIds)
        return True

    # ------------------------------------------------------------------
    # Merge
    # ------------------------------------------------------------------

    def _merge_report_and_variables(func):
        """Decorator: report the 'GROUP MERGE' title and reset new_groups_data."""
        from functools import wraps

        @wraps(func)
        def _wrapper(self, old_groups_externalId, new_group_externalId):
            self.report.add_template(self.report_files, ['general', 'title'], ['GROUP MERGE'])
            report = f'\nOld groups: {old_groups_externalId} @ New group: {new_group_externalId}\n'
            self.report.add(report, self.report_files)

            # New group information is collected here
            self.new_groups_data = []

            # Decorated function (its result is handed back to the caller)
            return func(self, old_groups_externalId, new_group_externalId)

        return _wrapper

    @_merge_report_and_variables
    def rdm_group_merge(self, old_groups_externalId: list, new_group_externalId: str):
        """
        1 - Create new group
        2 - Remove users from old groups
        3 - Add users to new group
        4 - Delete old groups
        5 - Modify RDM records:
            . groupRestrictions
            . managingOrganisationUnit (if necessary)
            . organisationUnits

        Returns False when the new group is not found in Pure, True otherwise.
        """
        group_name = self._get_pure_group_metadata(new_group_externalId)
        if not group_name:
            return False

        # Create new group
        self.rdm_create_group(new_group_externalId, group_name)

        # Adds users to new group and removes them from the old ones
        self._merge_users_from_old_to_new_group(old_groups_externalId, new_group_externalId)

        # Modify all related records
        self._rdm_merge_modify_records(old_groups_externalId, self.new_groups_data[0],
                                       new_group_externalId)
        return True

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _run_invenio_command(self, *args: str) -> int:
        """Run ``pipenv run invenio <args>`` and return its exit code.

        The command is executed without a shell, so group names and e-mail
        addresses are passed as single arguments and never interpreted.
        """
        import subprocess

        command = ['pipenv', 'run', 'invenio', *args]
        try:
            return subprocess.run(command, check=False).returncode
        except OSError:
            # The executable could not be started: report it as a failure
            # (non-zero) instead of raising, as a shell invocation would.
            return 127

    def _get_rdm_group_id(self, externalId: str):
        """Report and return the RDM id of a group, or None if it does not exist."""
        response = self.rdm_db.select_query('id, description', 'accounts_role',
                                            {'name': f"'{externalId}'"})
        if not response:
            report = f'\tOld group info @ ExtId: {add_spaces(externalId)} @ Not in database'
            self.report.add(report, self.report_files)
            return None

        group_id = response[0][0]
        group_name = response[0][1]

        report = f'\tOld group info @ ExtId: {add_spaces(externalId)} @ RDM id: {add_spaces(group_id)} @ {group_name}'
        self.report.add(report, self.report_files)
        return group_id

    def _split_organisational_units(self, item, old_group_externalId: str):
        """Replace the old organisational unit of a record with the new ones.

        A new list is built instead of removing entries from the list being
        iterated, which would skip the element following each removed one.
        Units that are already present are not added a second time.
        """
        units = [unit for unit in item['organisationalUnits']
                 if unit['externalId'] != old_group_externalId]
        present = {unit['externalId'] for unit in units}

        for new_unit in self.new_groups_data:
            if new_unit['externalId'] not in present:
                units.append(new_unit)
                present.add(new_unit['externalId'])

        item['organisationalUnits'] = units
        return item

    def _split_group_restrictions(self, item, old_group_externalId: str,
                                  new_groups_externalIds: list):
        """Replace the old group restriction of a record with the new groups."""
        restrictions = item['groupRestrictions']
        if old_group_externalId in restrictions:
            restrictions.remove(old_group_externalId)

        # NOTE(review): the indentation of the original was lost; the new
        # groups are assumed to be added whether or not the old one was
        # present (mirrors _process_group_restrictions) - please confirm.
        for externalId in new_groups_externalIds:
            if externalId not in restrictions:
                restrictions.append(externalId)
        return item

    def _rdm_split_modify_record(self, old_group_externalId: str, new_groups_externalIds: list):
        """Update the records returned by the RDM query for the old group after a split."""
        # Get from RDM all old group's records
        response = self.rdm_requests.get_metadata_by_query(old_group_externalId)

        resp_json = json.loads(response.content)
        total_items = resp_json['hits']['total']

        report = f"\tModify old g. records @ ExtId: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
        self.report.add(report, self.report_files)

        if total_items == 0:
            self.report.add('\tNothing to modify @ End', self.report_files)
            return True

        # Iterates over all old group records
        for hit in resp_json['hits']['hits']:
            item = hit['metadata']

            # organisationalUnits: old one out, new ones in
            self._split_organisational_units(item, old_group_externalId)

            # Change group restrictions
            self._split_group_restrictions(item, old_group_externalId, new_groups_externalIds)

            # Change managingOrganisationalUnit
            item = self._process_managing_organisational_unit(item, old_group_externalId)

            # Update record
            self.general_functions.update_rdm_record(item['recid'], item)

        return True

    def _process_managing_organisational_unit(self, item: object, old_group_externalId: str):
        """Point the managing unit to the first new group if it was the old group."""
        if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
            new_group = self.new_groups_data[0]
            item['managingOrganisationalUnit_name'] = new_group['name']
            item['managingOrganisationalUnit_uuid'] = new_group['uuid']
            item['managingOrganisationalUnit_externalId'] = new_group['externalId']
        return item

    def _rdm_split_users_from_old_to_new_group(self, old_group_id: str,
                                               old_group_externalId: str,
                                               new_groups_externalIds: list):
        """Add every user of the old group to all new groups, then remove it from the old one."""
        # Get all users in old group
        response = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                            {'role_id': old_group_id})

        report = 'Old group @@ Num. of users: '
        if not response:
            self.report.add(f'\t{report} 0', self.report_files)
            return

        self.report.add(f'\t{report} {len(response)}', self.report_files)

        for row in response:
            user_id = row[0]

            # Get user email
            user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                  {'id': user_id})[0][0]

            for new_group_externalId in new_groups_externalIds:
                # Add user to new groups
                self._group_add_user(user_email, new_group_externalId, user_id)

            # Remove user from old group
            self._group_remove_user(user_email, old_group_externalId)

    def _rdm_merge_modify_records(self, old_groups_externalId: list, new_group_data: dict,
                                  new_group_externalId: str):
        """Update the records returned by the RDM query for each old group after a merge."""
        for old_group_externalId in old_groups_externalId:

            # Only reports whether the group exists; the result is not used
            self._rdm_check_if_group_exists(old_group_externalId)

            # Get record metadata
            response = self.rdm_requests.get_metadata_by_query(old_group_externalId)

            resp_json = json.loads(response.content)
            total_items = resp_json['hits']['total']

            report = f"\tModify records @ Group: {add_spaces(old_group_externalId)} @ Num. of records: {total_items}"
            self.report.add(report, self.report_files)

            if total_items == 0:
                continue

            # Iterates over all old group records
            for hit in resp_json['hits']['hits']:
                item = hit['metadata']

                # Organisational units
                item = self._process_organisational_units(item, new_group_data,
                                                          old_groups_externalId)

                # Group restrictions
                self._process_group_restrictions(item, old_group_externalId,
                                                 new_group_externalId)

                # Managing Organisational Unit
                if item['managingOrganisationalUnit_externalId'] == old_group_externalId:
                    item['managingOrganisationalUnit_name'] = new_group_data['name']
                    item['managingOrganisationalUnit_uuid'] = new_group_data['uuid']
                    item['managingOrganisationalUnit_externalId'] = new_group_data['externalId']

                # Update record
                self.general_functions.update_rdm_record(item['recid'], item)

    def _process_organisational_units(self, item, new_group_data, old_groups_externalId):
        """Put the new group first and drop the old groups from organisationalUnits."""
        excluded = set(old_groups_externalId)
        excluded.add(new_group_data['externalId'])

        kept = [unit for unit in item['organisationalUnits']
                if unit['externalId'] not in excluded]

        item['organisationalUnits'] = [new_group_data] + kept
        return item

    def _process_group_restrictions(self, item, old_group_externalId, new_group_externalId):
        """Remove the old group from groupRestrictions and make sure the new one is listed."""
        restrictions = item['groupRestrictions']
        # Remove old group
        if old_group_externalId in restrictions:
            restrictions.remove(old_group_externalId)
        # Add new group
        if new_group_externalId not in restrictions:
            restrictions.append(new_group_externalId)
        return item

    def _merge_users_from_old_to_new_group(self, old_groups_externalId: list,
                                           new_group_externalId: str):
        """Move the users of every old group to the new group.

        Returns False as soon as an old group is not found in the database.
        """
        # Iterate over old groups
        for old_group_externalId in old_groups_externalId:

            # Get group id
            response = self.rdm_db.select_query('id, description', 'accounts_role',
                                                {'name': f"'{old_group_externalId}'"})
            if not response:
                self.report.add(
                    f'\nWarning @ Old group ({old_group_externalId}) not in database @ END TASK\n'
                )
                return False

            old_group_id = response[0][0]
            old_group_name = response[0][1]

            # Get all users id that are in this group
            old_group_users = self.rdm_db.select_query('user_id', 'accounts_userrole',
                                                       {'role_id': old_group_id})
            if not old_group_users:
                old_group_users = []

            report = f"\tOld group @ ExtId: {add_spaces(old_group_externalId)} @ Num. users: {add_spaces(len(old_group_users))} @ {old_group_name}"
            self.report.add(report, self.report_files)

            for row in old_group_users:
                user_id = row[0]

                # Get user email
                user_email = self.rdm_db.select_query('email', 'accounts_user',
                                                      {'id': user_id})[0][0]

                # - - Add user to new group - -
                self._group_add_user(user_email, new_group_externalId, user_id)

                # - - Remove user from old group - -
                self._group_remove_user(user_email, old_group_externalId)

            # TODO: delete old group (not implemented)

    def _get_pure_group_metadata(self, externalId: str):
        """ Get organisationalUnit name and uuid

        The unit is searched in the organisationalUnits of the unit's research
        outputs. When found, its externalId, uuid and name are appended to
        ``self.new_groups_data`` and the name is returned; otherwise False.
        """
        # PURE REQUEST
        response = get_pure_metadata('organisational-units',
                                     f'{externalId}/research-outputs',
                                     {'page': 1, 'pageSize': 100})

        report = f'\tNew group info @ ExtId: {add_spaces(externalId)} @ '

        # Check response
        if response.status_code >= 300:
            report += 'Not in pure - END TASK\n'
            self.report.add(report, self.report_files)
            self.report.add(response.content, self.report_files)
            return False

        # Load json
        data = json.loads(response.content)

        # A unit without research outputs yields no items: nothing to index
        for research_output in data.get('items') or []:
            for organisationalUnit in research_output.get('organisationalUnits', []):
                if organisationalUnit['externalId'] != externalId:
                    continue

                organisationalUnit_data = {
                    'externalId': externalId,
                    'uuid': organisationalUnit['uuid'],
                    'name': organisationalUnit['names'][0]['value'],
                }

                report += f"{organisationalUnit_data['uuid']} @ {organisationalUnit_data['name']}"
                self.report.add(report, self.report_files)

                self.new_groups_data.append(organisationalUnit_data)
                return organisationalUnit_data['name']

        report += 'Not found in research outputs - END TASK\n'
        self.report.add(report, self.report_files)
        return False

    def _rdm_check_if_group_exists(self, group_externalId: str):
        """ Checks if the group already exists"""
        response = self.rdm_db.select_query('*', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})
        if not response:
            return False

        report = f'\tNew group check @@ ExtId: {add_spaces(group_externalId)} @ Already exists'
        self.report.add(report)
        return True

    def rdm_create_group(self, externalId: str, group_name: str):
        """Create the group unless it already exists; return True on success."""
        # Checks if the group already exists
        if self._rdm_check_if_group_exists(externalId):
            return True

        # Spaces become underscores, as before. Parentheses need no escaping
        # because the command is not interpreted by a shell.
        description = group_name.replace(' ', '_')

        # Run command
        response = self._run_invenio_command('roles', 'create', externalId, '-d', description)

        report = '\tNew group check @@'
        if response != 0:
            self.report.add(f'{report} Error: {response}')
            return False

        self.report.add(f'{report} Group created @ External id: {externalId}')
        return True

    def _rdm_add_user_to_group(self, user_id: int, group_externalId: str, group_name: str):
        """Add a user to a group, creating the group first when it does not exist."""
        # Get user's rdm email
        user_email = self.rdm_db.select_query('email', 'accounts_user',
                                              {'id': user_id})[0][0]

        # Get group id
        response = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_externalId}'"})
        if not response:
            # If the group does not exist then creates it
            self.rdm_create_group(group_externalId, group_name)

            # Repeats the query to get the group id
            response = self.rdm_db.select_query('id', 'accounts_role',
                                                {'name': f"'{group_externalId}'"})

        group_id = response[0][0]

        # Checks if match already exists
        response = self.rdm_db.select_query('*', 'accounts_userrole',
                                            {'user_id': user_id, 'role_id': group_id})
        if response:
            report = f'\tRDM user in group @ User id: {add_spaces(user_id)} @@ Already belongs to group {group_externalId} (id {group_id})'
            self.report.add(report)
            return True

        # Adds user to group
        response = self._run_invenio_command('roles', 'add', user_email, group_externalId)
        if response != 0:
            self.report.add(f'Warning @ Creating group response: {response}')

    def _group_add_user(self, user_email: str, new_group_externalId: str, user_id: str):
        """Add the user to the group unless already a member; return True on success."""
        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{new_group_externalId}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole',
                                            {'user_id': user_id, 'role_id': group_id})
        if response:
            return True

        response = self._run_invenio_command('roles', 'add', user_email, new_group_externalId)

        report = f'\tAdd user to group @ ExtId: {add_spaces(new_group_externalId)} @ User id: {add_spaces(user_id)}'
        if response != 0:
            self.report.add(f'{report} @ Error: {response}', self.report_files)
            return False

        self.report.add(f'{report} @ Success', self.report_files)
        return True

    def _group_remove_user(self, user_email: str, group_name: str):
        """Remove the user from the group; return True if removed or not a member."""
        # Get user id
        user_id = self.rdm_db.select_query('id', 'accounts_user',
                                           {'email': f"'{user_email}'"})[0][0]

        # Get group id
        group_id = self.rdm_db.select_query('id', 'accounts_role',
                                            {'name': f"'{group_name}'"})[0][0]

        # Check if the user is already in the group
        response = self.rdm_db.select_query('*', 'accounts_userrole',
                                            {'user_id': user_id, 'role_id': group_id})

        report = f'Remove user fromGroup @ ExtId: {add_spaces(group_name)} @ User id: {add_spaces(user_id)}'

        if not response:
            self.report.add(f'\t{report} @ Not in group (already removed)', self.report_files)
            return True

        # Remove user from old group
        response = self._run_invenio_command('roles', 'remove', user_email, group_name)

        if response != 0:
            self.report.add(f'\t{report} @ Error: {response}', self.report_files)
            return False

        self.report.add(f'\t{report} @ Success', self.report_files)
        return True