def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
    """
    Look up a person in Pure and return that person's uuid.

    Queries the 'persons' endpoint with ``key_value`` as the search term and
    walks the result pages (10 items per page) until an item whose
    ``key_name`` field equals ``key_value`` is found.

    Returns:
        The uuid string of the matching person, or False when the request
        fails, the matching uuid is not 36 characters long, or no item
        matches on any page.
    """
    page = 1
    page_size = 10
    next_page = True

    while next_page:
        params = {
            'page': page,
            'pageSize': page_size,
            'q': f'"{key_value}"',
        }
        response = get_pure_metadata('persons', '', params)

        if response.status_code >= 300:
            self.report.add(response.content, self.report_files)
            return False

        record_json = json.loads(response.content)

        for item in record_json['items']:
            # Skip items that do not carry key_name at all instead of
            # failing with KeyError on the lookup.
            if key_name not in item or item[key_name] != key_value:
                continue

            first_name = item['name']['firstName']
            last_name = item['name']['lastName']
            uuid = item['uuid']

            self.report.add(
                f'Name: {first_name} {last_name}\nUuid: {uuid}',
                self.report_files)

            if len(uuid) != 36:
                self.report.add(
                    '\n- Warning! Incorrect user_uuid length -\n',
                    self.report_files)
                return False
            return uuid

        # Falsy result from get_next_page ends the loop (no further page)
        next_page = get_next_page(record_json)
        page += 1

    self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
    return False
def get_pure_record_metadata_by_uuid(uuid: str):
    """
    Fetch a record's metadata from the Pure 'research-outputs' endpoint.

    A one-line outcome (not found / error / success) is always added to the
    report. On an error status an additional line with the response body is
    added as well.

    Returns:
        The decoded JSON body on success, or False when the response status
        code is 300 or above.
    """
    response = get_pure_metadata('research-outputs', uuid)
    failed = response.status_code >= 300

    report = f'\tPure get metadata - {response}'
    if response.status_code == 404:
        report += f' - Metadata not found in Pure for record {uuid}'
    elif failed:
        report += f' - Error: {response.content}'
    else:
        report += f' - {uuid}'
    reports.add(report)

    if failed:
        report = f'Get Pure metadata - {response.content}\n'
        # The target list is the second argument of add(); the previous code
        # subscripted the message string with it, which raises TypeError.
        # NOTE(review): assumes reports.add(message, files) like the
        # self.report.add(...) calls elsewhere in this file - confirm.
        reports.add(report, ['console', 'records'])
        return False

    return json.loads(response.content)
def _get_pure_group_metadata(self, externalId: str):
    """
    Get an organisational unit's name and uuid from Pure.

    Requests the research outputs of the organisational unit identified by
    ``externalId`` and searches the 'organisationalUnits' of the first
    returned item for the entry with the same externalId. When found, a dict
    with 'externalId', 'uuid' and 'name' is appended to
    ``self.new_groups_data`` and a line is added to the report.

    Returns:
        The unit's name (first entry of its 'names' list), or False when the
        request fails, the response contains no items, or no unit matches.
    """
    response = get_pure_metadata(
        'organisational-units',
        f'{externalId}/research-outputs',
        {'page': 1, 'pageSize': 100},
    )

    report = f'\tNew group info @ ExtId: {add_spaces(externalId)} @ '

    if response.status_code >= 300:
        report += 'Not in pure - END TASK\n'
        self.report.add(report, self.report_files)
        self.report.add(response.content, self.report_files)
        return False

    items = json.loads(response.content).get('items')
    # An empty (or absent) item list previously raised IndexError / KeyError;
    # treat it like the "no matching unit" case instead.
    if not items:
        return False

    for unit in items[0]['organisationalUnits']:
        # .get: skip units that carry no externalId rather than raising
        if unit.get('externalId') != externalId:
            continue

        group = {
            'externalId': externalId,
            'uuid': unit['uuid'],
            'name': unit['names'][0]['value'],
        }

        report += f"{group['uuid']} @ {group['name']}"
        self.report.add(report, self.report_files)

        self.new_groups_data.append(group)
        return group['name']

    return False
def get_pure_by_page(self, page_begin: int, page_end: int, page_size: int):
    """
    Get records from the Pure 'research-outputs' endpoint page by page and
    pass each of them to ``self.rdm_add_record.create_invenio_data``.

    Pages ``page_begin`` up to, but not including, ``page_end`` are
    processed. The global counters are re-initialised and a summary is
    reported for every page.

    Returns:
        False as soon as a page request answers with a status code of 300 or
        above (the response body is added to the report and the remaining
        pages are not requested); None otherwise.
    """
    for page in range(page_begin, page_end):
        self.global_counters = initialize_counters()

        # Report intro
        self.report.add_template(['console'], ['general', 'title'], ['PAGES'])
        self.report.add_template(
            ['console'], ['pages', 'page_and_size'], [page, page_size])

        # Pure get request
        response = get_pure_metadata(
            'research-outputs', '', {'page': page, 'pageSize': page_size})

        # Same status check the other Pure requests in this file perform;
        # without it an error body would be parsed as a page of records.
        if response.status_code >= 300:
            self.report.add(response.content)
            return False

        resp_json = json.loads(response.content)

        # Creates data to push to RDM
        for item in resp_json['items']:
            self.report.add('')  # adds new line in the console
            self.rdm_add_record.create_invenio_data(self.global_counters, item)

        self.report_summary(page, page_size)
def _changes_by_date(self, changes_date: str):
    """
    Get from Pure all changes of a given date and process them page by page.

    Each page is requested from the 'changes' endpoint using a reference,
    which is ``changes_date`` for the first page and the last path segment of
    the next-page link afterwards. For every page the records are first
    handed to ``_delete_records`` and then to ``_update_records``.

    Returns:
        False when a request answers with a status code of 300 or above
        (the response body is added to the report); True when
        ``_records_to_process`` yields nothing to process or when there is
        no further page.
    """
    reference = changes_date
    page = 1

    while reference:
        # Get from pure all changes of a certain date
        response = get_pure_metadata('changes', reference, {})

        if response.status_code >= 300:
            self.report.add(response.content, self.report_files)
            return False

        # Check if there are records in the response from pure
        json_response = self._records_to_process(response, page, changes_date)

        # If there are no records to process
        if not json_response:
            return True

        # Used to check if there are doubled tasks
        # (e.g. update uuid and delete same uuid)
        self.duplicated_uuid = []

        self._initialize_local_counters()

        # Iterates over all records that need to be deleted
        self._delete_records(json_response)

        # Create / Add / Update
        self._update_records(json_response)

        # get_next_page is used as a falsy loop terminator elsewhere in this
        # file, so only split the link when there actually is one.
        next_link = get_next_page(json_response)
        reference = next_link.split('/')[-1] if next_link else None
        page += 1

    return True
def _get_orcid(self, person_uuid: str, name: str):
    """
    Request a person from Pure and return the 'orcid' field of the answer.

    Every outcome (request error, orcid found, orcid missing) is written to
    the report as a single line.

    Returns:
        The orcid value, or False when the response status code is 300 or
        above or the person's data contains no 'orcid' key.
    """
    response = get_pure_metadata('persons', person_uuid, {}, False)
    prefix = f'\tPure get orcid @ {response} @'

    # Request failed
    if response.status_code >= 300:
        self.report.add(f'{prefix} Error: {response.content}')
        return False

    person = json.loads(response.content)

    # No orcid in the person's data
    if 'orcid' not in person:
        self.report.add(f'{prefix} Orcid not found @ {person_uuid} @ {name}')
        return False

    found = person['orcid']
    self.report.add(f'{prefix} {found} @ {person_uuid} @ {name}')
    return found
def run_owners(self, identifier: str):
    """
    Process all Pure research outputs that belong to one user.

    The user is resolved in RDM (``_get_user_id_from_rdm``) and in Pure
    (``_get_user_uuid_from_pure``, searching by ``identifier``). The user's
    research outputs are then read from Pure 100 at a time: a record without
    an RDM recid is created, any other record is passed to
    ``_process_record_owners``.

    Returns:
        None when the RDM user id is not found, or after all pages were
        processed and the final report was written; False when the Pure
        user uuid is not found or a page request answers with a status code
        of 300 or above; True when ``_process_response`` yields nothing for
        a page.
    """
    # TEMPORARY: the identifier value is hard-coded
    # (a further externalId value, '3261', was used here before)
    if identifier == 'externalId':
        identifier_value = '30'
    else:
        identifier_value = '0000-0002-4154-6945'

    self.report.add(f'\n{identifier}: {identifier_value}\n')

    # Without an RDM user there is no owner to add to the records
    self.user_id = self._get_user_id_from_rdm()
    if not self.user_id:
        return

    # Pure uuid of the same user
    self.user_uuid = self._get_user_uuid_from_pure(identifier, identifier_value)
    if not self.user_uuid:
        return False

    # Only users identified by externalId are passed to _add_user_ids_match
    if identifier == 'externalId':
        self._add_user_ids_match(identifier_value)

    self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

    page_number = 1
    has_next = True
    while has_next:
        query = {'sort': 'modified', 'page': page_number, 'pageSize': 100}
        response = get_pure_metadata(
            'persons', f'{self.user_uuid}/research-outputs', query)

        if response.status_code >= 300:
            return False

        # Initial processing of the response and json load
        pure_json = self._process_response(response, page_number)

        # Nothing returned for this page
        if not pure_json:
            return True

        # Decide now whether another page follows the current one
        has_next = get_next_page(pure_json)

        for record in pure_json['items']:
            record_uuid = record['uuid']
            short_title = shorten_file_name(record['title'])

            self.report.add(f"\n\tRecord uuid @ {record_uuid} @ {short_title}")

            recid = self.general_functions.get_recid(
                record_uuid, self.global_counters)

            if recid == False:
                # Record is not in RDM yet: create it
                self._create_rdm_record(record)
            else:
                # Record exists: handle its owners
                self._process_record_owners(recid)

        page_number += 1

    self._final_report()