def __init__(self):
    """Create the helper objects this instance works with.

    NOTE(review): no enclosing class header is visible at this point; the
    body matches ``RdmOwners.__init__`` defined later in the file - confirm
    whether this is a stray duplicate.
    """
    # Collaborators are instantiated once and kept on the instance.
    self.rdm_requests = Requests()
    self.rdm_db = RdmDatabase()
    self.report = Reports()
    self.rdm_add_record = RdmAddRecord()
    self.general_functions = GeneralFunctions()
    # presumably the report destinations passed to self.report calls - confirm
    self.report_files = ['console', 'owners']
class AddFromUuidList:
    """Reads a list of record uuids from a text file and submits them to RDM."""

    def __init__(self):
        self.report = Reports()
        self.add_record = RdmAddRecord()

    def _set_counters_and_title(func):
        """Decorator: print the task title and reset the global counters.

        The wrapper runs before every call of the decorated method, so each
        run starts from freshly initialized ``self.global_counters``.
        """

        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'],
                                     ['PUSH RECORDS FROM LIST'])
            self.global_counters = initialize_counters()
            # Decorated method; its result is passed through to the caller.
            return func(self)

        return _wrapper

    @_set_counters_and_title
    def add_from_uuid_list(self):
        """Submit to RDM every uuid listed in the transfer file.

        The file path is looked up under
        ``data_files_name['transfer_uuid_list']`` (one uuid per line).
        Entries rejected by ``check_uuid_authenticity`` are reported and
        skipped; all other uuids are pushed one by one.
        """
        uuids = self._read_file()
        if not uuids:
            return

        for line in uuids:
            # Drop the trailing newline kept by readlines().
            record_uuid = line.split('\n')[0]

            # Checks if length of the uuid is correct
            if not check_uuid_authenticity(record_uuid):
                self.report.add('Invalid uuid lenght.')
                continue

            self.add_record.push_record_by_uuid(self.global_counters,
                                                record_uuid)

    def _read_file(self):
        """Return the lines of the transfer file, or False when it is empty.

        Returns:
            list[str] with the raw lines (newline included), or ``False``
            after reporting that there is nothing to transfer.
        """
        file_name = data_files_name['transfer_uuid_list']

        # Context manager so the handle is closed even if reading fails.
        with open(file_name, 'r') as uuid_file:
            uuids = uuid_file.readlines()

        if not uuids:
            self.report.add('\nThere is nothing to transfer.\n')
            return False
        return uuids
class RunPages:
    """Transfers Pure research outputs to RDM one result page at a time."""

    def __init__(self):
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()

    def get_pure_by_page(self, page_begin: int, page_end: int, page_size: int):
        """Get records from Pure 'research-outputs' by page and submit them to RDM.

        Args:
            page_begin: first page to process.
            page_end: page at which to stop; it is NOT processed itself
                (pages are taken from ``range(page_begin, page_end)``).
            page_size: number of records requested per page.

        A page whose Pure request answers with a status >= 300 is reported
        and skipped; the remaining pages are still processed.
        """
        for page in range(page_begin, page_end):
            # Counters are reset for every page, so each summary is per page.
            self.global_counters = initialize_counters()

            # Report intro
            self.report.add_template(['console'], ['general', 'title'],
                                     ['PAGES'])
            self.report.add_template(['console'], ['pages', 'page_and_size'],
                                     [page, page_size])

            # Pure get request
            params = {'page': page, 'pageSize': page_size}
            response = get_pure_metadata('research-outputs', '', params)

            # Same guard the other tasks in this file apply: do not try to
            # parse an error payload as a page of records.
            if response.status_code >= 300:
                self.report.add(response.content)
                continue

            # Load json response
            resp_json = json.loads(response.content)

            # Creates data to push to RDM
            for item in resp_json['items']:
                self.report.add('')  # adds new line in the console
                self.rdm_add_record.create_invenio_data(self.global_counters,
                                                        item)

            self.report_summary(page, page_size)

    def report_summary(self, pag, page_size):
        """Report the counters collected for page ``pag``."""
        # Global counters
        self.report.summary_global_counters(['console'], self.global_counters)
        # Summary pages.log
        self.report.pages_single_line(self.global_counters, pag, page_size)
def __init__(self):
    """Create the helper objects this instance works with.

    NOTE(review): no enclosing class header is visible at this point; the
    body matches ``AddFromUuidList.__init__`` defined earlier in the file -
    confirm whether this is a stray duplicate.
    """
    # Collaborators are instantiated once and kept on the instance.
    self.report = Reports()
    self.add_record = RdmAddRecord()
def __init__(self):
    """Create the helper objects this instance works with.

    NOTE(review): no enclosing class header is visible at this point; the
    body matches ``PureChangesByDate.__init__`` defined right after it -
    confirm whether this is a stray duplicate.
    """
    # Collaborators are instantiated once and kept on the instance.
    self.add_record = RdmAddRecord()
    self.report = Reports()
    self.delete = Delete()
    self.general_functions = GeneralFunctions()
class PureChangesByDate:
    """Applies to RDM the changes reported by Pure's 'changes' endpoint."""

    def __init__(self):
        self.add_record = RdmAddRecord()
        self.report = Reports()
        self.delete = Delete()
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """Process every date whose Pure changes have not been handled yet.

        Gets from Pure 'changes' endpoint all records that have been
        created / updated / deleted and modifies accordingly the relative
        RDM records. Dates are processed in reverse order of the list
        returned by ``_get_missing_updates`` (i.e. oldest first).
        """
        # Get date of last update
        missing_updates = self._get_missing_updates()
        # TODO(review): debugging override kept from the original; while it
        # is here the computed list above is ignored. Remove before release.
        missing_updates = ['2020-05-15']  # TEMPORARY !!!!!

        if not missing_updates:
            self.report.add('\nNothing to update.\n')
            return

        for date_to_update in reversed(missing_updates):
            self._changes_by_date(date_to_update)

    def _set_counters_and_title(func):
        """Decorator: reset counters, print the header, run, then summarize."""

        def _wrapper(self, changes_date: str):
            # Initialize global counters
            self.global_counters = initialize_counters()
            # The summary below reads self.local_counters; create them here
            # so an early return of the decorated method (request error, no
            # records) cannot leave them undefined.
            self._initialize_local_counters()

            self.report_files = ['console', 'changes']
            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['CHANGES'])
            self.report.add(f'\nProcessed date: {changes_date}',
                            self.report_files)

            # Decorated function
            result = func(self, changes_date)

            self._report_summary()
            return result

        return _wrapper

    @_set_counters_and_title
    def _changes_by_date(self, changes_date: str):
        """Get from Pure all changes that took place on ``changes_date``.

        Pages are followed through the reference returned by
        ``get_next_page`` until there is no next page or a page has no
        records.

        Returns:
            False when a Pure request fails (status >= 300), True otherwise.
        """
        reference = changes_date
        page = 1

        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page,
                                                     changes_date)

            # If there are no records to process
            if not json_response:
                return True

            # Used to check if there are doubled tasks
            # (e.g. update uuid and delete same uuid)
            # NOTE(review): both the uuid list and the local counters are
            # reset on every page, so the final summary only reflects the
            # last processed page - confirm this is intended.
            self.duplicated_uuid = []
            self._initialize_local_counters()

            # Iterates over all records that need to be deleted
            self._delete_records(json_response)

            # Create / Add / Update
            self._update_records(json_response)

            # Gets the reference code of the next page. get_next_page is
            # used as a truth value elsewhere in this file, so guard against
            # a falsy (non-string) result before splitting it.
            next_page = get_next_page(json_response)
            reference = next_page.split('/')[-1] if next_page else None
            page += 1

        return True

    def _records_to_process(self, response: object, page: int,
                            changes_date: str):
        """Check if there are records in the response from Pure.

        Returns:
            The decoded json response, or False when its ``count`` is 0. In
            that case ``changes_date`` is appended to the successful-changes
            file, marking the date as fully processed.
        """
        # Load response json
        json_response = json.loads(response.content)

        number_records = json_response["count"]

        if number_records == 0:
            # Adds the date to successful_changes.txt
            with open(data_files_name['successful_changes'], "a") as done:
                done.write(f'{changes_date}\n')

            if page == 1:
                # If there are no changes at all
                self.report.add('\n\nNothing to transfer.\n\n',
                                self.report_files)
            return False

        report_line = f'\nPag{add_spaces(page)} @ Pure get changes @ {response} @ Number of items: {add_spaces(number_records)}'
        self.report.add(report_line, self.report_files)

        return json_response

    def _delete_records(self, json_response: dict):
        """Process all items of the Pure response that are deletions.

        Only items with both 'changeType' and 'uuid', whose
        'familySystemName' is 'ResearchOutput' and whose change type is
        'DELETE' are handled; everything else is skipped silently (the
        skipped items are counted later by ``_update_records``).

        Returns:
            Always True.
        """
        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                continue
            # .get(): an item without the key is simply not a ResearchOutput
            if item.get('familySystemName') != 'ResearchOutput':
                continue
            if item['changeType'] != 'DELETE':
                continue

            uuid = item['uuid']
            # Remember the uuid so a later create/update of it is skipped.
            self.duplicated_uuid.append(uuid)
            self.local_counters['delete'] += 1

            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"
            self.report.add(report)

            # Gets the record recid
            recid = self.general_functions.get_recid(uuid,
                                                     self.global_counters)

            if recid:
                # Deletes the record from RDM
                self.delete.record(recid)
            else:
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1

        return True

    def _update_records(self, json_response: dict):
        """Process all items of the Pure response to be created / updated.

        Items are counted in ``self.local_counters`` as 'incomplete'
        (missing 'changeType' or 'uuid'), 'not_ResearchOutput',
        'duplicated' (uuid already handled), 'create' or 'update'.
        'DELETE' items are ignored here.
        """
        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
                continue
            # .get(): an item without the key is simply not a ResearchOutput
            if item.get('familySystemName') != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
                continue
            if item['changeType'] == 'DELETE':
                continue

            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1
                continue

            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type - {item['changeType']}"
            self.report.add(report)

            if item['changeType'] in ('ADD', 'CREATE'):
                self.local_counters['create'] += 1
            elif item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1

            # Checks if this uuid has already been created / updated / deleted
            self.duplicated_uuid.append(uuid)

            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self):
        """Return the recent dates whose Pure changes were not processed.

        Reads the successful-changes file once and returns, newest first,
        every date of the last ``days_span`` days (today included) that does
        not appear in it, formatted as 'YYYY-MM-DD'.
        """
        file_name = data_files_name['successful_changes']

        check_if_file_exists(file_name)

        # Read once instead of re-opening the file for every checked date.
        with open(file_name, 'r') as changes_file:
            processed_dates = changes_file.read()

        days_span = 7
        today = datetime.today().date()

        missing_updates = []
        for days_back in range(days_span):
            date_check = str(today - timedelta(days=days_back))
            if date_check not in processed_dates:
                missing_updates.append(date_check)

        return missing_updates

    def _report_summary(self):
        """Report the global counters followed by the local ones."""
        # Global counters
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

        # Values are passed in the insertion order of self.local_counters.
        arguments = [add_spaces(value)
                     for value in self.local_counters.values()]
        self.report.add_template(self.report_files, ['changes', 'summary'],
                                 arguments)

    def _initialize_local_counters(self):
        """Reset ``self.local_counters`` to zero for every category."""
        # incomplete:          when the uuid or changeType are not specified
        # duplicated:          e.g. when a record has been modified twice in a day
        # not_ResearchOutput:  when familySystemName is not ResearchOutput
        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
        }
class RdmOwners:
    """Keeps RDM record ownership in sync with a user's records in Pure."""

    def __init__(self):
        self.rdm_requests = Requests()
        self.rdm_db = RdmDatabase()
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()
        self.general_functions = GeneralFunctions()
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        """Decorator: print the task title and reset the global counters."""

        def _wrapper(self, identifier):
            self.report.add_template(['console'], ['general', 'title'],
                                     ['OWNERS CHECK'])
            self.global_counters = initialize_counters()
            # Decorated function; pass its result through to the caller
            return func(self, identifier)

        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str):
        """Make the logged in user an owner of all of their Pure records.

        Gets from pure all the records related to a certain user (based on
        orcid or externalId), afterwards it modifies/create RDM records
        accordingly.

        Args:
            identifier: name of the Pure field used to find the user; the
                value 'externalId' is handled specially, any other value
                uses the first hard-coded identifier below.

        Returns:
            None when no single logged in user is found in RDM, False when
            the user is not found in Pure or a Pure request fails, True
            otherwise.
        """
        # TODO(review): hard-coded identifiers kept from the original.
        identifier_value = '0000-0002-4154-6945'  # TEMPORARY
        if identifier == 'externalId':  # TEMPORARY
            # identifier_value = '3261'  # TEMPORARY
            identifier_value = '30'  # TEMPORARY

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add
        # to the record.
        if not self.user_id:
            return

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier,
                                                       identifier_value)
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)

        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata(
                'persons', f'{self.user_uuid}/research-outputs', params)
            if response.status_code >= 300:
                # Report the failure like the other Pure requests do
                self.report.add(response.content, self.report_files)
                return False

            # Initial response process and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(
                    uuid, self.global_counters)

                # Record NOT in RDM, create it
                if not recid:
                    self._create_rdm_record(item)
                    continue

                # Gets record metadata from RDM and checks if the user is
                # already a record owner
                self._process_record_owners(recid)

            page += 1

        self._final_report()
        return True

    def _process_record_owners(self, recid):
        """Add the user to the owners of record ``recid`` when missing.

        Gets record metadata from RDM and checks if the user is already a
        record owner. An RDM answer with status >= 300 is reported and the
        record is left untouched.
        """
        response = self.rdm_requests.get_metadata_by_recid(recid)

        # presumably the same response type as rdm_requests.get_metadata,
        # whose status_code is checked the same way in this class
        if response.status_code >= 300:
            self.report.add(response.content)
            return

        rdm_json = json.loads(response.content)['metadata']

        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"
        )

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the
            # record owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """Add the current logged in user as owner of record ``recid``.

        ``data`` (the record metadata) is modified in place: the user id is
        appended to ``data['owners']`` before the record is updated.
        """
        data['owners'].append(self.user_id)

        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"
        )

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """Create in RDM a record of the processed user that is missing.

        ``item`` is modified in place: its 'owners' is set to the user id.
        """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        """Report the local counters followed by the global ones."""
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']

        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

    def _process_response(self, response: object, page: int):
        """Decode a page of the user's records and report its position.

        Returns:
            The decoded json, or False when the first page reports that the
            user has no records at all.
        """
        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

            if total_items == 0:
                self.report.add('\nThe user has no records @ End task\n')
                return False

        self.report.add(f'\nPag {page} - Get person records - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """Return the Pure uuid of the person whose ``key_name`` is ``key_value``.

        Pure 'persons' is queried page by page (10 items per page) until an
        item with a matching field is found.

        Returns:
            The uuid string, or False when a request fails, the found uuid
            is not 36 characters long, or no person matches.
        """
        # If the uuid is not found in the first x items then it will
        # continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            }
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            for item in record_json['items']:

                # The query can return persons that lack this field, so
                # test for the key before comparing its value.
                if key_name not in item or item[key_name] != key_value:
                    continue

                first_name = item['name']['firstName']
                last_name = item['name']['lastName']
                uuid = item['uuid']

                self.report.add(
                    f'Name: {first_name} {last_name}\nUuid: {uuid}',
                    self.report_files)

                if len(uuid) != 36:
                    self.report.add(
                        '\n- Warning! Incorrect user_uuid length -\n',
                        self.report_files)
                    return False
                return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
        return False

    # --- --- ---

    def _get_user_id_from_rdm(self):
        """Return the id of the single user logged in to RDM.

        Reads 'user_id, ip' from the session activity table.

        Returns:
            The user id, or False when the table has no rows or more than
            one row.
        """
        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False

        if len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False

        self.report.add(
            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
            self.report_files)

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """Record the user in user_ids_match.txt unless already listed.

        Each line of the file holds: rdm_user_id, user_uuid and
        user_external_id, separated by a single space.
        """
        file_name = data_files_name['user_ids_match']

        needs_to_add = self._check_user_ids_match('user_ids_match',
                                                  external_id)

        if needs_to_add:
            # Context manager so the line is flushed and the handle closed
            with open(file_name, 'a') as ids_file:
                ids_file.write(
                    f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """Tell whether the user's three ids still need to be stored.

        Returns:
            False when a line made of exactly the current rdm user id, user
            uuid and external id already exists, True otherwise.
        """
        expected = [str(self.user_id), self.user_uuid, external_id]

        for line in file_read_lines(file_name):
            # Compare whole lines: blank or short lines can never match
            # and no longer raise IndexError.
            if line.split('\n')[0].split(' ') == expected:
                self.report.add('Ids list: user in list', self.report_files)
                return False
        return True

    def _initalizing_method(func):
        """Decorator: print the task title and empty the record-owners file."""

        def _wrapper(self):
            self.report.add_template(['console'], ['general', 'title'],
                                     ['RECORDS OWNER'])

            # Empty file rdm_record_owners.txt
            file_owner = data_files_name['rdm_record_owners']
            open(file_owner, 'w').close()

            # Decorated function
            return func(self)

        return _wrapper

    @_initalizing_method
    def get_rdm_record_owners(self):
        """List every RDM record with its owners and count records per owner.

        Gets all records from RDM and counts how many records belong to
        each user. It also updates the content of all_rdm_records.txt.
        """
        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        all_records = []
        next_page = True

        while next_page:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)

            self.report.add(f'\n{response}\n')

            if response.status_code >= 300:
                self.report.add(response.content)
                # NOTE(review): the files below are still rewritten with
                # the records collected so far - confirm this is intended.
                break

            resp_json = json.loads(response.content)
            page_lines = []

            for item in resp_json['hits']['hits']:
                count += 1

                metadata = item['metadata']
                uuid = metadata['uuid']
                recid = metadata['recid']
                owners = metadata['owners']

                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                page_lines.append(f'{line}\n')

                all_records.append(f'{uuid} {recid}\n')

                for owner in owners:
                    count_records_per_owner[owner] = \
                        count_records_per_owner.get(owner, 0) + 1

            self.report.add(f'\nPag {pag} - Records {count}\n')

            with open(data_files_name['rdm_record_owners'], 'a') as owners_file:
                owners_file.write(''.join(page_lines))

            if 'next' not in resp_json['links']:
                next_page = False

            pag += 1

        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)

        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(''.join(all_records))

    def _count_records_per_owner(self, count_records_per_owner):
        """Report one 'owner records' line per owner."""
        self.report.add('Owner Records')

        for owner, total in count_records_per_owner.items():
            records = add_spaces(total)
            key = add_spaces(owner)
            self.report.add(f'{key} {records}')

    def _update_all_rdm_records_file(self, all_records_list):
        """Replace the content of all_rdm_records.txt with ``all_records_list``."""
        file_all_records_list = data_files_name['all_rdm_records']

        # 'w' truncates the file, so one write replaces the old content
        with open(file_all_records_list, 'w') as records_file:
            records_file.write(all_records_list)