def read_animal_numbers_from_xls(self, xls_filename): ''' Open the daily report xls, read animal numbers ''' try: self._workbook = xlrd.open_workbook(xls_filename) self._sheet = self._workbook.sheet_by_index(0) if not self._sheet.nrows: Log.error( f'ERROR: I\'m afraid you have an empty report: {xls_filename}' ) return None # Perform some initial sanity checks # STATUS_DATE_COL = 0 ANIMAL_TYPE_COL = 1 ANIMAL_ID_COL = 2 ANIMAL_NAME_COL = 3 ANIMAL_AGE_COL = 4 FOSTER_PARENT_ID_COL = 5 if (self._sheet.row_values(0)[STATUS_DATE_COL] != 'Datetime of Current Status Date' or self._sheet.row_values(0)[ANIMAL_TYPE_COL] != 'Current Animal Type' or self._sheet.row_values(0)[ANIMAL_ID_COL] != 'AnimalID' or self._sheet.row_values(0)[ANIMAL_NAME_COL] != 'Animal Name' or self._sheet.row_values(0)[ANIMAL_AGE_COL] != 'Age' or self._sheet.row_values(0)[FOSTER_PARENT_ID_COL] != 'Foster Parent ID'): Log.error( f'ERROR: Unexpected column layout in the report. Something has changed! {xls_filename}' ) return None Log.success(f'Loaded report {xls_filename}') animal_numbers = set() for row_number in range(1, self._sheet.nrows): animal_number = self._sheet.row_values( row_number)[ANIMAL_ID_COL] # xls stores all numbers as float, but also handle str type just in case # if isinstance(animal_number, float) or (isinstance(animal_number, str) and animal_number.isdigit()): animal_numbers.add((int(animal_number))) return animal_numbers except IOError as err: Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}') except xlrd.XLRDError as err: Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}') return None
def _load_config_file(self, config_file_yaml): ''' A config.yaml configuration file is expected to be in the same directory as this script ''' try: config_file = os.path.join( os.path.dirname(os.path.realpath(__file__)), config_file_yaml) self.config = yaml.load(open(config_file, 'r'), Loader=yaml.SafeLoader) self._username = self.config['username'] self._password = self.config['password'] self._dog_mode = self.config[ 'dog_mode'] if 'dog_mode' in self.config else False self._google_spreadsheet_key = self.config[ 'google_spreadsheet_key'] if 'google_spreadsheet_key' in self.config else None self._google_client_secret = self.config[ 'google_client_secret'] if 'google_client_secret' in self.config else None self._box_user_id = self.config[ 'box_user_id'] if 'box_user_id' in self.config else None self._box_file_id = self.config[ 'box_file_id'] if 'box_file_id' in self.config else None self._box_jwt = self.config[ 'box_jwt'] if 'box_jwt' in self.config else None if not (self._google_spreadsheet_key and self._google_client_secret ) and not (self._box_user_id and self._box_file_id and self._box_jwt): Log.error( f'ERROR: Incomplete mentor spreadsheet configuration: {config_file}' ) return False if self._dog_mode: Log.warn('** Dog Mode is Active **') self.BASE_ANIMAL_TYPE = 'feline_and_critters' if not self._dog_mode else 'canine' except yaml.YAMLError as err: Log.error( f'ERROR: Unable to parse configuration file: {config_file}, {err}' ) return False except IOError as err: Log.error( f'ERROR: Unable to read configuration file: {config_file}, {err}' ) return False except KeyError as err: Log.error( f'ERROR: Missing value in configuration file: {config_file}, {err}' ) return False return True
def get_current_mentees(self): ''' Return the current mentees assigned to each mentor ''' current_mentees = [] for worksheet in self._mentor_sheets: if worksheet.name.lower() == 'retired mentor': continue print(f'Loading current mentees for {worksheet.name}... ', end='') # It's much faster to grab a whole block of cells at once vs iterating through many API calls # max_search_rows = min(50, worksheet.nrows) cells = [ worksheet.row_slice(row, start_colx=0, end_colx=7) for row in range(0, max_search_rows) ] name_col_id = self._find_column_by_name(cells, 'Name') pid_col_id = self._find_column_by_name(cells, 'ID') mentees = [] search_failed = False for i in range(1, max_search_rows): if i == max_search_rows - 1: search_failed = True Log.error( f'Unable to determine current mentees for mentor {worksheet.name}' ) mentees = [] break elif str(cells[i][0].value).lower().find( 'completed mentees') >= 0: break # We've reach the end of the "active mentee" rows elif cells[i][name_col_id].value and cells[i][pid_col_id].value: mentee_name = cells[i][name_col_id].value pid = int(cells[i][pid_col_id].value) if not [ mentee for mentee in mentees if mentee['pid'] == pid ]: # ignore duplicate mentees mentees.append({'name': mentee_name, 'pid': pid}) if not search_failed: print(f'found {len(mentees)}') current_mentees.append({ 'mentor': worksheet.name, 'mentees': mentees }) return current_mentees
def _animal_has_adoption_summary(self, animal_number): adoption_summary = '' try: self._driver.get(self._adoption_summary_url.format(animal_number)) adoption_summary = self._get_text_by_id('adoptSummary').strip() except Exception: Log.error( f'Failed to read adoption summary for animal {animal_number}') return False return len( adoption_summary ) > 10 # minimum of 10 chars, completely arbitrary in case there is some junk in here
def _get_current_mentee_status(self, arg_status): ''' Get current mentees and mentee status for each mentor ''' autoupdate_completed_mentees = 'autoupdate' in arg_status # mark 'completed' mentors in the spreadsheet verbose_status = 'verbose' in arg_status Log.success( f'Looking up mentee status (verbose = {verbose_status}, autoupdate_completed_mentees = {autoupdate_completed_mentees})...' ) completed_mentees = {} current_mentees = self.mentor_sheet_reader.get_current_mentees() for current in current_mentees: current['active_count'] = 0 print(f'Checking mentee status for {current["mentor"]}... ', end='', flush=True) if current['mentees']: for mentee in current['mentees']: current_animal_ids = self._current_animals_fostered( mentee['pid']) mentee['current_animals'] = {} if verbose_status: animal_data, _, _ = self._get_animal_data( current_animal_ids, True) for current_animal_id in current_animal_ids: mentee['current_animals'][ current_animal_id] = animal_data[ current_animal_id] if verbose_status else {} if current_animal_ids: current['active_count'] = current['active_count'] + 1 else: completed_mentees.setdefault(current['mentor'], []).append(mentee['pid']) days_ago = (datetime.now() - current['most_recent'] ).days if current['most_recent'] else 'N/A' print( f'active mentees = {current["active_count"]}, last assigned days ago = {days_ago}' ) if autoupdate_completed_mentees: Log.success( 'Auto-updating completed mentees in the mentor spreadsheet...') for mentor in completed_mentees: self.mentor_sheet_reader.set_completed_mentees( mentor, completed_mentees[mentor]) return current_mentees
def _get_spay_neuter_status(self, animal_number): ''' Load spay/neuter status from the medical details page ''' try: self._driver.get(self._medical_details_url.format(animal_number)) return Utils.utf8( self._get_attr_by_xpath( 'innerText', '/html/body/table[2]/tbody/tr[2]/td/table/tbody/tr[4]/td[4]' )) except Exception: Log.error( f'Failed to read spay/neuter status for animal {animal_number}' ) return 'Unknown'
def set_completed_mentees(self, mentor, mentee_ids): ''' Mark the given mentees as completed. Future refactoring consideration: See similar code between set_completed_mentees() and get_current_mentees(). ''' for worksheet in self._mentor_sheets: if worksheet.title.lower() == mentor.lower(): max_search_rows = min(100, worksheet.rows) cells = worksheet.range(f'A1:G{max_search_rows}', returnas='cells') name_col_id = self._find_column_by_name(cells, 'Name') pid_col_id = self._find_column_by_name(cells, 'ID') notes_col_id = 0 for i in range(1, max_search_rows): if str(cells[i][0].value).lower().find( 'completed mentees') >= 0: break # We've reached the end of the "active mentee" rows if cells[i][name_col_id].value and str( cells[i][pid_col_id].value).isdigit(): pid = int(cells[i][pid_col_id].value) if pid in mentee_ids: # If this mentee name cell is already marked with strikethrough, leave it alone # name_cell_format = cells[i][ name_col_id].text_format if not name_cell_format or 'strikethrough' not in name_cell_format or name_cell_format[ 'strikethrough'] is False: mentee_name = cells[i][name_col_id].value mentee_name = mentee_name.replace('\n', ' ').replace( '\r', '') Log.debug( f'Completed: {mentee_name} ({pid}) @ {mentor}[\'{cells[i][name_col_id].label}\']' ) debug_mode = False if not debug_mode: cells[i][name_col_id].set_text_format( 'strikethrough', True) notes_current_value = cells[i][ notes_col_id].value if 'autoupdate: no animals' not in notes_current_value.lower( ): cells[i][notes_col_id].set_value( f'AutoUpdate: No animals {date.today().strftime("%b %-d, %Y")}\r\n{notes_current_value}' )
def load_mentors_spreadsheet(self, auth): ''' Load the feline foster spreadsheet ''' try: Log.success( f'Loading mentors spreadsheet from Box (id = {auth["box_file_id"]})...' ) jwt_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), auth['box_jwt']) client = Client(JWTAuth.from_settings_file(jwt_path)) box_file = client.as_user( client.user(user_id=auth['box_user_id'])).file( file_id=auth['box_file_id']).get() xlxs_workbook = xlrd.open_workbook( file_contents=box_file.content()) config_yaml = xlxs_workbook.sheet_by_name( self._CONFIG_SHEET_NAME).row_values(1)[0] for sheet_name in xlxs_workbook.sheet_names(): if not self._is_reserved_sheet(sheet_name): sheet = xlxs_workbook.sheet_by_name(sheet_name) self._mentor_sheets.append(sheet) all_values = [ sheet.row_values(i) for i in range(1, sheet.nrows) ] self._mentor_match_values[Utils.utf8(sheet_name)] = [ Utils.utf8(str(item)).lower() for sublist in all_values for item in sublist ] except Exception as e: Log.error( f'ERROR: Unable to load Feline Foster spreadsheet!\r\n{str(e)}, {repr(e)}' ) return None print( f'Loaded {len(self._mentor_sheets)} mentors from \"{box_file["name"]}\"' ) return config_yaml
def _start_browser(self, show_browser): ''' Instantiate the browser, configure options as needed ''' Log.success('Starting chromedriver...') chrome_options = webdriver.ChromeOptions() chrome_options.add_argument( '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 ' '(KHTML, like Gecko) Version/12.0.3 Safari/605.1.15') if not show_browser: chrome_options.add_argument('--headless') # Consider adding chromedriver-binary or chromedriver_installer to requirements.txt and # removing these local copies. # if sys.platform == 'darwin': chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/mac64/chromedriver') elif sys.platform.startswith('linux'): chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/linux64/chromedriver') elif sys.platform.startswith('win32'): chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/win32/chromedriver') chrome_options.add_experimental_option( 'excludeSwitches', ['enable-logging']) # chromedriver complains a lot on Windows else: Log.error( f'Sorry friends, I haven\'t included chromedriver for your platform ({sys.platform}). Exiting now.' ) sys.exit(0) self._driver = webdriver.Chrome(chromedriver_path, options=chrome_options) self._driver.set_page_load_timeout(60)
def _read_additional_config_yaml(self, additional_config_yaml): ''' The mentors spreadsheet contains additional configuration data. This makes it easier to manage dynamic configuration data vs rollout of config.yaml updates. ''' try: Log.success('Reading configuration data from spreadsheet...') config = yaml.load(additional_config_yaml, Loader=yaml.SafeLoader) self._login_url = config['login_url'] self._search_url = config['search_url'] self._animal_url = config['animal_url'] self._medical_details_url = config['medical_details_url'] self._list_all_animals_url = config['list_animals_url'] self._responsible_for_url = config['responsible_for_url'] self._responsible_for_paged_url = config[ 'responsible_for_paged_url'] self._adoption_summary_url = config['adoption_summary_url'] self._do_not_assign_mentor = config['do_not_assign_mentor'] if 'do_not_assign_mentor' in config else [] self._mentors = config['mentors'] if 'mentors' in config else [] except AttributeError as err: Log.error(f'ERROR: Unable to read additional config: {err}') return False except yaml.YAMLError as err: Log.error(f'ERROR: Unable to read additional config: {err}') return False except TypeError as err: Log.error(f'ERROR: Invalid yaml in additional config: {err}') return False except KeyError as err: Log.error(f'ERROR: Missing value in additional config: {err}') return False return True
def check_for_surgery_sheet(self, worksheet): if any(worksheet.title in substr for substr in self._SURGERY_SHEET_NAMES): surgery_rows = worksheet.get_values( 'A1', f'H{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) date_col = -1 patient_col = -1 for col in range(0, len(surgery_rows[0])): # Allow for an extra header row (accounting for differences between Feline and Canine) # if any('date' in substr for substr in [ str(surgery_rows[0][col]).lower(), str(surgery_rows[1][col]).lower() ]): date_col = col elif 'patient' in (str(surgery_rows[0][col]).lower(), str(surgery_rows[1][col]).lower()): patient_col = col if date_col != -1 and patient_col != -1: for row in range(1, len(surgery_rows)): try: a_number = surgery_rows[row][patient_col] if a_number.isdigit(): a_number = int(a_number) # If there are multiple entries for a given a_number, assume the first is the most recent. # if a_number not in self._surgery_dates: self._surgery_dates[int( a_number)] = surgery_rows[row][date_col] except Exception as e: Log.warn( f'{worksheet.title} column {patient_col}, row {row} is empty. Assuming this is the end of the list.' ) break else: Log.error( f'Surgery form is not in expected format (date_col={date_col}, patient_col={patient_col}. Skipping.' ) Log.debug( f'Loaded {len(self._surgery_dates)} entries from the surgery sheet' ) return True return False
def _login(self): ''' Load the login page, enter credentials, submit ''' Log.success('Logging in...') try: self._driver.set_page_load_timeout(20) self._driver.get(self._login_url) except TimeoutException: Log.error( 'ERROR: Unable to load the login page. Please check your connection.' ) return False except NoSuchElementException: Log.error( 'ERROR: Unable to load the login page. Please check your connection.' ) return False try: self._driver.find_element_by_id('txt_username').send_keys( self._username) self._driver.find_element_by_id('txt_password').send_keys( self._password) self._driver.find_element_by_id( 'ctl00_ctl00_ContentPlaceHolderBase_ContentPlaceHolder1_btn_login' ).click() self._driver.find_element_by_id('Continue').click() except NoSuchElementException: Log.error( 'ERROR: Unable to login. Please check your username/password.') return False return True
def get_current_mentees(self): ''' Return the current mentees assigned to each mentor ''' current_mentees = [] for worksheet in self._mentor_sheets: if worksheet.title.lower() == 'retired mentor': continue print(f'Loading current mentees for {worksheet.title}... ', end='', flush=True) # It's much faster to grab a whole block of cells at once vs iterating through many API calls # max_search_rows = min(100, worksheet.rows) cells = worksheet.range(f'A1:G{max_search_rows}', returnas='cells') name_col_id = self._find_column_by_name(cells, 'Name') pid_col_id = self._find_column_by_name(cells, 'ID') date_col_id = self._find_column_by_name(cells, 'Date\nKittens\nReceived') if date_col_id == -1: date_col_id = self._find_column_by_name( cells, 'Date Dog Received') mentees = [] search_failed = False most_recent_received_date = None for i in range(1, max_search_rows): if i == max_search_rows - 1: search_failed = True Log.error( f'Unable to determine current mentees for mentor {worksheet.title}' ) mentees = [] break elif str(cells[i][0].value).lower().find( 'completed mentees') >= 0: break # We've reached the end of the "active mentee" rows elif cells[i][name_col_id].value and str( cells[i][pid_col_id].value).isdigit(): mentee_name = cells[i][name_col_id].value pid = int(cells[i][pid_col_id].value) received_date = Utils.string_to_datetime( cells[i][date_col_id].value) if received_date and ( most_recent_received_date is None or received_date > most_recent_received_date): most_recent_received_date = received_date if not [ mentee for mentee in mentees if mentee['pid'] == pid ]: # ignore duplicate mentees mentees.append({'name': mentee_name, 'pid': pid}) if not search_failed: print(f'found {len(mentees)}') current_mentees.append({ 'mentor': worksheet.title, 'mentees': mentees, 'most_recent': most_recent_received_date }) return current_mentees
def _output_results(self, animal_data, foster_parents, persons_data, animals_not_in_foster, current_mentee_status, csv_filename): ''' Output all of our new super amazing results to a csv file ''' Log.success(f'Writing results to {csv_filename}...') csv_rows = [] csv_rows.append([]) csv_rows[-1].append('Kitten-Scraper Notes') csv_rows[-1].append('Loss Rate') csv_rows[-1].append('Name') csv_rows[-1].append('E-mail') csv_rows[-1].append('Phone') csv_rows[-1].append('Person ID') csv_rows[-1].append('Foster Experience') csv_rows[-1].append('Date Animals Received') if self._dog_mode: csv_rows[-1].append('"Name, Breed, Color"') csv_rows[-1].append('Animal Details') csv_rows[-1].append('Special Animal Message') # Build a row for each foster parent # for person_number in sorted(foster_parents, key=lambda p: persons_data[p]['notes']): person_data = persons_data[person_number] name = person_data['full_name'] report_notes = person_data['notes'] loss_rate = round(person_data['loss_rate']) animals_with_this_person = foster_parents[person_number] animal_details, animal_details_brief = self._get_animal_details_string( animals_with_this_person, animal_data) prev_animals_fostered = person_data['prev_animals_fostered'] foster_experience = 'NEW' if not prev_animals_fostered else prev_animals_fostered special_message = '' for a_number in animals_with_this_person: msg = animal_data[a_number]['message'] if msg: special_message += '{}{}: {}'.format( '\r\r' if special_message else '', a_number, msg) cell_number = person_data['cell_phone'] home_number = person_data['home_phone'] phone = '' if len(cell_number) >= 10: # ignore incomplete phone numbers phone = f'(C) {cell_number}' if len(home_number) >= 10: # ignore incomplete phone numbers phone += '{}(H) {}'.format('\r' if phone else '', home_number) emails_str = '' for email in person_data['emails']: emails_str += '{}{}'.format('\r' if emails_str else '', email) # I will assume all animals in this group went into foster on the same date. This should usually be true # since this is designed to processed with a "daily report". # date_received = animal_data[ animals_with_this_person[0]]['status_date'] # Explicitly wrap numbers/datestr with ="{}" to avoid Excel auto-formatting issues # csv_rows.append([]) csv_rows[-1].append(f'"{report_notes}"') csv_rows[-1].append(f'"{loss_rate}%"') csv_rows[-1].append(f'"{name}"') csv_rows[-1].append(f'"{emails_str}"') csv_rows[-1].append(f'"{phone}"') csv_rows[-1].append(f'="{person_number}"') csv_rows[-1].append(f'"{foster_experience}"') csv_rows[-1].append(f'="{date_received}"') if self._dog_mode: csv_rows[-1].append(f'"{animal_details_brief}"') csv_rows[-1].append(f'"{animal_details}"') csv_rows[-1].append(f'"{special_message}"') print('{} (Experience: {}, Loss Rate: {}%) {}{}{}'.format( name, foster_experience, loss_rate, Log.GREEN, report_notes.replace('\r', ', '), Log.END)) with open(csv_filename, 'w') as outfile: for row in csv_rows: outfile.write(','.join(row)) outfile.write('\n') if not foster_parents: outfile.write( '*** None of the animals in this report are currently in foster\n' ) Log.warn( 'None of the animals in this report are currently in foster. Nothing to do!' ) if animals_not_in_foster: outfile.write('\n\n\n*** Animals not in foster\n') Log.warn('\nAnimals not in foster') for a_number in animals_not_in_foster: outfile.write('{} {} - {}\n'.format( a_number, animal_data[a_number]['type'], animal_data[a_number]['status'])) print('{} {} - {}'.format(a_number, animal_data[a_number]['type'], animal_data[a_number]['status'])) if current_mentee_status: outfile.write( '\n\nMentor,Active Mentees,Last Assigned (days ago)\n') for current in current_mentee_status: days_ago = (datetime.now() - current['most_recent'] ).days if current['most_recent'] else 'N/A' outfile.write( f'{current["mentor"]},{current["active_count"]},{days_ago}\n' )
def run(self): print(f'Welcome to KittenScraper {__version__}') start_time = time.time() arg_parser = ArgumentParser() arg_parser.add_argument( '-i', '--input', help= 'specify the daily foster report (xls), or optionally a comma-separated list of animal numbers', required=False) arg_parser.add_argument( '-s', '--status', help='retrieve current mentee status [verbose,autoupdate,export]', required=False, nargs='?', default='', const='yes') arg_parser.add_argument( '-c', '--config', help= 'specify a config file (optional, defaults to \'config.yaml\')', required=False, default='config.yaml') arg_parser.add_argument( '-b', '--show_browser', help='show the web browser window (generally used for debugging)', required=False, action='store_true') args = arg_parser.parse_args() if not args.input and not args.status: arg_parser.print_help() sys.exit(0) # Load config.yaml # if not self._load_config_file(args.config): sys.exit() # Load the Foster Mentors spreadsheet # if self._google_spreadsheet_key and self._google_client_secret: self.mentor_sheet_reader = GoogleSheetReader() self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet( { 'google_spreadsheet_key': self._google_spreadsheet_key, 'google_client_secret': self._google_client_secret }) elif self._box_user_id and self._box_file_id and self._box_jwt: self.mentor_sheet_reader = BoxSheetReader() self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet( { 'box_user_id': self._box_user_id, 'box_file_id': self._box_file_id, 'box_jwt': self._box_jwt }) else: Log.error( 'ERROR: Incorrect mentor spreadsheet configuration, please check config.yaml' ) sys.exit() if self._additional_config_yaml is None: Log.error( 'ERROR: configuration YAML from mentors spreadsheet not found, cannot continue' ) sys.exit() # Load additional config data from the mentors spreadsheet. This minimizes the need to deploy updates to the # local config.yaml file. # if not self._read_additional_config_yaml(self._additional_config_yaml): sys.exit() # Start the browser, log in # self._start_browser(args.show_browser) if not self._login(): sys.exit() current_mentee_status = self._get_current_mentee_status( args.status) if args.status else None if current_mentee_status: status_file = None export_status = 'export' in args.status verbose_status = 'verbose' in args.status if export_status: status_file_path = os.path.join( Utils.default_dir(), f'{self.BASE_ANIMAL_TYPE}_foster_mentor_status_{date.today().strftime("%Y.%m.%d")}.txt' ) status_file = open(status_file_path, 'w') Log.success( f'Exporting mentee status to file: {status_file_path}') for current in current_mentee_status: self._print_and_write( status_file, '--------------------------------------------------') self._print_and_write(status_file, current['mentor']) if current['mentees']: for mentee in current['mentees']: self._print_and_write( status_file, f' {mentee["name"]} ({mentee["pid"]}) - {len(mentee["current_animals"])} animals' ) for a_number, data in mentee['current_animals'].items( ): surgery_date = self.mentor_sheet_reader.get_surgery_date( a_number) surgery_info = '' if surgery_date: surgery_info = f', Surgery Date {surgery_date}' if verbose_status: self._print_and_write( status_file, f' {a_number}, {data["age"]}, S/N {data["sn"]}, Bio {data["bio"]}, Photo {data["photo"]}{surgery_info}' ) else: self._print_and_write( status_file, f' {a_number}{surgery_info}') else: self._print_and_write(status_file, ' ** No current mentees **') self._print_and_write(status_file, '') if args.input: # Load animal numbers. Note that args.input will either be a path to the "daily report" xls, or may # optionally be a comma-separated list of animal numbers. # if re.fullmatch(r'(\s?\d+\s?)(\s?,\s?\d+\s?)*$', args.input): animal_numbers = [s.strip() for s in args.input.split(',')] else: animal_numbers = KittenReportReader( ).read_animal_numbers_from_xls(args.input) if not animal_numbers: sys.exit() print( f'Found {len(animal_numbers)} animal{"s" if len(animal_numbers) != 1 else ""}: {", ".join([str(a) for a in animal_numbers])}' ) # Query details for each animal (current foster parent, foster status, breed, color, gender, age, etc.) # animal_data, foster_parents, animals_not_in_foster = self._get_animal_data( animal_numbers) for p_number in foster_parents: print( f'Animals for foster parent {p_number} = {foster_parents[p_number]}' ) # Query details for each foster parent (name, contact details, etc.) # persons_data = {} for person in foster_parents: persons_data[person] = self._get_person_data(person) # Save report to file # output_csv = os.path.join( Utils.default_dir(), f'{self.BASE_ANIMAL_TYPE}_foster_mentor_report_{date.today().strftime("%Y.%m.%d")}.csv' ) Utils.make_dir(output_csv) self._output_results(animal_data, foster_parents, persons_data, animals_not_in_foster, current_mentee_status, output_csv) # Optional: automatically forward this report via email # if 'generate_email' in self.config: from outlook_email import compose_outlook_email subject = self._get_from_dict(self.config['generate_email'], 'subject') recipient_name = self._get_from_dict( self.config['generate_email'], 'recipient_name') recipient_email = self._get_from_dict( self.config['generate_email'], 'recipient_email') message = self._get_from_dict(self.config['generate_email'], 'message') if None not in [ subject, recipient_name, recipient_email, message ]: compose_outlook_email(subject=subject, recipient_name=recipient_name, recipient_email=recipient_email, body=message, attachment=output_csv) Log.debug( f'Composed email to {recipient_name} <{recipient_email}>' ) print('KittenScraper completed in {0:.0f} seconds'.format(time.time() - start_time)) self._exit_browser()
def load_mentors_spreadsheet(self, auth): ''' Load the feline foster spreadsheet ''' start_time = time.time() try: Log.success( f'Loading mentors spreadsheet from Google Sheets (id = \'{auth["google_spreadsheet_key"]}\')...' ) client = pygsheets.authorize(auth['google_client_secret']) spreadsheet = client.open_by_key(auth['google_spreadsheet_key']) config_yaml = spreadsheet.worksheet_by_title( self._CONFIG_SHEET_NAME)[2][0] for worksheet in spreadsheet.worksheets(): if not self._is_reserved_sheet(worksheet.title): Log.debug(f'Reading worksheet \"{worksheet.title}\"...') try: if self.check_for_surgery_sheet(worksheet): continue # Mentor sheet header rows vary slightly between feline and canine. Perform a terrible quick-and-dirty validation. # if ['ID'] not in worksheet.get_values('E1', 'E2'): raise Exception('') from Exception # Build a list of mentee names/emails/ids to be used for mentor matching # b_rows = worksheet.get_values( 'B2', f'B{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) c_rows = worksheet.get_values( 'C2', f'C{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) e_rows = worksheet.get_values( 'E2', f'E{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) mentor_match_cells = b_rows + c_rows + e_rows self._mentor_match_values[Utils.utf8( worksheet.title)] = [ Utils.utf8(item).lower() for sublist in mentor_match_cells for item in sublist ] self._mentor_sheets.append(worksheet) except Exception: Log.debug( f'Sheet \'{worksheet.title}\' does not appear to be a mentor sheet (skipping)' ) except Exception as e: Log.error( f'ERROR: Unable to load mentors spreadsheet!\r\n{str(e)}, {repr(e)}' ) return None print('Loaded {0} mentors from \"{1}\" in {2:.0f} seconds'.format( len(self._mentor_sheets), spreadsheet.title, time.time() - start_time)) return config_yaml
def _get_animal_data(self, animal_numbers, silent=False): ''' Load additional animal data for each animal number ''' animal_data = {} foster_parents = {} animals_not_in_foster = set() for a_number in animal_numbers: if not silent: print(f'Looking up animal {a_number}... ', end='', flush=True) sys.stdout.flush() self._driver.get(self._animal_url.format(a_number)) try: # Dismiss alert (if found) # Alert(self._driver).dismiss() except NoAlertPresentException: pass try: # Wait for lazy-loaded content # WebDriverWait(self._driver, 10).until( EC.presence_of_element_located((By.ID, 'submitbtn2'))) except Exception: raise Exception( 'Timeout while waiting for content on search page!' ) from Exception # Get Special Message text (if it exists) # special_msg = Utils.utf8( self._get_text_by_id('specialMessagesDialog')) if special_msg: # Remove text we don't care about # special_msg = re.sub( r'(?i)This is a special message. If you would like to delete it then clear the Special Message box in the General Details section of this page.', '', special_msg).strip() # Remove empty lines and double quotes # special_msg = os.linesep.join( [s for s in special_msg.splitlines() if s]) special_msg = special_msg.replace('"', '\'') animal_data[a_number] = {} animal_data[a_number]['message'] = special_msg status = self._get_selection_by_id('status') if not status: # Status text is usually found within a <select> element, but is sometimes found as innerText within the # <td> that looks something like this: "Adopted - Awaiting Pickup\nChange Status" try: status = self._get_property_by_xpath( 'innerText', '//*[@id="Table17"]/tbody/tr[5]/td[2]').split('\n')[0] except Exception: status = '' sub_status = self._get_selection_by_id('subStatus') animal_data[a_number][ 'status'] = f'{status}{" - " if sub_status else ""}{sub_status}' animal_data[a_number]['name'] = self._get_attr_by_id( 'animalname').strip() animal_data[a_number]['type'] = self._get_attr_by_id('type') animal_data[a_number]['breed'] = self._get_attr_by_id( 'primaryBreed').strip() animal_data[a_number]['primary_color'] = self._get_selection_by_id( 'primaryColour') animal_data[a_number][ 'secondary_color'] = self._get_selection_by_id( 'secondaryColour') animal_data[a_number]['gender'] = self._get_selection_by_id('sex') animal_data[a_number][ 'photo'] = 'No' if 'NoImage.png' in self._get_property_by_xpath( 'src', '//*[@id="animal-default-photo"]') else 'Yes' try: age = datetime.now() - datetime.strptime( self._get_attr_by_id('dob'), '%m/%d/%Y') animal_data[a_number]['age'] = self._stringify_age(age) except Exception: animal_data[a_number]['age'] = 'Unknown Age' try: animal_data[a_number]['status_date'] = datetime.strptime( self._get_attr_by_id('statusdate'), '%m/%d/%Y').strftime('%-d-%b-%Y') except ValueError: animal_data[a_number]['status_date'] = 'Unknown' # If this animal is currently in foster, get the responsible person (foster parent). # status = status.lower() if ('in foster' in status and 'unassisted death' not in status): try: p_number = int( self._get_attr_by_xpath( 'href', '//*[@id="Table17"]/tbody/tr[1]/td[2]/a').split( 'personid=')[1]) foster_parents.setdefault(p_number, []).append(a_number) except Exception: Log.error( f'Failed to find foster parent for animal {a_number}, please check report' ) else: animals_not_in_foster.add(a_number) # Perform these operations last. They will load new pages! # animal_data[a_number]['sn'] = self._get_spay_neuter_status( a_number) animal_data[a_number][ 'bio'] = 'Yes' if self._animal_has_adoption_summary( a_number) else 'No' # Create some helpful/default string representations # set_default = lambda str, default: default if str.strip( ) in [None, ''] else str animal_data[a_number]['sn'] = set_default( animal_data[a_number]['sn'], 'Unknown') animal_data[a_number]['status'] = set_default( animal_data[a_number]['status'], 'Status Unknown') animal_data[a_number]['name'] = set_default( animal_data[a_number]['name'], 'Unnamed') animal_data[a_number]['breed'] = set_default( animal_data[a_number]['breed'], 'Unknown Breed') animal_data[a_number]['gender'] = set_default( animal_data[a_number]['gender'], 'Unknown Gender') animal_data[a_number]['color'] = set_default( animal_data[a_number]['primary_color'], 'Unknown Color') if animal_data[a_number]['secondary_color'].strip() not in [ None, '', 'None' ]: animal_data[a_number][ 'color'] += f'/{animal_data[a_number]["secondary_color"]}' breed_abbreviations = { # 'Breed' is a free-form text field and not everyone enters data exactly the same. I'll compare against # lowercase/no-whitespace for slightly better odds of matches. # 'domesticshorthair': 'DSH', 'domesticmediumhair': 'DMH', 'domesticlonghair': 'DLH' } abbreviation = breed_abbreviations.get( animal_data[a_number]['breed'].replace(' ', '').lower()) if abbreviation: animal_data[a_number]['breed'] = abbreviation if animal_data[a_number]['gender'].lower() == 'male': animal_data[a_number]['gender_short'] = 'M' elif animal_data[a_number]['gender'].lower() == 'female': animal_data[a_number]['gender_short'] = 'F' else: animal_data[a_number]['gender_short'] = animal_data[a_number][ 'gender'] if not silent: print(animal_data[a_number]['status']) return animal_data, foster_parents, animals_not_in_foster