def read_animal_numbers_from_xls(self, xls_filename): ''' Open the daily report xls, read animal numbers ''' try: self._workbook = xlrd.open_workbook(xls_filename) self._sheet = self._workbook.sheet_by_index(0) if not self._sheet.nrows: Log.error( f'ERROR: I\'m afraid you have an empty report: {xls_filename}' ) return None # Perform some initial sanity checks # STATUS_DATE_COL = 0 ANIMAL_TYPE_COL = 1 ANIMAL_ID_COL = 2 ANIMAL_NAME_COL = 3 ANIMAL_AGE_COL = 4 FOSTER_PARENT_ID_COL = 5 if (self._sheet.row_values(0)[STATUS_DATE_COL] != 'Datetime of Current Status Date' or self._sheet.row_values(0)[ANIMAL_TYPE_COL] != 'Current Animal Type' or self._sheet.row_values(0)[ANIMAL_ID_COL] != 'AnimalID' or self._sheet.row_values(0)[ANIMAL_NAME_COL] != 'Animal Name' or self._sheet.row_values(0)[ANIMAL_AGE_COL] != 'Age' or self._sheet.row_values(0)[FOSTER_PARENT_ID_COL] != 'Foster Parent ID'): Log.error( f'ERROR: Unexpected column layout in the report. Something has changed! {xls_filename}' ) return None Log.success(f'Loaded report {xls_filename}') animal_numbers = set() for row_number in range(1, self._sheet.nrows): animal_number = self._sheet.row_values( row_number)[ANIMAL_ID_COL] # xls stores all numbers as float, but also handle str type just in case # if isinstance(animal_number, float) or (isinstance(animal_number, str) and animal_number.isdigit()): animal_numbers.add((int(animal_number))) return animal_numbers except IOError as err: Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}') except xlrd.XLRDError as err: Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}') return None
def _get_current_mentee_status(self, arg_status): ''' Get current mentees and mentee status for each mentor ''' autoupdate_completed_mentees = 'autoupdate' in arg_status # mark 'completed' mentors in the spreadsheet verbose_status = 'verbose' in arg_status Log.success( f'Looking up mentee status (verbose = {verbose_status}, autoupdate_completed_mentees = {autoupdate_completed_mentees})...' ) completed_mentees = {} current_mentees = self.mentor_sheet_reader.get_current_mentees() for current in current_mentees: current['active_count'] = 0 print(f'Checking mentee status for {current["mentor"]}... ', end='', flush=True) if current['mentees']: for mentee in current['mentees']: current_animal_ids = self._current_animals_fostered( mentee['pid']) mentee['current_animals'] = {} if verbose_status: animal_data, _, _ = self._get_animal_data( current_animal_ids, True) for current_animal_id in current_animal_ids: mentee['current_animals'][ current_animal_id] = animal_data[ current_animal_id] if verbose_status else {} if current_animal_ids: current['active_count'] = current['active_count'] + 1 else: completed_mentees.setdefault(current['mentor'], []).append(mentee['pid']) days_ago = (datetime.now() - current['most_recent'] ).days if current['most_recent'] else 'N/A' print( f'active mentees = {current["active_count"]}, last assigned days ago = {days_ago}' ) if autoupdate_completed_mentees: Log.success( 'Auto-updating completed mentees in the mentor spreadsheet...') for mentor in completed_mentees: self.mentor_sheet_reader.set_completed_mentees( mentor, completed_mentees[mentor]) return current_mentees
def load_mentors_spreadsheet(self, auth): ''' Load the feline foster spreadsheet ''' try: Log.success( f'Loading mentors spreadsheet from Box (id = {auth["box_file_id"]})...' ) jwt_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), auth['box_jwt']) client = Client(JWTAuth.from_settings_file(jwt_path)) box_file = client.as_user( client.user(user_id=auth['box_user_id'])).file( file_id=auth['box_file_id']).get() xlxs_workbook = xlrd.open_workbook( file_contents=box_file.content()) config_yaml = xlxs_workbook.sheet_by_name( self._CONFIG_SHEET_NAME).row_values(1)[0] for sheet_name in xlxs_workbook.sheet_names(): if not self._is_reserved_sheet(sheet_name): sheet = xlxs_workbook.sheet_by_name(sheet_name) self._mentor_sheets.append(sheet) all_values = [ sheet.row_values(i) for i in range(1, sheet.nrows) ] self._mentor_match_values[Utils.utf8(sheet_name)] = [ Utils.utf8(str(item)).lower() for sublist in all_values for item in sublist ] except Exception as e: Log.error( f'ERROR: Unable to load Feline Foster spreadsheet!\r\n{str(e)}, {repr(e)}' ) return None print( f'Loaded {len(self._mentor_sheets)} mentors from \"{box_file["name"]}\"' ) return config_yaml
def _start_browser(self, show_browser): ''' Instantiate the browser, configure options as needed ''' Log.success('Starting chromedriver...') chrome_options = webdriver.ChromeOptions() chrome_options.add_argument( '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 ' '(KHTML, like Gecko) Version/12.0.3 Safari/605.1.15') if not show_browser: chrome_options.add_argument('--headless') # Consider adding chromedriver-binary or chromedriver_installer to requirements.txt and # removing these local copies. # if sys.platform == 'darwin': chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/mac64/chromedriver') elif sys.platform.startswith('linux'): chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/linux64/chromedriver') elif sys.platform.startswith('win32'): chromedriver_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'bin/win32/chromedriver') chrome_options.add_experimental_option( 'excludeSwitches', ['enable-logging']) # chromedriver complains a lot on Windows else: Log.error( f'Sorry friends, I haven\'t included chromedriver for your platform ({sys.platform}). Exiting now.' ) sys.exit(0) self._driver = webdriver.Chrome(chromedriver_path, options=chrome_options) self._driver.set_page_load_timeout(60)
def _read_additional_config_yaml(self, additional_config_yaml): ''' The mentors spreadsheet contains additional configuration data. This makes it easier to manage dynamic configuration data vs rollout of config.yaml updates. ''' try: Log.success('Reading configuration data from spreadsheet...') config = yaml.load(additional_config_yaml, Loader=yaml.SafeLoader) self._login_url = config['login_url'] self._search_url = config['search_url'] self._animal_url = config['animal_url'] self._medical_details_url = config['medical_details_url'] self._list_all_animals_url = config['list_animals_url'] self._responsible_for_url = config['responsible_for_url'] self._responsible_for_paged_url = config[ 'responsible_for_paged_url'] self._adoption_summary_url = config['adoption_summary_url'] self._do_not_assign_mentor = config['do_not_assign_mentor'] if 'do_not_assign_mentor' in config else [] self._mentors = config['mentors'] if 'mentors' in config else [] except AttributeError as err: Log.error(f'ERROR: Unable to read additional config: {err}') return False except yaml.YAMLError as err: Log.error(f'ERROR: Unable to read additional config: {err}') return False except TypeError as err: Log.error(f'ERROR: Invalid yaml in additional config: {err}') return False except KeyError as err: Log.error(f'ERROR: Missing value in additional config: {err}') return False return True
def _login(self): ''' Load the login page, enter credentials, submit ''' Log.success('Logging in...') try: self._driver.set_page_load_timeout(20) self._driver.get(self._login_url) except TimeoutException: Log.error( 'ERROR: Unable to load the login page. Please check your connection.' ) return False except NoSuchElementException: Log.error( 'ERROR: Unable to load the login page. Please check your connection.' ) return False try: self._driver.find_element_by_id('txt_username').send_keys( self._username) self._driver.find_element_by_id('txt_password').send_keys( self._password) self._driver.find_element_by_id( 'ctl00_ctl00_ContentPlaceHolderBase_ContentPlaceHolder1_btn_login' ).click() self._driver.find_element_by_id('Continue').click() except NoSuchElementException: Log.error( 'ERROR: Unable to login. Please check your username/password.') return False return True
def load_mentors_spreadsheet(self, auth): ''' Load the feline foster spreadsheet ''' start_time = time.time() try: Log.success( f'Loading mentors spreadsheet from Google Sheets (id = \'{auth["google_spreadsheet_key"]}\')...' ) client = pygsheets.authorize(auth['google_client_secret']) spreadsheet = client.open_by_key(auth['google_spreadsheet_key']) config_yaml = spreadsheet.worksheet_by_title( self._CONFIG_SHEET_NAME)[2][0] for worksheet in spreadsheet.worksheets(): if not self._is_reserved_sheet(worksheet.title): Log.debug(f'Reading worksheet \"{worksheet.title}\"...') try: if self.check_for_surgery_sheet(worksheet): continue # Mentor sheet header rows vary slightly between feline and canine. Perform a terrible quick-and-dirty validation. # if ['ID'] not in worksheet.get_values('E1', 'E2'): raise Exception('') from Exception # Build a list of mentee names/emails/ids to be used for mentor matching # b_rows = worksheet.get_values( 'B2', f'B{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) c_rows = worksheet.get_values( 'C2', f'C{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) e_rows = worksheet.get_values( 'E2', f'E{worksheet.rows}', include_tailing_empty=False, include_tailing_empty_rows=False) mentor_match_cells = b_rows + c_rows + e_rows self._mentor_match_values[Utils.utf8( worksheet.title)] = [ Utils.utf8(item).lower() for sublist in mentor_match_cells for item in sublist ] self._mentor_sheets.append(worksheet) except Exception: Log.debug( f'Sheet \'{worksheet.title}\' does not appear to be a mentor sheet (skipping)' ) except Exception as e: Log.error( f'ERROR: Unable to load mentors spreadsheet!\r\n{str(e)}, {repr(e)}' ) return None print('Loaded {0} mentors from \"{1}\" in {2:.0f} seconds'.format( len(self._mentor_sheets), spreadsheet.title, time.time() - start_time)) return config_yaml
def _output_results(self, animal_data, foster_parents, persons_data, animals_not_in_foster, current_mentee_status, csv_filename): ''' Output all of our new super amazing results to a csv file ''' Log.success(f'Writing results to {csv_filename}...') csv_rows = [] csv_rows.append([]) csv_rows[-1].append('Kitten-Scraper Notes') csv_rows[-1].append('Loss Rate') csv_rows[-1].append('Name') csv_rows[-1].append('E-mail') csv_rows[-1].append('Phone') csv_rows[-1].append('Person ID') csv_rows[-1].append('Foster Experience') csv_rows[-1].append('Date Animals Received') if self._dog_mode: csv_rows[-1].append('"Name, Breed, Color"') csv_rows[-1].append('Animal Details') csv_rows[-1].append('Special Animal Message') # Build a row for each foster parent # for person_number in sorted(foster_parents, key=lambda p: persons_data[p]['notes']): person_data = persons_data[person_number] name = person_data['full_name'] report_notes = person_data['notes'] loss_rate = round(person_data['loss_rate']) animals_with_this_person = foster_parents[person_number] animal_details, animal_details_brief = self._get_animal_details_string( animals_with_this_person, animal_data) prev_animals_fostered = person_data['prev_animals_fostered'] foster_experience = 'NEW' if not prev_animals_fostered else prev_animals_fostered special_message = '' for a_number in animals_with_this_person: msg = animal_data[a_number]['message'] if msg: special_message += '{}{}: {}'.format( '\r\r' if special_message else '', a_number, msg) cell_number = person_data['cell_phone'] home_number = person_data['home_phone'] phone = '' if len(cell_number) >= 10: # ignore incomplete phone numbers phone = f'(C) {cell_number}' if len(home_number) >= 10: # ignore incomplete phone numbers phone += '{}(H) {}'.format('\r' if phone else '', home_number) emails_str = '' for email in person_data['emails']: emails_str += '{}{}'.format('\r' if emails_str else '', email) # I will assume all animals in this group went into foster on the same date. This should usually be true # since this is designed to processed with a "daily report". # date_received = animal_data[ animals_with_this_person[0]]['status_date'] # Explicitly wrap numbers/datestr with ="{}" to avoid Excel auto-formatting issues # csv_rows.append([]) csv_rows[-1].append(f'"{report_notes}"') csv_rows[-1].append(f'"{loss_rate}%"') csv_rows[-1].append(f'"{name}"') csv_rows[-1].append(f'"{emails_str}"') csv_rows[-1].append(f'"{phone}"') csv_rows[-1].append(f'="{person_number}"') csv_rows[-1].append(f'"{foster_experience}"') csv_rows[-1].append(f'="{date_received}"') if self._dog_mode: csv_rows[-1].append(f'"{animal_details_brief}"') csv_rows[-1].append(f'"{animal_details}"') csv_rows[-1].append(f'"{special_message}"') print('{} (Experience: {}, Loss Rate: {}%) {}{}{}'.format( name, foster_experience, loss_rate, Log.GREEN, report_notes.replace('\r', ', '), Log.END)) with open(csv_filename, 'w') as outfile: for row in csv_rows: outfile.write(','.join(row)) outfile.write('\n') if not foster_parents: outfile.write( '*** None of the animals in this report are currently in foster\n' ) Log.warn( 'None of the animals in this report are currently in foster. Nothing to do!' ) if animals_not_in_foster: outfile.write('\n\n\n*** Animals not in foster\n') Log.warn('\nAnimals not in foster') for a_number in animals_not_in_foster: outfile.write('{} {} - {}\n'.format( a_number, animal_data[a_number]['type'], animal_data[a_number]['status'])) print('{} {} - {}'.format(a_number, animal_data[a_number]['type'], animal_data[a_number]['status'])) if current_mentee_status: outfile.write( '\n\nMentor,Active Mentees,Last Assigned (days ago)\n') for current in current_mentee_status: days_ago = (datetime.now() - current['most_recent'] ).days if current['most_recent'] else 'N/A' outfile.write( f'{current["mentor"]},{current["active_count"]},{days_ago}\n' )
def run(self): print(f'Welcome to KittenScraper {__version__}') start_time = time.time() arg_parser = ArgumentParser() arg_parser.add_argument( '-i', '--input', help= 'specify the daily foster report (xls), or optionally a comma-separated list of animal numbers', required=False) arg_parser.add_argument( '-s', '--status', help='retrieve current mentee status [verbose,autoupdate,export]', required=False, nargs='?', default='', const='yes') arg_parser.add_argument( '-c', '--config', help= 'specify a config file (optional, defaults to \'config.yaml\')', required=False, default='config.yaml') arg_parser.add_argument( '-b', '--show_browser', help='show the web browser window (generally used for debugging)', required=False, action='store_true') args = arg_parser.parse_args() if not args.input and not args.status: arg_parser.print_help() sys.exit(0) # Load config.yaml # if not self._load_config_file(args.config): sys.exit() # Load the Foster Mentors spreadsheet # if self._google_spreadsheet_key and self._google_client_secret: self.mentor_sheet_reader = GoogleSheetReader() self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet( { 'google_spreadsheet_key': self._google_spreadsheet_key, 'google_client_secret': self._google_client_secret }) elif self._box_user_id and self._box_file_id and self._box_jwt: self.mentor_sheet_reader = BoxSheetReader() self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet( { 'box_user_id': self._box_user_id, 'box_file_id': self._box_file_id, 'box_jwt': self._box_jwt }) else: Log.error( 'ERROR: Incorrect mentor spreadsheet configuration, please check config.yaml' ) sys.exit() if self._additional_config_yaml is None: Log.error( 'ERROR: configuration YAML from mentors spreadsheet not found, cannot continue' ) sys.exit() # Load additional config data from the mentors spreadsheet. This minimizes the need to deploy updates to the # local config.yaml file. # if not self._read_additional_config_yaml(self._additional_config_yaml): sys.exit() # Start the browser, log in # self._start_browser(args.show_browser) if not self._login(): sys.exit() current_mentee_status = self._get_current_mentee_status( args.status) if args.status else None if current_mentee_status: status_file = None export_status = 'export' in args.status verbose_status = 'verbose' in args.status if export_status: status_file_path = os.path.join( Utils.default_dir(), f'{self.BASE_ANIMAL_TYPE}_foster_mentor_status_{date.today().strftime("%Y.%m.%d")}.txt' ) status_file = open(status_file_path, 'w') Log.success( f'Exporting mentee status to file: {status_file_path}') for current in current_mentee_status: self._print_and_write( status_file, '--------------------------------------------------') self._print_and_write(status_file, current['mentor']) if current['mentees']: for mentee in current['mentees']: self._print_and_write( status_file, f' {mentee["name"]} ({mentee["pid"]}) - {len(mentee["current_animals"])} animals' ) for a_number, data in mentee['current_animals'].items( ): surgery_date = self.mentor_sheet_reader.get_surgery_date( a_number) surgery_info = '' if surgery_date: surgery_info = f', Surgery Date {surgery_date}' if verbose_status: self._print_and_write( status_file, f' {a_number}, {data["age"]}, S/N {data["sn"]}, Bio {data["bio"]}, Photo {data["photo"]}{surgery_info}' ) else: self._print_and_write( status_file, f' {a_number}{surgery_info}') else: self._print_and_write(status_file, ' ** No current mentees **') self._print_and_write(status_file, '') if args.input: # Load animal numbers. Note that args.input will either be a path to the "daily report" xls, or may # optionally be a comma-separated list of animal numbers. # if re.fullmatch(r'(\s?\d+\s?)(\s?,\s?\d+\s?)*$', args.input): animal_numbers = [s.strip() for s in args.input.split(',')] else: animal_numbers = KittenReportReader( ).read_animal_numbers_from_xls(args.input) if not animal_numbers: sys.exit() print( f'Found {len(animal_numbers)} animal{"s" if len(animal_numbers) != 1 else ""}: {", ".join([str(a) for a in animal_numbers])}' ) # Query details for each animal (current foster parent, foster status, breed, color, gender, age, etc.) # animal_data, foster_parents, animals_not_in_foster = self._get_animal_data( animal_numbers) for p_number in foster_parents: print( f'Animals for foster parent {p_number} = {foster_parents[p_number]}' ) # Query details for each foster parent (name, contact details, etc.) # persons_data = {} for person in foster_parents: persons_data[person] = self._get_person_data(person) # Save report to file # output_csv = os.path.join( Utils.default_dir(), f'{self.BASE_ANIMAL_TYPE}_foster_mentor_report_{date.today().strftime("%Y.%m.%d")}.csv' ) Utils.make_dir(output_csv) self._output_results(animal_data, foster_parents, persons_data, animals_not_in_foster, current_mentee_status, output_csv) # Optional: automatically forward this report via email # if 'generate_email' in self.config: from outlook_email import compose_outlook_email subject = self._get_from_dict(self.config['generate_email'], 'subject') recipient_name = self._get_from_dict( self.config['generate_email'], 'recipient_name') recipient_email = self._get_from_dict( self.config['generate_email'], 'recipient_email') message = self._get_from_dict(self.config['generate_email'], 'message') if None not in [ subject, recipient_name, recipient_email, message ]: compose_outlook_email(subject=subject, recipient_name=recipient_name, recipient_email=recipient_email, body=message, attachment=output_csv) Log.debug( f'Composed email to {recipient_name} <{recipient_email}>' ) print('KittenScraper completed in {0:.0f} seconds'.format(time.time() - start_time)) self._exit_browser()