def check_for_surgery_sheet(self, worksheet):
    '''
    Detect a surgery sheet and, if it is one, cache its surgery dates.

    A worksheet is treated as a surgery sheet when its title is contained in one of the
    self._SURGERY_SHEET_NAMES entries. The first two rows of columns A-H are searched for
    a header containing 'date' and a header equal to 'patient'. Every following row whose
    patient cell parses as an integer is recorded in self._surgery_dates
    (animal number -> date cell); the first entry seen for an animal number wins.

    Args:
        worksheet: sheet object exposing .title, .rows and .get_values().

    Returns:
        True if the worksheet is a surgery sheet (even when its layout could not be
        parsed and nothing was loaded), False otherwise.
    '''
    if not any(worksheet.title in substr for substr in self._SURGERY_SHEET_NAMES):
        return False

    surgery_rows = worksheet.get_values(
        'A1', f'H{worksheet.rows}',
        include_tailing_empty=False,
        include_tailing_empty_rows=False)

    def cell_value(row, col):
        # Trailing empty cells/rows are trimmed by the get_values() call above, so rows are ragged.
        # A cell that is not present is treated exactly like an empty one.
        if row < len(surgery_rows) and col < len(surgery_rows[row]):
            return surgery_rows[row][col]
        return ''

    date_col = -1
    patient_col = -1

    # Allow for an extra header row (accounting for differences between Feline and Canine). Scan as wide
    # as the wider of the two candidate header rows so a short title row cannot hide the real headers.
    #
    header_width = max((len(header_row) for header_row in surgery_rows[:2]), default=0)
    for col in range(header_width):
        headers = (str(cell_value(0, col)).lower(), str(cell_value(1, col)).lower())
        if any('date' in header for header in headers):
            date_col = col
        elif 'patient' in headers:
            # NOTE(review): this is an exact match on the whole header text, unlike the substring test
            # used for 'date'. Kept as-is; confirm against the real sheet headers.
            patient_col = col

    if date_col != -1 and patient_col != -1:
        for row in range(1, len(surgery_rows)):
            a_number = str(cell_value(row, patient_col)).strip()
            if not a_number:
                Log.warn(
                    f'{worksheet.title} column {patient_col}, row {row} is empty. Assuming this is the end of the list.'
                )
                break

            try:
                a_number = int(a_number)
            except ValueError:
                # Non-numeric text (e.g. the second header row) is not an animal number. Skip it
                # instead of treating it as the end of the list.
                continue

            # If there are multiple entries for a given a_number, assume the first is the most recent.
            #
            if a_number not in self._surgery_dates:
                self._surgery_dates[a_number] = cell_value(row, date_col)
    else:
        Log.error(
            f'Surgery form is not in expected format (date_col={date_col}, patient_col={patient_col}. Skipping.'
        )

    Log.debug(
        f'Loaded {len(self._surgery_dates)} entries from the surgery sheet'
    )
    return True
def set_completed_mentees(self, mentor, mentee_ids):
    '''
    Mark the given mentees as completed.

    For every mentor sheet whose title matches `mentor` (case-insensitive), the active
    mentee rows (everything above the 'completed mentees' marker in column A, limited to
    the first 100 rows) are scanned. Each row whose numeric ID is in `mentee_ids` and
    whose name is not already struck through gets its name cell struck through and an
    'AutoUpdate: No animals <date>' line prepended to its notes cell (column A), unless
    the notes already contain such a line.

    Args:
        mentor: mentor sheet title to update.
        mentee_ids: container of integer person IDs to mark as completed.

    Future refactoring consideration: See similar code between set_completed_mentees()
    and get_current_mentees().
    '''
    debug_mode = False  # Set to True to log the matches without modifying the sheet
    notes_col_id = 0

    for worksheet in self._mentor_sheets:
        if worksheet.title.lower() != mentor.lower():
            continue

        max_search_rows = min(100, worksheet.rows)
        cells = worksheet.range(f'A1:G{max_search_rows}', returnas='cells')
        name_col_id = self._find_column_by_name(cells, 'Name')
        pid_col_id = self._find_column_by_name(cells, 'ID')

        # Row 0 is the header. Slicing (rather than indexing by range) also copes with a grid that is
        # shorter than max_search_rows.
        for row in cells[1:max_search_rows]:
            if 'completed mentees' in str(row[0].value).lower():
                break  # We've reached the end of the "active mentee" rows

            name_cell = row[name_col_id]
            pid_text = str(row[pid_col_id].value)
            if not name_cell.value or not pid_text.isdigit():
                continue

            pid = int(pid_text)
            if pid not in mentee_ids:
                continue

            # If this mentee name cell is already marked with strikethrough, leave it alone
            #
            name_cell_format = name_cell.text_format
            if (name_cell_format and 'strikethrough' in name_cell_format
                    and name_cell_format['strikethrough'] is not False):
                continue

            mentee_name = name_cell.value.replace('\n', ' ').replace('\r', '')
            Log.debug(
                f'Completed: {mentee_name} ({pid}) @ {mentor}[\'{name_cell.label}\']'
            )
            if debug_mode:
                continue

            name_cell.set_text_format('strikethrough', True)

            notes_cell = row[notes_col_id]
            notes_current_value = notes_cell.value or ''
            if 'autoupdate: no animals' not in notes_current_value.lower():
                # The day is formatted manually: strftime('%-d') is a platform-specific flag and
                # raises ValueError on Windows.
                today = date.today()
                notes_cell.set_value(
                    f'AutoUpdate: No animals {today:%b} {today.day}, {today.year}\r\n{notes_current_value}'
                )
def load_mentors_spreadsheet(self, auth):
    '''
    Open the mentors workbook in Google Sheets and index its mentor sheets.

    `auth` must provide 'google_client_secret' and 'google_spreadsheet_key'. Every
    non-reserved worksheet is offered to check_for_surgery_sheet() first; any remaining
    sheet that has an 'ID' header in E1/E2 is appended to self._mentor_sheets, and the
    lower-cased values of its B, C and E columns are stored in self._mentor_match_values
    under the sheet title. Sheets that fail while being read are logged and skipped.

    Returns the value read from position [2][0] of the config sheet, or None if the
    spreadsheet could not be loaded.
    '''
    started_at = time.time()
    trim_options = {
        'include_tailing_empty': False,
        'include_tailing_empty_rows': False,
    }

    try:
        Log.success(
            f'Loading mentors spreadsheet from Google Sheets (id = \'{auth["google_spreadsheet_key"]}\')...'
        )

        client = pygsheets.authorize(auth['google_client_secret'])
        spreadsheet = client.open_by_key(auth['google_spreadsheet_key'])
        config_sheet = spreadsheet.worksheet_by_title(self._CONFIG_SHEET_NAME)
        config_yaml = config_sheet[2][0]

        for worksheet in spreadsheet.worksheets():
            if self._is_reserved_sheet(worksheet.title):
                continue

            Log.debug(f'Reading worksheet \"{worksheet.title}\"...')
            try:
                if self.check_for_surgery_sheet(worksheet):
                    continue

                # Mentor sheet header rows vary slightly between feline and canine, so the 'ID' header
                # is accepted in either E1 or E2. Anything else is rejected as "not a mentor sheet".
                #
                header_cells = worksheet.get_values('E1', 'E2')
                if ['ID'] not in header_cells:
                    raise Exception('') from Exception

                # Collect mentee names/emails/ids (columns B, C and E) to be used for mentor matching
                #
                column_blocks = [
                    worksheet.get_values(f'{column}2',
                                         f'{column}{worksheet.rows}',
                                         **trim_options)
                    for column in ('B', 'C', 'E')
                ]
                match_values = []
                for column_rows in column_blocks:
                    for row_values in column_rows:
                        for item in row_values:
                            match_values.append(Utils.utf8(item).lower())

                self._mentor_match_values[Utils.utf8(worksheet.title)] = match_values
                self._mentor_sheets.append(worksheet)

            except Exception:
                Log.debug(
                    f'Sheet \'{worksheet.title}\' does not appear to be a mentor sheet (skipping)'
                )

    except Exception as e:
        Log.error(
            f'ERROR: Unable to load mentors spreadsheet!\r\n{str(e)}, {repr(e)}'
        )
        return None

    elapsed_seconds = time.time() - started_at
    print('Loaded {0} mentors from \"{1}\" in {2:.0f} seconds'.format(
        len(self._mentor_sheets), spreadsheet.title, elapsed_seconds))
    return config_yaml
def run(self):
    '''
    Command-line entry point for KittenScraper.

    Parses the command line (-i/--input, -s/--status, -c/--config, -b/--show_browser),
    loads the local config file and the mentors spreadsheet (Google Sheets or Box,
    depending on which credentials are configured), logs in through the browser, and
    then optionally prints/exports the current mentee status and/or builds the foster
    mentor report for the requested animals.

    Exits through sys.exit() when there is nothing to do or when a required step fails.
    Once the browser has been started it is always shut down again, including on those
    early exits.
    '''
    print(f'Welcome to KittenScraper {__version__}')
    start_time = time.time()

    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        '-i',
        '--input',
        help=
        'specify the daily foster report (xls), or optionally a comma-separated list of animal numbers',
        required=False)
    arg_parser.add_argument(
        '-s',
        '--status',
        help='retrieve current mentee status [verbose,autoupdate,export]',
        required=False,
        nargs='?',
        default='',
        const='yes')
    arg_parser.add_argument(
        '-c',
        '--config',
        help=
        'specify a config file (optional, defaults to \'config.yaml\')',
        required=False,
        default='config.yaml')
    arg_parser.add_argument(
        '-b',
        '--show_browser',
        help='show the web browser window (generally used for debugging)',
        required=False,
        action='store_true')
    args = arg_parser.parse_args()

    if not args.input and not args.status:
        arg_parser.print_help()
        sys.exit(0)

    # Load config.yaml
    #
    if not self._load_config_file(args.config):
        sys.exit()

    # Load the Foster Mentors spreadsheet
    #
    if self._google_spreadsheet_key and self._google_client_secret:
        self.mentor_sheet_reader = GoogleSheetReader()
        self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
            {
                'google_spreadsheet_key': self._google_spreadsheet_key,
                'google_client_secret': self._google_client_secret
            })
    elif self._box_user_id and self._box_file_id and self._box_jwt:
        self.mentor_sheet_reader = BoxSheetReader()
        self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
            {
                'box_user_id': self._box_user_id,
                'box_file_id': self._box_file_id,
                'box_jwt': self._box_jwt
            })
    else:
        Log.error(
            'ERROR: Incorrect mentor spreadsheet configuration, please check config.yaml'
        )
        sys.exit()

    if self._additional_config_yaml is None:
        Log.error(
            'ERROR: configuration YAML from mentors spreadsheet not found, cannot continue'
        )
        sys.exit()

    # Load additional config data from the mentors spreadsheet. This minimizes the need to deploy updates to the
    # local config.yaml file.
    #
    if not self._read_additional_config_yaml(self._additional_config_yaml):
        sys.exit()

    # Start the browser, log in
    #
    self._start_browser(args.show_browser)
    try:
        if not self._login():
            sys.exit()

        current_mentee_status = self._get_current_mentee_status(
            args.status) if args.status else None

        if current_mentee_status:
            self._run_report_mentee_status(current_mentee_status, args.status)

        if args.input:
            self._run_process_input(args.input, current_mentee_status)

        print('KittenScraper completed in {0:.0f} seconds'.format(time.time() -
                                                                  start_time))
    finally:
        # sys.exit() raises SystemExit, so this also runs on the early exits above and the
        # browser is never left behind.
        self._exit_browser()

def _run_report_mentee_status(self, current_mentee_status, status_arg):
    ''' Print the per-mentor mentee status and, when 'export' is requested, also write it to a text file. '''
    export_status = 'export' in status_arg
    verbose_status = 'verbose' in status_arg

    status_file = None
    try:
        if export_status:
            status_file_path = os.path.join(
                Utils.default_dir(),
                f'{self.BASE_ANIMAL_TYPE}_foster_mentor_status_{date.today().strftime("%Y.%m.%d")}.txt'
            )
            status_file = open(status_file_path, 'w')
            Log.success(
                f'Exporting mentee status to file: {status_file_path}')

        for current in current_mentee_status:
            self._print_and_write(
                status_file,
                '--------------------------------------------------')
            self._print_and_write(status_file, current['mentor'])

            if current['mentees']:
                for mentee in current['mentees']:
                    self._print_and_write(
                        status_file,
                        f' {mentee["name"]} ({mentee["pid"]}) - {len(mentee["current_animals"])} animals'
                    )
                    for a_number, data in mentee['current_animals'].items():
                        surgery_date = self.mentor_sheet_reader.get_surgery_date(
                            a_number)
                        surgery_info = ''
                        if surgery_date:
                            surgery_info = f', Surgery Date {surgery_date}'

                        if verbose_status:
                            self._print_and_write(
                                status_file,
                                f' {a_number}, {data["age"]}, S/N {data["sn"]}, Bio {data["bio"]}, Photo {data["photo"]}{surgery_info}'
                            )
                        else:
                            self._print_and_write(
                                status_file, f' {a_number}{surgery_info}')
            else:
                self._print_and_write(status_file,
                                      ' ** No current mentees **')
            self._print_and_write(status_file, '')
    finally:
        # Close the export file even if a status row fails to print, so the handle is not leaked.
        if status_file is not None:
            status_file.close()

def _run_process_input(self, input_arg, current_mentee_status):
    ''' Resolve the requested animal numbers, gather animal/foster-parent data, write the CSV report and optionally compose the email. '''
    # Load animal numbers. Note that input_arg will either be a path to the "daily report" xls, or may
    # optionally be a comma-separated list of animal numbers.
    #
    if re.fullmatch(r'(\s?\d+\s?)(\s?,\s?\d+\s?)*$', input_arg):
        animal_numbers = [s.strip() for s in input_arg.split(',')]
    else:
        animal_numbers = KittenReportReader(
        ).read_animal_numbers_from_xls(input_arg)

    if not animal_numbers:
        sys.exit()

    print(
        f'Found {len(animal_numbers)} animal{"s" if len(animal_numbers) != 1 else ""}: {", ".join([str(a) for a in animal_numbers])}'
    )

    # Query details for each animal (current foster parent, foster status, breed, color, gender, age, etc.)
    #
    animal_data, foster_parents, animals_not_in_foster = self._get_animal_data(
        animal_numbers)

    for p_number in foster_parents:
        print(
            f'Animals for foster parent {p_number} = {foster_parents[p_number]}'
        )

    # Query details for each foster parent (name, contact details, etc.)
    #
    persons_data = {
        person: self._get_person_data(person)
        for person in foster_parents
    }

    # Save report to file
    #
    output_csv = os.path.join(
        Utils.default_dir(),
        f'{self.BASE_ANIMAL_TYPE}_foster_mentor_report_{date.today().strftime("%Y.%m.%d")}.csv'
    )
    Utils.make_dir(output_csv)
    self._output_results(animal_data, foster_parents, persons_data,
                         animals_not_in_foster, current_mentee_status,
                         output_csv)

    # Optional: automatically forward this report via email
    #
    if 'generate_email' in self.config:
        # Imported lazily so the email module is only needed when the feature is configured.
        from outlook_email import compose_outlook_email

        email_config = self.config['generate_email']
        subject = self._get_from_dict(email_config, 'subject')
        recipient_name = self._get_from_dict(email_config, 'recipient_name')
        recipient_email = self._get_from_dict(email_config, 'recipient_email')
        message = self._get_from_dict(email_config, 'message')

        # Only compose the email when every field was found in the config.
        if None not in [
                subject, recipient_name, recipient_email, message
        ]:
            compose_outlook_email(subject=subject,
                                  recipient_name=recipient_name,
                                  recipient_email=recipient_email,
                                  body=message,
                                  attachment=output_csv)
            Log.debug(
                f'Composed email to {recipient_name} <{recipient_email}>'
            )