Example #1
0
    def read_animal_numbers_from_xls(self, xls_filename):
        ''' Open the daily report xls, read animal numbers
        '''
        try:
            self._workbook = xlrd.open_workbook(xls_filename)
            self._sheet = self._workbook.sheet_by_index(0)

            if not self._sheet.nrows:
                Log.error(
                    f'ERROR: I\'m afraid you have an empty report: {xls_filename}'
                )
                return None

            # Perform some initial sanity checks
            #
            STATUS_DATE_COL = 0
            ANIMAL_TYPE_COL = 1
            ANIMAL_ID_COL = 2
            ANIMAL_NAME_COL = 3
            ANIMAL_AGE_COL = 4
            FOSTER_PARENT_ID_COL = 5

            if (self._sheet.row_values(0)[STATUS_DATE_COL] !=
                    'Datetime of Current Status Date'
                    or self._sheet.row_values(0)[ANIMAL_TYPE_COL] !=
                    'Current Animal Type'
                    or self._sheet.row_values(0)[ANIMAL_ID_COL] != 'AnimalID'
                    or
                    self._sheet.row_values(0)[ANIMAL_NAME_COL] != 'Animal Name'
                    or self._sheet.row_values(0)[ANIMAL_AGE_COL] != 'Age'
                    or self._sheet.row_values(0)[FOSTER_PARENT_ID_COL] !=
                    'Foster Parent ID'):

                Log.error(
                    f'ERROR: Unexpected column layout in the report. Something has changed! {xls_filename}'
                )
                return None

            Log.success(f'Loaded report {xls_filename}')

            animal_numbers = set()
            for row_number in range(1, self._sheet.nrows):
                animal_number = self._sheet.row_values(
                    row_number)[ANIMAL_ID_COL]
                # xls stores all numbers as float, but also handle str type just in case
                #
                if isinstance(animal_number,
                              float) or (isinstance(animal_number, str)
                                         and animal_number.isdigit()):
                    animal_numbers.add((int(animal_number)))

            return animal_numbers

        except IOError as err:
            Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}')

        except xlrd.XLRDError as err:
            Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}')

        return None
    def _get_current_mentee_status(self, arg_status):
        ''' Get current mentees and mentee status for each mentor
        '''
        autoupdate_completed_mentees = 'autoupdate' in arg_status  # mark 'completed' mentors in the spreadsheet
        verbose_status = 'verbose' in arg_status
        Log.success(
            f'Looking up mentee status (verbose = {verbose_status}, autoupdate_completed_mentees = {autoupdate_completed_mentees})...'
        )
        completed_mentees = {}
        current_mentees = self.mentor_sheet_reader.get_current_mentees()
        for current in current_mentees:
            current['active_count'] = 0
            print(f'Checking mentee status for {current["mentor"]}... ',
                  end='',
                  flush=True)

            if current['mentees']:
                for mentee in current['mentees']:
                    current_animal_ids = self._current_animals_fostered(
                        mentee['pid'])
                    mentee['current_animals'] = {}

                    if verbose_status:
                        animal_data, _, _ = self._get_animal_data(
                            current_animal_ids, True)

                    for current_animal_id in current_animal_ids:
                        mentee['current_animals'][
                            current_animal_id] = animal_data[
                                current_animal_id] if verbose_status else {}

                    if current_animal_ids:
                        current['active_count'] = current['active_count'] + 1
                    else:
                        completed_mentees.setdefault(current['mentor'],
                                                     []).append(mentee['pid'])

            days_ago = (datetime.now() - current['most_recent']
                        ).days if current['most_recent'] else 'N/A'
            print(
                f'active mentees = {current["active_count"]}, last assigned days ago = {days_ago}'
            )

        if autoupdate_completed_mentees:
            Log.success(
                'Auto-updating completed mentees in the mentor spreadsheet...')
            for mentor in completed_mentees:
                self.mentor_sheet_reader.set_completed_mentees(
                    mentor, completed_mentees[mentor])

        return current_mentees
Example #3
0
    def load_mentors_spreadsheet(self, auth):
        ''' Load the feline foster spreadsheet
        '''
        try:
            Log.success(
                f'Loading mentors spreadsheet from Box (id = {auth["box_file_id"]})...'
            )

            jwt_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), auth['box_jwt'])
            client = Client(JWTAuth.from_settings_file(jwt_path))
            box_file = client.as_user(
                client.user(user_id=auth['box_user_id'])).file(
                    file_id=auth['box_file_id']).get()
            xlxs_workbook = xlrd.open_workbook(
                file_contents=box_file.content())

            config_yaml = xlxs_workbook.sheet_by_name(
                self._CONFIG_SHEET_NAME).row_values(1)[0]

            for sheet_name in xlxs_workbook.sheet_names():
                if not self._is_reserved_sheet(sheet_name):
                    sheet = xlxs_workbook.sheet_by_name(sheet_name)
                    self._mentor_sheets.append(sheet)
                    all_values = [
                        sheet.row_values(i) for i in range(1, sheet.nrows)
                    ]
                    self._mentor_match_values[Utils.utf8(sheet_name)] = [
                        Utils.utf8(str(item)).lower() for sublist in all_values
                        for item in sublist
                    ]

        except Exception as e:
            Log.error(
                f'ERROR: Unable to load Feline Foster spreadsheet!\r\n{str(e)}, {repr(e)}'
            )
            return None

        print(
            f'Loaded {len(self._mentor_sheets)} mentors from \"{box_file["name"]}\"'
        )
        return config_yaml
    def _start_browser(self, show_browser):
        ''' Instantiate the browser, configure options as needed
        '''
        Log.success('Starting chromedriver...')
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(
            '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 '
            '(KHTML, like Gecko) Version/12.0.3 Safari/605.1.15')
        if not show_browser:
            chrome_options.add_argument('--headless')

        # Consider adding chromedriver-binary or chromedriver_installer to requirements.txt and
        # removing these local copies.
        #
        if sys.platform == 'darwin':
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/mac64/chromedriver')
        elif sys.platform.startswith('linux'):
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/linux64/chromedriver')
        elif sys.platform.startswith('win32'):
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/win32/chromedriver')
            chrome_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging'])  # chromedriver complains a lot on Windows
        else:
            Log.error(
                f'Sorry friends, I haven\'t included chromedriver for your platform ({sys.platform}). Exiting now.'
            )
            sys.exit(0)

        self._driver = webdriver.Chrome(chromedriver_path,
                                        options=chrome_options)
        self._driver.set_page_load_timeout(60)
    def _read_additional_config_yaml(self, additional_config_yaml):
        ''' The mentors spreadsheet contains additional configuration data. This makes it easier to manage dynamic
            configuration data vs rollout of config.yaml updates.
        '''
        try:
            Log.success('Reading configuration data from spreadsheet...')
            config = yaml.load(additional_config_yaml, Loader=yaml.SafeLoader)
            self._login_url = config['login_url']
            self._search_url = config['search_url']
            self._animal_url = config['animal_url']
            self._medical_details_url = config['medical_details_url']
            self._list_all_animals_url = config['list_animals_url']
            self._responsible_for_url = config['responsible_for_url']
            self._responsible_for_paged_url = config[
                'responsible_for_paged_url']
            self._adoption_summary_url = config['adoption_summary_url']
            self._do_not_assign_mentor = config['do_not_assign_mentor'] if 'do_not_assign_mentor' in config else []
            self._mentors = config['mentors'] if 'mentors' in config else []

        except AttributeError as err:
            Log.error(f'ERROR: Unable to read additional config: {err}')
            return False

        except yaml.YAMLError as err:
            Log.error(f'ERROR: Unable to read additional config: {err}')
            return False

        except TypeError as err:
            Log.error(f'ERROR: Invalid yaml in additional config: {err}')
            return False

        except KeyError as err:
            Log.error(f'ERROR: Missing value in additional config: {err}')
            return False

        return True
    def _login(self):
        ''' Load the login page, enter credentials, submit
        '''
        Log.success('Logging in...')

        try:
            self._driver.set_page_load_timeout(20)
            self._driver.get(self._login_url)

        except TimeoutException:
            Log.error(
                'ERROR: Unable to load the login page. Please check your connection.'
            )
            return False
        except NoSuchElementException:
            Log.error(
                'ERROR: Unable to load the login page. Please check your connection.'
            )
            return False

        try:
            self._driver.find_element_by_id('txt_username').send_keys(
                self._username)
            self._driver.find_element_by_id('txt_password').send_keys(
                self._password)
            self._driver.find_element_by_id(
                'ctl00_ctl00_ContentPlaceHolderBase_ContentPlaceHolder1_btn_login'
            ).click()
            self._driver.find_element_by_id('Continue').click()

        except NoSuchElementException:
            Log.error(
                'ERROR: Unable to login. Please check your username/password.')
            return False

        return True
Example #7
0
    def load_mentors_spreadsheet(self, auth):
        ''' Load the feline foster spreadsheet
        '''
        start_time = time.time()
        try:
            Log.success(
                f'Loading mentors spreadsheet from Google Sheets (id = \'{auth["google_spreadsheet_key"]}\')...'
            )

            client = pygsheets.authorize(auth['google_client_secret'])
            spreadsheet = client.open_by_key(auth['google_spreadsheet_key'])

            config_yaml = spreadsheet.worksheet_by_title(
                self._CONFIG_SHEET_NAME)[2][0]

            for worksheet in spreadsheet.worksheets():
                if not self._is_reserved_sheet(worksheet.title):
                    Log.debug(f'Reading worksheet \"{worksheet.title}\"...')
                    try:
                        if self.check_for_surgery_sheet(worksheet):
                            continue

                        # Mentor sheet header rows vary slightly between feline and canine. Perform a terrible quick-and-dirty validation.
                        #
                        if ['ID'] not in worksheet.get_values('E1', 'E2'):
                            raise Exception('') from Exception

                        # Build a list of mentee names/emails/ids to be used for mentor matching
                        #
                        b_rows = worksheet.get_values(
                            'B2',
                            f'B{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)
                        c_rows = worksheet.get_values(
                            'C2',
                            f'C{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)
                        e_rows = worksheet.get_values(
                            'E2',
                            f'E{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)

                        mentor_match_cells = b_rows + c_rows + e_rows
                        self._mentor_match_values[Utils.utf8(
                            worksheet.title)] = [
                                Utils.utf8(item).lower()
                                for sublist in mentor_match_cells
                                for item in sublist
                            ]
                        self._mentor_sheets.append(worksheet)

                    except Exception:
                        Log.debug(
                            f'Sheet \'{worksheet.title}\' does not appear to be a mentor sheet (skipping)'
                        )

        except Exception as e:
            Log.error(
                f'ERROR: Unable to load mentors spreadsheet!\r\n{str(e)}, {repr(e)}'
            )
            return None

        print('Loaded {0} mentors from \"{1}\" in {2:.0f} seconds'.format(
            len(self._mentor_sheets), spreadsheet.title,
            time.time() - start_time))
        return config_yaml
    def _output_results(self, animal_data, foster_parents, persons_data,
                        animals_not_in_foster, current_mentee_status,
                        csv_filename):
        ''' Output all of our new super amazing results to a csv file
        '''
        Log.success(f'Writing results to {csv_filename}...')
        csv_rows = []

        csv_rows.append([])
        csv_rows[-1].append('Kitten-Scraper Notes')
        csv_rows[-1].append('Loss Rate')
        csv_rows[-1].append('Name')
        csv_rows[-1].append('E-mail')
        csv_rows[-1].append('Phone')
        csv_rows[-1].append('Person ID')
        csv_rows[-1].append('Foster Experience')
        csv_rows[-1].append('Date Animals Received')
        if self._dog_mode:
            csv_rows[-1].append('"Name, Breed, Color"')
        csv_rows[-1].append('Animal Details')
        csv_rows[-1].append('Special Animal Message')

        # Build a row for each foster parent
        #
        for person_number in sorted(foster_parents,
                                    key=lambda p: persons_data[p]['notes']):
            person_data = persons_data[person_number]
            name = person_data['full_name']
            report_notes = person_data['notes']
            loss_rate = round(person_data['loss_rate'])
            animals_with_this_person = foster_parents[person_number]
            animal_details, animal_details_brief = self._get_animal_details_string(
                animals_with_this_person, animal_data)

            prev_animals_fostered = person_data['prev_animals_fostered']
            foster_experience = 'NEW' if not prev_animals_fostered else prev_animals_fostered

            special_message = ''
            for a_number in animals_with_this_person:
                msg = animal_data[a_number]['message']
                if msg:
                    special_message += '{}{}: {}'.format(
                        '\r\r' if special_message else '', a_number, msg)

            cell_number = person_data['cell_phone']
            home_number = person_data['home_phone']
            phone = ''
            if len(cell_number) >= 10:  # ignore incomplete phone numbers
                phone = f'(C) {cell_number}'
            if len(home_number) >= 10:  # ignore incomplete phone numbers
                phone += '{}(H) {}'.format('\r' if phone else '', home_number)

            emails_str = ''
            for email in person_data['emails']:
                emails_str += '{}{}'.format('\r' if emails_str else '', email)

            # I will assume all animals in this group went into foster on the same date. This should usually be true
            # since this is designed to processed with a "daily report".
            #
            date_received = animal_data[
                animals_with_this_person[0]]['status_date']

            # Explicitly wrap numbers/datestr with ="{}" to avoid Excel auto-formatting issues
            #
            csv_rows.append([])
            csv_rows[-1].append(f'"{report_notes}"')
            csv_rows[-1].append(f'"{loss_rate}%"')
            csv_rows[-1].append(f'"{name}"')
            csv_rows[-1].append(f'"{emails_str}"')
            csv_rows[-1].append(f'"{phone}"')
            csv_rows[-1].append(f'="{person_number}"')
            csv_rows[-1].append(f'"{foster_experience}"')
            csv_rows[-1].append(f'="{date_received}"')
            if self._dog_mode:
                csv_rows[-1].append(f'"{animal_details_brief}"')
            csv_rows[-1].append(f'"{animal_details}"')
            csv_rows[-1].append(f'"{special_message}"')

            print('{} (Experience: {}, Loss Rate: {}%) {}{}{}'.format(
                name, foster_experience, loss_rate, Log.GREEN,
                report_notes.replace('\r', ', '), Log.END))
        with open(csv_filename, 'w') as outfile:
            for row in csv_rows:
                outfile.write(','.join(row))
                outfile.write('\n')

            if not foster_parents:
                outfile.write(
                    '*** None of the animals in this report are currently in foster\n'
                )
                Log.warn(
                    'None of the animals in this report are currently in foster. Nothing to do!'
                )

            if animals_not_in_foster:
                outfile.write('\n\n\n*** Animals not in foster\n')
                Log.warn('\nAnimals not in foster')
                for a_number in animals_not_in_foster:
                    outfile.write('{} {} - {}\n'.format(
                        a_number, animal_data[a_number]['type'],
                        animal_data[a_number]['status']))
                    print('{} {} - {}'.format(a_number,
                                              animal_data[a_number]['type'],
                                              animal_data[a_number]['status']))

            if current_mentee_status:
                outfile.write(
                    '\n\nMentor,Active Mentees,Last Assigned (days ago)\n')
                for current in current_mentee_status:
                    days_ago = (datetime.now() - current['most_recent']
                                ).days if current['most_recent'] else 'N/A'
                    outfile.write(
                        f'{current["mentor"]},{current["active_count"]},{days_ago}\n'
                    )
    def run(self):
        print(f'Welcome to KittenScraper {__version__}')
        start_time = time.time()

        arg_parser = ArgumentParser()
        arg_parser.add_argument(
            '-i',
            '--input',
            help=
            'specify the daily foster report (xls), or optionally a comma-separated list of animal numbers',
            required=False)
        arg_parser.add_argument(
            '-s',
            '--status',
            help='retrieve current mentee status [verbose,autoupdate,export]',
            required=False,
            nargs='?',
            default='',
            const='yes')
        arg_parser.add_argument(
            '-c',
            '--config',
            help=
            'specify a config file (optional, defaults to \'config.yaml\')',
            required=False,
            default='config.yaml')
        arg_parser.add_argument(
            '-b',
            '--show_browser',
            help='show the web browser window (generally used for debugging)',
            required=False,
            action='store_true')
        args = arg_parser.parse_args()

        if not args.input and not args.status:
            arg_parser.print_help()
            sys.exit(0)

        # Load config.yaml
        #
        if not self._load_config_file(args.config):
            sys.exit()

        # Load the Foster Mentors spreadsheet
        #
        if self._google_spreadsheet_key and self._google_client_secret:
            self.mentor_sheet_reader = GoogleSheetReader()
            self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
                {
                    'google_spreadsheet_key': self._google_spreadsheet_key,
                    'google_client_secret': self._google_client_secret
                })

        elif self._box_user_id and self._box_file_id and self._box_jwt:
            self.mentor_sheet_reader = BoxSheetReader()
            self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
                {
                    'box_user_id': self._box_user_id,
                    'box_file_id': self._box_file_id,
                    'box_jwt': self._box_jwt
                })
        else:
            Log.error(
                'ERROR: Incorrect mentor spreadsheet configuration, please check config.yaml'
            )
            sys.exit()

        if self._additional_config_yaml is None:
            Log.error(
                'ERROR: configuration YAML from mentors spreadsheet not found, cannot continue'
            )
            sys.exit()

        # Load additional config data from the mentors spreadsheet. This minimizes the need to deploy updates to the
        # local config.yaml file.
        #
        if not self._read_additional_config_yaml(self._additional_config_yaml):
            sys.exit()

        # Start the browser, log in
        #
        self._start_browser(args.show_browser)
        if not self._login():
            sys.exit()

        current_mentee_status = self._get_current_mentee_status(
            args.status) if args.status else None

        if current_mentee_status:
            status_file = None
            export_status = 'export' in args.status
            verbose_status = 'verbose' in args.status

            if export_status:
                status_file_path = os.path.join(
                    Utils.default_dir(),
                    f'{self.BASE_ANIMAL_TYPE}_foster_mentor_status_{date.today().strftime("%Y.%m.%d")}.txt'
                )
                status_file = open(status_file_path, 'w')
                Log.success(
                    f'Exporting mentee status to file: {status_file_path}')

            for current in current_mentee_status:
                self._print_and_write(
                    status_file,
                    '--------------------------------------------------')
                self._print_and_write(status_file, current['mentor'])
                if current['mentees']:
                    for mentee in current['mentees']:
                        self._print_and_write(
                            status_file,
                            f'    {mentee["name"]} ({mentee["pid"]}) - {len(mentee["current_animals"])} animals'
                        )
                        for a_number, data in mentee['current_animals'].items(
                        ):
                            surgery_date = self.mentor_sheet_reader.get_surgery_date(
                                a_number)
                            surgery_info = ''
                            if surgery_date:
                                surgery_info = f', Surgery Date {surgery_date}'
                            if verbose_status:
                                self._print_and_write(
                                    status_file,
                                    f'        {a_number}, {data["age"]}, S/N {data["sn"]}, Bio {data["bio"]}, Photo {data["photo"]}{surgery_info}'
                                )
                            else:
                                self._print_and_write(
                                    status_file,
                                    f'        {a_number}{surgery_info}')
                else:
                    self._print_and_write(status_file,
                                          '    ** No current mentees **')

                self._print_and_write(status_file, '')

        if args.input:
            # Load animal numbers. Note that args.input will either be a path to the "daily report" xls, or may
            # optionally be a comma-separated list of animal numbers.
            #
            if re.fullmatch(r'(\s?\d+\s?)(\s?,\s?\d+\s?)*$', args.input):
                animal_numbers = [s.strip() for s in args.input.split(',')]
            else:
                animal_numbers = KittenReportReader(
                ).read_animal_numbers_from_xls(args.input)

            if not animal_numbers:
                sys.exit()

            print(
                f'Found {len(animal_numbers)} animal{"s" if len(animal_numbers) != 1 else ""}: {", ".join([str(a) for a in animal_numbers])}'
            )

            # Query details for each animal (current foster parent, foster status, breed, color, gender, age, etc.)
            #
            animal_data, foster_parents, animals_not_in_foster = self._get_animal_data(
                animal_numbers)

            for p_number in foster_parents:
                print(
                    f'Animals for foster parent {p_number} = {foster_parents[p_number]}'
                )

            # Query details for each foster parent (name, contact details, etc.)
            #
            persons_data = {}
            for person in foster_parents:
                persons_data[person] = self._get_person_data(person)

            # Save report to file
            #
            output_csv = os.path.join(
                Utils.default_dir(),
                f'{self.BASE_ANIMAL_TYPE}_foster_mentor_report_{date.today().strftime("%Y.%m.%d")}.csv'
            )
            Utils.make_dir(output_csv)
            self._output_results(animal_data, foster_parents, persons_data,
                                 animals_not_in_foster, current_mentee_status,
                                 output_csv)

            # Optional: automatically forward this report via email
            #
            if 'generate_email' in self.config:
                from outlook_email import compose_outlook_email
                subject = self._get_from_dict(self.config['generate_email'],
                                              'subject')
                recipient_name = self._get_from_dict(
                    self.config['generate_email'], 'recipient_name')
                recipient_email = self._get_from_dict(
                    self.config['generate_email'], 'recipient_email')
                message = self._get_from_dict(self.config['generate_email'],
                                              'message')

                if None not in [
                        subject, recipient_name, recipient_email, message
                ]:
                    compose_outlook_email(subject=subject,
                                          recipient_name=recipient_name,
                                          recipient_email=recipient_email,
                                          body=message,
                                          attachment=output_csv)
                    Log.debug(
                        f'Composed email to {recipient_name} <{recipient_email}>'
                    )

        print('KittenScraper completed in {0:.0f} seconds'.format(time.time() -
                                                                  start_time))
        self._exit_browser()