Ejemplos de Log en Python, ejemplos de kitten_utils.Log en Python

Ejemplo n.º 1

0

Mostrar archivo

    def read_animal_numbers_from_xls(self, xls_filename):
        ''' Open the daily report xls, read animal numbers
        '''
        try:
            self._workbook = xlrd.open_workbook(xls_filename)
            self._sheet = self._workbook.sheet_by_index(0)

            if not self._sheet.nrows:
                Log.error(
                    f'ERROR: I\'m afraid you have an empty report: {xls_filename}'
                )
                return None

            # Perform some initial sanity checks
            #
            STATUS_DATE_COL = 0
            ANIMAL_TYPE_COL = 1
            ANIMAL_ID_COL = 2
            ANIMAL_NAME_COL = 3
            ANIMAL_AGE_COL = 4
            FOSTER_PARENT_ID_COL = 5

            if (self._sheet.row_values(0)[STATUS_DATE_COL] !=
                    'Datetime of Current Status Date'
                    or self._sheet.row_values(0)[ANIMAL_TYPE_COL] !=
                    'Current Animal Type'
                    or self._sheet.row_values(0)[ANIMAL_ID_COL] != 'AnimalID'
                    or
                    self._sheet.row_values(0)[ANIMAL_NAME_COL] != 'Animal Name'
                    or self._sheet.row_values(0)[ANIMAL_AGE_COL] != 'Age'
                    or self._sheet.row_values(0)[FOSTER_PARENT_ID_COL] !=
                    'Foster Parent ID'):

                Log.error(
                    f'ERROR: Unexpected column layout in the report. Something has changed! {xls_filename}'
                )
                return None

            Log.success(f'Loaded report {xls_filename}')

            animal_numbers = set()
            for row_number in range(1, self._sheet.nrows):
                animal_number = self._sheet.row_values(
                    row_number)[ANIMAL_ID_COL]
                # xls stores all numbers as float, but also handle str type just in case
                #
                if isinstance(animal_number,
                              float) or (isinstance(animal_number, str)
                                         and animal_number.isdigit()):
                    animal_numbers.add((int(animal_number)))

            return animal_numbers

        except IOError as err:
            Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}')

        except xlrd.XLRDError as err:
            Log.error(f'ERROR: Unable to read xls file: {xls_filename}, {err}')

        return None

Ejemplo n.º 2

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _load_config_file(self, config_file_yaml):
        ''' A config.yaml configuration file is expected to be in the same directory as this script
        '''
        try:
            config_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), config_file_yaml)
            self.config = yaml.load(open(config_file, 'r'),
                                    Loader=yaml.SafeLoader)
            self._username = self.config['username']
            self._password = self.config['password']
            self._dog_mode = self.config[
                'dog_mode'] if 'dog_mode' in self.config else False

            self._google_spreadsheet_key = self.config[
                'google_spreadsheet_key'] if 'google_spreadsheet_key' in self.config else None
            self._google_client_secret = self.config[
                'google_client_secret'] if 'google_client_secret' in self.config else None

            self._box_user_id = self.config[
                'box_user_id'] if 'box_user_id' in self.config else None
            self._box_file_id = self.config[
                'box_file_id'] if 'box_file_id' in self.config else None
            self._box_jwt = self.config[
                'box_jwt'] if 'box_jwt' in self.config else None

            if not (self._google_spreadsheet_key and self._google_client_secret
                    ) and not (self._box_user_id and self._box_file_id
                               and self._box_jwt):
                Log.error(
                    f'ERROR: Incomplete mentor spreadsheet configuration: {config_file}'
                )
                return False

            if self._dog_mode:
                Log.warn('** Dog Mode is Active **')

            self.BASE_ANIMAL_TYPE = 'feline_and_critters' if not self._dog_mode else 'canine'

        except yaml.YAMLError as err:
            Log.error(
                f'ERROR: Unable to parse configuration file: {config_file}, {err}'
            )
            return False

        except IOError as err:
            Log.error(
                f'ERROR: Unable to read configuration file: {config_file}, {err}'
            )
            return False

        except KeyError as err:
            Log.error(
                f'ERROR: Missing value in configuration file: {config_file}, {err}'
            )
            return False

        return True

Ejemplo n.º 3

0

Mostrar archivo

    def get_current_mentees(self):
        ''' Return the current mentees assigned to each mentor
        '''
        current_mentees = []
        for worksheet in self._mentor_sheets:
            if worksheet.name.lower() == 'retired mentor':
                continue

            print(f'Loading current mentees for {worksheet.name}... ', end='')

            # It's much faster to grab a whole block of cells at once vs iterating through many API calls
            #
            max_search_rows = min(50, worksheet.nrows)
            cells = [
                worksheet.row_slice(row, start_colx=0, end_colx=7)
                for row in range(0, max_search_rows)
            ]

            name_col_id = self._find_column_by_name(cells, 'Name')
            pid_col_id = self._find_column_by_name(cells, 'ID')

            mentees = []
            search_failed = False
            for i in range(1, max_search_rows):
                if i == max_search_rows - 1:
                    search_failed = True
                    Log.error(
                        f'Unable to determine current mentees for mentor {worksheet.name}'
                    )
                    mentees = []
                    break

                elif str(cells[i][0].value).lower().find(
                        'completed mentees') >= 0:
                    break  # We've reach the end of the "active mentee" rows

                elif cells[i][name_col_id].value and cells[i][pid_col_id].value:
                    mentee_name = cells[i][name_col_id].value
                    pid = int(cells[i][pid_col_id].value)
                    if not [
                            mentee
                            for mentee in mentees if mentee['pid'] == pid
                    ]:  # ignore duplicate mentees
                        mentees.append({'name': mentee_name, 'pid': pid})

            if not search_failed:
                print(f'found {len(mentees)}')

            current_mentees.append({
                'mentor': worksheet.name,
                'mentees': mentees
            })

        return current_mentees

Ejemplo n.º 4

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _animal_has_adoption_summary(self, animal_number):
        adoption_summary = ''
        try:
            self._driver.get(self._adoption_summary_url.format(animal_number))
            adoption_summary = self._get_text_by_id('adoptSummary').strip()
        except Exception:
            Log.error(
                f'Failed to read adoption summary for animal {animal_number}')
            return False

        return len(
            adoption_summary
        ) > 10  # minimum of 10 chars, completely arbitrary in case there is some junk in here

Ejemplo n.º 5

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _get_current_mentee_status(self, arg_status):
        ''' Get current mentees and mentee status for each mentor
        '''
        autoupdate_completed_mentees = 'autoupdate' in arg_status  # mark 'completed' mentors in the spreadsheet
        verbose_status = 'verbose' in arg_status
        Log.success(
            f'Looking up mentee status (verbose = {verbose_status}, autoupdate_completed_mentees = {autoupdate_completed_mentees})...'
        )
        completed_mentees = {}
        current_mentees = self.mentor_sheet_reader.get_current_mentees()
        for current in current_mentees:
            current['active_count'] = 0
            print(f'Checking mentee status for {current["mentor"]}... ',
                  end='',
                  flush=True)

            if current['mentees']:
                for mentee in current['mentees']:
                    current_animal_ids = self._current_animals_fostered(
                        mentee['pid'])
                    mentee['current_animals'] = {}

                    if verbose_status:
                        animal_data, _, _ = self._get_animal_data(
                            current_animal_ids, True)

                    for current_animal_id in current_animal_ids:
                        mentee['current_animals'][
                            current_animal_id] = animal_data[
                                current_animal_id] if verbose_status else {}

                    if current_animal_ids:
                        current['active_count'] = current['active_count'] + 1
                    else:
                        completed_mentees.setdefault(current['mentor'],
                                                     []).append(mentee['pid'])

            days_ago = (datetime.now() - current['most_recent']
                        ).days if current['most_recent'] else 'N/A'
            print(
                f'active mentees = {current["active_count"]}, last assigned days ago = {days_ago}'
            )

        if autoupdate_completed_mentees:
            Log.success(
                'Auto-updating completed mentees in the mentor spreadsheet...')
            for mentor in completed_mentees:
                self.mentor_sheet_reader.set_completed_mentees(
                    mentor, completed_mentees[mentor])

        return current_mentees

Ejemplo n.º 6

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

 def _get_spay_neuter_status(self, animal_number):
     ''' Load spay/neuter status from the medical details page
     '''
     try:
         self._driver.get(self._medical_details_url.format(animal_number))
         return Utils.utf8(
             self._get_attr_by_xpath(
                 'innerText',
                 '/html/body/table[2]/tbody/tr[2]/td/table/tbody/tr[4]/td[4]'
             ))
     except Exception:
         Log.error(
             f'Failed to read spay/neuter status for animal {animal_number}'
         )
         return 'Unknown'

Ejemplo n.º 7

0

Mostrar archivo

    def set_completed_mentees(self, mentor, mentee_ids):
        ''' Mark the given mentees as completed.

            Future refactoring consideration: See similar code between set_completed_mentees() and get_current_mentees().
        '''
        for worksheet in self._mentor_sheets:
            if worksheet.title.lower() == mentor.lower():
                max_search_rows = min(100, worksheet.rows)
                cells = worksheet.range(f'A1:G{max_search_rows}',
                                        returnas='cells')

                name_col_id = self._find_column_by_name(cells, 'Name')
                pid_col_id = self._find_column_by_name(cells, 'ID')
                notes_col_id = 0

                for i in range(1, max_search_rows):
                    if str(cells[i][0].value).lower().find(
                            'completed mentees') >= 0:
                        break  # We've reached the end of the "active mentee" rows

                    if cells[i][name_col_id].value and str(
                            cells[i][pid_col_id].value).isdigit():
                        pid = int(cells[i][pid_col_id].value)
                        if pid in mentee_ids:
                            # If this mentee name cell is already marked with strikethrough, leave it alone
                            #
                            name_cell_format = cells[i][
                                name_col_id].text_format
                            if not name_cell_format or 'strikethrough' not in name_cell_format or name_cell_format[
                                    'strikethrough'] is False:
                                mentee_name = cells[i][name_col_id].value
                                mentee_name = mentee_name.replace('\n',
                                                                  ' ').replace(
                                                                      '\r', '')
                                Log.debug(
                                    f'Completed: {mentee_name} ({pid}) @ {mentor}[\'{cells[i][name_col_id].label}\']'
                                )
                                debug_mode = False
                                if not debug_mode:
                                    cells[i][name_col_id].set_text_format(
                                        'strikethrough', True)
                                    notes_current_value = cells[i][
                                        notes_col_id].value
                                    if 'autoupdate: no animals' not in notes_current_value.lower(
                                    ):
                                        cells[i][notes_col_id].set_value(
                                            f'AutoUpdate: No animals {date.today().strftime("%b %-d, %Y")}\r\n{notes_current_value}'
                                        )

Ejemplo n.º 8

0

Mostrar archivo

    def load_mentors_spreadsheet(self, auth):
        ''' Load the feline foster spreadsheet
        '''
        try:
            Log.success(
                f'Loading mentors spreadsheet from Box (id = {auth["box_file_id"]})...'
            )

            jwt_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), auth['box_jwt'])
            client = Client(JWTAuth.from_settings_file(jwt_path))
            box_file = client.as_user(
                client.user(user_id=auth['box_user_id'])).file(
                    file_id=auth['box_file_id']).get()
            xlxs_workbook = xlrd.open_workbook(
                file_contents=box_file.content())

            config_yaml = xlxs_workbook.sheet_by_name(
                self._CONFIG_SHEET_NAME).row_values(1)[0]

            for sheet_name in xlxs_workbook.sheet_names():
                if not self._is_reserved_sheet(sheet_name):
                    sheet = xlxs_workbook.sheet_by_name(sheet_name)
                    self._mentor_sheets.append(sheet)
                    all_values = [
                        sheet.row_values(i) for i in range(1, sheet.nrows)
                    ]
                    self._mentor_match_values[Utils.utf8(sheet_name)] = [
                        Utils.utf8(str(item)).lower() for sublist in all_values
                        for item in sublist
                    ]

        except Exception as e:
            Log.error(
                f'ERROR: Unable to load Feline Foster spreadsheet!\r\n{str(e)}, {repr(e)}'
            )
            return None

        print(
            f'Loaded {len(self._mentor_sheets)} mentors from \"{box_file["name"]}\"'
        )
        return config_yaml

Ejemplo n.º 9

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _start_browser(self, show_browser):
        ''' Instantiate the browser, configure options as needed
        '''
        Log.success('Starting chromedriver...')
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(
            '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 '
            '(KHTML, like Gecko) Version/12.0.3 Safari/605.1.15')
        if not show_browser:
            chrome_options.add_argument('--headless')

        # Consider adding chromedriver-binary or chromedriver_installer to requirements.txt and
        # removing these local copies.
        #
        if sys.platform == 'darwin':
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/mac64/chromedriver')
        elif sys.platform.startswith('linux'):
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/linux64/chromedriver')
        elif sys.platform.startswith('win32'):
            chromedriver_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'bin/win32/chromedriver')
            chrome_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging'])  # chromedriver complains a lot on Windows
        else:
            Log.error(
                f'Sorry friends, I haven\'t included chromedriver for your platform ({sys.platform}). Exiting now.'
            )
            sys.exit(0)

        self._driver = webdriver.Chrome(chromedriver_path,
                                        options=chrome_options)
        self._driver.set_page_load_timeout(60)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _read_additional_config_yaml(self, additional_config_yaml):
        ''' The mentors spreadsheet contains additional configuration data. This makes it easier to manage dynamic
            configuration data vs rollout of config.yaml updates.
        '''
        try:
            Log.success('Reading configuration data from spreadsheet...')
            config = yaml.load(additional_config_yaml, Loader=yaml.SafeLoader)
            self._login_url = config['login_url']
            self._search_url = config['search_url']
            self._animal_url = config['animal_url']
            self._medical_details_url = config['medical_details_url']
            self._list_all_animals_url = config['list_animals_url']
            self._responsible_for_url = config['responsible_for_url']
            self._responsible_for_paged_url = config[
                'responsible_for_paged_url']
            self._adoption_summary_url = config['adoption_summary_url']
            self._do_not_assign_mentor = config['do_not_assign_mentor'] if 'do_not_assign_mentor' in config else []
            self._mentors = config['mentors'] if 'mentors' in config else []

        except AttributeError as err:
            Log.error(f'ERROR: Unable to read additional config: {err}')
            return False

        except yaml.YAMLError as err:
            Log.error(f'ERROR: Unable to read additional config: {err}')
            return False

        except TypeError as err:
            Log.error(f'ERROR: Invalid yaml in additional config: {err}')
            return False

        except KeyError as err:
            Log.error(f'ERROR: Missing value in additional config: {err}')
            return False

        return True

Ejemplo n.º 11

0

Mostrar archivo

    def check_for_surgery_sheet(self, worksheet):
        if any(worksheet.title in substr
               for substr in self._SURGERY_SHEET_NAMES):
            surgery_rows = worksheet.get_values(
                'A1',
                f'H{worksheet.rows}',
                include_tailing_empty=False,
                include_tailing_empty_rows=False)
            date_col = -1
            patient_col = -1
            for col in range(0, len(surgery_rows[0])):
                # Allow for an extra header row (accounting for differences between Feline and Canine)
                #
                if any('date' in substr for substr in [
                        str(surgery_rows[0][col]).lower(),
                        str(surgery_rows[1][col]).lower()
                ]):
                    date_col = col
                elif 'patient' in (str(surgery_rows[0][col]).lower(),
                                   str(surgery_rows[1][col]).lower()):
                    patient_col = col

            if date_col != -1 and patient_col != -1:
                for row in range(1, len(surgery_rows)):
                    try:
                        a_number = surgery_rows[row][patient_col]
                        if a_number.isdigit():
                            a_number = int(a_number)
                            # If there are multiple entries for a given a_number, assume the first is the most recent.
                            #
                            if a_number not in self._surgery_dates:
                                self._surgery_dates[int(
                                    a_number)] = surgery_rows[row][date_col]
                    except Exception as e:
                        Log.warn(
                            f'{worksheet.title} column {patient_col}, row {row} is empty. Assuming this is the end of the list.'
                        )
                        break
            else:
                Log.error(
                    f'Surgery form is not in expected format (date_col={date_col}, patient_col={patient_col}. Skipping.'
                )

            Log.debug(
                f'Loaded {len(self._surgery_dates)} entries from the surgery sheet'
            )
            return True

        return False

Ejemplo n.º 12

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _login(self):
        ''' Load the login page, enter credentials, submit
        '''
        Log.success('Logging in...')

        try:
            self._driver.set_page_load_timeout(20)
            self._driver.get(self._login_url)

        except TimeoutException:
            Log.error(
                'ERROR: Unable to load the login page. Please check your connection.'
            )
            return False
        except NoSuchElementException:
            Log.error(
                'ERROR: Unable to load the login page. Please check your connection.'
            )
            return False

        try:
            self._driver.find_element_by_id('txt_username').send_keys(
                self._username)
            self._driver.find_element_by_id('txt_password').send_keys(
                self._password)
            self._driver.find_element_by_id(
                'ctl00_ctl00_ContentPlaceHolderBase_ContentPlaceHolder1_btn_login'
            ).click()
            self._driver.find_element_by_id('Continue').click()

        except NoSuchElementException:
            Log.error(
                'ERROR: Unable to login. Please check your username/password.')
            return False

        return True

Ejemplo n.º 13

0

Mostrar archivo

    def get_current_mentees(self):
        ''' Return the current mentees assigned to each mentor
        '''
        current_mentees = []
        for worksheet in self._mentor_sheets:
            if worksheet.title.lower() == 'retired mentor':
                continue
            print(f'Loading current mentees for {worksheet.title}... ',
                  end='',
                  flush=True)

            # It's much faster to grab a whole block of cells at once vs iterating through many API calls
            #
            max_search_rows = min(100, worksheet.rows)
            cells = worksheet.range(f'A1:G{max_search_rows}', returnas='cells')

            name_col_id = self._find_column_by_name(cells, 'Name')
            pid_col_id = self._find_column_by_name(cells, 'ID')
            date_col_id = self._find_column_by_name(cells,
                                                    'Date\nKittens\nReceived')
            if date_col_id == -1:
                date_col_id = self._find_column_by_name(
                    cells, 'Date Dog Received')

            mentees = []
            search_failed = False
            most_recent_received_date = None
            for i in range(1, max_search_rows):
                if i == max_search_rows - 1:
                    search_failed = True
                    Log.error(
                        f'Unable to determine current mentees for mentor {worksheet.title}'
                    )
                    mentees = []
                    break

                elif str(cells[i][0].value).lower().find(
                        'completed mentees') >= 0:
                    break  # We've reached the end of the "active mentee" rows

                elif cells[i][name_col_id].value and str(
                        cells[i][pid_col_id].value).isdigit():
                    mentee_name = cells[i][name_col_id].value
                    pid = int(cells[i][pid_col_id].value)
                    received_date = Utils.string_to_datetime(
                        cells[i][date_col_id].value)

                    if received_date and (
                            most_recent_received_date is None
                            or received_date > most_recent_received_date):
                        most_recent_received_date = received_date

                    if not [
                            mentee
                            for mentee in mentees if mentee['pid'] == pid
                    ]:  # ignore duplicate mentees
                        mentees.append({'name': mentee_name, 'pid': pid})

            if not search_failed:
                print(f'found {len(mentees)}')

            current_mentees.append({
                'mentor': worksheet.title,
                'mentees': mentees,
                'most_recent': most_recent_received_date
            })
        return current_mentees

Ejemplo n.º 14

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _output_results(self, animal_data, foster_parents, persons_data,
                        animals_not_in_foster, current_mentee_status,
                        csv_filename):
        ''' Output all of our new super amazing results to a csv file
        '''
        Log.success(f'Writing results to {csv_filename}...')
        csv_rows = []

        csv_rows.append([])
        csv_rows[-1].append('Kitten-Scraper Notes')
        csv_rows[-1].append('Loss Rate')
        csv_rows[-1].append('Name')
        csv_rows[-1].append('E-mail')
        csv_rows[-1].append('Phone')
        csv_rows[-1].append('Person ID')
        csv_rows[-1].append('Foster Experience')
        csv_rows[-1].append('Date Animals Received')
        if self._dog_mode:
            csv_rows[-1].append('"Name, Breed, Color"')
        csv_rows[-1].append('Animal Details')
        csv_rows[-1].append('Special Animal Message')

        # Build a row for each foster parent
        #
        for person_number in sorted(foster_parents,
                                    key=lambda p: persons_data[p]['notes']):
            person_data = persons_data[person_number]
            name = person_data['full_name']
            report_notes = person_data['notes']
            loss_rate = round(person_data['loss_rate'])
            animals_with_this_person = foster_parents[person_number]
            animal_details, animal_details_brief = self._get_animal_details_string(
                animals_with_this_person, animal_data)

            prev_animals_fostered = person_data['prev_animals_fostered']
            foster_experience = 'NEW' if not prev_animals_fostered else prev_animals_fostered

            special_message = ''
            for a_number in animals_with_this_person:
                msg = animal_data[a_number]['message']
                if msg:
                    special_message += '{}{}: {}'.format(
                        '\r\r' if special_message else '', a_number, msg)

            cell_number = person_data['cell_phone']
            home_number = person_data['home_phone']
            phone = ''
            if len(cell_number) >= 10:  # ignore incomplete phone numbers
                phone = f'(C) {cell_number}'
            if len(home_number) >= 10:  # ignore incomplete phone numbers
                phone += '{}(H) {}'.format('\r' if phone else '', home_number)

            emails_str = ''
            for email in person_data['emails']:
                emails_str += '{}{}'.format('\r' if emails_str else '', email)

            # I will assume all animals in this group went into foster on the same date. This should usually be true
            # since this is designed to processed with a "daily report".
            #
            date_received = animal_data[
                animals_with_this_person[0]]['status_date']

            # Explicitly wrap numbers/datestr with ="{}" to avoid Excel auto-formatting issues
            #
            csv_rows.append([])
            csv_rows[-1].append(f'"{report_notes}"')
            csv_rows[-1].append(f'"{loss_rate}%"')
            csv_rows[-1].append(f'"{name}"')
            csv_rows[-1].append(f'"{emails_str}"')
            csv_rows[-1].append(f'"{phone}"')
            csv_rows[-1].append(f'="{person_number}"')
            csv_rows[-1].append(f'"{foster_experience}"')
            csv_rows[-1].append(f'="{date_received}"')
            if self._dog_mode:
                csv_rows[-1].append(f'"{animal_details_brief}"')
            csv_rows[-1].append(f'"{animal_details}"')
            csv_rows[-1].append(f'"{special_message}"')

            print('{} (Experience: {}, Loss Rate: {}%) {}{}{}'.format(
                name, foster_experience, loss_rate, Log.GREEN,
                report_notes.replace('\r', ', '), Log.END))
        with open(csv_filename, 'w') as outfile:
            for row in csv_rows:
                outfile.write(','.join(row))
                outfile.write('\n')

            if not foster_parents:
                outfile.write(
                    '*** None of the animals in this report are currently in foster\n'
                )
                Log.warn(
                    'None of the animals in this report are currently in foster. Nothing to do!'
                )

            if animals_not_in_foster:
                outfile.write('\n\n\n*** Animals not in foster\n')
                Log.warn('\nAnimals not in foster')
                for a_number in animals_not_in_foster:
                    outfile.write('{} {} - {}\n'.format(
                        a_number, animal_data[a_number]['type'],
                        animal_data[a_number]['status']))
                    print('{} {} - {}'.format(a_number,
                                              animal_data[a_number]['type'],
                                              animal_data[a_number]['status']))

            if current_mentee_status:
                outfile.write(
                    '\n\nMentor,Active Mentees,Last Assigned (days ago)\n')
                for current in current_mentee_status:
                    days_ago = (datetime.now() - current['most_recent']
                                ).days if current['most_recent'] else 'N/A'
                    outfile.write(
                        f'{current["mentor"]},{current["active_count"]},{days_ago}\n'
                    )

Ejemplo n.º 15

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def run(self):
        print(f'Welcome to KittenScraper {__version__}')
        start_time = time.time()

        arg_parser = ArgumentParser()
        arg_parser.add_argument(
            '-i',
            '--input',
            help=
            'specify the daily foster report (xls), or optionally a comma-separated list of animal numbers',
            required=False)
        arg_parser.add_argument(
            '-s',
            '--status',
            help='retrieve current mentee status [verbose,autoupdate,export]',
            required=False,
            nargs='?',
            default='',
            const='yes')
        arg_parser.add_argument(
            '-c',
            '--config',
            help=
            'specify a config file (optional, defaults to \'config.yaml\')',
            required=False,
            default='config.yaml')
        arg_parser.add_argument(
            '-b',
            '--show_browser',
            help='show the web browser window (generally used for debugging)',
            required=False,
            action='store_true')
        args = arg_parser.parse_args()

        if not args.input and not args.status:
            arg_parser.print_help()
            sys.exit(0)

        # Load config.yaml
        #
        if not self._load_config_file(args.config):
            sys.exit()

        # Load the Foster Mentors spreadsheet
        #
        if self._google_spreadsheet_key and self._google_client_secret:
            self.mentor_sheet_reader = GoogleSheetReader()
            self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
                {
                    'google_spreadsheet_key': self._google_spreadsheet_key,
                    'google_client_secret': self._google_client_secret
                })

        elif self._box_user_id and self._box_file_id and self._box_jwt:
            self.mentor_sheet_reader = BoxSheetReader()
            self._additional_config_yaml = self.mentor_sheet_reader.load_mentors_spreadsheet(
                {
                    'box_user_id': self._box_user_id,
                    'box_file_id': self._box_file_id,
                    'box_jwt': self._box_jwt
                })
        else:
            Log.error(
                'ERROR: Incorrect mentor spreadsheet configuration, please check config.yaml'
            )
            sys.exit()

        if self._additional_config_yaml is None:
            Log.error(
                'ERROR: configuration YAML from mentors spreadsheet not found, cannot continue'
            )
            sys.exit()

        # Load additional config data from the mentors spreadsheet. This minimizes the need to deploy updates to the
        # local config.yaml file.
        #
        if not self._read_additional_config_yaml(self._additional_config_yaml):
            sys.exit()

        # Start the browser, log in
        #
        self._start_browser(args.show_browser)
        if not self._login():
            sys.exit()

        current_mentee_status = self._get_current_mentee_status(
            args.status) if args.status else None

        if current_mentee_status:
            status_file = None
            export_status = 'export' in args.status
            verbose_status = 'verbose' in args.status

            if export_status:
                status_file_path = os.path.join(
                    Utils.default_dir(),
                    f'{self.BASE_ANIMAL_TYPE}_foster_mentor_status_{date.today().strftime("%Y.%m.%d")}.txt'
                )
                status_file = open(status_file_path, 'w')
                Log.success(
                    f'Exporting mentee status to file: {status_file_path}')

            for current in current_mentee_status:
                self._print_and_write(
                    status_file,
                    '--------------------------------------------------')
                self._print_and_write(status_file, current['mentor'])
                if current['mentees']:
                    for mentee in current['mentees']:
                        self._print_and_write(
                            status_file,
                            f'    {mentee["name"]} ({mentee["pid"]}) - {len(mentee["current_animals"])} animals'
                        )
                        for a_number, data in mentee['current_animals'].items(
                        ):
                            surgery_date = self.mentor_sheet_reader.get_surgery_date(
                                a_number)
                            surgery_info = ''
                            if surgery_date:
                                surgery_info = f', Surgery Date {surgery_date}'
                            if verbose_status:
                                self._print_and_write(
                                    status_file,
                                    f'        {a_number}, {data["age"]}, S/N {data["sn"]}, Bio {data["bio"]}, Photo {data["photo"]}{surgery_info}'
                                )
                            else:
                                self._print_and_write(
                                    status_file,
                                    f'        {a_number}{surgery_info}')
                else:
                    self._print_and_write(status_file,
                                          '    ** No current mentees **')

                self._print_and_write(status_file, '')

        if args.input:
            # Load animal numbers. Note that args.input will either be a path to the "daily report" xls, or may
            # optionally be a comma-separated list of animal numbers.
            #
            if re.fullmatch(r'(\s?\d+\s?)(\s?,\s?\d+\s?)*$', args.input):
                animal_numbers = [s.strip() for s in args.input.split(',')]
            else:
                animal_numbers = KittenReportReader(
                ).read_animal_numbers_from_xls(args.input)

            if not animal_numbers:
                sys.exit()

            print(
                f'Found {len(animal_numbers)} animal{"s" if len(animal_numbers) != 1 else ""}: {", ".join([str(a) for a in animal_numbers])}'
            )

            # Query details for each animal (current foster parent, foster status, breed, color, gender, age, etc.)
            #
            animal_data, foster_parents, animals_not_in_foster = self._get_animal_data(
                animal_numbers)

            for p_number in foster_parents:
                print(
                    f'Animals for foster parent {p_number} = {foster_parents[p_number]}'
                )

            # Query details for each foster parent (name, contact details, etc.)
            #
            persons_data = {}
            for person in foster_parents:
                persons_data[person] = self._get_person_data(person)

            # Save report to file
            #
            output_csv = os.path.join(
                Utils.default_dir(),
                f'{self.BASE_ANIMAL_TYPE}_foster_mentor_report_{date.today().strftime("%Y.%m.%d")}.csv'
            )
            Utils.make_dir(output_csv)
            self._output_results(animal_data, foster_parents, persons_data,
                                 animals_not_in_foster, current_mentee_status,
                                 output_csv)

            # Optional: automatically forward this report via email
            #
            if 'generate_email' in self.config:
                from outlook_email import compose_outlook_email
                subject = self._get_from_dict(self.config['generate_email'],
                                              'subject')
                recipient_name = self._get_from_dict(
                    self.config['generate_email'], 'recipient_name')
                recipient_email = self._get_from_dict(
                    self.config['generate_email'], 'recipient_email')
                message = self._get_from_dict(self.config['generate_email'],
                                              'message')

                if None not in [
                        subject, recipient_name, recipient_email, message
                ]:
                    compose_outlook_email(subject=subject,
                                          recipient_name=recipient_name,
                                          recipient_email=recipient_email,
                                          body=message,
                                          attachment=output_csv)
                    Log.debug(
                        f'Composed email to {recipient_name} <{recipient_email}>'
                    )

        print('KittenScraper completed in {0:.0f} seconds'.format(time.time() -
                                                                  start_time))
        self._exit_browser()

Ejemplo n.º 16

0

Mostrar archivo

    def load_mentors_spreadsheet(self, auth):
        ''' Load the feline foster spreadsheet
        '''
        start_time = time.time()
        try:
            Log.success(
                f'Loading mentors spreadsheet from Google Sheets (id = \'{auth["google_spreadsheet_key"]}\')...'
            )

            client = pygsheets.authorize(auth['google_client_secret'])
            spreadsheet = client.open_by_key(auth['google_spreadsheet_key'])

            config_yaml = spreadsheet.worksheet_by_title(
                self._CONFIG_SHEET_NAME)[2][0]

            for worksheet in spreadsheet.worksheets():
                if not self._is_reserved_sheet(worksheet.title):
                    Log.debug(f'Reading worksheet \"{worksheet.title}\"...')
                    try:
                        if self.check_for_surgery_sheet(worksheet):
                            continue

                        # Mentor sheet header rows vary slightly between feline and canine. Perform a terrible quick-and-dirty validation.
                        #
                        if ['ID'] not in worksheet.get_values('E1', 'E2'):
                            raise Exception('') from Exception

                        # Build a list of mentee names/emails/ids to be used for mentor matching
                        #
                        b_rows = worksheet.get_values(
                            'B2',
                            f'B{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)
                        c_rows = worksheet.get_values(
                            'C2',
                            f'C{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)
                        e_rows = worksheet.get_values(
                            'E2',
                            f'E{worksheet.rows}',
                            include_tailing_empty=False,
                            include_tailing_empty_rows=False)

                        mentor_match_cells = b_rows + c_rows + e_rows
                        self._mentor_match_values[Utils.utf8(
                            worksheet.title)] = [
                                Utils.utf8(item).lower()
                                for sublist in mentor_match_cells
                                for item in sublist
                            ]
                        self._mentor_sheets.append(worksheet)

                    except Exception:
                        Log.debug(
                            f'Sheet \'{worksheet.title}\' does not appear to be a mentor sheet (skipping)'
                        )

        except Exception as e:
            Log.error(
                f'ERROR: Unable to load mentors spreadsheet!\r\n{str(e)}, {repr(e)}'
            )
            return None

        print('Loaded {0} mentors from \"{1}\" in {2:.0f} seconds'.format(
            len(self._mentor_sheets), spreadsheet.title,
            time.time() - start_time))
        return config_yaml

Ejemplo n.º 17

0

Mostrar archivo

Archivo: kitten_scraper.py Proyecto: skylarstein/kitten-scraper

    def _get_animal_data(self, animal_numbers, silent=False):
        ''' Load additional animal data for each animal number
        '''
        animal_data = {}
        foster_parents = {}
        animals_not_in_foster = set()
        for a_number in animal_numbers:
            if not silent:
                print(f'Looking up animal {a_number}... ', end='', flush=True)
            sys.stdout.flush()
            self._driver.get(self._animal_url.format(a_number))
            try:
                # Dismiss alert (if found)
                #
                Alert(self._driver).dismiss()
            except NoAlertPresentException:
                pass

            try:
                # Wait for lazy-loaded content
                #
                WebDriverWait(self._driver, 10).until(
                    EC.presence_of_element_located((By.ID, 'submitbtn2')))
            except Exception:
                raise Exception(
                    'Timeout while waiting for content on search page!'
                ) from Exception

            # Get Special Message text (if it exists)
            #
            special_msg = Utils.utf8(
                self._get_text_by_id('specialMessagesDialog'))

            if special_msg:
                # Remove text we don't care about
                #
                special_msg = re.sub(
                    r'(?i)This is a special message. If you would like to delete it then clear the Special Message box in the General Details section of this page.',
                    '', special_msg).strip()

                # Remove empty lines and double quotes
                #
                special_msg = os.linesep.join(
                    [s for s in special_msg.splitlines() if s])
                special_msg = special_msg.replace('"', '\'')

            animal_data[a_number] = {}
            animal_data[a_number]['message'] = special_msg
            status = self._get_selection_by_id('status')

            if not status:
                # Status text is usually found within a <select> element, but is sometimes found as innerText within the
                # <td> that looks something like this: "Adopted - Awaiting Pickup\nChange Status"
                try:
                    status = self._get_property_by_xpath(
                        'innerText',
                        '//*[@id="Table17"]/tbody/tr[5]/td[2]').split('\n')[0]
                except Exception:
                    status = ''

            sub_status = self._get_selection_by_id('subStatus')
            animal_data[a_number][
                'status'] = f'{status}{" - " if sub_status else ""}{sub_status}'
            animal_data[a_number]['name'] = self._get_attr_by_id(
                'animalname').strip()
            animal_data[a_number]['type'] = self._get_attr_by_id('type')
            animal_data[a_number]['breed'] = self._get_attr_by_id(
                'primaryBreed').strip()
            animal_data[a_number]['primary_color'] = self._get_selection_by_id(
                'primaryColour')
            animal_data[a_number][
                'secondary_color'] = self._get_selection_by_id(
                    'secondaryColour')
            animal_data[a_number]['gender'] = self._get_selection_by_id('sex')
            animal_data[a_number][
                'photo'] = 'No' if 'NoImage.png' in self._get_property_by_xpath(
                    'src', '//*[@id="animal-default-photo"]') else 'Yes'

            try:
                age = datetime.now() - datetime.strptime(
                    self._get_attr_by_id('dob'), '%m/%d/%Y')
                animal_data[a_number]['age'] = self._stringify_age(age)
            except Exception:
                animal_data[a_number]['age'] = 'Unknown Age'

            try:
                animal_data[a_number]['status_date'] = datetime.strptime(
                    self._get_attr_by_id('statusdate'),
                    '%m/%d/%Y').strftime('%-d-%b-%Y')
            except ValueError:
                animal_data[a_number]['status_date'] = 'Unknown'

            # If this animal is currently in foster, get the responsible person (foster parent).
            #
            status = status.lower()
            if ('in foster' in status and 'unassisted death' not in status):
                try:
                    p_number = int(
                        self._get_attr_by_xpath(
                            'href',
                            '//*[@id="Table17"]/tbody/tr[1]/td[2]/a').split(
                                'personid=')[1])
                    foster_parents.setdefault(p_number, []).append(a_number)
                except Exception:
                    Log.error(
                        f'Failed to find foster parent for animal {a_number}, please check report'
                    )
            else:
                animals_not_in_foster.add(a_number)

            # Perform these operations last. They will load new pages!
            #
            animal_data[a_number]['sn'] = self._get_spay_neuter_status(
                a_number)
            animal_data[a_number][
                'bio'] = 'Yes' if self._animal_has_adoption_summary(
                    a_number) else 'No'

            # Create some helpful/default string representations
            #
            set_default = lambda str, default: default if str.strip(
            ) in [None, ''] else str

            animal_data[a_number]['sn'] = set_default(
                animal_data[a_number]['sn'], 'Unknown')
            animal_data[a_number]['status'] = set_default(
                animal_data[a_number]['status'], 'Status Unknown')
            animal_data[a_number]['name'] = set_default(
                animal_data[a_number]['name'], 'Unnamed')
            animal_data[a_number]['breed'] = set_default(
                animal_data[a_number]['breed'], 'Unknown Breed')
            animal_data[a_number]['gender'] = set_default(
                animal_data[a_number]['gender'], 'Unknown Gender')
            animal_data[a_number]['color'] = set_default(
                animal_data[a_number]['primary_color'], 'Unknown Color')

            if animal_data[a_number]['secondary_color'].strip() not in [
                    None, '', 'None'
            ]:
                animal_data[a_number][
                    'color'] += f'/{animal_data[a_number]["secondary_color"]}'

            breed_abbreviations = {
                # 'Breed' is a free-form text field and not everyone enters data exactly the same. I'll compare against
                # lowercase/no-whitespace for slightly better odds of matches.
                #
                'domesticshorthair': 'DSH',
                'domesticmediumhair': 'DMH',
                'domesticlonghair': 'DLH'
            }

            abbreviation = breed_abbreviations.get(
                animal_data[a_number]['breed'].replace(' ', '').lower())
            if abbreviation:
                animal_data[a_number]['breed'] = abbreviation

            if animal_data[a_number]['gender'].lower() == 'male':
                animal_data[a_number]['gender_short'] = 'M'
            elif animal_data[a_number]['gender'].lower() == 'female':
                animal_data[a_number]['gender_short'] = 'F'
            else:
                animal_data[a_number]['gender_short'] = animal_data[a_number][
                    'gender']

            if not silent:
                print(animal_data[a_number]['status'])

        return animal_data, foster_parents, animals_not_in_foster