class Lexia(WebUIDataSource, LoggingMixin): """ Class for interacting with the web ui of Lexia """ def __init__(self, username, password, wait_time, hostname, temp_folder_path=None, headless=False, lexia_school_year_start_date=None, district_export_email_address=None, district_export_email_password=None, district_export_email_imap_uri=None, district_export_email_folder='Lexia District Exports', district_export_email_wait_time=600, district_export_email_retry_frequency=30, district_id=None): super().__init__(username, password, wait_time, hostname, temp_folder_path, headless) self.lexia_school_year_start_date = lexia_school_year_start_date self.district_export_email_address = district_export_email_address self.district_export_email_password = district_export_email_password self.district_export_email_imap_uri = district_export_email_imap_uri self.district_export_email_folder = district_export_email_folder self.district_export_email_wait_time = district_export_email_wait_time self.district_export_email_retry_frequency = district_export_email_retry_frequency self.district_id = district_id self.uri_scheme = 'https://' self.base_url = self.uri_scheme + 'www.' + self.hostname def _login(self): """ Logs into the provided Lexia instance. """ login_url = self.uri_scheme + 'auth.mylexia.com/mylexiaLogin' self.log.debug('Logging into Lexia at: {}'.format(login_url)) self.driver.get(login_url) elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'username'))) elem.clear() elem.send_keys(self.username) elem.send_keys(Keys.RETURN) elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'login-password'))) elem.send_keys(self.password) elem.send_keys(Keys.RETURN) # ensure that login is successful self.driver.get(self.base_url) if 'Welcome' in self.driver.title: self.driver.close() raise InvalidLoginCredentials def download_url_report(self, report_url, write_to_disk=None, **kwargs): """ Downloads a Lexia report at a URL for a page with an 'export' button. Args: report_url (string): Information pertaining to the path and query string for the report whose access is desired. Any filtering that can be done with a stateful URL should be included. write_to_disk (string): The path for a directory to store the downloaded file. If nothing is provided, the file will be stored in a temporary directory and deleted at the end of this function. **kwargs: additional arguments to pass to Pandas read_excel or read_csv (depending on the report_url) Returns: A Pandas DataFrame of the report contents. 
""" report_download_url = interpret_report_url(self.base_url, report_url) # if user is trying to download a manage tab report (for convenience) if '/mylexiaweb/app/index.html#/groups/' in report_download_url: return self.download_manage_tab_report(report_url, write_to_disk, **kwargs) if write_to_disk: csv_download_folder_path = write_to_disk else: csv_download_folder_path = mkdtemp() self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() self.log.debug( 'Getting report page at: {}'.format(report_download_url)) self.driver.get(report_download_url) # find and click the download button elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, "//button[contains(text(), 'Export')]"))) self.log.debug('Starting download of: '.format(report_download_url)) elem.click() wait_for_any_file_in_folder(csv_download_folder_path, "xlsx") self.log.debug('Downloada Finished.') df_report = pd.read_excel( get_most_recent_file_in_dir(csv_download_folder_path), **kwargs) # if the dataframe is empty (the report had no data), raise an error if df_report.shape[0] == 0: raise ValueError('No data in report for user {} at url: {}'.format( self.username, interpret_report_url(self.base_url, report_url))) self.driver.close() if not write_to_disk: shutil.rmtree(csv_download_folder_path) return df_report def download_manage_tab_report(self, report_url, write_to_disk=None, **kwargs): """ Downloads a Lexia report from the 'Manage' tab. Args: report_url (string): Information pertaining to the path and query string for the report whose access is desired. Any filtering that can be done with a stateful URL should be included. write_to_disk (string): The path for a directory to store the downloaded file. If nothing is provided, the file will be stored in a temporary directory and deleted at the end of this function. **kwargs: additional arguments to pass to Pandas read_csv Returns: A Pandas DataFrame of the report contents. 
""" if write_to_disk: csv_download_folder_path = write_to_disk else: csv_download_folder_path = mkdtemp() self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() report_download_url = interpret_report_url(self.base_url, report_url) self.log.debug( 'Getting report page at: {}'.format(report_download_url)) self.driver.get(report_download_url) # select all users and find the download button def check_for_export_button_enabled(driver, elem_select_all_locator, elem_export_locator): elem_select_all = driver.find_element(*elem_select_all_locator) if not elem_select_all.is_enabled(): return False elem_select_all.click() if not elem_select_all.is_selected(): return False elem_export = driver.find_element(*elem_export_locator) if elem_export.is_enabled() and elem_export.is_displayed(): return elem_export else: return False # have to use a lambda because until expects a callable elem_export = WebDriverWait( self.driver, self.wait_time).until(lambda x: check_for_export_button_enabled( self.driver, (By.NAME, "lexia-select-all"), (By.XPATH, "//button[contains(text(), 'Export')]"))) self.log.debug('Starting download of: '.format(report_download_url)) elem_export.click() wait_for_any_file_in_folder(csv_download_folder_path, "xls") self.log.debug('Download Finished.') df_report = pd.read_csv( get_most_recent_file_in_dir(csv_download_folder_path), sep='\t', **kwargs) # if the dataframe is empty (the report had no data), raise an error if df_report.shape[0] == 0: raise ValueError('No data in report for user {} at url: {}'.format( self.username, interpret_report_url(self.base_url, report_url))) self.driver.close() if not write_to_disk: shutil.rmtree(csv_download_folder_path) return df_report def download_district_export_core5_monthly( self, write_to_disk=None, pandas_read_csv_kwargs={}, period_end_date=dt.datetime.now().date()): return self._download_district_export( report_type='export', period_end_date=period_end_date, write_to_disk=write_to_disk, pandas_read_csv_kwargs=pandas_read_csv_kwargs) def download_district_export_core5_year_to_date( self, write_to_disk=None, pandas_read_csv_kwargs={}, period_end_date=dt.datetime.now().date()): return self._download_district_export( report_type='expytd', period_end_date=period_end_date, write_to_disk=write_to_disk, pandas_read_csv_kwargs=pandas_read_csv_kwargs) def download_district_export_powerup_year_to_date( self, write_to_disk=None, pandas_read_csv_kwargs={}, period_end_date=dt.datetime.now().date()): return self._download_district_export( report_type='pupytd', period_end_date=period_end_date, write_to_disk=write_to_disk, pandas_read_csv_kwargs=pandas_read_csv_kwargs) def _download_district_export(self, report_type, period_end_date, period_start_date=None, write_to_disk=None, pandas_read_csv_kwargs={}): if not period_start_date: period_start_date = self.lexia_school_year_start_date self.__request_district_export(report_type, period_start_date, period_end_date) df_report = None number_retries = int(self.district_export_email_wait_time / self.district_export_email_retry_frequency) for retry_count in range(0, number_retries): if retry_count > 0: time.sleep(self.district_export_email_retry_frequency) self.log.info( str(self.district_id) + ': get export_id from email, try: ' + str(retry_count)) try: export_id = self.__get_exportid_from_email() except ValueError as err: self.log.debug(err) self.log.warning( '{}: No export_id found in email, retrying in {} seconds.'. 
format(self.district_id, self.district_export_email_retry_frequency)) time.sleep(self.district_export_email_retry_frequency) continue try: df_report = self.__download_export_for_exportid( export_id, write_to_disk, pandas_read_csv_kwargs) break except NoDataError as e: self.log.warning('{}: {} Retrying in {} seconds.'.format( self.district_id, e, self.district_export_email_retry_frequency)) if df_report is None: raise ReportNotFound( 'No email was received with report id. Make sure the emails are not going to spam.' ) else: return df_report def __request_district_export(self, report_type, period_start_date=None, period_end_date=None, write_to_disk=None): """ Logs into Lexia and submits the request to generate a district export :param report_type: The text from one of 'Report type' options listed in the myLexia 'District Exports' modal. :param period_start_date: The start date for the report request (unsure if this actually affects the data returned if it is different from the school year start date set for your Lexia instance) :param period_end_date: The end date for the report request (unsure if this actually affects the data returned if it is different from the day on which the request is made) :param write_to_disk: The path to save the CSV to. :return: Boolean. Whether or not the export request was successful. """ if write_to_disk: csv_download_folder_path = write_to_disk else: csv_download_folder_path = self.temp_folder_path self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() # use requests to post the download request with requests.Session() as s: for cookie in self.driver.get_cookies(): s.cookies.set(cookie['name'], cookie['value']) payload = { "districtID": self.district_id, "type": report_type, "email": self.district_export_email_address, "startDate": period_start_date.strftime("%Y-%m-%d"), "endDate": period_end_date.strftime("%Y-%m-%d") } self.log.info('{}: Export request payload: {}'.format( self.district_id, payload)) download_response = s.put(self.base_url + '/exportData/progress', data=payload) if download_response.ok: self.log.info( '{}: Export request for {} succeeded for user: {}'.format( self.district_id, report_type, self.username)) j_data = json.loads(download_response.content.decode()) self.log.info(j_data) return True else: self.log.info( '{}: Export request for {} FAILED for user: {}'.format( self.district_id, report_type, self.username)) self.log.info(download_response.content) return False def __get_exportid_from_email(self): """Log into an IMAP email server and get messages in a specific folder. Checks for a new Lexia export_id in those messages. 
Returns: int: the export_id """ self.log.info('Checking email for latest report ID for district_id: ' + str(self.district_id)) imap_conn = imaplib.IMAP4_SSL(self.district_export_email_imap_uri) try: imap_conn.login(self.district_export_email_address, self.district_export_email_password) except imaplib.IMAP4.error: self.log.error('Email login failed for: ' + self.district_export_email_address) sys.exit(1) rv, data = imap_conn.select('"{}"'.format( self.district_export_email_folder)) if rv == 'OK': self.log.info('Processing mailbox for ' + self.district_export_email_address + ' in folder "' + self.district_export_email_folder + '"') export_id = self.__extract_lexia_export_id_from_email(imap_conn) if export_id == -1: raise ValueError('No new export_id found on ' + self.district_export_email_address) else: imap_conn.close() return export_id else: raise InvalidIMAPParameters( "ERROR: Unable to open mailbox. Check your parameters and email folder. Message: ", rv) imap_conn.logout() def __extract_lexia_export_id_from_email(self, imap_conn): """ Extract the export_id that is sent by Lexia that is needed to download the prepared report export. Email messages in Gmail aren't sorted can can't be sorted using regular IMAP functions (Gmail does not support them). Therefore we will search within the folder for messages in the last day. Args: imap_conn (imaplib.IMAP4_SSL): A current connection to an IMAP email account. Returns: int: The new export_id """ # get all messages received in the last day rv, data = imap_conn.search( None, '(SINCE ' + (dt.datetime.now() - dt.timedelta(1)).strftime("%d-%b-%Y") + ')') if rv != 'OK': self.log.warning("No email messages found!") # TODO change this to raise an error return -1 highest_export_id = -1 for num in data[0].split(): rv, data = imap_conn.fetch(num, '(RFC822)') if rv != 'OK': # TODO change this to raise an error self.log.error("ERROR getting email message", num) return -1 msg = email.message_from_bytes(data[0][1]) self.log.debug('Processing Message %s, Raw Date: %s' % (num, msg['Date'])) highest_export_id = 0 for part in msg.walk(): # each part is a either non-multipart, or another multipart message # that contains further parts... Message is organized like a tree if part.get_content_type() == 'text/plain': # get the raw text part_str = part.get_payload() # extract the report id match = re.search(r'(?<=id=)(\d*?)(?=\s)', part_str) if match: export_id = int(match.group(0)) self.log.debug('export_id found: ' + str(export_id)) if export_id > highest_export_id: highest_export_id = export_id else: return -1 return highest_export_id def __download_export_for_exportid(self, export_id, write_to_disk=None, pandas_read_csv_kwargs={}): """Logs into lexia and downloads the report associated with a specific export_id. Args: export_id (int): The Lexia export id to download. write_to_disk (str): A path where the CSV that has been downloaded should be written to disk. 
pandas_read_csv_kwargs (dict): kwargs to pass to the Pandas read_csv function as necessary Returns: A Pandas dataframe with the report contents """ self.log.info( str(self.district_id) + ': downloading report with export_id=' + str(export_id)) with requests.Session() as s: for cookie in self.driver.get_cookies(): s.cookies.set(cookie['name'], cookie['value']) export_url = self.base_url + '/reports/get_export.php' + '?id=' + str( export_id) download_response = s.get(export_url) self.log.info( 'Report download request response for export_id {}: {}'.format( export_id, download_response.content)) if download_response.ok: df_report = pd.read_csv( io.StringIO( download_response.content.decode(LEXIA_CSV_ENCODING)), **pandas_read_csv_kwargs) # if the dataframe is empty (the report had no data), raise an error if df_report.shape[0] == 0: raise NoDataError( 'No data in report for user {} at url: {}'.format( self.username, export_url)) else: raise ValueError('Report download request failed') self.driver.close() if write_to_disk: df_report.to_csv(write_to_disk) return df_report
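# Example usage for Lexia (a hedged sketch, not part of the original module;
# the credentials, hostname, dates, district id, and IMAP settings below are
# placeholders, and assume an email account that receives the Lexia district
# export notifications in the configured folder):
#
#     lexia = Lexia(
#         username='data-admin@example.org', password='secret', wait_time=30,
#         hostname='mylexia.com', temp_folder_path='/tmp/lexia', headless=True,
#         lexia_school_year_start_date=dt.date(2018, 8, 15),
#         district_export_email_address='exports@example.org',
#         district_export_email_password='app-password',
#         district_export_email_imap_uri='imap.example.org',
#         district_id=1234)
#     df_ytd = lexia.download_district_export_core5_year_to_date(
#         pandas_read_csv_kwargs={'dtype': str})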
class SchoolMint(WebUIDataSource, LoggingMixin): """ Class for interacting with SchoolMint """ def __init__(self, username, password, wait_time, hostname, temp_folder_path, headless=False): # try: # self.logger = logging.getLogger('sps-automation.data_sources.schoolmint.Schoolmint') # except AttributeError: # self.log super().__init__(username, password, wait_time, hostname, temp_folder_path, headless) self.uri_scheme = 'https://' self.base_url = self.uri_scheme + self.hostname def _login(self): """ Logs into the provided SchoolMint instance. """ # 2019-01-16 SchoolMint seems to be having some issues with loading the login screen recently, # so we'll add a retry here count = 0 while count < NUMBER_OF_RETRIES: self.log.debug('Logging into SchoolMint at, try {}: {}'.format( count, self.base_url)) self.driver.get(self.base_url + "/signin") # wait until login form available try: elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'login'))) elem.clear() elem.send_keys(self.username) elem = self.driver.find_element_by_id("password") elem.send_keys(self.password) elem.send_keys(Keys.RETURN) break except ElementNotVisibleException: count += 1 # check that login succeeded by looking for the 'Student search' box try: elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'student-lookup'))) except TimeoutException: self.driver.close() raise InvalidLoginCredentials # wait for the page to fully load - the walk-me player is the last thing, but since it's a third # party add-on we'll wait for the filters on the application index first WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.CLASS_NAME, 'report-filters'))) # now we'll wait for the walk "Walk Me Through" overlay in the bottom right try: WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'walkme-player'))) except TimeoutException: pass # deal with the walk_me announcement pop-ups overlays try: elem = WebDriverWait(self.driver, WALKME_AND_SUPPORT_TIMEOUT).until( EC.presence_of_element_located( (By.CLASS_NAME, 'wm-shoutout'))) self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) elem = self.driver.find_element_by_id('walkme-overlay-all') self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) except TimeoutException: self.log.debug('No wm-shoutout found') def __remove_walk_me_and_support(self): """Removes two third party overlays that can block buttons that selenium needs to click.""" self.log.info('Removing "Walk-Me" and "Support" overlays.') walkme = True # wait for walk-me to load try: WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'walkme-player'))) except TimeoutException: self.log.info('Probably no Walk-Me found') walkme = False if walkme: self.log.debug('Removing "Walk Me" overlay.') try: for id in ['walkme-player', 'walkme-overlay-all']: elem = WebDriverWait(self.driver, WALKME_AND_SUPPORT_TIMEOUT).until( EC.presence_of_element_located( (By.ID, id))) self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) self.log.debug('Success') self.log.debug('Removing "Walk Me" bouncing overlay.') try: elem = self.driver.find_element_by_id('walkme-attengrab') self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) self.log.debug('Success') except NoSuchElementException: self.log.debug('No 
"Walk Me" bouncing overlay found.') except TimeoutException: self.log.debug('No "Walk Me" overlay found.') # remove "Homeroom" announcement self.log.debug('Removing "Homeroom" and other wm-shoutout modals.') try: elem = WebDriverWait( self.driver, WALKME_AND_SUPPORT_TIMEOUT ).until( # it turns out that the id can have numbers at the end (e.g. wm-shoutout-141590), so we need XPATH EC.presence_of_element_located( (By.XPATH, "//*[starts-with(@id, 'wm-shoutout')]"))) self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) self.log.debug('Success') except TimeoutException: self.log.debug( 'No "Homeroom" or other wm-shoutout modals found.') # remove 'Support' button self.log.debug('Trying to remove "Support" overlay.') try: elem = WebDriverWait(self.driver, WALKME_AND_SUPPORT_TIMEOUT).until( EC.presence_of_element_located( (By.ID, 'launcher'))) self.driver.execute_script( """var elem=arguments[0];elem.parentNode.removeChild(elem);""", elem) self.log.debug('Success') except TimeoutException: self.log.debug('No "Support" overlay found.') pass def _set_year(self, school_year, driver=None): """Sets the year for the SchoolMint interface. Args: school_year (string): The school year that should be selected. Use the format shown in the SchoolMint interface. Example: '2016-2017' :return: True if function succeeds """ self.log.debug('Changing school year to: {}'.format(school_year)) if not driver: self.driver = configure_selenium_chrome() self._login() # open the year selector menu elem = self.driver.find_element_by_xpath( "//a[contains(@class,'dropdown-toggle enrollment')]") elem.click() # select the appropriate year try: year_xpath = "//*[@id='enrollment-selector']//a[contains(text(),'{}')]".format( school_year) elem = self.driver.find_element_by_xpath(year_xpath) elem.click() except NoSuchElementException as e: self.driver.save_screenshot('cannot_find_year.png') message = (' Check that the school_year variable is valid. ' 'Passed value for school_year: {}').format(school_year) raise_with_traceback(type(e)(str(e) + message)) # wait for the page to be ready again self.driver.get(self.base_url) WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.ID, 'student-lookup'))) if not driver: self.driver.close() return True def check_school_year(self, school_year): """Checks that the school year is set as expected in the UI.""" elem = self.driver.find_element_by_xpath( "//a[contains(@class,'dropdown-toggle enrollment')]/span[contains(@class,'current')]" ) if school_year in elem.text: return True else: return False def download_url_report(self, report_url, school_year, temp_folder_name=None, pandas_read_csv_kwargs={}): """ Downloads a SchoolMint data-stream-table report. Args: report_url (string): Information pertaining to the path and query string for the report whose access is desired. Any filtering that can be done with a stateful URL should be included. school_year (string): The SchoolMint school year to download from (e.g. '2018-2019') temp_folder_name (string): The name for a sub-directory in which the files from the browser will be temporarily stored. If this directory does not exist, it will be created. NOTE: This sub-directory will be pandas_read_csv_kwargs: additional arguments to pass to Pandas read_csv Returns: A Pandas DataFrame of the report contents. 
""" if temp_folder_name: csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name else: csv_download_folder_path = mkdtemp(dir=self.temp_folder_path) # set up the driver for execution self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() self._set_year(school_year, self.driver) # get the report url self.driver.get(interpret_report_url(self.base_url, report_url)) self.__remove_walk_me_and_support() # wait until we have rows in the stream data table before starting to # look for results elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, "//*[@id='stream-table']/tbody/tr[1]/td[1]"))) if not self.check_school_year(school_year): raise ReportNotFound( "Wrong school detected prior to clicking generate.") self.log.debug('Waiting for report-data-summary to load') # wait until the stream table is fully loaded before downloading prev_data_summary_elem = self.driver.find_element_by_id( 'report-data-summary').text # print(prev_data_summary_elem) time.sleep(1) # we use the following count as a proxy for time elapsed, so we can # use the class's wait_time as the number of retries count = 0 while True: # check id=report-data-summary report_data_summary_elem = self.driver.find_element_by_id( 'report-data-summary').text # if it matches, wait a little longer and double deck that it hasn't changed if prev_data_summary_elem == report_data_summary_elem: time.sleep(3) count += 3 report_data_summary_elem = self.driver.find_element_by_id( 'report-data-summary').text if prev_data_summary_elem == report_data_summary_elem: break prev_data_summary_elem = report_data_summary_elem time.sleep(1) count += 1 if count >= self.wait_time: raise TimeoutError( 'SchoolMint Report Data never did not fully load within %d' % self.wait_time) # click the button to download the report self.log.debug('Starting download...') elem = self.driver.find_element_by_class_name("export-table") elem.click() # wait until file has downloaded to close the browser. We can do this # because we delete the file before we return it, so the temp dir should # always be empty when this command is run wait_for_any_file_in_folder(csv_download_folder_path, "csv") self.log.debug('Download finished.') report_df = pd.read_csv( get_most_recent_file_in_dir(csv_download_folder_path), encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING, **pandas_read_csv_kwargs) # TODO: move this out of this function. 
It should happen as cleanup once # the whole DAG has completed #delete_folder_contents(csv_download_folder_path) shutil.rmtree(csv_download_folder_path) # close the driver for this task self.driver.close() # if the dataframe is empty (the report had no data), raise an error if report_df.shape[0] == 0: #delete_folder_contents(csv_download_folder_path) shutil.rmtree(csv_download_folder_path) raise ValueError('No data in report for user {} at url: {}'.format( self.username, interpret_report_url(self.base_url, report_url))) return report_df def __get_number_of_pages(self): """Get the number of pages in a SchoolMint pagination.""" total_num_pages_xpath = '//*[@id="content"]//*[@class="pagination "]/li[@data-page][last()]' elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.XPATH, total_num_pages_xpath))) num_pages = int(elem.get_attribute("data-page")) + 1 return num_pages def __navigate_to_custom_report(self, report_name, school_year, download_folder_path=None): """Navigate to the page of the custom report tool that has the custom report on it""" if not download_folder_path: download_folder_path = self.temp_folder_path self.driver = DriverBuilder().get_driver( download_location=download_folder_path, headless=self.headless) self._login() self._set_year(school_year, self.driver) # get the custom reports page custom_reports_url = 'report/customReports' self.driver.get(interpret_report_url(self.base_url, custom_reports_url)) self.__remove_walk_me_and_support() # wait for the page to load and get the maximum number of pages total_num_pages_xpath = '//*[@id="content"]//*[@class="pagination "]/li[@data-page][last()]' elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.XPATH, total_num_pages_xpath))) num_pages = int(elem.get_attribute("data-page")) + 1 current_page = 0 while current_page < num_pages: report_name_xpath = "//tr[td//text()[contains(., '{}')]]".format( report_name) try: elem = self.driver.find_element_by_xpath(report_name_xpath) return current_page except NoSuchElementException: current_page += 1 if current_page < num_pages: next_page_xpath = '//*[@id="content"]//*[@class="pagination "]/li[@data-page={}]/a'.format( current_page) self.driver.find_element_by_xpath(next_page_xpath).click() # scroll back to the top of the page, prevents selenium clicking errors self.driver.execute_script("window.scrollTo(0, 0);") raise ReportNotFound def generate_custom_report(self, report_name, school_year): """ Clicks the generate button on a SchoolMint custom report. :param report_name: The name of the report exactly as it is shown in the SchoolMint UI :param school_year: The year in SchoolMint. Should be formatted as shown in the UI (e.g. '2018-2019') :return: True if the button was clicked. False if the button was not clicked because the report is generating. 
""" self.__navigate_to_custom_report(report_name, school_year) if not self.check_school_year(school_year): raise ReportNotFound( "Wrong school detected prior to clicking generate.") generate_report_button_xpath = GENERATE_REPORT_BUTTON_XPATH.format( report_name=report_name) try: generate_report_button = WebDriverWait( self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, generate_report_button_xpath))) except NoSuchElementException: raise ReportNotFound if generate_report_button.text == 'Generate Report': generate_report_button.click() self.driver.close() return True elif generate_report_button.text == 'Report in Progress': self.driver.close() return False else: raise ValueError("Unknown 'Generate Report' button text found") def is_custom_report_generating(self, report_name, school_year): """Checks if a SchoolMint Custom Report is generating or not""" self.__navigate_to_custom_report(report_name, school_year) generate_report_button_xpath = GENERATE_REPORT_BUTTON_XPATH.format( report_name=report_name) try: generate_report_button = WebDriverWait( self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, generate_report_button_xpath))) except NoSuchElementException: raise ReportNotFound if generate_report_button.text == 'Report in Progress': return True elif generate_report_button.text == 'Generate Report': return False else: raise ValueError("Unknown 'Generate Report' button text found") def get_last_custom_report_generation_datetime(self, report_name, school_year): """Get's a report's generation timestamp in raw text""" self.__navigate_to_custom_report(report_name, school_year) try: # old custom reports interface report_generated_on_xpath = ( "//tr[td[./text()='{}']]/td[4]").format(report_name) report_generated_on_text = WebDriverWait( self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, report_generated_on_xpath))).text except TimeoutException: try: # new custom reports interface report_generated_on_xpath = ( "//tr[td[text()=' {} ']]/td[contains(@class,'last_generated_date-td')]" ).format(report_name) report_generated_on_text = WebDriverWait( self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, report_generated_on_xpath))).text except TimeoutException: raise ReportNotFound return report_generated_on_text def _download_custom_report(self, report_name, school_year, download_folder_path, download_if_generating=False): """Protected function for clicking the download button on a report on the Custom Reports page""" if not download_folder_path: download_folder_path = self.temp_folder_path self.__navigate_to_custom_report(report_name, school_year, download_folder_path) generate_report_button_xpath = GENERATE_REPORT_BUTTON_XPATH.format( report_name=report_name) generate_report_button_text = WebDriverWait( self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, generate_report_button_xpath))).text download_button_xpath = ( "//tr[td[text() = '{report_name}' or text() = ' {report_name} ']]//a[contains(text(), 'Download')]" ).format(report_name=report_name) elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located((By.XPATH, download_button_xpath))) if generate_report_button_text == 'Generate Report': elem.click() elif generate_report_button_text == 'Report in Progress' and download_if_generating: elem.click() else: raise ReportNotReady return self.driver def download_csv_custom_report(self, report_name, school_year, download_if_generating=False, 
pandas_read_csv_kwargs={}): """Download a SchoolMint Custom Report that downloads as a single CSV file""" temp_folder_name = report_name.replace(" ", "_").lower() csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name driver = self._download_custom_report(report_name, school_year, csv_download_folder_path, download_if_generating) # wait until file has downloaded to close the browser. We can do this # because we delete the file before we return it, so the temp dir should # always be empty when this command is run wait_for_any_file_in_folder(csv_download_folder_path, "csv") report_df = pd.read_csv( get_most_recent_file_in_dir(csv_download_folder_path), encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING, **pandas_read_csv_kwargs) # delete any files in the mealtime temp folder; we don't need them now # TODO: move this out of this function. It should happen as cleanup once # the whole DAG has completed delete_folder_contents(csv_download_folder_path) # close the driver for this task driver.close() # if the dataframe is empty (the report had no data), raise an error if report_df.shape[0] == 0: raise NoDataError( 'No data for user {} in Custom Report: {}'.format( self.username, report_name)) return report_df def download_zip_custom_report(self, report_name, school_year, download_folder_path=None, download_if_generating=False, unzip=True, pandas_read_csv_kwargs={}): """ Downloads a SchoolMint Custom Report that downloads as a zipped set of CSVs :param report_name: The name of the report exactly as it is shown in the SchoolMint UI :param school_year: The year in SchoolMint. Should be formatted as shown in the UI (e.g. '2018-2019') :param download_folder_path: The path to where you want to store the zip file. :param download_if_generating: Whether or not to download a custom report if the report is currently generating. :param unzip: Boolean. If True, not only downloads the file, but also unzips it and returns each csv in a Pandas Dataframe in a dictionary. :param pandas_read_csv_kwargs: Additional keyward arguments to pass to Panda's read_csv function. :return: None or a dictionary of Pandas DataFrames representing each of the CSVs in the zipped file. """ # create a folder for this specific run run_time = datetime.datetime.utcnow() if not download_folder_path: download_folder_path = self.temp_folder_path download_dir_final = "{}/{}-{}-{}".format(download_folder_path, report_name, run_time.strftime('%Y%m%d'), run_time.strftime('%H%M%S')) driver = self._download_custom_report(report_name, school_year, download_dir_final, download_if_generating) # wait until file has downloaded to close the browser. 
We can do this # because we delete the file before we return it, so the temp dir should # always be empty when this command is run # TODO add a try/except block here wait_for_any_file_in_folder(download_dir_final, "zip") driver.close() if unzip: # unzip the files file_path = max(glob.iglob(download_dir_final + '/*.zip'), key=os.path.getctime) ZipfileLongPaths(file_path).extractall(download_dir_final) dfs = dict() # iterate through the unzipped files and load them into dataframes for csv_filepath in glob.iglob(download_dir_final + '/*.csv'): csv_filename = os.path.basename(csv_filepath) #print(csv_filename) # find the files that start with a number, these are the custom forms files if re.match("^(\d+)", csv_filename): num_beg = re.match("^(\d+)", csv_filename).group(0) words = re.findall("[A-Za-z]+", csv_filename) dict_key = csv_filename # "{}_{}".format(num_beg, '_'.join(words[0:3])).lower() dfs[dict_key] = pd.read_csv( csv_filepath, encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING, skiprows=[0, 2], **pandas_read_csv_kwargs) # otherwise it is the info file that comes along with the zip export (application-data-export, etc.) else: words = re.findall("[A-Za-z]+", csv_filename) dict_key = csv_filename # "{}".format('_'.join(words[0:3])).lower() dfs[dict_key] = pd.read_csv( csv_filepath, encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING, **pandas_read_csv_kwargs) return dfs
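# Example usage for SchoolMint (a hedged sketch, not part of the original
# module; the credentials, hostname, report name, and school year below are
# placeholders):
#
#     sm = SchoolMint('admin@example.org', 'secret', wait_time=60,
#                     hostname='example.schoolmint.net',
#                     temp_folder_path='/tmp/schoolmint', headless=True)
#     # kick off server-side generation of the report, then pull the CSV once
#     # it has finished (download_if_generating=False raises ReportNotReady
#     # while the report is still being built)
#     sm.generate_custom_report('Application Data Export', '2018-2019')
#     df = sm.download_csv_custom_report('Application Data Export',
#                                        '2018-2019')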
class Mealtime(WebUIDataSource): """ Class for interacting with the web ui of Mealtime """ def __init__(self, username, password, wait_time, hostname, temp_folder_path, headless=False): super().__init__(username, password, wait_time, hostname, temp_folder_path, headless) self.uri_scheme = 'https://' self.base_url = self.uri_scheme + self.hostname def _login(self): """ Logs into the provided Mealtime instance. """ self.driver.get(self.base_url + '/Base/SignIn.aspx') elem = self.driver.find_element_by_id("username") elem.clear() elem.send_keys(self.username) elem = self.driver.find_element_by_id("password") elem.send_keys(self.password) elem.send_keys(Keys.RETURN) def download_url_report(self, report_url, temp_folder_name): """ Downloads a MealTime report. Args: report_url (string): Information pertaining to the path and query string for the report whose access is desired. Any filtering that can be done with a stateful URL should be included. temp_folder_name (string): The name of the folder in which this specific report's download files should be stored. Returns: A Pandas DataFrame of the report contents. """ csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name # set up the driver for execution self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) #self.driver = configure_selenium_chrome(csv_download_folder_path) self._login() # get the report url self.driver.get(interpret_report_url(self.base_url, report_url)) # select the download format (csv) and execute export_format_select = Select(self.driver.find_element_by_id('ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl00')) try: export_format_select.select_by_value('CSV') dl_type = 'csv' except NoSuchElementException: export_format_select.select_by_value('EXCELNoHeader') dl_type = 'xls' self.driver.find_element_by_id('ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl01').click() # wait until file has downloaded to close the browser. We can do this # because we delete the file before we return it, so the temp dir should # always be empty when this command is run # TODO add a try/except block here wait_for_any_file_in_folder(csv_download_folder_path, dl_type) # remove the header rows #xlrd.open_workbook(utils.get_most_recent_file_in_dir(csv_download_folder_path), formatting_info=False) if dl_type == 'csv': report_df = pd.read_csv(get_most_recent_file_in_dir(csv_download_folder_path), header=2) else: report_df = pd.read_excel(get_most_recent_file_in_dir(csv_download_folder_path), header=3) # delete any files in the mealtime temp folder; we don't need them now # TODO: move this out of this function. It should happen as cleanup once # the whole DAG has completed delete_folder_contents(csv_download_folder_path) self.driver.close() # if the dataframe is empty (the report had no data), raise an error if report_df.shape[0] == 0: raise ValueError('No data in report for user {} at url: {}'.format(self.username, interpret_report_url(self.base_url, report_url))) return report_df
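# Example usage for Mealtime (a hedged sketch, not part of the original module;
# the credentials, hostname, and report path below are placeholders):
#
#     mealtime = Mealtime('admin@example.org', 'secret', wait_time=30,
#                         hostname='www.mymealtime.com',
#                         temp_folder_path='/tmp/mealtime', headless=True)
#     df = mealtime.download_url_report('/Reports/SomeReport.aspx',
#                                       temp_folder_name='lunch_balances')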
class SummitLearning(WebUIDataSource, LoggingMixin):
    """ Class for interacting with the web UI of Summit Learning """

    def __init__(self,
                 username,
                 password,
                 wait_time,
                 hostname='summitlearning.org',
                 temp_folder_path=None,
                 headless=False,
                 login_provider='google'):
        super().__init__(username, password, wait_time, hostname,
                         temp_folder_path, headless)
        self.login_provider = login_provider
        self.uri_scheme = 'https://'
        self.base_url = self.uri_scheme + 'www.' + self.hostname

    def _login(self):
        if self.login_provider == 'google':
            login_url = self.base_url + '/auth/google_oauth2'
            self.driver.get(login_url)
            # the Google login screen has multiple versions - the 'Email' one
            # seems to be used when headless
            try:
                elem = self.driver.find_element_by_id('Email')
            except NoSuchElementException:
                elem = self.driver.find_element_by_id('identifierId')
            elem.clear()
            elem.send_keys(self.username)
            elem.send_keys(Keys.RETURN)
            # headless version of Google Login
            elem = WebDriverWait(self.driver, self.wait_time).until(
                EC.visibility_of_element_located((By.ID, 'password')))
            # regular version of Google login
            if elem.tag_name == 'div':
                elem = WebDriverWait(self.driver, self.wait_time).until(
                    EC.element_to_be_clickable((By.NAME, 'password')))
            elem.send_keys(self.password)
            elem.send_keys(Keys.RETURN)
            # wait for the destination page to fully load
            WebDriverWait(self.driver, self.wait_time).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'app-teacher')))

    def download_url_report(self, report_url, write_to_disk=None, **kwargs):
        """ Downloads a Summit Learning report at a URL that triggers a CSV download

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            write_to_disk (string): The path for a directory to store the
                downloaded file. If nothing is provided, the file will be
                stored in a temporary directory and deleted at the end of
                this function.
            **kwargs: additional arguments to pass to Pandas read_excel or
                read_csv (depending on the report_url)

        Returns:
            A Pandas DataFrame of the report contents.
        """
        report_download_url = interpret_report_url(self.base_url, report_url)

        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                 self.headless)
        self._login()

        self.log.debug(
            'Getting report page at: {}'.format(report_download_url))
        self.driver.get(report_download_url)
        self.log.debug('Starting download of: {}'.format(report_download_url))

        wait_for_any_file_in_folder(csv_download_folder_path, "csv")
        self.log.debug('Download Finished.')

        df_report = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path), **kwargs)

        # if the dataframe is empty (the report had no data), raise an error
        if df_report.shape[0] == 0:
            raise NoDataError(
                'No data in report for user {} at url: {}'.format(
                    self.username,
                    interpret_report_url(self.base_url, report_url)))

        self.driver.close()

        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

        return df_report

    def _set_dl_academic_year(self, academic_year):
        """Sets the academic year to download the reports from

        Args:
            academic_year (string): The academic year that should be selected.
                Use the format shown in the Summit Learning interface.
                Example: '2016-2017'

        :return: True if function succeeds
        """
        # The UI uses a '–' instead of a '-'.
We'll make a convenience replacement academic_year = academic_year.replace('-', '–') self.log.debug('Changing academic year to: {}'.format(academic_year)) # open the menu to select the academic year elem = WebDriverWait(self.driver, self.wait_time).until( EC.element_to_be_clickable((By.ID, 'academic-year-selector'))) elem.click() # select the appropriate year try: year_xpath = "//*[@id='academic-year-selector']/parent::div//a[contains(text(),'{}')]".format( academic_year) elem = self.driver.find_element_by_xpath(year_xpath) elem.click() except NoSuchElementException as e: self.driver.save_screenshot('cannot_find_year.png') message = ( ' Check that the academic_year variable is valid. ' 'Passed value for academic_year: {}').format(academic_year) raise_with_traceback(type(e)(str(e) + message)) return True def check_dl_academic_year(self, academic_year): """Checks that the academic year is set as expected in the UI.""" # The UI uses a '–' instead of a '-'. We'll make a convenience replacement academic_year = academic_year.replace('-', '–') elem = self.driver.find_element_by_xpath( "//*[@id='academic-year-selector']/parent::div//button") if academic_year in elem.text: return True else: return False def download_site_data_download( self, dl_heading, site_id, academic_year, report_generation_wait=REPORT_GENERATION_WAIT, write_to_disk=None, **kwargs): if write_to_disk: csv_download_folder_path = write_to_disk else: csv_download_folder_path = mkdtemp() self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() dl_page_url = "{base_url}/sites/{site_id}/data_downloads/".format( base_url=self.base_url, site_id=site_id) self.driver.get(dl_page_url) self._set_dl_academic_year(academic_year) if not self.check_dl_academic_year(academic_year): raise ValueError("Academic Year not correctly set") # start the CSV generation process download_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), '{button_text}')]" # try to find the "Download CSV" button - old version of the interface old_interface = False try: elem = self.driver.find_element_by_xpath( download_button_xpath.format(dl_heading=dl_heading, button_text='Download CSV')) old_interface = True self.log.info("'Download CSV' interface detected.") elem.click() # if it's not there, it may have changed to a "Refresh" button except NoSuchElementException as e: pass # try to find the "Generate CSV" button - new version of the interface if not old_interface: gen_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//button[contains(text(), '{button_text}')]" try: elem = self.driver.find_element_by_xpath( gen_button_xpath.format(dl_heading=dl_heading, button_text='Generate CSV')) self.log.info("'Generate CSV' interface detected.") elem.click() # if it's not there, it may have changed to a "Refresh" button except NoSuchElementException as e: try: elem = self.driver.find_element_by_xpath( gen_button_xpath.format(dl_heading=dl_heading, button_text='Download')) except NoSuchElementException as e: elem = self.driver.find_element_by_xpath( gen_button_xpath.format(dl_heading=dl_heading, button_text='Refresh')) elem.click() # wait for the refresh command to be issued time.sleep(1) # wait for the report to be available and download it self.log.info( 'Starting download of report "{}" for site_id "{}"'.format( dl_heading, site_id)) dl_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), 'Download')]" try: elem = 
WebDriverWait(self.driver, report_generation_wait).until( EC.presence_of_element_located( (By.XPATH, dl_button_xpath.format(dl_heading=dl_heading)))) elem.click() # if the download is not ready, refresh the page and try one more time except TimeoutException: self.driver.refresh() elem = WebDriverWait(self.driver, report_generation_wait).until( EC.presence_of_element_located( (By.XPATH, dl_button_xpath.format(dl_heading=dl_heading)))) elem.click() wait_for_any_file_in_folder(csv_download_folder_path, "csv") self.log.debug('Download Finished.') df_report = pd.read_csv( get_most_recent_file_in_dir(csv_download_folder_path), **kwargs) # if the dataframe is empty (the report had no data), raise an error if df_report.shape[0] == 0: raise NoDataError('No data in report "{}" for site_id "{}"'.format( dl_heading, site_id)) self.driver.close() if not write_to_disk: shutil.rmtree(csv_download_folder_path) return df_report
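# Example usage for SummitLearning (a hedged sketch, not part of the original
# module; the Google credentials, site id, and data-download heading below are
# placeholders):
#
#     summit = SummitLearning('teacher@example.org', 'secret', wait_time=30,
#                             headless=True)
#     df = summit.download_site_data_download(
#         dl_heading='Student Course Grades', site_id=1234,
#         academic_year='2018-2019')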
class Clever(WebUIDataSource, LoggingMixin):
    """ Class for interacting with the Clever Web UI """

    def __init__(self,
                 username,
                 password,
                 wait_time,
                 hostname='schools.clever.com',
                 temp_folder_path=None,
                 headless=False):
        super().__init__(username, password, wait_time, hostname,
                         temp_folder_path)
        self.uri_scheme = 'https://'
        self.base_url = self.uri_scheme + self.hostname
        self.headless = headless
        self.log.debug('creating instance of Clever')

    def _login(self):
        """ Logs into the provided Clever instance. """
        self.log.info(
            'Logging into Clever instance: hostname, username: {}, {}'.format(
                self.hostname, self.username))
        self.driver.get(self.base_url)

        # wait until the login form is available
        elem = WebDriverWait(self.driver, self.wait_time).until(
            EC.presence_of_element_located((By.NAME, 'username')))
        elem.clear()
        elem.send_keys(self.username)
        elem = self.driver.find_element_by_name("password")
        elem.send_keys(self.password)
        elem.send_keys(Keys.RETURN)

        # ensure that login is successful
        self.driver.get(self.base_url)
        if 'Clever | Home' not in self.driver.title:
            self.driver.close()
            raise InvalidLoginCredentials

    def download_url_report(self, report_url, collection, write_to_disk=None,
                            **kwargs):
        """Currently a shortcut for download_data_shared_with_application"""
        return self.download_data_shared_with_application(
            report_url, collection, write_to_disk, **kwargs)

    def download_data_shared_with_application(self, application_page_url,
                                              collection, write_to_disk=None,
                                              **kwargs):
        """ Downloads the data shared with a particular application through Clever.

        :param application_page_url: The url for the main Clever management page for a
            particular application. For example, for My Lexia, this would be
            https://schools.clever.com/applications/lexia-mylexia
        :param collection: A string of 'schools', 'students', 'sections', 'teachers',
            'schooladmins' that indicates which shared data to download
        :param write_to_disk: A path to a directory where the downloaded CSV should be
            saved. If nothing is passed, it will not be saved and only a Pandas
            DataFrame will be returned.
        :param kwargs: Additional keyword arguments to be passed to the Pandas read_csv
            function.
        :return: A Pandas DataFrame of the indicated collection download.
        """
        collection = collection.lower().replace(' ', '')
        if collection not in ['schools', 'students', 'sections', 'teachers',
                              'schooladmins']:
            raise ReportNotFound(
                (
                    "Argument for collection '{collection}' is not valid. Please choose from: "
                    "'schools', 'students', 'sections', 'teachers', 'schooladmins'."
).format(collection=collection) ) report_access_page_url = interpret_report_url(self.base_url, application_page_url) if write_to_disk: csv_download_folder_path = write_to_disk else: csv_download_folder_path = mkdtemp() self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless) self._login() self.log.debug('Getting report access page at: {}'.format(report_access_page_url)) self.driver.get(report_access_page_url) # find and click the download button based on the collection desired elem = WebDriverWait(self.driver, self.wait_time).until( EC.presence_of_element_located( (By.XPATH, "//a[contains(@href, '{collection}.csv')]".format(collection=collection)) ) ) self.log.info('Starting download of: {} - {}'.format(report_access_page_url, collection)) elem.click() wait_for_any_file_in_folder(csv_download_folder_path, "csv") self.log.info('Download Finished.') df_report = pd.read_csv(get_most_recent_file_in_dir(csv_download_folder_path), **kwargs) # if the dataframe is empty (the report had no data), raise an error if df_report.shape[0] == 0 and collection != 'schooladmins': raise ValueError('No data in report for user {} at url: {}'.format( self.username, interpret_report_url(self.base_url, application_page_url))) elif df_report.shape[0] == 0: warnings.warn("The 'schooladmins' collection has no data. Ensure that no school admins are shared.") self.driver.close() if not write_to_disk: shutil.rmtree(csv_download_folder_path) return df_report def download_google_accounts_manager_student_export(self): """ Downloads the Google Accounts Manager Student Export that includes student emails.""" self.log.info('Starting student email download.') # set up the driver for execution self.driver = configure_selenium_chrome() self._login() # grab some cookies (need to do this here for _mkto_trk cookie) cookies_orig = self.driver.get_cookies() # open the Google Accounts Manager application page # note - clever applications like Google Accounts Manager have unique ids that are a part of their URL # note - we have to get the settings page of the Google Accounts Manager to get the cookie # that we need in order to download the file self.driver.get('https://schools.clever.com/school/applications/50ca15a93bc2733956000007/settings') cookies_schools = self.driver.get_cookies() # we may need to get the gaprov.ops.clever.com to get a cookie in new versions of chromedriver self.driver.get('https://gaprov.ops.clever.com/') cookies_gaprov = self.driver.get_cookies() # create requests session to download report without need for file storage with requests.Session() as s: # transfer over a bunch of cookies to the requests session for cookie in cookies_orig: s.cookies.set(cookie['name'], cookie['value']) for cookie in cookies_schools: s.cookies.set(cookie['name'], cookie['value']) for cookie in cookies_gaprov: s.cookies.set(cookie['name'], cookie['value']) s.cookies.set('_gat', "1") s.cookies.set('_gat_globalTracker', "1") report_url = 'https://gaprov.ops.clever.com/reporting/student' # download with 10 retries on failure c = 0 while True: download_response = s.get(report_url, stream=True) if download_response.ok: df_report = pd.read_csv(io.StringIO(download_response.content.decode('utf-8'))) else: self.log.info('Download failed for report url: {}'.format(report_url)) self.log.info('Download status_code: {}'.format(download_response.status_code)) self.log.info('Retrying... 
Retry#: {}'.format(c+1)) if c >= 9: raise ValueError('Unable to download report after multiple retries.') # add some jitter to the requests sleep_time = (1000 + randint(500)) / 1000 time.sleep(sleep_time) c += 1 continue break self.driver.close() self.log.info('Student email download complete.') return df_report
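# Example usage for Clever (a hedged sketch, not part of the original module;
# the credentials are placeholders, and the application page URL follows the
# My Lexia example given in the docstring above):
#
#     clever = Clever('districtadmin@example.org', 'secret', wait_time=30,
#                     headless=True)
#     df_students = clever.download_data_shared_with_application(
#         'https://schools.clever.com/applications/lexia-mylexia',
#         collection='students')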