Example #1
0
    def download_url_report(self, report_url, temp_folder_name):
        """ Downloads a MealTime report.

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            temp_folder_name (string): The name of the folder in which this
                specific report's download files should be stored.

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ValueError: If the downloaded report contains no rows.
        """
        csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name
        full_report_url = interpret_report_url(self.base_url, report_url)

        # set up the driver for execution
        self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless)
        try:
            self._login()

            # get the report url
            self.driver.get(full_report_url)

            # select the download format (csv) and execute
            export_format_select = Select(self.driver.find_element_by_id(
                'ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl00'))
            try:
                export_format_select.select_by_value('CSV')
                dl_type = 'csv'
            except NoSuchElementException:
                # no CSV option in the dropdown: fall back to the Excel export
                export_format_select.select_by_value('EXCELNoHeader')
                dl_type = 'xls'
            self.driver.find_element_by_id(
                'ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl01').click()

            # wait until file has downloaded to close the browser. We can do this
            # because we delete the file before we return it, so the temp dir should
            # always be empty when this command is run
            wait_for_any_file_in_folder(csv_download_folder_path, dl_type)

            try:
                downloaded_file = get_most_recent_file_in_dir(csv_download_folder_path)
                # the rows above the header row are skipped by pandas
                if dl_type == 'csv':
                    report_df = pd.read_csv(downloaded_file, header=2)
                else:
                    report_df = pd.read_excel(downloaded_file, header=3)
            finally:
                # delete any files in the mealtime temp folder, even when the
                # file could not be parsed; a leftover file would be picked up
                # by the next run, which assumes the folder starts out empty
                # TODO: move this out of this function. It should happen as
                # cleanup once the whole DAG has completed
                delete_folder_contents(csv_download_folder_path)
        finally:
            # always release the browser, including when a step above fails
            self.driver.close()

        # if the dataframe is empty (the report had no data), raise an error
        if report_df.shape[0] == 0:
            raise ValueError('No data in report for user {} at url: {}'.format(
                self.username, full_report_url))

        return report_df
Example #2
0
    def __request_district_export(self,
                                  report_type,
                                  period_start_date=None,
                                  period_end_date=None,
                                  write_to_disk=None):
        """
        Logs into Lexia and submits the request to generate a district export
        :param report_type: The text from one of 'Report type' options listed in the myLexia
            'District Exports' modal.
        :param period_start_date: The start date for the report request (unsure if this actually
            affects the data returned if it is different from the school year start date set
            for your Lexia instance). Left out of the request when None.
        :param period_end_date: The end date for the report request (unsure if this actually
            affects the data returned if it is different from the day on which the request is made).
            Left out of the request when None.
        :param write_to_disk: The folder handed to the browser driver as its download location.
            Falls back to self.temp_folder_path when not provided.
        :return: Boolean. Whether or not the export request was successful.
        """
        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = self.temp_folder_path
        # NOTE(review): the driver is left open when this method returns -
        # confirm that the caller is responsible for closing it.
        self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                 self.headless)
        self._login()

        payload = {
            "districtID": self.district_id,
            "type": report_type,
            "email": self.district_export_email_address,
        }
        # both dates default to None, so only format the ones that were given
        if period_start_date is not None:
            payload["startDate"] = period_start_date.strftime("%Y-%m-%d")
        if period_end_date is not None:
            payload["endDate"] = period_end_date.strftime("%Y-%m-%d")

        # use requests to post the download request
        with requests.Session() as s:
            # reuse the logged-in browser session by copying its cookies
            for cookie in self.driver.get_cookies():
                s.cookies.set(cookie['name'], cookie['value'])

            self.log.info('{}: Export request payload: {}'.format(
                self.district_id, payload))
            download_response = s.put(self.base_url + '/exportData/progress',
                                      data=payload)

            if not download_response.ok:
                self.log.info(
                    '{}: Export request for {} FAILED  for user: {}'.format(
                        self.district_id, report_type, self.username))
                self.log.info(download_response.content)
                return False

            self.log.info(
                '{}: Export request for {} succeeded for user: {}'.format(
                    self.district_id, report_type, self.username))
            try:
                j_data = json.loads(download_response.content.decode())
            except ValueError:
                # the request itself succeeded; a body that is not valid JSON
                # should not turn that success into an exception
                j_data = download_response.content
            self.log.info(j_data)
            return True
Example #3
0
    def download_url_report(self, report_url, write_to_disk=None, **kwargs):
        """ Downloads a Summit Learning report at a URL that triggers a CSV download

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            write_to_disk (string): The path for a directory to store the
                downloaded file. If nothing is provided, the file will be
                stored in a temporary directory and deleted at the end of
                this function.
            **kwargs: additional arguments to pass to Pandas read_csv

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            NoDataError: If the downloaded report contains no rows.
        """

        report_download_url = interpret_report_url(self.base_url, report_url)

        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        try:
            self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                     self.headless)
            try:
                self._login()

                self.log.debug(
                    'Getting report page at: {}'.format(report_download_url))
                self.driver.get(report_download_url)

                self.log.debug(
                    'Starting download of: {}'.format(report_download_url))

                wait_for_any_file_in_folder(csv_download_folder_path, "csv")
                self.log.debug('Download Finished.')

                df_report = pd.read_csv(
                    get_most_recent_file_in_dir(csv_download_folder_path),
                    **kwargs)
            finally:
                # always release the browser, including when a step above fails
                self.driver.close()
        finally:
            # only remove the folder when it is the temporary one made here;
            # errors are ignored so cleanup never hides the real exception
            if not write_to_disk:
                shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error
        if df_report.shape[0] == 0:
            raise NoDataError(
                'No data in report for user {} at url: {}'.format(
                    self.username, report_download_url))

        return df_report
Example #4
0
    def test_set_dl_academic_year_invalid_year(self):
        """_set_dl_academic_year raises NoSuchElementException for the
        academic year configured as invalid."""
        year = CONFIG['SummitLearning'][
            'test_set_dl_academic_year_invalid_year__academic_year']
        self.sl.driver = DriverBuilder().get_driver(
            headless=CONFIG.getboolean('SummitLearning', 'headless'))
        try:
            self.sl._login()

            dl_page_url = "{base_url}/sites/{site_id}/data_downloads/".format(
                base_url=self.sl.base_url,
                site_id=CONFIG['SummitLearning']['site_id'])

            self.sl.driver.get(dl_page_url)

            self.assertRaises(NoSuchElementException,
                              self.sl._set_dl_academic_year, year)
        finally:
            # close the browser whether the assertion passes or fails
            self.sl.driver.close()
Example #5
0
    def __navigate_to_custom_report(self,
                                    report_name,
                                    school_year,
                                    download_folder_path=None):
        """Page through the custom reports listing until the report is found.

        Starts a driver (downloading to download_folder_path, or to
        self.temp_folder_path when none is given), logs in, sets the school
        year and opens the custom reports page. The driver is left on the
        page where a table row containing report_name was found.

        Returns: The number of the page (counting from 0) showing the report.

        Raises:
            ReportNotFound: If no page has a row containing report_name.
        """
        target_folder = download_folder_path or self.temp_folder_path
        self.driver = DriverBuilder().get_driver(
            download_location=target_folder, headless=self.headless)
        self._login()
        self._set_year(school_year, self.driver)

        # open the custom reports listing
        self.driver.get(
            interpret_report_url(self.base_url, 'report/customReports'))
        self.__remove_walk_me_and_support()

        # the last pagination item appears once the page has loaded; its
        # data-page attribute is the index of the final page
        last_page_locator = (
            By.XPATH,
            '//*[@id="content"]//*[@class="pagination "]/li[@data-page][last()]')
        last_page_item = WebDriverWait(self.driver, self.wait_time).until(
            EC.presence_of_element_located(last_page_locator))
        page_count = int(last_page_item.get_attribute("data-page")) + 1

        # matches any table row that has a cell containing the report name
        row_xpath = "//tr[td//text()[contains(., '{}')]]".format(report_name)

        for page_index in range(page_count):
            try:
                self.driver.find_element_by_xpath(row_xpath)
            except NoSuchElementException:
                # not on this page: move on to the following one, if any
                following_page = page_index + 1
                if following_page < page_count:
                    link_xpath = '//*[@id="content"]//*[@class="pagination "]/li[@data-page={}]/a'.format(
                        following_page)
                    self.driver.find_element_by_xpath(link_xpath).click()

                    # scroll back to the top of the page, prevents selenium clicking errors
                    self.driver.execute_script("window.scrollTo(0, 0);")
            else:
                return page_index

        raise ReportNotFound
Example #6
0
    def test_set_dl_academic_year(self):
        """_set_dl_academic_year returns a truthy result for the configured
        academic year and check_dl_academic_year then confirms it."""
        year = CONFIG['SummitLearning'][
            'test_set_dl_academic_year__academic_year']
        self.sl.driver = DriverBuilder().get_driver(
            headless=CONFIG.getboolean('SummitLearning', 'headless'))
        try:
            self.sl._login()

            dl_page_url = "{base_url}/sites/{site_id}/data_downloads/".format(
                base_url=self.sl.base_url,
                site_id=CONFIG['SummitLearning']['site_id'])

            self.sl.driver.get(dl_page_url)

            result = self.sl._set_dl_academic_year(academic_year=year)

            self.assertTrue(result)

            self.assertTrue(self.sl.check_dl_academic_year(academic_year=year))
        finally:
            # close the browser whether the assertions pass or fail
            self.sl.driver.close()
Example #7
0
    def download_url_report(self, report_url, write_to_disk=None, **kwargs):
        """ Downloads a Lexia report at a URL for a page with an 'export' button.

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            write_to_disk (string): The path for a directory to store the
                downloaded file. If nothing is provided, the file will be
                stored in a temporary directory and deleted at the end of
                this function.
            **kwargs: additional arguments to pass to Pandas read_excel, or to
                download_manage_tab_report when the URL is a manage tab URL

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ValueError: If the downloaded report contains no rows.
        """

        report_download_url = interpret_report_url(self.base_url, report_url)

        # if user is trying to download a manage tab report (for convenience)
        if '/mylexiaweb/app/index.html#/groups/' in report_download_url:
            return self.download_manage_tab_report(report_url, write_to_disk,
                                                   **kwargs)

        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        try:
            self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                     self.headless)
            try:
                self._login()

                self.log.debug(
                    'Getting report page at: {}'.format(report_download_url))
                self.driver.get(report_download_url)

                # find and click the download button
                elem = WebDriverWait(self.driver, self.wait_time).until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//button[contains(text(), 'Export')]")))

                self.log.debug(
                    'Starting download of: {}'.format(report_download_url))
                elem.click()

                wait_for_any_file_in_folder(csv_download_folder_path, "xlsx")
                self.log.debug('Download Finished.')

                df_report = pd.read_excel(
                    get_most_recent_file_in_dir(csv_download_folder_path),
                    **kwargs)
            finally:
                # always release the browser, including when a step above fails
                self.driver.close()
        finally:
            # only remove the folder when it is the temporary one made here;
            # errors are ignored so cleanup never hides the real exception
            if not write_to_disk:
                shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error
        if df_report.shape[0] == 0:
            raise ValueError('No data in report for user {} at url: {}'.format(
                self.username, report_download_url))

        return df_report
Example #8
0
    def download_manage_tab_report(self,
                                   report_url,
                                   write_to_disk=None,
                                   **kwargs):
        """ Downloads a Lexia report from the 'Manage' tab.

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            write_to_disk (string): The path for a directory to store the
                downloaded file. If nothing is provided, the file will be
                stored in a temporary directory and deleted at the end of
                this function.
            **kwargs: additional arguments to pass to Pandas read_csv

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ValueError: If the downloaded report contains no rows.
        """
        report_download_url = interpret_report_url(self.base_url, report_url)

        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        select_all_locator = (By.NAME, "lexia-select-all")
        export_locator = (By.XPATH, "//button[contains(text(), 'Export')]")

        def check_for_export_button_enabled(driver):
            """Click 'select all' and return the Export button once it is
            enabled and displayed; return False while it is not ready."""
            elem_select_all = driver.find_element(*select_all_locator)
            if not elem_select_all.is_enabled():
                return False
            elem_select_all.click()
            if not elem_select_all.is_selected():
                return False
            elem_export = driver.find_element(*export_locator)
            if elem_export.is_enabled() and elem_export.is_displayed():
                return elem_export
            return False

        try:
            self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                     self.headless)
            try:
                self._login()

                self.log.debug(
                    'Getting report page at: {}'.format(report_download_url))
                self.driver.get(report_download_url)

                # select all users and find the download button; the wait
                # calls the check with the driver until it returns the button
                elem_export = WebDriverWait(self.driver, self.wait_time).until(
                    check_for_export_button_enabled)
                self.log.debug(
                    'Starting download of: {}'.format(report_download_url))
                elem_export.click()

                wait_for_any_file_in_folder(csv_download_folder_path, "xls")
                self.log.debug('Download Finished.')

                # the download carries an xls extension but is parsed here as
                # tab-separated text
                df_report = pd.read_csv(
                    get_most_recent_file_in_dir(csv_download_folder_path),
                    sep='\t',
                    **kwargs)
            finally:
                # always release the browser, including when a step above fails
                self.driver.close()
        finally:
            # only remove the folder when it is the temporary one made here;
            # errors are ignored so cleanup never hides the real exception
            if not write_to_disk:
                shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error
        if df_report.shape[0] == 0:
            raise ValueError('No data in report for user {} at url: {}'.format(
                self.username, report_download_url))

        return df_report
Example #9
0
    def download_url_report(self,
                            report_url,
                            school_year,
                            temp_folder_name=None,
                            pandas_read_csv_kwargs=None):
        """ Downloads a SchoolMint data-stream-table report.

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            school_year (string): The SchoolMint school year to download from (e.g. '2018-2019')
            temp_folder_name (string): The name for a sub-directory of
                self.temp_folder_path in which the files from the browser will be
                temporarily stored. If not provided, a new temporary directory is
                created there. NOTE: This sub-directory will be deleted before
                this function returns or raises.
            pandas_read_csv_kwargs (dict): additional arguments to pass to Pandas read_csv

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ReportNotFound: If check_school_year fails for school_year.
            TimeoutError: If the report data summary keeps changing for
                roughly self.wait_time seconds.
            ValueError: If the downloaded report contains no rows.
        """
        if pandas_read_csv_kwargs is None:
            pandas_read_csv_kwargs = {}

        report_page_url = interpret_report_url(self.base_url, report_url)

        if temp_folder_name:
            csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name
        else:
            csv_download_folder_path = mkdtemp(dir=self.temp_folder_path)

        try:
            # set up the driver for execution
            self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                     self.headless)
            try:
                self._login()
                self._set_year(school_year, self.driver)

                # get the report url
                self.driver.get(report_page_url)
                self.__remove_walk_me_and_support()

                # wait until we have rows in the stream data table before
                # starting to look for results
                WebDriverWait(self.driver, self.wait_time).until(
                    EC.presence_of_element_located(
                        (By.XPATH,
                         "//*[@id='stream-table']/tbody/tr[1]/td[1]")))

                if not self.check_school_year(school_year):
                    raise ReportNotFound(
                        "Wrong school detected prior to clicking generate.")

                self.log.debug('Waiting for report-data-summary to load')
                # wait until the stream table is fully loaded before
                # downloading: the summary text must stop changing
                prev_data_summary_elem = self.driver.find_element_by_id(
                    'report-data-summary').text
                time.sleep(1)
                # we use the following count as a proxy for time elapsed, so
                # we can use the class's wait_time as the number of retries
                count = 0
                while True:
                    # check id=report-data-summary
                    report_data_summary_elem = self.driver.find_element_by_id(
                        'report-data-summary').text

                    # if it matches, wait a little longer and double check
                    # that it hasn't changed
                    if prev_data_summary_elem == report_data_summary_elem:
                        time.sleep(3)
                        count += 3
                        report_data_summary_elem = self.driver.find_element_by_id(
                            'report-data-summary').text
                        if prev_data_summary_elem == report_data_summary_elem:
                            break
                    prev_data_summary_elem = report_data_summary_elem
                    time.sleep(1)

                    count += 1
                    if count >= self.wait_time:
                        raise TimeoutError(
                            'SchoolMint Report Data did not fully load within %d seconds'
                            % self.wait_time)

                # click the button to download the report
                self.log.debug('Starting download...')
                self.driver.find_element_by_class_name("export-table").click()

                # wait until file has downloaded to close the browser. We can
                # do this because the download folder is removed before this
                # function returns, so it holds no older files
                wait_for_any_file_in_folder(csv_download_folder_path, "csv")

                self.log.debug('Download finished.')
                report_df = pd.read_csv(
                    get_most_recent_file_in_dir(csv_download_folder_path),
                    encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING,
                    **pandas_read_csv_kwargs)
            finally:
                # close the driver for this task, also when a step above fails
                self.driver.close()
        finally:
            # TODO: move this out of this function. It should happen as cleanup
            # once the whole DAG has completed.
            # Removed exactly once; errors are ignored so cleanup never hides
            # the real exception.
            shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error
        if report_df.shape[0] == 0:
            raise ValueError('No data in report for user {} at url: {}'.format(
                self.username, report_page_url))

        return report_df
Example #10
0
 def test_login(self):
     """Logging in with a default driver completes without raising."""
     self.sl.driver = DriverBuilder().get_driver()
     try:
         self.sl._login()
     finally:
         # close the browser even when the login fails
         self.sl.driver.close()
Example #11
0
 def test_login(self):
     """Logging in completes without raising, using the configured
     headless setting."""
     self.sl.driver = DriverBuilder().get_driver(
         headless=CONFIG.getboolean('SummitLearning', 'headless'))
     try:
         self.sl._login()
     finally:
         # close the browser even when the login fails
         self.sl.driver.close()
Example #12
0
    def download_site_data_download(
            self,
            dl_heading,
            site_id,
            academic_year,
            report_generation_wait=REPORT_GENERATION_WAIT,
            write_to_disk=None,
            **kwargs):
        """ Downloads a CSV from a site's 'data_downloads' page.

        Args:
            dl_heading (string): Text contained in the h3 heading of the
                download section whose CSV is wanted.
            site_id: The site identifier used to build the data downloads
                page URL.
            academic_year: The academic year to select on the page before
                downloading (passed to _set_dl_academic_year).
            report_generation_wait: Timeout handed to WebDriverWait while
                waiting for the 'Download' link. The wait is attempted a
                second time after a page refresh if the first one times out.
            write_to_disk (string): The path for a directory to store the
                downloaded file. If nothing is provided, the file will be
                stored in a temporary directory and deleted at the end of
                this function.
            **kwargs: additional arguments to pass to Pandas read_csv

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ValueError: If the academic year could not be confirmed as set.
            NoSuchElementException: If none of the expected buttons is found.
            TimeoutException: If the 'Download' link does not appear even
                after the page refresh.
            NoDataError: If the downloaded report contains no rows.
        """
        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        try:
            self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                                     self.headless)
            try:
                self._login()

                dl_page_url = "{base_url}/sites/{site_id}/data_downloads/".format(
                    base_url=self.base_url, site_id=site_id)

                self.driver.get(dl_page_url)

                self._set_dl_academic_year(academic_year)

                if not self.check_dl_academic_year(academic_year):
                    raise ValueError("Academic Year not correctly set")

                # start the CSV generation process
                download_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), '{button_text}')]"

                # try to find the "Download CSV" button - old version of the
                # interface
                old_interface = False
                try:
                    elem = self.driver.find_element_by_xpath(
                        download_button_xpath.format(
                            dl_heading=dl_heading,
                            button_text='Download CSV'))
                    old_interface = True
                    self.log.info("'Download CSV' interface detected.")
                    elem.click()
                except NoSuchElementException:
                    # not the old interface; the new one is tried below
                    pass

                # try to find the "Generate CSV" button - new version of the
                # interface
                if not old_interface:
                    gen_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//button[contains(text(), '{button_text}')]"
                    try:
                        elem = self.driver.find_element_by_xpath(
                            gen_button_xpath.format(
                                dl_heading=dl_heading,
                                button_text='Generate CSV'))
                        self.log.info("'Generate CSV' interface detected.")
                        elem.click()
                    # if it's not there, it may have changed to a "Download"
                    # or a "Refresh" button
                    except NoSuchElementException:
                        try:
                            # a "Download" button only needs to exist here; the
                            # download link itself is clicked further down
                            self.driver.find_element_by_xpath(
                                gen_button_xpath.format(
                                    dl_heading=dl_heading,
                                    button_text='Download'))
                        except NoSuchElementException:
                            elem = self.driver.find_element_by_xpath(
                                gen_button_xpath.format(
                                    dl_heading=dl_heading,
                                    button_text='Refresh'))
                            elem.click()

                # wait for the refresh command to be issued
                time.sleep(1)

                # wait for the report to be available and download it
                self.log.info(
                    'Starting download of report "{}" for site_id "{}"'.format(
                        dl_heading, site_id))

                dl_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), 'Download')]"
                download_link_locator = (
                    By.XPATH, dl_button_xpath.format(dl_heading=dl_heading))

                def click_download_link():
                    """Wait for the 'Download' link to be present, then click it."""
                    WebDriverWait(self.driver, report_generation_wait).until(
                        EC.presence_of_element_located(
                            download_link_locator)).click()

                try:
                    click_download_link()
                # if the download is not ready, refresh the page and try one
                # more time
                except TimeoutException:
                    self.driver.refresh()
                    click_download_link()

                wait_for_any_file_in_folder(csv_download_folder_path, "csv")
                self.log.debug('Download Finished.')

                df_report = pd.read_csv(
                    get_most_recent_file_in_dir(csv_download_folder_path),
                    **kwargs)
            finally:
                # always release the browser, including when a step above fails
                self.driver.close()
        finally:
            # only remove the folder when it is the temporary one made here;
            # errors are ignored so cleanup never hides the real exception
            if not write_to_disk:
                shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error
        if df_report.shape[0] == 0:
            raise NoDataError('No data in report "{}" for site_id "{}"'.format(
                dl_heading, site_id))

        return df_report
Example #13
0
    def download_data_shared_with_application(self, application_page_url, collection,
                                              write_to_disk=None, **kwargs):
        """
        Downloads the students shared with a particular application through Clever.
        :param application_page_url: The url for the main Clever management page for a
            particular application. For example, for My Lexia, this would be
            https://schools.clever.com/applications/lexia-mylexia
        :param collection: A string of 'schools', 'students', 'sections', 'teachers', 'schooladmins'
            that indicates which shared data to download. Case and spaces are ignored.
        :param write_to_disk: A path to a directory where the downloaded CSV should be saved.
            If nothing is passed, a temporary directory is used and removed afterwards, so only
            a Pandas DataFrame will be returned.
        :param kwargs: Additional keyword arguments to be passed to the Pandas read_csv function.
        :return: A Pandas DataFrame of the indicated collection download.
        :raises ReportNotFound: If collection is not one of the supported names.
        :raises ValueError: If the download has no rows and collection is not 'schooladmins'
            (an empty 'schooladmins' download only emits a warning).
        """
        collection = collection.lower().replace(' ', '')
        if collection not in ['schools', 'students', 'sections', 'teachers', 'schooladmins']:
            raise ReportNotFound(
                (
                    "Argument for collection '{collection}' is not a valid. Please choose from: "
                    "'schools', 'students', 'sections', 'teachers', 'schooladmins'."
                ).format(collection=collection)
            )
        report_access_page_url = interpret_report_url(self.base_url, application_page_url)

        if write_to_disk:
            csv_download_folder_path = write_to_disk
        else:
            csv_download_folder_path = mkdtemp()

        try:
            self.driver = DriverBuilder().get_driver(csv_download_folder_path, self.headless)
            try:
                self._login()

                self.log.debug('Getting report access page at: {}'.format(report_access_page_url))
                self.driver.get(report_access_page_url)

                # find and click the download button based on the collection desired
                elem = WebDriverWait(self.driver, self.wait_time).until(
                    EC.presence_of_element_located(
                        (By.XPATH,
                         "//a[contains(@href, '{collection}.csv')]".format(collection=collection))
                    )
                )
                self.log.info(
                    'Starting download of: {} - {}'.format(report_access_page_url, collection))
                elem.click()

                wait_for_any_file_in_folder(csv_download_folder_path, "csv")
                self.log.info('Download Finished.')

                df_report = pd.read_csv(get_most_recent_file_in_dir(csv_download_folder_path),
                                        **kwargs)
            finally:
                # always release the browser, including when a step above fails
                self.driver.close()
        finally:
            # only remove the folder when it is the temporary one made here;
            # errors are ignored so cleanup never hides the real exception
            if not write_to_disk:
                shutil.rmtree(csv_download_folder_path, ignore_errors=True)

        # if the dataframe is empty (the report had no data), raise an error,
        # except for 'schooladmins', where an empty download only warns
        if df_report.shape[0] == 0:
            if collection != 'schooladmins':
                raise ValueError('No data in report for user {} at url: {}'.format(
                    self.username, report_access_page_url))
            warnings.warn("The 'schooladmins' collection has no data. Ensure that no school admins are shared.")

        return df_report