Exemple #1
0
    def _set_year(self, school_year, driver=None):
        """Sets the year for the SchoolMint interface.

        Args:
            school_year (string): The school year that should be selected. Use the format shown in the
                SchoolMint interface. Example: '2016-2017'
            driver: Used only as a flag. When falsy (the default), a new Chrome
                driver is created, logged in, and closed again before this method
                returns or raises. When truthy, the existing ``self.driver`` is
                reused and left open; the passed object itself is not used.

        Returns:
            True if function succeeds

        Raises:
            NoSuchElementException: If no entry in the year selector contains
                ``school_year``. A screenshot is saved to 'cannot_find_year.png'
                and a hint about the passed value is appended to the message.
        """
        self.log.debug('Changing school year to: {}'.format(school_year))

        # Only a driver that this call created is closed by this call.
        owns_driver = not driver
        if owns_driver:
            self.driver = configure_selenium_chrome()

        try:
            if owns_driver:
                self._login()

            # open the year selector menu
            elem = self.driver.find_element_by_xpath(
                "//a[contains(@class,'dropdown-toggle enrollment')]")
            elem.click()

            # select the appropriate year
            try:
                year_xpath = "//*[@id='enrollment-selector']//a[contains(text(),'{}')]".format(
                    school_year)
                elem = self.driver.find_element_by_xpath(year_xpath)
                elem.click()
            except NoSuchElementException as e:
                self.driver.save_screenshot('cannot_find_year.png')
                message = (' Check that the school_year variable is valid. '
                           'Passed value for school_year: {}').format(school_year)

                # Re-raise the same exception type with the hint appended.
                # NOTE(review): raise_with_traceback is presumably
                # future.utils.raise_with_traceback - confirm against imports.
                raise_with_traceback(type(e)(str(e) + message))

            # wait for the page to be ready again
            self.driver.get(self.base_url)
            WebDriverWait(self.driver, self.wait_time).until(
                EC.presence_of_element_located((By.ID, 'student-lookup')))
        finally:
            # Close the browser even when a step above raised, so a driver
            # created here is not leaked.
            if owns_driver:
                self.driver.close()

        return True
Exemple #2
0
    def download_url_report(self, report_url, temp_folder_name):
        """ Downloads an Informed K12 report.

        The whole browser workflow (login, open report, select all responses,
        request a spreadsheet with all columns, wait for the CSV) is attempted
        up to 10 times; a WebDriverException on any attempt closes the browser
        and starts over.

        Args:
            report_url (string): Information pertaining to the path and query
                string for the report whose access is desired. Any filtering
                that can be done with a stateful URL should be included.
            temp_folder_name (string): The name of the folder in which this
                specific report's download files should be stored.

        Returns: A Pandas DataFrame of the report contents.

        Raises:
            ValueError: If the report page shows a 'No submissions' heading.
            WebDriverException: If the 10th attempt still fails.
        """
        count = 0
        while True:
            try:
                csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name
                # set up the driver for execution
                self.driver = configure_selenium_chrome(
                    csv_download_folder_path)
                self._login()

                time.sleep(2)

                # get the report url
                self.driver.get(interpret_report_url(self.base_url,
                                                     report_url))

                # check to see if there are no submissions. If so, abort by exception
                try:
                    self.driver.find_element_by_xpath(
                        "//h2[contains(text(), 'No submissions')]")
                    self.driver.close()
                    # ValueError is not a WebDriverException, so it bypasses
                    # the retry handler below and reaches the caller.
                    raise ValueError(
                        'No data in report for user {} at url: {}'.format(
                            self.username,
                            interpret_report_url(self.base_url, report_url)))
                except NoSuchElementException:
                    # We actually don't want to find this.
                    pass

                # wait until we have rows in the responses data table before starting to
                # look for results (a TimeoutException here triggers a retry)
                elem = WebDriverWait(self.driver, self.wait_time).until(
                    EC.presence_of_element_located((
                        By.XPATH,
                        "//*[@class='responses-table']/table/thead/tr[1]/*[@class='checkboxes']/input"
                    )))

                # select all
                elem.click()

                # check to see if a new link populates to 'select all filtered submissions'
                # (happens if more than 50 submissions)
                try:
                    elem = self.driver.find_element_by_xpath(
                        "//*[@class='responses-bulk-actions']/*[@class='select-link']"
                    )
                    elem.click()
                except NoSuchElementException:
                    # The except clause must name the exception class, not an
                    # instance; the link is optional, so its absence is fine.
                    pass

                # click download
                elem = self.driver.find_element_by_xpath(
                    "//*[contains(text(), 'Download') and @class='hidden-xs']")
                elem.click()

                # click 'As a spreadsheet'
                elem = self.driver.find_element_by_xpath(
                    "//*[@class='dropdown-menu dropdown-menu-right']//*[contains(text(), 'As a spreadsheet')]"
                )
                elem.click()

                # activate the menu that allows 'select all'
                # the following elem selection fails b/c it moves, so we time.sleep to let it load first
                time.sleep(0.5)
                elem = WebDriverWait(self.driver, self.wait_time).until(
                    EC.visibility_of_element_located((
                        By.XPATH,
                        "//*[@class='dropdown-toggle']/*[contains(text(), 'columns')]/i"
                    )))
                elem.click()

                # click on 'select all'
                elem = self.driver.find_element_by_xpath(
                    "//*[@class='dropdown-menu dropdown-menu-right']//*[contains(text(), 'Select all')]"
                )
                elem.click()

                # wait a moment for the info to populate
                time.sleep(2)

                # click the final download button, polling once a second for up
                # to 10 tries because the button may not be present yet
                c = 0
                while True:
                    try:
                        elem = self.driver.find_element_by_xpath(
                            "//*[@class='btn btn-primary' and contains(text(), 'Download')]"
                        )
                        elem.click()
                    except NoSuchElementException:
                        if c >= 9:
                            raise
                        time.sleep(1)
                        c += 1
                        continue
                    break

                # wait until file has downloaded to close the browser. We can do this
                # because we delete the file before we return it, so the temp dir should
                # always be empty when this command is run
                # TODO add a try/except block here
                wait_for_any_file_in_folder(csv_download_folder_path, 'csv')

                report_df = pd.read_csv(
                    get_most_recent_file_in_dir(csv_download_folder_path))

                # delete any files in the temp folder; we don't need them now
                # TODO: move this out of this function. It should happen as cleanup once
                # the whole DAG has completed
                delete_folder_contents(csv_download_folder_path)

                self.driver.close()
            except WebDriverException:
                # Retry the whole workflow with a fresh browser.
                if count >= 9:
                    raise
                count += 1
                self.driver.close()
                continue
            break

        return report_df
Exemple #3
0
    def download_proficiency_report(self, awpm=0, wpm=0, accuracy=0, qscore=0):
        """Downloads the built-in Proficiency Report from Typing Agent.

        The school and grade option lists are read from the report page with
        Selenium; the per-school, per-grade CSV exports are then fetched with a
        requests session that reuses the browser's cookies.

        Args:
            awpm (int): A threshold for the Average Words Per Minute metric. Setting this
                to a value other than the default(0) will filter the report to not include
                students with a value below this threshold.
            wpm (int): A threshold for the Words Per Minute metric. Setting this
                to a value other than the default(0) will filter the report to not include
                students with a value below this threshold.
            accuracy (float): A threshold for the Accuracy metric. Setting this
                to a value other than the default(0) will filter the report to not include
                students with a value below this threshold. This number should be the
                decimal representation of a percentage (a float between 0 and 1.0).
            qscore (int): A threshold for the Q-Score or "Quality Score" metric. Setting this
                to a value other than the default(0) will filter the report to not include
                students with a value below this threshold.

        Returns: A Pandas Dataframe of the Typing Agent Proficiency Report for all of the
            students in all of the grades accessible to this instance of the TypingAgent
            object (all schools and grades that are accessible at the hostname provided
            when this object was instantiated). 'School Name' and 'Grade' columns are
            added to identify the source of each row.

        Raises:
            ValueError: If any threshold is negative, ``accuracy`` is above 1, or
                a school/grade export still fails on the 10th attempt.
            TimeoutException: If the school selector does not appear within
                ``self.wait_time``.
        """
        self.logger.info('Beginning download_proficiency_report')
        # input validation
        if awpm < 0 or wpm < 0 or accuracy < 0 or accuracy > 1 or qscore < 0:
            raise ValueError(
                'Inputs to TypingAgent.downlaod_proficiency_report() outside acceptible bounds.'
            )

        # set up the driver for execution
        self.driver = configure_selenium_chrome()
        try:
            self._login()

            # get the report url
            self.driver.get(
                "https://app.typingagent.com/index.php?r=district/home/index#/index.php?r=district/report/proficiency"
            )

            # get all of the school codes and names
            elem_select_school = WebDriverWait(
                self.driver, self.wait_time).until(
                    EC.presence_of_element_located((By.ID, 'school_prof')))

            school_code_options = elem_select_school.find_elements_by_xpath(
                "//*[@id='school_prof']/option")
            schools = list()
            for school_code_option in school_code_options:
                school_code = school_code_option.get_attribute("value")
                # Compare by value: identity checks against a string literal
                # depend on interning and are not reliable. Options with an
                # empty value are skipped.
                if school_code != "":
                    schools.append({
                        'name': school_code_option.text,
                        'code': school_code
                    })

            # get all of the school grades
            elem_select_grade = self.driver.find_element_by_id('grade_prof')
            grade_options = elem_select_grade.find_elements_by_xpath(
                "//*[@id='grade_prof']/option")
            grades = list()
            for grade_option in grade_options:
                grade_code = grade_option.get_attribute("value")
                if grade_code != "":
                    grades.append({
                        'name': grade_option.text,
                        'code': grade_code
                    })

            # create requests session to efficiently download multiple files
            with requests.Session() as s:
                for cookie in self.driver.get_cookies():
                    s.cookies.set(cookie['name'], cookie['value'])

                dfs_school_grade = list()
                for school in schools:
                    for grade in grades:
                        self.logger.info(
                            'Downloading proficiency_report for school, grade: {}, {}'
                            .format(school['name'], grade['name']))
                        # create GET url; accuracy is sent as a whole-number percentage
                        report_url = (
                            "https://app.typingagent.com/index.php?r=district/report/ProficiencyReport&"
                            "prof_awpm={}"
                            "&prof_wpm={}"
                            "&prof_accuracy={}"
                            "&prof_qscore={}"
                            "&school_prof={}"
                            "&grade_prof={}"
                            "&export=1").format(awpm, wpm,
                                                str(int(accuracy * 100)), qscore,
                                                school['code'], grade['code'])

                        # download with up to 10 attempts on failure
                        c = 0
                        while True:
                            download_response = s.get(report_url, stream=True)

                            if download_response.ok:
                                df_school_grade = pd.read_csv(
                                    io.StringIO(
                                        download_response.content.decode('utf-8')))
                                df_school_grade['School Name'] = school['name']
                                df_school_grade['Grade'] = grade['name']

                                dfs_school_grade.append(df_school_grade)
                            else:
                                self.logger.info(
                                    'Download failed for school, grade: {}, {}'.
                                    format(school['name'], grade['name']))
                                self.logger.info(
                                    'Report URL: {}'.format(report_url))
                                self.logger.info('Download status_code: {}'.format(
                                    download_response.status_code))
                                self.logger.info(
                                    'Retrying... Retry#: {}'.format(c + 1))
                                if c >= 9:
                                    raise ValueError(
                                        'Unable to download report after multiple retries.'
                                    )
                                # add some jitter to the requests
                                # NOTE(review): randint(500) is valid for
                                # numpy.random.randint; random.randint needs two
                                # bounds - confirm which one this file imports.
                                sleep_time = (1000 + randint(500)) / 1000
                                time.sleep(sleep_time)
                                c += 1
                                continue
                            break
        finally:
            # Close the browser even when a step above raised, so the driver
            # is not leaked.
            self.driver.close()

        self.logger.info('Proficiency report download complete!')

        return pd.concat(dfs_school_grade, ignore_index=True)
Exemple #4
0
    def download_google_accounts_manager_student_export(self):
        """ Downloads the Google Accounts Manager Student Export that includes student emails.

        Logs in with Selenium, collects cookies from three pages, then fetches
        the export CSV with a requests session carrying those cookies.

        Returns: A Pandas DataFrame parsed from the downloaded CSV.

        Raises:
            ValueError: If the export still fails on the 10th attempt.
        """
        self.log.info('Starting student email download.')
        # set up the driver for execution
        self.driver = configure_selenium_chrome()
        try:
            self._login()

            # grab some cookies (need to do this here for _mkto_trk cookie)
            cookies_orig = self.driver.get_cookies()

            # open the Google Accounts Manager application page
            # note - clever applications like Google Accounts Manager have unique ids that are a part of their URL
            # note - we have to get the settings page of the Google Accounts Manager to get the cookie
            #  that we need in order to download the file
            self.driver.get('https://schools.clever.com/school/applications/50ca15a93bc2733956000007/settings')
            cookies_schools = self.driver.get_cookies()

            # we may need to get the gaprov.ops.clever.com to get a cookie in new versions of chromedriver
            self.driver.get('https://gaprov.ops.clever.com/')
            cookies_gaprov = self.driver.get_cookies()

            # create requests session to download report without need for file storage
            with requests.Session() as s:

                # transfer over a bunch of cookies to the requests session; the
                # order is kept so later pages override earlier cookies of the
                # same name
                for cookie_batch in (cookies_orig, cookies_schools, cookies_gaprov):
                    for cookie in cookie_batch:
                        s.cookies.set(cookie['name'], cookie['value'])

                s.cookies.set('_gat', "1")
                s.cookies.set('_gat_globalTracker', "1")

                report_url = 'https://gaprov.ops.clever.com/reporting/student'

                # download with up to 10 attempts on failure
                c = 0
                while True:
                    download_response = s.get(report_url, stream=True)

                    if download_response.ok:
                        df_report = pd.read_csv(io.StringIO(download_response.content.decode('utf-8')))
                    else:
                        self.log.info('Download failed for report url: {}'.format(report_url))
                        self.log.info('Download status_code: {}'.format(download_response.status_code))
                        self.log.info('Retrying... Retry#: {}'.format(c+1))
                        if c >= 9:
                            raise ValueError('Unable to download report after multiple retries.')
                        # add some jitter to the requests
                        # NOTE(review): randint(500) is valid for
                        # numpy.random.randint; random.randint needs two
                        # bounds - confirm which one this file imports.
                        sleep_time = (1000 + randint(500)) / 1000
                        time.sleep(sleep_time)
                        c += 1
                        continue
                    break
        finally:
            # Close the browser even when a step above raised, so the driver
            # is not leaked.
            self.driver.close()

        self.log.info('Student email download complete.')

        return df_report
Exemple #5
0
    def download_custom_report(self, custom_report_name):
        """ Downloads a named custom report from Typing Agent
        :param custom_report_name: A string representing the custom report you wish to
                                    download. It is matched exactly against the text of
                                    the options in the 'report_list' selector.
        :return: A pandas dataframe with the data from the custom report.
        :raises ValueError: If no report option has the given name (or its value is
                            empty), or if the download still fails on the 10th attempt.
        :raises TimeoutException: If the report list does not appear within
                                  ``self.wait_time``.
        """
        self.logger.info(
            'Beginning custom_report download for report: {}'.format(
                custom_report_name))
        # set up the driver for execution
        self.driver = configure_selenium_chrome()
        try:
            self._login()

            # get the report url
            self.driver.get(
                "https://app.typingagent.com/index.php?r=district/report/index")

            # get list of reports, find one that matches the custom_report_name
            elem_select_report = Select(
                WebDriverWait(self.driver, self.wait_time).until(
                    EC.presence_of_element_located((By.ID, 'report_list'))))

            # find the query string that we need to pass in order to download
            # the intended report. Initialised to None so a missing report
            # reaches the ValueError below instead of failing on an unbound name.
            custom_report_query_string = None
            for report_option in elem_select_report.options:
                if report_option.text == custom_report_name:
                    custom_report_query_string = report_option.get_attribute(
                        "value")
                    break

            if not custom_report_query_string:
                # Report the name that was searched for, not the empty result.
                raise ValueError(
                    'Typing Agent Custom Report not found with name: {}'.format(
                        custom_report_name))

            # create requests session to download report without need for file storage
            with requests.Session() as s:
                for cookie in self.driver.get_cookies():
                    s.cookies.set(cookie['name'], cookie['value'])

                report_url = self.base_url + custom_report_query_string + '&export=1'

                # download with up to 10 attempts on failure
                c = 0
                while True:
                    download_response = s.get(report_url, stream=True)

                    if download_response.ok:
                        df_report = pd.read_csv(
                            io.StringIO(download_response.content.decode('utf-8')))
                    else:
                        # The matched option's text equals custom_report_name,
                        # so log the name directly instead of re-reading the
                        # page element.
                        self.logger.info('Download failed for {}'.format(
                            custom_report_name))
                        self.logger.info('Report URL: {}'.format(report_url))
                        self.logger.info('Download status_code: {}'.format(
                            download_response.status_code))
                        self.logger.info('Retrying... Retry#: {}'.format(c + 1))
                        if c >= 9:
                            raise ValueError(
                                'Unable to download report after multiple retries.'
                            )
                        # add some jitter to the requests
                        # NOTE(review): randint(500) is valid for
                        # numpy.random.randint; random.randint needs two
                        # bounds - confirm which one this file imports.
                        sleep_time = (1000 + randint(500)) / 1000
                        time.sleep(sleep_time)
                        c += 1
                        continue
                    break
        finally:
            # Close the browser even when a step above raised, so the driver
            # is not leaked.
            self.driver.close()

        self.logger.info('Custom report download complete!')

        return df_report