def test_dict_filter_by_dict(self):
    """dict_filter_by_dict keeps only the keys/values named in the spec."""
    # Nested-dict input: inner dicts are filtered down to the listed terms.
    nested = {
        2016: {'Fall': 1, 'Spring': 2},
        2017: {'Fall': 3, 'Spring': 4},
    }
    self.assertEqual(
        {2016: {'Fall': 1}},
        dict_filter_by_dict(nested, {2016: ['Fall']}),
    )
    # List-valued input with string keys: int filter keys still match.
    listed = {
        '2016': ['Fall', 'Spring'],
        '2017': ['Fall'],
        '2018': [],
    }
    self.assertEqual(
        {'2016': ['Spring']},
        dict_filter_by_dict(listed, {2016: ['Spring']}),
    )
def start(
    self,
    verbosity=3,
    textbooks=False,
    departments_filter=None,
    years_and_terms_filter=None,
):
    """Start parse.

    Iterates the hardcoded year/term grid (narrowed by
    ``years_and_terms_filter``) and runs the school scrape per semester.
    """
    self.verbosity = verbosity

    # Default to hardcoded current year.
    all_years = {"2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015"}
    all_terms = {"Spring", "Fall", "Summer", "Intersession"}
    candidates = {year: list(all_terms) for year in all_years}
    years_and_terms = dict_filter_by_dict(candidates, years_and_terms_filter)

    for year, year_terms in list(years_and_terms.items()):
        self.ingestor["year"] = year
        for term in year_terms:
            self.ingestor["term"] = term
            self.semester = "{} {}".format(term, year)
            self._get_schools()
            self._parse_schools()
def start(
    self,
    verbosity=3,
    textbooks=True,
    departments_filter=None,
    years_and_terms_filter=None,
):
    """Start the parse.

    Logs in, then for every selected (year, term) loads the term's
    session server-side and scrapes each department's course listing.
    """
    self.verbosity = verbosity
    self._login()
    filtered = dict_filter_by_dict(self._parse_years_and_terms(),
                                   years_and_terms_filter)
    for year, term_map in filtered.items():
        self.ingestor["year"] = year
        for term_name, term_code in term_map.items():
            self.ingestor["term"] = term_name
            # Load environment for targeted semester
            self.requester.get(
                Parser.URL + "/SelectTerm!selectTerm.action",
                params={"selectedTermCode": term_code},
                parse=False,
            )
            self.requester.get(
                Parser.URL + "/SelectTerm!updateSessions.action",
                parse=False,
            )
            # Create payload to request course list from server
            params = {
                "searchCriteria.classStatusCodes": ["O", "W", "C"],
                "__checkbox_searchCriteria.classStatusCodes": ["O", "W", "C"],
            }
            departments = dict_filter_by_list(
                dict(self.extract_department_codes()), departments_filter)
            for dept_code, dept_name in departments.items():
                self.ingestor["department_code"] = dept_code
                self.ingestor["department_name"] = dept_name
                # Construct payload with department code
                params["searchCriteria.subjectAreaCodes"] = dept_code
                # GET html for department course listings
                listing_soup = self.requester.get(
                    Parser.URL + "/SearchClassesExecute!search.action",
                    params=params,
                )
                # Parse courses in department
                self.parse_courses_in_department(listing_soup)
            # return to search page for next iteration
            self.requester.get(Parser.URL + "/Entry.action", parse=False)
def test_dict_filter_by_dict(self):
    """Filtering keeps only keys/values present in the filter spec."""
    by_year = {
        2016: {"Fall": 1, "Spring": 2},
        2017: {"Fall": 3, "Spring": 4},
    }
    result = dict_filter_by_dict(by_year, {2016: ["Fall"]})
    self.assertEqual({2016: {"Fall": 1}}, result)

    # String keys on the data side still match int keys in the filter.
    term_lists = {"2016": ["Fall", "Spring"], "2017": ["Fall"], "2018": []}
    result = dict_filter_by_dict(term_lists, {2016: ["Spring"]})
    self.assertEqual({"2016": ["Spring"]}, result)
def _parse_program(self, program, program_code, query, years_and_terms_filter):
    """Parse every (year, term) offered by *program_code*.

    Mutates *query* in place with the program id before fetching the
    term listing, then delegates each term to ``_parse_term``.
    """
    query['programId'] = program_code
    query['requestType'] = 'TERMS'
    raw_terms = self._extract_json(query)
    years_and_terms = dict_filter_by_dict(
        self._parse_terms_and_years(raw_terms),
        years_and_terms_filter,
    )
    for year, term_map in list(years_and_terms.items()):
        self.ingestor['year'] = year
        for term_name, term_code in list(term_map.items()):
            self._parse_term(term_name, term_code, query)
def _parse_program(self, program, program_code, query, years_and_terms_filter):
    """Parse all terms of the given program, honoring the year/term filter."""
    query['programId'] = program_code
    query['requestType'] = 'TERMS'
    # Fetch the program's term listing and normalize it to {year: {term: code}}.
    parsed = self._parse_terms_and_years(self._extract_json(query))
    selected = dict_filter_by_dict(parsed, years_and_terms_filter)
    for year, term_map in selected.items():
        self.ingestor['year'] = year
        for term_name, term_code in term_map.items():
            self._parse_term(term_name, term_code, query)
def start(self, verbosity=3, textbooks=False, departments_filter=None,
          years_and_terms_filter=None):
    """Start parse.

    Walks the hardcoded year/term grid, narrowed by
    ``years_and_terms_filter``, and scrapes schools per semester.
    """
    self.verbosity = verbosity
    # Default to hardcoded current year.
    supported_years = {'2018', '2017', '2016', '2015'}
    supported_terms = {'Spring', 'Fall', 'Summer', 'Intersession'}
    years_and_terms = dict_filter_by_dict(
        {year: list(supported_terms) for year in supported_years},
        years_and_terms_filter)
    for year, year_terms in years_and_terms.items():
        self.ingestor['year'] = year
        for term in year_terms:
            self.ingestor['term'] = term
            self.semester = '{} {}'.format(term, year)
            self._get_schools()
            self._parse_schools()
def start(self, verbosity=3, textbooks=False, departments_filter=None,
          years_and_terms_filter=None):
    """Start parse.

    Iterates the supported years and term codes ('1', '2', 'V'),
    recording the current pair on both the ingestor and the parser
    itself before scraping schools.
    """
    self.verbosity = verbosity
    # Default to hardcoded current year.
    candidate_years = {'2022', '2021', '2020'}
    candidate_terms = {'1', '2', 'V'}
    available = {year: list(candidate_terms) for year in candidate_years}
    years_and_terms = dict_filter_by_dict(available, years_and_terms_filter)
    for year, year_terms in list(years_and_terms.items()):
        self.ingestor['year'] = year
        self.year = year
        for term in year_terms:
            self.ingestor['term'] = term
            self.term = term
            self._get_schools()
            self._parse_schools()
def start(self, verbosity=3, textbooks=False, departments_filter=None,
          years_and_terms_filter=None):
    """Start parse.

    Builds the default year/term grid, filters it, and runs the
    per-semester school scrape.
    """
    self.verbosity = verbosity
    # Default to hardcoded current year.
    year_choices = {'2018', '2017', '2016', '2015'}
    term_choices = {'Spring', 'Fall', 'Summer', 'Intersession'}
    selection = dict_filter_by_dict(
        {yr: [t for t in term_choices] for yr in year_choices},
        years_and_terms_filter,
    )
    for year, year_terms in selection.items():
        self.ingestor['year'] = year
        for term in year_terms:
            self.ingestor['term'] = term
            self.semester = '{} {}'.format(term, year)
            self._get_schools()
            self._parse_schools()
def parse(self, verbosity=3, textbooks=True, years_and_terms_filter=None,
          departments_filter=None, department_name_regex=None):
    """Do parse.

    Crawls the PeopleSoft class search: for each selected (year, term),
    each group (schools without groups yield ``{None: None}``), and each
    department, posts the search form and parses every course page.

    NOTE(review): ``department_name_regex`` is accepted but never read
    in this body — presumably consumed elsewhere; confirm before use.
    """
    self.verbosity = verbosity
    self.textbooks = textbooks
    self._empty_ingestor_lists()
    # NOTE: umich will do nothing and return an empty dict
    soup, params = self._goto_search_page(self.url_params)
    years_and_terms = dict_filter_by_dict(
        self._get_years_and_terms(soup), years_and_terms_filter
    )
    for year, terms in years_and_terms.items():
        self.ingestor['year'] = year
        for term_name, term_code in terms.items():
            soup = self._term_update(term_code, params)
            self.ingestor['term'] = term_name
            # NOTE: schools that do not use groups will return {None: None}
            groups = self._get_groups(soup, params)
            for group_id, group_name in groups.items():
                params2 = {}
                if group_id is not None:
                    soup = self._group_update(group_id, params)
                    params2 = PeoplesoftParser._hidden_params(soup, ajax=True)
                else:
                    # School does not use groups.
                    # Update search params to get course list.
                    params = PeoplesoftParser._exclude_ajax_params(params)
                    params.update(
                        PeoplesoftParser._create_ic_action('class_search')
                    )
                    params2 = params
                # extract department list info
                dept_param_key = self._get_dept_param_key(soup)
                departments, department_ids = self._get_departments(
                    soup, departments_filter
                )
                # BUG FIX: was `departments.iteritems()` — a Python 2-only
                # method removed in Python 3; every sibling parser uses
                # `.items()`.
                for dept_code, dept_name in departments.items():
                    self.ingestor['dept_name'] = dept_name
                    self.ingestor['dept_code'] = dept_code
                    # Update search payload with department code
                    params2[dept_param_key] = dept_code
                    if department_ids is not None:
                        params2[dept_param_key] = department_ids[dept_code]
                    # Get course listing page for department
                    soup = self.requester.post(self.base_url, params=params2)
                    if not self._is_valid_search_page(soup):
                        continue
                    if self._is_special_search(soup):
                        # too many results
                        soup = self._handle_special_case_on_search(soup)
                    courses = self._get_courses(soup)
                    course_soups = self._get_course_list_as_soup(courses, soup)
                    for course_soup in course_soups:
                        self._parse_course_description(course_soup)
def start(self, years_and_terms_filter=None, departments_filter=None,
          verbosity=3, textbooks=None):
    """Start parse.

    Crawls a Banner course search: logs in, then for each (year, term)
    kept by ``years_and_terms_filter`` fetches the term's search form,
    walks every department, and ingests each course with its catalog
    description and section meetings.

    NOTE(review): ``departments_filter``, ``verbosity`` and ``textbooks``
    are accepted but never read in this body — presumably for interface
    parity with sibling parsers; confirm before relying on them.
    """
    self._login()
    self._direct_to_search_page()
    # Restrict the hardcoded year/term table to the requested subset.
    years_and_terms = dict_filter_by_dict(Parser.YEARS_AND_TERMS,
                                          years_and_terms_filter)
    for year, terms in list(years_and_terms.items()):
        self.ingestor['year'] = year
        for term_name in terms:
            term_code = Parser.YEARS_AND_TERMS[year][term_name]
            self.ingestor['term'] = term_name
            # Retrieve term search page.
            soup = self.requester.get(Parser.URL +
                                      '/bwckgens.p_proc_term_date',
                                      params={
                                          'p_calling_proc': 'P_CrseSearch',
                                          'p_term': term_code
                                      })
            # Create search param list.
            # Seed the query with every <input> of the search form so the
            # server sees the same hidden/default fields a browser would send.
            input_options_soup = soup.find(
                'form', action='/PRODCartridge/bwskfcls.P_GetCrse').find_all(
                    'input')
            query = {}
            for input_option in input_options_soup:
                query[input_option['name']] = input_option.get('value', '')
            query.update({
                'begin_hh': '0',
                'begin_mi': '0',
                'end_hh': '0',
                'end_mi': '0',
                'sel_ptrm': '%',
                'SUB_BTN': 'Section Search'
            })
            # Construct list of departments.
            depts = {}
            depts_soup = soup.find('select', id='subj_id').find_all('option')
            for dept_soup in depts_soup:
                depts[dept_soup.text.strip()] = dept_soup['value']
            for dept_name, dept_code in depts.items():
                self.ingestor['department'] = {
                    'name': dept_name,
                    'code': dept_code
                }
                # Banner expects a leading 'dummy' element in sel_subj.
                query['sel_subj'] = ['dummy', dept_code]
                rows = self.requester.post(Parser.URL + '/bwskfcls.P_GetCrse',
                                           params=query)
                Parser._check_errorpage(rows)
                try:
                    # Drop the two header rows of the results table.
                    rows = rows.find(
                        'table', class_='datadisplaytable').find_all('tr')[2:]
                except AttributeError:
                    print('message: no results for department', dept_name,
                          file=sys.stderr)
                    continue  # no results for department
                # collect offered courses in department
                for row in rows:
                    info = row.find_all('td')
                    # Rows with a link in the 2nd cell start a new course
                    # entry; continuation rows are skipped.
                    if info[1].find('a'):
                        # general info
                        self.ingestor.update({
                            # 'ident': info[1].text,
                            'code': info[2].text + ' ' + info[3].text,
                            # 'href': info[1].find('a')['href'],
                            'dept': dept_name,
                            'section': info[4].text,
                            'credits': safe_cast(info[6].text, float,
                                                 default=0.),
                            'name': info[7].text,
                            'size': int(info[10].text),
                            'enrollment': int(info[11].text),
                            'waitlist': safe_cast(info[14].text, int,
                                                  default=-1),
                            'areas': '; '.join(info[22].text.split(' and '))
                            if len(info) == 23 else ''  # FIXME - hacky fix
                        })
                        # Query course catalog to obtain description.
                        catalog = self.requester.get(
                            Parser.URL + '/bwckctlg.p_display_courses',
                            params={
                                'term_in': term_code,
                                'one_subj': dept_code,
                                'sel_crse_strt': info[3].text,
                                'sel_crse_end': info[3].text,
                                'sel_subj': '',
                                'sel_levl': '',
                                'sel_schd': '',
                                'sel_coll': '',
                                'sel_divs': '',
                                'sel_dept': '',
                                'sel_attr': ''
                            })
                        if catalog:
                            self.ingestor.update(
                                Parser._parse_catalogentrypage(catalog))
                        course = self.ingestor.ingest_course()
                        # Fetch the section detail page to get meetings.
                        section_soup = self.requester.get(
                            Parser.URL + '/bwckschd.p_disp_listcrse',
                            params={
                                'term_in': term_code,
                                'subj_in': dept_code,
                                'crse_in': info[3].text,
                                'crn_in': info[1].text,
                            })
                        meetings_soup = Parser._extract_meetings(
                            section_soup)
                        """Example of a meeting entry
                        <tr>
                        <td class="dddefault">Class</td>
                        <td class="dddefault">4:00 pm - 6:00 pm</td>
                        <td class="dddefault">T</td>
                        <td class="dddefault">See Department DEPT</td>
                        <td class="dddefault">08/28/17 - 12/11/17</td>
                        <td class="dddefault">Lecture</td>
                        <td class="dddefault">Timothy A. McCaffrey (<abbr title="Primary">P</abbr>), David Leitenberg </td>
                        </tr>
                        """
                        self._parse_instructors(meetings_soup)
                        if len(meetings_soup) == 0:
                            continue
                        # 6th cell of the first meeting row holds the type
                        # (e.g. "Lecture").
                        self.ingestor['section_type'] = meetings_soup[
                            0].find_all('td')[5].text
                        section_model = self.ingestor.ingest_section(
                            course)
                        self._parse_meetings(meetings_soup, section_model)
def start(
    self,
    years_and_terms_filter=None,
    departments_filter=None,
    verbosity=3,
    textbooks=None,
):
    """Start parse.

    Crawls a Banner course search: logs in, then for each (year, term)
    kept by ``years_and_terms_filter`` fetches the term's search form,
    walks every department, and ingests each course with its catalog
    description and section meetings.

    NOTE(review): ``departments_filter``, ``verbosity`` and ``textbooks``
    are accepted but never read in this body — presumably for interface
    parity with sibling parsers; confirm before relying on them.
    """
    self._login()
    self._direct_to_search_page()
    # Restrict the hardcoded year/term table to the requested subset.
    years_and_terms = dict_filter_by_dict(Parser.YEARS_AND_TERMS,
                                          years_and_terms_filter)
    for year, terms in list(years_and_terms.items()):
        self.ingestor["year"] = year
        for term_name in terms:
            term_code = Parser.YEARS_AND_TERMS[year][term_name]
            self.ingestor["term"] = term_name
            # Retrieve term search page.
            soup = self.requester.get(
                Parser.URL + "/bwckgens.p_proc_term_date",
                params={
                    "p_calling_proc": "P_CrseSearch",
                    "p_term": term_code
                },
            )
            # Create search param list.
            # Seed the query with every <input> of the search form so the
            # server sees the same hidden/default fields a browser would send.
            input_options_soup = soup.find(
                "form", action="/PRODCartridge/bwskfcls.P_GetCrse").find_all(
                    "input")
            query = {}
            for input_option in input_options_soup:
                query[input_option["name"]] = input_option.get("value", "")
            query.update({
                "begin_hh": "0",
                "begin_mi": "0",
                "end_hh": "0",
                "end_mi": "0",
                "sel_ptrm": "%",
                "SUB_BTN": "Section Search",
            })
            # Construct list of departments.
            depts = {}
            depts_soup = soup.find("select", id="subj_id").find_all("option")
            for dept_soup in depts_soup:
                depts[dept_soup.text.strip()] = dept_soup["value"]
            for dept_name, dept_code in depts.items():
                self.ingestor["department"] = {
                    "name": dept_name,
                    "code": dept_code
                }
                # Banner expects a leading 'dummy' element in sel_subj.
                query["sel_subj"] = ["dummy", dept_code]
                rows = self.requester.post(Parser.URL + "/bwskfcls.P_GetCrse",
                                           params=query)
                Parser._check_errorpage(rows)
                try:
                    # Drop the two header rows of the results table.
                    rows = rows.find(
                        "table", class_="datadisplaytable").find_all("tr")[2:]
                except AttributeError:
                    print(
                        "message: no results for department",
                        dept_name,
                        file=sys.stderr,
                    )
                    continue  # no results for department
                # collect offered courses in department
                for row in rows:
                    info = row.find_all("td")
                    # Rows with a link in the 2nd cell start a new course
                    # entry; continuation rows are skipped.
                    if info[1].find("a"):
                        # general info
                        self.ingestor.update({
                            # 'ident': info[1].text,
                            "code": info[2].text + " " + info[3].text,
                            # 'href': info[1].find('a')['href'],
                            "dept": dept_name,
                            "section": info[4].text,
                            "credits": safe_cast(info[6].text, float,
                                                 default=0.0),
                            "name": info[7].text,
                            "size": int(info[10].text),
                            "enrollment": int(info[11].text),
                            "waitlist": safe_cast(info[14].text, int,
                                                  default=-1),
                            "areas": "; ".join(info[22].text.split(" and "))
                            if len(info) == 23 else "",  # FIXME - hacky fix
                        })
                        # Query course catalog to obtain description.
                        catalog = self.requester.get(
                            Parser.URL + "/bwckctlg.p_display_courses",
                            params={
                                "term_in": term_code,
                                "one_subj": dept_code,
                                "sel_crse_strt": info[3].text,
                                "sel_crse_end": info[3].text,
                                "sel_subj": "",
                                "sel_levl": "",
                                "sel_schd": "",
                                "sel_coll": "",
                                "sel_divs": "",
                                "sel_dept": "",
                                "sel_attr": "",
                            },
                        )
                        if catalog:
                            self.ingestor.update(
                                Parser._parse_catalogentrypage(catalog))
                        course = self.ingestor.ingest_course()
                        # Fetch the section detail page to get meetings.
                        section_soup = self.requester.get(
                            Parser.URL + "/bwckschd.p_disp_listcrse",
                            params={
                                "term_in": term_code,
                                "subj_in": dept_code,
                                "crse_in": info[3].text,
                                "crn_in": info[1].text,
                            },
                        )
                        meetings_soup = Parser._extract_meetings(
                            section_soup)
                        """Example of a meeting entry
                        <tr>
                        <td class="dddefault">Class</td>
                        <td class="dddefault">4:00 pm - 6:00 pm</td>
                        <td class="dddefault">T</td>
                        <td class="dddefault">See Department DEPT</td>
                        <td class="dddefault">08/28/17 - 12/11/17</td>
                        <td class="dddefault">Lecture</td>
                        <td class="dddefault">Timothy A. McCaffrey (<abbr title="Primary">P</abbr>), David Leitenberg </td>
                        </tr>
                        """
                        self._parse_instructors(meetings_soup)
                        if len(meetings_soup) == 0:
                            continue
                        # 6th cell of the first meeting row holds the type
                        # (e.g. "Lecture").
                        self.ingestor["section_type"] = (
                            meetings_soup[0].find_all("td")[5].text)
                        section_model = self.ingestor.ingest_section(
                            course)
                        self._parse_meetings(meetings_soup, section_model)
def start(self, verbosity=3, textbooks=True, departments_filter=None,
          years_and_terms_filter=None):
    """Start the parse.

    Logs in, then for each selected (year, term) switches the server
    session to that semester and scrapes every department's courses.
    """
    self.verbosity = verbosity
    self._login()
    selected = dict_filter_by_dict(self._parse_years_and_terms(),
                                   years_and_terms_filter)
    for year, term_map in selected.items():
        self.ingestor['year'] = year
        for term_name, term_code in term_map.items():
            self.ingestor['term'] = term_name
            # Load environment for targeted semester
            self.requester.get(
                Parser.URL + '/SelectTerm!selectTerm.action',
                params={'selectedTermCode': term_code},
                parse=False)
            self.requester.get(
                Parser.URL + '/SelectTerm!updateSessions.action',
                parse=False)
            # Create payload to request course list from server
            params = {
                'searchCriteria.classStatusCodes': ['O', 'W', 'C'],
                '__checkbox_searchCriteria.classStatusCodes': ['O', 'W', 'C'],
            }
            dept_map = dict_filter_by_list(
                dict(self.extract_department_codes()), departments_filter)
            for code, name in dept_map.items():
                self.ingestor['department_code'] = code
                self.ingestor['department_name'] = name
                # Construct payload with department code
                params['searchCriteria.subjectAreaCodes'] = code
                # GET html for department course listings
                listing_soup = self.requester.get(
                    Parser.URL + '/SearchClassesExecute!search.action',
                    params=params)
                # Parse courses in department
                self.parse_courses_in_department(listing_soup)
            # return to search page for next iteration
            self.requester.get(Parser.URL + '/Entry.action', parse=False)
def start(self, years_and_terms_filter=None, departments_filter=None,
          verbosity=3, textbooks=None):
    """Start parse.

    Crawls a Banner course search: logs in, then for each (year, term)
    kept by ``years_and_terms_filter`` fetches the term's search form,
    walks every department, and ingests each course with its catalog
    description and section meetings.

    NOTE(review): ``departments_filter``, ``verbosity`` and ``textbooks``
    are accepted but never read in this body — presumably for interface
    parity with sibling parsers; confirm before relying on them.
    """
    self._login()
    self._direct_to_search_page()
    years_and_terms = dict_filter_by_dict(
        Parser.YEARS_AND_TERMS, years_and_terms_filter
    )
    for year, terms in years_and_terms.items():
        self.ingestor['year'] = year
        for term_name in terms:
            term_code = Parser.YEARS_AND_TERMS[year][term_name]
            self.ingestor['term'] = term_name
            # Retrieve term search page.
            soup = self.requester.get(
                Parser.URL + '/bwckgens.p_proc_term_date',
                params={
                    'p_calling_proc': 'P_CrseSearch',
                    'p_term': term_code
                }
            )
            # Create search param list.
            # Seed the query with every <input> of the search form so the
            # server sees the same hidden/default fields a browser would send.
            input_options_soup = soup.find(
                'form', action='/PRODCartridge/bwskfcls.P_GetCrse'
            ).find_all('input')
            query = {}
            for input_option in input_options_soup:
                query[input_option['name']] = input_option.get('value', '')
            query.update({
                'begin_hh': '0',
                'begin_mi': '0',
                'end_hh': '0',
                'end_mi': '0',
                'sel_ptrm': '%',
                'SUB_BTN': 'Section Search'
            })
            # Construct list of departments.
            depts = {}
            depts_soup = soup.find('select', id='subj_id').find_all('option')
            for dept_soup in depts_soup:
                depts[dept_soup.text.strip()] = dept_soup['value']
            # BUG FIX: was `depts.iteritems()` — a Python 2-only method
            # removed in Python 3 (this body already uses Python 3
            # `print(..., file=...)`); sibling parsers use `.items()`.
            for dept_name, dept_code in depts.items():
                self.ingestor['department'] = {
                    'name': dept_name,
                    'code': dept_code
                }
                # Banner expects a leading 'dummy' element in sel_subj.
                query['sel_subj'] = ['dummy', dept_code]
                rows = self.requester.post(
                    Parser.URL + '/bwskfcls.P_GetCrse',
                    params=query
                )
                Parser._check_errorpage(rows)
                try:
                    # Drop the two header rows of the results table.
                    rows = rows.find(
                        'table', class_='datadisplaytable'
                    ).find_all('tr')[2:]
                except AttributeError:
                    print('message: no results for department', dept_name,
                          file=sys.stderr)
                    continue  # no results for department
                # collect offered courses in department
                for row in rows:
                    info = row.find_all('td')
                    # Rows with a link in the 2nd cell start a new course
                    # entry; continuation rows are skipped.
                    if info[1].find('a'):
                        # general info
                        self.ingestor.update({
                            # 'ident': info[1].text,
                            'code': info[2].text + ' ' + info[3].text,
                            # 'href': info[1].find('a')['href'],
                            'dept': dept_name,
                            'section': info[4].text,
                            'credits': safe_cast(info[6].text, float,
                                                 default=0.),
                            'name': info[7].text,
                            'size': int(info[10].text),
                            'enrollment': int(info[11].text),
                            'waitlist': safe_cast(info[14].text, int,
                                                  default=-1),
                            'areas': '; '.join(info[22].text.split(' and '))
                            if len(info) == 23 else ''  # FIXME - hacky fix
                        })
                        # Query course catalog to obtain description.
                        catalog = self.requester.get(
                            Parser.URL + '/bwckctlg.p_display_courses',
                            params={
                                'term_in': term_code,
                                'one_subj': dept_code,
                                'sel_crse_strt': info[3].text,
                                'sel_crse_end': info[3].text,
                                'sel_subj': '',
                                'sel_levl': '',
                                'sel_schd': '',
                                'sel_coll': '',
                                'sel_divs': '',
                                'sel_dept': '',
                                'sel_attr': ''
                            }
                        )
                        if catalog:
                            self.ingestor.update(
                                Parser._parse_catalogentrypage(catalog)
                            )
                        course = self.ingestor.ingest_course()
                        # Fetch the section detail page to get meetings.
                        section_soup = self.requester.get(
                            Parser.URL + '/bwckschd.p_disp_listcrse',
                            params={
                                'term_in': term_code,
                                'subj_in': dept_code,
                                'crse_in': info[3].text,
                                'crn_in': info[1].text,
                            }
                        )
                        meetings_soup = Parser._extract_meetings(section_soup)
                        """Example of a meeting entry
                        <tr>
                        <td class="dddefault">Class</td>
                        <td class="dddefault">4:00 pm - 6:00 pm</td>
                        <td class="dddefault">T</td>
                        <td class="dddefault">See Department DEPT</td>
                        <td class="dddefault">08/28/17 - 12/11/17</td>
                        <td class="dddefault">Lecture</td>
                        <td class="dddefault">Timothy A. McCaffrey (<abbr title="Primary">P</abbr>), David Leitenberg </td>
                        </tr>
                        """
                        self._parse_instructors(meetings_soup)
                        if len(meetings_soup) == 0:
                            continue
                        # 6th cell of the first meeting row holds the type
                        # (e.g. "Lecture").
                        self.ingestor['section_type'] = (
                            meetings_soup[0].find_all('td')[5].text)
                        section_model = self.ingestor.ingest_section(course)
                        self._parse_meetings(meetings_soup, section_model)