def download_majors(username, password):
    """Download the list of degree options for every college.

    Authenticates a session against the export page, then for each
    college in COLLEGES fetches the page and parses each <option>
    text of the form "<type> in <department>" into its parts.

    Args:
        username: login name forwarded to cmu_auth.authenticate.
        password: password forwarded to cmu_auth.authenticate.

    Returns:
        dict with two keys:
            'data': {college: [{'name', 'department', 'type'}, ...]}
            'auth': the authenticated session, reusable by callers.
    """
    college_pages = {}
    parameters = copy.copy(URL_PARAMS)
    # Authenticate against the base URL (before the changeCollege/call
    # params are added) to obtain a session with the viewstate.
    url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
    parameters['changeCollege'] = 'Next'
    parameters['call'] = '4'
    s = cmu_auth.authenticate(url, username, password)
    for college in COLLEGES:
        parameters['college'] = college
        url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
        export_page = s.get(url).content
        # Explicit parser: without it bs4 warns and silently picks
        # whichever parser happens to be installed.
        soup = bs4.BeautifulSoup(export_page, 'html.parser')
        college_pages[college] = _.chain(
            soup.select('option')).map(lambda x: _.strip_tags(x)).map(
                lambda x: {
                    'name': x,
                    # Option text looks like "<type> in <department>".
                    # NOTE(review): raises IndexError if ' in ' is
                    # absent — presumably never happens; confirm.
                    'department': x.split(' in ')[1],
                    'type': x.split(' in ')[0],
                }).value()
    return {'data': college_pages, 'auth': s}
def download_majors(username, password):
    """Fetch degree options per college and split them into parts.

    Logs in once, then for every college in COLLEGES downloads the
    export page and turns each <option> text ("<type> in <department>")
    into a dict with 'name', 'department', and 'type' keys.

    Returns:
        {'data': per-college lists of those dicts, 'auth': the session}
    """
    query = copy.copy(URL_PARAMS)
    # Log in against the bare URL before the navigation params go in.
    login_url = SOURCE_URL + '?' + urllib.parse.urlencode(query)
    query['changeCollege'] = 'Next'
    query['call'] = '4'
    session = cmu_auth.authenticate(login_url, username, password)
    results = {}
    for school in COLLEGES:
        query['college'] = school
        page_url = SOURCE_URL + '?' + urllib.parse.urlencode(query)
        html = session.get(page_url).content
        soup = bs4.BeautifulSoup(html)
        option_texts = [_.strip_tags(opt) for opt in soup.select('option')]
        results[school] = [
            {
                'name': text,
                'department': text.split(' in ')[1],
                'type': text.split(' in ')[0],
            }
            for text in option_texts
        ]
    return {'data': results, 'auth': session}
def download_catalog_year(college_pages, auth):
    """Attach catalog-year information to every major in college_pages.

    For each major, requests the export page. When the response offers
    a year dropdown, stores all of its option texts under 'year';
    otherwise scrapes the hidden MajorFile input and the 4-digit year
    out of the raw HTML.

    Args:
        college_pages: {college: [major dicts]} as produced by
            download_majors(); mutated in place.
        auth: authenticated session used for the requests.

    Returns:
        The same college_pages mapping, with 'year' (and possibly
        'MajorFile') added to each major dict.
    """
    parameters = copy.copy(URL_PARAMS)
    url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
    parameters['changeMajor'] = 'Next'
    parameters['call'] = '5'
    for college in COLLEGES:
        parameters['college'] = college
        print('Downloading College', college)
        for idx, major in enumerate(college_pages[college]):
            print(' Getting Degree', major['name'])
            parameters['major'] = major['name']
            url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
            export_page = auth.get(url).content
            # Explicit parser: avoids bs4's warning and pins behavior
            # regardless of which optional parsers are installed.
            soup = bs4.BeautifulSoup(export_page, 'html.parser')
            data = soup.select('option')
            if len(data) > 0:
                # Year offered as a dropdown — keep every option text.
                college_pages[college][idx]['year'] = _.map_(
                    data, lambda x: _.strip_tags(x))
            else:
                # No dropdown: scrape the hidden form fields directly
                # from the raw HTML.
                decoded = export_page.decode()
                college_pages[college][idx]['MajorFile'] = decoded.split(
                    'name=MajorFile value=')[1].split('>')[0]
                # Raw strings: bare '\d' in a normal literal is an
                # invalid escape sequence (SyntaxWarning on 3.12+).
                # The string values are unchanged.
                year = _.js_match(
                    _.js_match(
                        export_page,
                        r'/[<input type=hidden name=year year="](\d{4})/'),
                    r'/\d{4}/')
                college_pages[college][idx]['year'] = year
    return college_pages
def download_catalog_year(college_pages, auth):
    """Fill in the catalog year (and MajorFile when present) per major.

    Walks every major already stored in college_pages, fetches its
    export page, and records either the year-dropdown option texts or
    the values scraped from the hidden form fields. Mutates and
    returns college_pages.
    """
    query = copy.copy(URL_PARAMS)
    target = SOURCE_URL + '?' + urllib.parse.urlencode(query)
    query['changeMajor'] = 'Next'
    query['call'] = '5'
    for school in COLLEGES:
        query['college'] = school
        print('Downloading College', school)
        for position, degree in enumerate(college_pages[school]):
            print(' Getting Degree', degree['name'])
            query['major'] = degree['name']
            target = SOURCE_URL + '?' + urllib.parse.urlencode(query)
            body = auth.get(target).content
            soup = bs4.BeautifulSoup(body)
            entry = college_pages[school][position]
            if soup.select('option'):
                # The page offered a year dropdown.
                entry['year'] = _.map_(
                    soup.select('option'), lambda x: _.strip_tags(x))
                continue
            # Otherwise pull the hidden fields out of the raw markup.
            text = body.decode()
            entry['MajorFile'] = text.split(
                'name=MajorFile value=')[1].split('>')[0]
            entry['year'] = _.js_match(
                _.js_match(
                    body,
                    r'/[<input type=hidden name=year year="](\d{4})/'),
                r'/\d{4}/')
    return college_pages
def test_strip_tags(case, expected):
    """Assert that strip_tags maps *case* to *expected*."""
    actual = _.strip_tags(case)
    assert actual == expected