Esempio n. 1
0
def download_majors(username, password):
    """Log in and collect the majors listed for every college.

    A session is obtained from ``cmu_auth.authenticate`` using the URL built
    from the unmodified ``URL_PARAMS``.  For each entry of ``COLLEGES`` the
    page built with ``changeCollege='Next'``, ``call='4'`` and that college
    is fetched, and every ``<option>`` element on it is turned into a dict.

    Args:
        username: Passed through to ``cmu_auth.authenticate``.
        password: Passed through to ``cmu_auth.authenticate``.

    Returns:
        ``{'data': majors_by_college, 'auth': session}`` where
        ``majors_by_college`` maps each college to a list of dicts with the
        keys ``'name'`` (tag-stripped option text), ``'department'`` (second
        piece of the text split on ``' in '``) and ``'type'`` (first piece).

    Raises:
        IndexError: If an option's text does not contain ``' in '``.
    """
    query = copy.copy(URL_PARAMS)
    # Authentication uses the URL from the default parameters only; the
    # college-selection parameters are added afterwards.
    session = cmu_auth.authenticate(
        SOURCE_URL + '?' + urllib.parse.urlencode(query), username, password)
    query['changeCollege'] = 'Next'
    query['call'] = '4'

    majors_by_college = {}
    for college in COLLEGES:
        query['college'] = college
        page_url = SOURCE_URL + '?' + urllib.parse.urlencode(query)
        markup = bs4.BeautifulSoup(session.get(page_url).content)

        majors = []
        for option in markup.select('option'):
            label = _.strip_tags(option)
            pieces = label.split(' in ')
            majors.append({
                'name': label,
                'department': pieces[1],
                'type': pieces[0],
            })
        majors_by_college[college] = majors

    return {'data': majors_by_college, 'auth': session}
Esempio n. 2
0
def download_majors(username, password):
    """Authenticate, then build a per-college list of available majors.

    The session comes from ``cmu_auth.authenticate`` called with the URL
    derived from the default ``URL_PARAMS``.  Each college in ``COLLEGES`` is
    then requested with ``changeCollege='Next'`` and ``call='4'`` added to
    the query, and the ``<option>`` elements of the response are collected.

    Args:
        username: Forwarded to ``cmu_auth.authenticate``.
        password: Forwarded to ``cmu_auth.authenticate``.

    Returns:
        A dict with ``'data'`` (college -> list of ``{'name', 'department',
        'type'}`` dicts, where department and type are the second and first
        pieces of the option text split on ``' in '``) and ``'auth'`` (the
        authenticated session).

    Raises:
        IndexError: If an option's text does not contain ``' in '``.
    """
    request_params = copy.copy(URL_PARAMS)
    # The login URL is built before the extra parameters are set.
    login_url = SOURCE_URL + '?' + urllib.parse.urlencode(request_params)
    request_params['changeCollege'] = 'Next'
    request_params['call'] = '4'
    session = cmu_auth.authenticate(login_url, username, password)

    result = {}
    for college in COLLEGES:
        request_params['college'] = college
        college_url = SOURCE_URL + '?' + urllib.parse.urlencode(request_params)
        parsed = bs4.BeautifulSoup(session.get(college_url).content)

        # First strip the markup from every option, then describe each label.
        labels = [_.strip_tags(option) for option in parsed.select('option')]
        result[college] = [
            {
                'name': label,
                'department': label.split(' in ')[1],
                'type': label.split(' in ')[0],
            }
            for label in labels
        ]

    return {'data': result, 'auth': session}
Esempio n. 3
0
def download_catalog_year(college_pages, auth):
    """Add catalog-year data to every major listed in ``college_pages``.

    For each college in ``COLLEGES`` and each major under it, the page built
    from ``SOURCE_URL`` and ``URL_PARAMS`` (plus ``changeMajor='Next'``,
    ``call='5'``, the college and the major name) is fetched through
    ``auth`` and the outcome is stored on the major's own dict:

    * If the page contains ``<option>`` elements, ``'year'`` becomes the
      list of their tag-stripped texts.
    * Otherwise ``'MajorFile'`` becomes the text that follows
      ``name=MajorFile value=`` up to the next ``>``, and ``'year'`` becomes
      the result of the nested ``_.js_match`` calls.

    Args:
        college_pages: Indexable by college, yielding a list of major dicts
            that each carry a ``'name'`` key.  The dicts are modified in
            place.
        auth: Object whose ``get(url)`` returns a response exposing
            ``content`` (presumably the session produced by
            ``download_majors`` -- confirm against the caller).

    Returns:
        The same ``college_pages`` object that was passed in.

    Raises:
        IndexError: If a page has no ``<option>`` elements and does not
            contain the ``name=MajorFile value=`` marker either.
    """
    parameters = copy.copy(URL_PARAMS)
    parameters['changeMajor'] = 'Next'
    parameters['call'] = '5'

    for college in COLLEGES:
        parameters['college'] = college
        print('Downloading College', college)
        for major in college_pages[college]:
            print('    Getting Degree', major['name'])
            parameters['major'] = major['name']

            url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
            export_page = auth.get(url).content
            # Run the selector once and reuse the result for both the
            # emptiness check and the extraction.
            options = bs4.BeautifulSoup(export_page).select('option')
            if options:
                major['year'] = [_.strip_tags(option) for option in options]
                continue

            # No <option> list on the page: read the values straight from
            # the raw markup instead.
            decoded = export_page.decode()
            major['MajorFile'] = decoded.split(
                'name=MajorFile value=')[1].split('>')[0]

            # Raw strings keep ``\d`` a regex token rather than an invalid
            # string escape; the pattern text itself is unchanged.
            # NOTE(review): the bracketed part is a regex character class
            # (one character from that set), not the literal tag text, and
            # ``export_page`` is bytes here rather than ``decoded`` --
            # confirm both are intended.
            major['year'] = _.js_match(
                _.js_match(
                    export_page,
                    r'/[<input type=hidden name=year year="](\d{4})/'),
                r'/\d{4}/')

    return college_pages
Esempio n. 4
0
def download_catalog_year(college_pages, auth):
    """Record the catalog year(s) for each major in ``college_pages``.

    Every major of every college in ``COLLEGES`` is requested through
    ``auth`` using ``SOURCE_URL`` with ``URL_PARAMS`` extended by
    ``changeMajor='Next'``, ``call='5'``, the college and the major name.
    The major's dict is then updated:

    * When the page has ``<option>`` elements, ``'year'`` is the list of
      their tag-stripped texts.
    * When it has none, ``'MajorFile'`` is the text after
      ``name=MajorFile value=`` up to the next ``>``, and ``'year'`` is the
      result of the nested ``_.js_match`` calls.

    Args:
        college_pages: Indexable by college, yielding a list of major dicts
            with a ``'name'`` key each.  Modified in place.
        auth: Object whose ``get(url)`` returns a response exposing
            ``content`` (presumably an authenticated session -- confirm
            against the caller).

    Returns:
        ``college_pages`` itself, after the updates.

    Raises:
        IndexError: If a page without ``<option>`` elements also lacks the
            ``name=MajorFile value=`` marker.
    """
    parameters = copy.copy(URL_PARAMS)
    parameters['changeMajor'] = 'Next'
    parameters['call'] = '5'

    for college in COLLEGES:
        parameters['college'] = college
        print('Downloading College', college)
        for major in college_pages[college]:
            print('    Getting Degree', major['name'])
            parameters['major'] = major['name']

            url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
            export_page = auth.get(url).content
            # Select the options a single time; the result serves both the
            # branch test and the extraction.
            options = bs4.BeautifulSoup(export_page).select('option')
            if options:
                major['year'] = [_.strip_tags(option) for option in options]
            else:
                decoded = export_page.decode()
                major['MajorFile'] = decoded.split(
                    'name=MajorFile value=')[1].split('>')[0]

                # Raw strings avoid the invalid ``\d`` string escape while
                # leaving the pattern text byte-for-byte the same.
                # NOTE(review): the bracketed part is a regex character
                # class, not the literal tag text, and the bytes
                # ``export_page`` (not ``decoded``) is what gets matched --
                # confirm both are intended.
                inner_match = _.js_match(
                    export_page,
                    r'/[<input type=hidden name=year year="](\d{4})/')
                major['year'] = _.js_match(inner_match, r'/\d{4}/')

    return college_pages
Esempio n. 5
0
def test_strip_tags(case, expected):
    """Check that ``_.strip_tags(case)`` equals ``expected``."""
    actual = _.strip_tags(case)
    assert actual == expected