Exemple #1
0
def inside_course(count, course_name, link):
    """Collect the modules listed for each year on one course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Every
    nested ``accordion-body`` panel below ``#section2`` is treated as one
    year of study and labelled ``"Year 1"``, ``"Year 2"``, ... in document
    order.  One ``course`` record per module is appended to the module-level
    ``clist``.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If an expected element is missing, i.e. one of the
            chained lookups returned ``None``.
    """
    page = simple_link(link)
    body = page.find('body')
    div = body.find('div',
                    class_='site-container-md').find('main').find('div').find(
                        'div', class_='skin-bg-module-light').find(
                            'div', class_='tk-row anchor-link-scroll')
    div = div.find('div', id='section2').find(
        'div', class_='accordion-body').find_all('div',
                                                 class_='accordion-body')

    for number, panel in enumerate(div, start=1):
        # Build the label from scratch each time; slicing the old label back
        # to four characters dropped the space and produced "Year2".
        year = "Year " + str(number)
        module_list = panel.find('ul')
        items = module_list.find_all('li') if module_list is not None else []
        if items:
            for item in items:
                subject = item.find('span', class_='module-title').text
                clist.append(course(count, course_name, link, year, subject))
        else:
            # No module list for this year: record the paragraph that follows
            # the panel heading as the single subject.
            subject = panel.find('h3').find_next('p').text
            clist.append(course(count, course_name, link, year, subject))
        print('\n')
Exemple #2
0
def inside_course(count, cc, link):
    """Collect the subjects of one course from a two-rows-per-year table.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Table
    rows are consumed in pairs: the first cell of the even row is the year
    label and the odd row holds the subjects.  One ``course`` record per
    subject is appended to the module-level ``clist``.

    The subject row is read with a chain of fallbacks: a nested ``<ul>``
    inside the cell's list, then the ``<li>`` items of that list, then the
    row's first ``<p>``.

    Args:
        count: Running number of the course, stored in every record.
        cc: Course name, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match, or if the final
            paragraph fallback finds no ``<p>``.
    """
    page = javascript_link(link)
    body = page.find('body')
    div = body.find('section', class_='visible-body').find(
        'section', class_='page-level page-content').find(
            'div', class_='wrapper').find('section', id='page-content-main')
    div = div.find('section', id='main-content').find(
        'div', class_='field-items').find(
            'div',
            class_='field-item odd').find('table').find('tbody').find_all('tr')

    for i in range(1, len(div), 2):
        year = div[i - 1].find('td').text
        # The fallbacks are driven by lookup failures (None.find, iterating
        # None).  `except Exception` keeps that behaviour but no longer
        # swallows KeyboardInterrupt/SystemExit like a bare `except:` did.
        try:
            li = div[i].find('td').find('ul')
            try:
                ul = li.find('ul')
                # Iterating a tag yields its direct children.
                for k in ul:
                    subject = k.text
                    clist.append(course(count, cc, link, year, subject))
            except Exception:
                l = li.find_all('li')
                for k in l:
                    subject = k.text
                    clist.append(course(count, cc, link, year, subject))
        except Exception:
            subject = div[i].find('p').text
            clist.append(course(count, cc, link, year, subject))
        print('\n')
Exemple #3
0
def inside_category(count,course_name,link):
    """Collect the modules shown in each tab of a course's module section.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  The text
    of each ``<li>`` in the tab bar is used as the year label and the
    ``content panel`` at the same position supplies the subjects.  One
    ``course`` record per subject is appended to the module-level ``clist``.

    Args:
        count: Running number of the course, stored in every record.  When a
            panel cannot be parsed and ``count`` is 1, scanning stops without
            trying the paragraph fallback.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the outer page layout does not match.
    """
    page = simple_link(link)
    body = page.find('body')
    div = body.find('form').find('div',class_='offcanvas-wrap').find('div',class_='offcanvas-content').find('div',id='content').find('div',id='modules').find('div',class_='row')
    # Panels and tab labels live under the same container; look it up once.
    tabs = div.find('div',class_='column large-12').find('div',class_='modulesTabs')
    divv = tabs.find_all('div',class_='content panel')
    li = tabs.find('ul').find_all('li')

    for i, tab in enumerate(li):
        year = tab.text
        print(year)

        try:
            d_tag = divv[i].find('div',class_='moduleBlock').find_all('div',class_='js-expandmore')

            for j in d_tag:
                subject = j.text
                clist.append(course(count, course_name, link, year, subject))

        except Exception:
            if count == 1:
                break
            try:
                # Fallback: the panel has no module blocks, only a paragraph.
                # The record used to reference an undefined name `cc`, so the
                # resulting NameError was swallowed and nothing was stored.
                subject = divv[i].find('p').text
                clist.append(course(count, course_name, link, year, subject))
            except Exception:
                break
Exemple #4
0
def inside_course(count, course_name, link):
    """Collect the subjects of every ``<section>`` of a course article.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Each
    nested ``<section>`` contributes its ``<h3>`` text as the year label.
    Subjects are the ``<li>`` items of the section's first ``<ul>``; when the
    section has no list, or when ``count`` is 3, the first ``<p>`` is used as
    a single subject instead.  Records are appended to the module-level
    ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match, or a section has
            no ``<h3>`` or no usable ``<p>`` fallback.
    """
    page = javascript_link(link)
    body = page.find('body')
    div = body.find('main').find(
        'div', class_='container mx-auto px-4 pb-12 min-h-screen').find(
            'div', class_='lg:flex').find('article').find('section',
                                                          class_='block')
    sections = div.find_all('section')

    for section in sections:
        year = section.find('h3').text
        print(year)

        # Course 3 is always read from the paragraph, never from a list.
        item_list = section.find('ul') if count != 3 else None
        if item_list is not None:
            subjects = [item.text for item in item_list.find_all('li')]
        else:
            subjects = [section.find('p').text]

        for subject in subjects:
            clist.append(course(count, course_name, link, year, subject))
            print(subject)

        print('\n')
Exemple #5
0
def inside_master(count, course_name, link):
    """Collect the subjects of a course page that has no per-year split.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Subjects
    are taken from every ``<li>`` inside the third
    ``content-type content-type--main`` block.  If that block has no lists,
    the ``<span>`` of every toggle block inside the ``js-tabcontent`` panels
    is used instead.  Records are appended to the module-level ``clist`` with
    a placeholder year (``"---"`` for list items, ``"--"`` for toggles).

    Any failure while parsing prints two marker lines and appends a single
    ``"Null"``/``"Null"`` record; records appended before the failure are
    kept.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        lists = body.find_all(
            'div', class_='content-type content-type--main')[2].find_all('ul')
        if lists:
            for item_list in lists:
                for item in item_list.find_all('li'):
                    subject = item.text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, "---", subject))
        else:
            for tab in body.find_all('div', class_='js-tabcontent'):
                toggles = tab.find_all(
                    'div', class_='content-type content-type--toggle')
                for toggle in toggles:
                    subject = toggle.find('span').text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, "--", subject))
    except Exception:
        # Best effort: an unexpected layout is recorded, not raised.  Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print("Null")
        print("NULL")
        clist.append(course(count, course_name, link, "Null", "Null"))
Exemple #6
0
def inside_course(count, course_name, link):
    """Collect the modules of each tab panel of a course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Every
    ``crs-dtl-content-tabs-panel`` contributes its ``<h4>`` text as the year
    label and one subject per table row that has a title cell.  Rows without
    a title cell are skipped.  Records are appended to the module-level
    ``clist`` and echoed to stdout.

    Any failure while parsing appends a single ``"Null"``/``"Null"`` record
    and prints a marker; records appended before the failure are kept.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        panels = body.find(
            'div', class_='bcu-tabs-panel man pam secondary-bg-grey-light-3'
        ).find_all('div', class_='crs-dtl-content-tabs-panel')
        for panel in panels:
            year = panel.find('h4', class_='hide-on-desktop mtl mbm').text
            print(year)
            rows = panel.find('table',
                              class_='mvn plain-table').find_all('tr')

            for row in rows:
                # Skip rows that carry no module title instead of relying on
                # a swallowed AttributeError.
                first_cell = row.find('td')
                if first_cell is None:
                    continue
                title = first_cell.find(
                    'div', class_='left-col inline-block size14of16')
                if title is None:
                    continue
                subject = title.text.strip()
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
            print('\n')
    except Exception:
        # Best effort: an unexpected layout is recorded, not raised.
        clist.append(course(count, course_name, link, "Null", "Null"))
        print("Year : Null" + '\n' + "Subject : Null" + '\n')
Exemple #7
0
def inside_category(count, course_name, link):
    """Collect the modules of each year tab of a course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).

    Primary layout: every ``tabContent`` block inside the ``tabs yearTabs``
    section is one year, labelled ``"Year 1"``, ``"Year 2"``, ... in document
    order.  Subjects come from the ``brief`` section's list, or, when there
    is no list, from the link text in the first cell of each row (after the
    first) of the ``modules`` table.  If reading those fails, the ``brief``
    section's first paragraph is used.

    Fallback layout (used when the primary parse raises): the leading
    ``<h4>`` headings of the ``column-block`` whose text starts with
    ``"Year"`` are the year labels and the paragraph following each heading
    is the subject.  Scanning stops at the first heading that does not start
    with ``"Year"``.

    Records are appended to the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the fallback layout does not match either.
    """
    page = simple_link(link)
    body = page.find('body')
    try:
        div = body.find('section',
                        class_='tabs yearTabs').find_all('div',
                                                         class_='tabContent')

        for yc, i in enumerate(div, start=1):
            # Build the label from scratch; slicing the previous label back
            # to four characters dropped the space ("Year2").
            year = "Year " + str(yc)
            section = i.find('section', class_='brief').find('ul')
            print(year)
            try:
                if section is None:
                    tr = i.find('section', class_='modules').find(
                        'table').find('tbody').find_all('tr')

                    # The first table row is skipped.
                    for row in tr[1:]:
                        subject = row.find('td').find('a').text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        print(' ' + subject)

                else:
                    for j in section.find_all('li'):
                        subject = j.text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        print(' ' + subject)
            except Exception:
                subject = i.find('section', class_='brief').find('p').text
                clist.append(course(count, course_name, link, year, subject))
                print(subject)

            print('\n')
    except Exception:
        div = body.find('div', class_='column-block')
        for k in div.find_all('h4'):
            year = k.text
            if not year.startswith("Year"):
                break
            print(year)
            subject = k.find_next('p').text
            clist.append(course(count, course_name, link, year, subject))
            print(subject)
Exemple #8
0
def inside_module(llist):
    """Collect the modules of every course page in ``llist``.

    Course names are read from the module-level ``nlist`` at the same
    position as the link.  Each page is first fetched with ``simple_link``;
    if the expected containers cannot be reached it is fetched again with
    ``javascript_link`` (both assumed to return a parsed, BeautifulSoup-like
    document -- confirm against their definitions).

    Every ``<div>`` inside the tab container is visited.  The nearest
    preceding ``<h3>`` is the year label and the ``<li>`` items of the div's
    first list are the subjects.  A div without a list contributes its first
    ``<p>`` instead, and a div with neither ends the scan of that page.
    Records are appended to the module-level ``clist`` using the zero-based
    position as course number; the progress line printed to stdout is
    one-based.

    Args:
        llist: Iterable of course page URLs.

    Raises:
        IndexError: If ``nlist`` is shorter than ``llist``.
        AttributeError: If neither fetch yields the expected layout.
    """
    host_class = 'col-xs-12 col-sm-12 col-md-12 u-p-bottom--3 u-p-right--0'

    for count, i in enumerate(llist):
        page = simple_link(i)
        body = page.find('body')
        course_name = nlist[count]
        print(str(count + 1) + " " + course_name + " " + i)

        try:
            div = body.find('main').find('div', class_=host_class).find(
                'div', class_='course-block')
            div = div.find('div', class_='responsive-tabs vertical-tabs')
        except Exception:
            # The static page did not have the containers; retry with the
            # JavaScript-capable fetcher, which sees an extra class.
            page = javascript_link(i)
            body = page.find('body')
            div = body.find('main').find('div', class_=host_class).find(
                'div', class_='course-block')
            div = div.find(
                'div',
                class_='responsive-tabs vertical-tabs responsive-tabs--enabled'
            )

        for j in div.find_all('div'):
            year = j.find_previous('h3').text
            print(year)

            item_list = j.find('ul')
            if item_list is not None:
                for k in item_list.find_all('li'):
                    subject = k.text
                    print(' ' + subject)
                    clist.append(course(count, course_name, i, year, subject))
                continue

            paragraph = j.find('p')
            if paragraph is None:
                break
            subject = paragraph.text
            print(' ' + subject)
            clist.append(course(count, course_name, i, year, subject))

        print('\n')
Exemple #9
0
def inside_course(count, course_name, link):
    """Collect the modules of each accordion panel of a course page.

    The page is fetched with ``javascript_link``; embedded documents are
    fetched with ``simple_link`` (both assumed to return a parsed,
    BeautifulSoup-like document -- confirm against their definitions).

    Each ``panel panel-default`` contributes its ``<h4>`` text as the year
    label.  When the panel embeds an ``<iframe>``, the framed document is
    downloaded once and read in this order of preference:

    1. the second cell of every table row after the first (stopping at the
       first row with fewer than two cells),
    2. the first ``<p>``,
    3. the ``<li>`` items of the first ``<ul>``.

    If the iframe is absent or cannot be read, the panel's own first ``<p>``
    is used.  Records are appended to the module-level ``clist`` and echoed
    to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the accordion layout does not match or the final
            paragraph fallback finds nothing.
    """
    page = javascript_link(link)
    body = page.find('body')
    div = body.find('div', id='course-content-accordion').find_all(
        'div', class_='panel panel-default')

    for i in div:
        year = i.find('div', class_='panel-heading collapsed').find('h4').text
        print(year)
        d_tag = i.find('div', class_='panel-collapse collapse')
        try:
            # "https:" is prepended, so `src` is presumably protocol-relative.
            iframe = "https:" + d_tag.find('iframe').attrs['src']
            # Download the framed document once; it used to be fetched again
            # for every fallback.
            frame_body = simple_link(iframe).find('body')

            table = frame_body.find('table')
            if table is not None:
                for row in table.find_all('tr')[1:]:
                    cells = row.find_all('td')
                    if len(cells) < 2:
                        break
                    subject = cells[1].text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    print(subject)
            else:
                paragraph = frame_body.find('p')
                if paragraph is not None:
                    subject = paragraph.text
                    print(subject)
                    clist.append(
                        course(count, course_name, link, year, subject))
                else:
                    for item in frame_body.find('ul').find_all('li'):
                        subject = item.text
                        print(subject)
                        clist.append(
                            course(count, course_name, link, year, subject))

        except Exception:
            # No usable iframe: fall back to the panel's own paragraph.
            subject = d_tag.find('p').text
            print(subject)
            clist.append(course(count, course_name, link, year, subject))
        print('\n')
Exemple #10
0
def category(html_page):
    """Walk the course tables of a listing page and dispatch each course.

    Every row after the first of each ``datatable`` is expected to hold a
    ``course-name`` cell with a link.  The module-level ``count`` is
    incremented for every row, including rows that later fail.  Dispatch
    rules, in order:

    * course number 218: the base URL is prepended and ``inside_course`` is
      called, with no ``overview`` rewrite;
    * absolute links (containing ``https``):
        - containing ``postgraduate``: ``overview`` is rewritten to
          ``module-details`` and ``master_course`` is called;
        - course name in the fixed no-module list: a ``"Null"``/``"Null"``
          record is appended to the module-level ``clist``;
        - containing ``undergraduate``: ``overview`` is rewritten and
          ``inside_course`` is called;
        - anything else: ``inside_course_list`` is called;
    * relative links: the base URL is prepended, ``overview`` is rewritten
      and ``inside_course`` is called.

    Any exception raised while handling a row is swallowed and the row is
    skipped.

    Args:
        html_page: Parsed listing page offering ``find``/``find_all``
            (presumably a BeautifulSoup document -- confirm against caller).
    """
    global count
    baseUrl = 'https://www.liverpool.ac.uk'
    backUrl = 'module-details'
    # Courses recorded without modules.  The strings are compared exactly.
    # NOTE(review): 'Catalan (Honours Select)  ' carries two trailing spaces
    # as in the original comparison -- confirm that it matches the page text.
    no_module_courses = (
        'Applied English BA (Hons)',
        'Basque (Honours Select)',
        'Catalan (Honours Select)  ',
        'Chinese (Honours Select)',
        'Combined Honours',
        'Criminology and Security BA (Hons)',
        'Game Design BA (Hons)',
        'Honours Select',
        'Italian (Honours Select)',
        'Joint Honours',
        'Portuguese (Honours Select)',
        'Social Policy (Honours Select)',
        'Spanish (Honours Select)',
        'Veterinary Conservation Medicine BSc',
        'Veterinary Conservation Medicine Intercalated Honours BSc',
    )
    body = html_page.find('body')

    tables = body.find_all('table',class_='datatable')
    for i in tables:
        # The first row of each table is skipped.
        for row in i.find_all('tr')[1:]:
            try:
                count+=1
                anchor = row.find('td',class_='course-name').find('a')
                link = anchor.attrs['href']
                course_name = anchor.text

                if count == 218:
                    print(str(count) + " " + course_name + " " + link)
                    link = baseUrl + link
                    inside_course(count, course_name, link)

                elif 'https' in link:
                    print(str(count) + " " + course_name + " " + link)

                    if 'postgraduate' in link:
                        link = link.replace("overview",backUrl)
                        master_course(course_name,link)

                    elif course_name in no_module_courses:
                        year = "Null"
                        subject = "Null"
                        print(year)
                        print(subject)
                        clist.append(course(count, course_name, link, year, subject))

                    elif 'undergraduate' in link:
                        link = link.replace("overview", backUrl)
                        print(str(count) + " " + course_name + " " + link)
                        inside_course(count, course_name, link)

                    else:
                        inside_course_list(count, course_name, link)

                else:
                    # Relative link: make it absolute, then point it at the
                    # module-details page.
                    link = (baseUrl + link).replace("overview", backUrl)
                    print(str(count) + " " + course_name + " " + link)
                    inside_course(count, course_name, link)

            except Exception:
                # Best effort: a row that cannot be processed is skipped.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                continue

            print('\n')
Exemple #11
0
def inside_course(count, course_name, links):
    """Collect the subjects listed under each heading of a course outline.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Inside
    the ``#course-outline`` fieldset every ``<h2>`` after the first is
    treated as a year label.  For the heading at position ``i`` the subjects
    are the ``<li>`` items of the ``i``-th list found inside the first
    ``<h2>``; if that fails, the ``i``-th ``<p>`` of the outline is used as a
    single subject.  Records are appended to the module-level ``clist``.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        links: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match.
        IndexError: If the paragraph fallback has no ``<p>`` at position
            ``i``.
    """
    page = javascript_link(links)
    body = page.find('body')
    div_tag = body.find(
        'div', class_='campl-row campl-content campl-recessed-content').find(
            'div', class_='campl-wrap clearfix').find(
                'div', class_='campl-column6 campl-main-content').find(
                    'div', class_='node node-course-profile clearfix')
    div_tag = div_tag.find(
        'div', class_='content campl-content-container'
    ).find(
        'div',
        class_=
        'field-group-htabs-wrapper group-container-college field-group-htabs'
    ).find('div', class_='horizontal-tabs clearfix')
    div_tag = div_tag.find(
        'div', class_='horizontal-tabs-panes horizontal-tabs-processed').find(
            'fieldset', id='course-outline').find('div',
                                                  class_='field-item even')

    total_year = div_tag.find_all('h2')
    print(str(count) + " " + course_name)

    # These lookups do not depend on the loop variable, so do them once.
    # NOTE(review): the lists are searched inside the first <h2> only, as in
    # the original code.  A heading rarely contains lists, so the paragraph
    # fallback probably always runs -- confirm whether
    # div_tag.find_all('ul') was intended.
    first_heading = div_tag.find('h2')
    lists = first_heading.find_all('ul') if first_heading is not None else []
    paragraphs = div_tag.find_all('p')

    for i in range(1, len(total_year)):
        year = total_year[i].text

        try:
            for k in lists[i].find_all('li'):
                subject = k.text
                clist.append(course(count, course_name, links, year, subject))

        except Exception:
            subject = paragraphs[i].text
            clist.append(course(count, course_name, links, year, subject))
            print(year + " " + subject)

    print('\n')
Exemple #12
0
def inside_course(count,course_code,course_name,link):
    """Collect the subjects shown in each tab of a degree-structure section.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  The link
    text of each tab header is the year label and the ``<div>`` at the same
    position inside the tab content area supplies the subjects:

    * if the tab's second ``<p>`` has children, its markup is split on
      ``<br/>`` and each piece becomes one subject;
    * if that paragraph is empty, or there is no second paragraph, the
      ``<li>`` items of the tab's first list are used;
    * if that fails too, a single ``"Year Abroad"`` subject is recorded.

    Records are appended to the module-level ``clist``.

    Args:
        count: Running number of the course, stored in every record.
        course_code: Not used by this function.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match.
    """
    page = simple_link(link)
    body = page.find('body')
    div = body.find('div',class_='site-content wrapper').find('div',class_='site-content__inner clearfix').find('div',class_='site-content__body').find('div',class_='site-content__main')
    div = div.find('div',class_='prospectus--programme').find('section',id='degree-structure').find('div',class_='collapse__content').find('div',class_='tabs')
    ul = div.find('ul').find_all('li')
    if not ul:
        return

    # The tab bodies do not change between iterations; look them up once.
    tabs = div.find('div',class_='tabs__content island').find_all('div')

    for i, tab_header in enumerate(ul):
        year = tab_header.find('a').text

        try:
            tag = tabs[i].find_all('p')[1]
            # len() of a tag is its number of children.
            if len(tag) == 0:
                for x in tabs[i].find('ul').find_all('li'):
                    subject = x.text
                    clist.append(course(count, course_name, link, year, subject))
            else:
                parts = str(tag).split('<br/>')

                # Strip the paragraph's own opening and closing tags.
                parts[0] = parts[0].replace('<p>', '')
                parts[-1] = parts[-1].replace('</p>', '')
                for part in parts:
                    clist.append(course(count, course_name, link, year, part))

        except Exception:
            try:
                print(year)
                for x in tabs[i].find('ul').find_all('li'):
                    subject = x.text
                    clist.append(course(count, course_name, link, year, subject))
            except Exception:
                subject = "Year Abroad"
                clist.append(course(count, course_name, link, year, subject))
0
def inside_category(count, course_name, link):
    """Collect the modules of one course page, with a special case for #97.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).

    * Course number 97 uses a tabbed layout: every ``tabs-panel`` is one
      year, labelled ``"Year 1"``, ``"Year 2"``, ... in document order, and
      its first ``<p>`` is the single subject.
    * All other courses use the ``#modules`` block: every ``<table>`` is read
      and the link text of every second row (starting with the first) is a
      subject.  A table whose preceding ``<p>`` reads
      ``"Optional modules:"`` takes the nearest preceding ``<h4>`` as its
      year; any other table consumes the next unused ``<h4>`` of the block.

    Records are appended to the module-level ``clist``.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match.
        IndexError: If there are fewer ``<h4>`` headings than tables that
            need one.
    """
    page = javascript_link(link)
    body = page.find('body')

    if count == 97:
        div = body.find('div', class_='dialog-off-canvas-main-canvas').find(
            'div',
            class_='off-canvas-content').find('main').find('section').find(
                'div', class_='row page-body')
        div = div.find('div', class_='single-course-modules').find_all(
            'div', class_='tabs-panel')
        # The counter was never advanced and the label lost its space when
        # sliced, so every panel after the first was labelled "Year1".
        for number, panel in enumerate(div, start=1):
            year = "Year " + str(number)
            subject = panel.find('p').text
            clist.append(course(count, course_name, link, year, subject))

    else:
        div = body.find('div', id='courseDetailsPage').find('div',
                                                            id='modules')
        h4 = div.find_all('h4')
        yc = 0
        for table in div.find_all('table'):
            if table.find_previous('p').text == "Optional modules:":
                year = table.find_previous('h4').text
            else:
                year = h4[yc].text
                yc += 1

            tr = table.find('tbody').find_all('tr')
            # Only every second row is read, starting with the first.
            for row in tr[::2]:
                subject = row.find('a').text
                clist.append(course(count, course_name, link, year, subject))
            print('\n')
Exemple #14
0
def inside_course(count, course_name, link):
    """Collect the modules shown on each card of a course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Every
    ``card standard-card single-item`` contributes its ``<header>`` text as
    the year label and the ``<li>`` items of its first list as subjects.  A
    card without a list contributes its first ``<p>`` as a single subject.
    A card without a header is labelled ``"Null"``.  Records are appended to
    the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the card container is missing, or a card has
            neither a list nor a paragraph.
    """
    page = simple_link(link)
    cards = page.find(
        'div',
        class_='flex-wrapper pinned-items text-only action-list').find_all(
            'div', class_='card standard-card single-item')

    for card in cards:
        header = card.find('header')
        # A missing header used to leave `year` unbound (first card) or
        # carrying the previous card's label in the fallback path.
        year = header.text if header is not None else "Null"
        item_list = card.find('ul') if header is not None else None

        if item_list is not None:
            print(year)
            for item in item_list.find_all('li'):
                subject = item.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
        else:
            if header is not None:
                print(year)
            subject = card.find('p').text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
Exemple #15
0
def inside_course(count, course_name, link):
    """Collect the modules listed in the ``#course-tab-3`` tab of a course.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).

    * When the tab contains lists, every ``<ul>`` is one year, labelled
      ``"Year 1"``, ``"Year 2"``, ... in document order, and its ``<li>``
      items are the subjects.
    * Otherwise every ``<h6>`` is a subject and the nearest preceding
      ``<h5>`` is its year label.

    Records are appended to the module-level ``clist``.  A failure while
    reading the tab prints a "Problem with Course" line instead of raising;
    records appended before the failure are kept.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the outer page containers are missing (that part
            of the lookup is not guarded).
    """
    page = simple_link(link)
    body = page.find('body')
    div = body.find(
        'div', class_='id7-fixed-width-container').find('main').find(
            'div', class_='id7-main-content').find(
                'div', class_='column-1-content').find('div',
                                                       class_='container-text')
    try:
        div = div.find('div', class_='hp-layout').find('div',
                                                       id='course-tab-3')
        ul = div.find_all('ul')

        if ul:
            for number, item_list in enumerate(ul, start=1):
                # Build the label from scratch; slicing the previous label
                # back to four characters dropped the space ("Year2").
                year = "Year " + str(number)
                print(year)
                for item in item_list.find_all('li'):
                    subject = item.text
                    clist.append(
                        course(count, course_name, link, year, subject))
                print('\n')

        else:
            for heading in div.find_all('h6'):
                year = heading.find_previous('h5').text
                subject = heading.text
                clist.append(course(count, course_name, link, year, subject))

    except Exception:
        # Best effort: report the page and carry on with the next course.
        print("Problem with Course " + course_name + " " + link)
Exemple #16
0
def inside_course(count, course_name, link):
    """Collect the modules of each accordion of a course page.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  The
    accordions are looked up in the ``col-md-10 offset-md-1`` column of the
    ``content-toggle`` block, or in its ``col-md-8`` column when the former
    does not exist.

    For every accordion the nearest preceding ``<h3>`` is the year label and
    the link text in the first cell of each table row is a subject.  If an
    accordion cannot be read that way, the function switches to a
    heading-based reading of the whole column: every ``<h3>`` is a year and
    the second paragraph after it is the subject.  It then stops processing
    further accordions.

    Records are appended to the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If neither column exists or the heading-based
            fallback cannot find its paragraphs.
    """
    page = javascript_link(link)
    body = page.find('body')

    # Resolve the column once; it used to be re-derived through try/except in
    # both the accordion lookup and the fallback.
    toggle = body.find('main').find('div', class_='content-toggle')
    column = toggle.find('div', class_='col-md-10 offset-md-1')
    if column is None:
        column = toggle.find('div', class_='col-md-8')

    for accordion in column.find_all('div', class_='accordion'):
        try:
            tr = accordion.find('table',
                                class_='table').find('tbody').find_all('tr')
            year = accordion.find_previous('h3').text
            print(year)
            for row in tr:
                subject = row.find('td').find('a').text
                clist.append(course(count, course_name, link, year, subject))
                print(' ' + subject)
        except Exception:
            for heading in column.find_all('h3'):
                year = heading.text
                print(year)
                subject = heading.find_next('p').find_next('p').text
                clist.append(course(count, course_name, link, year, subject))
                print(subject)
            # The fallback covers the whole column, so stop here.
            break
        print('\n')
Exemple #17
0
def master_course(course_name,link):
    """Record every ``<h5>`` of a page's ``#module-details`` section.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  The text
    of the first ``<h2>`` inside the ``content`` section is used as the year
    label for all records.  The module-level ``count`` is incremented once
    per heading and the new value is stored in that heading's record, which
    is appended to the module-level ``clist``.

    Args:
        course_name: Name of the course, stored in every record.
        link: URL of the module-details page.

    Raises:
        AttributeError: If the page layout does not match.
    """
    global count

    document_body = simple_link(link).find('body')
    module_section = document_body.find('section', class_='content').find(
        'section', id='module-details')
    headings = module_section.find('div').find_all('h5')

    year = document_body.find('section', class_='content').find('h2').text
    print(year)

    for heading in headings:
        count = count + 1
        subject = heading.text
        print(subject)
        record = course(count, course_name, link, year, subject)
        clist.append(record)
Exemple #18
0
def inside_course(count, course_name, link):
    """Collect the modules of the active tab of a course's module list.

    The page is fetched with ``javascript_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Inside
    the active ``tab-pane`` every ``module-year accordion`` is one year.  The
    ``<h5>`` of the card header is the year label and the first cell of each
    row of the table in the card's second ``<div>`` is a subject.  Records
    are appended to the module-level ``clist``.

    If the module list cannot be read, a single ``"null"``/``"null"`` record
    is appended instead; records appended before the failure are kept.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the outer page containers are missing (that part
            of the lookup is not guarded).
    """
    page = javascript_link(link)
    body = page.find('body')

    div = body.find('div', class_='dialog-off-canvas-main-canvas').find(
        'div', class_='layout-container').find('main')

    div = div.find('div', class_='layout-content').find('article').find(
        'section', class_='scrollspy-wrapper position-relative py-0').find(
            'div', class_='bg-white section-padding section--triangle z-3')

    header_class = ('card-header border border-deep-cerulean border-right-0 '
                    'border-bottom-0 border-left-0 p-0')

    try:
        div = div.find('div', class_='body-content').find_all('div')[2].find(
            'div', class_='module-list').find(
                'div', class_='tab-content').find('div',
                                                  class_='tab-pane active')

        for year_block in div.find_all('div', class_='module-year accordion'):
            card = year_block.find('div', class_='card')

            tr = card.find_all('div')[1].find('table').find(
                'tbody').find_all('tr')
            year = card.find('div', class_=header_class).find('h5').text

            print(year)
            for row in tr:
                subject = row.find('td').text
                clist.append(course(count, course_name, link, year, subject))
            print('\n')
    except Exception:
        # Best effort: an unexpected layout is recorded, not raised.  Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        clist.append(course(count, course_name, link, "null", "null"))
Exemple #19
0
def inside_course(count,course_name,link):
    """Collect the modules of each ``#module-details`` section of a course.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  The
    first matching section is skipped.  Of the remaining ones, each section
    that has an ``<h4>`` contributes that heading as the year label and one
    subject per ``<li>`` of the first list in its first ``<div>``.  The
    subject is the item's ``<h5>`` text when present, otherwise the whole
    item text.  Sections without an ``<h4>`` are ignored.  Records are
    appended to the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the ``course-detail`` container is missing or a
            section with a heading has no list.
    """
    page = simple_link(link)
    body = page.find('body')
    sections = body.find('div',class_='course-detail').find_all('section',id='module-details')

    for section in sections[1:]:
        heading = section.find('h4')
        if heading is None:
            continue
        year = heading.text

        print(year)

        for item in section.find('div').find('ul').find_all('li'):
            title = item.find('h5')
            if title is not None:
                subject = title.text
                print(' ' + subject)
            else:
                subject = item.text
                print(subject)
            clist.append(course(count, course_name, link, year, subject))
Exemple #20
0
def inside_module(count, course_name, link):
    """Collect the modules shown in each tab of a course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).

    1. Tabbed layout: the text of each ``<li>`` of the tab bar is the year
       label and the ``tabs__content`` block at the same position supplies
       one subject per accordion section (the ``accordion__button-title``
       span).
    2. If that fails, the first paragraph of the page's ``wysiwyg`` block is
       recorded as a single subject with the year ``"Year - "``.
    3. If that fails too, a ``"Null"``/``"Null"`` record is stored.

    Records are appended to the module-level ``clist`` and echoed to stdout.
    Records appended before a failure are kept.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.
    """
    page = simple_link(link)
    body = page.find('body')
    div = body.find('section', class_='tabs')
    try:
        yc = div.find('ul').find_all('li')
        modules = div.find_all('div', class_='tabs__content')
        for i, tab in enumerate(yc):
            year = tab.text
            print(year)

            # Indexing (rather than zip) keeps the fallback when there are
            # fewer content blocks than tabs.
            for section in modules[i].find_all('div',
                                               class_='accordion__section'):
                title = section.find(
                    'div', class_='accordion__header').find('h2').find(
                        'span', class_='accordion__button-title')
                subject = title.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
    except Exception:
        try:
            divv = body.find('article',
                             class_='page__course_subpage course').find(
                                 'div', class_='page__content').find(
                                     'div', class_='wysiwyg').find('p')
            year = "Year - "
            print(year)
            subject = divv.text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))

        except Exception:
            year = "Null"
            print(year)
            subject = "Null"
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
Exemple #21
0
def category(html_page):
    """Walk the A-Z course list and dispatch every course.

    Each ``<li>`` of the ``course-list`` inside ``#all-courses-A-Z`` is
    numbered from 1.  Its link is made absolute and suffixed with
    ``#course-content``.  The course named ``'Foundation'`` and the
    international-year-one foundation page are stored in the module-level
    ``clist`` as ``"Null"``/``"Null"`` records; every other course is passed
    to ``inside_course``.

    Args:
        html_page: Parsed listing page offering ``find``/``find_all``
            (presumably a BeautifulSoup document -- confirm against caller).

    Raises:
        AttributeError: If the listing layout does not match.
    """
    baseUrl = 'https://www.exeter.ac.uk'
    endUrl = '#course-content'
    skipped_link = 'https://www.exeter.ac.uk/undergraduate/courses/foundation/internationalyearone/#course-content'

    listing = html_page.find('body').find('div', id='all-courses-A-Z')
    entries = listing.find('ul', class_='course-list').find_all('li')

    for count, entry in enumerate(entries, start=1):
        anchor = entry.find('a')
        course_name = anchor.text
        link = baseUrl + anchor.attrs['href'] + endUrl
        print(str(count) + " " + course_name + " " + link)

        if course_name != 'Foundation' and link != skipped_link:
            inside_course(count, course_name, link)
        else:
            clist.append(course(count, course_name, link, "Null", "Null"))
Exemple #22
0
def inside_subject(count, course_name, link):
    """Collect the modules listed in each block of a course's blue box.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Every
    ``<div>`` found inside the first ``<div>`` of the ``box box-blue``
    container is one year, labelled ``"Year 1"``, ``"Year 2"``, ... in
    document order.  Every ``<li>`` of every list in that block is a subject.
    Records are appended to the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the page layout does not match.
    """
    page = simple_link(link)
    body = page.find('body')
    blocks = body.find('div', class_='box box-blue').find('div').find_all('div')

    for number, block in enumerate(blocks, start=1):
        # Build the label from scratch; slicing the previous label back to
        # four characters dropped the space ("Year2").
        year = "Year " + str(number)
        print(year)
        for item_list in block.find_all('ul'):
            for item in item_list.find_all('li'):
                subject = item.text
                print(' ' + subject)
                clist.append(course(count, course_name, link, year, subject))
        print('\n')
Exemple #23
0
def inside_category(count, course_name, link):
    """Collect the modules of each tab panel of a course page.

    The page is fetched with ``simple_link`` (assumed to return a parsed,
    BeautifulSoup-like document -- confirm against its definition).  Every
    ``js-tabcontent`` panel inside the ``js-tabs tabs`` container is one
    year, labelled ``"Year 1"``, ``"Year 2"``, ... in document order.  The
    first ``<span>`` of each toggle block in the panel is a subject.  Records
    are appended to the module-level ``clist`` and echoed to stdout.

    Args:
        count: Running number of the course, stored in every record.
        course_name: Name of the course, stored in every record.
        link: URL of the course page.

    Raises:
        AttributeError: If the tab container is missing or a toggle block has
            no ``<span>``.
    """
    page = simple_link(link)
    body = page.find('body')
    panels = body.find('div',
                       class_='js-tabs tabs').find_all('div',
                                                       class_='js-tabcontent')

    for number, panel in enumerate(panels, start=1):
        # Build the label from scratch; slicing the previous label back to
        # four characters dropped the space ("Year2").
        year = "Year " + str(number)
        print(year)

        toggles = panel.find_all('div',
                                 class_='content-type content-type--toggle')
        for toggle in toggles:
            subject = toggle.find('span').text
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
Exemple #24
0
def category(html_page):
    """Walk the links of the listing table and dispatch every course.

    Every ``<a>`` in the cells of the first row of the main-content table is
    numbered from 1.  Its ``href`` is prefixed with ``"https:"`` and a
    progress line is printed.  Courses 7, 36 and 40 are reported as
    problematic and stored in the module-level ``clist`` as
    ``"null"``/``"null"`` records; every other course is passed to
    ``inside_course``.

    Args:
        html_page: Parsed listing page offering ``find``/``find_all``
            (presumably a BeautifulSoup document -- confirm against caller).

    Raises:
        AttributeError: If the listing layout does not match.
        KeyError: If a link has no ``href`` attribute.
    """
    problem_pages = (7, 36, 40)

    main_section = html_page.find('body').find(
        'section', class_='visible-body').find(
            'section', class_='page-level page-content').find(
                'div', class_='wrapper').find('section',
                                              id='page-content-main')
    first_row = main_section.find('section', id='main-content').find(
        'table').find('tbody').find('tr')

    counter = 0
    for cell in first_row.find_all('td'):
        for anchor in cell.find_all('a'):
            counter = counter + 1
            link = "https:" + anchor.attrs['href']
            cc = anchor.text
            prefix = str(counter) + " "
            print(prefix + cc + " " + link)

            if counter in problem_pages:
                print(prefix + 'problem with page ' + cc + " " + link)
                clist.append(course(counter, cc, link, "null", "null"))
            else:
                inside_course(counter, cc, link)
Exemple #25
0
def inside_course(count, cc, link):
    """Record every module listed in a course page's per-year tables.

    Each ``<table>`` inside the ``div.course-profile-content`` container is
    treated as one year of study, numbered from 1 in document order. For
    every row of a table's ``<tbody>``, the text of the first ``<td>`` is
    appended to the module-level ``clist`` as
    ``course(count, cc, link, year, subject)``.

    Args:
        count: Identifier forwarded unchanged to ``course``.
        cc: Course name forwarded unchanged to ``course``.
        link: URL handed to ``simple_link`` and stored on every record.

    Raises:
        AttributeError: If an expected element is missing, since the chained
            ``find`` calls are then made on ``None`` (assumes ``simple_link``
            returns a BeautifulSoup-like tree -- confirm against its
            definition).
    """
    page = simple_link(link)
    body = page.find('body')
    container = body.find('div', class_='pageWrapper').find('article').find(
        'div', class_='row tripleVertPadding').find(
            'div', class_='oneColLayoutContainer').find(
                'div', class_='mainContentContainer')
    container = container.find('div', class_='corporate').find(
        'div', class_='course-profile-content')

    tables = container.find_all('table')

    for number, table in enumerate(tables, start=1):
        # Build the label from scratch for every table. Trimming a running
        # string with year[0:4] also removed the space after "Year", so the
        # second and later tables were labelled "Year2", "Year3", ...
        year = "Year " + str(number)

        rows = table.find('tbody').find_all('tr')
        for row in rows:
            subject = row.find('td').text
            clist.append(course(count, cc, link, year, subject))

        print('\n')
Exemple #26
0
            #print(inside_module)

            #item = inside_module.find_all('div', class_='item')

            print(count.__str__() + ": " + coures_code + " " + course_title +
                  " " + dept + " " + duration + " ")
            for i in inside_module.find_all('div', class_='item'):
                year = i.find('h3').text
                ul = i.find('ul')

                try:
                    xx = ul.find_all('li')
                    for j in ul.find_all('li'):
                        cc = j.text
                        clist.append(
                            course(count, course_title, link, year, cc))
                        #print(" " + year + " " + course)

                except:
                    p = i.find('p')
                    cc = p.text
                    clist.append(course(count, course_title, link, year, cc))
                    #print(" " + year + " " + course)
            print('\n')

        else:
            clist.append(course(count, course_title, link, "Null", "Null"))
            print("Proble with page " + count.__str__() + "\n")

    wb = Workbook()
    file_path = 'C:\\Users\\jatin\\Desktop\\imperial.xlsx'
Exemple #27
0
def inside_course(count, course_name, link):
    """Scrape one course page, trying each known page layout in turn.

    The layouts are attempted in this order: tabbed year panels, the
    accordion section, then the "About"/"Research" module lists. The first
    layout that completes without raising wins. A layout that fails part-way
    keeps whatever records it already appended to ``clist`` before the next
    layout is tried. If every layout fails, a single placeholder record with
    year and subject ``"Null"`` is appended.

    Args:
        count: Identifier forwarded unchanged to ``course``.
        course_name: Course title forwarded unchanged to ``course``.
        link: URL handed to ``simple_link`` and stored on every record.
    """
    page = simple_link(link)
    body = page.find('body')

    layouts = (
        _scrape_tab_panels,
        _scrape_accordion_section,
        _scrape_about_or_research,
    )
    for scrape in layouts:
        try:
            scrape(body, count, course_name, link)
        except Exception:
            # Best-effort scraping: a missing element just means this is not
            # the right layout. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit still stop the run.
            continue
        return

    year = "Null"
    subject = "Null"
    print(year)
    print(subject)
    clist.append(course(count, course_name, link, year, subject))


def _scrape_tab_panels(body, count, course_name, link):
    """Record modules from the tabbed layout, one ``div.panel`` per year."""
    panels = body.find('div', class_='tab-container').find(
        'div', class_='tabs flex-tabs').find_all('div', class_='panel')

    for panel in panels:
        year = panel.find('h3').text
        print(year)
        items = panel.find('ul').find_all('li')
        if items:
            for item in items:
                for block in item.find_all('div'):
                    subject = block.find('h3').text
                    print(' ' + subject)
                    clist.append(
                        course(count, course_name, link, year, subject))
        else:
            # The panel has a list but no entries: keep a marker row.
            subject = "NULL"
            print(' ' + subject)
            clist.append(course(count, course_name, link, year, subject))
        print('\n')


def _scrape_accordion_section(body, count, course_name, link):
    """Record modules from the second ``course-section`` accordion block."""
    unit = body.find('article').find_all(
        'section', class_='course-section')[1].find(
            'div', class_='unit golden-large')
    year = unit.find('div', class_='accordion-header teal').text

    for item in unit.find('ul').find_all('li'):
        subject = item.find('p').find('strong').text
        print(' ' + subject)
        clist.append(course(count, course_name, link, year, subject))


def _scrape_about_or_research(body, count, course_name, link):
    """Record ``p.module`` entries as "About", else list items as "Research"."""
    unit = body.find('article').find(
        'section', class_='course-section').find(
            'div', class_='unit golden-large')
    modules = unit.find('ul').find_all('p', class_='module')

    if modules:
        print("About")
        for module in modules:
            subject = module.text
            print(' ' + subject)
            clist.append(course(count, course_name, link, "About", subject))
        return

    print("Research")
    unit = body.find('article').find(
        'section', class_='course-section study').find(
            'div', class_='unit golden-large')
    for item in unit.find('ul').find_all('li'):
        subject = item.text
        print(' ' + subject)
        clist.append(course(count, course_name, link, "Research", subject))
Exemple #28
0
def main_courses(count, in_course):
    """Scrape each course URL in ``in_course`` and append rows to ``clist``.

    For every URL the module-level counter ``c`` is incremented, the page is
    loaded with ``javascript_link``, and the course title is read from the
    page-intro ``<h1>``. Two page layouts are then tried:

    1. Tabbed layout: each ``<li>`` of the first list in ``div.col-lg-8``
       names a year, and the ``<div>`` at the same index inside
       ``div.tab-content`` holds its modules (list items, or a single
       ``<p>`` when there is no usable list). Rows use ``count``.
    2. Fallback layout: a flat list in ``div.col-sm-8``. Every entry is
       recorded under ``"Year 1"``, and the first entry without a
       ``<strong>`` ends the list. Rows use the global counter ``c``.

    If the title cannot be read, or the fallback layout fails too, one
    placeholder row ``course(c, course_name, url, "NUll", "Null")`` is
    appended.

    Args:
        count: Identifier stored on rows produced by the tabbed layout.
        in_course: Iterable of course page URLs.
    """
    global c
    for i in in_course:
        c += 1
        # Reset per URL. Without this, a failed title lookup left
        # course_name unbound on the first URL (NameError inside the
        # handler) or still holding the previous course's title.
        course_name = "Null"
        page = javascript_link(i)
        body = page.find('body')

        try:
            main = body.find('section', role='main')
            course_name = main.find(
                'div', class_='page-intro__wrapper').find(
                    'div', class_='page-intro__text').find('h1').text

            try:
                sec = main.find('div', class_='container course').find_all(
                    'div', class_='row')[1].find('div', class_='col-lg-8')
                tabs = sec.find('ul').find_all('li')

                print(str(c) + " " + course_name + " " + i)

                for j, tab in enumerate(tabs):
                    try:
                        year = tab.text
                        print(year)
                        items = sec.find('div', class_='tab-content').find_all(
                            'div')[j].find('ul').find_all('li')
                        for item in items:
                            try:
                                subject = item.find('strong').text
                                clist.append(
                                    course(count, course_name, i, year,
                                           subject))
                                print(" " + subject)
                            except Exception:
                                # No <strong>: use the whole item text.
                                subject = item.text
                                clist.append(
                                    course(count, course_name, i, year,
                                           subject))
                    except Exception:
                        # The tab has no usable list: record its paragraph.
                        pane = sec.find(
                            'div',
                            class_='tab-content').find_all('div')[j].find('p')
                        subject = pane.text
                        clist.append(
                            course(count, course_name, i, year, subject))
                        print(" " + subject)

                    print('\n')
                print('\n')
            except Exception:
                rows = main.find('section', class_='').find(
                    'div',
                    class_='container padding-top--half padding-bottom--half'
                ).find_all('div', class_='row')[1].find(
                    'div', class_='col-sm-8').find('ul').find_all('li')

                # Every row of this layout belongs to "Year 1". Setting it
                # before the loop keeps the no-<strong> branch from using an
                # unbound or stale year left by the first layout attempt.
                year = "Year 1"
                for x in rows:
                    try:
                        subject = x.find('strong').text
                        print(year)
                        print(" " + subject)
                        clist.append(course(c, course_name, i, year, subject))
                    except Exception:
                        subject = x.find('p').text
                        print(subject)
                        clist.append(course(c, course_name, i, year, subject))
                        break
                    print('\n')
        except Exception:
            # Best-effort: keep a placeholder row and move on. Exception
            # (not a bare except) lets Ctrl-C still stop the run.
            clist.append(course(c, course_name, i, "NUll", "Null"))
Exemple #29
0
def inside_link(count, link):
    page_code = javascript_link(link)
    body = page_code.find('body').find(
        'div', class_='slat__container container--xl py-4 centre study-info')

    try:
        div = body.find('div', class_='study-options-navigation')
        total_course = div.find('ul').find_all('li')
        key_info = body.find_all('section')

        for i in range(len(total_course)):
            course_name = total_course[i].find('a').text

            div_tag = key_info[i].find('div', class_='option-info').find(
                'div', class_='info').find('dl')
            duration = div_tag.find_all('dd')[1].text
            course_code = div_tag.find_all('dd')[3].text

            years_detail = page_code.find('body').find(
                'section', id='structure').find(
                    'div', class_='centre slat__container container--xl py-4'
                ).find(
                    'div',
                    class_='column-item container container--md pt-2 clearfix'
                ).find('div', class_='tabs')
            years_detailes = years_detail.find_all('div')

            for jj in years_detailes:
                year = jj.find('h3', class_='year').text
                try:
                    ul = jj.find('ul').find_all('li')
                    print(course_code + " " + course_name + " " + duration +
                          " ")
                    for kk in ul:
                        subject = kk.text
                        clist.append(
                            course(count, course_name, link, year, subject))
                        #print(" " + year + " " +subject)

                except:
                    year = years_detailes.find('h3').text
                    subject = years_detail.find('h4').text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)

    except:
        course_n = page_code.find('body').find(
            'section', class_='course slat mt-4').find(
                'div',
                class_='centre page-heading slat__container container--xl'
            ).find('div', class_='grid').find(
                'div', class_='course-title grid__item grid__item--1/2')
        course_name = course_n.find('h1', class_='th-s2 th-o text-blue').text

        key_info = page_code.find('body').find(
            'div',
            class_='slat__container container--xl py-4 centre study-info')

        div_tag = key_info.find('div', class_='info').find('dl')
        duration = div_tag.find_all('dd')[1].text
        course_code = div_tag.find_all('dd')[3].text

        years_detail = page_code.find('body').find(
            'section', id='structure'
        ).find('div', class_='centre slat__container container--xl py-4').find(
            'div',
            class_='column-item container container--md pt-2 clearfix').find(
                'div', class_='tabs')
        years_detailes = years_detail.find_all('div')

        for jj in years_detailes:
            year = jj.find('h3', class_='year').text

            try:
                ul = jj.find('ul').find_all('li')
                #print(course_code + " " + course_name + " " + duration + " ")
                for kk in ul:
                    subject = kk.text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)
            except:
                years_detailes = years_detail.find('div')
                try:
                    year = years_detailes.find('h3', class_='year').text
                    subject = years_detail.find('h4').text
                    clist.append(
                        course(count, course_name, link, year, subject))
                    #print(" " + year + " " + subject)
                except:
                    para = years_detail.find_all('p')

                    for l in para:
                        subject = l.text
                        clist.append(
                            course(count, course_name, link, year, subject))