Exemple #1
0
async def gacco_parser(site, session):
    """Count courses listed in the two gacco JSON feeds.

    Fetches the "list" and "archive" JSON documents in that order, passes
    each raw response to ``site.add_to_fingerprint``, and returns the
    combined length of their ``opened_courses`` and ``archived_courses``
    entries.
    """
    feeds = (
        ("/data/course/gacco_list.json", "opened_courses"),
        ("/data/course/gacco_archive.json", "archived_courses"),
    )
    total = 0
    for rel_url, key in feeds:
        body = await session.text_from_url(site_url(site, rel_url))
        site.add_to_fingerprint(body)
        total += len(json.loads(body)[key])
    return total
Exemple #2
0
async def edcast_org_parser(site, session):
    """Read the course count from the heading of the ``/search`` page.

    The ``.search-navigation-row h4`` element's text is matched against
    ``All Courses (N matches)`` and the parsed N is returned.
    """
    page = await session.text_from_url(site_url(site, "/search"))
    site.add_to_fingerprint(page)
    heading = element_by_css(page, ".search-navigation-row h4")
    return parse_text("All Courses ({:d} matches)", heading.text)[0]
Exemple #3
0
async def hku_hk_parser(site, session):
    """Return the number of top-level entries in the MBBS JSON download.

    The raw response is handed to ``site.process_text`` before being
    decoded as JSON.
    """
    body = await session.text_from_url(
        site_url(site, "/mbbs_admin/public/downloadMbbsJsonFile")
    )
    site.process_text(body)
    return len(json.loads(body))
Exemple #4
0
async def count_elements_parser(site, session, rel_url, css):
    """Count the elements on a page that match a CSS selector.

    Args:
        site: the site being examined; the page text is added to its
            fingerprint.
        session: object used to fetch the page text.
        rel_url: URL of the page, relative to the site.
        css: CSS selector whose matches are counted.

    Returns:
        The number of elements matching ``css``.
    """
    page = await session.text_from_url(site_url(site, rel_url))
    site.add_to_fingerprint(page)
    return len(elements_by_css(page, css))
Exemple #5
0
async def openedu_ru_parser(site, session):
    """Read the course count from ``span#courses-found`` on ``/course/``.

    The element's text must contain " кур"; the first whitespace-separated
    token of that text is returned as an int.
    """
    page = await session.text_from_url(site_url(site, "/course/"))
    site.add_to_fingerprint(page)
    found = element_by_css(page, "span#courses-found")
    label = found.text
    # Sanity check that this is the expected "N courses" label.
    assert " кур" in label
    first_word = label.split()[0]
    return int(first_word)
Exemple #6
0
async def campus_il_parser(site, session):
    """Return the integer shown in ``span#add-sum-course`` on ``/course``."""
    page = await session.text_from_url(site_url(site, "/course"))
    site.add_to_fingerprint(page)
    return int(element_by_css(page, "span#add-sum-course").text)
Exemple #7
0
async def learning_hku_parser(site, session):
    """Return the integer shown in ``li#course-all span`` on the catalog page."""
    page = await session.text_from_url(site_url(site, "/catalog/all-courses/"))
    site.add_to_fingerprint(page)
    badge = element_by_css(page, "li#course-all span")
    return int(badge.text)
Exemple #8
0
async def hku_nursing_parser(site, session):
    """Return the number of top-level entries in the nursing JSON download.

    The raw response is added to the site's fingerprint before decoding.
    """
    body = await session.text_from_url(
        site_url(site, "/nurs_admin/public/downloadNursJsonFile")
    )
    site.add_to_fingerprint(body)
    return len(json.loads(body))
Exemple #9
0
async def enlightme_parser(site, session):
    """Read the total from the "Showing 1-10 of N results" text on ``/courses/``.

    The text comes from the ``.course-index span`` element; the parsed N is
    returned.
    """
    page = await session.text_from_url(site_url(site, "/courses/"))
    site.add_to_fingerprint(page)
    summary = element_by_css(page, ".course-index span")
    return parse_text("Showing 1-10 of {:d} results", summary.text)[0]
Exemple #10
0
async def entuze_parser(site, session):
    """Read the total from the "Viewing N courses" message on ``/course_packages/``.

    The text comes from the ``div#discovery-message`` element; the parsed N
    is returned.
    """
    page = await session.text_from_url(site_url(site, "/course_packages/"))
    site.add_to_fingerprint(page)
    message = element_by_css(page, "div#discovery-message")
    return parse_text("Viewing {:d} courses", message.text)[0]
Exemple #11
0
async def edraak_org_parser(site, session):
    """Sum the per-facet counts shown on ``/en/courses/``.

    Each ``aside.all-courses div.course span`` element holds a number
    wrapped in spaces and/or parentheses; those numbers are added up.
    """
    page = await session.text_from_url(site_url(site, "/en/courses/"))
    site.add_to_fingerprint(page)
    spans = elements_by_css(page, "aside.all-courses div.course span")
    return sum(int(span.text.strip(" ()")) for span in spans)
Exemple #12
0
async def iitbombayx_parser(site, session):
    """Sum the parenthesized facet counts shown on ``/courses``.

    Each ``#block-timeline-2 .facet-item__count`` element holds a number
    wrapped in parentheses; those numbers are added up.
    """
    page = await session.text_from_url(site_url(site, "/courses"))
    site.add_to_fingerprint(page)
    facets = elements_by_css(page, "#block-timeline-2 .facet-item__count")
    return sum(int(facet.text.strip("()")) for facet in facets)
Exemple #13
0
async def studio_to_tiles(site, session):
    """Follow the ``#lms-link`` on the site's root page and count tiles there.

    Raises:
        NotTrying: if the root page does not contain exactly one
            ``#lms-link`` element.
    """
    home = await session.text_from_url(site_url(site, "/"))
    site.process_text(home)
    candidates = elements_by_css(home, "#lms-link")
    if len(candidates) != 1:
        raise NotTrying("Not studio I guess")
    lms_href = candidates[0].get("href")
    return await count_tiles(lms_href, site, session)
Exemple #14
0
async def contact_page(site, session):
    """Process the ``/contact`` page for emails; never yields a course count.

    Raises:
        NotTrying: always. Raised up front when
            ``site.attempt_course_count()`` is None, and otherwise after the
            contact page text has been passed to ``site.process_text`` with
            ``fingerprint=False, emails=True``.
    """
    # The contact page is only worth fetching once the site has produced data.
    if site.attempt_course_count() is None:
        raise NotTrying("No point trying /contact")

    contact_text = await session.text_from_url(site_url(site, "/contact"))
    site.process_text(contact_text, fingerprint=False, emails=True)
    raise NotTrying("Not looking for courses on /contact")
Exemple #15
0
async def millionlights_parser(site, session):
    """Sum the parenthesized counts of the language facets on ``/Course/AllCourses``.

    The facet links are every ``<a>`` under the ``<ul>`` that contains the
    "English (" link; each link's text is parsed as ``label (N)`` and the
    N values are added together.
    """
    page = await session.text_from_url(site_url(site, "/Course/AllCourses"))
    site.add_to_fingerprint(page)
    # Locate the list holding the "English (N)" link and take all its links.
    facet_links = elements_by_xpath(
        page, "//a[contains(text(), 'English (')]/ancestor::ul//a")
    return sum(
        parse_text("{} ({:d})", link.text)[1] for link in facet_links
    )
Exemple #16
0
async def gotoclass_parser(site, session):
    """Count ``div.course-block`` elements across every page of ``/courses/``.

    Pagination is followed through the ``a.next.page-numbers`` link until a
    page has no such link. Each page's text is added to the site's
    fingerprint.
    """
    page_url = site_url(site, "/courses/")
    total = 0
    while True:
        page = await session.text_from_url(page_url)
        site.add_to_fingerprint(page)
        total += len(elements_by_css(page, "div.course-block"))
        next_links = elements_by_css(page, "a.next.page-numbers")
        if next_links:
            assert len(next_links) == 1
            # The href may be relative, so resolve it against the current page.
            page_url = urllib.parse.urljoin(page_url, next_links[0].get('href'))
        else:
            return total
Exemple #17
0
async def cognitiveclass_parser(site, session):
    """Count ``article.course.card`` elements across every page of ``/courses``.

    Pagination is followed through the link whose ``<span>`` text is ``>``
    until a page has no such link. Each page's text is added to the site's
    fingerprint.
    """
    page_url = site_url(site, "/courses")
    total = 0
    while True:
        page = await session.text_from_url(page_url)
        site.add_to_fingerprint(page)
        total += len(elements_by_css(page, "article.course.card"))
        # The "next" link is the <a> wrapping a <span> whose text is '>'.
        next_hrefs = elements_by_xpath(page, "//a/span[text() = '>']/../@href")
        if next_hrefs:
            assert len(next_hrefs) == 1
            page_url = urllib.parse.urljoin(page_url, next_hrefs[0])
        else:
            return total
Exemple #18
0
async def edx_org_parser(site, session):
    """Count catalog search results across all pages of the edx.org API.

    Starts at ``/api/v1/catalog/search?page=1&page_size=200`` and follows the
    ``next`` URL from each response until it is missing or empty. Every
    result with a truthy ``key`` also bumps ``site.course_ids[key]`` by one.

    Returns:
        The total number of result objects seen over all pages.
    """
    page_url = site_url(site, "/api/v1/catalog/search?page=1&page_size=200")
    total = 0
    while True:
        body = await session.text_from_url(page_url)
        site.add_to_fingerprint(body)
        listing = json.loads(body)['objects']
        results = listing['results']
        total += len(results)
        for entry in results:
            key = entry.get('key')
            if key:
                site.course_ids[key] += 1
        page_url = listing.get('next')
        if not page_url:
            return total
Exemple #19
0
async def regex_extract_parser(site, session, rel_url, pattern):
    """Return the integer captured by group 1 of ``pattern`` in a page.

    Args:
        site: the site being examined.
        session: object used to fetch the page text.
        rel_url: URL of the page, relative to the site.
        pattern: regular expression whose first group captures the count.

    Returns:
        Group 1 of the first match, converted to int.
    """
    # NOTE(review): unlike sibling parsers, the page text is not added to the
    # site's fingerprint here -- confirm whether that is intentional.
    page = await session.text_from_url(site_url(site, rel_url))
    match = re.search(pattern, page)
    return int(match[1])
Exemple #20
0
async def courses_page_full_of_tiles(site, session):
    """Return the result of ``count_tiles`` for the site's ``/courses`` page."""
    return await count_tiles(site_url(site, "/courses"), site, session)
Exemple #21
0
async def json_total_value_parser(site, session, rel_url, key):
    """Return the value stored under ``key`` in a JSON document.

    Args:
        site: the site being examined; the raw response is added to its
            fingerprint.
        session: object used to fetch the document text.
        rel_url: URL of the JSON document, relative to the site.
        key: top-level key whose value is returned.

    Returns:
        ``data[key]`` from the decoded JSON, unconverted.
    """
    body = await session.text_from_url(site_url(site, rel_url))
    site.add_to_fingerprint(body)
    return json.loads(body)[key]