Example #1
0
async def count_tiles(url, site, session):
    """Count course tiles on an Open edX course-listing page.

    Fetches *url*, counts the course tiles that start within the next
    year, records any course ids found on *site*, and folds the page
    text into the site fingerprint.

    Raises:
        GotZero: when the page has no course tiles at all.
    """
    text = await session.text_from_url(url)
    elts = elements_by_css(text, ".courses ul.courses-listing > li")
    if not elts:
        # Alternate markup: some themes use a flat item class instead.
        elts = elements_by_css(text, ".courses-listing-item")
        if not elts:
            # No courses, but do we see any indication of it being open edx?
            if any(snip in text.lower() for snip in OPENEDX_SNIPS):
                site.is_openedx = True
            raise GotZero("No .courses-listing-item's")

    # Only count courses that start within the next year.
    soon = datetime.datetime.now() + datetime.timedelta(days=365)
    elts = filter_by_date(elts, soon.isoformat())

    # Try to get the course ids also!  Best-effort per tile: originally one
    # malformed tile aborted the whole loop; now it only skips that tile.
    for elt in elts:
        try:
            course_id = elt.xpath("article/@id")[0]
            site.course_ids[course_id] += 1
        except Exception:
            pass
    site.add_to_fingerprint(text)
    return len(elts)
Example #2
0
async def gotoclass_parser(site, session):
    """Count course blocks across every paginated /courses/ page."""
    page_url = site_url(site, "/courses/")
    total = 0
    while True:
        text = await session.text_from_url(page_url)
        site.add_to_fingerprint(text)
        total += len(elements_by_css(text, "div.course-block"))
        # Follow the "next page" link until there isn't one.
        pager = elements_by_css(text, "a.next.page-numbers")
        if not pager:
            return total
        assert len(pager) == 1
        page_url = urllib.parse.urljoin(page_url, pager[0].get('href'))
Example #3
0
async def count_elements_parser(site, session, rel_url, css):
    """Generic parser: count the elements matching *css* at *rel_url*."""
    page_url = site_url(site, rel_url)
    text = await session.text_from_url(page_url)
    site.add_to_fingerprint(text)
    return len(elements_by_css(text, css))
Example #4
0
async def openu_kz_parser(site, session):
    """Read the course count from the home page's statistics block."""
    text = await session.text_from_url(site.url)
    site.add_to_fingerprint(text)
    values = elements_by_css(text,
                             ".statistics-block .statistics-block__value")
    # First statistics value presumably holds the course count — per site layout.
    return int(values[0].text)
Example #5
0
async def edraak_org_parser(site, session):
    """Sum the per-category course counts from the courses sidebar."""
    listing_url = site_url(site, "/en/courses/")
    text = await session.text_from_url(listing_url)
    site.add_to_fingerprint(text)
    spans = elements_by_css(text, "aside.all-courses div.course span")
    # Each span's text is a parenthesized number like " (12)".
    return sum(int(span.text.strip(" ()")) for span in spans)
Example #6
0
async def iitbombayx_parser(site, session):
    """Add up the facet counts shown in the /courses timeline block."""
    listing_url = site_url(site, "/courses")
    text = await session.text_from_url(listing_url)
    site.add_to_fingerprint(text)
    facets = elements_by_css(text, "#block-timeline-2 .facet-item__count")
    # Each facet's text is a parenthesized number like "(7)".
    return sum(int(facet.text.strip("()")) for facet in facets)
Example #7
0
async def studio_to_tiles(site, session):
    """From a Studio home page, follow the LMS link and count its tiles.

    Raises:
        NotTrying: when there isn't exactly one LMS link on the page.
    """
    text = await session.text_from_url(site_url(site, "/"))
    site.process_text(text)
    links = elements_by_css(text, "#lms-link")
    if len(links) != 1:
        raise NotTrying("Not studio I guess")
    return await count_tiles(links[0].get("href"), site, session)
Example #8
0
async def numfocus_parser(site, session):
    """Crawl the site breadth-first, counting course tiles and course links.

    Pages reachable through ".et_pb_blurb_content a" links are fetched in
    turn; links containing "/about-course/" count as one course each and
    are not fetched.
    """
    # Track visited URLs: without this, pages that link back to each
    # other crawl forever (and duplicates get double-counted).
    seen = {site.url}
    urls = collections.deque([site.url])
    count = 0
    while urls:
        url = urls.popleft()
        text = await session.text_from_url(url)
        site.process_text(text)

        # Look for courses.
        count += len(elements_by_css(text, ".course-rec-3"))

        # Look for further pages that are or have courses.
        subs = elements_by_css(text, ".et_pb_blurb_content a")
        for href in {sub.get("href") for sub in subs}:
            if "/about-course/" in href:
                count += 1
            elif href not in seen:
                seen.add(href)
                urls.append(href)

    return count
Example #9
0
async def cognitiveclass_parser(site, session):
    """Count course cards across every paginated /courses page."""
    page_url = site_url(site, "/courses")
    total = 0
    while True:
        text = await session.text_from_url(page_url)
        site.add_to_fingerprint(text)
        total += len(elements_by_css(text, "article.course.card"))
        # Find the a element with '>' as the text, get its href.
        next_hrefs = elements_by_xpath(text, "//a/span[text() = '>']/../@href")
        if not next_hrefs:
            return total
        assert len(next_hrefs) == 1
        page_url = urllib.parse.urljoin(page_url, next_hrefs[0])