async def count_tiles(url, site, session):
    """Count Open edX course tiles on the page at `url`.

    Tries the standard courses listing selector, then a fallback selector.
    Courses starting more than a year from now are filtered out, and each
    tile's article id is tallied into `site.course_ids` on a best-effort
    basis.  The page text is folded into the site's fingerprint.

    Raises:
        GotZero: when no course tiles are found (after first checking the
            page text for Open edX tell-tales and setting `site.is_openedx`).
    """
    text = await session.text_from_url(url)
    elts = elements_by_css(text, ".courses ul.courses-listing > li")
    if not elts:
        # Fall back to the other markup some themes use.
        elts = elements_by_css(text, ".courses-listing-item")
    if not elts:
        # No courses, but do we see any indication of it being open edx?
        if any(snip in text.lower() for snip in OPENEDX_SNIPS):
            site.is_openedx = True
        raise GotZero("No .courses-listing-item's")
    # Ignore courses that don't start within the next year.
    soon = datetime.datetime.now() + datetime.timedelta(days=365)
    elts = filter_by_date(elts, soon.isoformat())
    count = len(elts)
    # Try to get the course ids also!  Best-effort per tile: previously one
    # malformed tile aborted id collection for every remaining tile, so the
    # try now lives inside the loop.
    for elt in elts:
        try:
            course_id = elt.xpath("article/@id")[0]
            site.course_ids[course_id] += 1
        except Exception:
            pass
    site.add_to_fingerprint(text)
    return count
async def gotoclass_parser(site, session):
    """Count courses on a gotoclass-style site by walking its paginated listing.

    Starts at /courses/ and follows the "next page" link until none remains,
    summing the course blocks on each page and fingerprinting every page.
    """
    page_url = site_url(site, "/courses/")
    total = 0
    while True:
        html = await session.text_from_url(page_url)
        site.add_to_fingerprint(html)
        total += len(elements_by_css(html, "div.course-block"))
        # Pagination: follow a.next until the link disappears.
        next_links = elements_by_css(html, "a.next.page-numbers")
        if not next_links:
            return total
        assert len(next_links) == 1
        page_url = urllib.parse.urljoin(page_url, next_links[0].get('href'))
async def count_elements_parser(site, session, rel_url, css):
    """Count the elements matching `css` on the page at `rel_url` of `site`.

    Fingerprints the fetched page as a side effect.
    """
    page = await session.text_from_url(site_url(site, rel_url))
    site.add_to_fingerprint(page)
    return len(elements_by_css(page, css))
async def openu_kz_parser(site, session):
    """Read the course count from the statistics block on the site's front page."""
    html = await session.text_from_url(site.url)
    site.add_to_fingerprint(html)
    stat_blocks = elements_by_css(html, ".statistics-block .statistics-block__value")
    return int(stat_blocks[0].text)
async def edraak_org_parser(site, session):
    """Count courses on edraak.org by summing its per-category counts.

    Each matched span holds a parenthesized count like " (12)"; the total
    is the sum across categories.  Fingerprints the fetched page.
    """
    url = site_url(site, "/en/courses/")
    text = await session.text_from_url(url)
    site.add_to_fingerprint(text)
    elts = elements_by_css(text, "aside.all-courses div.course span")
    # sum() over a generator replaces the manual accumulate loop.
    return sum(int(elt.text.strip(" ()")) for elt in elts)
async def iitbombayx_parser(site, session):
    """Count courses on iitbombayx by summing its timeline facet counts.

    Each facet-count element holds a parenthesized number like "(7)"; the
    total is the sum across facets.  Fingerprints the fetched page.
    """
    url = site_url(site, "/courses")
    text = await session.text_from_url(url)
    site.add_to_fingerprint(text)
    elts = elements_by_css(text, "#block-timeline-2 .facet-item__count")
    # sum() over a generator replaces the manual accumulate loop.
    return sum(int(elt.text.strip("()")) for elt in elts)
async def studio_to_tiles(site, session):
    """If `site` is a Studio instance, count course tiles on its linked LMS.

    Looks for exactly one #lms-link on the front page and delegates to
    count_tiles() for that LMS URL.

    Raises:
        NotTrying: when the page doesn't have a single #lms-link.
    """
    text = await session.text_from_url(site_url(site, "/"))
    site.process_text(text)
    lms_links = elements_by_css(text, "#lms-link")
    if len(lms_links) != 1:
        raise NotTrying("Not studio I guess")
    return await count_tiles(lms_links[0].get("href"), site, session)
async def numfocus_parser(site, session):
    """Count courses across the numfocus site by crawling linked pages.

    Starting from `site.url`, counts .course-rec-3 tiles on each page;
    blurb links containing "/about-course/" count as one course each, and
    every other blurb link is queued for crawling.

    Fixes over the previous version: a visited set prevents pages that link
    back to each other from being crawled forever, and <a> elements without
    an href (get() returns None) no longer raise TypeError.
    """
    seen = {site.url}  # pages already queued or visited
    urls = collections.deque([site.url])
    count = 0
    while urls:
        url = urls.popleft()
        text = await session.text_from_url(url)
        site.process_text(text)
        # Look for courses.
        count += len(elements_by_css(text, ".course-rec-3"))
        # Look for further pages that are or have courses.
        subs = elements_by_css(text, ".et_pb_blurb_content a")
        hrefs = {sub.get("href") for sub in subs}
        for href in hrefs:
            if not href:
                continue
            if "/about-course/" in href:
                count += 1
            elif href not in seen:
                seen.add(href)
                urls.append(href)
    return count
async def cognitiveclass_parser(site, session):
    """Count courses on cognitiveclass by paging through its catalog.

    Starts at /courses and follows the '>' pagination link until it's gone,
    summing the course cards on each page and fingerprinting every page.
    """
    page_url = site_url(site, "/courses")
    total = 0
    while True:
        html = await session.text_from_url(page_url)
        site.add_to_fingerprint(html)
        total += len(elements_by_css(html, "article.course.card"))
        # Find the a element with '>' as the text, get its href.
        next_hrefs = elements_by_xpath(html, "//a/span[text() = '>']/../@href")
        if not next_hrefs:
            return total
        assert len(next_hrefs) == 1
        page_url = urllib.parse.urljoin(page_url, next_hrefs[0])