예제 #1
0
def parse_content_page(soup) -> List[Union[SDoc, SFolder, SLecture]]:
    """Extract the folders, lectures and documents listed on a content page.

    Entries are the element children of ``<ul id="content_listContainer">``.
    Each entry is classified by the ``alt`` text of the first ``<img>`` found
    inside it:

    * ``"Content Folder"`` -> ``SFolder`` (name, link, details text, no
      children).
    * ``"Item"``, or an empty ``alt`` on an entry that contains a ``<div>``
      with class ``"item clearfix"`` -> whatever ``__item_to_folder``
      returns, when that is truthy.
    * ``"AcuStudio"`` -> ``SLecture``.
    * ``"File"`` -> ``SDoc``.

    Entries of any other kind are skipped, as are folder, lecture and file
    entries without an ``<a>`` tag and folder entries without an ``href``.

    Args:
        soup: Parsed page; only ``find`` is called on it (assumed to be a
            BeautifulSoup document -- TODO confirm against the caller).

    Returns:
        The recognised entries in page order, or an empty list when the
        page has no content list container.
    """
    # NOTE e.g. "course_id": "_306327_1", "content_id": "_1790226_1"
    content_list = soup.find("ul", {"id": "content_listContainer"})
    if content_list is None:
        return []

    result: List[Union[SDoc, SFolder, SLecture]] = []

    for entry in content_list.children:
        # Nodes without a tag name (presumably text between the <li> tags)
        # are not entries.
        if entry.name is None:
            continue

        img = entry.find("img")
        alt = img.get("alt") if img is not None else None

        if alt == "Content Folder":
            hyperlink = entry.find("a")
            link = hyperlink.get("href") if hyperlink is not None else None
            # A folder without a link cannot be followed, so it is dropped
            # instead of aborting the whole page.
            if link is None:
                continue
            details = entry.find("div", {"class": "details"})
            details_text = details.text.strip() if details is not None else ""
            result.append(
                SFolder(hyperlink.text.strip(), link.strip(), details_text,
                        None))
        elif alt == "Item" or (
                # some items do not have the item icon
                alt == ""
                and entry.find("div", {"class": "item clearfix"})):
            folder = __item_to_folder(entry)
            if folder:
                result.append(folder)
        elif alt == "AcuStudio":
            hyperlink = entry.find("a")
            if hyperlink is None:
                continue
            result.append(
                SLecture(hyperlink.text.strip(), hyperlink.get("href")))
        elif alt == "File":
            hyperlink = entry.find("a")
            # sometimes file link is broken, in that case no href tag is rendered
            # see: https://github.com/leafgecko/NTULearn-Downloader/issues/8
            if hyperlink is None:
                continue
            result.append(SDoc(hyperlink.text.strip(), hyperlink.get("href")))

    return result
예제 #2
0
def __item_to_folder(item):
    """Turn an "Item" entry into an ``SFolder`` of its downloadable files.

    The folder is named after the text of the entry's ``<h3>``. It gets one
    ``SDoc`` child per ``<a>`` inside the entry's ``<div class="details">``
    whose ``href`` is accepted by ``is_download_link``.

    Args:
        item: The entry element; ``find`` is called on it (assumed to be a
            BeautifulSoup tag -- TODO confirm against the caller).

    Returns:
        An ``SFolder`` with ``link=None``, empty ``details`` and the
        collected children, or ``None`` when the entry has no ``<h3>``, no
        details block, or no download links.
    """
    heading = item.find("h3")
    if heading is None:
        return None

    # The attribute filter must be a dict mapping attribute name to value;
    # a set literal {"class", "details"} does not express "class=details".
    details = item.find("div", {"class": "details"})
    if details is None:
        return None

    children = [
        SDoc(name=a_tag.text.strip(), link=a_tag.get("href"))
        for a_tag in details.find_all("a")
        if is_download_link(a_tag.get("href"))
    ]
    if not children:
        return None
    return SFolder(name=heading.text.strip(),
                   link=None,
                   details="",
                   children=children)