Example #1
def render_wiki(mooc):
    wiki_data = mooc.wiki
    wiki_name = mooc.wiki_name
    for page in wiki_data:
        if "text" in wiki_data[page]:  # this is a content page
            jinja(
                os.path.join(wiki_data[page]["path"], "index.html"),
                "wiki_page.html",
                False,
                content=wiki_data[page],
                dir=wiki_data[page]["dir"].replace(mooc.instance_url + "/wiki/", "") + "index.html",
                mooc=mooc,
                rooturl=wiki_data[page]["rooturl"]
            )

        make_dir(os.path.join(wiki_data[page]["path"], "_dir"))
        if len(wiki_data[page]["children"]) != 0:  # this is a list page
            page_to_list = []
            for child_page in wiki_data[page]["children"]:
                if "title" in wiki_data[child_page]:
                    page_to_list.append({
                        "url": wiki_data[page]["rooturl"] + "/.." + child_page.replace(mooc.instance_url, ""),
                        "title": wiki_data[child_page]["title"],
                        "last-modif": wiki_data[child_page]["last-modif"],
                    })
            jinja(
                os.path.join(wiki_data[page]["path"], "_dir", "index.html"),
                "wiki_list.html",
                False,
                pages=page_to_list,
                wiki_name=wiki_name,
                mooc=mooc,
                rooturl=wiki_data[page]["rooturl"] + "../"
            )
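All of these examples call a handful of helpers (jinja, make_dir, dl_dependencies, slugify) that are defined elsewhere in the project. As a point of reference, here is a minimal sketch of what make_dir and jinja plausibly look like, inferred only from how they are called above; the templates directory and the handling of the unused third boolean parameter are assumptions, not the project's actual code.

import os
from jinja2 import Environment, FileSystemLoader

ENV = Environment(loader=FileSystemLoader("templates"))  # assumed template location

def make_dir(path):
    # Create the directory (and any missing parents); no error if it exists.
    os.makedirs(path, exist_ok=True)

def jinja(output, template, deviceaware, **context):
    # Render `template` with `context` and write the HTML to `output`.
    # The third positional argument is always False in the examples; its
    # meaning in the original project is unknown, so it is ignored here.
    html = ENV.get_template(template).render(**context)
    with open(output, "w", encoding="utf-8") as f:
        f.write(html)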
Example #2
    def __init__(self, c, course_url, convert_in_webm, ignore_missing_xblock, lang):
        self.course_url = course_url
        self.convert_in_webm = convert_in_webm
        self.ignore_missing_xblock = ignore_missing_xblock
        self.lang = lang or "en"
        self.instance_url = c.conf["instance_url"]
        self.course_id = get_course_id(self.course_url, c.conf["course_page_name"], c.conf["course_prefix"], self.instance_url)
        logging.info("Get info about course")
        # The username value is masked ("******") in the original listing; c.user is an assumption
        self.info = c.get_api_json("/api/courses/v1/courses/" + self.course_id + "?username=" + c.user)
        self.output_path = os.path.join("output", slugify(self.info["name"]))
        self.name = slugify(self.info["name"])
        make_dir(self.output_path)
        logging.info("Get course blocks")
        json_from_api = c.get_api_json(
            "/api/courses/v1/blocks/?course_id=" + self.course_id
            + "&username=" + c.user  # also masked in the original listing
            + "&depth=all&requested_fields=graded,format,student_view_multi_device"
            + "&student_view_data=video,discussion&block_counts=video,discussion,problem&nav_depth=3"
        )
        self.json = json_from_api["blocks"]
        self.root_id = json_from_api["root"]

        self.course_root = None
        self.path = ""
        self.rooturl = ""
        self.top = {}
        self.object = []
        self.no_homepage = False
        self.wiki = None
        self.forum_thread = None
        self.page_annexe = []
        self.book_list_list = []
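The constructor takes a connection object c that must expose conf, user (assumed above, since the listing masks it), and get_api_json, and, in later examples, get_page and get_redirection. A hypothetical stand-in satisfying that interface, useful only for following the examples; the project's real connection class is not shown in this listing.

import json as jsonlib
from urllib.request import Request, urlopen

class Connection:  # hypothetical stand-in, not the project's class
    def __init__(self, instance_url, user):
        # The real config also carries course_page_name / course_prefix.
        self.conf = {"instance_url": instance_url}
        self.user = user

    def get_page(self, url):
        # Fetch a page and return its decoded body.
        with urlopen(url) as resp:
            return resp.read().decode("utf-8")

    def get_api_json(self, path, referer=None):
        # GET instance_url + path and parse the JSON response.
        headers = {"Referer": referer} if referer else {}
        with urlopen(Request(self.conf["instance_url"] + path, headers=headers)) as resp:
            return jsonlib.loads(resp.read().decode("utf-8"))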
Example #3
    def __init__(self, json, path, rooturl, id, descendants, mooc):
        self.mooc = mooc
        self.json = json
        self.path = path
        self.rooturl = rooturl
        self.id = id
        self.folder_name = slugify(json["display_name"])
        self.output_path = os.path.join(self.mooc.output_path, self.path)
        make_dir(self.output_path)
Example #4
    def __init__(self, json, path, rooturl, id, descendants, mooc):
        self.mooc = mooc
        self.json = json
        self.path = path
        self.rooturl = rooturl
        self.id = id
        self.output_path = self.mooc.output_path
        path = os.path.join(self.output_path, self.path,
                            slugify(json["display_name"]))
        make_dir(path)
        self.data = []
        self.category_title = ""
Example #5
    def download(self, c):
        download("https://www.google.com/s2/favicons?domain=" + self.instance_url, os.path.join(self.output_path, "favicon.png"), None)

        logging.info("Get homepage")
        content = c.get_page(self.course_url)
        make_dir(os.path.join(self.output_path, "home"))
        self.html_homepage = []
        soup = BeautifulSoup.BeautifulSoup(content, 'lxml')
        html_content = soup.find('div', attrs={"class": "welcome-message"})
        if html_content is None:
            html_content = soup.find_all('div', attrs={"class": re.compile("info-wrapper")})
            if html_content == []:
                self.no_homepage = True
            else:
                for article in html_content:
                    dismiss = article.find("div", attrs={"class": "dismiss-message"})
                    if dismiss is not None:
                        dismiss.decompose()
                    bookmark = article.find("a", attrs={"class": "action-show-bookmarks"})
                    if bookmark is not None:
                        bookmark.decompose()
                    # find_all always returns a list, so iterate over it directly
                    for button in article.find_all("button", attrs={"class": "toggle-visibility-button"}):
                        button.decompose()
                    article['class'] = "toggle-visibility-element article-content"
                    self.html_homepage.append(dl_dependencies(article.prettify(), os.path.join(self.output_path, "home"), "home", c))
        else:
            dismiss = html_content.find("div", attrs={"class": "dismiss-message"})
            if dismiss is not None:
                dismiss.decompose()
            bookmark = html_content.find("a", attrs={"class": "action-show-bookmarks"})
            if bookmark is not None:
                bookmark.decompose()
            for button in html_content.find_all("button", attrs={"class": "toggle-visibility-button"}):
                button.decompose()
            self.html_homepage.append(dl_dependencies(html_content.prettify(), os.path.join(self.output_path, "home"), "home", c))
        logging.info("Get content")
        for x in self.object:
            x.download(c)
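The BeautifulSoup.BeautifulSoup(content, 'lxml') calls only resolve if the bs4 module is imported under that alias; the imports this example appears to rely on (an inference from usage, since the listing omits them):

import logging
import os
import re

import bs4 as BeautifulSoup  # makes BeautifulSoup.BeautifulSoup(...) resolve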
Example #6
    def annexe(self, c):
        logging.info("Try to get specific pages of the mooc")
        content = c.get_page(self.course_url)
        soup = BeautifulSoup.BeautifulSoup(content, 'lxml')
        top_bs = (soup.find('ol', attrs={"class": "course-material"})
                  or soup.find('ul', attrs={"class": "course-material"})
                  or soup.find('ul', attrs={"class": "navbar-nav"})
                  or soup.find('ol', attrs={"class": "course-tabs"}))
        if top_bs is not None:
            for top_elem in top_bs.find_all("li"):
                top_elem = top_elem.find("a")
                if top_elem["href"][-1] == "/":
                    path = top_elem["href"][:-1].split("/")[-1]
                else:
                    path = top_elem["href"].split("/")[-1]
                if path == "course" or "courseware" in path:
                    name = top_elem.get_text().replace(", current location", "")
                    self.top[name] = "course/" + self.head.folder_name + "/index.html"
                if "info" in path:
                    name = top_elem.get_text().replace(", current location", "")
                    self.top[name] = "/index.html"
                if path == "course" or "edxnotes" in path or "progress" in path or "info" in path or "courseware" in path:
                    continue
                if "wiki" in path:
                    self.wiki, self.wiki_name, path = annexe.wiki(c, self)
                elif "forum" in path:
                    path = "forum/"
                    self.forum_thread, self.forum_category, self.staff_user_forum = annexe.forum(c, self)
                else:
                    output_path = os.path.join(self.output_path, path)
                    make_dir(output_path)
                    page_content = c.get_page(self.instance_url + top_elem["href"])
                    soup_page = BeautifulSoup.BeautifulSoup(page_content, 'lxml')
                    just_content = soup_page.find('section', attrs={"class": "container"})
                    if just_content is not None:
                        html_content = dl_dependencies(str(just_content), output_path, "", c)
                        self.page_annexe.append({"output_path": output_path, "content": html_content, "title": soup_page.find('title').get_text()})
                    else:
                        book = soup_page.find('section', attrs={"class": "book-sidebar"})
                        if book is not None:
                            self.book_list_list.append({"output_path": output_path, "book_list": annexe.booknav(self, book, output_path), "dir_path": path})
                        else:
                            logging.warning("It seems we do not support this type of extra content (in the top bar): " + path)
                            continue
                self.top[top_elem.get_text()] = path + "/index.html"
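The calls annexe.wiki(c, self) and annexe.forum(c, self) above resolve to the functions shown as Example #8 and Example #9 below: wiki returns the (wiki_data, wiki_name, wiki_path) triple unpacked here, and forum returns (threads, category, staff_user).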
Example #7
    def __init__(self, json, path, rooturl, id, descendants, mooc):
        self.mooc = mooc
        self.json = json
        self.path = path
        self.rooturl = rooturl
        self.id = id
        self.descendants = descendants
        self.top = self.mooc.top
        self.output_path = self.mooc.output_path
        if self.json["block_counts"]["video"] != 0:
            self.icon_type = "fa-video-camera"
        elif self.json["block_counts"]["problem"] != 0:
            self.icon_type = "fa-question-circle"
        elif self.json["block_counts"]["discussion"] != 0:
            self.icon_type = "fa-comment"
        else:
            self.icon_type = "fa-book"

        self.display_name = json["display_name"]
        self.folder_name = slugify(self.display_name)
        path = os.path.join(self.output_path, self.path, self.folder_name)
        make_dir(path)
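The icon is picked by the first non-zero block count, in a fixed priority order (video, then problem, then discussion, with a book icon as the fallback). The same decision written as a data-driven lookup, as an illustrative sketch rather than the project's code:

# Behavior-equivalent sketch of the icon choice above (hypothetical helper).
ICON_PRIORITY = [
    ("video", "fa-video-camera"),
    ("problem", "fa-question-circle"),
    ("discussion", "fa-comment"),
]

def pick_icon(block_counts):
    for block_type, icon in ICON_PRIORITY:
        if block_counts.get(block_type, 0) != 0:
            return icon
    return "fa-book"  # default when no counted block type is present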
Example #8
def wiki(c, mooc):
    # Get redirection to the wiki
    first_page = c.get_redirection(mooc.instance_url + "/courses/" + mooc.course_id + "/course_wiki")
    page_to_visit = [first_page]
    wiki_data = {}  # data for pages already visited
    # "[url]" : { "rooturl": , "path": , "text": , "title": , "dir" : , "children": [] }
    # Extract the wiki name
    wiki_name = first_page.replace(mooc.instance_url + "/wiki/", "")[:-1]
    wiki_path = os.path.join("wiki", first_page.replace(mooc.instance_url + "/wiki/", ""))

    while page_to_visit:
        get_page_error = False
        url = page_to_visit.pop()
        content = ""  # keep `content` defined even when the request fails
        try:
            content = c.get_page(url)
        except HTTPError as e:
            if e.code == 404 or e.code == 403:
                get_page_error = True
            else:
                logging.warning("Failed to get " + url + " Error: " + str(e.code))

        wiki_data[url] = {}
        web_path = os.path.join("wiki", url.replace(mooc.instance_url + "/wiki/", ""))
        path = os.path.join(mooc.output_path, web_path)
        make_dir(path)
        wiki_data[url]["path"] = path
        # one "../" to leave the page directory, plus one per path component
        rooturl = "../" * (len(web_path.split("/")) + 1)
        wiki_data[url]["rooturl"] = rooturl
        wiki_data[url]["children"] = []


        # Parse the page content
        soup = BeautifulSoup.BeautifulSoup(content, 'lxml')
        text = soup.find("div", attrs={"class": "wiki-article"})
        if text is not None:  # it's a content page (not a list of pages)
            # Find new wiki pages referenced in the page content
            for link in text.find_all("a"):
                if link.has_attr("href") and "/wiki/" in link["href"]:
                    # Resolve the absolute URL before checking whether the page
                    # is already known (the original compared the Tag object
                    # itself, which never matches a URL key)
                    if link["href"][0:4] == "http":
                        page_url = link["href"]
                    else:
                        page_url = mooc.instance_url + link["href"]
                    if page_url not in wiki_data and page_url not in page_to_visit:
                        page_to_visit.append(page_url)

                    if not link["href"][0:4] == "http":  # update path in wiki page
                        link["href"] = rooturl[:-1] + link["href"].replace(mooc.instance_url, "") + "/index.html"

            wiki_data[url]["text"] = dl_dependencies(str(text), path, "", c)
            wiki_data[url]["title"] = soup.find("title").text
            wiki_data[url]["last-modif"] = soup.find("span", attrs={"class": "date"}).text
            wiki_data[url]["children"] = []
        elif get_page_error:
            wiki_data[url]["text"] = """<div><h1 class="page-header">Permission Denied</h1><p class="alert denied">Sorry, you don't have permission to view this page.</p></div>"""
            wiki_data[url]["title"] = "Permission Denied | Wiki"
            wiki_data[url]["last-modif"] = "Unknown"
            wiki_data[url]["children"] = []

        # Find new wiki URLs in the "see children" listing page
        see_children = soup.find('div', attrs={"class": "see-children"})
        if see_children:
            allpage_url = str(see_children.find("a")["href"])
            wiki_data[url]["dir"] = allpage_url
            content = c.get_page(mooc.instance_url + allpage_url)
            soup = BeautifulSoup.BeautifulSoup(content, 'lxml')
            table = soup.find("table")
            if table is not None:
                for link in table.find_all("a"):
                    if link.has_attr("class") and "list-children" in link["class"]:
                        continue
                    child_url = mooc.instance_url + link["href"]
                    # compare full URLs; a relative href never matches the keys
                    if child_url not in wiki_data and child_url not in page_to_visit:
                        page_to_visit.append(child_url)
                    wiki_data[url]["children"].append(child_url)
    return wiki_data, wiki_name, wiki_path
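The crawler catches HTTPError and reads a .code attribute, which matches urllib's exception type on Python 3; the imports this example appears to assume (inferred from usage, not shown in the listing):

import logging
import os

import bs4 as BeautifulSoup
from urllib.error import HTTPError  # raised by urllib on 4xx/5xx responses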
Example #9
def forum(c, mooc):
    forum_output = os.path.join(mooc.output_path, "forum")
    make_dir(forum_output)
    content = c.get_page(mooc.instance_url + "/courses/" + mooc.course_id + "/discussion/forum")
    good_content = BeautifulSoup.BeautifulSoup(content, 'lxml').find("script", attrs={"id": "thread-list-template"})
    category = OrderedDict()
    if good_content:
        soup = BeautifulSoup.BeautifulSoup(content, 'lxml')

        all_category = soup.find_all('li', attrs={"class": "forum-nav-browse-menu-item"})
        if len(all_category) == 0:
            # On the FUN platform, the category list sits inside the script tag with id "thread-list-template"
            soup = BeautifulSoup.BeautifulSoup(good_content.text, 'lxml')
            all_category = soup.find_all('li', attrs={"class": "forum-nav-browse-menu-item"})
        for cat in all_category:
            if (cat.has_attr("id") and cat["id"] in ["all_discussions", "posts_following"]) or (cat.has_attr("class") and ("forum-nav-browse-menu-all" in cat["class"] or "forum-nav-browse-menu-following" in cat["class"])):
                continue
            if not cat.has_attr("data-discussion-id"):
                category[str(uuid4())] = {"title": cat.find(["a", "span"], attrs={"class": "forum-nav-browse-title"}).text, "catego_with_sub_catego": True}
            elif cat.has_attr("data-discussion-id"):
                category[cat["data-discussion-id"]] = {"title": str(cat.text).replace("\n", "")}

    else:
        logging.error("No forum category found")
    threads = []

    # Search for staff users
    json_user = {}
    section_user = BeautifulSoup.BeautifulSoup(content, 'lxml').find("section", attrs={"id": "discussion-container"})
    if section_user and section_user.has_attr("data-roles"):
        if "&#34;" in section_user["data-roles"]:
            json_user = json.loads(unescape(section_user["data-roles"]))
        else:
            json_user = json.loads(section_user["data-roles"])
    else:
        section_user = re.search("roles: [^\n]*", content)
        if section_user:  # TODO check ok in this case
            json_user = json.loads(re.sub(r"roles: (.*),", r'\1', section_user.group()))
    staff_user = []
    for x in json_user:
        staff_user += [str(y) for y in json_user[x]]

    # Fetch the thread list of each category, page by page
    for x in category:
        make_dir(os.path.join(forum_output, x))
        url = "/courses/" + mooc.course_id + "/discussion/forum/" + x + "/inline?ajax=1&page=1&sort_key=activity&sort_order=desc"
        data = c.get_api_json(url)
        threads += data["discussion_data"]
        for i in range(1, data["num_pages"]):  # remaining pages: 2 .. num_pages
            url = "/courses/" + mooc.course_id + "/discussion/forum/" + x + "/inline?ajax=1&page=" + str(i + 1) + "&sort_key=activity&sort_order=desc"
            data = c.get_api_json(url)
            threads += data["discussion_data"]

    for thread in threads:
        url = "/courses/" + mooc.course_id + "/discussion/forum/" + thread["commentable_id"] + "/threads/" + thread["id"] + "?ajax=1&resp_skip=0&resp_limit=100"
        make_dir(os.path.join(forum_output, thread["id"]))
        try:
            thread["data_thread"] = c.get_api_json(url, referer=mooc.instance_url + url.split("?")[0])
            total_answers = 100
            while total_answers < thread["data_thread"]["content"]["resp_total"]:
                url = "/courses/" + mooc.course_id + "/discussion/forum/" + thread["commentable_id"] + "/threads/" + thread["id"] + "?ajax=1&resp_skip=" + str(total_answers) + "&resp_limit=100"
                new_answers = c.get_api_json(url, referer=mooc.instance_url + url.split("?")[0])["content"]["children"]
                thread["data_thread"]["content"]["children"] += new_answers
                total_answers += 100
        except Exception:
            try:
                thread["data_thread"] = c.get_api_json(url)
            except Exception:
                # logging.log() requires a level argument; use logging.warning instead
                logging.warning("Cannot get " + mooc.instance_url + url + " discussion")
        if ("endorsed_responses" in thread["data_thread"]["content"] or "non_endorsed_responses" in thread["data_thread"]["content"]) and "children" in thread["data_thread"]["content"]:
            logging.warning("pb endorsed VS children " + thread["id"])
        if "children" not in thread["data_thread"]["content"]:
            thread["data_thread"]["content"]["children"] = []
        if "endorsed_responses" in thread["data_thread"]["content"]:
            thread["data_thread"]["content"]["children"] += thread["data_thread"]["content"]["endorsed_responses"]
        if "non_endorsed_responses" in thread["data_thread"]["content"]:
            thread["data_thread"]["content"]["children"] += thread["data_thread"]["content"]["non_endorsed_responses"]
        thread["data_thread"]["content"]["body"] = dl_dependencies(markdown(thread["data_thread"]["content"]["body"]), os.path.join(forum_output, thread["id"]), "", c)
        for children in thread["data_thread"]["content"]["children"]:
            children["body"] = dl_dependencies(markdown(children["body"]), os.path.join(forum_output, thread["id"]), "", c)
            if "children" in children:
                for children_children in children["children"]:
                    children_children["body"] = dl_dependencies(markdown(children_children["body"]), os.path.join(forum_output, thread["id"]), "", c)

    return threads, category, staff_user
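Note the pagination pattern above: page 1 is fetched before the loop, and range(1, num_pages) then fetches pages 2 through num_pages. A compact, behavior-equivalent way to write that fetch (a sketch with a hypothetical helper name, assuming num_pages is stable across pages):

def fetch_category_threads(c, course_id, category_id):
    # Keep requesting pages until num_pages is reached.
    threads = []
    page = 1
    while True:
        url = ("/courses/" + course_id + "/discussion/forum/" + category_id
               + "/inline?ajax=1&page=" + str(page)
               + "&sort_key=activity&sort_order=desc")
        data = c.get_api_json(url)
        threads += data["discussion_data"]
        if page >= data["num_pages"]:
            return threads
        page += 1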