예제 #1
0
    def get_sections(self):
        """Populate ``self.sections`` with the section soups of every course.

        Requires ``login()`` and ``get_courses()`` to have been called
        first.  For every course the course page is fetched; courses that
        spread their sections over multiple pages ("&section=N" links) are
        crawled page by page.

        Side effects:
            Fills ``self.sections`` mapping
            ``(course_id, session_key, semestername, coursename)`` to an
            iterable of section soup elements.

        Raises:
            Exception: if ``login()`` or ``get_courses()`` was not called.
        """
        if not self.session:
            raise Exception("You need to login() first.")
        if not self.courses:
            raise Exception("You need to get_courses() first.")

        for cid, semestername, _ in self.courses:
            response = self.session.get(cid, params=self.params)
            soup = bs(response.text, features="html.parser")

            # The session key is needed later for OpenCast downloads; it is
            # scraped from the logout link on the course page.
            session_key = soup.find("a",
                                    {"data-title": "logout,moodle"})["href"]
            session_key = re.findall("sesskey=([a-zA-Z0-9]*)", session_key)[0]
            course_id = re.findall("id=([0-9]*)", cid)[0]

            coursename = helper.clean_filename(
                soup.select_one(".page-header-headings").text)

            # Get Sections. Some courses have them on one page, others on
            # multiple, then we need to crawl all of them.
            # BUGFIX: findall() yields one match per link occurrence, so the
            # same "&section=N" page appeared many times and was fetched and
            # appended repeatedly; dict.fromkeys() deduplicates while keeping
            # the original order.
            sectionpages = list(
                dict.fromkeys(re.findall("&section=[0-9]+", response.text)))
            key = (course_id, session_key, semestername, coursename)
            if sectionpages:
                self.sections[key] = []
                for s in sectionpages:
                    response = self.session.get(cid + s, params=self.params)
                    tempsoup = bs(response.text, features="html.parser")
                    self.sections[key].extend(
                        tempsoup.select_one(".topics").children)
            else:
                self.sections[key] = soup.select_one(".topics").children
예제 #2
0
    def get_courses(self, getAllCourses=False):
        """Populate ``self.courses`` from the Moodle dashboard.

        Args:
            getAllCourses (bool): if True, bypass both the
                "onlyfetchcurrentsemester" config option and the
                "selected_courses" filter and fetch every course.

        Side effects:
            Sets ``self.max_semester``, ``self.selected_categories`` and
            ``self.courses`` (list of ``(url, semestername, title)``).

        Raises:
            Exception: if ``login()`` was not called first.
        """
        if not self.session:
            raise Exception("You need to login() first.")
        response = self.session.get('https://moodle.rwth-aachen.de/my/',
                                    params=self.params)
        soup = bs(response.text, features="html.parser")
        # The "coc-category" dropdown holds one (id, name) entry per semester
        # plus an "All" pseudo-entry that must be dropped.
        categories = [(c["value"], c.text)
                      for c in soup.find("select", {
                          "name": "coc-category"
                      }).findAll("option")]
        # BUGFIX: an unconditional remove() raised ValueError whenever the
        # pseudo-entry was missing (e.g. a localized label); guard it.
        if ('all', 'All') in categories:
            categories.remove(('all', 'All'))
        # Category ids are numeric and grow over time, so the largest id
        # identifies the current semester.
        self.max_semester = max(categories, key=lambda item: int(item[0]))
        self.selected_categories = [
            c for c in categories if c == self.max_semester
        ] if (not getAllCourses
              and self.config["onlyfetchcurrentsemester"]) else categories

        self.courses = [(c.find("h3").find("a")["href"],
                         helper.clean_filename(semestername),
                         c.get_text().replace("\n", ""))
                        for (sid, semestername) in self.selected_categories
                        for c in soup.select(f".coc-category-{sid}")]

        # Optional whitelist of course urls from the config.
        if self.config["selected_courses"] and not getAllCourses:
            self.courses = [(cid, semestername, title)
                            for (cid, semestername, title) in self.courses
                            if cid in self.config["selected_courses"]]
예제 #3
0
def create_color_val_dict(file, node_list, graph):
    """Color every node by its depth and add the import edges to *graph*.

    The "root" node (the file itself) maps to 'green', intermediate
    package levels of dotted imports map to 'yellow', and the imported
    leaf modules map to 'red'.

    Args:
        file (str): name of the python file
        node_list (list): nodes representing imported modules
        graph: networkx graph

    Returns:
        A dictionary of nodes as keys with their respective colors as values
    """

    root = file[:-3]
    color_dict = {helper.clean_filename(file): 'green'}

    for node in node_list:
        parts = node.split('.')
        graph.add_edge(root, parts[0])
        color_dict[parts[-1]] = 'red'
        # For "a.b.c" chain the edges a->b->c; every non-leaf is yellow.
        for parent, child in zip(parts, parts[1:]):
            graph.add_edge(parent, child)
            color_dict[parent] = 'yellow'

    return color_dict
예제 #4
0
def create_color_key_dict(file, node_list, graph):
    """Group nodes by color and add the import edges to *graph*.

    The "root" node (the file itself) goes under 'green', intermediate
    package levels of dotted imports go under 'yellow', and the imported
    leaf modules go under 'red'.

    Args:
        file (str): the name of the python file
        node_list (list): nodes representing imported modules
        graph: networkx graph

    Returns:
        Dictionary with colors as keys and nodes as values
    """

    root = file[:-3]
    color_dict = {
        'green': [helper.clean_filename(file)],
        'yellow': [],
        'red': []
    }

    for node in node_list:
        parts = node.split('.')
        graph.add_edge(root, parts[0])
        color_dict['red'].append(parts[-1])
        # For "a.b.c" chain the edges a->b->c; every non-leaf is yellow.
        for parent, child in zip(parts, parts[1:]):
            graph.add_edge(parent, child)
            color_dict['yellow'].append(parent)

    return color_dict
예제 #5
0
def get_episode_name(idx):
    """Build the cleaned filename for episode *idx* (0-based).

    Joins the global ``serie`` name, the zero-padded 1-based episode
    number and the title from the global ``episodes_name`` list, then
    sanitizes the result with ``clean_filename``.
    """
    number = str(idx + 1).zfill(3)
    return clean_filename(f"{serie}.{number}.{episodes_name[idx]}")
예제 #6
0
    def sync(self):
        """Download the files of every crawled course section.

        Requires ``login()``, ``get_courses()`` and ``get_sections()`` to
        have been called first.  Walks every section of every course in
        ``self.sections`` and downloads resources, linked files, folder
        contents, assignment files and videos embedded in pages into a
        directory tree ``basedir/semester/course/section[/category]``.

        Raises:
            Exception: if any of the prerequisite methods was not called.
        """
        if not self.session:
            raise Exception("You need to login() first.")
        if not self.courses:
            raise Exception("You need to get_courses() first.")
        if not self.sections:
            raise Exception("You need to get_sections() first.")

        ### Syncing all courses

        for course_id, session_key, semestername, coursename in self.sections.keys(
        ):
            print(f"Syncing {coursename}...")
            for sec in self.sections[course_id, session_key, semestername,
                                     coursename]:
                sectionname = helper.clean_filename(
                    sec.select_one(".sectionname").get_text())
                #print(f"[{datetime.now()}] Section {sectionname}")
                mainsectionpath = os.path.join(self.config["basedir"],
                                               semestername, coursename,
                                               sectionname)

                # Categories can be multiple levels deep like folders, see https://moodle.rwth-aachen.de/course/view.php?id=7053&section=1

                label_categories = sec.findAll(
                    "li", {
                        "class": [
                            "modtype_label", "modtype_resource", "modtype_url",
                            "modtype_folder", "modtype_assign", "modtype_page"
                        ]
                    })

                # Group this section's modules into (label, modules) pairs;
                # modules appearing before any label end up in a category
                # named None (downloaded straight into the section folder).
                categories = []
                category = None
                for l in label_categories:
                    # Create a category for all labels if enableExperimentalCategories is set
                    if "modtype_label" in l['class'] and self.config[
                            "enableExperimentalCategories"]:
                        category = (helper.clean_filename(
                            l.findAll(text=True)[-1]), [])
                        categories.append(category)
                    else:
                        if category == None:
                            category = (None, [])
                            categories.append(category)
                        category[1].append(l)

                ## Download Opencast Videos directly embedded in section
                helper.scan_for_opencast(sec, course_id, session_key,
                                         mainsectionpath, self.session)

                for category_name, category_soups in categories:
                    if category_name == None:
                        sectionpath = mainsectionpath
                    else:
                        sectionpath = os.path.join(mainsectionpath,
                                                   category_name)
                    for s in category_soups:
                        # Each module is handled via its first link, if any.
                        mod_link = s.find('a', href=True)
                        if not mod_link:
                            continue
                        mod_link = mod_link["href"]

                        ## Get Resources
                        if "modtype_resource" in s["class"]:
                            # First check if the file is directly accessible:
                            if helper.download_file(mod_link, sectionpath,
                                                    self.session):
                                continue
                            # If no file was found, then it could be an html page with an embedded video
                            response = self.session.get(mod_link,
                                                        params=self.params)
                            if "Content-Type" in response.headers and "text/html" in response.headers[
                                    "Content-Type"]:
                                tempsoup = bs(response.text,
                                              features="html.parser")
                                videojs = tempsoup.select_one(".video-js")
                                if videojs:
                                    videojs = videojs.select_one("source")
                                    if videojs and videojs.get("src"):
                                        # Use the last URL path segment as
                                        # the local filename.
                                        helper.download_file(
                                            videojs["src"], sectionpath,
                                            self.session,
                                            videojs["src"].split("/")[-1])

                        ## Get Resources in URLs
                        if "modtype_url" in s["class"]:
                            url = None
                            try:
                                # HEAD requests resolve the redirect target
                                # without downloading any body.
                                response = self.session.head(
                                    mod_link, params=self.params)
                                if "Location" in response.headers:
                                    url = response.headers["Location"]
                                    response = self.session.head(
                                        url, params=self.params)
                                    if "Content-Type" in response.headers and "text/html" not in response.headers[
                                            "Content-Type"]:
                                        # Don't download html pages
                                        helper.download_file(
                                            url, sectionpath, self.session)
                                    elif "engage.streaming.rwth-aachen.de" in url:
                                        # Maybe its a link to an OpenCast video
                                        helper.downloadOpenCastVideos(
                                            url, course_id, session_key,
                                            sectionpath, self.session)
                            except:
                                # Maybe the url is down?
                                # NOTE(review): bare except also swallows
                                # KeyboardInterrupt/SystemExit; prefer
                                # "except Exception".
                                print(f"Error while downloading url {url}")

                        ## Get Folders
                        if "modtype_folder" in s["class"]:
                            response = self.session.get(mod_link,
                                                        params=self.params)
                            soup = bs(response.text, features="html.parser")
                            soup_results = soup.find("a", {"title": "Folder"})

                            if not soup_results:
                                # page has no title?
                                continue

                            foldername = helper.clean_filename(
                                soup_results.text)
                            filemanager = soup.select_one(
                                ".filemanager").findAll('a', href=True)
                            # Screw nested folders, I'll do that another time
                            for file in filemanager:
                                link = file["href"]
                                filename = file.select_one(".fp-filename").text
                                helper.download_file(
                                    link, os.path.join(sectionpath,
                                                       foldername),
                                    self.session, filename)

                        ## Get Assignments
                        if "modtype_assign" in s["class"]:
                            response = self.session.get(mod_link,
                                                        params=self.params)
                            soup = bs(response.text, features="html.parser")
                            soup_results = soup.find("a",
                                                     {"title": "Assignment"})

                            if not soup_results:
                                # page has no title?
                                continue

                            foldername = helper.clean_filename(
                                soup_results.text)
                            files = soup.select(".fileuploadsubmission")
                            for file in files:
                                link = file.find('a', href=True)["href"]
                                filename = file.text
                                helper.download_file(
                                    link, os.path.join(sectionpath,
                                                       foldername),
                                    self.session, filename)

                        ## Get embedded videos in pages
                        if "modtype_page" in s["class"]:
                            response = self.session.get(mod_link,
                                                        params=self.params)
                            soup = bs(response.text, features="html.parser")
                            soup_results = soup.find("a", {"title": "Page"})

                            if not soup_results:
                                # page has no title?
                                continue

                            pagename = helper.clean_filename(soup_results.text)
                            path = os.path.join(sectionpath, pagename)

                            # Youtube videos
                            helper.scanAndDownloadYouTube(soup, path)

                            # OpenCast videos
                            helper.scan_for_opencast(soup, course_id,
                                                     session_key, path,
                                                     self.session)