Beispiel #1
0
    def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]:
        """
        Collect one FILE element per "Download" link found on the page.

        For every matching link the enclosing table row is inspected: the
        text of the second cell becomes the (sanitized) entry name, and the
        cells are scanned from last to first for the first text that
        demangle_date can parse. If no cell yields a date a warning is logged
        and the element is created with a date of None.
        """
        def _is_download_href(href):
            # download links contain the given command class
            return href and "cmd=download" in href

        # Find all download links in the container (this will contain all the files)
        anchors: List[Tag] = self._soup.findAll(
            name="a", attrs={"href": _is_download_href}, text="Download")

        entries: List[IliasPageElement] = []
        for anchor in anchors:
            cells: List[Tag] = anchor.findParent("tr").findChildren("td")

            name = _sanitize_path_name(cells[1].getText().strip())
            log.explain(f"Found exercise detail entry {name!r}")

            # Lazily try the cells back to front and stop at the first one
            # that parses as a date.
            candidates = (
                demangle_date(cell.getText().strip(), fail_silently=True)
                for cell in reversed(cells)
            )
            mtime = next((parsed for parsed in candidates if parsed is not None), None)
            if mtime is None:
                log.warn(f"Date parsing failed for exercise entry {name!r}")

            entries.append(
                IliasPageElement(IliasElementType.FILE,
                                 self._abs_url_from_link(anchor), name, mtime))

        return entries
Beispiel #2
0
    def _find_video_entries_paginated(self) -> List[IliasPageElement]:
        """
        Build a single VIDEO_FOLDER element pointing at a re-queried table URL.

        The table whose id looks like "tbl_xoct_<id>" is located and its id
        is used to build query parameters (a "..._trows" value of "800" plus
        the async table command) on top of the current page URL. If the table
        or its id cannot be determined, a warning is logged and the result of
        _find_video_entries_no_paging() is returned instead.
        """
        table: Tag = self._soup.find(name="table",
                                     id=re.compile(r"tbl_xoct_.+"))
        if table is None:
            log.warn(
                "Couldn't increase elements per page (table not found). I might miss elements."
            )
            return self._find_video_entries_no_paging()

        # The find() above matches the pattern anywhere in the id, whereas
        # this match is anchored at the start, so it can still fail.
        if (matched := re.match(r"tbl_xoct_(.+)", table.attrs["id"])) is None:
            log.warn(
                "Couldn't increase elements per page (table id not found). I might miss elements."
            )
            return self._find_video_entries_no_paging()

        rows_param = f"tbl_xoct_{matched.group(1)}_trows"
        unpaginated_url = url_set_query_params(
            self._page_url,
            {
                rows_param: "800",
                "cmd": "asyncGetTableGUI",
                "cmdMode": "asynch"
            },
        )

        log.explain("Disabled pagination, retrying folder as a new entry")
        return [
            IliasPageElement(IliasElementType.VIDEO_FOLDER, unpaginated_url, "")
        ]
Beispiel #3
0
    def _listed_video_to_element(self, link: Tag) -> IliasPageElement:
        """
        Convert a video link inside a listing table into a VIDEO_PLAYER element.

        :param link: the anchor tag of the video; its great-grandparent is
            treated as the table row describing the video.
        :return: an element whose name is the text of the row's third
            "td.std" cell with ".mp4" appended (sanitized), whose URL is the
            absolute URL of the link, and whose time is the modification time
            found in the row. If no cell contains a date, a warning is logged
            and the current time is used instead.
        """
        # The link is part of a table with multiple columns, describing metadata.
        # 6th or 7th child (1 indexed) is the modification time string. Try to find it
        # by parsing backwards from the end and finding something that looks like a date
        modification_time = None
        row: Tag = link.parent.parent.parent
        column_count = len(row.select("td.std"))
        for index in range(column_count, 0, -1):
            cell = row.select_one(f"td.std:nth-child({index})")
            if cell is None:
                # nth-child counts every sibling of the row, not only the
                # "td.std" cells, so an index may not hit such a cell.
                continue

            date_match = re.search(r"\d+\.\d+\.\d+ - \d+:\d+",
                                   cell.getText().strip())
            if date_match:
                # Parse only the matched part: search() accepts surrounding
                # text that strptime would reject with a ValueError.
                modification_time = datetime.strptime(date_match.group(0),
                                                      "%d.%m.%Y - %H:%M")
                break

        if modification_time is None:
            log.warn(f"Could not determine upload time for {link}")
            modification_time = datetime.now()

        title = row.select_one("td.std:nth-child(3)").getText().strip()
        title += ".mp4"

        video_name: str = _sanitize_path_name(title)

        video_url = self._abs_url_from_link(link)

        log.explain(f"Found video {video_name!r} at {video_url}")
        return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url,
                                video_name, modification_time)
Beispiel #4
0
def demangle_date(date_str: str,
                  fail_silently: bool = False) -> Optional[datetime]:
    """
    Demangle a given date in one of the following formats (hour/minute part is optional):
    "Gestern, HH:MM"
    "Heute, HH:MM"
    "Morgen, HH:MM"
    "dd. mon yyyy, HH:MM"

    The relative day words are also accepted in English ("Yesterday",
    "Today", "Tomorrow") and are matched case-insensitively.

    :param date_str: the date text to parse.
    :param fail_silently: if True, no warning is logged when parsing fails.
    :return: the parsed naive datetime, or None if the text could not be
        parsed.
    """
    try:
        # Normalize whitespace because users
        date_str = re.sub(r"\s+", " ", date_str)

        # The flags have to be passed by keyword: the fourth positional
        # parameter of re.sub is `count`, so a positional re.I would silently
        # limit the number of replacements instead of ignoring case.
        date_str = re.sub("Gestern|Yesterday",
                          _format_date_english(_yesterday()), date_str,
                          flags=re.I)
        date_str = re.sub("Heute|Today", _format_date_english(date.today()),
                          date_str, flags=re.I)
        date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()),
                          date_str, flags=re.I)
        date_str = date_str.strip()
        for german, english in zip(german_months, english_months):
            date_str = date_str.replace(german, english)
            # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020"
            date_str = date_str.replace(english + ".", english)

        # We now have a nice english String in the format: "dd. mmm yyyy, hh:mm" or "dd. mmm yyyy"

        # Check if we have a time as well
        if ", " in date_str:
            day_part, time_part = date_str.split(",")
        else:
            day_part = date_str.split(",")[0]
            time_part = None

        day_str, month_str, year_str = day_part.split(" ")

        day = int(day_str.strip().replace(".", ""))
        month = english_months.index(month_str.strip()) + 1
        year = int(year_str.strip())

        if time_part:
            hour_str, minute_str = time_part.split(":")
            hour = int(hour_str)
            minute = int(minute_str)
            return datetime(year, month, day, hour, minute)

        return datetime(year, month, day)
    except Exception:
        # Deliberately best-effort: any malformed input ends up here and is
        # reported as "no date" instead of propagating.
        if not fail_silently:
            log.warn(f"Date parsing failed for {date_str!r}")
        return None
Beispiel #5
0
    def _player_to_video(self) -> List[IliasPageElement]:
        """
        Extract the video stream URL(s) from a video player page.

        Returns an empty list (after logging a warning) if no stream JSON is
        found in the page source. With exactly one stream, a single VIDEO
        element named after the source is returned. Otherwise one VIDEO
        element per stream is returned, ordered by the stream's "content"
        value, which is also appended to the name in parentheses.
        """
        # Fetch the actual video page. This is a small wrapper page initializing a javascript
        # player. Sadly we can not execute that JS. The actual video stream url is nowhere
        # on the page, but defined in a JS object inside a script tag, passed to the player
        # library.
        # We do the impossible and RegEx the stream JSON object out of the page's HTML source
        stream_pattern = re.compile(
            r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE)
        found = stream_pattern.search(str(self._soup))
        if found is None:
            log.warn(
                "Could not find JSON stream info in video player. Ignoring video."
            )
            return []

        # parse it
        streams = list(json.loads(found.group(1))["streams"])

        # and just fetch the lone video url!
        if len(streams) == 1:
            (only_stream,) = streams
            lone_url = only_stream["sources"]["mp4"][0]["src"]
            return [
                IliasPageElement(IliasElementType.VIDEO, lone_url,
                                 self._source_name)
            ]

        log.explain(f"Found multiple videos for stream at {self._source_name}")
        ordered = sorted(streams, key=lambda entry: entry["content"])
        return [
            IliasPageElement(
                IliasElementType.VIDEO,
                entry["sources"]["mp4"][0]["src"],
                f"{self._source_name.replace('.mp4', '')} ({entry['content']}).mp4",
            )
            for entry in ordered
        ]
Beispiel #6
0
class IliasPageElement:
    """
    Description of a single element found on a page: its type, URL and name,
    plus an optional modification time and description.
    """

    type: IliasElementType
    url: str
    name: str
    # Both optional attributes default to None.
    mtime: Optional[datetime] = None
    description: Optional[str] = None

    def id(self) -> str:
        """
        Return an identifier for this element derived from its URL.

        Several URL patterns are tried in a fixed order and the "id" group of
        the first one that matches is returned. If none matches, a warning is
        logged and the whole URL is returned instead.
        """
        # Order matters: the first matching pattern wins.
        id_patterns = (
            r"eid=(?P<id>[0-9a-z\-]+)",
            r"file_(?P<id>\d+)",
            r"ref_id=(?P<id>\d+)",
            r"target=[a-z]+_(?P<id>\d+)",
        )

        for pattern in id_patterns:
            found = re.search(pattern, self.url)
            if found:
                return found.group("id")

        # Fall back to URL
        log.warn(
            f"Didn't find identity for {self.name} - {self.url}. Please report this."
        )
        return self.url
Beispiel #7
0
def _unexpected_html_warning() -> None:
    """Log a warning that an element with unexpected HTML structure is ignored."""
    message = "Encountered unexpected HTML structure, ignoring element."
    log.warn(message)