Esempio n. 1
0
 def _login(self,
            username="",
            password="",
            cookies="",
            cache_session=False):
     """Create a Udemy session from a cached session, cookies, a token or credentials.

     On return ``self._session`` and ``self._access_token`` are set; the
     cookie path additionally sets ``self._cookies``.

     Args:
         username: Account name. When it and ``password`` are both empty and
             no session/cookies are available, the values are taken from the
             config returned by ``auth.is_session_exists()`` and, failing
             that, from interactive prompts.
         password: Account password (same fallbacks as ``username``).
         cookies: Raw cookie string that contains ``access_token``.
         cache_session: Forwarded unchanged to ``UdemyAuth``.

     Returns:
         ``{"login": "successful"}`` when a session object was obtained,
         ``{"login": "failed"}`` otherwise.
     """
     # check if we already have session on udemy.
     auth = UdemyAuth(cache_session=cache_session)
     is_exists, conf = auth.is_session_exists()
     if is_exists and username and password:
         logger.info(
             msg="Using existing session..",
             new_line=True,
         )
         cookies = conf.get("cookies")
     if not is_exists and not cookies:
         cookies = None
         if not username and not password:
             logger.info(
                 msg="Updating session cookie..",
                 new_line=True,
             )
             username = conf.get("username")
             password = conf.get("password")
         # Still nothing to log in with: ask for a token first, then for
         # credentials.
         if not username and not password:
             print("")
             cookies = getpass.get_access_token(prompt="Access Token : ")
             if not cookies:
                 # NOTE(review): this prompt sequence was masked in the text
                 # this block was recovered from; the password call is
                 # reconstructed as ``getpass.getpass`` -- confirm against
                 # the project's getpass helper.
                 username = getpass.getuser(prompt="Username : ")
                 password = getpass.getpass(prompt="Password : ")
                 print("\n")
             if not cookies and not username and not password:
                 logger.error(
                     msg=
                     "You should either provide Fresh Access Token or Username/Password to create new udemy session.."
                 )
                 sys.exit(0)
     if not cookies:
         auth.username = username
         auth.password = password
         self._session, self._access_token = auth.authenticate()
     else:
         self._cookies = extract_cookie_string(raw_cookies=cookies)
         self._access_token = self._cookies.get("access_token")
         client_id = self._cookies.get("client_id")
         self._session, _ = auth.authenticate(
             access_token=self._access_token, client_id=client_id)
         self._session._session.cookies.update(self._cookies)
     # NOTE(review): the literal values were masked in the recovered text;
     # "successful"/"failed" are the values _fetch_course compares against.
     if self._session is not None:
         return {"login": "successful"}
     return {"login": "failed"}
Esempio n. 2
0
 def _fetch_course(self):
     """Log in and load the enrolled course list into ``self._courses``.

     Credentials are used when ``self._cookies`` is empty, otherwise the
     cookie string is used. The process exits when the login reports
     ``"failed"``.
     """
     auth = {}
     if not self._cookies:
         auth = self._login(
             username=self._username,
             password=self._password,
         )
     if not auth and self._cookies:
         auth = self._login(cookies=self._cookies)

     outcome = auth.get("login")
     if outcome == "successful":
         logger.info(msg="Logged in successfully.", new_line=True)
         logger.info(msg="Fetching all enrolled course(s) url(s)..")
         self._courses = self._extract_subscribed_courses()
         time.sleep(1)
         logger.success(msg="Fetching all enrolled course(s) url(s).. ")
         self._logout()
     elif outcome == "failed":
         logger.error(msg="Failed to login ..\n")
         sys.exit(0)
Esempio n. 3
0
 def download_lecture(self, lecture, filepath, current, total, quality):
     """Download a single lecture and log the outcome.

     ``current``/``total`` are only used for the progress message. When
     ``quality`` is given the lecture is first replaced by
     ``lecture.get_quality(quality)``; nothing happens if no lecture remains.
     """
     if quality and lecture:
         lecture = lecture.get_quality(quality)
     if not lecture:
         return

     title = lecture.title
     logger.info(
         msg=f"Lecture(s) : ({current} of {total})",
         new_line=True,
         before=True,
     )
     logger.info(msg=f"Downloading ({title})", new_line=True)
     try:
         outcome = lecture.download(
             filepath=filepath,
             quiet=True,
             callback=self.show_progress,
         ).get("msg")
         if outcome == "download":
             logger.info(msg=f"Downloaded  ({title})", new_line=True)
         elif outcome == "already downloaded":
             logger.already_downloaded(msg=f"Lecture : '{title}'")
         else:
             # Any other message is treated as the reason for skipping.
             logger.download_skipped(msg=f"Lecture : '{title}' ",
                                     reason=outcome)
     except KeyboardInterrupt:
         logger.error(msg="User Interrupted..", new_line=True)
         sys.exit(0)
Esempio n. 4
0
 def download_assets(self, assets, filepath):
     """Download every asset in ``assets`` into ``filepath`` and log each outcome."""
     if not assets:
         return

     for item in assets:
         name = item.filename
         logger.info(msg="Downloading asset(s)", new_line=True, before=True)
         logger.info(msg=f"Downloading ({name})", new_line=True)
         try:
             outcome = item.download(
                 filepath=filepath,
                 quiet=True,
                 callback=self.show_progress,
             ).get("msg")
             if outcome == "download":
                 logger.info(msg=f"Downloaded  ({name})", new_line=True)
             elif outcome == "already downloaded":
                 logger.already_downloaded(msg=f"Asset : '{name}'")
             else:
                 # Any other message is treated as the reason for skipping.
                 logger.download_skipped(msg=f"Asset : '{name}' ",
                                         reason=outcome)
         except KeyboardInterrupt:
             logger.error(msg="User Interrupted..", new_line=True)
             sys.exit(0)
Esempio n. 5
0
 def download_subtitles(self, subtitles, filepath, language="en", keep_vtt=False):
     """Download subtitles into ``filepath`` and convert freshly downloaded ones.

     When ``language`` is set, the list is replaced by
     ``subtitles[0].get_subtitle(language)``. ``keep_vtt`` is passed on to
     ``self.convert``.
     """
     if language and subtitles:
         subtitles = subtitles[0].get_subtitle(language)
     if not subtitles:
         return

     for track in subtitles:
         label = f"{track.title}.{track.language}"
         target = os.path.join(filepath, track.filename)
         logger.info(msg="Downloading subtitle(s)", new_line=True, before=True)
         logger.info(msg=f"Downloading ({label})", new_line=True)
         try:
             outcome = track.download(
                 filepath=filepath,
                 quiet=True,
                 callback=self.show_progress,
             ).get("msg")
             if outcome == "download":
                 logger.info(msg=f"Downloaded  ({label})", new_line=True)
                 # Only newly downloaded files are converted.
                 self.convert(filename=target, keep_vtt=keep_vtt)
             elif outcome == "already downloaded":
                 logger.already_downloaded(msg=f"Subtitle : '{label}'")
             else:
                 logger.download_skipped(
                     msg=f"Subtitle : '{label}' ", reason=outcome
                 )
         except KeyboardInterrupt:
             logger.error(msg="User Interrupted..", new_line=True)
             sys.exit(0)
Esempio n. 6
0
def extract_cookie_string(raw_cookies):
    """Pull the ``access_token`` value out of a raw cookie string.

    Returns a dict with the single key ``"access_token"``. If the search
    raises or no token is present, an error is logged and the process exits.
    """
    try:
        found = re.search(
            r"(?i)(?:access_token=(?P<access_token>\w+))", raw_cookies
        )
    except Exception as error:
        logger.error(
            msg=f"Cookies error, {error}, unable to extract access_token from cookies."
        )
        sys.exit(0)

    if found:
        return {"access_token": found.group("access_token")}

    logger.error(msg="Unable to find access_token, proper cookies required")
    logger.info(
        msg="follow: https://github.com/r0oth3x49/udemy-dl#how-to-login-with-cookie",
        new_line=True,
    )
    sys.stdout.flush()
    sys.exit(0)
Esempio n. 7
0
 def _login(self, username="", password="", cookies=""):
     """Create a Udemy session from a cached session, cookies or credentials.

     On return ``self._session`` and ``self._access_token`` are set; the
     cookie path additionally sets ``self._cookies``.

     Args:
         username: Account name. When it and ``password`` are both empty and
             no session/cookies are available, the values are taken from the
             config returned by ``auth.is_session_exists()`` and, failing
             that, from interactive prompts.
         password: Account password (same fallbacks as ``username``).
         cookies: Raw cookie string that contains ``access_token``.

     Returns:
         ``{"login": "successful"}`` when a session object was obtained,
         ``{"login": "failed"}`` otherwise.
     """
     # check if we already have session on udemy.
     auth = UdemyAuth()
     is_exists, conf = auth.is_session_exists()
     if is_exists and username and password:
         logger.info(
             msg="Using existing session..",
             new_line=True,
         )
         cookies = conf.get("cookies")
     # Only fall back to credentials when the caller supplied no cookies;
     # previously caller-provided cookies were discarded whenever no cached
     # session existed, which forced a username/password prompt.
     if not is_exists and not cookies:
         cookies = None
         if not username and not password:
             logger.info(
                 msg="Updating session cookie..",
                 new_line=True,
             )
             username = conf.get("username")
             password = conf.get("password")
         if not username and not password:
             print("")
             # NOTE(review): this prompt sequence was masked in the text this
             # block was recovered from; the password call is reconstructed
             # as ``getpass.getpass`` -- confirm against the project's
             # getpass helper.
             username = getpass.getuser(prompt="Username : ")
             password = getpass.getpass(prompt="Password : ")
             print("\n")
     if not cookies:
         auth.username = username
         auth.password = password
         self._session, self._access_token = auth.authenticate()
     else:
         self._cookies = extract_cookie_string(raw_cookies=cookies)
         self._access_token = self._cookies.get("access_token")
         client_id = self._cookies.get("client_id")
         self._session, _ = auth.authenticate(
             access_token=self._access_token, client_id=client_id)
         self._session._session.cookies.update(self._cookies)
     # NOTE(review): the literal values were masked in the recovered text;
     # "successful"/"failed" are the values _fetch_course compares against.
     if self._session is not None:
         return {"login": "successful"}
     return {"login": "failed"}
Esempio n. 8
0
 def _fetch_course(self):
     """Log in once and cache the course metadata on the instance.

     Does nothing when ``self._have_basic`` is already set. On a successful
     login the result of ``self._real_extract`` is stored in ``self._info``
     and unpacked into ``_id``, ``_title``, ``_chapters_count``,
     ``_total_lectures`` and ``_chapters``. The process exits when the login
     reports ``"failed"``.
     """
     if self._have_basic:
         return

     auth = {}
     if not self._cookies:
         auth = self._login(username=self._username,
                            password=self._password,
                            cache_session=self._cache_session)
     if not auth and self._cookies:
         auth = self._login(
             cookies=self._cookies,
             cache_session=self._cache_session,
         )

     outcome = auth.get("login")
     if outcome == "successful":
         logger.info(msg="Logged in successfully.", new_line=True)
         logger.info(msg="Downloading course information ..")
         self._info = self._real_extract(
             self._url, skip_hls_stream=self._skip_hls_stream)
         time.sleep(1)
         logger.success(msg="Downloaded course information .. ")

         info = self._info
         token = info["access_token"]
         self._id = info["course_id"]
         self._title = info["course_title"]
         self._chapters_count = info["total_chapters"]
         self._total_lectures = info["total_lectures"]
         wrapped = []
         for raw_chapter in info["chapters"]:
             wrapped.append(InternUdemyChapter(raw_chapter, access_token=token))
         self._chapters = wrapped

         logger.info(msg="Trying to logout now...", new_line=True)
         # A real logout is only issued for credential-based sessions.
         if not self._cookies:
             self._logout()
         logger.info(msg="Logged out successfully.", new_line=True)
         self._have_basic = True
     elif outcome == "failed":
         logger.error(msg="Failed to login ..\n")
         sys.exit(0)
Esempio n. 9
0
    def _extract_course_info(self, url):
        """Locate the course named in ``url`` among the account's course listings.

        The listings are consulted in order -- subscribed, "my courses",
        subscribed collections, archived -- and the first match wins.

        Returns:
            ``(course_id, course)`` where ``course`` is the matched dict with
            ``"portal_name"`` added. If nothing matches, the failure is
            logged and the process exits.
        """
        portal_name, course_name = self._course_name(url)

        def _match(listing):
            return self.__extract_course(response=listing,
                                         course_name=course_name)

        # ``or`` keeps the lookups lazy: a later listing is only fetched
        # when every earlier one produced no match.
        course = (
            _match(self._subscribed_courses(portal_name=portal_name,
                                            course_name=course_name))
            or _match(self._my_courses(portal_name=portal_name))
            or _match(self._subscribed_collection_courses(
                portal_name=portal_name))
            or _match(self._archived_courses(portal_name=portal_name))
        )

        if course:
            course.update({"portal_name": portal_name})
            return course.get("id"), course

        logger.failed(
            msg="Downloading course information, course id not found .. ")
        logger.info(
            msg=
            "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
            new_line=True,
        )
        logger.info(msg="Trying to logout now...", new_line=True)
        # A real logout is only issued for credential-based sessions.
        if not self._cookies:
            self._logout()
        logger.info(msg="Logged out successfully.", new_line=True)
        sys.exit(0)
Esempio n. 10
0
 def course_download(
     self,
     path="",
     quality="",
     language="en",
     dl_assets=True,
     dl_lecture=True,
     dl_subtitles=True,
     chapter_number=None,
     chapter_start=None,
     chapter_end=None,
     lecture_number=None,
     lecture_start=None,
     lecture_end=None,
     keep_vtt=False,
     skip_hls_stream=False,
 ):
     """Download the selected chapters and lectures of every configured course.

     For each URL in ``self.url_or_courses`` a course object is built, the
     chapter/lecture filters are applied, and -- depending on the ``dl_*``
     switches -- lectures, assets and subtitles are downloaded below
     ``path``/<course title>/<chapter>.
     """
     if self.cookies:
         logger.info(msg="Trying to login using session cookie",
                     new_line=True)
     else:
         logger.info(msg="Trying to login as", status=self.username)

     for url in self.url_or_courses:
         course = udemy.course(
             url=url,
             username=self.username,
             password=self.password,
             cookies=self.cookies,
             skip_hls_stream=skip_hls_stream,
             cache_session=self._cache_session,
         )
         course_name = course.title
         if path and "~" in path:
             path = os.path.expanduser(path)
         course_path = os.path.join(path, course_name)
         selected_chapters = course.get_chapters(
             chapter_number=chapter_number,
             chapter_start=chapter_start,
             chapter_end=chapter_end,
         )
         total_lectures = course.lectures
         total_chapters = course.chapters
         logger.success(msg=course_name, course=True)
         logger.info(msg=f"Chapter(s) ({total_chapters})", new_line=True)
         logger.info(msg=f"Lecture(s) ({total_lectures})", new_line=True)

         for chapter in selected_chapters:
             chapter_index = chapter.index
             chapter_title = chapter.title
             selected_lectures = chapter.get_lectures(
                 lecture_number=lecture_number,
                 lecture_start=lecture_start,
                 lecture_end=lecture_end,
             )
             lectures_count = chapter.lectures
             filepath = to_filepath(course_path, chapter_title)
             logger.set_log_filepath(course_path)

             if chapter_number:
                 chapter_progress = chapter_index
             else:
                 chapter_progress = f"{chapter_index} of {total_chapters}"
             logger.info(
                 msg=f"Downloading chapter : ({chapter_progress})",
                 new_line=True,
                 before=True,
                 cc=80,
                 cc_msg=80,
             )
             logger.info(
                 msg=f"Chapter ({chapter_title})",
                 new_line=True,
                 cc=15,
                 cc_msg=60,
             )
             logger.info(msg=f"Found ({lectures_count}) lecture(s).",
                         new_line=True)

             # Progress counter starts just before the first selected
             # lecture; ``lecture_start`` takes precedence over
             # ``lecture_number`` and the value never goes below zero.
             lecture_index = max(
                 (lecture_start or lecture_number or 1) - 1, 0)

             for lecture in selected_lectures:
                 lecture_assets = lecture.assets
                 lecture_subtitles = lecture.subtitles
                 lecture_best = lecture.getbest()
                 if dl_lecture:
                     lecture_index += 1
                     if lecture.html:
                         dump_msg = lecture.dump(filepath=filepath).get("msg")
                         if dump_msg not in ["download", "already downloaded"]:
                             logger.warning(
                                 msg=f"Lecture: '{lecture.title}.{lecture.extension}' failed to dump, reason: {dump_msg}",
                                 silent=True,
                             )
                     self.download_lecture(
                         lecture_best,
                         filepath,
                         lecture_index,
                         lectures_count,
                         quality,
                     )
                 if dl_assets:
                     self.download_assets(lecture_assets, filepath)
                 if dl_subtitles:
                     self.download_subtitles(
                         lecture_subtitles,
                         filepath,
                         language=language,
                         keep_vtt=keep_vtt,
                     )
         print("")
Esempio n. 11
0
 def course_listdown(
     self,
     chapter_number=None,
     chapter_start=None,
     chapter_end=None,
     lecture_number=None,
     lecture_start=None,
     lecture_end=None,
     skip_hls_stream=False,
 ):
     """List the selected chapters, lectures, streams, assets and subtitles.

     Nothing is downloaded; for every lecture that has streams the method
     logs its duration, id, each stream (with size for non-HLS streams) and
     the sizes of its assets and subtitles. Entries whose reported size is 0
     are omitted.
     """
     if self.cookies:
         logger.info(msg="Trying to login using session cookie",
                     new_line=True)
     else:
         logger.info(msg="Trying to login as", status=self.username)

     row_format = "{:<22} {:<8}{}"
     bullet = "\t- "

     for url in self.url_or_courses:
         course = udemy.course(
             url=url,
             username=self.username,
             password=self.password,
             cookies=self.cookies,
             skip_hls_stream=skip_hls_stream,
             cache_session=self._cache_session,
         )
         course_name = course.title
         selected_chapters = course.get_chapters(
             chapter_number=chapter_number,
             chapter_start=chapter_start,
             chapter_end=chapter_end,
         )
         total_lectures = course.lectures
         total_chapters = course.chapters
         logger.success(msg=course_name, course=True)
         logger.info(msg=f"Chapter(s) ({total_chapters})", new_line=True)
         logger.info(msg=f"Lecture(s) ({total_lectures})", new_line=True)

         for chapter in selected_chapters:
             chapter_id = chapter.id
             chapter_title = chapter.title
             selected_lectures = chapter.get_lectures(
                 lecture_number=lecture_number,
                 lecture_start=lecture_start,
                 lecture_end=lecture_end,
             )
             lectures_count = chapter.lectures
             logger.info(
                 msg=f"Chapter ({chapter_title}-{chapter_id})",
                 new_line=True,
                 before=True,
                 cc=15,
                 cc_msg=15,
             )
             logger.info(msg=f"Lecture(s) ({lectures_count})",
                         new_line=True)

             for lecture in selected_lectures:
                 lecture_id = lecture.id
                 lecture_streams = lecture.streams
                 lecture_best = lecture.getbest()
                 lecture_assets = lecture.assets
                 lecture_subtitles = lecture.subtitles
                 # Lectures without any stream are not listed at all.
                 if not lecture_streams:
                     continue

                 logger.info(
                     indent="     - ",
                     msg="duration   : ",
                     new_line=True,
                     cc=80,
                     cc_msg=10,
                     post_msg=f"{lecture.duration}.",
                     cc_pmsg=80,
                 )
                 logger.info(
                     indent="     - ",
                     msg="Lecture id : ",
                     new_line=True,
                     cc=80,
                     cc_msg=10,
                     post_msg=f"{lecture_id}.",
                     cc_pmsg=80,
                 )

                 for stream in lecture_streams:
                     best_tag = None
                     if stream.is_hls:
                         # HLS streams are listed without a size.
                         size_text = ""
                     else:
                         size = stream.get_filesize()
                         if size == 0:
                             continue
                         size_text = to_human_readable(size)
                         if lecture_best.quality == stream.quality:
                             best_tag = "(Best)"
                     logger.info(
                         indent=bullet,
                         msg=row_format.format(f"{stream}",
                                               f"{stream.quality}p",
                                               size_text),
                         new_line=True,
                         cc=15,
                         post_msg=best_tag,
                         cc_pmsg=30,
                     )

                 for asset in lecture_assets or ():
                     if asset.mediatype == "external_link":
                         continue
                     size = asset.get_filesize()
                     if size == 0:
                         continue
                     size_text = to_human_readable(size)
                     logger.info(
                         indent=bullet,
                         msg=row_format.format(f"{asset}", asset.extension,
                                               size_text),
                         new_line=True,
                         cc=15,
                     )

                 for sub in lecture_subtitles or ():
                     size = sub.get_filesize()
                     if size == 0:
                         continue
                     size_text = to_human_readable(size)
                     logger.info(
                         indent=bullet,
                         msg=row_format.format(f"{sub}", sub.extension,
                                               size_text),
                         new_line=True,
                         cc=15,
                     )
         print("")
Esempio n. 12
0
    def _real_extract(self, url="", skip_hls_stream=False):
        """Build the nested course description for ``url``.

        The curriculum entries returned under ``"results"`` by
        ``self._extract_course_json`` are walked in order: ``chapter``
        entries open a new chapter, ``lecture`` entries are appended to the
        current chapter, and a leading ``lecture``/``quiz`` entry opens an
        implicit first chapter when none exists yet.

        Args:
            url: Course URL passed to the lookup helpers.
            skip_hls_stream: Forwarded to ``self._extract_sources``.

        Returns:
            dict with the keys ``access_token``, ``course_id``, ``title``,
            ``course_title``, ``chapters``, ``total_chapters`` and
            ``total_lectures``. The two totals are always present (0 for an
            empty curriculum).

        If the response carries a ``"detail"`` message, it is logged and the
        process exits.
        """
        course_id, course_info = self._extract_course_info(url)

        # Defaults keep the names bound if the lookup did not return a dict.
        title = course_title = portal_name = None
        if course_info and isinstance(course_info, dict):
            title = self._clean(course_info.get("title"))
            course_title = course_info.get("published_title")
            portal_name = course_info.get("portal_name")

        course_json = self._extract_course_json(url, course_id, portal_name)
        course = course_json.get("results")
        resource = course_json.get("detail")

        if resource:
            if not self._cookies:
                logger.error(
                    msg=
                    f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
                )
            if self._cookies:
                logger.error(
                    msg=f"Udemy Says : '{resource}' cookies seems to be expired"
                )
            logger.info(
                msg="Trying to logout now...",
                new_line=True,
            )
            # A real logout is only issued for credential-based sessions.
            if not self._cookies:
                self._logout()
            logger.info(
                msg="Logged out successfully.",
                new_line=True,
            )
            sys.exit(0)

        chapters = []
        _udemy = {
            "access_token": self._access_token,
            "course_id": course_id,
            "title": title,
            "course_title": course_title,
            "chapters": chapters,
        }

        def _open_chapter(entry, chapter_id):
            """Append a new, empty chapter record built from ``entry``."""
            chapter_index = entry.get("object_index")
            chapters.append({
                "chapter_title": "{0:02d} ".format(chapter_index)
                                 + self._clean(entry.get("title")),
                "chapter_id": chapter_id,
                "chapter_index": chapter_index,
                "lectures": [],
            })

        lectures = []
        lecture_counter = 0
        for entry in course or []:
            clazz = entry.get("_class")
            asset = entry.get("asset")
            supp_assets = entry.get("supplementary_assets")

            if clazz == "chapter":
                lecture_counter = 0
                lectures = []
                _open_chapter(entry, entry.get("id"))
            elif clazz == "lecture":
                lecture_counter += 1
                lecture_id = entry.get("id")
                if not chapters:
                    # Curriculum starts with a lecture: use it to open an
                    # implicit first chapter.
                    lectures = []
                    _open_chapter(entry, lecture_id)

                if lecture_id:
                    # Treat a missing/non-dict asset as empty so the lookups
                    # below cannot fail on ``None``.
                    asset_info = asset if isinstance(asset, dict) else {}
                    # Resolve the key fallback before lowering; calling
                    # ``.lower()`` on a missing first key raised before the
                    # alternative key was ever consulted.
                    asset_type = (asset_info.get("asset_type")
                                  or asset_info.get("assetType")
                                  or "").lower()

                    retVal = []
                    if asset_type in ("article", "video"):
                        if isinstance(supp_assets, list) and supp_assets:
                            retVal = self._extract_supplementary_assets(
                                supp_assets)
                    elif asset_type == "e-book":
                        retVal = self._extract_ebook(asset)
                    elif asset_type == "file":
                        retVal = self._extract_file(asset)
                    elif asset_type == "presentation":
                        retVal = self._extract_ppt(asset)
                    elif asset_type == "audio":
                        retVal = self._extract_audio(asset)

                    logger.progress(
                        msg="Downloading course information .. ")
                    lecture_index = entry.get("object_index")
                    lecture_title = "{0:03d} ".format(
                        lecture_counter) + self._clean(entry.get("title"))
                    data = asset_info.get("stream_urls")
                    if data and isinstance(data, dict):
                        sources = self._extract_sources(
                            data.get("Video"),
                            skip_hls_stream=skip_hls_stream)
                        subtitles = self._extract_subtitles(
                            asset_info.get("captions"))
                        lectures.append({
                            "index": lecture_counter,
                            "lecture_index": lecture_index,
                            "lectures_id": lecture_id,
                            "lecture_title": lecture_title,
                            "duration": asset_info.get("time_estimation"),
                            "assets": retVal,
                            "assets_count": len(retVal),
                            "sources": sources,
                            "subtitles": subtitles,
                            "subtitle_count": len(subtitles),
                            "sources_count": len(sources),
                        })
                    else:
                        # No stream URLs: record the lecture as HTML content.
                        lectures.append({
                            "index": lecture_counter,
                            "lecture_index": lecture_index,
                            "lectures_id": lecture_id,
                            "lecture_title": lecture_title,
                            "html_content": asset_info.get("body"),
                            "extension": "html",
                            "assets": retVal,
                            "assets_count": len(retVal),
                            "subtitle_count": 0,
                            "sources_count": 0,
                        })

                # The most recently opened chapter is always the current one.
                chapters[-1]["lectures"] = lectures
                chapters[-1]["lectures_count"] = len(lectures)
            elif clazz == "quiz":
                if not chapters:
                    # Curriculum starts with a quiz: open an implicit first
                    # chapter for it.
                    lectures = []
                    lecture_counter = 0
                    _open_chapter(entry, entry.get("id"))
                chapters[-1]["lectures"] = lectures
                chapters[-1]["lectures_count"] = len(lectures)

        # Always publish the totals so callers can index them even for an
        # empty curriculum.
        _udemy["total_chapters"] = len(chapters)
        _udemy["total_lectures"] = sum(
            chapter.get("lectures_count", 0) for chapter in chapters)

        return _udemy