def _login(self, username="", password="", cookies="", cache_session=False):
    """Create a Udemy session from cookies or from username/password.

    A cached session's cookies are reused when one exists and credentials
    were supplied. When no session exists and no cookies were passed, the
    credentials are taken from the stored configuration and, failing that,
    the user is prompted for an access token or a username/password pair.

    Stores the results on ``self._session`` and ``self._access_token`` (and
    ``self._cookies`` for cookie logins).

    Args:
        username: Account user name; may be empty.
        password: Account password; may be empty.
        cookies: Raw cookie string containing ``access_token``; may be empty.
        cache_session: Forwarded to ``UdemyAuth``.

    Returns:
        ``{"login": "successful"}`` when a session object was obtained,
        otherwise ``{"login": "failed"}``.

    Exits the process when neither cookies nor credentials are available.
    """
    # NOTE(review): the prompt calls and the two result literals were masked
    # in the reviewed text; they are restored here to the values the
    # `_fetch_course` callers compare against ("successful" / "failed").
    # Confirm `getpass.getpass` and the prompt nesting against the project.

    # check if we already have session on udemy.
    auth = UdemyAuth(cache_session=cache_session)
    is_exists, conf = auth.is_session_exists()
    if is_exists and username and password:
        logger.info(
            msg="Using existing session..",
            new_line=True,
        )
        cookies = conf.get("cookies")
    if not is_exists and not cookies:
        cookies = None
        if not username and not password:
            logger.info(
                msg="Updating session cookie..",
                new_line=True,
            )
            username = conf.get("username")
            password = conf.get("password")
            if not username and not password and not cookies:
                print("")
                cookies = getpass.get_access_token(prompt="Access Token : ")
                if not cookies:
                    username = getpass.getuser(prompt="Username : ")
                    password = getpass.getpass(prompt="Password : ")
                print("\n")
    if not cookies and not username and not password:
        logger.error(
            msg="You should either provide Fresh Access Token or Username/Password to create new udemy session.."
        )
        sys.exit(0)
    if not cookies:
        auth.username = username
        auth.password = password
        self._session, self._access_token = auth.authenticate()
    else:
        self._cookies = extract_cookie_string(raw_cookies=cookies)
        self._access_token = self._cookies.get("access_token")
        client_id = self._cookies.get("client_id")
        self._session, _ = auth.authenticate(
            access_token=self._access_token, client_id=client_id
        )
        # The final check below allows for a missing session, so do not
        # dereference it blindly; a failed cookie login must report "failed"
        # rather than raise AttributeError.
        if self._session is not None:
            self._session._session.cookies.update(self._cookies)
    if self._session is not None:
        return {"login": "successful"}
    return {"login": "failed"}
def _fetch_course(self): auth = {} if not self._cookies: auth = self._login(username=self._username, password=self._password) if not auth and self._cookies: auth = self._login(cookies=self._cookies) if auth.get("login") == "successful": logger.info(msg="Logged in successfully.", new_line=True) logger.info(msg="Fetching all enrolled course(s) url(s)..") self._courses = self._extract_subscribed_courses() time.sleep(1) logger.success(msg="Fetching all enrolled course(s) url(s).. ") self._logout() if auth.get("login") == "failed": logger.error(msg="Failed to login ..\n") sys.exit(0)
def download_lecture(self, lecture, filepath, current, total, quality):
    """Download a single lecture and log the outcome.

    When ``quality`` is given, the lecture is first swapped for the stream
    returned by ``lecture.get_quality(quality)``. Nothing happens when no
    lecture (or no matching stream) is available. A keyboard interrupt is
    logged and terminates the process.
    """
    if quality and lecture:
        lecture = lecture.get_quality(quality)
    if not lecture:
        return

    title = lecture.title
    logger.info(
        msg=f"Lecture(s) : ({current} of {total})", new_line=True, before=True
    )
    logger.info(msg=f"Downloading ({title})", new_line=True)
    try:
        result = lecture.download(
            filepath=filepath,
            quiet=True,
            callback=self.show_progress,
        )
        status = result.get("msg")
        if status == "download":
            logger.info(msg=f"Downloaded ({title})", new_line=True)
        elif status == "already downloaded":
            logger.already_downloaded(msg=f"Lecture : '{title}'")
        else:
            logger.download_skipped(msg=f"Lecture : '{title}' ", reason=status)
    except KeyboardInterrupt:
        logger.error(msg="User Interrupted..", new_line=True)
        sys.exit(0)
def download_assets(self, assets, filepath):
    """Download every asset in ``assets`` into ``filepath`` and log each outcome.

    An empty or missing ``assets`` collection is a no-op. A keyboard
    interrupt is logged and terminates the process.
    """
    for asset in assets or ():
        title = asset.filename
        logger.info(msg="Downloading asset(s)", new_line=True, before=True)
        logger.info(msg=f"Downloading ({title})", new_line=True)
        try:
            result = asset.download(
                filepath=filepath,
                quiet=True,
                callback=self.show_progress,
            )
            status = result.get("msg")
            if status == "download":
                logger.info(msg=f"Downloaded ({title})", new_line=True)
            elif status == "already downloaded":
                logger.already_downloaded(msg=f"Asset : '{title}'")
            else:
                logger.download_skipped(msg=f"Asset : '{title}' ", reason=status)
        except KeyboardInterrupt:
            logger.error(msg="User Interrupted..", new_line=True)
            sys.exit(0)
def download_subtitles(self, subtitles, filepath, language="en", keep_vtt=False):
    """Download the subtitles for ``language`` and convert the fresh ones.

    When a language is given, the candidate list is replaced by
    ``subtitles[0].get_subtitle(language)``. Each newly downloaded file is
    handed to ``self.convert``; files that already exist or were skipped are
    only logged. A keyboard interrupt is logged and terminates the process.
    """
    if language and subtitles:
        subtitles = subtitles[0].get_subtitle(language)
    if not subtitles:
        return

    for sub in subtitles:
        title = f"{sub.title}.{sub.language}"
        filename = os.path.join(filepath, sub.filename)
        logger.info(msg="Downloading subtitle(s)", new_line=True, before=True)
        logger.info(msg=f"Downloading ({title})", new_line=True)
        try:
            result = sub.download(
                filepath=filepath,
                quiet=True,
                callback=self.show_progress,
            )
            status = result.get("msg")
            if status == "download":
                logger.info(msg=f"Downloaded ({title})", new_line=True)
                self.convert(filename=filename, keep_vtt=keep_vtt)
            elif status == "already downloaded":
                logger.already_downloaded(msg=f"Subtitle : '{title}'")
            else:
                logger.download_skipped(
                    msg=f"Subtitle : '{title}' ", reason=status
                )
        except KeyboardInterrupt:
            logger.error(msg="User Interrupted..", new_line=True)
            sys.exit(0)
def extract_cookie_string(raw_cookies):
    """Pull the ``access_token`` value out of a raw cookie string.

    The key is matched case-insensitively and the value is the run of word
    characters that follows ``access_token=``.

    Returns:
        A dict with the single key ``"access_token"``.

    Logs an error and exits the process when the search itself fails or no
    token is present.
    """
    try:
        match = re.search(
            r"(?i)(?:access_token=(?P<access_token>\w+))", raw_cookies
        )
    except Exception as error:
        logger.error(
            msg=f"Cookies error, {error}, unable to extract access_token from cookies."
        )
        sys.exit(0)

    if not match:
        logger.error(msg="Unable to find access_token, proper cookies required")
        logger.info(
            msg="follow: https://github.com/r0oth3x49/udemy-dl#how-to-login-with-cookie",
            new_line=True,
        )
        sys.stdout.flush()
        sys.exit(0)

    return {"access_token": match.group("access_token")}
def _login(self, username="", password="", cookies=""):
    """Create a Udemy session from cookies or from username/password.

    A cached session's cookies are reused when one exists and credentials
    were supplied. When no session exists and no cookies were passed, the
    credentials are taken from the stored configuration and, failing that,
    the user is prompted for them.

    Stores the results on ``self._session`` and ``self._access_token`` (and
    ``self._cookies`` for cookie logins).

    Args:
        username: Account user name; may be empty.
        password: Account password; may be empty.
        cookies: Raw cookie string containing ``access_token``; may be empty.

    Returns:
        ``{"login": "successful"}`` when a session object was obtained,
        otherwise ``{"login": "failed"}``.
    """
    # NOTE(review): the prompt calls and the two result literals were masked
    # in the reviewed text; they are restored here to the values the
    # `_fetch_course` callers compare against ("successful" / "failed").
    # Confirm `getpass.getpass` and the prompt nesting against the project.

    # check if we already have session on udemy.
    auth = UdemyAuth()
    is_exists, conf = auth.is_session_exists()
    if is_exists and username and password:
        logger.info(
            msg="Using existing session..",
            new_line=True,
        )
        cookies = conf.get("cookies")
    # Only fall back to stored/prompted credentials when the caller gave no
    # cookies; previously caller-supplied cookies were thrown away whenever
    # no cached session existed, which made cookie login impossible.
    if not is_exists and not cookies:
        cookies = None
        if not username and not password:
            logger.info(
                msg="Updating session cookie..",
                new_line=True,
            )
            username = conf.get("username")
            password = conf.get("password")
            if not username and not password:
                print("")
                username = getpass.getuser(prompt="Username : ")
                password = getpass.getpass(prompt="Password : ")
                print("\n")
    if not cookies:
        auth.username = username
        auth.password = password
        self._session, self._access_token = auth.authenticate()
    else:
        self._cookies = extract_cookie_string(raw_cookies=cookies)
        self._access_token = self._cookies.get("access_token")
        client_id = self._cookies.get("client_id")
        self._session, _ = auth.authenticate(
            access_token=self._access_token, client_id=client_id
        )
        # The final check below allows for a missing session, so do not
        # dereference it blindly; a failed cookie login must report "failed"
        # rather than raise AttributeError.
        if self._session is not None:
            self._session._session.cookies.update(self._cookies)
    if self._session is not None:
        return {"login": "successful"}
    return {"login": "failed"}
def _fetch_course(self): if self._have_basic: return auth = {} if not self._cookies: auth = self._login( username=self._username, password=self._password, cache_session=self._cache_session, ) if not auth and self._cookies: auth = self._login(cookies=self._cookies, cache_session=self._cache_session) if auth.get("login") == "successful": logger.info(msg="Logged in successfully.", new_line=True) logger.info(msg="Downloading course information ..") self._info = self._real_extract( self._url, skip_hls_stream=self._skip_hls_stream) time.sleep(1) logger.success(msg="Downloaded course information .. ") access_token = self._info["access_token"] self._id = self._info["course_id"] self._title = self._info["course_title"] self._chapters_count = self._info["total_chapters"] self._total_lectures = self._info["total_lectures"] self._chapters = [ InternUdemyChapter(z, access_token=access_token) for z in self._info["chapters"] ] logger.info( msg="Trying to logout now...", new_line=True, ) if not self._cookies: self._logout() logger.info( msg="Logged out successfully.", new_line=True, ) self._have_basic = True if auth.get("login") == "failed": logger.error(msg="Failed to login ..\n") sys.exit(0)
def _extract_course_info(self, url):
    """Find the course behind ``url`` among the account's course listings.

    The listings are queried in order -- subscribed courses, "my courses",
    subscribed collections, archived courses -- stopping at the first one
    that yields a match.

    Returns:
        ``(course_id, course)`` where ``course`` is the matching entry with
        ``"portal_name"`` added.

    Logs the failure, logs out (credential sessions only) and exits the
    process when no listing contains the course.
    """
    portal_name, course_name = self._course_name(url)

    # Each lookup is deferred so later listings are only requested when the
    # earlier ones did not contain the course.
    lookups = (
        lambda: self._subscribed_courses(
            portal_name=portal_name, course_name=course_name
        ),
        lambda: self._my_courses(portal_name=portal_name),
        lambda: self._subscribed_collection_courses(portal_name=portal_name),
        lambda: self._archived_courses(portal_name=portal_name),
    )
    course = {}
    for fetch in lookups:
        course = self.__extract_course(response=fetch(), course_name=course_name)
        if course:
            break

    if course:
        course.update({"portal_name": portal_name})
        return course.get("id"), course

    logger.failed(msg="Downloading course information, course id not found .. ")
    logger.info(
        msg="It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
        new_line=True,
    )
    logger.info(msg="Trying to logout now...", new_line=True)
    if not self._cookies:
        self._logout()
    logger.info(msg="Logged out successfully.", new_line=True)
    sys.exit(0)
def course_download(
    self,
    path="",
    quality="",
    language="en",
    dl_assets=True,
    dl_lecture=True,
    dl_subtitles=True,
    chapter_number=None,
    chapter_start=None,
    chapter_end=None,
    lecture_number=None,
    lecture_start=None,
    lecture_end=None,
    keep_vtt=False,
    skip_hls_stream=False,
):
    """Download lectures, assets and subtitles for every configured course.

    Args:
        path: Base output directory; ``~`` is expanded. When empty, each
            course is written to a directory named after the course,
            relative to the current working directory.
        quality: Preferred stream quality, forwarded to ``download_lecture``.
        language: Subtitle language, forwarded to ``download_subtitles``.
        dl_assets: Download lecture assets.
        dl_lecture: Download (or dump, for HTML lectures) the lecture itself.
        dl_subtitles: Download subtitles.
        chapter_number, chapter_start, chapter_end: Chapter selection,
            forwarded to ``course.get_chapters``.
        lecture_number, lecture_start, lecture_end: Lecture selection,
            forwarded to ``chapter.get_lectures``.
        keep_vtt: Forwarded to ``download_subtitles``.
        skip_hls_stream: Forwarded to ``udemy.course``.
    """
    if self.cookies:
        logger.info(msg="Trying to login using session cookie", new_line=True)
    else:
        logger.info(msg="Trying to login as", status=self.username)

    # Resolve the base directory once. `course_path` must always be defined,
    # including for the default empty `path`, otherwise the first use below
    # would fail with a NameError.
    base_path = os.path.expanduser(path) if path else ""

    # The progress counter starts just before the first requested lecture;
    # an explicit start takes precedence over a single lecture number.
    first_lecture = lecture_start or lecture_number
    first_index = max(first_lecture - 1, 0) if first_lecture else 0

    for url in self.url_or_courses:
        course = udemy.course(
            url=url,
            username=self.username,
            password=self.password,
            cookies=self.cookies,
            skip_hls_stream=skip_hls_stream,
            cache_session=self._cache_session,
        )
        course_name = course.title
        course_path = os.path.join(base_path, course_name)
        chapters = course.get_chapters(
            chapter_number=chapter_number,
            chapter_start=chapter_start,
            chapter_end=chapter_end,
        )
        total_lectures = course.lectures
        total_chapters = course.chapters
        logger.success(msg=course_name, course=True)
        logger.info(msg=f"Chapter(s) ({total_chapters})", new_line=True)
        logger.info(msg=f"Lecture(s) ({total_lectures})", new_line=True)

        for chapter in chapters:
            chapter_index = chapter.index
            chapter_title = chapter.title
            lectures = chapter.get_lectures(
                lecture_number=lecture_number,
                lecture_start=lecture_start,
                lecture_end=lecture_end,
            )
            lectures_count = chapter.lectures
            filepath = to_filepath(course_path, chapter_title)
            logger.set_log_filepath(course_path)
            chapter_progress = (
                chapter_index
                if chapter_number
                else f"{chapter_index} of {total_chapters}"
            )
            logger.info(
                msg=f"Downloading chapter : ({chapter_progress})",
                new_line=True,
                before=True,
                cc=80,
                cc_msg=80,
            )
            logger.info(
                msg=f"Chapter ({chapter_title})", new_line=True, cc=15, cc_msg=60
            )
            logger.info(
                msg=f"Found ({lectures_count}) lecture(s).",
                new_line=True,
            )

            lecture_index = first_index
            for lecture in lectures:
                lecture_assets = lecture.assets
                lecture_subtitles = lecture.subtitles
                lecture_best = lecture.getbest()
                if dl_lecture:
                    lecture_index += 1
                    if lecture.html:
                        retval = lecture.dump(filepath=filepath)
                        msg = retval.get("msg")
                        if msg not in ("download", "already downloaded"):
                            msg = f"Lecture: '{lecture.title}.{lecture.extension}' failed to dump, reason: {msg}"
                            logger.warning(msg=msg, silent=True)
                    self.download_lecture(
                        lecture_best,
                        filepath,
                        lecture_index,
                        lectures_count,
                        quality,
                    )
                if dl_assets:
                    self.download_assets(lecture_assets, filepath)
                if dl_subtitles:
                    self.download_subtitles(
                        lecture_subtitles,
                        filepath,
                        language=language,
                        keep_vtt=keep_vtt,
                    )
        print("")
def course_listdown(
    self,
    chapter_number=None,
    chapter_start=None,
    chapter_end=None,
    lecture_number=None,
    lecture_start=None,
    lecture_end=None,
    skip_hls_stream=False,
):
    """List chapters, lectures, streams, assets and subtitles of each course.

    Nothing is downloaded. For every selected lecture that has streams, the
    duration, id and one row per stream are logged, followed by rows for its
    assets and subtitles. Entries reporting a size of zero are skipped, as
    are external-link assets. HLS streams are listed without a size.
    """
    if self.cookies:
        logger.info(msg="Trying to login using session cookie", new_line=True)
    else:
        logger.info(msg="Trying to login as", status=self.username)

    row_format = "{:<22} {:<8}{}"
    indent = "\t- "

    for url in self.url_or_courses:
        course = udemy.course(
            url=url,
            username=self.username,
            password=self.password,
            cookies=self.cookies,
            skip_hls_stream=skip_hls_stream,
            cache_session=self._cache_session,
        )
        course_name = course.title
        chapters = course.get_chapters(
            chapter_number=chapter_number,
            chapter_start=chapter_start,
            chapter_end=chapter_end,
        )
        total_lectures = course.lectures
        total_chapters = course.chapters
        logger.success(msg=course_name, course=True)
        logger.info(msg=f"Chapter(s) ({total_chapters})", new_line=True)
        logger.info(msg=f"Lecture(s) ({total_lectures})", new_line=True)

        for chapter in chapters:
            chapter_id = chapter.id
            chapter_title = chapter.title
            lectures = chapter.get_lectures(
                lecture_number=lecture_number,
                lecture_start=lecture_start,
                lecture_end=lecture_end,
            )
            lectures_count = chapter.lectures
            logger.info(
                msg=f"Chapter ({chapter_title}-{chapter_id})",
                new_line=True,
                before=True,
                cc=15,
                cc_msg=15,
            )
            logger.info(msg=f"Lecture(s) ({lectures_count})", new_line=True)

            for lecture in lectures:
                lecture_id = lecture.id
                lecture_streams = lecture.streams
                lecture_best = lecture.getbest()
                lecture_assets = lecture.assets
                lecture_subtitles = lecture.subtitles
                if not lecture_streams:
                    continue

                logger.info(
                    indent=" - ",
                    msg="duration : ",
                    new_line=True,
                    cc=80,
                    cc_msg=10,
                    post_msg=f"{lecture.duration}.",
                    cc_pmsg=80,
                )
                logger.info(
                    indent=" - ",
                    msg="Lecture id : ",
                    new_line=True,
                    cc=80,
                    cc_msg=10,
                    post_msg=f"{lecture_id}.",
                    cc_pmsg=80,
                )

                for stream in lecture_streams:
                    if stream.is_hls:
                        size_label = ""
                    else:
                        size = stream.get_filesize()
                        if size == 0:
                            continue
                        size_label = to_human_readable(size)
                    best_tag = (
                        "(Best)" if lecture_best.quality == stream.quality else None
                    )
                    row = row_format.format(
                        f"{stream}", f"{stream.quality}p", size_label
                    )
                    logger.info(
                        indent=indent,
                        msg=row,
                        new_line=True,
                        cc=15,
                        post_msg=best_tag,
                        cc_pmsg=30,
                    )

                for asset in lecture_assets or ():
                    if asset.mediatype == "external_link":
                        continue
                    size = asset.get_filesize()
                    if size == 0:
                        continue
                    row = row_format.format(
                        f"{asset}", asset.extension, to_human_readable(size)
                    )
                    logger.info(
                        indent=indent,
                        msg=row,
                        new_line=True,
                        cc=15,
                    )

                for sub in lecture_subtitles or ():
                    size = sub.get_filesize()
                    if size == 0:
                        continue
                    row = row_format.format(
                        f"{sub}", sub.extension, to_human_readable(size)
                    )
                    logger.info(
                        indent=indent,
                        msg=row,
                        new_line=True,
                        cc=15,
                    )
        print("")
def _real_extract(self, url="", skip_hls_stream=False):
    """Build the nested chapter/lecture description of a course.

    Walks the curriculum entries returned by ``self._extract_course_json``
    and groups lectures under their chapter. When the curriculum starts with
    a lecture or quiz instead of a chapter, that entry opens an implicit
    first chapter.

    Args:
        url: Course URL.
        skip_hls_stream: Forwarded to ``self._extract_sources``.

    Returns:
        A dict with ``access_token``, ``course_id``, ``title``,
        ``course_title``, ``chapters`` (each holding ``chapter_title``,
        ``chapter_id``, ``chapter_index``, ``lectures`` and, once lectures
        were attached, ``lectures_count``), ``total_chapters`` and
        ``total_lectures``. Empty when no course information was found.

    Logs the server message, logs out (credential sessions only) and exits
    the process when the curriculum response carries a ``detail`` field.
    """
    _udemy = {}
    course_id, course_info = self._extract_course_info(url)

    if course_info and isinstance(course_info, dict):
        title = self._clean(course_info.get("title"))
        course_title = course_info.get("published_title")
        portal_name = course_info.get("portal_name")

        course_json = self._extract_course_json(url, course_id, portal_name)
        course = course_json.get("results")
        resource = course_json.get("detail")

        if resource:
            if not self._cookies:
                logger.error(
                    msg=f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
                )
            if self._cookies:
                logger.error(
                    msg=f"Udemy Says : '{resource}' cookies seems to be expired"
                )
            logger.info(
                msg="Trying to logout now...",
                new_line=True,
            )
            if not self._cookies:
                self._logout()
            logger.info(
                msg="Logged out successfully.",
                new_line=True,
            )
            sys.exit(0)

        _udemy["access_token"] = self._access_token
        _udemy["course_id"] = course_id
        _udemy["title"] = title
        _udemy["course_title"] = course_title
        _udemy["chapters"] = []
        # Index of the chapter currently being filled; -1 until one is opened.
        counter = -1

        if course:
            lecture_counter = 0
            for entry in course:
                clazz = entry.get("_class")
                asset = entry.get("asset")
                supp_assets = entry.get("supplementary_assets")

                if clazz == "chapter":
                    lecture_counter = 0
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(
                        chapter_index) + self._clean(entry.get("title"))
                    # NOTE: "chapters" holds dicts, so this membership test
                    # on a string is always true; kept as in the original.
                    if chapter_title not in _udemy["chapters"]:
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": entry.get("id"),
                            "chapter_index": chapter_index,
                            "lectures": [],
                        })
                        counter += 1

                elif clazz == "lecture":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        # Curriculum starts with a lecture: open an implicit
                        # chapter named after it.
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1

                    if lecture_id:
                        retVal = []
                        # The asset is type-checked before use, so it may be
                        # absent; read through a dict view so a missing asset
                        # yields an HTML-style entry instead of crashing.
                        asset_info = asset if isinstance(asset, dict) else {}
                        if isinstance(asset, dict):
                            # Fall back to the camelCase key *before* calling
                            # .lower(); lowering first raised AttributeError
                            # whenever "asset_type" was missing.
                            asset_type = (asset.get("asset_type")
                                          or asset.get("assetType")
                                          or "").lower()
                            if asset_type in ("article", "video"):
                                if (isinstance(supp_assets, list)
                                        and len(supp_assets) > 0):
                                    retVal = self._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "e-book":
                                retVal = self._extract_ebook(asset)
                            elif asset_type == "file":
                                retVal = self._extract_file(asset)
                            elif asset_type == "presentation":
                                retVal = self._extract_ppt(asset)
                            elif asset_type == "audio":
                                retVal = self._extract_audio(asset)

                        logger.progress(
                            msg="Downloading course information .. ")
                        lecture_index = entry.get("object_index")
                        lecture_title = "{0:03d} ".format(
                            lecture_counter) + self._clean(entry.get("title"))

                        data = asset_info.get("stream_urls")
                        if data and isinstance(data, dict):
                            sources = data.get("Video")
                            tracks = asset_info.get("captions")
                            duration = asset_info.get("time_estimation")
                            sources = self._extract_sources(
                                sources, skip_hls_stream=skip_hls_stream)
                            subtitles = self._extract_subtitles(tracks)
                            sources_count = len(sources)
                            subtitle_count = len(subtitles)
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "duration": duration,
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "sources": sources,
                                "subtitles": subtitles,
                                "subtitle_count": subtitle_count,
                                "sources_count": sources_count,
                            })
                        else:
                            # No stream URLs: keep the lecture body as HTML.
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "html_content": asset_info.get("body"),
                                "extension": "html",
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "subtitle_count": 0,
                                "sources_count": 0,
                            })

                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)

                elif clazz == "quiz":
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        # Curriculum starts with a quiz: open an implicit
                        # chapter named after it.
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            lecture_counter = 0
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1

                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)

        _udemy["total_chapters"] = len(_udemy["chapters"])
        _udemy["total_lectures"] = sum(
            entry.get("lectures_count", 0)
            for entry in _udemy["chapters"]
            if entry
        )

    return _udemy