def _extract_large_course_content(self, url):
    """Fetch a course listing page by page and merge all pages into one payload.

    If *url* ends with the page size ``10000`` it is re-requested with a page
    size of ``50``; the ``next`` links returned by the API are then followed
    until exhausted, appending every page's ``results`` to the first page.

    :param url: course-content API URL.
    :return: the first page's decoded JSON dict, with ``results`` extended by
        the results of all following pages.

    Logs the problem and terminates the process via ``sys.exit(0)`` on a
    connection error.
    """
    # Swap only the trailing page-size value.  A blanket ``str.replace`` would
    # also rewrite any other "10000" in the URL (for example inside a course
    # id such as 3100004) and request the wrong resource.
    page_size = "10000"
    if url.endswith(page_size):
        url = url[: -len(page_size)] + "50"

    try:
        data = self._session._get(url).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)

    _next = data.get("next")
    while _next:
        logger.progress(msg="Downloading course information .. ")
        try:
            resp = self._session._get(_next).json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        _next = resp.get("next")
        results = resp.get("results")
        if results and isinstance(results, list):
            # setdefault keeps the merge working even if the first page
            # carried no "results" key at all.
            data.setdefault("results", []).extend(results)
    return data
def _login(self, username="", password="", cookies="", cache_session=False):
    """Authenticate against Udemy and store the resulting session on ``self``.

    Credentials are resolved in this order: a cached session (when one exists
    and a username/password pair was supplied), the supplied arguments, the
    cached configuration, and finally interactive prompts.

    :param username: account user name, may be empty.
    :param password: account password, may be empty.
    :param cookies: raw cookie string containing ``access_token``, may be empty.
    :param cache_session: forwarded to ``UdemyAuth``.
    :return: ``{"login": "successful"}`` when a session object was obtained,
        otherwise ``{"login": "failed"}``.

    Terminates the process via ``sys.exit(0)`` when no credentials of any kind
    could be obtained.
    """
    # NOTE(review): the original text of this method contained "******"
    # masking residue in three places.  The two return values were restored
    # from the "successful"/"failed" comparisons made by _fetch_course.  The
    # password prompt call (getpass.getpass) is a best-effort reconstruction
    # -- confirm against the project's getpass helper.

    # check if we already have session on udemy.
    auth = UdemyAuth(cache_session=cache_session)
    is_exists, conf = auth.is_session_exists()
    if is_exists and username and password:
        logger.info(
            msg="Using existing session..",
            new_line=True,
        )
        cookies = conf.get("cookies")

    if not is_exists and not cookies:
        cookies = None
        if not username and not password:
            logger.info(
                msg="Updating session cookie..",
                new_line=True,
            )
            username = conf.get("username")
            password = conf.get("password")
        if not username and not password and not cookies:
            # Nothing usable was supplied or cached: ask interactively, the
            # access token first and the username/password pair as fallback.
            print("")
            cookies = getpass.get_access_token(prompt="Access Token : ")
            if not cookies:
                username = getpass.getuser(prompt="Username : ")
                password = getpass.getpass(prompt="Password : ")
                print("\n")
            if not cookies and not username and not password:
                logger.error(
                    msg="You should either provide Fresh Access Token or Username/Password to create new udemy session.."
                )
                sys.exit(0)

    if cookies:
        # Token-based login: only the access_token is guaranteed to be in the
        # parsed cookies, so client_id may be None here.
        self._cookies = extract_cookie_string(raw_cookies=cookies)
        self._access_token = self._cookies.get("access_token")
        client_id = self._cookies.get("client_id")
        self._session, _ = auth.authenticate(
            access_token=self._access_token, client_id=client_id
        )
        self._session._session.cookies.update(self._cookies)
    else:
        auth.username = username
        auth.password = password
        self._session, self._access_token = auth.authenticate()

    if self._session is not None:
        return {"login": "successful"}
    return {"login": "failed"}
def _my_courses(self, portal_name):
    """Return the ``results`` list of the "my courses" endpoint for a portal.

    :param portal_name: value substituted into ``MY_COURSES_URL``.
    :return: the ``results`` entry of the decoded JSON response, or ``[]``
        when the response has no such key.

    Any failure while building the URL, requesting it or decoding the body is
    logged and ends the process via ``sys.exit(0)``.
    """
    courses = []
    try:
        endpoint = MY_COURSES_URL.format(portal_name=portal_name)
        payload = self._session._get(endpoint).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:  # ValueError is already covered by Exception
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)
    else:
        courses = payload.get("results", [])
    return courses
def _extract_subscribed_courses(self):
    """Collect the absolute URLs of every course returned by the
    ``SUBSCRIBED_COURSES`` endpoint, following pagination.

    :return: list of unique ``https://www.udemy.com...`` URLs in the order
        they were first returned by the API; an empty list when the API
        returned no entries.

    Connection problems, HTTP errors on follow-up pages and undecodable
    responses are logged and end the process via ``sys.exit(0)``.
    """

    def clean_urls(courses):
        # A dict keyed by URL removes duplicates while keeping first-seen
        # order.  The previous set(tuple(item.items())) pre-pass raised
        # TypeError on any unhashable field value and was redundant with the
        # URL-level de-duplication; list(set(...)) also shuffled the result.
        unique_urls = {}
        for entry in courses:
            logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
            url = entry.get("url")
            if url:
                unique_urls[f"https://www.udemy.com{url}"] = None
        return list(unique_urls)

    entries = []
    try:
        response = self._session._get(SUBSCRIBED_COURSES).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)
    else:
        # "or []" also covers a page whose "results" is present but null.
        entries.extend(response.get("results") or [])
        _next = response.get("next")
        logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
        while _next:
            logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
            try:
                resp = self._session._get(_next)
                resp.raise_for_status()
                resp = resp.json()
            except conn_error as error:
                logger.error(msg=f"Udemy Says: Connection error, {error}")
                time.sleep(0.8)
                sys.exit(0)
            except Exception as error:
                logger.error(msg=f"Udemy Says: error, {error}")
                time.sleep(0.8)
                sys.exit(0)
            else:
                _next = resp.get("next")
                entries.extend(resp.get("results") or [])

    if entries:
        entries = clean_urls(entries)
    return entries
def _fetch_course(self):
    """Log in, load the list of enrolled course URLs into ``self._courses``
    and log out again.

    Logs in with username/password when no cookies are set, otherwise with
    the cookies.  Ends the process via ``sys.exit(0)`` when the login result
    is ``"failed"``.
    """
    auth = {}
    if not self._cookies:
        auth = self._login(username=self._username, password=self._password)
    if not auth and self._cookies:
        auth = self._login(cookies=self._cookies)

    outcome = auth.get("login")
    if outcome == "successful":
        logger.info(msg="Logged in successfully.", new_line=True)
        logger.info(msg="Fetching all enrolled course(s) url(s)..")
        self._courses = self._extract_subscribed_courses()
        time.sleep(1)
        logger.success(msg="Fetching all enrolled course(s) url(s).. ")
        self._logout()
    elif outcome == "failed":
        logger.error(msg="Failed to login ..\n")
        sys.exit(0)
def _extract_course_json(self, url, course_id, portal_name):
    """Download the course-content JSON for one course.

    The course page *url* is sent as ``Referer``; the API URL is built from
    ``COURSE_URL``.  A 502/503 status, or any non-connection error while
    requesting or decoding, falls back to the paginated download done by
    ``_extract_large_course_content``.

    :param url: course page URL, used only as the Referer header.
    :param course_id: value substituted into ``COURSE_URL``.
    :param portal_name: value substituted into ``COURSE_URL``.
    :return: the decoded JSON payload.

    A connection error is logged and ends the process via ``sys.exit(0)``.
    """
    self._session._headers.update({"Referer": url})
    api_url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
    payload = None
    try:
        response = self._session._get(api_url)
        if response.status_code in (502, 503):
            payload = self._extract_large_course_content(url=api_url)
        else:
            payload = response.json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception:  # ValueError is already covered by Exception
        payload = self._extract_large_course_content(url=api_url)
    return payload
def _extract_course_info(self, url):
    """Find the course record that matches *url* among the user's courses.

    The lookups are tried in order -- course search, "my courses",
    collections, archived courses -- and stop at the first one that yields a
    match.

    :param url: course URL, split by ``self._course_name`` into portal and
        course name.
    :return: ``(course_id, course)`` where *course* additionally carries the
        ``portal_name`` key.

    When no lookup matches, the failure is logged, ``self._logout()`` is
    called if no cookies are in use, and the process ends via ``sys.exit(0)``.
    """
    portal_name, course_name = self._course_name(url)

    # Wrapped in callables so each request is only made when every earlier
    # lookup came back empty.
    lookups = (
        lambda: self._subscribed_courses(portal_name=portal_name,
                                         course_name=course_name),
        lambda: self._my_courses(portal_name=portal_name),
        lambda: self._subscribed_collection_courses(portal_name=portal_name),
        lambda: self._archived_courses(portal_name=portal_name),
    )
    course = {}
    for fetch in lookups:
        course = self.__extract_course(response=fetch(),
                                       course_name=course_name)
        if course:
            break

    if course:
        course.update({"portal_name": portal_name})
        return course.get("id"), course

    logger.failed(
        msg="Downloading course information, course id not found .. ")
    logger.info(
        msg="It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
        new_line=True,
    )
    logger.info(
        msg="Trying to logout now...",
        new_line=True,
    )
    if not self._cookies:
        self._logout()
    logger.info(
        msg="Logged out successfully.",
        new_line=True,
    )
    sys.exit(0)
def _fetch_course(self):
    """Log in once, extract the course description for ``self._url`` and
    cache its basic fields on the instance.

    Does nothing when ``self._have_basic`` is already set.  On a successful
    login it fills ``_info``, ``_id``, ``_title``, ``_chapters_count``,
    ``_total_lectures`` and ``_chapters``, calls ``self._logout()`` when no
    cookies are in use, and sets ``_have_basic``.  Ends the process via
    ``sys.exit(0)`` when the login result is ``"failed"``.
    """
    if self._have_basic:
        return

    auth = {}
    if not self._cookies:
        auth = self._login(
            username=self._username,
            password=self._password,
            cache_session=self._cache_session,
        )
    if not auth and self._cookies:
        auth = self._login(cookies=self._cookies,
                           cache_session=self._cache_session)

    outcome = auth.get("login")
    if outcome == "successful":
        logger.info(msg="Logged in successfully.", new_line=True)
        logger.info(msg="Downloading course information ..")
        info = self._real_extract(self._url,
                                  skip_hls_stream=self._skip_hls_stream)
        self._info = info
        time.sleep(1)
        logger.success(msg="Downloaded course information .. ")

        access_token = info["access_token"]
        self._id = info["course_id"]
        self._title = info["course_title"]
        self._chapters_count = info["total_chapters"]
        self._total_lectures = info["total_lectures"]
        chapters = []
        for chapter in info["chapters"]:
            chapters.append(
                InternUdemyChapter(chapter, access_token=access_token))
        self._chapters = chapters

        logger.info(
            msg="Trying to logout now...",
            new_line=True,
        )
        if not self._cookies:
            self._logout()
        logger.info(
            msg="Logged out successfully.",
            new_line=True,
        )
        self._have_basic = True
    elif outcome == "failed":
        logger.error(msg="Failed to login ..\n")
        sys.exit(0)
def _subscribed_collection_courses(self, portal_name):
    """Return every course contained in the user's collections, flattened.

    :param portal_name: value substituted into ``COLLECTION_URL``.
    :return: one flat list holding the ``courses`` entries of each item in
        the response's ``results``; empty when there are none.

    Connection problems and undecodable responses are logged and end the
    process via ``sys.exit(0)``.
    """
    url = COLLECTION_URL.format(portal_name=portal_name)
    courses_lists = []
    try:
        webpage = self._session._get(url).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:
        logger.error(msg=f"Udemy Says: {error}")
        time.sleep(0.8)
        sys.exit(0)
    else:
        # Plain loop instead of a comprehension evaluated only for its side
        # effects (which built and discarded a list of None).
        for collection in webpage.get("results") or []:
            courses_lists.extend(collection.get("courses") or [])
    return courses_lists
def extract_cookie_string(raw_cookies):
    """Pull the ``access_token`` value out of a raw cookie string.

    The key is matched case-insensitively and the value is the run of word
    characters that follows ``access_token=``; the first occurrence wins.

    :param raw_cookies: cookie text to search.
    :return: ``{"access_token": <value>}``.

    When the search itself fails or no token is present, the problem is
    logged and the process ends via ``sys.exit(0)``.
    """
    try:
        match = re.search(
            r"(?i)(?:access_token=(?P<access_token>\w+))", raw_cookies
        )
    except Exception as error:
        logger.error(
            msg=f"Cookies error, {error}, unable to extract access_token from cookies."
        )
        sys.exit(0)

    # re.search returns None when nothing matched; Match objects are always
    # truthy, so this is the same test as "not match".
    if match is None:
        logger.error(msg="Unable to find access_token, proper cookies required")
        logger.info(
            msg="follow: https://github.com/r0oth3x49/udemy-dl#how-to-login-with-cookie",
            new_line=True,
        )
        sys.stdout.flush()
        sys.exit(0)

    return {"access_token": match.group("access_token")}
def _subscribed_courses(self, portal_name, course_name):
    """Search the user's subscribed courses on a portal for *course_name*.

    Sets the session's ``Host`` and ``Referer`` headers for the portal before
    requesting the ``COURSE_SEARCH`` URL.

    :param portal_name: portal sub-domain, used in the headers and the URL.
    :param course_name: search term, used in the Referer and the URL.
    :return: the ``results`` entry of the decoded JSON response, or ``[]``
        when the response has no such key.

    Connection problems and undecodable responses are logged and end the
    process via ``sys.exit(0)``.
    """
    host = "{portal_name}.udemy.com".format(portal_name=portal_name)
    referer = (
        "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}"
        .format(portal_name=portal_name, course_name=course_name)
    )
    self._session._headers.update({"Host": host, "Referer": referer})

    url = COURSE_SEARCH.format(portal_name=portal_name,
                               course_name=course_name)
    matches = []
    try:
        payload = self._session._get(url).json()
    except conn_error as error:
        logger.error(msg=f"Udemy Says: Connection error, {error}")
        time.sleep(0.8)
        sys.exit(0)
    except Exception as error:  # ValueError is already covered by Exception
        logger.error(msg=f"Udemy Says: {error} on {url}")
        time.sleep(0.8)
        sys.exit(0)
    else:
        matches = payload.get("results", [])
    return matches
def _real_extract(self, url="", skip_hls_stream=False):
    """Build the nested chapter/lecture description of one course.

    :param url: course URL, resolved through ``self._extract_course_info``.
    :param skip_hls_stream: forwarded to ``self._extract_sources``.
    :return: a dict with the keys ``access_token``, ``course_id``, ``title``,
        ``course_title``, ``chapters``, ``total_chapters`` and
        ``total_lectures``; an empty dict when no course record was found.
        Each chapter is a dict with ``chapter_title``, ``chapter_id``,
        ``chapter_index``, ``lectures`` and, once a lecture or quiz has been
        seen in it, ``lectures_count``.

    When the course payload carries a ``detail`` field it is logged as an
    error, ``self._logout()`` is called if no cookies are in use, and the
    process ends via ``sys.exit(0)``.
    """
    _udemy = {}
    course_id, course_info = self._extract_course_info(url)
    if not (course_info and isinstance(course_info, dict)):
        return _udemy

    title = self._clean(course_info.get("title"))
    course_title = course_info.get("published_title")
    portal_name = course_info.get("portal_name")
    course_json = self._extract_course_json(url, course_id, portal_name)
    course = course_json.get("results")
    resource = course_json.get("detail")
    if resource:
        if self._cookies:
            logger.error(
                msg=f"Udemy Says : '{resource}' cookies seems to be expired"
            )
        else:
            logger.error(
                msg=f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
            )
        logger.info(
            msg="Trying to logout now...",
            new_line=True,
        )
        if not self._cookies:
            self._logout()
        logger.info(
            msg="Logged out successfully.",
            new_line=True,
        )
        sys.exit(0)

    chapters = []
    _udemy["access_token"] = self._access_token
    _udemy["course_id"] = course_id
    _udemy["title"] = title
    _udemy["course_title"] = course_title
    _udemy["chapters"] = chapters

    def open_chapter(item, chapter_id):
        # Append a new, still empty chapter described by *item*.  The former
        # "chapter_title not in chapters" guard compared a string with the
        # chapter dicts, was therefore always true, and has been dropped.
        chapter_index = item.get("object_index")
        chapter_title = "{0:02d} ".format(chapter_index) + self._clean(
            item.get("title"))
        chapters.append({
            "chapter_title": chapter_title,
            "chapter_id": chapter_id,
            "chapter_index": chapter_index,
            "lectures": [],
        })

    counter = -1          # index of the chapter currently being filled
    lecture_counter = 0   # 1-based position of a lecture inside its chapter
    lectures = []
    for entry in course or []:
        clazz = entry.get("_class")

        if clazz == "chapter":
            lecture_counter = 0
            lectures = []
            open_chapter(entry, entry.get("id"))
            counter += 1

        elif clazz == "lecture":
            lecture_counter += 1
            lecture_id = entry.get("id")
            if not chapters:
                # Course without chapter entries: the first lecture doubles
                # as the chapter header.
                lectures = []
                open_chapter(entry, lecture_id)
                counter += 1

            if lecture_id:
                asset = entry.get("asset")
                if not isinstance(asset, dict):
                    # A missing or malformed asset used to crash on
                    # asset.get(...) below; treat it as an empty asset.
                    asset = {}
                supp_assets = entry.get("supplementary_assets")
                # Pick the first key that is present *before* lowering it.
                # Calling .lower() on each lookup raised AttributeError when
                # "asset_type" was absent, so the fallback never ran.
                asset_type = (asset.get("asset_type")
                              or asset.get("assetType")
                              or "").lower()

                retVal = []
                if asset_type in ("article", "video"):
                    if isinstance(supp_assets, list) and len(supp_assets) > 0:
                        retVal = self._extract_supplementary_assets(
                            supp_assets)
                elif asset_type == "e-book":
                    retVal = self._extract_ebook(asset)
                elif asset_type == "file":
                    retVal = self._extract_file(asset)
                elif asset_type == "presentation":
                    retVal = self._extract_ppt(asset)
                elif asset_type == "audio":
                    retVal = self._extract_audio(asset)

                logger.progress(msg="Downloading course information .. ")
                lecture_index = entry.get("object_index")
                lecture_title = "{0:03d} ".format(
                    lecture_counter) + self._clean(entry.get("title"))

                data = asset.get("stream_urls")
                if data and isinstance(data, dict):
                    # Streamable lecture: collect sources and subtitles.
                    sources = data.get("Video")
                    tracks = asset.get("captions")
                    duration = asset.get("time_estimation")
                    sources = self._extract_sources(
                        sources, skip_hls_stream=skip_hls_stream)
                    subtitles = self._extract_subtitles(tracks)
                    lectures.append({
                        "index": lecture_counter,
                        "lecture_index": lecture_index,
                        "lectures_id": lecture_id,
                        "lecture_title": lecture_title,
                        "duration": duration,
                        "assets": retVal,
                        "assets_count": len(retVal),
                        "sources": sources,
                        "subtitles": subtitles,
                        "subtitle_count": len(subtitles),
                        "sources_count": len(sources),
                    })
                else:
                    # No stream: keep the asset body as an HTML lecture.
                    lectures.append({
                        "index": lecture_counter,
                        "lecture_index": lecture_index,
                        "lectures_id": lecture_id,
                        "lecture_title": lecture_title,
                        "html_content": asset.get("body"),
                        "extension": "html",
                        "assets": retVal,
                        "assets_count": len(retVal),
                        "subtitle_count": 0,
                        "sources_count": 0,
                    })

            chapters[counter]["lectures"] = lectures
            chapters[counter]["lectures_count"] = len(lectures)

        elif clazz == "quiz":
            lecture_id = entry.get("id")
            if not chapters:
                # Same fallback as for lectures: a leading quiz opens the
                # first chapter.
                lectures = []
                lecture_counter = 0
                open_chapter(entry, lecture_id)
                counter += 1

            chapters[counter]["lectures"] = lectures
            chapters[counter]["lectures_count"] = len(lectures)

    _udemy["total_chapters"] = len(chapters)
    _udemy["total_lectures"] = sum(
        chapter.get("lectures_count", 0) for chapter in chapters if chapter
    )
    return _udemy