Exemple #1
0
 def _extract_large_course_content(self, url):
     url = url.replace("10000", "50") if url.endswith("10000") else url
     try:
         data = self._session._get(url).json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     else:
         _next = data.get("next")
         while _next:
             logger.progress(msg="Downloading course information .. ")
             try:
                 resp = self._session._get(_next).json()
             except conn_error as error:
                 logger.error(msg=f"Udemy Says: Connection error, {error}")
                 time.sleep(0.8)
                 sys.exit(0)
             else:
                 _next = resp.get("next")
                 results = resp.get("results")
                 if results and isinstance(results, list):
                     for d in resp["results"]:
                         data["results"].append(d)
         return data
Exemple #2
0
 def _login(self, username="", password="", cookies="", cache_session=False):
     """Create an authenticated session from cookies or credentials.

     Resolution order, as implemented below:

     1. If ``UdemyAuth`` reports a stored session and both ``username`` and
        ``password`` were supplied, the stored cookies are used.
     2. If no stored session exists and no cookies were supplied, missing
        credentials are read from the stored configuration and, failing
        that, prompted for interactively (access token first, then
        username/password).
     3. With cookies, authentication uses the ``access_token`` (and
        ``client_id``, if any) extracted from them; without cookies it
        uses the username/password pair.

     Args:
         username: Account user name; may be empty.
         password: Account password; may be empty.
         cookies: Raw cookie string containing ``access_token``; may be empty.
         cache_session: Forwarded to ``UdemyAuth``.

     Returns:
         ``{"login": "successful"}`` when a session object was obtained,
         otherwise ``{"login": "failed"}``.

     Exits the process via ``sys.exit(0)`` when neither a token nor
     credentials could be obtained.
     """
     # check if we already have session on udemy.
     auth = UdemyAuth(cache_session=cache_session)
     is_exists, conf = auth.is_session_exists()
     if is_exists and username and password:
         logger.info(
             msg="Using existing session..",
             new_line=True,
         )
         cookies = conf.get("cookies")
     if not is_exists and not cookies:
         cookies = None
         if not username and not password:
             logger.info(
                 msg="Updating session cookie..",
                 new_line=True,
             )
             username = conf.get("username")
             password = conf.get("password")
         if not username and not password and not cookies:
             print("")
             cookies = getpass.get_access_token(prompt="Access Token : ")
             if not cookies:
                 # NOTE(review): these three statements were fused and
                 # partially masked in the source; the password call name
                 # (`getpass.getpass`) is reconstructed -- confirm against
                 # the project's getpass helper.
                 username = getpass.getuser(prompt="Username : ")
                 password = getpass.getpass(prompt="Password : ")
                 print("\n")
             if not cookies and not username and not password:
                 logger.error(
                     msg="You should either provide Fresh Access Token or Username/Password to create new udemy session.."
                 )
                 sys.exit(0)
     if not cookies:
         auth.username = username
         auth.password = password
         self._session, self._access_token = auth.authenticate()
     if cookies:
         self._cookies = extract_cookie_string(raw_cookies=cookies)
         self._access_token = self._cookies.get("access_token")
         client_id = self._cookies.get("client_id")
         self._session, _ = auth.authenticate(
             access_token=self._access_token, client_id=client_id)
         self._session._session.cookies.update(self._cookies)
     # These are the two values `_fetch_course` compares against.
     if self._session is not None:
         return {"login": "successful"}
     else:
         return {"login": "failed"}
Exemple #3
0
 def _my_courses(self, portal_name):
     """Return the ``results`` list of the portal's "my courses" listing.

     Args:
         portal_name: Value substituted into ``MY_COURSES_URL``.

     Returns:
         The ``results`` value of the decoded JSON response, or ``[]`` when
         the key is absent.

     Exits the process via ``sys.exit(0)`` after logging when the request
     or JSON decoding fails.
     """
     try:
         endpoint = MY_COURSES_URL.format(portal_name=portal_name)
         payload = self._session._get(endpoint).json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     except Exception as error:
         logger.error(msg=f"Udemy Says: {error}")
         time.sleep(0.8)
         sys.exit(0)
     else:
         return payload.get("results", [])
     return []
Exemple #4
0
    def _extract_subscribed_courses(self):
        """Collect the absolute URLs of every enrolled course.

        Requests ``SUBSCRIBED_COURSES``, follows each ``next`` link and
        gathers all ``results`` entries, then turns their ``url`` fields
        into ``https://www.udemy.com``-prefixed URLs.

        Returns:
            list of unique course URLs in first-seen order; empty when the
            listing has no results.

        Exits the process via ``sys.exit(0)`` after logging when a request
        or JSON decoding fails.
        """
        def clean_urls(courses):
            # Deduplicate on the URL itself. Hashing whole entries via
            # tuple(item.items()) raises TypeError as soon as one field
            # holds a list or dict, and the URLs had to be deduplicated
            # again afterwards anyway. A dict keeps first-seen order,
            # unlike list(set(...)).
            _urls = {}
            for entry in courses:
                logger.progress(
                    msg="Fetching all enrolled course(s) url(s).. ")
                url = entry.get("url")
                if not url:
                    continue
                _urls[f"https://www.udemy.com{url}"] = None
            return list(_urls)

        _temp = []
        try:
            response = self._session._get(SUBSCRIBED_COURSES).json()
        except conn_error as error:
            logger.error(msg=f"Udemy Says: Connection error, {error}")
            time.sleep(0.8)
            sys.exit(0)
        except Exception as error:
            logger.error(msg=f"Udemy Says: {error}")
            time.sleep(0.8)
            sys.exit(0)
        else:
            results = response.get("results", [])
            _temp.extend(results)
            _next = response.get("next")
            logger.progress(msg="Fetching all enrolled course(s) url(s).. ")
            while _next:
                logger.progress(
                    msg="Fetching all enrolled course(s) url(s).. ")
                try:
                    resp = self._session._get(_next)
                    resp.raise_for_status()
                    resp = resp.json()
                except conn_error as error:
                    logger.error(msg=f"Udemy Says: Connection error, {error}")
                    time.sleep(0.8)
                    sys.exit(0)
                except Exception as error:
                    logger.error(msg=f"Udemy Says: error, {error}")
                    time.sleep(0.8)
                    sys.exit(0)
                else:
                    _next = resp.get("next")
                    results = resp.get("results", [])
                    _temp.extend(results)
        if _temp:
            _temp = clean_urls(_temp)
        return _temp
Exemple #5
0
 def _fetch_course(self):
     auth = {}
     if not self._cookies:
         auth = self._login(username=self._username,
                            password=self._password)
     if not auth and self._cookies:
         auth = self._login(cookies=self._cookies)
     if auth.get("login") == "successful":
         logger.info(msg="Logged in successfully.", new_line=True)
         logger.info(msg="Fetching all enrolled course(s) url(s)..")
         self._courses = self._extract_subscribed_courses()
         time.sleep(1)
         logger.success(msg="Fetching all enrolled course(s) url(s).. ")
         self._logout()
     if auth.get("login") == "failed":
         logger.error(msg="Failed to login ..\n")
         sys.exit(0)
Exemple #6
0
 def _extract_course_json(self, url, course_id, portal_name):
     """Download the decoded JSON document for one course.

     Sets the session's ``Referer`` header to ``url``, then requests the
     address built from ``COURSE_URL``. When the response status is 502 or
     503, or when any non-connection error occurs (for instance a body that
     does not decode as JSON), the page-by-page fetch in
     ``_extract_large_course_content`` is used instead.

     Args:
         url: Course page URL, used only as the ``Referer`` header.
         course_id: Value substituted into ``COURSE_URL``.
         portal_name: Value substituted into ``COURSE_URL``.

     Returns:
         The decoded JSON response.

     Exits the process via ``sys.exit(0)`` on a connection error.
     """
     self._session._headers.update({"Referer": url})
     url = COURSE_URL.format(portal_name=portal_name, course_id=course_id)
     try:
         reply = self._session._get(url)
         if reply.status_code in (502, 503):
             payload = self._extract_large_course_content(url=url)
         else:
             payload = reply.json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     except Exception:
         return self._extract_large_course_content(url=url)
     else:
         return payload
Exemple #7
0
    def _extract_course_info(self, url):
        portal_name, course_name = self._course_name(url)
        course = {}
        results = self._subscribed_courses(portal_name=portal_name,
                                           course_name=course_name)
        course = self.__extract_course(response=results,
                                       course_name=course_name)
        if not course:
            results = self._my_courses(portal_name=portal_name)
            course = self.__extract_course(response=results,
                                           course_name=course_name)
        if not course:
            results = self._subscribed_collection_courses(
                portal_name=portal_name)
            course = self.__extract_course(response=results,
                                           course_name=course_name)
        if not course:
            results = self._archived_courses(portal_name=portal_name)
            course = self.__extract_course(response=results,
                                           course_name=course_name)

        if course:
            course.update({"portal_name": portal_name})
            return course.get("id"), course
        if not course:
            logger.failed(
                msg="Downloading course information, course id not found .. ")
            logger.info(
                msg=
                "It seems either you are not enrolled or you have to visit the course atleast once while you are logged in.",
                new_line=True,
            )
            logger.info(
                msg="Trying to logout now...",
                new_line=True,
            )
            if not self._cookies:
                self._logout()
            logger.info(
                msg="Logged out successfully.",
                new_line=True,
            )
            sys.exit(0)
Exemple #8
0
 def _fetch_course(self):
     if self._have_basic:
         return
     auth = {}
     if not self._cookies:
         auth = self._login(
             username=self._username,
             password=self._password,
             cache_session=self._cache_session,
         )
     if not auth and self._cookies:
         auth = self._login(cookies=self._cookies,
                            cache_session=self._cache_session)
     if auth.get("login") == "successful":
         logger.info(msg="Logged in successfully.", new_line=True)
         logger.info(msg="Downloading course information ..")
         self._info = self._real_extract(
             self._url, skip_hls_stream=self._skip_hls_stream)
         time.sleep(1)
         logger.success(msg="Downloaded course information .. ")
         access_token = self._info["access_token"]
         self._id = self._info["course_id"]
         self._title = self._info["course_title"]
         self._chapters_count = self._info["total_chapters"]
         self._total_lectures = self._info["total_lectures"]
         self._chapters = [
             InternUdemyChapter(z, access_token=access_token)
             for z in self._info["chapters"]
         ]
         logger.info(
             msg="Trying to logout now...",
             new_line=True,
         )
         if not self._cookies:
             self._logout()
         logger.info(
             msg="Logged out successfully.",
             new_line=True,
         )
         self._have_basic = True
     if auth.get("login") == "failed":
         logger.error(msg="Failed to login ..\n")
         sys.exit(0)
Exemple #9
0
 def _subscribed_collection_courses(self, portal_name):
     """Return every course listed in the user's subscribed collections.

     Args:
         portal_name: Value substituted into ``COLLECTION_URL``.

     Returns:
         A flat list made by concatenating the ``courses`` list of each
         entry in the response's ``results``; empty when there are none.

     Exits the process via ``sys.exit(0)`` after logging when the request
     or JSON decoding fails.
     """
     endpoint = COLLECTION_URL.format(portal_name=portal_name)
     gathered = []
     try:
         payload = self._session._get(endpoint).json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     except Exception as error:
         logger.error(msg=f"Udemy Says: {error}")
         time.sleep(0.8)
         sys.exit(0)
     else:
         # `or []` mirrors the original truthiness guard on "results".
         for collection in payload.get("results", []) or []:
             members = collection.get("courses", [])
             if members:
                 gathered.extend(members)
     return gathered
Exemple #10
0
def extract_cookie_string(raw_cookies):
    """Pull the ``access_token`` value out of a raw cookie string.

    The key is matched case-insensitively and the value is the run of word
    characters that follows ``access_token=``.

    Args:
        raw_cookies: Cookie text to search.

    Returns:
        ``{"access_token": <value>}``.

    Exits the process via ``sys.exit(0)`` after logging when the search
    raises or when no token is present.
    """
    pattern = r"(?i)(?:access_token=(?P<access_token>\w+))"
    try:
        found = re.search(pattern, raw_cookies)
    except Exception as error:
        logger.error(
            msg=f"Cookies error, {error}, unable to extract access_token from cookies."
        )
        sys.exit(0)

    if found is None:
        logger.error(msg="Unable to find access_token, proper cookies required")
        logger.info(
            msg="follow: https://github.com/r0oth3x49/udemy-dl#how-to-login-with-cookie",
            new_line=True,
        )
        sys.stdout.flush()
        sys.exit(0)

    return {"access_token": found.group("access_token")}
Exemple #11
0
 def _subscribed_courses(self, portal_name, course_name):
     """Search the user's enrolled courses for ``course_name``.

     Sets the session's ``Host`` and ``Referer`` headers for the portal
     before requesting the address built from ``COURSE_SEARCH``.

     Args:
         portal_name: Portal sub-domain used in the headers and the URL.
         course_name: Search term used in the ``Referer`` and the URL.

     Returns:
         The ``results`` value of the decoded JSON response, or ``[]`` when
         the key is absent.

     Exits the process via ``sys.exit(0)`` after logging when the request
     or JSON decoding fails.
     """
     host = "{portal_name}.udemy.com".format(portal_name=portal_name)
     referer = (
         "https://{portal_name}.udemy.com/home/my-courses/search/?q={course_name}"
         .format(portal_name=portal_name, course_name=course_name))
     self._session._headers.update({"Host": host, "Referer": referer})

     url = COURSE_SEARCH.format(portal_name=portal_name,
                                course_name=course_name)
     try:
         payload = self._session._get(url).json()
     except conn_error as error:
         logger.error(msg=f"Udemy Says: Connection error, {error}")
         time.sleep(0.8)
         sys.exit(0)
     except Exception as error:
         logger.error(msg=f"Udemy Says: {error} on {url}")
         time.sleep(0.8)
         sys.exit(0)
     else:
         return payload.get("results", [])
     return []
Exemple #12
0
    def _real_extract(self, url="", skip_hls_stream=False):

        _udemy = {}
        course_id, course_info = self._extract_course_info(url)

        if course_info and isinstance(course_info, dict):
            title = self._clean(course_info.get("title"))
            course_title = course_info.get("published_title")
            portal_name = course_info.get("portal_name")

        course_json = self._extract_course_json(url, course_id, portal_name)
        course = course_json.get("results")
        resource = course_json.get("detail")

        if resource:
            if not self._cookies:
                logger.error(
                    msg=
                    f"Udemy Says : '{resource}' Run udemy-dl against course within few seconds"
                )
            if self._cookies:
                logger.error(
                    msg=f"Udemy Says : '{resource}' cookies seems to be expired"
                )
            logger.info(
                msg="Trying to logout now...",
                new_line=True,
            )
            if not self._cookies:
                self._logout()
            logger.info(
                msg="Logged out successfully.",
                new_line=True,
            )
            sys.exit(0)

        _udemy["access_token"] = self._access_token
        _udemy["course_id"] = course_id
        _udemy["title"] = title
        _udemy["course_title"] = course_title
        _udemy["chapters"] = []

        counter = -1

        if course:
            lecture_counter = 0
            for entry in course:
                clazz = entry.get("_class")
                asset = entry.get("asset")
                supp_assets = entry.get("supplementary_assets")

                if clazz == "chapter":
                    lecture_counter = 0
                    lectures = []
                    chapter_index = entry.get("object_index")
                    chapter_title = "{0:02d} ".format(
                        chapter_index) + self._clean(entry.get("title"))
                    if chapter_title not in _udemy["chapters"]:
                        _udemy["chapters"].append({
                            "chapter_title": chapter_title,
                            "chapter_id": entry.get("id"),
                            "chapter_index": chapter_index,
                            "lectures": [],
                        })
                        counter += 1
                elif clazz == "lecture":
                    lecture_counter += 1
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1

                    if lecture_id:

                        retVal = []

                        if isinstance(asset, dict):
                            asset_type = (asset.get("asset_type").lower()
                                          or asset.get("assetType").lower())
                            if asset_type == "article":
                                if (isinstance(supp_assets, list)
                                        and len(supp_assets) > 0):
                                    retVal = self._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "video":
                                if (isinstance(supp_assets, list)
                                        and len(supp_assets) > 0):
                                    retVal = self._extract_supplementary_assets(
                                        supp_assets)
                            elif asset_type == "e-book":
                                retVal = self._extract_ebook(asset)
                            elif asset_type == "file":
                                retVal = self._extract_file(asset)
                            elif asset_type == "presentation":
                                retVal = self._extract_ppt(asset)
                            elif asset_type == "audio":
                                retVal = self._extract_audio(asset)

                        logger.progress(
                            msg="Downloading course information .. ")
                        lecture_index = entry.get("object_index")
                        lecture_title = "{0:03d} ".format(
                            lecture_counter) + self._clean(entry.get("title"))
                        data = asset.get("stream_urls")
                        if data and isinstance(data, dict):
                            sources = data.get("Video")
                            tracks = asset.get("captions")
                            duration = asset.get("time_estimation")
                            sources = self._extract_sources(
                                sources, skip_hls_stream=skip_hls_stream)
                            subtitles = self._extract_subtitles(tracks)
                            sources_count = len(sources)
                            subtitle_count = len(subtitles)
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "duration": duration,
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "sources": sources,
                                "subtitles": subtitles,
                                "subtitle_count": subtitle_count,
                                "sources_count": sources_count,
                            })
                        else:
                            lectures.append({
                                "index": lecture_counter,
                                "lecture_index": lecture_index,
                                "lectures_id": lecture_id,
                                "lecture_title": lecture_title,
                                "html_content": asset.get("body"),
                                "extension": "html",
                                "assets": retVal,
                                "assets_count": len(retVal),
                                "subtitle_count": 0,
                                "sources_count": 0,
                            })

                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)
                elif clazz == "quiz":
                    lecture_id = entry.get("id")
                    if len(_udemy["chapters"]) == 0:
                        lectures = []
                        chapter_index = entry.get("object_index")
                        chapter_title = "{0:02d} ".format(
                            chapter_index) + self._clean(entry.get("title"))
                        if chapter_title not in _udemy["chapters"]:
                            lecture_counter = 0
                            _udemy["chapters"].append({
                                "chapter_title": chapter_title,
                                "chapter_id": lecture_id,
                                "chapter_index": chapter_index,
                                "lectures": [],
                            })
                            counter += 1
                    _udemy["chapters"][counter]["lectures"] = lectures
                    _udemy["chapters"][counter]["lectures_count"] = len(
                        lectures)
            _udemy["total_chapters"] = len(_udemy["chapters"])
            _udemy["total_lectures"] = sum([
                entry.get("lectures_count", 0) for entry in _udemy["chapters"]
                if entry
            ])

        return _udemy