def library_login(s, libCardNum, libCardPin, orgDomain, LDEBUG=False):
    """Log in to lynda.com through the library (SIP) portal.

    Args:
        s: Session-like object exposing ``get`` and ``post``
            (presumably a ``requests.Session`` -- confirm against caller).
        libCardNum: Library card number submitted with the login form.
        libCardPin: Library card PIN submitted with the login form.
        orgDomain: Value sent as the ``org`` parameter on both requests.
        LDEBUG: Value stored in the module-level ``DEBUG`` flag.

    Returns:
        The result of ``getName`` on the response page when the POST ends
        on one of the known post-login URLs, otherwise ``False``.

    Raises:
        IndexError: If the portal page has fewer than two forms or no
            ``seasurf`` input (the lookups below index unconditionally).
    """
    global DEBUG
    DEBUG = LDEBUG

    libraryLoginURL = "https://www.lynda.com/portal/sip"
    org_params = {"org": orgDomain}

    r = s.get(libraryLoginURL, params=org_params)

    # The second form on the portal page is used; its "seasurf" input
    # holds the token that has to be echoed back in the POST.
    form = pd(r.text, "form")[1]
    seasurf_sec_token = pd(
        form, "input", attrs={"name": "seasurf"}, ret="value"
    )[0].encode("utf-8")

    payload = {
        "libraryCardNumber": libCardNum,
        "libraryCardPin": libCardPin,
        "libraryCardPasswordVerify": "",
        "org": orgDomain,
        "seasurf": seasurf_sec_token,
    }

    # Send ``org`` through ``params`` (as the GET does) so the value is
    # URL-encoded instead of being concatenated raw into the query string.
    r2 = s.post(libraryLoginURL, params=org_params, data=payload)

    # URLs the site redirects to after a successful login.
    logged_in_urls = (
        'http://www.lynda.com/member',
        'https://www.lynda.com/member',
        'https://www.lynda.com/',
    )
    if r2.url not in logged_in_urls:
        return False
    return getName(r2.text)
def get_category_letter_software(s, search_letter):
    """Return the software entries listed under one letter of the subject index.

    Args:
        s: Session-like object exposing ``get``.
        search_letter: Heading text (content of the letter block's ``<h3>``)
            to look for.

    Returns:
        A list of ``{"name": ..., "link": ...}`` dicts for the first letter
        block whose heading equals ``search_letter``; an empty list when no
        block matches.
    """
    r = s.get("http://www.lynda.com/subject/all")
    letters_html = pd(r.text, "div", attrs={"class": "letter"})

    softwares = []
    for letter in letters_html:
        if pd(letter, "h3")[0] != search_letter:
            continue

        for software_html in pd(letter, "div", attrs={"class": "software-name"}):
            anchor_html = pd(software_html, "a")[0]
            link = pd(software_html, "a", ret="href")[0]
            # startswith() rather than link[0]: an empty href must not
            # raise IndexError.
            if link.startswith("/"):
                link = "http://www.lynda.com" + link
            # The anchor content is split around its <span>...</span> part.
            num_courses, software_name = (
                anchor_html.split("<span>")[1].split("</span>")
            )
            softwares.append({
                "name": software_name + " " + num_courses,
                "link": link,
            })
        # Only the first matching letter block is processed.
        break
    return softwares
def library_login(s, libCardNum, libCardPin, orgDomain, LDEBUG=False):
    """Log in to lynda.com through the library (SIP) portal.

    Args:
        s: Session-like object exposing ``get`` and ``post``
            (presumably a ``requests.Session`` -- confirm against caller).
        libCardNum: Library card number submitted with the login form.
        libCardPin: Library card PIN submitted with the login form.
        orgDomain: Value sent as the ``org`` parameter on both requests.
        LDEBUG: Value stored in the module-level ``DEBUG`` flag.

    Returns:
        The result of ``getName`` on the response page when the POST ends
        on one of the known post-login URLs, otherwise ``False``.

    Raises:
        IndexError: If the portal page has fewer than two forms or no
            ``seasurf`` input (the lookups below index unconditionally).
    """
    global DEBUG
    DEBUG = LDEBUG

    libraryLoginURL = "https://www.lynda.com/portal/sip"
    org_params = {"org": orgDomain}

    r = s.get(libraryLoginURL, params=org_params)

    # The second form on the portal page is used; its "seasurf" input
    # holds the token that has to be echoed back in the POST.
    form = pd(r.text, "form")[1]
    seasurf_sec_token = pd(
        form, "input", attrs={"name": "seasurf"}, ret="value"
    )[0].encode("utf-8")

    payload = {
        "libraryCardNumber": libCardNum,
        "libraryCardPin": libCardPin,
        "libraryCardPasswordVerify": "",
        "org": orgDomain,
        "seasurf": seasurf_sec_token,
    }

    # Send ``org`` through ``params`` (as the GET does) so the value is
    # URL-encoded instead of being concatenated raw into the query string.
    r2 = s.post(libraryLoginURL, params=org_params, data=payload)

    # URLs the site redirects to after a successful login.
    logged_in_urls = (
        'http://www.lynda.com/member',
        'https://www.lynda.com/member',
        'https://www.lynda.com/',
    )
    if r2.url not in logged_in_urls:
        return False
    return getName(r2.text)
def get_categories_letters(s):
    """Fetch the subject index page and return its letter headings.

    Args:
        s: Session-like object exposing ``get``.

    Returns:
        A list with the first ``<h3>`` content of every ``div.letter``
        block, in page order.
    """
    response = s.get("http://www.lynda.com/subject/all")
    letter_blocks = pd(response.text, "div", attrs={"class": "letter"})
    return [pd(block, "h3")[0] for block in letter_blocks]
def get_categories_letters(s):
    """Fetch the subject index page and return its letter headings.

    Args:
        s: Session-like object exposing ``get``.

    Returns:
        A list with the first ``<h3>`` content of every ``div.letter``
        block, in page order.
    """
    response = s.get("http://www.lynda.com/subject/all")
    letter_blocks = pd(response.text, "div", attrs={"class": "letter"})
    return [pd(block, "h3")[0] for block in letter_blocks]
def getName(html):
    """Extract the account name from a page's account menu.

    Tested on the members page.

    Args:
        html: Page markup to search.

    Returns:
        The tag-stripped text of the first
        ``span[data-qa="eyebrow_account_menu"]`` with its first three
        characters dropped (presumably a fixed prefix -- confirm), or the
        placeholder ``"Can't get name"`` when extraction fails.
    """
    try:
        drop_menu = pd(html, "span", attrs={"data-qa": "eyebrow_account_menu"})[0]
        name = stripTags(drop_menu)[3:]
    except Exception:
        # Best-effort lookup: any parsing failure falls back to the
        # placeholder, but KeyboardInterrupt/SystemExit still propagate.
        name = "Can't get name"
    return name
def courses_for_category(s, link):
    """Fetch a category page and return the courses listed on it.

    Args:
        s: Session-like object exposing ``get``.
        link: URL of the category page.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``, one per ``<li>`` of the ``ul.course-list``.
    """
    response = s.get(link)
    list_html = pd(response.text, "ul", attrs={"class": "course-list"})

    # Parallel lists: entry i of each is assumed to belong to the i-th <li>.
    items = pd(list_html, "li")
    item_ids = pd(list_html, "li", ret="id")
    thumbnails = pd(list_html, "img", ret="data-img-src")
    paragraphs = pd(list_html, "p")

    results = []
    for position, item_html in enumerate(items):
        heading_html = pd(item_html, "h3")[0]
        anchor_text = pd(heading_html, "a")[0]
        results.append({
            "title": stripTags(anchor_text.strip()),
            # The first seven characters of the id attribute are dropped.
            "courseId": item_ids[position][7:],
            "thumbURL": thumbnails[position],
            "shortDesc": stripTags(paragraphs[position]).strip(),
        })
    return results
def courses_for_category(s, link):
    """Fetch a category page and return the courses listed on it.

    Args:
        s: Session-like object exposing ``get``.
        link: URL of the category page.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``, one per ``<li>`` of the ``ul.course-list``.
    """
    response = s.get(link)
    list_html = pd(response.text, "ul", attrs={"class": "course-list"})

    # Parallel lists: entry i of each is assumed to belong to the i-th <li>.
    items = pd(list_html, "li")
    item_ids = pd(list_html, "li", ret="id")
    thumbnails = pd(list_html, "img", ret="data-img-src")
    paragraphs = pd(list_html, "p")

    results = []
    for position, item_html in enumerate(items):
        heading_html = pd(item_html, "h3")[0]
        anchor_text = pd(heading_html, "a")[0]
        results.append({
            "title": stripTags(anchor_text.strip()),
            # The first seven characters of the id attribute are dropped.
            "courseId": item_ids[position][7:],
            "thumbURL": thumbnails[position],
            "shortDesc": stripTags(paragraphs[position]).strip(),
        })
    return results
def getName(html):
    """Extract the account name from a page's account menu.

    Tested on the members page.

    Args:
        html: Page markup to search.

    Returns:
        The tag-stripped text of the first
        ``span[data-qa="eyebrow_account_menu"]`` with its first three
        characters dropped (presumably a fixed prefix -- confirm), or the
        placeholder ``"Can't get name"`` when extraction fails.
    """
    try:
        drop_menu = pd(html, "span", attrs={"data-qa": "eyebrow_account_menu"})[0]
        name = stripTags(drop_menu)[3:]
    except Exception:
        # Best-effort lookup: any parsing failure falls back to the
        # placeholder, but KeyboardInterrupt/SystemExit still propagate.
        name = "Can't get name"
    return name
def get_category_letter_software(s, search_letter):
    """Return the software entries listed under one letter of the subject index.

    Args:
        s: Session-like object exposing ``get``.
        search_letter: Heading text (content of the letter block's ``<h3>``)
            to look for.

    Returns:
        A list of ``{"name": ..., "link": ...}`` dicts for the first letter
        block whose heading equals ``search_letter``; an empty list when no
        block matches.
    """
    r = s.get("http://www.lynda.com/subject/all")
    letters_html = pd(r.text, "div", attrs={"class": "letter"})

    softwares = []
    for letter in letters_html:
        if pd(letter, "h3")[0] != search_letter:
            continue

        for software_html in pd(letter, "div", attrs={"class": "software-name"}):
            anchor_html = pd(software_html, "a")[0]
            link = pd(software_html, "a", ret="href")[0]
            # startswith() rather than link[0]: an empty href must not
            # raise IndexError.
            if link.startswith("/"):
                link = "http://www.lynda.com" + link
            # The anchor content is split around its <span>...</span> part.
            num_courses, software_name = (
                anchor_html.split("<span>")[1].split("</span>")
            )
            softwares.append({
                "name": software_name + " " + num_courses,
                "link": link,
            })
        # Only the first matching letter block is processed.
        break
    return softwares
def get_my_courses(s):
    """Fetch the course-history page and return the courses it lists.

    Args:
        s: Session-like object exposing ``get``.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``. ``thumbURL`` is always ``None`` and ``shortDesc``
        is always the empty string.
    """
    history_page = s.get("http://www.lynda.com/CourseHistory")
    rows = pd(history_page.text, "div", attrs={"class": "row_course"})

    history = []
    for row_html in rows:
        course_cell = pd(row_html, "div", attrs={"class": "col_course"})
        course_id = pd(course_cell, "a", ret="data-course")[0]
        # The title is the content of the second anchor in the cell.
        course_title = pd(course_cell, "a")[1].strip()
        history.append({
            "title": course_title,
            "courseId": course_id,
            "thumbURL": None,
            "shortDesc": "",
        })
    return history
def get_my_courses(s):
    """Fetch the course-history page and return the courses it lists.

    Args:
        s: Session-like object exposing ``get``.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``. ``thumbURL`` is always ``None`` and ``shortDesc``
        is always the empty string.
    """
    history_page = s.get("http://www.lynda.com/CourseHistory")
    rows = pd(history_page.text, "div", attrs={"class": "row_course"})

    history = []
    for row_html in rows:
        course_cell = pd(row_html, "div", attrs={"class": "col_course"})
        course_id = pd(course_cell, "a", ret="data-course")[0]
        # The title is the content of the second anchor in the cell.
        course_title = pd(course_cell, "a")[1].strip()
        history.append({
            "title": course_title,
            "courseId": course_id,
            "thumbURL": None,
            "shortDesc": "",
        })
    return history
def parse_course_lists(html, is_text_search=True):
    """Parse a course-listing page into a list of course dicts.

    Args:
        html: Page markup containing the list of courses.
        is_text_search: ``True`` selects the ids/classes used for
            search-result markup, ``False`` those used for category markup.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``. ``<li>`` items without a matching course card
        are skipped.
    """
    if is_text_search:
        ul_id, course_class, description_class, title_el = (
            "search-results-list",
            "card card-list-style search-result course",
            "meta-description hidden-xs",
            "h2",
        )
    else:
        ul_id, course_class, description_class, title_el = (
            "category-courses",
            "card card-list-style course",
            "meta-description hidden-xs dot-ellipsis dot-resize-update",
            "h3",
        )

    list_html = pd(html, "ul", attrs={"id": ul_id})

    parsed = []
    for item_html in pd(list_html, "li"):
        card_ids = pd(item_html, "div", attrs={"class": course_class}, ret="id")
        if len(card_ids) == 0:
            # This <li> holds no course card.
            continue

        heading_text = stripTags(pd(item_html, title_el)[0])
        thumb_block = pd(item_html, "div", attrs={"class": "thumbnail"})[0]
        thumb_src = pd(thumb_block, "img", ret="data-lazy-src")[0]
        description_html = pd(item_html, "div", attrs={"class": description_class})[0]

        parsed.append({
            "title": heading_text,
            "courseId": card_ids[0],
            "thumbURL": thumb_src,
            "shortDesc": stripTags(description_html).strip(),
        })
    return parsed
def getForm(html, formIndex=0):
    """Collect the action and input attributes of one form on a page.

    Args:
        html: Page markup to search.
        formIndex: Position of the wanted ``<form>`` among the forms that
            ``pd`` finds in ``html``.

    Returns:
        A dict with ``action`` (``None`` when it cannot be read) and the
        lists ``input_types``, ``input_values`` and ``input_names`` taken
        from the form's ``<input>`` elements.

    Raises:
        IndexError: If ``html`` has no form at ``formIndex``.
    """
    form = pd(html, "form")[formIndex]

    formObj = {}
    try:
        # Read the action of the requested form rather than always the
        # first one. NOTE(review): assumes pd yields one action per form,
        # in document order -- confirm for forms without an action attribute.
        formObj['action'] = pd(html, "form", ret="action")[formIndex]
    except Exception:
        formObj['action'] = None

    formObj['input_types'] = pd(form, "input", ret="type")
    formObj['input_values'] = pd(form, "input", ret="value")
    formObj['input_names'] = pd(form, "input", ret="name")
    return formObj
def getForm(html, formIndex=0):
    """Collect the action and input attributes of one form on a page.

    Args:
        html: Page markup to search.
        formIndex: Position of the wanted ``<form>`` among the forms that
            ``pd`` finds in ``html``.

    Returns:
        A dict with ``action`` (``None`` when it cannot be read) and the
        lists ``input_types``, ``input_values`` and ``input_names`` taken
        from the form's ``<input>`` elements.

    Raises:
        IndexError: If ``html`` has no form at ``formIndex``.
    """
    form = pd(html, "form")[formIndex]

    formObj = {}
    try:
        # Read the action of the requested form rather than always the
        # first one. NOTE(review): assumes pd yields one action per form,
        # in document order -- confirm for forms without an action attribute.
        formObj['action'] = pd(html, "form", ret="action")[formIndex]
    except Exception:
        formObj['action'] = None

    formObj['input_types'] = pd(form, "input", ret="type")
    formObj['input_values'] = pd(form, "input", ret="value")
    formObj['input_names'] = pd(form, "input", ret="name")
    return formObj
def course_search(s, query):
    """Run a site search and return the courses found.

    Args:
        s: Session-like object exposing ``get``.
        query: Search text sent as the ``q`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``, one per ``<li>`` of the result list.
    """
    # "f" is a fixed filter value sent with every search
    # (presumably restricts results to courses -- confirm).
    search_params = {"q": query, "f": "producttypeid:2"}
    response = s.get("http://www.lynda.com/search", params=search_params)

    list_html = pd(
        response.text, "ul", attrs={"class": "course-list search-movies"}
    )

    # Parallel lists: entry i of each is assumed to belong to the i-th <li>.
    items = pd(list_html, "li")
    item_ids = pd(list_html, "li", ret="id")
    thumbnails = pd(list_html, "img", ret="data-img-src")
    highlights = pd(list_html, "p", attrs={"class": "highlights"})

    found = []
    for position, item_html in enumerate(items):
        title_html = pd(item_html, "a", attrs={"class": "title"})[0]
        found.append({
            "title": stripTags(title_html),
            # The first seven characters of the id attribute are dropped.
            "courseId": item_ids[position][7:],
            "thumbURL": thumbnails[position],
            "shortDesc": stripTags(highlights[position]).strip(),
        })
    return found
def course_search(s, query):
    """Run a site search and return the courses found.

    Args:
        s: Session-like object exposing ``get``.
        query: Search text sent as the ``q`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``courseId``, ``thumbURL``
        and ``shortDesc``, one per ``<li>`` of the result list.
    """
    # "f" is a fixed filter value sent with every search
    # (presumably restricts results to courses -- confirm).
    search_params = {"q": query, "f": "producttypeid:2"}
    response = s.get("http://www.lynda.com/search", params=search_params)

    list_html = pd(
        response.text, "ul", attrs={"class": "course-list search-movies"}
    )

    # Parallel lists: entry i of each is assumed to belong to the i-th <li>.
    items = pd(list_html, "li")
    item_ids = pd(list_html, "li", ret="id")
    thumbnails = pd(list_html, "img", ret="data-img-src")
    highlights = pd(list_html, "p", attrs={"class": "highlights"})

    found = []
    for position, item_html in enumerate(items):
        title_html = pd(item_html, "a", attrs={"class": "title"})[0]
        found.append({
            "title": stripTags(title_html),
            # The first seven characters of the id attribute are dropped.
            "courseId": item_ids[position][7:],
            "thumbURL": thumbnails[position],
            "shortDesc": stripTags(highlights[position]).strip(),
        })
    return found