def gather_info(url, course_path):
    '''Gather course information and record it in the course directory.

    Scrapes the course page at ``url`` (title, course id, author, topic
    and software tags, duration, release/update dates), reports each
    field through ``message.write`` and appends the details to
    ``info.txt`` plus a heading to ``CONTENT.md`` inside ``course_path``.

    Side effect: the process working directory is changed to
    ``course_path`` and is NOT restored afterwards.

    Args:
        url: Address of the course page; passed to ``create_soup``.
        course_path: Directory that receives ``info.txt`` and
            ``CONTENT.md``.

    NOTE(review): the object returned by ``create_soup`` is presumably a
    BeautifulSoup document; if so, a missing page element surfaces as
    ``AttributeError`` (``.text`` on ``None``) or ``IndexError`` (no
    course id found). Confirm against ``create_soup``.
    '''
    # Function-scope import so this block does not rely on the module
    # header; io.open accepts ``encoding`` on both Python 2 and 3.
    import io

    soup = create_soup(url)
    course_title = soup.find('h1', {"class": "default-title"}).text
    # Several elements can carry a course id attribute; the first wins.
    course_ids = soup.findAll('div', {"data-course-id": True})
    course_id = course_ids[0]['data-course-id']
    author_name = soup.find('cite', {"data-ga-label": "author-name"}).text

    # Topic tags: the string starts with a tab, then '#<tag>\t' per tag.
    topic_tags = soup.findAll('a', {"data-ga-label": "topic-tag"})
    topic_tag = '\t' + ''.join('#' + tag.text + '\t' for tag in topic_tags)

    # Software tags: same format, but without the leading tab.
    software_tags = soup.findAll('a', {"data-ga-label": "software-tag"})
    software_tag = ''.join('#' + tag.text + '\t' for tag in software_tags)

    release_date = soup.find('span', {"id": "release-date"}).text
    duration = soup.find('div', {"class": "duration"}).find('span').text
    download_date = time.strftime("%d/%m/%y")  # today's date

    # The update date falls back to the release date when the page has
    # no update-date element. Look the element up once.
    update_span = soup.find('span', {"id": "update-date"})
    if update_span is not None:
        update_date = update_span.text
    else:
        update_date = release_date

    message.write("Course Name", course_title)
    message.write("Course id", course_id)
    message.write("Author Name", author_name)
    message.write("Topics", topic_tag)
    message.write("Softwares", software_tag)
    message.write("Duration", duration)
    message.write("Release Date", release_date)
    message.write("Updated On", update_date)
    message.write("Course URL", url)

    # Jump to the course directory; both files use relative names.
    os.chdir(course_path)

    info_fields = [
        (u'Course Name', course_title),
        (u'Course id', course_id),
        (u'Author Name', author_name),
        (u'Topics', topic_tag),
        (u'Softwares', software_tag),
        (u'Duration', duration),
        (u'Release Date', release_date),
        (u'Updated On', update_date),
        (u'Downloaded On', download_date),
        (u'Course URL', url),
    ]
    # Explicit UTF-8 so non-ASCII titles/authors are written reliably,
    # independent of the platform default encoding.
    with io.open('info.txt', mode='a', encoding='utf-8') as info_txt:
        info_txt.writelines(
            label + u'\t\t' + value + u'\n' for label, value in info_fields
        )

    with io.open('CONTENT.md', mode='a', encoding='utf-8') as content_md:
        content_md.writelines([
            u"# " + course_title + u" with " + author_name
            + u" on lynda.com \n\n",
            u"## Chapters:\n\n",  # next heading
        ])

    # print message
    message.print_line(message.INFO_FILE_CREATED)
def info_file(url, course_path):
    '''Gather course information and record it in the course directory.

    Scrapes the course page at ``url`` (title, course id, author, topic
    and software tags, duration, release date), reports each field
    through ``message.write`` and appends the details to ``info.txt``
    plus a title line to ``CONTENT.md`` inside ``course_path``. Both
    files are written as UTF-8.

    Side effect: the process working directory is changed to
    ``course_path`` and is NOT restored afterwards.

    Args:
        url: Address of the course page; passed to ``create_soup``.
        course_path: Directory that receives ``info.txt`` and
            ``CONTENT.md``.

    NOTE(review): the object returned by ``create_soup`` is presumably a
    BeautifulSoup document; if so, a missing page element surfaces as
    ``AttributeError`` (``.text`` on ``None``) or ``IndexError`` (no
    course id found). Confirm against ``create_soup``.
    '''
    soup = create_soup(url)
    course_title = soup.find('h1', {"class": "default-title"}).text
    # Several elements can carry a course id attribute; the first wins.
    course_ids = soup.findAll('div', {"data-course-id": True})
    course_id = course_ids[0]['data-course-id']
    author_name = soup.find('cite', {"data-ga-label": "author-name"}).text

    # Topic tags: the string starts with a tab, then '#<tag>\t' per tag.
    topic_tags = soup.findAll('a', {"data-ga-label": "topic-tag"})
    topic_tag = '\t' + ''.join('#' + tag.text + '\t' for tag in topic_tags)

    # Software tags: same format without the leading tab; a course with
    # no software tags is recorded as the literal 'None'.
    software_tags = soup.findAll('a', {"data-ga-label": "software-tag"})
    software_tag = ''.join('#' + tag.text + '\t' for tag in software_tags)
    if software_tag == '':
        software_tag = 'None'

    release_date = soup.find('span', {"id": "release-date"}).text
    duration = soup.find('div', {"class": "duration"}).find('span').text
    download_date = time.strftime("%d/%B/%Y")  # today's date

    message.write("\nCourse Name", course_title)
    message.write("Course id", course_id)
    message.write("Author Name", author_name)
    message.write("Topics", topic_tag)
    message.write("Softwares", software_tag)
    message.write("Duration", duration)
    message.write("Release Date", release_date)
    message.write("Downloaded On", download_date)
    message.write("Course URL", url)

    os.chdir(course_path)  # Jump to course directory to save info.txt

    info_fields = [
        (u'Course Name', course_title),
        (u'Course id', course_id),
        (u'Author Name', author_name),
        (u'Topics', topic_tag),
        (u'Softwares', software_tag),
        (u'Duration', duration),
        (u'Release Date', release_date),
        (u'Downloaded On', download_date),
        (u'Course URL', url),
    ]
    # write to info.txt -- the context manager closes the handle even if
    # a write raises. The handle is named info_txt so it does not shadow
    # this function's own name.
    with io.open('info.txt', mode="a", encoding="utf-8") as info_txt:
        info_txt.writelines(
            label + '\t\t' + value + '\n' for label, value in info_fields
        )

    # write to content.md
    with io.open('CONTENT.md', mode="a", encoding="utf-8") as content_md:
        content_md.writelines(
            [u"# " + course_title + " with " + author_name + " on lynda.com \n"]
        )

    # print message
    message.print_line(message.INFO_FILE_CREATED)