Ejemplo n.º 1
0
def gather_info(url, course_path):
    """Scrape course metadata from *url* and record it in *course_path*.

    The page is parsed via ``create_soup`` (defined elsewhere; presumably
    returns a BeautifulSoup document -- confirm).  The extracted fields are
    echoed through ``message.write`` and then appended to ``info.txt``; a
    heading for the course is appended to ``CONTENT.md``.

    Args:
        url: Address of the course page; also written to ``info.txt``.
        course_path: Directory in which ``info.txt`` and ``CONTENT.md`` are
            written.

    Side effects:
        Changes the process working directory to *course_path* and leaves it
        there on return.

    Raises:
        AttributeError: If a required element lookup yields ``None``
            (assuming BeautifulSoup ``find`` semantics).
        IndexError: If no ``div`` carrying ``data-course-id`` is found.
    """
    # Local import keeps this block self-contained; io.open gives an explicit
    # UTF-8 encoding regardless of the platform default.
    import io

    soup = create_soup(url)
    course_title = soup.find('h1', {"class": "default-title"}).text
    # Several divs may carry the attribute; the first one is used.
    course_id = soup.findAll(
        'div', {"data-course-id": True})[0]['data-course-id']
    author_name = soup.find('cite', {"data-ga-label": "author-name"}).text

    # Tags are rendered as '#name' entries, each followed by a tab.  The topic
    # string keeps its leading tab so the output matches earlier runs.
    topic_tag = '\t' + ''.join(
        '#' + tag.text + '\t'
        for tag in soup.findAll('a', {"data-ga-label": "topic-tag"}))
    software_tag = ''.join(
        '#' + tag.text + '\t'
        for tag in soup.findAll('a', {"data-ga-label": "software-tag"}))

    release_date = soup.find('span', {"id": "release-date"}).text
    duration = soup.find('div', {"class": "duration"}).find('span').text
    download_date = time.strftime("%d/%m/%y")  # today's date

    # Fall back to the release date when the page has no update-date span.
    update_span = soup.find('span', {"id": "update-date"})
    update_date = update_span.text if update_span is not None else release_date

    summary = [
        ("Course Name", course_title),
        ("Course id", course_id),
        ("Author Name", author_name),
        ("Topics", topic_tag),
        ("Softwares", software_tag),
        ("Duration", duration),
        ("Release Date", release_date),
        ("Updated On", update_date),
    ]
    link = ("Course URL", url)

    # The console summary omits the download date; info.txt includes it.
    for label, value in summary + [link]:
        message.write(label, value)

    os.chdir(course_path)

    with io.open('info.txt', mode='a', encoding='utf-8') as info_out:
        for label, value in summary + [("Downloaded On", download_date), link]:
            info_out.write(u"{0}\t\t{1}\n".format(label, value))

    with io.open('CONTENT.md', mode='a', encoding='utf-8') as content_md:
        content_md.write(u"# {0} with {1} on lynda.com \n\n".format(
            course_title, author_name))
        content_md.write(u"## Chapters:\n\n")  # next heading

    # print message
    message.print_line(message.INFO_FILE_CREATED)
Ejemplo n.º 2
0
def info_file(url, course_path):
    """Scrape course metadata from *url* and record it in *course_path*.

    The page is parsed via ``create_soup`` (defined elsewhere; presumably
    returns a BeautifulSoup document -- confirm).  The extracted fields are
    echoed through ``message.write`` and then appended, UTF-8 encoded, to
    ``info.txt``; a heading for the course is appended to ``CONTENT.md``.

    Args:
        url: Address of the course page; also written to ``info.txt``.
        course_path: Directory in which ``info.txt`` and ``CONTENT.md`` are
            written.

    Side effects:
        Changes the process working directory to *course_path* and leaves it
        there on return.

    Raises:
        AttributeError: If a required element lookup yields ``None``
            (assuming BeautifulSoup ``find`` semantics).
        IndexError: If no ``div`` carrying ``data-course-id`` is found.
    """
    soup = create_soup(url)
    course_title = soup.find('h1', {"class": "default-title"}).text
    # Several divs may carry the attribute; the first one is used.
    course_id = soup.findAll(
        'div', {"data-course-id": True})[0]['data-course-id']
    author_name = soup.find('cite', {"data-ga-label": "author-name"}).text

    # Tags are rendered as '#name' entries, each followed by a tab.  The topic
    # string keeps its leading tab so the output matches earlier runs.
    topic_tag = '\t' + ''.join(
        '#' + tag.text + '\t'
        for tag in soup.findAll('a', {"data-ga-label": "topic-tag"}))
    software_tag = ''.join(
        '#' + tag.text + '\t'
        for tag in soup.findAll('a', {"data-ga-label": "software-tag"}))
    if not software_tag:
        software_tag = 'None'  # placeholder when no software tags are listed

    release_date = soup.find('span', {"id": "release-date"}).text
    duration = soup.find('div', {"class": "duration"}).find('span').text
    download_date = time.strftime("%d/%B/%Y")  # today's date

    rows = [
        (u"Course Name", course_title),
        (u"Course id", course_id),
        (u"Author Name", author_name),
        (u"Topics", topic_tag),
        (u"Softwares", software_tag),
        (u"Duration", duration),
        (u"Release Date", release_date),
        (u"Downloaded On", download_date),
        (u"Course URL", url),
    ]

    # The first console label carries a leading newline; the file label
    # does not.
    message.write("\nCourse Name", course_title)
    for label, value in rows[1:]:
        message.write(label, value)

    os.chdir(course_path)  # Jump to course directory to save info.txt

    # write to info.txt; the context manager closes the handle even if a
    # write fails part-way through
    with io.open('info.txt', mode="a", encoding="utf-8") as info_out:
        for label, value in rows:
            info_out.write(u"{0}\t\t{1}\n".format(label, value))

    # write to content.md
    with io.open('CONTENT.md', mode="a", encoding="utf-8") as content_md:
        content_md.write(u"# {0} with {1} on lynda.com \n".format(
            course_title, author_name))

    # print message
    message.print_line(message.INFO_FILE_CREATED)