コード例 #1
0
def check_file_exists(output_dir, lesson_title):
    """Return the PDF path for a lesson, or ``False`` if it is already on disk.

    The path is ``<output_dir>/<lesson_title>.pdf``. When a regular file is
    already present at that path an info message is logged and ``False`` is
    returned; otherwise the path string itself is returned.
    """
    pdf_name = '{}.pdf'.format(lesson_title)
    pdf_path = output_dir + '/' + pdf_name

    # Guard clause: nothing on disk yet, so hand the target path back.
    if not os.path.isfile(pdf_path):
        return pdf_path

    logger.info('lesson: %s already exists. Skipping.', lesson_title)
    return False
コード例 #2
0
def download_quizzes(topic, output_dir):
    """Download all quizzes of every concept belonging to ``topic``.

    Creates ``output_dir`` if needed, looks up the topic's concepts with
    ``get_concepts``, then the quizzes of each concept with ``get_quizzes``,
    and passes each quiz to ``download_quiz`` together with ``output_dir``.
    The number of concepts and of quizzes per concept is logged at info level.
    """
    os.makedirs(output_dir, exist_ok=True)

    topic_concepts = get_concepts(topic)
    logger.info('%d concepts present in topic %s', len(topic_concepts), topic)

    for concept in topic_concepts:
        concept_quizzes = get_quizzes(concept)
        logger.info('%d quizzes present in concept %s',
                    len(concept_quizzes), concept)
        for quiz_name in concept_quizzes:
            download_quiz(output_dir, quiz_name)
コード例 #3
0
def download_quiz(output_dir, quiz):
    """Download the answer key of ``quiz`` into ``output_dir`` as a .docx file.

    The file is saved as ``<output_dir>/<quiz>-Answer-Key.docx``. Nothing is
    requested when that file already exists. A non-200 response, or a
    response whose Content-Type is not a Word document, is logged as an
    error and skipped without writing anything.

    Args:
        output_dir: Directory the answer key is written to.
        quiz: Quiz identifier; used in both the request URL and the file name.
    """
    docx_media_type = ('application/vnd.openxmlformats-officedocument'
                       '.wordprocessingml.document')

    filename = output_dir + '/{0}-Answer-Key.docx'.format(quiz)
    if os.path.isfile(filename):
        logger.info('Quiz: %s already exists. Skipping.', quiz)
        return

    res = requests.get('http://www.ck12.org/flx/show/answer%20key/' + quiz +
                       '-Answer-Key')
    if res.status_code != 200:
        logger.error('Error getting quiz %s: %s', quiz, res.status_code)
        return

    # Validate explicitly instead of with `assert`: assertions are removed
    # under `python -O`, which would let a non-document body be saved as
    # .docx. Header parameters (e.g. "; charset=...") and letter case are
    # ignored, and a missing header is treated as a mismatch rather than
    # raising KeyError.
    content_type = res.headers.get('Content-Type', '')
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type != docx_media_type:
        logger.error('Unexpected content type for quiz %s: %s',
                     quiz, content_type)
        return

    with open(filename, 'wb') as f:
        f.write(res.content)
コード例 #4
0
def _extract_download_uri(render_response):
    """Return the download URI from a render-status response, or ``None``."""
    if 'downloadUri' in render_response:
        return render_response['downloadUri']
    # Fall back to 'result'; a missing or empty value means "no URI".
    return render_response.get('result') or None


def make_pdf_download_requests(out_dir, concept_topics):
    """Request a PDF render for each lesson of the given topics and download it.

    For every topic the lesson names are collected with
    ``get_topic_lesson_names``. Lessons for which ``check_file_exists``
    returns a falsy value are skipped. For the remaining lessons the lesson
    page is fetched, the artifact ids are read from its first PDF link, and
    the render-status endpoint is polled until it reports ``SUCCESS``; the
    resulting URI is then passed to ``download_lesson_pdf``.

    Lessons whose page has no PDF link, or whose render response carries no
    download URI, are logged as errors and skipped.

    Args:
        out_dir: Directory passed to ``check_file_exists`` for each lesson.
        concept_topics: Iterable of topic names used to build lesson URLs.
    """
    lesson_base_url = 'http://www.ck12.org/earth-science/{}/lesson/{}'
    render_req_base_url = 'http://www.ck12.org/render/pdf/status/{}/{}'

    topic_lessons = defaultdict(list)
    for topic in concept_topics:
        topic_lessons[topic].extend(get_topic_lesson_names(topic))

    for topic, lessons in topic_lessons.items():
        for lesson in lessons:
            out_filename = check_file_exists(out_dir, lesson)
            if not out_filename:
                continue

            lesson_r = requests.get(lesson_base_url.format(topic, lesson))
            soup = BeautifulSoup(lesson_r.content, 'html.parser')
            pdf_links = soup.find_all("a", {"class": "js_signinrequired pdf"})
            if not pdf_links:
                # Previously an IndexError aborted the whole run here.
                logger.error('no pdf link found for lesson %s', lesson)
                continue

            link_attr = pdf_links[0].attrs
            render_req_url = render_req_base_url.format(
                link_attr['data-artifactid'],
                link_attr['data-artifactrevisionid'])
            render_req_response = requests.get(
                render_req_url, cookies=req_cookie.your_acc_cookie).json()

            # NOTE(review): this polls until the status is exactly 'SUCCESS'.
            # If the service can report a terminal failure status the loop
            # never ends -- confirm the possible values and add a bail-out.
            while render_req_response['status'] != 'SUCCESS':
                # Jitter the wait (12-17 s) between status polls.
                retry_time = 15 + random.randrange(-3, 3, 1)
                logger.info('%s pdf is %s... waiting %s before trying again',
                            lesson, render_req_response['status'], retry_time)
                time.sleep(retry_time)
                render_req_response = requests.get(
                    render_req_url, cookies=req_cookie.your_acc_cookie).json()

            download_uri = _extract_download_uri(render_req_response)
            if not download_uri:
                # The format string now has a placeholder for the status, and
                # the lesson is skipped instead of downloading from no URI.
                logger.error('error for %s with status %s', lesson,
                             render_req_response['status'])
                continue
            download_lesson_pdf(out_filename, lesson, download_uri)
コード例 #5
0
def download_topic_lessons(topic, out_dir, start=14, stop=20):
    """Download lesson PDFs for a slice of the concepts of ``topic``.

    Creates ``out_dir`` if needed, looks up the topic's concepts with
    ``quiz.get_concepts``, logs how many there are, and passes
    ``concepts[start:stop]`` to ``make_pdf_download_requests``.

    Args:
        topic: Topic whose concepts are looked up.
        out_dir: Directory the lesson PDFs are written to.
        start: Index of the first concept to process (slice start).
        stop: Index one past the last concept to process (slice stop);
            ``None`` means "through the end".

    The defaults reproduce the previously hard-coded ``[14:20]`` slice.
    NOTE(review): that range looks like a leftover from manual batching --
    confirm whether the default should cover all concepts instead.
    """
    os.makedirs(out_dir, exist_ok=True)
    topic_concepts = quiz.get_concepts(topic)
    logger.info('%d concepts present in topic %s', len(topic_concepts), topic)
    make_pdf_download_requests(out_dir, topic_concepts[start:stop])