Exemple #1
0
def get_enrolled_users(course_id, token=None):
    """Return the ``users`` entry of the learners group of a course.

    Args:
        course_id: primary key of the course to look up.
        token: API token; ``get_token()`` is called when it is falsy.

    Returns:
        The ``users`` value of the first ``groups`` object whose primary key
        is the course's ``learners_group``.
    """
    token = token or get_token()

    # First hop: the course record names its learners group.
    course = fetch_objects('courses', token=token, pk=course_id)[0]
    # Second hop: the group record carries the user list.
    group = fetch_objects('groups', token=token, pk=course['learners_group'])[0]
    return group['users']
Exemple #2
0
def get_course_grades(course_id, cached=True, token=None):
    """Return the grades of a course, one row per user and graded result.

    The table is read from ``cache/course-<course_id>-grades.csv`` when that
    file exists and ``cached`` is true. Otherwise the ``course-grades``
    objects are fetched, flattened into one table and written to that file.

    Args:
        course_id: identifier of the course; used for the query and for the
            cache file name.
        cached: when false, an existing cache file is ignored and the data
            is fetched again.
        token: API token; ``get_token()`` is called when it is falsy and a
            fetch is needed.

    Returns:
        A DataFrame restricted to the columns in ``header``. When the API
        yields no grades the frame has those columns and no rows, and
        nothing is written to the cache.
    """
    header = [
        'user_id', 'step_id', 'is_passed', 'score', 'total_score',
        'date_joined', 'last_viewed'
    ]

    # use cache
    course_grades_filename = 'cache/course-{}-grades.csv'.format(course_id)
    if cached and os.path.isfile(course_grades_filename):
        return pd.read_csv(course_grades_filename)[header]

    if not token:
        token = get_token()

    # Collect the per-user frames and concatenate once: appending to a
    # DataFrame inside the loop copies the whole table on every iteration,
    # and DataFrame.append no longer exists in pandas 2.
    user_grades = []
    for grade in fetch_objects('course-grades', course=course_id, token=token):
        # ``results`` is transposed so that each of its entries becomes a row.
        user_grade = pd.DataFrame(grade['results']).transpose()
        user_grade['user_id'] = grade['user']
        user_grade['total_score'] = grade['score']
        user_grade['date_joined'] = grade['date_joined']
        user_grade['last_viewed'] = grade['last_viewed']
        user_grades.append(user_grade)

    if not user_grades:
        # No grades at all: return an empty table with the expected columns
        # instead of failing on the missing date columns below.
        return pd.DataFrame(columns=header)

    course_grades = pd.concat(user_grades, ignore_index=True)
    for date_column in ('date_joined', 'last_viewed'):
        course_grades[date_column] = course_grades[date_column].apply(
            get_unix_date)
    course_grades = course_grades[header]
    course_grades.to_csv(course_grades_filename, index=False)
    return course_grades
Exemple #3
0
def get_course_grades(course_id, cached=True, token=None):
    """Load the grade table of a course, from the CSV cache when possible.

    Args:
        course_id: identifier of the course; also part of the cache file
            name ``cache/course-<course_id>-grades.csv``.
        cached: use the cache file if it exists; pass false to fetch again.
        token: API token; ``get_token()`` is called when it is falsy and the
            data has to be fetched.

    Returns:
        A DataFrame with exactly the columns in ``header``. If the
        ``course-grades`` query returns nothing, the frame is empty and no
        cache file is written.
    """
    header = ['user_id', 'step_id', 'is_passed', 'score', 'total_score', 'date_joined', 'last_viewed']

    # use cache
    course_grades_filename = 'cache/course-{}-grades.csv'.format(course_id)
    if cached and os.path.isfile(course_grades_filename):
        return pd.read_csv(course_grades_filename)[header]

    if not token:
        token = get_token()

    grades = fetch_objects('course-grades', course=course_id, token=token)

    # Build one frame per user and join them in a single pd.concat call.
    # Repeated DataFrame.append is quadratic and was removed in pandas 2.
    frames = []
    for grade in grades:
        # Transposing turns every entry of ``results`` into a row.
        user_grade = pd.DataFrame(grade['results']).transpose()
        user_grade['user_id'] = grade['user']
        user_grade['total_score'] = grade['score']
        user_grade['date_joined'] = grade['date_joined']
        user_grade['last_viewed'] = grade['last_viewed']
        frames.append(user_grade)

    if not frames:
        # Without this guard the date conversion below raises KeyError on a
        # frame that has no columns.
        return pd.DataFrame(columns=header)

    course_grades = pd.concat(frames, ignore_index=True)
    course_grades['date_joined'] = course_grades['date_joined'].apply(get_unix_date)
    course_grades['last_viewed'] = course_grades['last_viewed'].apply(get_unix_date)
    course_grades = course_grades[header]
    course_grades.to_csv(course_grades_filename, index=False)
    return course_grades
Exemple #4
0
def get_enrolled_users(course_id, token=None):
    """Look up the users of a course's learners group.

    Args:
        course_id: primary key of the course.
        token: API token; when falsy, one is requested with ``get_token()``.

    Returns:
        The ``users`` field of the first ``groups`` object fetched by the
        course's ``learners_group`` key.
    """
    if token:
        auth = token
    else:
        auth = get_token()

    courses = fetch_objects('courses', token=auth, pk=course_id)
    group_pk = courses[0]['learners_group']
    groups = fetch_objects('groups', token=auth, pk=group_pk)
    return groups[0]['users']
Exemple #5
0
def get_course_structure(course_id, cached=True, token=None):
    """Return one row per step of a course with its lesson and module data.

    The table is read from ``cache/course-<course_id>-structure.csv`` when
    that file exists and ``cached`` is true. Otherwise the sections, units,
    lessons and steps of the course are fetched, combined and written to
    that file.

    Args:
        course_id: identifier of the course.
        cached: when false, ignore an existing cache file.
        token: API token; ``get_token()`` is called when it is falsy and a
            fetch is needed.

    Returns:
        A DataFrame sorted by module, lesson and step position.
    """
    # use cache
    cache_path = 'cache/course-{}-structure.csv'.format(course_id)
    if cached and os.path.isfile(cache_path):
        return pd.read_csv(cache_path)

    token = token or get_token()

    course = fetch_objects_by_id('courses', course_id, token=token)[0]
    sections = fetch_objects('sections', token=token, id=course['sections'])

    unit_ids = [unit_id for section in sections for unit_id in section['units']]
    units = fetch_objects('units', token=token, id=unit_ids)

    lesson_ids = [unit['lesson'] for unit in units]
    lessons = fetch_objects('lessons', token=token, id=lesson_ids)

    step_ids = [step_id for lesson in lessons for step_id in lesson['steps']]
    steps = fetch_objects('steps', token=token, id=step_ids)

    # Step-level table; ``course_id`` is a scalar repeated on every row.
    step_table = pd.DataFrame({
        'course_id': course_id,
        'lesson_id': [step['lesson'] for step in steps],
        'step_id': [step['id'] for step in steps],
        'step_position': [step['position'] for step in steps],
        'step_type': [step['block']['name'] for step in steps],
        'step_correct_ratio': [step['correct_ratio'] for step in steps],
    })

    def per_unit(field):
        # Repeat a section's value once for every unit that section lists,
        # in section order, so the result lines up with ``unit_ids``.
        return [section[field] for section in sections for _ in section['units']]

    # Lesson-level table. The per-section columns are aligned with ``units``
    # by position only, which relies on ``units`` following ``unit_ids``.
    lesson_table = pd.DataFrame({
        'lesson_id': lesson_ids,
        'lesson_position': [unit['position'] for unit in units],
        'module_id': per_unit('id'),
        'module_position': per_unit('position'),
        'hard_deadline': per_unit('hard_deadline'),
        'begin_date': per_unit('begin_date'),
    })

    # merge() without ``on`` joins on the shared column, ``lesson_id``.
    course_structure = step_table.merge(lesson_table)
    course_structure = course_structure.sort_values(
        ['module_position', 'lesson_position', 'step_position'])
    course_structure.to_csv(cache_path, index=False)
    return course_structure
Exemple #6
0
def get_course_submissions(course_id,
                           course_structure=pd.DataFrame(),
                           token=None):
    """Return the submissions of every step of a course, joined with attempts.

    Args:
        course_id: identifier of the course; only used to load the course
            structure when none is supplied.
        course_structure: DataFrame with a ``step_id`` column. When it is
            empty, ``get_course_structure`` is called. The default frame is
            only read and rebound locally, never modified.
        token: API token; ``get_token()`` is called when it is falsy.

    Returns:
        A DataFrame with the columns in ``header``; it has no rows when no
        step has submissions.
    """
    header = [
        'submission_id', 'user_id', 'step_id', 'attempt_id', 'status',
        'submission_time', 'reply', 'hint'
    ]
    if not token:
        token = get_token()

    if course_structure.empty:
        # Pass the token by keyword so it cannot be bound to a different
        # positional parameter (such as ``cached``) of get_course_structure.
        course_structure = get_course_structure(course_id, token=token)

    # Gather one frame per step and concatenate once. DataFrame.append in a
    # loop copies the accumulated table each time and is gone in pandas 2.
    per_step = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(
            fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue

        step_submissions = step_submissions.rename(
            columns={
                'id': 'submission_id',
                'time': 'submission_time',
                'attempt': 'attempt_id'
            })
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(
            fetch_objects_by_id('attempts', attempt_ids, token=token))
        # Rename the attempt columns that would clash with submission columns.
        step_attempts = step_attempts.rename(columns={
            'id': 'attempt_id',
            'time': 'attempt_time',
            'status': 'attempt_status'
        })
        step_submissions = pd.merge(step_submissions,
                                    step_attempts,
                                    on='attempt_id')
        step_submissions['step_id'] = step
        per_step.append(step_submissions)

    course_submissions = pd.concat(per_step) if per_step else pd.DataFrame()
    if course_submissions.empty:
        return pd.DataFrame(columns=header)

    for time_column in ('submission_time', 'attempt_time'):
        course_submissions[time_column] = course_submissions[
            time_column].apply(get_unix_date)

    # ``header`` selects 'user_id', so the 'user' column has to be renamed to
    # exactly that name; any other target makes the selection below fail.
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    course_submissions = course_submissions[header]
    return course_submissions
Exemple #7
0
def get_video_stats(step_id, cached=True, token=None):
    """Return the ``video-stats`` objects of a step as a DataFrame.

    The table is read from ``cache/step-<step_id>-videostats.csv`` when that
    file exists and ``cached`` is true. Otherwise the objects are fetched
    and, if there are any, written to that file.

    Args:
        step_id: identifier of the step.
        cached: when false, ignore an existing cache file.
        token: API token; ``get_token()`` is called only when it is falsy
            and the data actually has to be fetched.

    Returns:
        A DataFrame of the statistics; empty when the API returns none.
    """
    cached_name = 'cache/step-{}-videostats.csv'.format(step_id)

    # Check the cache before asking for a token: a cache hit needs no API
    # access at all.
    if cached and os.path.isfile(cached_name):
        return pd.read_csv(cached_name)

    if not token:
        token = get_token()

    stats = pd.DataFrame(fetch_objects('video-stats', token=token, step=step_id))

    if not stats.empty:
        stats.to_csv(cached_name, index=False)
        # Read the file back so a fresh fetch returns the same CSV-parsed
        # values as a later cache hit (presumably the intent -- confirm).
        stats = pd.read_csv(cached_name)
    return stats
Exemple #8
0
def get_video_stats(step_id, cached=True, token=None):
    """Load the video statistics of a step, from the CSV cache when possible.

    Args:
        step_id: identifier of the step; also part of the cache file name
            ``cache/step-<step_id>-videostats.csv``.
        cached: use the cache file if it exists; pass false to fetch again.
        token: API token; requested with ``get_token()`` only if it is falsy
            and a fetch is required.

    Returns:
        A DataFrame built from the ``video-stats`` objects, or an empty
        DataFrame when there are none (nothing is cached in that case).
    """
    cached_name = 'cache/step-{}-videostats.csv'.format(step_id)

    # The cache lookup comes first so that a hit does not trigger a token
    # request.
    if cached and os.path.isfile(cached_name):
        return pd.read_csv(cached_name)

    if not token:
        token = get_token()

    stats = pd.DataFrame(
        fetch_objects('video-stats', token=token, step=step_id))
    if stats.empty:
        return stats

    stats.to_csv(cached_name, index=False)
    # Return the re-read file rather than the in-memory frame, so the result
    # matches what a later cache hit returns.
    return pd.read_csv(cached_name)
Exemple #9
0
def get_course_submissions(course_id, course_structure=pd.DataFrame(), cached=True, token=None):
    """Return the submissions of a course joined with attempts, using a CSV cache.

    The table is read from ``cache/course-<course_id>-submissions.csv`` when
    that file exists and ``cached`` is true. Otherwise the submissions and
    attempts of every step are fetched, merged and written to that file.

    Args:
        course_id: identifier of the course.
        course_structure: DataFrame with a ``step_id`` column. When it is
            empty, ``get_course_structure`` is called. The default frame is
            only read and rebound locally, never modified.
        cached: when false, ignore an existing cache file.
        token: API token; ``get_token()`` is called when it is falsy and a
            fetch is needed.

    Returns:
        A DataFrame with the columns in ``header``. When no step has
        submissions it has no rows and nothing is cached.
    """
    header = ['submission_id', 'step_id', 'user_id', 'attempt_time', 'submission_time', 'status']

    # use cache
    course_submissions_filename = 'cache/course-{}-submissions.csv'.format(course_id)
    if cached and os.path.isfile(course_submissions_filename):
        return pd.read_csv(course_submissions_filename)[header]

    if not token:
        token = get_token()

    if course_structure.empty:
        # Keyword argument: passed positionally, the token would land in
        # whatever parameter follows ``course_id`` (for example ``cached``).
        course_structure = get_course_structure(course_id, token=token)

    # One frame per step, concatenated once after the loop. DataFrame.append
    # in a loop is quadratic and was removed in pandas 2.
    per_step = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue

        step_submissions = step_submissions.rename(columns={'id': 'submission_id',
                                                            'time': 'submission_time',
                                                            'attempt': 'attempt_id'})
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(fetch_objects_by_id('attempts', attempt_ids, token=token))
        # Rename the attempt columns that would clash with submission columns.
        step_attempts = step_attempts.rename(columns={'id': 'attempt_id',
                                                      'time': 'attempt_time',
                                                      'status': 'attempt_status'})
        step_submissions = pd.merge(step_submissions, step_attempts, on='attempt_id')
        step_submissions['step_id'] = step
        per_step.append(step_submissions)

    course_submissions = pd.concat(per_step) if per_step else pd.DataFrame()
    if course_submissions.empty:
        return pd.DataFrame(columns=header)

    course_submissions['submission_time'] = course_submissions['submission_time'].apply(get_unix_date)
    course_submissions['attempt_time'] = course_submissions['attempt_time'].apply(get_unix_date)

    # ``header`` selects 'user_id'; the 'user' column must get exactly that
    # name or the selection below raises KeyError.
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    course_submissions = course_submissions[header]
    course_submissions.to_csv(course_submissions_filename, index=False)
    return course_submissions
Exemple #10
0
def get_course_structure(course_id, token=None):
    """Fetch the structure of a course as one row per step.

    Sections, units, lessons and steps are fetched in turn. A step-level
    table is then merged with a lesson-level table that carries the module
    (section) data.

    Args:
        course_id: identifier of the course.
        token: API token; ``get_token()`` is called when it is falsy.

    Returns:
        A DataFrame sorted by module, lesson and step position.
    """
    token = token or get_token()

    course = fetch_objects_by_id('courses', course_id, token=token)[0]
    sections = fetch_objects('sections', token=token, id=course['sections'])

    unit_ids = []
    for section in sections:
        unit_ids.extend(section['units'])
    units = fetch_objects('units', token=token, id=unit_ids)

    lesson_ids = [unit['lesson'] for unit in units]
    lessons = fetch_objects('lessons', token=token, id=lesson_ids)

    step_ids = []
    for lesson in lessons:
        step_ids.extend(lesson['steps'])
    steps = fetch_objects('steps', token=token, id=step_ids)

    # Step-level table; the scalar ``course_id`` is repeated on every row.
    step_frame = pd.DataFrame({
        'course_id': course_id,
        'lesson_id': [step['lesson'] for step in steps],
        'step_id': [step['id'] for step in steps],
        'step_position': [step['position'] for step in steps],
        'step_type': [step['block']['name'] for step in steps],
        'step_correct_ratio': [step['correct_ratio'] for step in steps],
    })

    # The owning section, repeated once per unit it lists. This lines up
    # with ``units`` by position only, so it relies on ``units`` being in
    # the same order as ``unit_ids``.
    owning_sections = [
        section for section in sections for _ in section['units']
    ]

    unit_frame = pd.DataFrame({
        'lesson_id': lesson_ids,
        'lesson_position': [unit['position'] for unit in units],
        'module_id': [section['id'] for section in owning_sections],
        'module_position': [
            section['position'] for section in owning_sections
        ],
        'hard_deadline': [
            section['hard_deadline'] for section in owning_sections
        ],
        'begin_date': [section['begin_date'] for section in owning_sections],
    })

    # merge() without ``on`` joins on the shared column, ``lesson_id``.
    merged = step_frame.merge(unit_frame)
    return merged.sort_values(
        ['module_position', 'lesson_position', 'step_position'])
Exemple #11
0
def get_course_submissions(course_id,
                           course_structure=pd.DataFrame(),
                           cached=True,
                           token=None):
    """Load the submissions of a course, from the CSV cache when possible.

    Args:
        course_id: identifier of the course; also part of the cache file
            name ``cache/course-<course_id>-submissions.csv``.
        course_structure: DataFrame with a ``step_id`` column. When it is
            empty, ``get_course_structure`` is called. The default frame is
            never modified here, only read and rebound.
        cached: use the cache file if it exists; pass false to fetch again.
        token: API token; ``get_token()`` is called when it is falsy and the
            data has to be fetched.

    Returns:
        A DataFrame with the columns in ``header``. If no step has
        submissions, the frame is empty and no cache file is written.
    """
    header = [
        'submission_id', 'step_id', 'user_id', 'attempt_time',
        'submission_time', 'status'
    ]

    # use cache
    course_submissions_filename = 'cache/course-{}-submissions.csv'.format(
        course_id)
    if cached and os.path.isfile(course_submissions_filename):
        return pd.read_csv(course_submissions_filename)[header]

    if not token:
        token = get_token()

    if course_structure.empty:
        # By keyword, so the token is never taken for another positional
        # parameter of get_course_structure (e.g. ``cached``).
        course_structure = get_course_structure(course_id, token=token)

    # Accumulate in a list and concatenate once. Growing a DataFrame with
    # append copies it on every step and is unavailable in pandas 2.
    frames = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(
            fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue

        step_submissions = step_submissions.rename(
            columns={
                'id': 'submission_id',
                'time': 'submission_time',
                'attempt': 'attempt_id'
            })
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(
            fetch_objects_by_id('attempts', attempt_ids, token=token))
        # Rename the attempt columns that would clash with submission columns.
        step_attempts = step_attempts.rename(columns={
            'id': 'attempt_id',
            'time': 'attempt_time',
            'status': 'attempt_status'
        })
        step_submissions = pd.merge(step_submissions,
                                    step_attempts,
                                    on='attempt_id')
        step_submissions['step_id'] = step
        frames.append(step_submissions)

    course_submissions = pd.concat(frames) if frames else pd.DataFrame()
    if course_submissions.empty:
        return pd.DataFrame(columns=header)

    for time_column in ('submission_time', 'attempt_time'):
        course_submissions[time_column] = course_submissions[
            time_column].apply(get_unix_date)

    # The column selected through ``header`` is 'user_id', so that is the
    # name the 'user' column has to be given.
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    course_submissions = course_submissions[header]
    course_submissions.to_csv(course_submissions_filename, index=False)
    return course_submissions
Exemple #12
0
    def generate_latex_report(self, directory, cached=True):
        """Write the LaTeX input files of the per-step statistics report.

        Files written (names are formed by appending to ``directory``):
        ``info.tex`` (course title and URL), ``map.tex`` (one ``\\input``
        line per lesson) and ``lesson-<id>.tex`` for every lesson. The step
        statistics table is also saved to
        ``cache/course-<course_id>-stepstats.csv``.

        Args:
            directory: prefix for the generated files. It is concatenated
                directly with the file names, so it must already end with a
                path separator.
            cached: accepted for interface compatibility; this method does
                not read it.
        """
        course_id = self.course_id
        token = get_token()

        # Reuse the token obtained above for the course query.
        course_info = fetch_objects('courses', token=token, pk=course_id)[0]
        course_title = course_info['title']
        course_url = '{}/course/{}'.format(API_HOST, course_id)

        with open('{}info.tex'.format(directory), 'w',
                  encoding='utf-8') as info_file:
            info_file.write(
                '\\def\\coursetitle{{{}}}\n\\def\\courseurl{{{}}}\n'.format(
                    course_title, course_url))

        # Truncate map.tex; the lesson loop below appends to it.
        with open('{}map.tex'.format(directory), 'w',
                  encoding='utf-8') as map_file:
            map_file.write('')

        time_now = time.time()
        certificate_threshold = course_info['certificate_regular_threshold']
        begin_date = (get_unix_date(course_info['begin_date'])
                      if course_info['begin_date'] else 0)
        # Fall back to "now" when the course has no last deadline. The guard
        # has to test the field that is converted, not ``begin_date``.
        last_deadline = (get_unix_date(course_info['last_deadline'])
                         if course_info['last_deadline'] else time_now)

        # Instructors and testers are excluded from the learner statistics.
        course_teachers = course_info['instructors']
        course_testers = fetch_objects_by_pk('groups',
                                             course_info["testers_group"],
                                             token=token)[0]['users']
        users_to_delete = course_teachers + course_testers

        # collect course grades
        grades = get_course_grades(course_id, token=token)
        learners = grades[[
            'user_id', 'total_score', 'date_joined', 'last_viewed'
        ]].drop_duplicates()
        learners = learners[~learners.user_id.isin(users_to_delete)]
        # Keep learners who scored something but stayed below the
        # certificate threshold.
        learners = learners[(0 < learners.total_score)
                            & (learners.total_score < certificate_threshold)]

        # collect submissions
        course_structure = get_course_structure(course_id, token=token)
        # ``token`` goes by keyword so it cannot be bound to a different
        # positional parameter (such as ``cached``) of get_course_submissions.
        course_submissions = get_course_submissions(course_id,
                                                    course_structure,
                                                    token=token)
        course_submissions = course_submissions[
            course_submissions.user_id.isin(learners.user_id)]

        # find last submissions
        course_submissions = course_submissions[
            (begin_date < course_submissions.submission_time)
            & (course_submissions.submission_time < last_deadline)]
        idx_grouped = course_submissions.groupby('user_id')['submission_time']
        # True on the rows holding each user's latest submission time.
        idx = idx_grouped.transform(
            'max') == course_submissions['submission_time']
        last_submissions = course_submissions[idx].groupby(
            'step_id', as_index=False)['submission_id'].count()
        last_submissions = last_submissions.rename(
            columns={'submission_id': 'last_submissions'})

        unique_submissions = course_submissions.groupby(
            'step_id', as_index=False)['user_id'].agg(pd.Series.nunique)
        unique_submissions = unique_submissions.rename(
            columns={'user_id': 'unique_submissions'})
        step_stats = unique_submissions.merge(last_submissions)
        # Share of a step's submitters whose last submission was on it.
        step_stats['dropout_rate'] = step_stats.apply(
            lambda row: (row.last_submissions / row.unique_submissions
                         if row.unique_submissions else 0),
            axis=1)

        # Left merge keeps the steps that have no submissions at all.
        step_stats = pd.merge(course_structure, step_stats, how='left')
        additional_columns = ['viewed_by', 'passed_by', 'correct_ratio']
        step_stats[additional_columns] = step_stats.step_id.apply(
            lambda step: get_step_info(step)[additional_columns])
        step_stats['difficulty'] = 1 - step_stats['correct_ratio']
        step_stats['completion_rate'] = step_stats.apply(
            (lambda row: row.passed_by / row.viewed_by
             if row.viewed_by else 0),
            axis=1)

        step_stats.to_csv('cache/course-{}-stepstats.csv'.format(course_id),
                          index=False)

        # Presentation-only columns: URLs and rates as rounded percentages.
        step_stats['step_url'] = step_stats.apply(process_step_url, axis=1)
        step_stats['completion_rate'] *= 100
        step_stats['dropout_rate'] *= 100
        step_stats['completion_rate'] = step_stats['completion_rate'].round(1)
        step_stats['dropout_rate'] = step_stats['dropout_rate'].round(1)

        for lesson_id in step_stats.lesson_id.unique():
            step_lesson_stats = step_stats[step_stats.lesson_id == lesson_id]
            # Missing values are written as empty cells in the LaTeX output.
            step_lesson_stats = step_lesson_stats.fillna('')
            lesson_url = '{}/lesson/{}'.format(API_HOST, lesson_id)
            lesson_name = '{}'.format(
                lesson_id)  # TODO: use module and lesson position

            with open('{}map.tex'.format(directory), 'a',
                      encoding='utf-8') as map_file:
                map_file.write(
                    '\\input{{generated/lesson-{}.tex}}\n'.format(lesson_id))

            with open('{}lesson-{}.tex'.format(directory, lesson_id),
                      'w',
                      encoding='utf-8') as lesson_file:
                lesson_file.write(
                    '\\newpage\n\\lessoninfo{{{}}}{{{}}}\n'.format(
                        lesson_name, lesson_url))
                lesson_file.write('\\begin{lessonstatistics}')
                for _, step_stat in step_lesson_stats.iterrows():
                    lesson_file.write(STEP_STAT_FORMAT.format(stat=step_stat))
                lesson_file.write('\\end{lessonstatistics}')
Exemple #13
0
    def generate_latex_report(self, directory, cached=True):
        """Write the LaTeX input files and plots of the video peaks report.

        For every step whose ``step_type`` is ``'video'`` the video
        statistics are loaded, peaks are detected with ``get_video_peaks``,
        the per-step LaTeX files are produced by ``generate_latex_files``
        and the figure is saved as ``step_<id>.png``. All peaks are saved to
        ``cache/course-<course_id>-totalpeaks.csv``, and the five with the
        largest ``area`` are listed in ``total.tex``.

        Args:
            directory: prefix for the generated files. It is concatenated
                directly with the file names, so it must already end with a
                path separator.
            cached: forwarded to ``get_video_stats``.
        """
        course_id = self.course_id
        token = get_token()

        # Reuse the token for both queries instead of leaving each callee to
        # obtain its own.
        course_structure = get_course_structure(course_id, token=token)
        course_structure = course_structure.loc[
            course_structure.step_type == 'video',
            ['step_id', 'step_position', 'lesson_id']]

        course_info = fetch_objects('courses', token=token, pk=course_id)
        course_title = course_info[0]['title']
        course_url = '{}/course/{}'.format(API_HOST, course_id)

        with open('{}info.tex'.format(directory), 'w',
                  encoding='utf-8') as info_file:
            info_file.write(
                '\\def\\coursetitle{{{}}}\n\\def\\courseurl{{{}}}\n'.format(
                    course_title, course_url))

        # Create or truncate map.tex.
        with open('{}map.tex'.format(directory), 'w',
                  encoding='utf-8') as map_file:
            map_file.write('')

        # Per-step peak tables, concatenated once after the loop.
        # DataFrame.append in a loop is quadratic and gone in pandas 2.
        peak_frames = []
        for _, row in course_structure.iterrows():
            step_id = row.step_id
            step_url = 'https://stepik.org/lesson/{}/step/{}'.format(
                row.lesson_id, row.step_position)

            stats = get_video_stats(step_id, cached=cached, token=token)

            fig = plt.figure()
            ax1 = fig.add_subplot(211)
            ax2 = fig.add_subplot(212)
            windows = get_video_peaks(stats, plot=True, ax=ax1, ax2=ax2)

            windows['step_id'] = step_id
            windows['course_id'] = course_id
            windows['step_url'] = step_url

            # Format the peak bounds as mm:ss; the ``d`` format code needs
            # integer values.
            windows['start_sec'] = windows['start'].apply(
                lambda x: '{:02d}:{:02d}'.format(x // 60, x % 60))
            windows['end_sec'] = windows['end'].apply(
                lambda x: '{:02d}:{:02d}'.format(x // 60, x % 60))

            self.generate_latex_files(course_id, step_id, step_url, windows,
                                      directory)
            fig.savefig('{}step_{}.png'.format(directory, step_id))
            # Close this figure explicitly; a bare plt.close() closes
            # whichever figure is current.
            plt.close(fig)

            peak_frames.append(windows)

        total_peaks = pd.concat(peak_frames) if peak_frames else pd.DataFrame()

        total_peaks.to_csv('cache/course-{}-totalpeaks.csv'.format(course_id),
                           index=False)

        # With no video steps the frame has no 'area' column, so sorting is
        # skipped and the empty branch below writes the placeholder.
        if not total_peaks.empty:
            total_peaks = total_peaks.sort_values('area', ascending=False)
        top_peaks = total_peaks.head(5)

        with open('{}total.tex'.format(directory), 'w',
                  encoding='utf-8') as total_file:
            if not total_peaks.empty:
                # Russian introduction, roughly: "The following peaks with
                # the largest relative area were found in the course. Check
                # whether learners have trouble understanding the material
                # at these points."
                total_file.write(
                    'В курсе выделены следующие пики, имеющие максимальную относительную площадь.\n'
                )
                total_file.write(
                    'Проверьте, нет ли в данных местах у учащихся ' +
                    'трудностей с пониманием учебного материала.\n')

                total_file.write('\\begin{totalpeaks}\n')
                for _, row in top_peaks.iterrows():
                    total_file.write(
                        '\\totalpeak{{{}}}{{{}}}{{{}}}{{{}}}{{{:.2f}}}\n'.
                        format(row.step_id, row.step_url, row.start_sec,
                               row.end_sec, row.area))
                total_file.write('\\end{totalpeaks}\n')
            else:
                total_file.write('\n')