def get_enrolled_users(course_id, token=None):
    """Return the list of user ids in the course's learners group.

    Performs two API lookups: the course object (for its ``learners_group``
    id) and then the group object itself.
    """
    if not token:
        token = get_token()
    course = fetch_objects('courses', token=token, pk=course_id)[0]
    learners = fetch_objects('groups', token=token,
                             pk=course['learners_group'])[0]
    return learners['users']
def get_course_structure(course_id, cached=True, token=None):
    """Build a step-level course structure table, with CSV caching.

    Each row describes one step, joined with its lesson/module position and
    the module's deadlines. Results are written to
    ``cache/course-<id>-structure.csv`` and reused when ``cached`` is True.

    :param course_id: id of the course on the platform
    :param cached: read/refresh the on-disk CSV cache
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame sorted by module, lesson and step position
    """
    # use cache
    course_structure_filename = 'cache/course-{}-structure.csv'.format(course_id)
    if os.path.isfile(course_structure_filename) and cached:
        return pd.read_csv(course_structure_filename)

    if not token:
        token = get_token()
    # Walk the hierarchy: course -> sections -> units -> lessons -> steps.
    course = fetch_objects_by_id('courses', course_id, token=token)[0]
    sections = fetch_objects('sections', token=token, id=course['sections'])
    unit_ids = [unit for section in sections for unit in section['units']]
    units = fetch_objects('units', token=token, id=unit_ids)
    lesson_ids = [unit['lesson'] for unit in units]
    lessons = fetch_objects('lessons', token=token, id=lesson_ids)
    step_ids = [step for lesson in lessons for step in lesson['steps']]
    steps = fetch_objects('steps', token=token, id=step_ids)

    course_structure = pd.DataFrame({
        'course_id': course_id,
        'lesson_id': [step['lesson'] for step in steps],
        'step_id': [step['id'] for step in steps],
        'step_position': [step['position'] for step in steps],
        'step_type': [step['block']['name'] for step in steps],
        'step_correct_ratio': [step['correct_ratio'] for step in steps],
    })

    def _per_unit(key):
        # Repeat each section's value once per unit in that section
        # (replaces four near-identical build-then-flatten pairs).
        return [section[key] for section in sections for _ in section['units']]

    module_structure = pd.DataFrame({
        'lesson_id': lesson_ids,
        'lesson_position': [unit['position'] for unit in units],
        'module_id': _per_unit('id'),
        'module_position': _per_unit('position'),
        'hard_deadline': _per_unit('hard_deadline'),
        'begin_date': _per_unit('begin_date'),
    })

    course_structure = course_structure.merge(module_structure)
    course_structure = course_structure.sort_values(
        ['module_position', 'lesson_position', 'step_position'])
    course_structure.to_csv(course_structure_filename, index=False)
    return course_structure
def get_course_grades(course_id, cached=True, token=None):
    """Collect per-user, per-step grade records for a course.

    Results are cached to ``cache/course-<id>-grades.csv``.

    :param course_id: id of the course on the platform
    :param cached: read the on-disk CSV cache when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame with the columns listed in ``header``
    """
    header = ['user_id', 'step_id', 'is_passed', 'score', 'total_score',
              'date_joined', 'last_viewed']
    # use cache
    course_grades_filename = 'cache/course-{}-grades.csv'.format(course_id)
    if os.path.isfile(course_grades_filename) and cached:
        course_grades = pd.read_csv(course_grades_filename)
        return course_grades[header]

    if not token:
        token = get_token()
    grades = fetch_objects('course-grades', course=course_id, token=token)
    frames = []
    for grade in grades:
        # One frame per user: step results as rows, plus user-level fields.
        user_grade = pd.DataFrame(grade['results']).transpose()
        user_grade['user_id'] = grade['user']
        user_grade['total_score'] = grade['score']
        user_grade['date_joined'] = grade['date_joined']
        user_grade['last_viewed'] = grade['last_viewed']
        frames.append(user_grade)
    # DataFrame.append was removed in pandas 2.0; accumulate and concat once.
    course_grades = (pd.concat(frames) if frames
                     else pd.DataFrame(columns=header))
    course_grades['date_joined'] = course_grades['date_joined'].apply(get_unix_date)
    course_grades['last_viewed'] = course_grades['last_viewed'].apply(get_unix_date)
    course_grades = course_grades.reset_index(drop=True)
    course_grades = course_grades[header]
    course_grades.to_csv(course_grades_filename, index=False)
    return course_grades
def get_course_grades(course_id, cached=True, token=None):
    """Collect per-user, per-step grade records for a course (cached).

    NOTE(review): this duplicates an identical definition earlier in the
    file; only the last one takes effect at import time — consider removing
    one copy.

    :param course_id: id of the course on the platform
    :param cached: read the on-disk CSV cache when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame with the columns listed in ``header``
    """
    header = [
        'user_id', 'step_id', 'is_passed', 'score', 'total_score',
        'date_joined', 'last_viewed'
    ]
    # use cache
    course_grades_filename = 'cache/course-{}-grades.csv'.format(course_id)
    if os.path.isfile(course_grades_filename) and cached:
        course_grades = pd.read_csv(course_grades_filename)
        return course_grades[header]

    if not token:
        token = get_token()
    grades = fetch_objects('course-grades', course=course_id, token=token)
    frames = []
    for grade in grades:
        # One frame per user: step results as rows, plus user-level fields.
        user_grade = pd.DataFrame(grade['results']).transpose()
        user_grade['user_id'] = grade['user']
        user_grade['total_score'] = grade['score']
        user_grade['date_joined'] = grade['date_joined']
        user_grade['last_viewed'] = grade['last_viewed']
        frames.append(user_grade)
    # DataFrame.append was removed in pandas 2.0; accumulate and concat once.
    course_grades = (pd.concat(frames) if frames
                     else pd.DataFrame(columns=header))
    course_grades['date_joined'] = course_grades['date_joined'].apply(
        get_unix_date)
    course_grades['last_viewed'] = course_grades['last_viewed'].apply(
        get_unix_date)
    course_grades = course_grades.reset_index(drop=True)
    course_grades = course_grades[header]
    course_grades.to_csv(course_grades_filename, index=False)
    return course_grades
def generate_latex_report(self, directory, cached=True):
    """Generate LaTeX fragments for the item/option analysis of a course.

    Writes ``info.tex`` into *directory* and delegates the statistical work
    to ``perform_item_analysis`` / ``perform_option_analysis``.

    :param directory: output directory prefix (must end with a separator,
        since paths are built by plain string concatenation)
    :param cached: reuse on-disk CSV caches for structure and submissions
    """
    course_id = self.course_id
    course_info = fetch_objects('courses', pk=course_id)
    course_title = course_info[0]['title']
    course_url = '{}/course/{}'.format(API_HOST, course_id)
    # encoding specified explicitly, consistent with the other report
    # generators in this file (titles may contain non-ASCII characters)
    with open('{}info.tex'.format(directory), 'w', encoding='utf-8') as info_file:
        info_file.write(
            '\\def\\coursetitle{{{}}}\n\\def\\courseurl{{{}}}\n'.format(
                course_title, course_url))
    course_structure_filename = 'cache/course-{}-structure.csv'.format(course_id)
    if os.path.isfile(course_structure_filename) and cached:
        course_structure = pd.read_csv(course_structure_filename)
    else:
        course_structure = get_course_structure(course_id)
        course_structure.to_csv(course_structure_filename, index=False)
    # Number variations of a step that shares a (lesson, position) slot.
    course_structure['step_variation'] = course_structure.groupby(
        ['lesson_id', 'step_position']).cumcount()
    course_structure['step_variation'] += 1
    submissions_filename = 'cache/course-{}-submissions.csv'.format(course_id)
    if os.path.isfile(submissions_filename) and cached:
        submissions = pd.read_csv(submissions_filename)
    else:
        submissions = get_course_submissions(course_id, course_structure)
        submissions.to_csv(submissions_filename, index=False)
    submissions = pd.merge(submissions, course_structure, on='step_id')
    item_statistics = self.perform_item_analysis(submissions, course_structure, cached)
    option_statistics = self.perform_option_analysis(submissions, cached)
    self.generate_latex_files(item_statistics, option_statistics, directory)
def get_question(step_id):
    """Return a step's question text converted to LaTeX.

    Falls back to a bare newline when the step source has no text block.

    :param step_id: id of the step on the platform
    :return: str — LaTeX-formatted question text
    """
    source = fetch_objects('step-sources', id=step_id)
    try:
        question = source[0]['block']['text']
    except (IndexError, KeyError):
        # Narrowed from a bare `except:` — only missing data should be
        # treated as "no question"; other errors must propagate.
        question = '\n'
    return html2latex(question)
def get_course_submissions(course_id, course_structure=None, token=None):
    """Collect all submissions for every step of a course.

    :param course_id: id of the course on the platform
    :param course_structure: optional pre-fetched structure DataFrame;
        fetched via ``get_course_structure`` when absent/empty
        (default changed from a module-level ``pd.DataFrame()`` to ``None``
        to avoid a mutable default argument; behavior is unchanged)
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame with the columns listed in ``header``
    """
    header = [
        'submission_id', 'user_id', 'step_id', 'attempt_id', 'status',
        'submission_time', 'reply', 'hint'
    ]
    if not token:
        token = get_token()
    if course_structure is None or course_structure.empty:
        # Pass token by keyword: positional passing would bind it to the
        # `cached` parameter of the cached get_course_structure variant.
        course_structure = get_course_structure(course_id, token=token)
    frames = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(
            fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue
        step_submissions = step_submissions.rename(
            columns={
                'id': 'submission_id',
                'time': 'submission_time',
                'attempt': 'attempt_id'
            })
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(
            fetch_objects_by_id('attempts', attempt_ids, token=token))
        step_attempts = step_attempts.rename(columns={
            'id': 'attempt_id',
            'time': 'attempt_time',
            'status': 'attempt_status'
        })
        step_submissions = pd.merge(step_submissions, step_attempts,
                                    on='attempt_id')
        step_submissions['step_id'] = step
        frames.append(step_submissions)
    if not frames:
        return pd.DataFrame(columns=header)
    # DataFrame.append was removed in pandas 2.0; concat the collected frames.
    course_submissions = pd.concat(frames)
    course_submissions['submission_time'] = course_submissions[
        'submission_time'].apply(get_unix_date)
    course_submissions['attempt_time'] = course_submissions[
        'attempt_time'].apply(get_unix_date)
    # The API returns the column as 'user'; the header requires 'user_id'
    # (the previous target name '******' was a redaction artifact and would
    # make the header selection below raise KeyError).
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    return course_submissions[header]
def get_video_stats(step_id, cached=True, token=None):
    """Fetch viewing statistics for a video step, with CSV caching.

    :param step_id: id of the video step
    :param cached: reuse ``cache/step-<id>-videostats.csv`` when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame of per-second view counts (empty when no data)
    """
    if not token:
        token = get_token()
    cache_path = 'cache/step-{}-videostats.csv'.format(step_id)
    if cached and os.path.isfile(cache_path):
        return pd.read_csv(cache_path)
    fetched = fetch_objects('video-stats', token=token, step=step_id)
    stats = pd.DataFrame(fetched)
    if not stats.empty:
        stats.to_csv(cache_path, index=False)
        # Re-read so the returned frame has the same dtypes as a cache hit.
        stats = pd.read_csv(cache_path)
    return stats
def get_video_stats(step_id, cached=True, token=None):
    """Fetch viewing statistics for a video step, with CSV caching.

    NOTE(review): duplicate of an identical definition earlier in the file.

    :param step_id: id of the video step
    :param cached: reuse ``cache/step-<id>-videostats.csv`` when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame of per-second view counts (empty when no data)
    """
    if not token:
        token = get_token()
    cache_file = 'cache/step-{}-videostats.csv'.format(step_id)
    use_cache = cached and os.path.isfile(cache_file)
    if use_cache:
        return pd.read_csv(cache_file)
    raw = fetch_objects('video-stats', token=token, step=step_id)
    stats = pd.DataFrame(raw)
    if stats.empty:
        return stats
    stats.to_csv(cache_file, index=False)
    # Round-trip through CSV so dtypes match those of a cache hit.
    return pd.read_csv(cache_file)
def get_course_submissions(course_id, course_structure=None, cached=True, token=None):
    """Collect all submissions for every step of a course, with CSV caching.

    :param course_id: id of the course on the platform
    :param course_structure: optional pre-fetched structure DataFrame;
        fetched via ``get_course_structure`` when absent/empty
        (default changed from a module-level ``pd.DataFrame()`` to ``None``
        to avoid a mutable default argument; behavior is unchanged)
    :param cached: reuse ``cache/course-<id>-submissions.csv`` when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame with the columns listed in ``header``
    """
    header = ['submission_id', 'step_id', 'user_id', 'attempt_time',
              'submission_time', 'status']
    # use cache
    course_submissions_filename = 'cache/course-{}-submissions.csv'.format(course_id)
    if os.path.isfile(course_submissions_filename) and cached:
        course_submissions = pd.read_csv(course_submissions_filename)
        return course_submissions[header]

    if not token:
        token = get_token()
    if course_structure is None or course_structure.empty:
        # Pass token by keyword: positionally it would bind to `cached`.
        course_structure = get_course_structure(course_id, token=token)
    frames = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(
            fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue
        step_submissions = step_submissions.rename(
            columns={'id': 'submission_id', 'time': 'submission_time',
                     'attempt': 'attempt_id'})
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(
            fetch_objects_by_id('attempts', attempt_ids, token=token))
        step_attempts = step_attempts.rename(
            columns={'id': 'attempt_id', 'time': 'attempt_time',
                     'status': 'attempt_status'})
        step_submissions = pd.merge(step_submissions, step_attempts,
                                    on='attempt_id')
        step_submissions['step_id'] = step
        frames.append(step_submissions)
    if not frames:
        return pd.DataFrame(columns=header)
    # DataFrame.append was removed in pandas 2.0; concat the collected frames.
    course_submissions = pd.concat(frames)
    course_submissions['submission_time'] = course_submissions[
        'submission_time'].apply(get_unix_date)
    course_submissions['attempt_time'] = course_submissions[
        'attempt_time'].apply(get_unix_date)
    # The API returns 'user'; the header requires 'user_id' (the previous
    # '******' target was a redaction artifact and would raise KeyError).
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    course_submissions = course_submissions[header]
    course_submissions.to_csv(course_submissions_filename, index=False)
    return course_submissions
def get_step_options(step_id):
    """Fetch the answer options of a choice step as a DataFrame.

    Options are sorted by text and numbered 1..N in ``option_id``. An empty
    frame with the expected columns is returned when the step has no
    choice-style source.
    """
    columns = ['step_id', 'option_id', 'option_name', 'is_correct', 'is_multiple']
    source = fetch_objects('step-sources', id=step_id)
    try:
        block_source = source[0]['block']['source']
        options = pd.DataFrame(block_source['options'])
        is_multiple = block_source['is_multiple_choice']
    except KeyError:
        return pd.DataFrame(columns=columns)
    options['step_id'] = step_id
    options['is_multiple'] = is_multiple
    options = options.sort_values('text').reset_index()
    options = options.rename(columns={'text': 'option_name'})
    options['option_id'] = options.index + 1
    return options[columns]
def get_step_options(step_id):
    """Fetch the answer options of a choice step as a DataFrame.

    NOTE(review): duplicate of an identical definition earlier in the file.

    Options are sorted by text and numbered 1..N in ``option_id``; an empty
    frame with the expected columns is returned for non-choice steps.
    """
    wanted = ['step_id', 'option_id', 'option_name', 'is_correct', 'is_multiple']
    source = fetch_objects('step-sources', id=step_id)
    try:
        step_source = source[0]['block']['source']
        raw_options = step_source['options']
        multiple_flag = step_source['is_multiple_choice']
    except KeyError:
        return pd.DataFrame(columns=wanted)
    options = pd.DataFrame(raw_options)
    options['step_id'] = step_id
    options['is_multiple'] = multiple_flag
    options = options.sort_values('text').reset_index()
    options = options.rename(columns={'text': 'option_name'})
    options['option_id'] = options.index + 1
    return options[wanted]
def get_course_structure(course_id, token=None):
    """Build a step-level course structure table (no caching variant).

    Each row describes one step, joined with its lesson/module position and
    the module's deadlines.

    :param course_id: id of the course on the platform
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame sorted by module, lesson and step position
    """
    if not token:
        token = get_token()
    # Walk the hierarchy: course -> sections -> units -> lessons -> steps.
    course = fetch_objects_by_id('courses', course_id, token=token)[0]
    sections = fetch_objects('sections', token=token, id=course['sections'])
    unit_ids = [unit for section in sections for unit in section['units']]
    units = fetch_objects('units', token=token, id=unit_ids)
    lesson_ids = [unit['lesson'] for unit in units]
    lessons = fetch_objects('lessons', token=token, id=lesson_ids)
    step_ids = [step for lesson in lessons for step in lesson['steps']]
    steps = fetch_objects('steps', token=token, id=step_ids)

    course_structure = pd.DataFrame({
        'course_id': course_id,
        'lesson_id': [step['lesson'] for step in steps],
        'step_id': [step['id'] for step in steps],
        'step_position': [step['position'] for step in steps],
        'step_type': [step['block']['name'] for step in steps],
        'step_correct_ratio': [step['correct_ratio'] for step in steps],
    })

    def _per_unit(key):
        # Repeat each section's value once per unit in that section
        # (replaces four near-identical build-then-flatten pairs).
        return [section[key] for section in sections for _ in section['units']]

    module_structure = pd.DataFrame({
        'lesson_id': lesson_ids,
        'lesson_position': [unit['position'] for unit in units],
        'module_id': _per_unit('id'),
        'module_position': _per_unit('position'),
        'hard_deadline': _per_unit('hard_deadline'),
        'begin_date': _per_unit('begin_date'),
    })

    course_structure = course_structure.merge(module_structure)
    course_structure = course_structure.sort_values(
        ['module_position', 'lesson_position', 'step_position'])
    return course_structure
def generate_latex_report(self, directory, cached=True):
    """Generate per-lesson LaTeX dropout statistics for the course.

    Considers only learners who scored above zero but below the certificate
    threshold, excluding teachers and testers, then computes per-step
    dropout and completion rates and writes ``info.tex``, ``map.tex`` and
    one ``lesson-<id>.tex`` per lesson into *directory*.

    :param directory: output directory prefix (must end with a separator)
    :param cached: currently unused here; kept for interface consistency
        with the other report generators
    """
    course_id = self.course_id
    token = get_token()
    course_info = fetch_objects('courses', pk=course_id)[0]
    course_title = course_info['title']
    course_url = '{}/course/{}'.format(API_HOST, course_id)
    with open('{}info.tex'.format(directory), 'w',
              encoding='utf-8') as info_file:
        info_file.write(
            '\\def\\coursetitle{{{}}}\n\\def\\courseurl{{{}}}\n'.format(
                course_title, course_url))
    # Truncate map.tex; it is appended to in the lesson loop below.
    with open('{}map.tex'.format(directory), 'w',
              encoding='utf-8') as map_file:
        map_file.write('')
    time_now = time.time()
    certificate_threshold = course_info['certificate_regular_threshold']
    begin_date = get_unix_date(
        course_info['begin_date']) if course_info['begin_date'] else 0
    # Fixed copy-paste bug: the guard previously tested 'begin_date',
    # which would crash on a course with begin_date set but no deadline.
    last_deadline = get_unix_date(
        course_info['last_deadline']
    ) if course_info['last_deadline'] else time_now
    course_teachers = course_info['instructors']
    course_testers = fetch_objects_by_pk('groups',
                                         course_info["testers_group"],
                                         token=token)[0]['users']
    users_to_delete = course_teachers + course_testers

    # collect course grades
    grades = get_course_grades(course_id, token=token)
    learners = grades[[
        'user_id', 'total_score', 'date_joined', 'last_viewed'
    ]].drop_duplicates()
    learners = learners[~learners.user_id.isin(users_to_delete)]
    # Keep learners who started but did not reach the certificate.
    learners = learners[(0 < learners.total_score)
                        & (learners.total_score < certificate_threshold)]

    # collect submissions
    course_structure = get_course_structure(course_id, token=token)
    # token passed by keyword: positionally it would land in `cached`.
    course_submissions = get_course_submissions(course_id, course_structure,
                                                token=token)
    course_submissions = course_submissions[
        course_submissions.user_id.isin(learners.user_id)]

    # find last submissions within the course's active time window
    course_submissions = course_submissions[
        (begin_date < course_submissions.submission_time)
        & (course_submissions.submission_time < last_deadline)]
    idx_grouped = course_submissions.groupby('user_id')['submission_time']
    # 'max' as string: passing the builtin is deprecated in recent pandas.
    idx = idx_grouped.transform('max') == course_submissions['submission_time']
    last_submissions = course_submissions[idx].groupby(
        'step_id', as_index=False)['submission_id'].count()
    last_submissions = last_submissions.rename(
        columns={'submission_id': 'last_submissions'})
    unique_submissions = course_submissions.groupby(
        'step_id', as_index=False)['user_id'].agg(pd.Series.nunique)
    unique_submissions = unique_submissions.rename(
        columns={'user_id': 'unique_submissions'})
    step_stats = unique_submissions.merge(last_submissions)
    # Dropout rate: share of submitters whose LAST submission was this step.
    step_stats['dropout_rate'] = step_stats.apply(
        lambda row: (row.last_submissions / row.unique_submissions
                     if row.unique_submissions else 0),
        axis=1)
    step_stats = pd.merge(course_structure, step_stats, how='left')
    additional_columns = ['viewed_by', 'passed_by', 'correct_ratio']
    step_stats[additional_columns] = step_stats.step_id.apply(
        lambda step: get_step_info(step)[additional_columns])
    step_stats['difficulty'] = 1 - step_stats['correct_ratio']
    step_stats['completion_rate'] = step_stats.apply(
        (lambda row: row.passed_by / row.viewed_by if row.viewed_by else 0),
        axis=1)
    step_stats.to_csv('cache/course-{}-stepstats.csv'.format(course_id),
                      index=False)
    step_stats['step_url'] = step_stats.apply(process_step_url, axis=1)
    # Express rates as percentages rounded to one decimal for the report.
    step_stats['completion_rate'] *= 100
    step_stats['dropout_rate'] *= 100
    step_stats['completion_rate'] = step_stats['completion_rate'].round(1)
    step_stats['dropout_rate'] = step_stats['dropout_rate'].round(1)
    for lesson_id in step_stats.lesson_id.unique():
        step_lesson_stats = step_stats[step_stats.lesson_id == lesson_id]
        step_lesson_stats = step_lesson_stats.fillna('')
        lesson_url = '{}/lesson/{}'.format(API_HOST, lesson_id)
        lesson_name = '{}'.format(
            lesson_id)  # TODO: use module and lesson position
        with open('{}map.tex'.format(directory), 'a',
                  encoding='utf-8') as map_file:
            map_file.write(
                '\\input{{generated/lesson-{}.tex}}\n'.format(lesson_id))
        with open('{}lesson-{}.tex'.format(directory, lesson_id), 'w',
                  encoding='utf-8') as lesson_file:
            lesson_file.write(
                '\\newpage\n\\lessoninfo{{{}}}{{{}}}\n'.format(
                    lesson_name, lesson_url))
            lesson_file.write('\\begin{lessonstatistics}')
            for _, step_stat in step_lesson_stats.iterrows():
                lesson_file.write(STEP_STAT_FORMAT.format(stat=step_stat))
            lesson_file.write('\\end{lessonstatistics}')
def get_step_info(step_id):
    """Fetch metadata for a single step as a pandas Series.

    :param step_id: id of the step on the platform
    :return: pd.Series indexed by field name, with 'id' renamed 'step_id'
    """
    info = pd.Series(fetch_objects('steps', pk=step_id)[0])
    # Series.rename maps index labels; the previous `columns=` keyword is
    # not a valid Series.rename parameter and never performed the rename.
    info = info.rename({'id': 'step_id'})
    return info
def generate_latex_report(self, directory, cached=True):
    """Generate LaTeX files highlighting rewatch peaks in course videos.

    For every video step: fetch view statistics, detect peak windows,
    render a two-panel plot, and emit per-step LaTeX via
    ``generate_latex_files``. Finally writes ``total.tex`` listing the up
    to five peaks with the largest relative area.

    :param directory: output directory prefix (must end with a separator)
    :param cached: reuse per-step video-stats CSV caches
    """
    course_id = self.course_id
    token = get_token()
    course_structure = get_course_structure(course_id)
    course_structure = course_structure.loc[
        course_structure.step_type == 'video',
        ['step_id', 'step_position', 'lesson_id']]
    course_info = fetch_objects('courses', pk=course_id)
    course_title = course_info[0]['title']
    course_url = '{}/course/{}'.format(API_HOST, course_id)
    with open('{}info.tex'.format(directory), 'w',
              encoding='utf-8') as info_file:
        info_file.write(
            '\\def\\coursetitle{{{}}}\n\\def\\courseurl{{{}}}\n'.format(
                course_title, course_url))
    with open('{}map.tex'.format(directory), 'w',
              encoding='utf-8') as map_file:
        map_file.write('')
    peak_frames = []
    for ind, row in course_structure.iterrows():
        step_id = row.step_id
        step_url = 'https://stepik.org/lesson/{}/step/{}'.format(
            row.lesson_id, row.step_position)
        stats = get_video_stats(step_id, cached, token)
        fig = plt.figure()
        ax1 = fig.add_subplot(211)
        ax2 = fig.add_subplot(212)
        windows = get_video_peaks(stats, plot=True, ax=ax1, ax2=ax2)
        windows['step_id'] = step_id
        windows['course_id'] = course_id
        windows['step_url'] = step_url
        # Human-readable MM:SS stamps for the report.
        windows['start_sec'] = windows['start'].apply(
            lambda x: '{:02d}:{:02d}'.format(x // 60, x % 60))
        windows['end_sec'] = windows['end'].apply(
            lambda x: '{:02d}:{:02d}'.format(x // 60, x % 60))
        self.generate_latex_files(course_id, step_id, step_url, windows,
                                  directory)
        fig.savefig('{}step_{}.png'.format(directory, step_id))
        plt.close()
        peak_frames.append(windows)
    # DataFrame.append was removed in pandas 2.0; concat collected frames.
    total_peaks = pd.concat(peak_frames) if peak_frames else pd.DataFrame()
    total_peaks.to_csv('cache/course-{}-totalpeaks.csv'.format(course_id),
                       index=False)
    total_peaks = total_peaks.sort_values('area', ascending=False)
    if total_peaks.shape[0] <= 5:
        top_peaks = total_peaks
    else:
        top_peaks = total_peaks[0:5]
    with open('{}total.tex'.format(directory), 'w',
              encoding='utf-8') as total_file:
        if not total_peaks.empty:
            total_file.write(
                'В курсе выделены следующие пики, имеющие максимальную относительную площадь.\n'
            )
            total_file.write(
                'Проверьте, нет ли в данных местах у учащихся ' +
                'трудностей с пониманием учебного материала.\n')
            total_file.write('\\begin{totalpeaks}\n')
            for ind, row in top_peaks.iterrows():
                total_file.write(
                    '\\totalpeak{{{}}}{{{}}}{{{}}}{{{}}}{{{:.2f}}}\n'.
                    format(row.step_id, row.step_url, row.start_sec,
                           row.end_sec, row.area))
            total_file.write('\\end{totalpeaks}\n')
        else:
            total_file.write('\n')
def get_course_submissions(course_id, course_structure=None, cached=True, token=None):
    """Collect all submissions for every step of a course, with CSV caching.

    NOTE(review): duplicate of an equivalent definition earlier in the
    file; only the last one takes effect at import time.

    :param course_id: id of the course on the platform
    :param course_structure: optional pre-fetched structure DataFrame;
        fetched via ``get_course_structure`` when absent/empty
        (default changed from a module-level ``pd.DataFrame()`` to ``None``
        to avoid a mutable default argument; behavior is unchanged)
    :param cached: reuse ``cache/course-<id>-submissions.csv`` when present
    :param token: API token; fetched via ``get_token()`` when absent
    :return: pd.DataFrame with the columns listed in ``header``
    """
    header = [
        'submission_id', 'step_id', 'user_id', 'attempt_time',
        'submission_time', 'status'
    ]
    # use cache
    course_submissions_filename = 'cache/course-{}-submissions.csv'.format(
        course_id)
    if os.path.isfile(course_submissions_filename) and cached:
        course_submissions = pd.read_csv(course_submissions_filename)
        return course_submissions[header]

    if not token:
        token = get_token()
    if course_structure is None or course_structure.empty:
        # Pass token by keyword: positionally it would bind to `cached`.
        course_structure = get_course_structure(course_id, token=token)
    frames = []
    for step in course_structure.step_id.unique().tolist():
        step_submissions = pd.DataFrame(
            fetch_objects('submissions', token=token, step=step))
        if step_submissions.empty:
            continue
        step_submissions = step_submissions.rename(
            columns={
                'id': 'submission_id',
                'time': 'submission_time',
                'attempt': 'attempt_id'
            })
        attempt_ids = step_submissions['attempt_id'].unique().tolist()
        step_attempts = pd.DataFrame(
            fetch_objects_by_id('attempts', attempt_ids, token=token))
        step_attempts = step_attempts.rename(columns={
            'id': 'attempt_id',
            'time': 'attempt_time',
            'status': 'attempt_status'
        })
        step_submissions = pd.merge(step_submissions, step_attempts,
                                    on='attempt_id')
        step_submissions['step_id'] = step
        frames.append(step_submissions)
    if not frames:
        return pd.DataFrame(columns=header)
    # DataFrame.append was removed in pandas 2.0; concat the collected frames.
    course_submissions = pd.concat(frames)
    course_submissions['submission_time'] = course_submissions[
        'submission_time'].apply(get_unix_date)
    course_submissions['attempt_time'] = course_submissions[
        'attempt_time'].apply(get_unix_date)
    # The API returns 'user'; the header requires 'user_id' (the previous
    # '******' target was a redaction artifact and would raise KeyError).
    course_submissions = course_submissions.rename(columns={'user': 'user_id'})
    course_submissions = course_submissions[header]
    course_submissions.to_csv(course_submissions_filename, index=False)
    return course_submissions