def delete_draft_only(root_location):
    """
    Helper function that calls delete on the specified location if a draft version of the item exists.
    If no draft exists, this function recursively calls itself on the children of the item.
    """
    # Build a deprecated-SON query for this block, then drop the revision so
    # the query matches BOTH the draft and the published version of the item.
    query = root_location.to_deprecated_son(prefix='_id.')
    del query['_id.revision']
    versions_found = self.collection.find(
        query, {'_id': True, 'definition.children': True}, sort=[SORT_REVISION_FAVOR_DRAFT]
    )
    # If 2 versions versions exist, we can assume one is a published version. Go ahead and do the delete
    # of the draft version.
    if versions_found.count() > 1:
        # Moving a child from published parent creates a draft of the parent and moved child.
        published_version = [
            version
            for version in versions_found
            if version.get('_id').get('revision') != MongoRevisionKey.draft
        ]
        if len(published_version) > 0:
            # This change makes sure that parents are updated too i.e. an item will have only one parent.
            self.update_parent_if_moved(root_location, published_version[0], delete_draft_only, user_id)
        # Only the draft subtree is removed; the published version stays intact.
        self._delete_subtree(root_location, [as_draft], draft_only=True)
    elif versions_found.count() == 1:
        # Since this method cannot be called on something in DIRECT_ONLY_CATEGORIES and we call
        # delete_subtree as soon as we find an item with a draft version, if there is only 1 version
        # it must be published (since adding a child to a published item creates a draft of the parent).
        item = versions_found[0]
        assert item.get('_id').get('revision') != MongoRevisionKey.draft
        # No draft here: recurse into the children looking for drafts to delete.
        for child in item.get('definition', {}).get('children', []):
            child_loc = BlockUsageLocator.from_string(child)
            delete_draft_only(child_loc)
def convert_item(item, to_be_deleted): """ Convert the subtree """ # collect the children's ids for future processing next_tier = [] for child in item.get('definition', {}).get('children', []): child_loc = BlockUsageLocator.from_string(child) next_tier.append(child_loc.to_deprecated_son()) # insert a new DRAFT version of the item item['_id']['revision'] = MongoRevisionKey.draft # ensure keys are in fixed and right order before inserting item['_id'] = self._id_dict_to_son(item['_id']) bulk_record = self._get_bulk_ops_record(location.course_key) bulk_record.dirty = True try: self.collection.insert(item) except pymongo.errors.DuplicateKeyError: # prevent re-creation of DRAFT versions, unless explicitly requested to ignore if not ignore_if_draft: raise DuplicateItemError(item['_id'], self, 'collection') # delete the old PUBLISHED version if requested if delete_published: item['_id']['revision'] = MongoRevisionKey.published to_be_deleted.append(item['_id']) return next_tier
def update_parent_if_moved(self, original_parent_location, published_version, delete_draft_only, user_id):
    """
    Update parent of an item if it has moved.

    Arguments:
        original_parent_location (BlockUsageLocator)  : Original parent block locator.
        published_version (dict)   : Published version of the block.
        delete_draft_only (function)    : A callback function to delete draft children if it was moved.
        user_id (int)   : User id
    """
    published_children = published_version.get('definition', {}).get('children', [])
    for child_location in published_children:
        child_key = UsageKey.from_string(child_location)
        item_location = child_key.map_into_course(original_parent_location.course_key)
        try:
            source_item = self.get_item(item_location)
        except ItemNotFoundError:
            log.error('Unable to find the item %s', unicode(item_location))
            return

        # The child "moved" when its current parent differs from the original one.
        has_moved = (
            source_item.parent
            and source_item.parent.block_id != original_parent_location.block_id
        )
        if has_moved and self.update_item_parent(item_location, original_parent_location, source_item.parent, user_id):
            delete_draft_only(BlockUsageLocator.from_string(child_location))
def get_course_key_from_url(url):
    """
    Extracts the course from the given `url`, if possible.

    Returns None when neither known URL pattern matches or the embedded
    key cannot be parsed.
    """
    url = url or ''

    # First, try to extract the course_id assuming the URL follows this pattern:
    # https://courses.edx.org/xblock/block-v1:org+course+run+type@vertical+block@3848270e75f34e409eaad53a2a7f1da5?show_title=0&show_bookmark_button=0
    block_match = BLOCK_REGEX.match(url)
    if block_match:
        try:
            return BlockUsageLocator.from_string(block_match.group('block_id')).course_key
        except InvalidKeyError:
            return None

    # Second, try to extract the course_id assuming the URL follows this pattern:
    # https://courses.edx.org/courses/course-v1:org+course+run/courseware/unit1/der_3-sequential/?activate_block_id=block-v1%3Aorg%2Bcourse%2Brun%2Btype%40sequential%2Bblock%40der_3-sequential
    course_match = COURSE_REGEX.match(url)
    if course_match:
        try:
            return CourseKey.from_string(course_match.group('course_id'))
        except InvalidKeyError:
            return None
def get_block_id_from_string(block_id_string):
    """
    Parse ``block_id_string`` into a BlockUsageLocator.

    Returns None for a falsy input, and the raw string when it is not a
    valid usage key.
    """
    if block_id_string:
        try:
            return BlockUsageLocator.from_string(block_id_string)
        except InvalidKeyError:
            # workbench support
            return block_id_string
    return None
def is_valid_progress_module(content_id):
    """
    Returns boolean indicating if given module is valid for marking progress

    A valid module should be child of `vertical` and its category should be one of the
    PROGRESS_DETACHED_CATEGORIES
    """
    try:
        detached_categories = getattr(settings, 'PROGRESS_DETACHED_CATEGORIES', [])
        usage_id = BlockUsageLocator.from_string(content_id)
        module = modulestore().get_item(usage_id)
        # All conditions folded into one boolean expression; short-circuits the
        # same way the original nested checks did.
        return bool(
            module
            and module.parent
            and module.parent.category == "vertical"
            and module.category not in detached_categories
            and not is_progress_detached_vertical(module.parent)
        )
    except (InvalidKeyError, ItemNotFoundError) as exception:
        log.debug("Error getting module for content_id:%s %s", content_id, exception.message)
        return False
    except Exception as exception:  # pylint: disable=broad-except
        # broad except to avoid wrong calculation of progress in case of unknown exception
        log.exception("Error getting module for content_id:%s %s", content_id, exception.message)
        return False
def test_conditional_module_with_empty_sources_list(self):
    """
    If a ConditionalDescriptor is initialized with an empty sources_list, we assert that
    the sources_list is set via generating UsageKeys from the values in
    xml_attributes['sources']
    """
    system = Mock()
    location = BlockUsageLocator(
        CourseLocator("edX", "conditional_test", "test_run"),
        "conditional",
        "SampleConditional",
    )
    scope_ids = ScopeIds(None, None, location, location)
    field_data = DictFieldData({
        'data': '<conditional/>',
        'xml_attributes': {'sources': 'i4x://HarvardX/ER22x/poll_question/T15_poll'},
        'children': None,
    })
    conditional = ConditionalDescriptor(system, field_data, scope_ids)

    new_run = conditional.location.course_key.run
    # Matching what is in ConditionalDescriptor.__init__.
    expected = BlockUsageLocator.from_string(
        conditional.xml_attributes['sources']
    ).replace(run=new_run)
    self.assertEqual(conditional.sources_list[0], expected)
def test_conditional_module_with_empty_sources_list(self):
    """
    If a ConditionalDescriptor is initialized with an empty sources_list, we assert that
    the sources_list is set via generating UsageKeys from the values in
    xml_attributes['sources']
    """
    mock_system = Mock()
    block_location = BlockUsageLocator(
        CourseLocator("edX", "conditional_test", "test_run"),
        "conditional",
        "SampleConditional",
    )
    block_scope_ids = ScopeIds(None, None, block_location, block_location)
    block_field_data = DictFieldData({
        'data': '<conditional/>',
        'xml_attributes': {'sources': 'i4x://HarvardX/ER22x/poll_question/T15_poll'},
        'children': None,
    })
    conditional = ConditionalDescriptor(mock_system, block_field_data, block_scope_ids)

    run = conditional.location.course_key.run
    source_key = BlockUsageLocator.from_string(conditional.xml_attributes['sources'])
    self.assertEqual(
        conditional.sources_list[0],
        # Matching what is in ConditionalDescriptor.__init__.
        source_key.replace(run=run),
    )
def get_block_id_from_string(block_id_string):
    """
    Return the BlockUsageLocator for ``block_id_string``.

    A falsy input yields None; an unparseable id is returned unchanged.
    """
    if not block_id_string:
        return None
    try:
        parsed = BlockUsageLocator.from_string(block_id_string)
    except InvalidKeyError:
        # workbench support
        return block_id_string
    return parsed
def recurse_mark_complete(course_block_completions, latest_completion, block):
    """
    Helper function to walk course tree dict,
    marking blocks as 'complete' and 'last_complete'

    If all blocks are complete, mark parent block complete
    mark parent blocks of 'last_complete' as 'last_complete'

    :param course_block_completions: dict[course_completion_object] = completion_value
    :param latest_completion: course_completion_object
    :param block: course_outline_root_block block object or child block

    :return:
        block: course_outline_root_block block object or child block
    """
    block_key = BlockUsageLocator.from_string(block['id'])

    if course_block_completions.get(block_key):
        block['complete'] = True
        if block_key == latest_completion.block_key:
            block['resume_block'] = True

    if block.get('children'):
        for child in block['children']:
            recurse_mark_complete(
                course_block_completions,
                latest_completion,
                block=child
            )
            # Bubble the resume marker up the tree. Use .get() because the
            # recursive call only sets 'resume_block' on blocks along the
            # resume path; other children may not have the key at all.
            if child.get('resume_block') is True:
                block['resume_block'] = True

        # A parent is complete when every child is complete. .get() guards
        # against children the recursion left unmarked (missing key would
        # previously raise KeyError).
        if all(child.get('complete') for child in block['children']):
            block['complete'] = True
def get_students_problem_grades(request, csv=False):
    """
    Get a list of students and grades for a particular problem.
    If 'csv' is False, returns a dict of student's name: username: grade: percent.

    If 'csv' is True, returns a header array, and an array of arrays in the format:
    student names, usernames, grades, percents for CSV download.
    """
    module_state_key = BlockUsageLocator.from_string(request.GET.get('module_id'))
    # NOTE(review): the `csv` keyword argument is immediately overwritten by the
    # query-string value, so the parameter itself is effectively unused.
    csv = request.GET.get('csv')

    # Query for "problem grades" students
    students = models.StudentModule.objects.select_related('student').filter(
        module_state_key=module_state_key,
        module_type__exact='problem',
        grade__isnull=False,
    ).values('student__username', 'student__profile__name', 'grade', 'max_grade').order_by('student__profile__name')

    results = []

    if not csv:
        # Restrict screen list length
        # Adding 1 so can tell if list is larger than MAX_SCREEN_LIST_LENGTH
        # without doing another select.
        for student in students[0:MAX_SCREEN_LIST_LENGTH + 1]:
            student_dict = {
                'name': student['student__profile__name'],
                'username': student['student__username'],
                'grade': student['grade'],
            }

            # Percent defaults to 0 when max_grade is missing or zero.
            student_dict['percent'] = 0
            if student['max_grade'] > 0:
                student_dict['percent'] = round(student['grade'] * 100 / student['max_grade'])
            results.append(student_dict)

        max_exceeded = False
        if len(results) > MAX_SCREEN_LIST_LENGTH:
            # Remove the last item so list length is exactly MAX_SCREEN_LIST_LENGTH
            del results[-1]
            max_exceeded = True

        response_payload = {
            'results': results,
            'max_exceeded': max_exceeded,
        }
        return JsonResponse(response_payload)
    else:
        # Everything before the last ' - ' in the tooltip is the problem name.
        tooltip = request.GET.get('tooltip')
        filename = sanitize_filename(tooltip[:tooltip.rfind(' - ')])

        header = [_("Name"), _("Username"), _("Grade"), _("Percent")]
        for student in students:
            percent = 0
            if student['max_grade'] > 0:
                percent = round(student['grade'] * 100 / student['max_grade'])
            results.append([student['student__profile__name'], student['student__username'],
                            student['grade'], percent])

        response = create_csv_response(filename, header, results)
        return response
def get_students_problem_grades(request, csv=False):
    """
    Get a list of students and grades for a particular problem.
    If 'csv' is False, returns a dict of student's name: username: grade: percent.

    If 'csv' is True, returns a header array, and an array of arrays in the format:
    student names, usernames, grades, percents for CSV download.
    """
    module_state_key = BlockUsageLocator.from_string(request.GET.get('module_id'))
    # NOTE(review): the `csv` parameter is shadowed here by the query-string
    # value, making the keyword argument effectively unused.
    csv = request.GET.get('csv')

    # Query for "problem grades" students
    students = models.StudentModule.objects.select_related('student').filter(
        module_state_key=module_state_key,
        module_type__exact='problem',
        grade__isnull=False,
    ).values('student__username', 'student__profile__name', 'grade', 'max_grade').order_by('student__profile__name')

    results = []

    if not csv:
        # Restrict screen list length
        # Adding 1 so can tell if list is larger than MAX_SCREEN_LIST_LENGTH
        # without doing another select.
        for student in students[0:MAX_SCREEN_LIST_LENGTH + 1]:
            student_dict = {
                'name': student['student__profile__name'],
                'username': student['student__username'],
                'grade': student['grade'],
            }

            # Guard against a zero/None max_grade before computing the percent.
            student_dict['percent'] = 0
            if student['max_grade'] > 0:
                student_dict['percent'] = round(student['grade'] * 100 / student['max_grade'])
            results.append(student_dict)

        max_exceeded = False
        if len(results) > MAX_SCREEN_LIST_LENGTH:
            # Remove the last item so list length is exactly MAX_SCREEN_LIST_LENGTH
            del results[-1]
            max_exceeded = True

        response_payload = {
            'results': results,
            'max_exceeded': max_exceeded,
        }
        return JsonResponse(response_payload)
    else:
        # The CSV filename is derived from the tooltip text up to its last ' - '.
        tooltip = request.GET.get('tooltip')
        filename = sanitize_filename(tooltip[:tooltip.rfind(' - ')])

        header = [_("Name"), _("Username"), _("Grade"), _("Percent")]
        for student in students:
            percent = 0
            if student['max_grade'] > 0:
                percent = round(student['grade'] * 100 / student['max_grade'])
            results.append([student['student__profile__name'], student['student__username'],
                            student['grade'], percent])

        response = create_csv_response(filename, header, results)
        return response
def _get_parent_content_id(html_content_id):
    """
    Gets parent block content id.

    Falls back to the original ``html_content_id`` when the block cannot be
    located or the id cannot be parsed.
    """
    try:
        usage_key = BlockUsageLocator.from_string(html_content_id)
        html_block = modulestore().get_item(usage_key)
        return unicode(html_block.parent)
    except (InvalidKeyError, ItemNotFoundError) as exception:
        # something has gone wrong - the best we can do is to return original content id
        log.warn("Error getting parent content_id for html module: %s", exception.message)
        return html_content_id
def from_json(cls, blockrecord_json):
    """
    Return a BlockRecordSet from a json list.
    """
    blocks = json.loads(blockrecord_json)
    # Feed a lazy generator of rehydrated records straight into the constructor.
    return cls(
        BlockRecord(
            locator=BlockUsageLocator.from_string(block["locator"]),
            weight=block["weight"],
            max_score=block["max_score"],
        )
        for block in blocks
    )
def get_students_opened_subsection(request, csv=False):
    """
    Get a list of students that opened a particular subsection.
    If 'csv' is False, returns a dict of student's name: username.

    If 'csv' is True, returns a header array, and an array of arrays in the format:
    student names, usernames for CSV download.
    """
    module_state_key = BlockUsageLocator.from_string(
        request.GET.get('module_id'))
    # NOTE(review): the `csv` keyword argument is immediately overwritten by
    # the query-string value, so the parameter itself is effectively unused.
    csv = request.GET.get('csv')

    # Query for "opened a subsection" students
    students = models.StudentModule.objects.select_related('student').filter(
        module_state_key__exact=module_state_key,
        module_type__exact='sequential',
    ).values('student__username', 'student__profile__name').order_by('student__profile__name')

    results = []
    if not csv:
        # Restrict screen list length
        # Adding 1 so can tell if list is larger than MAX_SCREEN_LIST_LENGTH
        # without doing another select.
        for student in students[0:MAX_SCREEN_LIST_LENGTH + 1]:
            results.append({
                'name': student['student__profile__name'],
                'username': student['student__username'],
            })

        max_exceeded = False
        if len(results) > MAX_SCREEN_LIST_LENGTH:
            # Remove the last item so list length is exactly MAX_SCREEN_LIST_LENGTH
            del results[-1]
            max_exceeded = True

        response_payload = {
            'results': results,
            'max_exceeded': max_exceeded,
        }
        return JsonResponse(response_payload)
    else:
        tooltip = request.GET.get('tooltip')

        # Subsection name is everything after 3rd space in tooltip
        filename = sanitize_filename(' '.join(tooltip.split(' ')[3:]))

        header = [_("Name"), _("Username")]
        for student in students:
            results.append([
                student['student__profile__name'],
                student['student__username']
            ])

        response = create_csv_response(filename, header, results)
        return response
def from_json(cls, blockrecord_json):
    """
    Return a BlockRecordSet from a json list.
    """
    def _records(block_dicts):
        # Rehydrate each serialized block dict into a BlockRecord, lazily.
        for block in block_dicts:
            yield BlockRecord(
                locator=BlockUsageLocator.from_string(block["locator"]),
                weight=block["weight"],
                max_score=block["max_score"],
            )

    return cls(_records(json.loads(blockrecord_json)))
def get_students_opened_subsection(request, csv=False):
    """
    Get a list of students that opened a particular subsection.
    If 'csv' is False, returns a dict of student's name: username.

    If 'csv' is True, returns a header array, and an array of arrays in the format:
    student names, usernames for CSV download.
    """
    module_state_key = BlockUsageLocator.from_string(request.GET.get('module_id'))
    # NOTE(review): the `csv` parameter is shadowed here by the query-string
    # value, making the keyword argument effectively unused.
    csv = request.GET.get('csv')

    # Query for "opened a subsection" students
    students = models.StudentModule.objects.select_related('student').filter(
        module_state_key__exact=module_state_key,
        module_type__exact='sequential',
    ).values('student__username', 'student__profile__name').order_by('student__profile__name')

    results = []
    if not csv:
        # Restrict screen list length
        # Adding 1 so can tell if list is larger than MAX_SCREEN_LIST_LENGTH
        # without doing another select.
        for student in students[0:MAX_SCREEN_LIST_LENGTH + 1]:
            results.append({
                'name': student['student__profile__name'],
                'username': student['student__username'],
            })

        max_exceeded = False
        if len(results) > MAX_SCREEN_LIST_LENGTH:
            # Remove the last item so list length is exactly MAX_SCREEN_LIST_LENGTH
            del results[-1]
            max_exceeded = True

        response_payload = {
            'results': results,
            'max_exceeded': max_exceeded,
        }
        return JsonResponse(response_payload)
    else:
        tooltip = request.GET.get('tooltip')

        # Subsection name is everything after 3rd space in tooltip
        filename = sanitize_filename(' '.join(tooltip.split(' ')[3:]))

        header = [_("Name"), _("Username")]
        for student in students:
            results.append([student['student__profile__name'], student['student__username']])

        response = create_csv_response(filename, header, results)
        return response
def zip_student_submissions(course_id, block_id, locator_unicode, username):
    """
    Task to download all submissions as zip file

    Args:
        course_id (unicode): edx course id
        block_id (unicode): edx block id
        locator_unicode (unicode): Unicode representing a BlockUsageLocator for the sga module
        username (unicode): user name of the staff user requesting the zip file
    """
    sga_locator = BlockUsageLocator.from_string(locator_unicode)
    archive_path = get_zip_file_path(username, course_id, block_id, sga_locator)

    # Always rebuild from scratch: remove any previously generated archive first.
    if default_storage.exists(archive_path):
        default_storage.delete(archive_path)

    _compress_student_submissions(archive_path, block_id, course_id, sga_locator)
def populate_subsections(sender, instance, created, *args, **kwargs):
    """
    Add all the subsections and tracked units to the ChapterProgress instance.

    Structure of subsections field:
        subsection_id:
            viewed
            units:
                unit_id:
                    type
                    done
    """
    # Only populate on first creation of the ChapterProgress row.
    if created:
        course_structure = CourseStructure.objects.get(
            course_id=instance.completion_profile.course_key).ordered_blocks
        subsections = {}
        # Walk chapter -> sections -> subsections -> units, keyed by block_id.
        for section in course_structure[instance.chapter_id]['children']:
            for subsection in course_structure[section]['children']:
                subsection_dict = {'units': {}, 'viewed': False}
                subsection_usage_key = BlockUsageLocator.from_string(
                    subsection)
                subsections.update(
                    {subsection_usage_key.block_id: subsection_dict})
                for unit in course_structure[subsection]['children']:
                    unit_type = course_structure[unit]['block_type']
                    # Only track unit types listed in ChapterProgress.UNIT_TYPES.
                    if unit_type in ChapterProgress.UNIT_TYPES:
                        usage_key = BlockUsageLocator.from_string(unit)
                        subsection_dict['units'].update({
                            usage_key.block_id: {
                                'type': unit_type,
                                'done': False
                            }
                        })
        instance.subsections = subsections
        instance.save()
def problems_with_psychometric_data(course_id):
    '''
    Return dict of {problems (location urls): count} for which psychometric data is available.
    Does this for a given course_id.
    '''
    pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)
    # Distinct module_state_key values present in the psychometric data set.
    plist = [
        p['studentmodule__module_state_key']
        for p in pmdset.values('studentmodule__module_state_key').distinct()
    ]
    # Map each problem url to the number of psychometric records for it.
    return {
        p: pmdset.filter(
            studentmodule__module_state_key=BlockUsageLocator.from_string(p)
        ).count()
        for p in plist
    }
def __init__(self, *args, **kwargs):
    """
    Create an instance of the Conditional XBlock.

    When ``sources_list`` is empty, it is backfilled from the legacy
    ``xml_attributes['sources']`` string.
    """
    super(ConditionalBlock, self).__init__(*args, **kwargs)  # lint-amnesty, pylint: disable=super-with-arguments

    # Convert sources xml_attribute to a ReferenceList field type so Location/Locator
    # substitution can be done.
    if not self.sources_list:
        if 'sources' in self.xml_attributes and isinstance(self.xml_attributes['sources'], six.string_types):
            self.sources_list = [
                # TODO: it is not clear why we are replacing the run here (which actually is a no-op
                # for old-style course locators. However, this is the implementation of
                # CourseLocator.make_usage_key_from_deprecated_string, which was previously
                # being called in this location.
                BlockUsageLocator.from_string(item).replace(run=self.location.course_key.run)
                for item in ConditionalBlock.parse_sources(self.xml_attributes)
            ]
def __init__(self, *args, **kwargs):
    """
    Create an instance of the conditional module.

    When ``sources_list`` is empty, it is backfilled from the legacy
    ``xml_attributes['sources']`` string.
    """
    super(ConditionalDescriptor, self).__init__(*args, **kwargs)

    # Convert sources xml_attribute to a ReferenceList field type so Location/Locator
    # substitution can be done.
    if not self.sources_list:
        if 'sources' in self.xml_attributes and isinstance(self.xml_attributes['sources'], basestring):
            self.sources_list = [
                # TODO: it is not clear why we are replacing the run here (which actually is a no-op
                # for old-style course locators. However, this is the implementation of
                # CourseLocator.make_usage_key_from_deprecated_string, which was previously
                # being called in this location.
                BlockUsageLocator.from_string(item).replace(run=self.location.course_key.run)
                for item in ConditionalDescriptor.parse_sources(self.xml_attributes)
            ]
def update_parent_if_moved(self, original_parent_location, published_version, delete_draft_only, user_id):
    """
    Update parent of an item if it has moved.

    Arguments:
        original_parent_location (BlockUsageLocator)  : Original parent block locator.
        published_version (dict)   : Published version of the block.
        delete_draft_only (function)    : A callback function to delete draft children if it was moved.
        user_id (int)   : User id
    """
    for child_location in published_version.get('definition', {}).get('children', []):
        item_location = UsageKey.from_string(child_location).map_into_course(original_parent_location.course_key)
        try:
            source_item = self.get_item(item_location)
        except ItemNotFoundError:
            log.error('Unable to find the item %s', unicode(item_location))
            return
        # The child has moved when its current parent differs from the original parent.
        if source_item.parent and source_item.parent.block_id != original_parent_location.block_id:
            # Only delete the draft child if the parent update succeeded.
            if self.update_item_parent(item_location, original_parent_location, source_item.parent, user_id):
                delete_draft_only(BlockUsageLocator.from_string(child_location))
def zip_student_submissions(course_id, block_id, locator_unicode, username):
    """
    Task to download all submissions as zip file

    Args:
        course_id (unicode): edx course id
        block_id (unicode): edx block id
        locator_unicode (unicode): Unicode representing a BlockUsageLocator for the sga module
        username (unicode): user name of the staff user requesting the zip file
    """
    sga_locator = BlockUsageLocator.from_string(locator_unicode)
    archive_path = get_zip_file_path(username, course_id, block_id, sga_locator)

    # NOTE(review): message says "course" but logs the locator value.
    log.info("Creating zip file for course: %s at path: %s", sga_locator, archive_path)
    if default_storage.exists(archive_path):
        log.info("Deleting already-existing zip file at path: %s", archive_path)
        default_storage.delete(archive_path)

    _compress_student_submissions(archive_path, block_id, course_id, sga_locator)
def get_course_required_block_ids(self, required_block_ids):
    """
    Filters the required_block_ids list, and returns only the required block ids that belong
    to the same course key. If the self.matching_blocks_by_type is set, it returns a mix with
    required_block_ids which exists in self.matching_blocks_by_type too. If required_block_ids
    is not provided, it returns just the self.matching_blocks_by_type list.

    Args:
        required_block_ids: List of the block location ids.
    Returns:
        required_course_block_ids: List containing only the BlockUsageLocator items.
    """
    matching_blocks = self.matching_blocks_by_type
    if not required_block_ids:
        return matching_blocks

    filtered_locators = []
    for block_id in required_block_ids:
        try:
            locator = BlockUsageLocator.from_string(block_id)
            # Without a type filter, accept anything from this course.
            if not matching_blocks and locator.course_key == self.course_key:
                filtered_locators.append(locator)
                continue
            # Otherwise only accept blocks that also match the type filter.
            if locator in matching_blocks:
                filtered_locators.append(locator)
        except InvalidKeyError:
            # Skip ids that are not valid usage keys.
            continue
    return filtered_locators
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Positional args (in order): video_location, revision, language_code, force_update.
    Returns 'Success' or 'Failure'; re-raises unknown exceptions.
    """
    success, failure = 'Success', 'Failure'
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    video = store.get_item(
        usage_key=BlockUsageLocator.from_string(video_location), revision=revision)
    edx_video_id = clean_video_id(video.edx_video_id)

    # Dry run: report what would happen without touching storage.
    if not kwargs['commit']:
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-will-be-migrated] '
             '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)
        return success

    LOGGER.info((
        '[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
        '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)

    try:
        transcripts_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        # If the video has no valid VAL record yet, create an external video
        # entry and persist its id on the video block.
        is_video_valid = edx_video_id and is_video_available(edx_video_id)
        if not is_video_valid:
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # determine branch published/draft
            branch_setting = (ModuleStoreEnum.Branch.published_only
                              if revision == ModuleStoreEnum.RevisionOption.published_only
                              else ModuleStoreEnum.Branch.draft_preferred)
            with store.branch_setting(branch_setting):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        # Known failure modes are logged and reported as 'Failure'.
        LOGGER.exception((
            '[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
            '[edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)
        return failure
    except Exception:
        # Unknown exceptions are logged and re-raised so celery records the error.
        LOGGER.exception((
            '[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
            '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)
        raise

    LOGGER.info((
        '[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
        '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)
    return success
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Positional args (in order): video_location, language_code, force_update.
    Returns a human-readable status message; re-raises unknown exceptions.
    """
    video_location, language_code, force_update = args
    store = modulestore()
    video = store.get_item(usage_key=BlockUsageLocator.from_string(video_location))
    commit = kwargs['commit']

    # Dry run: only report what would be migrated.
    if not commit:
        return 'Language {language_code} transcript of video {edx_video_id} will be migrated'.format(
            language_code=language_code,
            edx_video_id=video.edx_video_id
        )

    # Start transcript's migration
    edx_video_id = clean_video_id(video.edx_video_id)
    LOGGER.info(
        "[Transcript migration] migration process is started for video [%s] language [%s].",
        edx_video_id, language_code
    )

    try:
        transcripts_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        # Create a VAL record on-the-fly when the video has no edx_video_id yet.
        if not edx_video_id:
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

        save_transcript_to_storage(
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        # Known failure modes produce a 'Failed' status message instead of raising.
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        message = 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc)
        )
    except Exception:
        # Unknown exceptions are logged and re-raised so celery records the error.
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        raise
    else:
        message = (
            'Success: transcript (language: {language_code}, edx_video_id: {edx_video_id}) has been migrated '
            'for video [{location}].'
        ).format(edx_video_id=edx_video_id, language_code=language_code, location=unicode(video.location))

    return message
def from_string(cls, serialized):
    """Deprecated. Use :meth:`locator.BlockUsageLocator.from_string`."""
    # Emit the deprecation warning, then delegate to the replacement API.
    cls._deprecation_warning()
    locator = BlockUsageLocator.from_string(serialized)
    return locator
def generate_plots_for_problem(problem):
    """
    Build psychometric plots (flot JS configs) for one problem.

    Returns (msg, plots): an HTML status string and a list of plot dicts
    with 'title', 'id', 'info', 'data' (JS var definitions) and 'cmd'
    (flot plot arguments).
    """
    pmdset = PsychometricData.objects.using(db).filter(
        studentmodule__module_state_key=BlockUsageLocator.from_string(problem)
    )
    nstudents = pmdset.count()
    msg = ""
    plots = []

    # Too little data to say anything meaningful.
    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade

    agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
    max_attempts = agdat['attempts__max']
    total_attempts = agdat['attempts__sum']  # not used yet

    msg += "max attempts = %d" % max_attempts

    xdat = range(1, max_attempts + 1)
    dataset = {'xdat': xdat}

    # compute grade statistics
    grades = [pmd.studentmodule.grade for pmd in pmdset]
    gsv = StatVar()
    for g in grades:
        gsv += g
    msg += "<br><p><font color='blue'>Grade distribution: %s</font></p>" % gsv

    # generate grade histogram
    ghist = []

    axisopts = """{ xaxes: [{ axisLabel: 'Grade' }], yaxes: [{ position: 'left', axisLabel: 'Count' }] }"""

    if gsv.max > max_grade:
        msg += "<br/><p><font color='red'>Something is wrong: max_grade=%s, but max(grades)=%s</font></p>" % (max_grade, gsv.max)
        max_grade = gsv.max

    if max_grade > 1:
        ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
        # NOTE(review): .items() on the histogram — py2 list semantics assumed.
        ghist_json = json.dumps(ghist.items())

        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
                }
        plots.append(plot)
    else:
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []  # time differences in minutes
    dtsv = StatVar()
    for pmd in pmdset:
        try:
            # NOTE(review): eval of stored checktimes — trusted-data assumption.
            checktimes = eval(pmd.checktimes)  # update log of attempt timestamps
        except:
            continue
        if len(checktimes) < 2:
            continue
        ct0 = checktimes[0]
        for ct in checktimes[1:]:
            dt = (ct - ct0).total_seconds() / 60.0
            if dt < 20:  # ignore if dt too long
                dtset.append(dt)
                dtsv += dt
            ct0 = ct
    if dtsv.cnt > 2:
        msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        # NOTE(review): tuple-unpacking lambda — Python 2 only syntax.
        thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))

        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""

        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, int(max_grade) + 1):
        yset = {}
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue
        ydat = []
        ylast = 0
        # Cumulative fraction of students (with this grade) by attempt count.
        for x in xdat:
            y = gset.filter(attempts=x).count() / ngset
            ydat.append(y + ylast)
            ylast = y + ylast
        yset['ydat'] = ydat

        if len(ydat) > 3:  # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
            except Exception as err:
                log.debug('Error in psychoanalyze curve fitting: %s' % err)

        dataset['grade_%d' % grade] = yset

    axisopts = """{ xaxes: [{ axisLabel: 'Number of Attempts' }], yaxes: [{ max:1.0, position: 'left', axisLabel: 'Probability of correctness' }] }"""

    # generate points for flot plot
    for grade in range(1, int(max_grade) + 1):
        jsdata = ""
        jsplots = []
        gkey = 'grade_%d' % grade
        if gkey in dataset:
            yset = dataset[gkey]
            jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
            jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
            if 'fitpts' in yset:
                jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
                jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
                (a, b) = yset['fitparam'][0]
                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
            else:
                irtinfo = ""

            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                          'id': "irt%s" % grade,
                          'info': '',
                          'data': jsdata,
                          'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                          })

    #log.debug('plots = %s' % plots)

    return msg, plots
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Positional args (unpacked below):
        video_location: serialized usage key of the video block.
        revision: modulestore revision option used to fetch the video.
        language_code: language of the transcript being migrated.
        force_update: passed through to storage to overwrite an existing transcript.

    Keyword args read:
        command_run: identifier of the management-command run, used only for log correlation.
        commit: when falsy this is a dry run — nothing is migrated, only logged.

    Returns:
        'Success' or 'Failure'. Known transcript/VAL errors return 'Failure';
        any other exception is logged and re-raised so the task is marked failed.
    """
    success, failure = 'Success', 'Failure'
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    # Fetch the video block at the requested revision from the modulestore.
    video = store.get_item(usage_key=BlockUsageLocator.from_string(video_location), revision=revision)
    edx_video_id = clean_video_id(video.edx_video_id)

    if not kwargs['commit']:
        # Dry run: log what would be migrated and report success without touching anything.
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-will-be-migrated] '
             '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
        )
        return success

    LOGGER.info(
        ('[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
    )

    try:
        transcripts_info = video.get_transcripts_info()
        # Pull the transcript out of the contentstore in SJSON format.
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        # The video is usable only if it has an edx_video_id known to VAL.
        is_video_valid = edx_video_id and is_video_available(edx_video_id)
        if not is_video_valid:
            # Mint an external VAL video and persist the new id on the block.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # determine branch published/draft
            branch_setting = (
                ModuleStoreEnum.Branch.published_only
                if revision == ModuleStoreEnum.RevisionOption.published_only
                else ModuleStoreEnum.Branch.draft_preferred
            )
            with store.branch_setting(branch_setting):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
            )

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        # Known, recoverable migration failures: log with context and report failure.
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
             '[edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
        )
        return failure
    except Exception:
        # Unknown failure: log and re-raise so celery records the task as failed.
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
             '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
        )
        raise

    LOGGER.info(
        ('[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code
    )
    return success
def from_string(cls, serialized):
    """Deprecated. Use :meth:`locator.BlockUsageLocator.from_string`."""
    # Warn the caller once that this shim is deprecated, then delegate.
    cls._deprecation_warning()
    locator = BlockUsageLocator.from_string(serialized)
    return locator
def generate_plots_for_problem(problem):
    """
    Build flot plot descriptors and an HTML status message for one problem.

    Arguments:
        problem: serialized usage key (string) of the problem to analyze.

    Returns:
        (msg, plots): ``msg`` is an HTML fragment summarizing the statistics;
        ``plots`` is a list of dicts, each with 'title', 'id', 'info',
        'data' (javascript variable definitions) and 'cmd' (flot arguments).

    Fixes vs. legacy version: Python-3 compatibility — removed tuple-parameter
    lambda (PEP 3113), listified dict views / zip iterators before
    ``json.dumps``, coerced numpy scalars to float for JSON, and narrowed a
    bare ``except:`` to ``except Exception:``.
    """
    pmdset = PsychometricData.objects.using(db).filter(
        studentmodule__module_state_key=BlockUsageLocator.from_string(problem))
    nstudents = pmdset.count()
    msg = ""
    plots = []

    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade

    agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
    max_attempts = agdat['attempts__max']
    total_attempts = agdat['attempts__sum']  # not used yet

    msg += "max attempts = %d" % max_attempts

    # Materialize as a list: it is indexed and JSON-serialized below.
    xdat = list(range(1, max_attempts + 1))
    dataset = {'xdat': xdat}

    # compute grade statistics
    grades = [pmd.studentmodule.grade for pmd in pmdset]
    gsv = StatVar()
    for g in grades:
        gsv += g
    msg += "<br><p><font color='blue'>Grade distribution: %s</font></p>" % gsv

    # generate grade histogram
    ghist = []

    axisopts = """{ xaxes: [{ axisLabel: 'Grade' }], yaxes: [{ position: 'left', axisLabel: 'Count' }] }"""

    if gsv.max > max_grade:
        msg += "<br/><p><font color='red'>Something is wrong: max_grade=%s, but max(grades)=%s</font></p>" % (
            max_grade, gsv.max)
        max_grade = gsv.max

    if max_grade > 1:
        ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
        # dict views are not JSON serializable in Python 3: convert to a list.
        ghist_json = json.dumps(list(ghist.items()))
        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
                }
        plots.append(plot)
    else:
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []  # time differences in minutes
    dtsv = StatVar()
    for pmd in pmdset:
        try:
            # SECURITY: checktimes is a repr() of a list of datetimes stored in
            # the DB; eval of datetime reprs cannot be handled by
            # ast.literal_eval, so only trusted rows may reach this code.
            checktimes = eval(pmd.checktimes)  # pylint: disable=eval-used
        except Exception:
            # Malformed or empty checktimes field: skip this student.
            continue
        if len(checktimes) < 2:
            continue
        ct0 = checktimes[0]
        for ct in checktimes[1:]:
            dt = (ct - ct0).total_seconds() / 60.0
            if dt < 20:  # ignore if dt too long
                dtset.append(dt)
                dtsv += dt
            ct0 = ct

    if dtsv.cnt > 2:
        msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        # Python 3: "lambda (x): x[0]" tuple-parameter syntax was removed (PEP 3113).
        thist_json = json.dumps(sorted(thist.items(), key=lambda x: x[0]))
        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""
        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, int(max_grade) + 1):
        yset = {}
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue
        ydat = []
        ylast = 0
        # Cumulative fraction of students (with this grade) by attempt count.
        for x in xdat:
            y = gset.filter(attempts=x).count() / ngset
            ydat.append(y + ylast)
            ylast = y + ylast
        yset['ydat'] = ydat

        if len(ydat) > 3:  # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
            except Exception as err:
                log.debug('Error in psychoanalyze curve fitting: %s' % err)

        dataset['grade_%d' % grade] = yset

    axisopts = """{ xaxes: [{ axisLabel: 'Number of Attempts' }], yaxes: [{ max:1.0, position: 'left', axisLabel: 'Probability of correctness' }] }"""

    # generate points for flot plot
    for grade in range(1, int(max_grade) + 1):
        jsdata = ""
        jsplots = []
        gkey = 'grade_%d' % grade
        if gkey in dataset:
            yset = dataset[gkey]
            # zip objects are not JSON serializable in Python 3: listify first.
            jsdata += "var d%d = %s;\n" % (grade, json.dumps(list(zip(xdat, yset['ydat']))))
            jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
            if 'fitpts' in yset:
                # numpy scalars are not JSON serializable: coerce to plain float.
                jsdata += 'var fit = %s;\n' % (json.dumps(
                    [(float(u), float(v)) for (u, v) in zip(yset['fitx'], yset['fity'])]))
                jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
                (a, b) = yset['fitparam'][0]
                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
            else:
                irtinfo = ""
            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                          'id': "irt%s" % grade,
                          'info': '',
                          'data': jsdata,
                          'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                          })

    # log.debug('plots = %s' % plots)
    return msg, plots