def iter(self, course, students):
    """
    Yield a GradeResult for each of the given students in the course.

    Arguments:
        course: the course to grade; its ``id`` is used for the metric
            tag and for log messages.
        students: an iterable of students (User) to grade.

    Yields:
        GradeResult named tuples of (student, course_grade, err_msg).
        If an error occurred, course_grade is None and err_msg is a
        non-empty description of the exception. If there was no error,
        err_msg is an empty string.
    """
    # The tags are identical for every student, so build them only once.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):
            try:
                course_grade = CourseGradeFactory().create(student, course)
                result = self.GradeResult(student, course_grade, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                result = self.GradeResult(student, None, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def iter(self, course, students):
    """
    Yield a GradeResult for each of the given students in the course.

    Arguments:
        course: the course to grade; its ``id`` is used for the metric
            tag and for log messages.
        students: an iterable of students (User) to grade.

    Yields:
        GradeResult named tuples of (student, course_grade, err_msg).
        If an error occurred, course_grade is None and err_msg is a
        non-empty description of the exception. If there was no error,
        err_msg is an empty string.
    """
    # The tags are identical for every student, so build them only once.
    stats_tags = [u"action:{}".format(course.id)]
    for student in students:
        with dog_stats_api.timer("lms.grades.CourseGradeFactory.iter", tags=stats_tags):
            try:
                course_grade = CourseGradeFactory().create(student, course)
                result = self.GradeResult(student, course_grade, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, "message", None) or repr(exc)
                log.exception(
                    "Cannot grade student %s (%s) in course %s because of exception: %s",
                    student.username,
                    student.id,
                    course.id,
                    err_msg,
                )
                result = self.GradeResult(student, None, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def run_main_task(entry_id, task_fcn, action_name):
    """
    Run `task_fcn` on behalf of the InstructorTask identified by `entry_id`.

    `task_fcn` is called with four arguments:

      `entry_id` : the primary key of the InstructorTask entry representing the task.
      `course_id` : the course id stored on that entry.
      `task_input` : dict of task-specific arguments, JSON-decoded from the
          entry's task_input.
      `action_name` : past-tense verb used when constructing status messages.

    If no exception is raised, `task_fcn` is expected to return a dict holding
    the task's result with the following keys:

      'attempted': number of attempts made
      'succeeded': number of attempts that "succeeded"
      'skipped': number of attempts that "skipped"
      'failed': number of attempts that "failed"
      'total': number of possible subtasks to attempt
      'action_name': user-visible, past-tense verb to use in status messages
          (pass-through of the input `action_name`)
      'duration_ms': how long the task has (or had) been running.

    Whatever `task_fcn` returns is returned unchanged.

    Raises ValueError when the task id recorded on the InstructorTask does not
    match the id of the task that is currently running.
    """
    # Look up the InstructorTask to update. A failed lookup is deliberately
    # left uncaught so that the exception goes back to Celery.
    instructor_task = InstructorTask.objects.get(pk=entry_id)
    instructor_task.task_state = PROGRESS
    instructor_task.save_now()

    # Pull this task's inputs off the entry.
    expected_task_id = instructor_task.task_id
    course_id = instructor_task.course_id
    task_input = json.loads(instructor_task.task_input)

    # Common prefix for every log message emitted here.
    task_info_string = (
        u'Task: {task_id}, InstructorTask ID: {entry_id}, Course: {course_id}, Input: {task_input}'
    ).format(
        task_id=expected_task_id,
        entry_id=entry_id,
        course_id=course_id,
        task_input=task_input,
    )
    TASK_LOG.info(u'%s, Starting update (nothing %s yet)', task_info_string, action_name)

    # The task id stored on the InstructorTask must be the id of the task
    # that is actually executing right now.
    running_task_id = _get_current_task().request.id
    if expected_task_id != running_task_id:
        message = u'{task_info}, Requested task did not match actual task "{actual_id}"'.format(
            task_info=task_info_string,
            actual_id=running_task_id,
        )
        TASK_LOG.error(message)
        raise ValueError(message)

    # Do the actual work, timing it as a whole.
    timer_tags = [u'action:{name}'.format(name=action_name)]
    with dog_stats_api.timer('instructor_tasks.time.overall', tags=timer_tags):
        task_progress = task_fcn(entry_id, course_id, task_input, action_name)

    # Let go of any queries the connection has been holding onto.
    reset_queries()

    # Log completion and hand the progress info back as the task result.
    TASK_LOG.info(u'%s, Task type: %s, Finishing task: %s', task_info_string, action_name, task_progress)
    return task_progress
def iter(
        self,
        users,
        course=None,
        collected_block_structure=None,
        course_key=None,
        force_update=False,
):
    """
    Yield one grade result for each of the given users.

    Arguments:
        users: an iterable of users (User) to grade.
        course, collected_block_structure, course_key: passed as-is to
            CourseData to identify the course being graded.
        force_update: passed through to ``_iter_grade_result`` for every user.

    Yields:
        Whatever ``_iter_grade_result`` returns for each user -- presumably a
        GradeResult named tuple of (student, course_grade, err_msg), where
        course_grade is None and err_msg holds an exception message on
        error, and err_msg is an empty string otherwise (confirm against
        ``_iter_grade_result``).
    """
    # Build a single CourseData up front and share it across all users so:
    #   1. Correctness: the same version of the course is used to
    #      compute the grade for all students.
    #   2. Optimization: the collected course_structure is not
    #      retrieved from the data store multiple times.
    course_data = CourseData(
        user=None,
        course=course,
        collected_block_structure=collected_block_structure,
        course_key=course_key,
    )
    stats_tags = [u'action:{}'.format(course_data.course_key)]
    for user in users:
        with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):
            result = self._iter_grade_result(user, course_data, force_update)
        # Yield only after the timer has closed; yielding inside the ``with``
        # block would keep the timer running while the consumer processes
        # the result, inflating the recorded grading time.
        yield result
def iter(self, course, students):
    """
    Yield a GradeResult for each of the given students in the course.

    Arguments:
        course: the course to grade; its ``id`` is used to fetch the
            collected block structure, for the metric tag and for log
            messages.
        students: an iterable of students (User) to grade.

    Yields:
        GradeResult named tuples of (student, course_grade, err_msg).
        If an error occurred, course_grade is None and err_msg is a
        non-empty description of the exception. If there was no error,
        err_msg is an empty string.
    """
    # Pre-fetch the collected course_structure so:
    #   1. Correctness: the same version of the course is used to
    #      compute the grade for all students.
    #   2. Optimization: the collected course_structure is not
    #      retrieved from the data store multiple times.
    collected_block_structure = get_block_structure_manager(course.id).get_collected()
    # The tags are identical for every student, so build them only once.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):
            try:
                course_grade = CourseGradeFactory().create(student, course, collected_block_structure)
                result = self.GradeResult(student, course_grade, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                result = self.GradeResult(student, None, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def iter(self, course, students, force_update=False):
    """
    Yield a GradeResult for each of the given students in the course.

    Arguments:
        course: the course to grade; its ``id`` is used to fetch the
            collected block structure, for the metric tag and for log
            messages.
        students: an iterable of students (User) to grade.
        force_update: when true, each grade is computed with
            ``CourseGradeFactory().update``; otherwise with
            ``CourseGradeFactory().create``.

    Yields:
        GradeResult named tuples of (student, course_grade, err_msg).
        If an error occurred, course_grade is None and err_msg is a
        non-empty description of the exception. If there was no error,
        err_msg is an empty string.
    """
    # Pre-fetch the collected course_structure so:
    #   1. Correctness: the same version of the course is used to
    #      compute the grade for all students.
    #   2. Optimization: the collected course_structure is not
    #      retrieved from the data store multiple times.
    collected_block_structure = get_block_structure_manager(course.id).get_collected()
    # The tags are identical for every student, so build them only once.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        with dog_stats_api.timer('lms.grades.CourseGradeFactory.iter', tags=stats_tags):
            try:
                # Built inside the try so that a failure to construct the
                # factory is handled like any other grading failure.
                factory = CourseGradeFactory()
                operation = factory.update if force_update else factory.create
                course_grade = operation(student, course, collected_block_structure)
                result = self.GradeResult(student, course_grade, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                result = self.GradeResult(student, None, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def request_timer(request_id, method, url, tags=None):
    """
    Time the code that runs while this generator is suspended at its yield.

    The elapsed time is reported through the "comment_client.request.time"
    timer (with the given `tags`) and, once the generator is resumed
    normally, written to the log together with `request_id`, `method` and
    `url`.

    NOTE(review): this looks like the body of a context manager (a single
    bare ``yield``); the decorator, if any, is outside this view -- confirm.
    """
    started_at = time()
    with dog_stats_api.timer("comment_client.request.time", tags=tags):
        yield
    elapsed = time() - started_at
    message = (
        u"comment_client_request_log: request_id={request_id}, method={method}, "
        u"url={url}, duration={duration}"
    ).format(
        request_id=request_id,
        method=method,
        url=url,
        duration=elapsed,
    )
    log.info(message)
def request_timer(request_id, method, url, tags=None):
    """
    Time the code that runs while this generator is suspended at its yield.

    The elapsed time is reported through the 'comment_client.request.time'
    timer (with the given `tags`) and, once the generator is resumed
    normally, written to the log together with `request_id`, `method` and
    `url`.

    NOTE(review): this looks like the body of a context manager (a single
    bare ``yield``); the decorator, if any, is outside this view -- confirm.
    """
    started_at = time()
    with dog_stats_api.timer('comment_client.request.time', tags=tags):
        yield
    elapsed = time() - started_at
    message = (
        u"comment_client_request_log: request_id={request_id}, method={method}, "
        u"url={url}, duration={duration}"
    ).format(
        request_id=request_id,
        method=method,
        url=url,
        duration=elapsed,
    )
    log.info(message)
def iterate_grades_for(course_or_id, students, keep_raw_scores=False):
    """
    Yield a (student, gradeset, err_msg) tuple for each of the given students.

    Arguments:
        course_or_id: either a course object, or a course id (string or
            CourseKey) that is resolved with ``courses.get_course_by_id``.
        students: an iterable of students (User) to grade.
        keep_raw_scores: passed through to ``grade``.

    Yields:
        Tuples of (student, gradeset, err_msg). If an error occurred,
        gradeset is an empty dict and err_msg is a non-empty description of
        the exception. If there was no error, err_msg is an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes up the grade.
        (For display)
    - grade_breakdown : A breakdown of the major components that make up the
        final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    if isinstance(course_or_id, (basestring, CourseKey)):
        course = courses.get_course_by_id(course_or_id)
    else:
        course = course_or_id

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    # The tags are identical for every student, so build them only once.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=stats_tags):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}
                gradeset = grade(student, request, course, keep_raw_scores)
                result = (student, gradeset, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                result = (student, {}, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def send_all_users_email(entry_id, email_id, to_list, global_email_context, subtask_status_dict):
    """
    Send an email to a list of recipients as one subtask of an all-users email task.

    Inputs are:
      * `entry_id`: id passed to the subtask validity check and to every
        status update.
      * `email_id`: id of the email to send.
      * `to_list`: list of recipients; its length is the number of sends
        counted as failed if the subtask dies unexpectedly.
      * `global_email_context`: context handed on to `_send_all_users_email`.
      * `subtask_status_dict`: dict form of the current SubtaskStatus.

    Returns the resulting subtask status as a dict.

    Re-raises any unexpected exception (after recording all recipients as
    failed), and raises the exception reported by `_send_all_users_email`
    when the send has to be retried or has failed.
    """
    status = SubtaskStatus.from_dict(subtask_status_dict)
    subtask_id = status.task_id
    recipient_count = len(to_list)
    log.info(
        (u"Preparing to send email %s to %d recipients as subtask %s "
         u"for email for all users task %d: context = %s, status=%s"),
        email_id,
        recipient_count,
        subtask_id,
        entry_id,
        global_email_context,
        status,
    )

    check_all_users_email_subtask_is_valid(entry_id, subtask_id, status)

    try:
        # course_title = global_email_context['course_title']
        # TODO: understand the timer's parameters
        with dog_stats_api.timer('course_email.single_task.time.overall'):
            updated_status, failure = _send_all_users_email(
                entry_id,
                email_id,
                to_list,
                global_email_context,
                status,
            )
    except Exception:
        # Record every recipient as failed before letting the error propagate.
        log.exception("Send-email task %s for email %s: failed unexpectedly!", subtask_id, email_id)
        status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, subtask_id, status)
        raise

    if isinstance(failure, RetryTaskError):
        # Only logged here; the status is not updated on the retry path.
        log.warning("Send-email task %s for email %s: being retried", subtask_id, email_id)
        raise failure  # pylint: disable=raising-bad-type

    if failure is not None:
        log.error("Send-email task %s for email %s: failed: %s", subtask_id, email_id, failure)
        update_subtask_status(entry_id, subtask_id, updated_status)
        raise failure  # pylint: disable=raising-bad-type

    log.info("Send-email task %s for email %s: succeeded", subtask_id, email_id)
    update_subtask_status(entry_id, subtask_id, updated_status)

    log.info("Send-email task %s for email %s: returning status %s", subtask_id, email_id, updated_status)
    return updated_status.to_dict()
def iterate_grades_for(course_or_id, students):
    """
    Yield a GradeResult for each of the given students in the course.

    Arguments:
        course_or_id: either a course object, or a course id (string or
            CourseKey) that is resolved with ``get_course_by_id``.
        students: an iterable of students (User) to grade.

    Yields:
        GradeResult named tuples of (student, gradeset, err_msg). If an
        error occurred, gradeset is an empty dict and err_msg is a
        non-empty description of the exception. If there was no error,
        err_msg is an empty string.

    The gradeset is a dictionary with the following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes up the grade.
        (For display)
    - grade_breakdown : A breakdown of the major components that make up the
        final grade. (For display)
    - raw_scores: contains scores for every graded module
    """
    if isinstance(course_or_id, (basestring, CourseKey)):
        course = get_course_by_id(course_or_id)
    else:
        course = course_or_id

    # The tags are identical for every student, so build them only once.
    stats_tags = [u'action:{}'.format(course.id)]
    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=stats_tags):
            try:
                gradeset = summary(student, course)
                result = GradeResult(student, gradeset, "")
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # ``exc.message`` is empty for exceptions raised with zero or
                # several arguments (and absent on Python 3); fall back to
                # repr() so that a failure never yields an empty err_msg,
                # which callers would read as "no error".
                err_msg = getattr(exc, 'message', None) or repr(exc)
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course.id,
                    err_msg
                )
                result = GradeResult(student, {}, err_msg)
        # Yield only after leaving the timer and the try block: otherwise the
        # timer would also measure the time the consumer spends between
        # iterations, and exceptions thrown into the generator by the
        # consumer would be logged as grading failures.
        yield result
def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
        Recipients are processed from the end of the list and popped off once
        handled, so the list is mutated and always holds the recipients that
        remain to be emailed.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
        It must contain 'course_title'.
      * `subtask_status` : object of class SubtaskStatus representing current status.

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.

    Returns a tuple of two values:
      * First value is a SubtaskStatus object which represents current progress at the end of this call.

      * Second value is an exception returned by the innards of the method, indicating a fatal error.
        In this case, the number of recipients that were not sent have already been added to the
        'failed' count above.  It is None on successful completion.

    When a retry is needed, the result of `_submit_for_retry` is returned instead.

    Raises CourseEmail.DoesNotExist if no CourseEmail with `email_id` exists.
    """
    # Get information from current task's request:
    parent_task_id = InstructorTask.objects.get(pk=entry_id).task_id
    task_id = subtask_status.task_id
    total_recipients = len(to_list)
    recipient_num = 0
    total_recipients_successful = 0
    total_recipients_failed = 0
    recipients_info = Counter()

    log.info(
        "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, TotalRecipients: %s",
        parent_task_id,
        task_id,
        email_id,
        total_recipients
    )

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Could not find email to send.",
            parent_task_id,
            task_id,
            email_id
        )
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if subtask_status.get_retry_count() == 0:
        to_list, num_optout = _filter_optouts_from_recipients(to_list, course_email.course_id)
        subtask_status.increment(skipped=num_optout)

    course_title = global_email_context['course_title']

    # use the email from address in the CourseEmail, if it is present, otherwise compute it
    from_addr = course_email.from_addr or _get_source_address(course_email.course_id, course_title)

    # use the CourseEmailTemplate that was associated with the CourseEmail
    course_email_template = course_email.get_template()

    # Bind the name before entering the try block: if get_connection() itself
    # fails, the finally clause must not hit an unbound local, which would
    # replace the retry/failure result with an UnboundLocalError.
    connection = None
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            recipient_num += 1
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']
            email_context['user_id'] = current_recipient['pk']
            email_context['course_id'] = course_email.course_id

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(
                course_email.subject,
                plaintext_msg,
                from_addr,
                [email],
                connection=connection
            )
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status.retried_nomax > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            # NOTE: the log messages below are built with implicit string
            # concatenation rather than a backslash continuation inside the
            # literal, which would embed the source indentation in the message.
            try:
                log.info(
                    "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, "
                    "Recipient name: %s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    current_recipient['profile__name'],
                    email
                )
                with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SMTPDataError), Task: %s, SubTask: %s, EmailId: %s, "
                    "Recipient num: %s/%s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    raise
                else:
                    # This will fall through and not retry the message.
                    log.warning(
                        'BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, '
                        'Email not delivered to %s due to error %s',
                        parent_task_id,
                        task_id,
                        email_id,
                        recipient_num,
                        total_recipients,
                        email,
                        exc.smtp_error
                    )
                    dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                    subtask_status.increment(failed=1)

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SINGLE_EMAIL_FAILURE_ERRORS), Task: %s, SubTask: %s, "
                    "EmailId: %s, Recipient num: %s/%s, Email address: %s, Exception: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email,
                    exc
                )
                dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            else:
                total_recipients_successful += 1
                log.info(
                    "BulkEmail ==> Status: Success, Task: %s, SubTask: %s, EmailId: %s, "
                    "Recipient num: %s/%s, Email address: %s,",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                subtask_status.increment(succeeded=1)

            # Pop the user that was emailed off the end of the list only once they have
            # successfully been processed.  (That way, if there were a failure that
            # needed to be retried, the user is still on the list.)
            recipients_info[email] += 1
            to_list.pop()

        log.info(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Successful Recipients: %s/%s, "
            "Failed Recipients: %s/%s",
            parent_task_id,
            task_id,
            email_id,
            total_recipients_successful,
            total_recipients,
            total_recipients_failed,
            total_recipients
        )
        # Report every address that was processed more than once.
        duplicate_recipients = [
            "{0} ({1})".format(email, repetition)
            for email, repetition in recipients_info.most_common()
            if repetition > 1
        ]
        if duplicate_recipients:
            log.info(
                "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Duplicate Recipients [%s]: [%s]",
                parent_task_id,
                task_id,
                email_id,
                len(duplicate_recipients),
                ', '.join(duplicate_recipients)
            )

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_nomax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=True
        )

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception(('Task %s: email with id %d caused send_course_email task to fail '
                       'with "fatal" exception. %d emails unsent.'),
                      task_id, email_id, num_pending)
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_status.increment(failed=num_pending, state=FAILURE)
        return subtask_status, exc

    except Exception as exc:  # pylint: disable=broad-except
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        log.exception(('Task %s: email with id %d caused send_course_email task to fail '
                       'with unexpected exception. Generating retry.'),
                      task_id, email_id)
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_status.increment(state=SUCCESS)
        # Successful completion is marked by an exception value of None.
        return subtask_status, None
    finally:
        # Clean up at the end; there is nothing to close if the connection
        # was never obtained.
        if connection is not None:
            connection.close()
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Run `update_fcn` over every StudentModule selected for this task.

    The problems to visit come from `task_input`: a single problem given by
    'problem_url', or all problems of the section given by
    'entrance_exam_url' (which takes precedence when both are present).
    The student modules to update are then fetched with
    `_get_modules_to_update`, using `filter_fcn` and the optional 'student'
    entry of `task_input`.

    `update_fcn` is called once per student module with three arguments: the
    module descriptor of the problem the module's `module_state_key` points
    to, the StudentModule itself, and `task_input`.  It must return one of
    UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED;
    any other value raises UpdateProblemModuleStateError.

    The return value is the result of the final `update_task_state()` call;
    per the task conventions it is a dict with the following keys:

      'attempted': number of attempts made
      'succeeded': number of attempts that "succeeded"
      'skipped': number of attempts that "skipped"
      'failed': number of attempts that "failed"
      'total': number of possible updates to attempt
      'action_name': user-visible, past-tense verb to use in status messages
          (pass-through of the input `action_name`)
      'duration_ms': how long the task has (or had) been running.

    Because this runs inside a task, exceptions are not caught here.  They
    propagate to the next level so that it can set the failure modes and
    capture the error trace in the InstructorTask and the result object.
    """
    start_time = time()

    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')
    override_score_task = action_name == ugettext_noop('overridden')

    usage_keys = []
    problems = {}

    # A single problem: derive its usage key and look up its descriptor.
    if problem_url:
        usage_key = UsageKey.from_string(problem_url).map_into_course(course_id)
        usage_keys.append(usage_key)
        problems[unicode(usage_key)] = modulestore().get_item(usage_key)

    # An entrance exam: take every problem found in that section instead.
    if entrance_exam_url:
        problems = get_problems_in_section(entrance_exam_url)
        usage_keys = [UsageKey.from_string(problem_location) for problem_location in problems.keys()]

    modules_to_update = _get_modules_to_update(
        course_id,
        usage_keys,
        student_identifier,
        filter_fcn,
        override_score_task,
    )

    task_progress = TaskProgress(action_name, len(modules_to_update), start_time)
    task_progress.update_task_state()

    for student_module in modules_to_update:
        task_progress.attempted += 1
        descriptor = problems[unicode(student_module.module_state_key)]
        # Deliberately no try/except: an error here propagates, and the task
        # ends up marked as FAILED, with a stack trace.
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=[u'action:{name}'.format(name=action_name)]):
            update_status = update_fcn(descriptor, student_module, task_input)

        if update_status == UPDATE_STATUS_SUCCEEDED:
            # The update_fcn performed some kind of work.  Logging of
            # failures is left to the update_fcn itself.
            task_progress.succeeded += 1
            continue
        if update_status == UPDATE_STATUS_FAILED:
            task_progress.failed += 1
            continue
        if update_status == UPDATE_STATUS_SKIPPED:
            task_progress.skipped += 1
            continue
        raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(update_status))

    return task_progress.update_task_state()
def send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status_dict):
    """
    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
      * `subtask_status_dict` : dict containing values representing current status.  Keys are:

        'task_id' : id of subtask.  This is used to pass task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of times the subtask has been retried for conditions that
            should not have a maximum count applied
        'retried_withmax' : number of times the subtask has been retried for conditions that
            should have a maximum count applied
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

        Most values will be zero on initial call, but may be different when the task is
        invoked as part of a retry.

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.  Updates InstructorTask object
    with status information (sends, failures, skips) and updates number of subtasks completed.

    Returns the resulting subtask status as a dict.
    """
    status = SubtaskStatus.from_dict(subtask_status_dict)
    subtask_id = status.task_id
    recipient_count = len(to_list)
    log.info(
        (u"Preparing to send email %s to %d recipients as subtask %s "
         u"for instructor task %d: context = %s, status=%s"),
        email_id,
        recipient_count,
        subtask_id,
        entry_id,
        global_email_context,
        status,
    )

    # Make sure the requested subtask is actually known to the current InstructorTask
    # entry.  A failed check throws, which should fail this subtask immediately.
    # Duplicate subtasks for one InstructorTask entry can appear when the parent task
    # has been run twice -- e.g. when Celery loses its connection to its broker and
    # any current tasks get requeued.  We hope to catch that in
    # perform_delegate_email_batches() when it's the parent task that is resubmitted,
    # but check here as well in case that fails.  For the same reason this very task
    # may be run twice by Celery, so we also need to confirm that it has not already
    # been completed.
    check_subtask_is_valid(entry_id, subtask_id, status)

    try:
        title = global_email_context['course_title']
        with dog_stats_api.timer('course_email.single_task.time.overall', tags=[_statsd_tag(title)]):
            updated_status, failure = _send_course_email(
                entry_id,
                email_id,
                to_list,
                global_email_context,
                status,
            )
    except Exception:
        # Unexpected exception.  Try to write out the failure to the entry before failing.
        log.exception("Send-email task %s for email %s: failed unexpectedly!", subtask_id, email_id)
        # We got here for really unexpected reasons.  Since we don't know how far
        # the task got in emailing, we count all recipients as having failed.
        # It at least keeps the counts consistent.
        status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, subtask_id, status)
        raise

    if isinstance(failure, RetryTaskError):
        # If retrying, a RetryTaskError needs to be returned to Celery.
        # We assume that the progress made before the retry condition
        # was encountered has already been updated before the retry call was made,
        # so we only log here.
        log.warning("Send-email task %s for email %s: being retried", subtask_id, email_id)
        raise failure  # pylint: disable=raising-bad-type

    if failure is not None:
        log.error("Send-email task %s for email %s: failed: %s", subtask_id, email_id, failure)
        update_subtask_status(entry_id, subtask_id, updated_status)
        raise failure  # pylint: disable=raising-bad-type

    # Success: update the InstructorTask object that is storing its progress.
    log.info("Send-email task %s for email %s: succeeded", subtask_id, email_id)
    update_subtask_status(entry_id, subtask_id, updated_status)

    # return status in a form that can be serialized by Celery into JSON:
    log.info("Send-email task %s for email %s: returning status %s", subtask_id, email_id, updated_status)
    return updated_status.to_dict()
def post(self, request, course_key_string):
    """
    Receive one chunk of a course/library ``.tar.gz`` upload and, once the
    last chunk has arrived, unpack the archive and import it.

    The uploaded file is read from ``request.FILES['course-data']``. When
    the request carries a Content-Range header (``HTTP_CONTENT_RANGE``) it
    is parsed with ``CONTENT_RE`` into ``start``/``stop``/``end``; without
    the header the upload is treated as a single chunk.

    Progress is recorded through ``self._save_request_status`` as a stage
    number: 0 (upload started), 1 (upload complete), 2 (archive extracted),
    3 (root XML file located), 4 (import finished). Failures are recorded
    as negative stage numbers.

    Arguments:
        request: the incoming request; supplies the uploaded file, the
            Content-Range header and the importing user.
        course_key_string: serialized key of the course or library to
            import into, parsed with ``CourseKey.from_string``.

    Returns:
        JsonResponse: ``{'status': 'OK'}`` after a successful import; a
        ``files`` listing while more chunks are expected;
        ``{'ImportStatus': 1}`` for a repeated final chunk; or a payload
        with ``error_message`` and ``stage`` (HTTP 400, 409 or 415) on
        failure.
    """
    # Function-scope import so this method does not depend on the module's
    # import block for the EEXIST constant used below.
    import errno

    courselike_key = CourseKey.from_string(course_key_string)
    library = isinstance(courselike_key, LibraryLocator)
    if library:
        root_name = LIBRARY_ROOT
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        import_func = import_course_from_xml

    filename = request.FILES['course-data'].name
    courselike_string = unicode(courselike_key) + filename
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir

    status_key = "import_export.import.status:{}|{}".format(
        request.user.username,
        courselike_string)

    # Do everything in a try-except block to make sure everything is
    # properly cleaned up.
    try:
        # Cache the import progress
        self._save_request_status(request, courselike_string, 0)
        if not filename.endswith('.tar.gz'):
            self._save_request_status(request, courselike_string, -1)
            return JsonResponse(
                {
                    'error_message': _('We only support uploading a .tar.gz file.'),
                    'stage': -1
                },
                status=415)

        temp_filepath = course_dir / filename

        # Only handle exceptions caused by the directory already existing,
        # to avoid a potential race condition caused by the "check and go"
        # method.
        try:
            os.makedirs(course_dir)
        except OSError as exc:
            # EEXIST is a constant of the errno module; exception instances
            # have no such attribute, so it must not be read off `exc`.
            if exc.errno != errno.EEXIST:
                raise

        logging.debug('importing course to %s', temp_filepath)

        # Get upload chunks byte ranges
        try:
            matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
            content_range = matches.groupdict()
        except KeyError:  # Single chunk
            # no Content-Range header, so make one that will work
            content_range = {'start': 0, 'stop': 1, 'end': 2}

        # stream out the uploaded files in chunks to disk
        if int(content_range['start']) == 0:
            mode = "wb+"
        else:
            mode = "ab+"
            size = os.path.getsize(temp_filepath)
            # Check to make sure we haven't missed a chunk
            # This shouldn't happen, even if different instances are
            # handling the same session, but it's always better to catch
            # errors earlier.
            if size < int(content_range['start']):
                self._save_request_status(request, courselike_string, -1)
                log.warning(
                    "Reported range %s does not match size downloaded so "
                    "far %s",
                    content_range['start'],
                    size)
                return JsonResponse(
                    {
                        'error_message': _('File upload corrupted. Please try again'),
                        'stage': -1
                    },
                    status=409)
            # The last request sometimes comes twice. This happens because
            # nginx sends a 499 error code when the response takes too long.
            elif size > int(content_range['stop']) \
                    and size == int(content_range['end']):
                return JsonResponse({'ImportStatus': 1})

        with open(temp_filepath, mode) as temp_file:
            for chunk in request.FILES['course-data'].chunks():
                temp_file.write(chunk)

        size = os.path.getsize(temp_filepath)

        if int(content_range['stop']) != int(content_range['end']) - 1:
            # More chunks coming
            return JsonResponse({
                "files": [{
                    "name": filename,
                    "size": size,
                    "delete_url": "",
                    "delete_type": "",
                    "thumbnail_url": ""
                }]
            })
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        self._save_request_status(request, courselike_string, -1)
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            log.info("Course import %s: Temp data cleared", courselike_key)

        log.exception("error importing course")
        return JsonResponse({
            'error_message': str(exception),
            'stage': -1
        }, status=400)

    # try-finally block for proper clean up after receiving last chunk.
    try:
        # This was the last chunk.
        log.info("Course import %s: Upload complete", courselike_key)
        self._save_request_status(request, courselike_string, 1)

        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
        except SuspiciousOperation as exc:
            self._save_request_status(request, courselike_string, -1)
            return JsonResponse(
                {
                    'error_message': 'Unsafe tar file. Aborting import.',
                    'suspicious_operation_message': exc.args[0],
                    'stage': -1
                },
                status=400)
        finally:
            tar_file.close()

        log.info("Course import %s: Uploaded file extracted", courselike_key)
        self._save_request_status(request, courselike_string, 2)

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for dirpath, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, dirpath)

        def get_dir_for_fname(directory, filename):
            """
            Returns the dirpath for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for fname, dirpath in get_all_files(directory):
                if fname == filename:
                    return dirpath
            return None

        dirpath = get_dir_for_fname(course_dir, root_name)
        if not dirpath:
            self._save_request_status(request, courselike_string, -2)
            return JsonResponse(
                {
                    'error_message': _(
                        'Could not find the {root_xml_file} file in the package.'
                    ).format(root_xml_file=root_name),
                    'stage': -2
                },
                status=415)

        # The import function is handed a path relative to the data root.
        dirpath = os.path.relpath(dirpath, data_root)
        logging.debug('found %s at %s', root_name, dirpath)

        log.info("Course import %s: Extracted file verified", courselike_key)
        self._save_request_status(request, courselike_string, 3)

        with dog_stats_api.timer(
                'courselike_import.time',
                tags=[u"courselike:{}".format(courselike_key)]):
            courselike_items = import_func(
                modulestore(),
                request.user.id,
                settings.GITHUB_REPO_ROOT,
                [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key,
            )

        new_location = courselike_items[0].location
        logging.debug('new course at %s', new_location)

        log.info("Course import %s: Course import successful", courselike_key)
        self._save_request_status(request, courselike_string, 4)

    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        log.exception("error importing course")
        return JsonResponse(
            {
                'error_message': str(exception),
                'stage': -cache.get(status_key)
            },
            status=400)

    finally:
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            log.info(
                "Course import %s: Temp data cleared", courselike_key  # pylint: disable=no-value-for-parameter
            )
        # set failed stage number with negative sign in case of an
        # unsuccessful import
        if cache.get(status_key) != 4:
            self._save_request_status(request, courselike_string, -abs(cache.get(status_key)))

    return JsonResponse({'status': 'OK'})
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Run `update_fcn` over the StudentModule rows selected by `task_input`.

    Selection of rows:
      * `task_input['problem_url']`, when present, selects the single problem
        it names within `course_id`.
      * `task_input['entrance_exam_url']`, when present, selects every problem
        returned by `get_problems_in_section` for that section (this replaces
        any selection made from `problem_url`).
      * `task_input['student']`, when not None, restricts the rows to one user,
        looked up by email if the value contains "@" and by username
        otherwise.  A failed lookup is allowed to raise.
      * `filter_fcn`, when not None, receives the query built so far and
        returns the query that is actually used.

    For each selected row `update_fcn(module_descriptor, student_module)` is
    called, and its return value must be one of UPDATE_STATUS_SUCCEEDED,
    UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; the matching counter on the
    task progress is incremented.  Any other value raises
    UpdateProblemModuleStateError.

    Exceptions are deliberately not caught here: they propagate to the caller
    so the task can be marked as failed together with its stack trace.

    Returns whatever `TaskProgress.update_task_state()` returns once every
    row has been visited.
    """
    start_time = time()

    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')

    # `problems` maps a stringified usage key to its descriptor; `usage_keys`
    # holds the keys used to query StudentModule.
    problems = {}
    usage_keys = []

    if problem_url:
        # A single problem was named: build its key and fetch its descriptor.
        single_key = course_id.make_usage_key_from_deprecated_string(problem_url)
        usage_keys.append(single_key)
        problems[unicode(single_key)] = modulestore().get_item(single_key)

    if entrance_exam_url:
        # An entrance exam was named: use all the problems in that section.
        problems = get_problems_in_section(entrance_exam_url)
        usage_keys = [UsageKey.from_string(key_string) for key_string in problems.keys()]

    modules_to_update = StudentModule.objects.filter(
        course_id=course_id,
        module_state_key__in=usage_keys,
    )

    # Optionally narrow to one student; without an identifier every student
    # with a matching row is processed.
    if student_identifier is not None:
        if "@" in student_identifier:
            student = User.objects.get(email=student_identifier)
        else:
            student = User.objects.get(username=student_identifier)
        modules_to_update = modules_to_update.filter(student_id=student.id)

    if filter_fcn is not None:
        modules_to_update = filter_fcn(modules_to_update)

    task_progress = TaskProgress(action_name, modules_to_update.count(), start_time)
    task_progress.update_task_state()

    for student_module in modules_to_update:
        task_progress.attempted += 1
        descriptor = problems[unicode(student_module.module_state_key)]

        # No try/except on purpose: an error here should fail the whole task.
        step_tags = [u'action:{name}'.format(name=action_name)]
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=step_tags):
            outcome = update_fcn(descriptor, student_module)

        # Failures are expected to be logged by update_fcn itself; only the
        # counters are maintained here.
        if outcome == UPDATE_STATUS_SUCCEEDED:
            task_progress.succeeded += 1
            continue
        if outcome == UPDATE_STATUS_FAILED:
            task_progress.failed += 1
            continue
        if outcome == UPDATE_STATUS_SKIPPED:
            task_progress.skipped += 1
            continue
        raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(outcome))

    return task_progress.update_task_state()
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Run `update_fcn` over the StudentModule rows selected by `task_input`,
    handing `task_input` through to each call.

    Selection of rows:
      * `task_input['problem_url']`, when present, selects the single problem
        it names within `course_id`.
      * `task_input['entrance_exam_url']`, when present, selects every problem
        returned by `get_problems_in_section` for that section (this replaces
        any selection made from `problem_url`).
      * `task_input['student']`, when not None, restricts the rows to one user,
        looked up by email if the value contains "@" and by username
        otherwise.  A failed lookup is allowed to raise.
      * `filter_fcn`, when not None, receives the query built so far and
        returns the query that is actually used.

    For each selected row
    `update_fcn(module_descriptor, student_module, task_input)` is called, and
    its return value must be one of UPDATE_STATUS_SUCCEEDED,
    UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; the matching counter on the
    task progress is incremented.  Any other value raises
    UpdateProblemModuleStateError.

    Exceptions are deliberately not caught here: they propagate to the caller
    so the task can be marked as failed together with its stack trace.

    Returns whatever `TaskProgress.update_task_state()` returns once every
    row has been visited.
    """
    start_time = time()

    problem_url = task_input.get('problem_url')
    entrance_exam_url = task_input.get('entrance_exam_url')
    student_identifier = task_input.get('student')

    # `problems` maps a stringified usage key to its descriptor; `usage_keys`
    # holds the keys used to query StudentModule.
    problems = {}
    usage_keys = []

    if problem_url:
        # A single problem was named: build its key and fetch its descriptor.
        single_key = course_id.make_usage_key_from_deprecated_string(problem_url)
        usage_keys.append(single_key)
        problems[unicode(single_key)] = modulestore().get_item(single_key)

    if entrance_exam_url:
        # An entrance exam was named: use all the problems in that section.
        problems = get_problems_in_section(entrance_exam_url)
        usage_keys = [UsageKey.from_string(key_string) for key_string in problems.keys()]

    modules_to_update = StudentModule.objects.filter(
        course_id=course_id,
        module_state_key__in=usage_keys,
    )

    # Optionally narrow to one student; without an identifier every student
    # with a matching row is processed.
    if student_identifier is not None:
        if "@" in student_identifier:
            student = User.objects.get(email=student_identifier)
        else:
            student = User.objects.get(username=student_identifier)
        modules_to_update = modules_to_update.filter(student_id=student.id)

    if filter_fcn is not None:
        modules_to_update = filter_fcn(modules_to_update)

    task_progress = TaskProgress(action_name, modules_to_update.count(), start_time)
    task_progress.update_task_state()

    for student_module in modules_to_update:
        task_progress.attempted += 1
        descriptor = problems[unicode(student_module.module_state_key)]

        # No try/except on purpose: an error here should fail the whole task.
        step_tags = [u'action:{name}'.format(name=action_name)]
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=step_tags):
            outcome = update_fcn(descriptor, student_module, task_input)

        # Failures are expected to be logged by update_fcn itself; only the
        # counters are maintained here.
        if outcome == UPDATE_STATUS_SUCCEEDED:
            task_progress.succeeded += 1
            continue
        if outcome == UPDATE_STATUS_FAILED:
            task_progress.failed += 1
            continue
        if outcome == UPDATE_STATUS_SKIPPED:
            task_progress.skipped += 1
            continue
        raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(outcome))

    return task_progress.update_task_state()
def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.

    For requests accepting JSON (the default when no Accept header is sent)
    a non-GET request is treated as one chunk of a ``.tar.gz`` upload read
    from ``request.FILES['course-data']``; once the final chunk has arrived
    the archive is unpacked and imported with `import_func`.  A JSON GET is
    not implemented.  A non-JSON GET renders the ``import.html`` page; any
    other non-JSON request gets a 404.

    Progress is recorded with ``_save_request_status`` as a stage number:
    0 (upload started), 1 (upload complete), 2 (archive extracted),
    3 (root XML file located), 4 (import finished).  Failures are recorded
    as negative stage numbers.

    Arguments:
        request: the incoming request.
        courselike_key: key of the course or library being imported into.
        root_name: name of the root XML file that must exist in the
            archive; compared against COURSE_ROOT to enable the
            entrance-exam handling.
        successful_url: passed to the HTML template as
            ``successful_import_redirect_url``.
        context_name: template context key under which
            `courselike_module` is exposed.
        courselike_module: the course or library module being replaced.
        import_func: callable that performs the import from the extracted
            directory.

    Raises:
        PermissionDenied: if the user lacks author access.
        NotImplementedError: for a JSON GET request.
    """
    # Function-scope import so this function does not depend on the module's
    # import block for the EEXIST constant used below.
    import errno

    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course has an entrance exam then remove it and its corresponding milestone.
                # current course state before import.
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id
                        )

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename

                # Create the directory and tolerate "already exists" instead
                # of checking first: with check-then-create, two concurrent
                # chunk requests can both see no directory and the slower
                # one would fail in os.mkdir.
                try:
                    os.mkdir(course_dir)
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise

                logging.debug('importing course to %s', temp_filepath)

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                _save_request_status(request, courselike_string, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)

                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted", courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name, directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415
                    )

                # The import function is handed a path relative to the data root.
                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified", courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]
                ):
                    courselike_items = import_func(
                        modulestore(), request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key
                    )

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful", courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400
                )

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(
                        request,
                        courselike_string,
                        -abs(session_status[courselike_string])
                    )

                # status == 4 represents that course has been imported successfully.
                if session_status[courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True}
                        )[0]

                        metadata = {'entrance_exam_id': unicode(entrance_exam_chapter.location)}
                        CourseMetadata.update_from_dict(metadata, course, request.user)
                        add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url(
            "import_status_handler", courselike_key, kwargs={'filename': "fillerName"}
        )
        return render_to_response('import.html', {
            context_name: courselike_module,
            'successful_import_redirect_url': successful_url,
            'import_status_url': status_url,
            'library': isinstance(courselike_key, LibraryLocator)
        })
    else:
        return HttpResponseNotFound()
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
    """
    Import a course or library from a provided OLX .tar.gz archive.

    The archive is copied from ``course_import_export_storage`` into a
    temporary directory under ``settings.GITHUB_REPO_ROOT``, deleted from
    storage, extracted, and imported into the course or library identified
    by `course_key_string`.  The temporary directory is removed afterwards.

    Outcomes are reported through ``self.status`` (states ``Unpacking``,
    ``Verifying``, ``Updating``; ``fail(...)`` on error) rather than through
    a return value or a raised exception.

    Arguments:
        user_id: primary key of the user performing the import; the user
            must have author access to the target.
        course_key_string: serialized key of the target course or library.
        archive_path: location of the uploaded archive in
            ``course_import_export_storage``.
        archive_name: original file name of the archive; must end in
            ``.tar.gz``.
        language: language passed to ``respect_language`` when building
            user-facing failure messages.
    """
    # Function-scope import so this task does not depend on the module's
    # import block for the EEXIST constant used below.
    import errno

    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with respect_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with respect_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with respect_language(language):
                self.status.fail(_(u'We only support uploading a .tar.gz file.'))
            return

        temp_filepath = course_dir / get_valid_filename(archive_name)

        # Confirm the archive exists before creating anything on disk, so
        # this early return does not leave an empty temp directory behind.
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found', courselike_key, archive_path)
            with respect_language(language):
                self.status.fail(_(u'Tar file not found'))
            return

        # Create the directory and tolerate "already exists" instead of
        # checking first, which would race with a concurrent import of the
        # same course.
        try:
            os.mkdir(course_dir)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise

        LOGGER.debug(u'importing course to %s', temp_filepath)

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:
                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)
                # Two-argument iter(): call read_chunk until it returns b''.
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete', courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam then remove it and its corresponding milestone.
        # current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        LOGGER.exception(u'Error importing course %s', courselike_key, exc_info=True)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s', courselike_key, exc.args[0])
            with respect_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted', courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name, directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with respect_language(language):
                self.status.fail(_(u'Could not find the {0} file in the package.').format(root_name))
            return

        # The import function is handed a path relative to the data root.
        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified', courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        with dog_stats_api.timer(
            u'courselike_import.time',
            tags=[u"courselike:{}".format(courselike_key)]
        ):
            courselike_items = import_func(
                modulestore(), user.id,
                settings.GITHUB_REPO_ROOT, [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key
            )

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful', courselike_key)
    except Exception as exception:  # pylint: disable=broad-except
        LOGGER.exception(u'error importing course', exc_info=True)
        self.status.fail(text_type(exception))
    finally:
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        # The state is still 'Updating' only when the import above finished
        # without any call to self.status.fail() in this function.
        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True}
                )[0]

                metadata = {u'entrance_exam_id': text_type(entrance_exam_chapter.location)}
                CourseMetadata.update_from_dict(metadata, course, user)
                from contentstore.views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
    """
    Import a course or library from a provided OLX .tar.gz archive.

    Arguments:
        user_id: primary key of the User performing the import.
        course_key_string: serialized key of the target course or library;
            a LibraryLocator key selects the library import path.
        archive_path: location of the uploaded archive inside
            `course_import_export_storage`.
        archive_name: original file name of the archive; must end in `.tar.gz`.
        language: language passed to `respect_language` when building
            user-facing failure messages.

    Returns None.  Failures are reported through `self.status.fail(...)`
    rather than by raising; progress is reported through
    `self.status.set_state(...)` / `increment_completed_steps()`.
    """
    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with respect_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with respect_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with respect_language(language):
                self.status.fail(_(u'We only support uploading a .tar.gz file.'))
            return

        # Verify the archive exists *before* creating the working directory,
        # so that this early exit does not leave an empty directory behind.
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found', courselike_key, archive_path)
            with respect_language(language):
                self.status.fail(_(u'Tar file not found'))
            return

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():  # pylint: disable=no-value-for-parameter
            os.mkdir(course_dir)

        LOGGER.debug(u'importing course to %s', temp_filepath)

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:
                # Stream in fixed-size chunks; an empty read marks end of file.
                while True:
                    chunk = source.read(FILE_READ_CHUNK)
                    if not chunk:
                        break
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete', courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam then remove it and its corresponding milestone.
        # current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok? Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        # LOGGER.exception already attaches the active traceback.
        LOGGER.exception(u'Error importing course %s', courselike_key)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s', courselike_key, exc.args[0])
            with respect_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted', courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with respect_language(language):
                self.status.fail(_(u'Could not find the {0} file in the package.').format(root_name))
            return

        # The import function is handed a path relative to the data root.
        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified', courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        with dog_stats_api.timer(
            u'courselike_import.time',
            tags=[u"courselike:{}".format(courselike_key)]
        ):
            courselike_items = import_func(
                modulestore(), user.id,
                settings.GITHUB_REPO_ROOT, [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key
            )

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful', courselike_key)
    except Exception as exception:  # pylint: disable=broad-except
        # LOGGER.exception already attaches the active traceback.
        LOGGER.exception(u'error importing course')
        self.status.fail(text_type(exception))
    finally:
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        # The state is still 'Updating' only when no failure was recorded above.
        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True}
                )[0]

                metadata = {u'entrance_exam_id': text_type(entrance_exam_chapter.location)}
                CourseMetadata.update_from_dict(metadata, course, user)
                from contentstore.views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.

    For JSON requests (the default when no Accept header is sent), a non-GET
    request receives one chunk of a `.tar.gz` upload in
    `request.FILES['course-data']`.  Intermediate chunks are appended to a
    temporary file and acknowledged; once the final chunk has arrived the
    archive is extracted, `root_name` is located inside it and `import_func`
    is run.  Progress is recorded with `_save_request_status` using stage
    numbers 0-4 (negative values mark the stage at which a failure occurred).

    For non-JSON GET requests the `import.html` page is rendered; any other
    request gets an HttpResponseNotFound.

    Raises:
        PermissionDenied: if the user lacks author access to `courselike_key`.
        NotImplementedError: for JSON GET requests.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Computed outside the try block so the error handler below can
            # always rely on `course_dir` being bound.
            data_root = path(settings.GITHUB_REPO_ROOT)
            subdir = base64.urlsafe_b64encode(repr(courselike_key))
            course_dir = data_root / subdir
            # Stays None until the upload's file name is known; the error
            # handler must not try to record a status before that point.
            courselike_string = None

            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course has an entrance exam then remove it and its corresponding milestone.
                # current course state before import.
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id
                        )

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to %s', temp_filepath)

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                if courselike_string is not None:
                    _save_request_status(request, courselike_string, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)

                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted", courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415
                    )

                # The import function is handed a path relative to the data root.
                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified", courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]
                ):
                    courselike_items = import_func(
                        modulestore(), request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key
                    )

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful", courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400
                )

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(request, courselike_string, -abs(session_status[courselike_string]))

                # status == 4 represents that course has been imported successfully.
                if session_status[courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True}
                        )[0]

                        metadata = {'entrance_exam_id': unicode(entrance_exam_chapter.location)}
                        CourseMetadata.update_from_dict(metadata, course, request.user)
                        add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url(
            "import_status_handler", courselike_key, kwargs={'filename': "fillerName"}
        )
        return render_to_response('import.html', {
            context_name: courselike_module,
            'successful_import_redirect_url': successful_url,
            'import_status_url': status_url,
            'library': isinstance(courselike_key, LibraryLocator)
        })
    else:
        return HttpResponseNotFound()
def post(self, request, course_key_string):
    """
    The restful handler for importing a course.

    GET
        json: return json import status
    POST or PUT
        json: import a course via the .tar.gz file specified in
            request.FILES

    The upload may arrive in several chunks (described by the
    Content-Range header).  Intermediate chunks are appended to a temporary
    file and acknowledged with a "files" JSON payload; after the final chunk
    the archive is extracted, verified and imported.  Progress is cached via
    `self._save_request_status` as stages 0-4; a negative stage marks the
    step at which the import failed.

    Returns a JsonResponse in every case.
    """
    # Local import: only needed for the EEXIST comparison below.
    import errno

    courselike_key = CourseKey.from_string(course_key_string)
    library = isinstance(courselike_key, LibraryLocator)

    if library:
        root_name = LIBRARY_ROOT
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        import_func = import_course_from_xml

    filename = request.FILES['course-data'].name
    courselike_string = unicode(courselike_key) + filename
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir

    status_key = "import_export.import.status:{}|{}".format(
        request.user.username,
        courselike_string
    )

    # Do everything in a try-except block to make sure everything is
    # properly cleaned up.
    try:
        # Cache the import progress
        self._save_request_status(request, courselike_string, 0)
        if not filename.endswith('.tar.gz'):
            self._save_request_status(request, courselike_string, -1)
            return JsonResponse(
                {
                    'error_message': _(
                        'We only support uploading a .tar.gz file.'
                    ),
                    'stage': -1
                },
                status=415
            )

        temp_filepath = course_dir / filename

        # Only handle exceptions caused by the directory already existing,
        # to avoid a potential race condition caused by the "check and go"
        # method.
        try:
            os.makedirs(course_dir)
        except OSError as exc:
            # The "already exists" code lives in the errno module; OSError
            # instances carry no EEXIST attribute of their own.
            if exc.errno != errno.EEXIST:
                raise

        logging.debug('importing course to %s', temp_filepath)

        # Get upload chunks byte ranges
        try:
            matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
            content_range = matches.groupdict()
        except KeyError:    # Single chunk
            # no Content-Range header, so make one that will work
            content_range = {'start': 0, 'stop': 1, 'end': 2}

        # stream out the uploaded files in chunks to disk
        if int(content_range['start']) == 0:
            mode = "wb+"
        else:
            mode = "ab+"
            size = os.path.getsize(temp_filepath)
            # Check to make sure we haven't missed a chunk
            # This shouldn't happen, even if different instances are
            # handling the same session, but it's always better to catch
            # errors earlier.
            if size < int(content_range['start']):
                self._save_request_status(request, courselike_string, -1)
                log.warning(
                    "Reported range %s does not match size downloaded so "
                    "far %s",
                    content_range['start'],
                    size
                )
                return JsonResponse(
                    {
                        'error_message': _(
                            'File upload corrupted. Please try again'
                        ),
                        'stage': -1
                    },
                    status=409
                )
            # The last request sometimes comes twice. This happens because
            # nginx sends a 499 error code when the response takes too long.
            elif size > int(content_range['stop']) \
                    and size == int(content_range['end']):
                return JsonResponse({'ImportStatus': 1})

        with open(temp_filepath, mode) as temp_file:
            for chunk in request.FILES['course-data'].chunks():
                temp_file.write(chunk)

        size = os.path.getsize(temp_filepath)

        if int(content_range['stop']) != int(content_range['end']) - 1:
            # More chunks coming
            return JsonResponse({
                "files": [{
                    "name": filename,
                    "size": size,
                    "delete_url": "",
                    "delete_type": "",
                    "thumbnail_url": ""
                }]
            })
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        self._save_request_status(request, courselike_string, -1)
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            log.info(
                "Course import %s: Temp data cleared", courselike_key
            )

        log.exception("error importing course")
        return JsonResponse(
            {
                'error_message': str(exception),
                'stage': -1
            },
            status=400
        )

    # try-finally block for proper clean up after receiving last chunk.
    try:
        # This was the last chunk.
        log.info("Course import %s: Upload complete", courselike_key)
        self._save_request_status(request, courselike_string, 1)

        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(
                tar_file,
                (course_dir + '/').encode('utf-8'))
        except SuspiciousOperation as exc:
            self._save_request_status(request, courselike_string, -1)
            return JsonResponse(
                {
                    'error_message': 'Unsafe tar file. Aborting import.',
                    'suspicious_operation_message': exc.args[0],
                    'stage': -1
                },
                status=400
            )
        finally:
            tar_file.close()

        log.info(
            "Course import %s: Uploaded file extracted", courselike_key
        )
        self._save_request_status(request, courselike_string, 2)

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for dirpath, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, dirpath)

        def get_dir_for_fname(directory, filename):
            """
            Returns the dirpath for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for fname, dirpath in get_all_files(directory):
                if fname == filename:
                    return dirpath
            return None

        dirpath = get_dir_for_fname(course_dir, root_name)
        if not dirpath:
            self._save_request_status(request, courselike_string, -2)
            return JsonResponse(
                {
                    'error_message': _(
                        'Could not find the {root_xml_file} file in the package.'
                    ).format(root_xml_file=root_name),
                    'stage': -2
                },
                status=415
            )

        # The import function is handed a path relative to the data root.
        dirpath = os.path.relpath(dirpath, data_root)
        logging.debug('found %s at %s', root_name, dirpath)

        log.info(
            "Course import %s: Extracted file verified", courselike_key
        )
        self._save_request_status(request, courselike_string, 3)

        with dog_stats_api.timer(
            'courselike_import.time',
            tags=[u"courselike:{}".format(courselike_key)]
        ):
            courselike_items = import_func(
                modulestore(),
                request.user.id,
                settings.GITHUB_REPO_ROOT,
                [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key,
            )

        new_location = courselike_items[0].location
        logging.debug('new course at %s', new_location)

        log.info(
            "Course import %s: Course import successful", courselike_key
        )
        self._save_request_status(request, courselike_string, 4)

    # Send errors to client with stage at which error occurred.
    except Exception as exception:   # pylint: disable=broad-except
        log.exception(
            "error importing course"
        )
        return JsonResponse(
            {
                'error_message': str(exception),
                'stage': -cache.get(status_key)
            },
            status=400
        )

    finally:
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            log.info(
                "Course import %s: Temp data cleared", courselike_key
            )
        # set failed stage number with negative sign in case of an
        # unsuccessful import
        if cache.get(status_key) != 4:
            self._save_request_status(
                request,
                courselike_string,
                -abs(cache.get(status_key))
            )

    return JsonResponse({'status': 'OK'})
def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
      * `subtask_status` : object of class SubtaskStatus representing current status.

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.

    Returns a tuple of two values:
      * First value is a SubtaskStatus object which represents current progress at the end of this call.

      * Second value is an exception returned by the innards of the method, indicating a fatal error.
        In this case, the number of recipients that were not sent have already been added to the
        'failed' count above.

    Raises CourseEmail.DoesNotExist if no CourseEmail with `email_id` exists.

    Note that `to_list` is consumed in place: each recipient is popped from
    the end once processed, so on a retry it holds only the remaining ones.
    """
    # Get information from current task's request:
    parent_task_id = InstructorTask.objects.get(pk=entry_id).task_id
    task_id = subtask_status.task_id
    total_recipients = len(to_list)
    recipient_num = 0
    total_recipients_successful = 0
    total_recipients_failed = 0
    recipients_info = Counter()

    log.info(
        "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, TotalRecipients: %s",
        parent_task_id,
        task_id,
        email_id,
        total_recipients
    )

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Could not find email to send.",
            parent_task_id,
            task_id,
            email_id
        )
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if subtask_status.get_retry_count() == 0:
        to_list, num_optout = _filter_optouts_from_recipients(to_list, course_email.course_id)
        subtask_status.increment(skipped=num_optout)

    course_title = global_email_context['course_title']

    # use the email from address in the CourseEmail, if it is present, otherwise compute it
    from_addr = course_email.from_addr or _get_source_address(course_email.course_id, course_title)

    # use the CourseEmailTemplate that was associated with the CourseEmail
    course_email_template = course_email.get_template()

    # Bound before the try block so that the `finally` clause can tell whether
    # a connection was ever obtained (get_connection() itself may raise).
    connection = None
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            recipient_num += 1
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']
            email_context['user_id'] = current_recipient['pk']
            email_context['course_id'] = course_email.course_id

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(
                course_email.subject,
                plaintext_msg,
                from_addr,
                [email],
                connection=connection
            )
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status.retried_nomax > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            try:
                log.info(
                    "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, "
                    "Recipient name: %s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    current_recipient['profile__name'],
                    email
                )
                with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SMTPDataError), Task: %s, SubTask: %s, EmailId: %s, "
                    "Recipient num: %s/%s, Email address: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    raise exc
                else:
                    # This will fall through and not retry the message.
                    log.warning(
                        'BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Recipient num: %s/%s, '
                        'Email not delivered to %s due to error %s',
                        parent_task_id,
                        task_id,
                        email_id,
                        recipient_num,
                        total_recipients,
                        email,
                        exc.smtp_error
                    )
                    dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                    subtask_status.increment(failed=1)

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                total_recipients_failed += 1
                log.error(
                    "BulkEmail ==> Status: Failed(SINGLE_EMAIL_FAILURE_ERRORS), Task: %s, SubTask: %s, "
                    "EmailId: %s, Recipient num: %s/%s, Email address: %s, Exception: %s",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email,
                    exc
                )
                dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            else:
                total_recipients_successful += 1
                log.info(
                    "BulkEmail ==> Status: Success, Task: %s, SubTask: %s, EmailId: %s, "
                    "Recipient num: %s/%s, Email address: %s,",
                    parent_task_id,
                    task_id,
                    email_id,
                    recipient_num,
                    total_recipients,
                    email
                )
                dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                subtask_status.increment(succeeded=1)

            # Pop the user that was emailed off the end of the list only once they have
            # successfully been processed.  (That way, if there were a failure that
            # needed to be retried, the user is still on the list.)
            recipients_info[email] += 1
            to_list.pop()

        log.info(
            "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Successful Recipients: %s/%s, "
            "Failed Recipients: %s/%s",
            parent_task_id,
            task_id,
            email_id,
            total_recipients_successful,
            total_recipients,
            total_recipients_failed,
            total_recipients
        )
        # Addresses that were processed more than once within this subtask.
        duplicate_recipients = [
            "{0} ({1})".format(address, repetition)
            for address, repetition in recipients_info.most_common()
            if repetition > 1
        ]
        if duplicate_recipients:
            log.info(
                "BulkEmail ==> Task: %s, SubTask: %s, EmailId: %s, Total Duplicate Recipients [%s]: [%s]",
                parent_task_id,
                task_id,
                email_id,
                len(duplicate_recipients),
                ', '.join(duplicate_recipients)
            )

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_nomax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=True
        )

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception(
            ('Task %s: email with id %d caused send_course_email task to fail '
             'with "fatal" exception. %d emails unsent.'),
            task_id, email_id, num_pending
        )
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_status.increment(failed=num_pending, state=FAILURE)
        return subtask_status, exc

    except Exception as exc:  # pylint: disable=broad-except
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        log.exception(
            ('Task %s: email with id %d caused send_course_email task to fail '
             'with unexpected exception. Generating retry.'),
            task_id, email_id
        )
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_status, skip_retry_max=False
        )

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_status.increment(state=SUCCESS)
        # Successful completion is marked by an exception value of None.
        return subtask_status, None
    finally:
        # Clean up at the end.  Skip the close when no connection was ever
        # obtained, so a failure in get_connection() is not masked here.
        if connection is not None:
            connection.close()
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Run `update_fcn` over every StudentModule selected for this task and tally the outcomes.

    The problems to visit are taken from `task_input`:

      'problem_url': if present, a single problem; its usage key is mapped into `course_id`
          and its descriptor is loaded from the modulestore.
      'entrance_exam_url': if present, every problem returned by `get_problems_in_section`
          for that section. When both keys are supplied, the entrance-exam problems replace
          the single problem.
      'student': optional student identifier, handed to `_get_modules_to_update` together
          with `filter_fcn` to select the StudentModule instances to visit.

    For each selected StudentModule, `update_fcn` is called with three arguments: the module
    descriptor for the problem identified by the StudentModule's module_state_key, the
    StudentModule itself, and the `task_input` passed through unchanged. It must return one of
    UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; any other value
    raises UpdateProblemModuleStateError. Each call is counted as attempted before it is made.

    `_entry_id` is accepted as part of the common task-function signature but is not used here.

    `action_name` is the past-tense verb used in status messages. It is also used to tag the
    per-module timer, and the value 'overridden' marks this as an override-score task when
    selecting modules.

    The return value is whatever the final `TaskProgress.update_task_state()` call returns;
    by this module's convention that is a dict containing the task's results, with the
    following keys:

      'attempted': number of attempts made
      'succeeded': number of attempts that "succeeded"
      'skipped': number of attempts that "skipped"
      'failed': number of attempts that "failed"
      'total': number of possible updates to attempt
      'action_name': user-visible verb to use in status messages.  Should be past-tense.
          Pass-through of input `action_name`.
      'duration_ms': how long the task has (or had) been running.

    Because this is run internal to a task, it does not catch exceptions.  These are allowed
    to pass up to the next level, so that it can set the failure modes and capture the error
    trace in the InstructorTask and the result object.
    """
    started_at = time()

    # Task-specific arguments supplied by the caller.
    problem_location = task_input.get('problem_url')
    exam_location = task_input.get('entrance_exam_url')
    student = task_input.get('student')

    target_keys = []
    descriptors_by_key = {}

    # A single problem: resolve its key within this course and load its descriptor.
    if problem_location:
        problem_key = UsageKey.from_string(problem_location).map_into_course(course_id)
        target_keys = [problem_key]
        single_descriptor = modulestore().get_item(problem_key)
        descriptors_by_key = {unicode(problem_key): single_descriptor}

    # An entrance exam: every problem in the section, replacing anything gathered above.
    if exam_location:
        descriptors_by_key = get_problems_in_section(exam_location)
        target_keys = []
        for location in descriptors_by_key.keys():
            target_keys.append(UsageKey.from_string(location))

    is_override_task = action_name == ugettext_noop('overridden')
    student_modules = _get_modules_to_update(
        course_id,
        target_keys,
        student,
        filter_fcn,
        is_override_task,
    )

    total_modules = len(student_modules)
    progress = TaskProgress(action_name, total_modules, started_at)
    # Publish the initial state (with the total known) before any work is done.
    progress.update_task_state()

    for student_module in student_modules:
        progress.attempted += 1
        descriptor = descriptors_by_key[unicode(student_module.module_state_key)]
        timer_tags = [u'action:{name}'.format(name=action_name)]

        # Deliberately no try/except: an error propagates so that the task is
        # marked as FAILED, with a stack trace.
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=timer_tags):
            outcome = update_fcn(descriptor, student_module, task_input)
            if outcome == UPDATE_STATUS_SUCCEEDED:
                # The update_fcn did its work. Logging of failures is left
                # to the update_fcn itself.
                progress.succeeded += 1
            elif outcome == UPDATE_STATUS_FAILED:
                progress.failed += 1
            elif outcome == UPDATE_STATUS_SKIPPED:
                progress.skipped += 1
            else:
                raise UpdateProblemModuleStateError(
                    "Unexpected update_status returned: {}".format(outcome)
                )

    return progress.update_task_state()