Example #1
0
def get_moderators(course_id):
    """
    Generator that pages through the edX user API's forum "Moderator" role
    listing for one course and yields every entry it finds.

    Each yielded item is one element of a page's ``results`` list, with its
    ``url`` key removed when present.

    Args:
        course_id (str): course identifier, sent as the ``course_id`` query
            parameter

    Return:
        users (gen): generator of result entries, one per listed moderator
    """
    endpoint = "{base_url}/user_api/v1/forum_roles/Moderator/users/".format(
        base_url=settings.US_URL_BASE,
    )
    query = {
        'page_size': settings.US_RESULT_PAGE_SIZE,
        'page': 1,
        'course_id': course_id,
    }
    logger.info('calling user api for forum moderators')
    while True:
        # Only the HTTP round trip and JSON decoding are timed.
        with dog_stats_api.timer('notifier.get_moderators.time'):
            response = _http_get(endpoint, params=query, headers=_headers(), **_auth())
            page = response.json()
        for moderator in page['results']:
            moderator.pop('url', None)
            yield moderator
        # A null "next" link marks the last page.
        if page['next'] is None:
            return
        query['page'] += 1
Example #2
0
def get_digest_subscribers():
    """
    Generator function that calls the edX user API and yields a dict for each
    user opted in for digest notifications.

    Each yielded dict is the ``user`` entry of one result, with its ``url``
    key removed. Callers have been told to expect the keys "id", "name", and
    "email" (all strings) -- NOTE(review): confirm against the API response.
    """
    api_url = settings.US_URL_BASE + '/user_api/v1/user_prefs/'
    params = {
        'key': DIGEST_NOTIFICATION_PREFERENCE_KEY,
        'page_size': settings.US_RESULT_PAGE_SIZE,
        'page': 1
    }

    logger.info('calling user api for digest subscribers')
    while True:
        with dog_stats_api.timer('notifier.get_digest_subscribers.time'):
            response = _http_get(api_url,
                                 params=params,
                                 headers=_headers(),
                                 **_auth())
            # `Response.json` was a property before requests 1.0 and has been
            # a method since; accept both so the payload is always decoded.
            data = response.json
            if callable(data):
                data = data()
        for result in data['results']:
            user = result['user']
            user.pop('url', None)  # not used; tolerate it being absent
            yield user
        # A null "next" link marks the last page.
        if data['next'] is None:
            break
        params['page'] += 1
Example #3
0
def timer(metric_name, *args, **kwargs):
    """
    Delegate to dog_stats_api.timer, first passing any ``tags`` keyword
    argument through ``_clean_tags``.
    """
    try:
        raw_tags = kwargs["tags"]
    except KeyError:
        # No tags supplied: forward the call untouched.
        pass
    else:
        kwargs["tags"] = _clean_tags(raw_tags)
    return dog_stats_api.timer(metric_name, *args, **kwargs)
Example #4
0
def perform_request(method, url, data_or_params=None, *args, **kwargs):
    """
    Issue an HTTP request authenticated with the ``X-Edx-Api-Key`` header and
    return the decoded response.

    ``data_or_params`` is sent as the request body for 'post', 'put' and
    'patch', and as query parameters for every other method. Extra positional
    arguments are accepted but unused; the only keyword consulted is ``raw``.

    Returns the response text when ``raw`` is truthy, otherwise the
    JSON-decoded response text.

    Raises:
        CommentClientError: the request itself failed, or the status code is
            strictly between 200 and 500.
        CommentClientMaintenanceError: the status code is 503.
        CommentClientUnknownError: the status code is 500.
    """
    payload = {} if data_or_params is None else data_or_params
    auth_headers = {'X-Edx-Api-Key': settings.API_KEY}
    try:
        with dog_stats_api.timer('comment_client.request.time'):
            # Body-carrying methods get the payload as form data, the rest as
            # a query string.
            payload_kwarg = 'data' if method in ['post', 'put', 'patch'] else 'params'
            response = requests.request(
                method, url, headers=auth_headers, timeout=5, **{payload_kwarg: payload}
            )
    except Exception as err:
        log.exception("Trying to call {method} on {url} with params {params}".format(
            method=method, url=url, params=payload))
        # Reraise with a single exception type
        raise CommentClientError(str(err))

    status = response.status_code
    # NOTE(review): this range also rejects 2xx codes other than 200
    # (e.g. 201, 204) -- confirm the service only ever answers 200 on success.
    if 200 < status < 500:
        raise CommentClientError(response.text)
    # Heroku returns a 503 when an application is in maintenance mode
    if status == 503:
        raise CommentClientMaintenanceError(response.text)
    if status == 500:
        raise CommentClientUnknownError(response.text)

    if kwargs.get("raw", False):
        return response.text
    return json.loads(response.text)
Example #5
0
def get_digest_subscribers():
    """
    Generator function that calls the edX user API and yields a dict for each
    user opted in for digest notifications.

    Each yielded dict is the ``user`` entry of one result, with its ``url``
    key removed. Callers have been told to expect the keys "id", "name", and
    "email" (all strings) -- NOTE(review): confirm against the API response.
    """
    api_url = settings.US_URL_BASE + '/user_api/v1/user_prefs/'
    params = {
        'key': DIGEST_NOTIFICATION_PREFERENCE_KEY,
        'page_size': settings.US_RESULT_PAGE_SIZE,
        'page': 1
    }

    logger.info('calling user api for digest subscribers')
    while True:
        with dog_stats_api.timer('notifier.get_digest_subscribers.time'):
            response = _http_get(api_url, params=params, headers=_headers(), **_auth())
            # `Response.json` was a property before requests 1.0 and has been
            # a method since; accept both so the payload is always decoded.
            data = response.json
            if callable(data):
                data = data()
        for result in data['results']:
            user = result['user']
            user.pop('url', None)  # not used; tolerate it being absent
            yield user
        # A null "next" link marks the last page.
        if data['next'] is None:
            break
        params['page'] += 1
def perform_request(method, url, data_or_params=None, *args, **kwargs):
    """
    Issue an HTTP request that carries the service API key as an ``api_key``
    parameter and return the decoded response.

    ``data_or_params`` is sent as the request body for 'post', 'put' and
    'patch', and as query parameters for every other method. The caller's
    dict is never modified: the API key is added to a private copy. Extra
    positional arguments are accepted but unused; the only keyword consulted
    is ``raw``.

    Returns the response text when ``raw`` is truthy, otherwise the
    JSON-decoded response text.

    Raises:
        CommentClientError: the request itself failed, or the status code is
            strictly between 200 and 500.
        CommentClientMaintenanceError: the status code is 503.
        CommentClientUnknownError: the status code is 500.
    """
    caller_params = {} if data_or_params is None else data_or_params
    # Work on a copy so the secret never ends up in the caller's dict.
    request_params = dict(caller_params)
    request_params['api_key'] = settings.API_KEY
    try:
        with dog_stats_api.timer('comment_client.request.time'):
            if method in ['post', 'put', 'patch']:
                response = requests.request(method, url, data=request_params, timeout=5)
            else:
                response = requests.request(method, url, params=request_params, timeout=5)
    except Exception as err:
        # Log the caller's parameters only, and never an API key.
        loggable_params = {
            key: value for key, value in caller_params.items() if key != 'api_key'
        }
        log.exception("Trying to call {method} on {url} with params {params}".format(
            method=method, url=url, params=loggable_params))
        # Reraise with a single exception type
        raise CommentClientError(str(err))

    # NOTE(review): this range also rejects 2xx codes other than 200
    # (e.g. 201, 204) -- confirm the service only ever answers 200 on success.
    if 200 < response.status_code < 500:
        raise CommentClientError(response.text)
    # Heroku returns a 503 when an application is in maintenance mode
    elif response.status_code == 503:
        raise CommentClientMaintenanceError(response.text)
    elif response.status_code == 500:
        raise CommentClientUnknownError(response.text)
    else:
        if kwargs.get("raw", False):
            return response.text
        else:
            return json.loads(response.text)
Example #7
0
def generate_digest_content(user_ids, from_dt, to_dt):
    """
    Call the edX comments service notifications API for the given users and
    time window, and return ``Parser.parse`` applied to the decoded response.

    `user_ids` should be a sized collection of edX user ids (``len()`` is
    taken for logging); they are sent as one comma-separated string.
    `from_dt` and `to_dt` should be datetime.datetime objects representing
    the desired time window.

    Per the original contract, the parsed result produces (user_id, digest)
    tuples for users that have >0 discussion updates in the window, in
    undefined order, and may be empty (see notifier.digest.Digest for the
    digest structure) -- NOTE(review): confirm against Parser.parse.
    """

    # set up and execute the API call
    api_url = settings.CS_URL_BASE + "/api/v1/notifications"
    user_ids_string = ",".join(map(str, user_ids))
    # NOTE(review): "%z" renders as an empty string for naive datetimes.
    dt_format = "%Y-%m-%d %H:%M:%S%z"
    headers = {"X-Edx-Api-Key": settings.CS_API_KEY}
    data = {"user_ids": user_ids_string, "from": from_dt.strftime(dt_format), "to": to_dt.strftime(dt_format)}

    with dog_stats_api.timer("notifier.comments_service.time"):
        logger.info("calling comments service to pull digests for %d user(s)", len(user_ids))
        response = _http_post(api_url, headers=headers, data=data)
        # `Response.json` was a property before requests 1.0 and has been a
        # method since; accept both so the payload is always decoded.
        res = response.json
        if callable(res):
            res = res()

    return Parser.parse(res)
Example #8
0
def timer(metric_name, *args, **kwargs):
    """
    Delegate to dog_stats_api.timer, first passing any ``tags`` keyword
    argument through ``_clean_tags``.
    """
    try:
        raw_tags = kwargs["tags"]
    except KeyError:
        # No tags supplied: forward the call untouched.
        pass
    else:
        kwargs["tags"] = _clean_tags(raw_tags)
    return dog_stats_api.timer(metric_name, *args, **kwargs)
Example #9
0
def run_main_task(entry_id, task_fcn, action_name):
    """
    Run `task_fcn` for the InstructorTask identified by `entry_id` and return
    whatever it returns.

    `task_fcn` is called as ``task_fcn(entry_id, course_id, task_input,
    action_name)`` where:

     `entry_id` : the primary key for the InstructorTask entry representing the task.
     `course_id` : the id for the course, read from the entry.
     `task_input` : dict of task-specific arguments, JSON-decoded from the entry's task_input.
     `action_name` : past-tense verb to use for constructing status messages.

    By contract `task_fcn` should return a dict describing the task's result
    with these keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.
              Should be past-tense.  Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Raises ValueError when the entry's task_id differs from the id of the
    task currently running.
    """
    # A failed lookup is deliberately left to propagate to the caller.
    entry = InstructorTask.objects.get(pk=entry_id)

    # Pull the task inputs off the entry.
    task_id, course_id = entry.task_id, entry.course_id
    task_input = json.loads(entry.task_input)

    # Shared description used by every log line below.
    task_info_string = 'task "{task_id}": course "{course_id}" input "{task_input}"'.format(
        task_id=task_id,
        course_id=course_id,
        task_input=task_input,
    )
    TASK_LOG.info('Starting update (nothing %s yet): %s', action_name, task_info_string)

    # The entry must describe the task that is actually executing.
    request_task_id = _get_current_task().request.id
    if task_id != request_task_id:
        message = 'Requested task did not match actual task "{actual_id}": {task_info}'.format(
            actual_id=request_task_id,
            task_info=task_info_string,
        )
        TASK_LOG.error(message)
        raise ValueError(message)

    # Do the work, timed and tagged with the action name.
    timer_tags = ['action:{name}'.format(name=action_name)]
    with dog_stats_api.timer('instructor_tasks.time.overall', tags=timer_tags):
        task_progress = task_fcn(entry_id, course_id, task_input, action_name)

    # Release any queries that the connection has been hanging onto.
    reset_queries()

    TASK_LOG.info('Finishing %s: final: %s', task_info_string, task_progress)
    return task_progress
Example #10
0
def run_main_task(entry_id, task_fcn, action_name):
    """
    Run `task_fcn` for the InstructorTask identified by `entry_id` and return
    whatever it returns.

    `task_fcn` is called as ``task_fcn(entry_id, course_id, task_input,
    action_name)`` where:

     `entry_id` : the primary key for the InstructorTask entry representing the task.
     `course_id` : the id for the course, read from the entry.
     `task_input` : dict of task-specific arguments, JSON-decoded from the entry's task_input.
     `action_name` : past-tense verb to use for constructing status messages.

    By contract `task_fcn` should return a dict describing the task's result
    with these keys:

          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.
              Should be past-tense.  Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Raises ValueError when the entry's task_id differs from the id of the
    task currently running.
    """
    # A failed lookup is deliberately left to propagate to the caller.
    entry = InstructorTask.objects.get(pk=entry_id)

    # Pull the task inputs off the entry.
    task_id, course_id = entry.task_id, entry.course_id
    task_input = json.loads(entry.task_input)

    # Shared description used by every log line below.
    task_info_string = u'task "{task_id}": course "{course_id}" input "{task_input}"'.format(
        task_id=task_id,
        course_id=course_id,
        task_input=task_input,
    )
    TASK_LOG.info('Starting update (nothing %s yet): %s', action_name, task_info_string)

    # The entry must describe the task that is actually executing.
    request_task_id = _get_current_task().request.id
    if task_id != request_task_id:
        message = u'Requested task did not match actual task "{actual_id}": {task_info}'.format(
            actual_id=request_task_id,
            task_info=task_info_string,
        )
        TASK_LOG.error(message)
        raise ValueError(message)

    # Do the work, timed and tagged with the action name.
    timer_tags = ['action:{name}'.format(name=action_name)]
    with dog_stats_api.timer('instructor_tasks.time.overall', tags=timer_tags):
        task_progress = task_fcn(entry_id, course_id, task_input, action_name)

    # Release any queries that the connection has been hanging onto.
    reset_queries()

    TASK_LOG.info('Finishing %s: final: %s', task_info_string, task_progress)
    return task_progress
Example #11
0
def send(event):
    """
    Count the event, then hand it to every backend registered in
    ``backends``, timing each backend's ``send`` call under its own metric.
    """
    dog_stats_api.increment('track.send.count')

    for backend_name, handler in backends.iteritems():
        metric = 'track.send.backend.{0}'.format(backend_name)
        with dog_stats_api.timer(metric):
            handler.send(event)
Example #12
0
def course_email(email_id, to_list, course_title, course_url, image_url, throttle=False):
    """
    Timed wrapper around ``_send_course_email``; every argument is forwarded
    unchanged.

    `email_id` is the primary id of a CourseEmail object and `to_list` holds
    recipient objects whose keys are 'profile__name', 'email' (address), and
    'pk' (in the user table).  `course_title`, `course_url`, and `image_url`
    memoize course properties to save lookups.

    Per the original contract, mail goes to every address in `to_list`,
    multi-part in both plain text and html.  The timer metric is tagged with
    ``_statsd_tag(course_title)``.
    """
    timer_tags = [_statsd_tag(course_title)]
    with dog_stats_api.timer('course_email.single_task.time.overall', tags=timer_tags):
        _send_course_email(
            email_id,
            to_list,
            course_title,
            course_url,
            image_url,
            throttle,
        )
Example #13
0
def request_timer(request_id, method, url, tags=None):
    """
    Generator that yields once inside a dog_stats_api timer and, after being
    resumed normally, logs the wall-clock time that elapsed.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` by a
    decorator outside this view -- confirm at the definition site.
    """
    began = time()
    with dog_stats_api.timer('comment_client.request.time', tags=tags):
        yield
    elapsed = time() - began

    message = (
        "comment_client_request_log: request_id={request_id}, method={method}, "
        "url={url}, duration={duration}"
    ).format(request_id=request_id, method=method, url=url, duration=elapsed)
    log.info(message)
Example #14
0
 def s3(files_from, bucket, mode, deleteAfterCopy=False):
     """
     Upload every entry of the local directory `files_from` to `bucket` via
     ``upload_file_to_s3``, using `mode` as the destination directory name.

     When `deleteAfterCopy` is true each local file is removed right after
     its upload.  On any failure a datadog event is emitted and the
     exception is re-raised.
     """
     with dog_stats_api.timer('pearson.{0}'.format(mode), tags='s3'):
         try:
             for filename in os.listdir(files_from):
                 local_path = os.path.join(files_from, filename)
                 # The mode doubles as the destination directory name.
                 remote_path = os.path.join(mode, filename)
                 upload_file_to_s3(bucket, local_path, remote_path)
                 if not deleteAfterCopy:
                     continue
                 os.remove(files_from + '/' + filename)
         except:
             # Report the failure, then let the original exception propagate.
             dog_http_api.event('pearson {0}'.format(mode), 's3 archiving failed')
             raise
Example #15
0
def get_user(user_id):
    """
    Fetch one user from the edX user API.

    Returns the decoded user record with its ``url`` key removed on a 200
    response, and None on a 404.  Any other status goes through
    ``raise_for_status()``; if that does not raise, a generic Exception
    carrying the status code and reason is raised instead.
    """
    api_url = "{}/user_api/v1/users/{}/".format(settings.US_URL_BASE, user_id)
    logger.info("calling user api for user %s", user_id)
    with dog_stats_api.timer("notifier.get_user.time"):
        r = _http_get(api_url, headers=_headers(), **_auth())
        if r.status_code == 200:
            # `Response.json` was a property before requests 1.0 and has been
            # a method since; accept both so the payload is always decoded.
            user = r.json
            if callable(user):
                user = user()
            user.pop("url", None)  # not used; tolerate it being absent
            return user
        elif r.status_code == 404:
            return None
        else:
            r.raise_for_status()
            # Reached only for statuses raise_for_status() does not treat as errors.
            raise Exception("unhandled response from user service: %s %s" % (r.status_code, r.reason))
 def s3(files_from, bucket, mode, deleteAfterCopy=False):
     """
     Upload every entry of the local directory `files_from` to `bucket` via
     ``upload_file_to_s3``, using `mode` as the destination directory name.

     When `deleteAfterCopy` is true each local file is removed right after
     its upload.  On any failure a datadog event is emitted and the
     exception is re-raised.
     """
     with dog_stats_api.timer('pearson.{0}'.format(mode), tags='s3'):
         try:
             for filename in os.listdir(files_from):
                 local_path = os.path.join(files_from, filename)
                 # The mode doubles as the destination directory name.
                 remote_path = os.path.join(mode, filename)
                 upload_file_to_s3(bucket, local_path, remote_path)
                 if not deleteAfterCopy:
                     continue
                 os.remove(files_from + '/' + filename)
         except:
             # Report the failure, then let the original exception propagate.
             dog_http_api.event('pearson {0}'.format(mode), 's3 archiving failed')
             raise
Example #17
0
def get_user(user_id):
    """
    Fetch one user from the notifier API.

    Returns the JSON-decoded response body on a 200 response, and None on a
    404.  Any other status goes through ``raise_for_status()``; if that does
    not raise, a generic Exception carrying the status code and reason is
    raised instead.
    """
    endpoint = '{}/notifier_api/v1/users/{}/'.format(settings.US_URL_BASE, user_id)
    logger.info('calling user api for user %s', user_id)
    with dog_stats_api.timer('notifier.get_user.time'):
        response = _http_get(endpoint, headers=_headers(), **_auth())
        status = response.status_code
        if status == 404:
            return None
        if status == 200:
            return response.json()
        response.raise_for_status()
        # Reached only for statuses raise_for_status() does not treat as errors.
        details = (response.status_code, response.reason)
        raise Exception('unhandled response from user service: %s %s' % details)
Example #18
0
def request_timer(request_id, method, url, tags=None):
    """
    Generator that yields once inside a dog_stats_api timer and, after being
    resumed normally, logs the wall-clock time that elapsed.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` by a
    decorator outside this view -- confirm at the definition site.
    """
    began = time()
    with dog_stats_api.timer('comment_client.request.time', tags=tags):
        yield
    elapsed = time() - began

    template = (
        "comment_client_request_log: request_id={request_id}, method={method}, "
        "url={url}, duration={duration}"
    )
    log.info(template.format(request_id=request_id, method=method, url=url, duration=elapsed))
Example #19
0
def iterate_grades_for(course_id, students):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student passed in.

    If an error occurred, gradeset will be an empty dict and err_msg will be an
    exception message. If there was no error, err_msg is an empty string.

    Per the original contract, the gradeset is a dictionary with the
    following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module

    Only the grading itself is timed and guarded: the result is yielded after
    the timer and the try block have finished, so time spent by the consumer
    between iterations is not counted and an exception thrown into the
    generator is not mistaken for a grading failure.
    """
    course = courses.get_course_by_id(course_id)

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=['action:{}'.format(course_id)]):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}
                gradeset = grade(student, request, course)
                err_msg = ""
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # NOTE(review): `exc.message` exists only on Python 2 exceptions.
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course_id,
                    exc.message
                )
                gradeset = {}
                err_msg = exc.message
        # Yield outside the timer/try so suspension time is not measured.
        yield student, gradeset, err_msg
Example #20
0
def iterate_grades_for(course_id, students):
    """Given a course_id and an iterable of students (User), yield a tuple of:

    (student, gradeset, err_msg) for every student passed in.

    If an error occurred, gradeset will be an empty dict and err_msg will be an
    exception message. If there was no error, err_msg is an empty string.

    Per the original contract, the gradeset is a dictionary with the
    following fields:

    - grade : A final letter grade.
    - percent : The final percent for the class (rounded up).
    - section_breakdown : A breakdown of each section that makes
        up the grade. (For display)
    - grade_breakdown : A breakdown of the major components that
        make up the final grade. (For display)
    - raw_scores: contains scores for every graded module

    Only the grading itself is timed and guarded: the result is yielded after
    the timer and the try block have finished, so time spent by the consumer
    between iterations is not counted and an exception thrown into the
    generator is not mistaken for a grading failure.
    """
    course = courses.get_course_by_id(course_id)

    # We make a fake request because grading code expects to be able to look at
    # the request. We have to attach the correct user to the request before
    # grading that student.
    request = RequestFactory().get('/')

    for student in students:
        with dog_stats_api.timer('lms.grades.iterate_grades_for', tags=['action:{}'.format(course_id)]):
            try:
                request.user = student
                # Grading calls problem rendering, which calls masquerading,
                # which checks session vars -- thus the empty session dict below.
                # It's not pretty, but untangling that is currently beyond the
                # scope of this feature.
                request.session = {}
                gradeset = grade(student, request, course)
                err_msg = ""
            except Exception as exc:  # pylint: disable=broad-except
                # Keep marching on even if this student couldn't be graded for
                # some reason, but log it for future reference.
                # NOTE(review): `exc.message` exists only on Python 2 exceptions.
                log.exception(
                    'Cannot grade student %s (%s) in course %s because of exception: %s',
                    student.username,
                    student.id,
                    course_id,
                    exc.message
                )
                gradeset = {}
                err_msg = exc.message
        # Yield outside the timer/try so suspension time is not measured.
        yield student, gradeset, err_msg
Example #21
0
def course_email(email_id, to_list, course_title, course_url, image_url, throttle=False):
    """
    Timed wrapper around ``_send_course_email``; every argument is forwarded
    unchanged.

    `email_id` is the primary id of a CourseEmail object and `to_list` holds
    recipient objects whose keys are 'profile__name', 'email' (address), and
    'pk' (in the user table).  `course_title`, `course_url`, and `image_url`
    memoize course properties to save lookups.

    Per the original contract, mail goes to every address in `to_list`,
    multi-part in both plain text and html.  The timer metric is tagged with
    ``_statsd_tag(course_title)``.
    """
    timer_tags = [_statsd_tag(course_title)]
    with dog_stats_api.timer('course_email.single_task.time.overall', tags=timer_tags):
        _send_course_email(
            email_id,
            to_list,
            course_title,
            course_url,
            image_url,
            throttle,
        )
        def sftp(files_from, files_to, mode, deleteAfterCopy=False):
            """
            Copy files between a local directory and the Pearson SFTP server.

            With ``mode == 'export'`` every entry of the local directory
            `files_from` is uploaded into the remote directory `files_to`;
            with any other mode every non-directory entry of the remote
            directory `files_from` is downloaded into the local directory
            `files_to`.  When `deleteAfterCopy` is true the source copy of
            each file is removed once it has been transferred.

            Raises CommandError when the remote directory does not exist.
            Any failure emits a datadog error event and is re-raised.  The
            SFTP client and the transport are closed on the way out, but only
            if they were actually created.
            """
            with dog_stats_api.timer('pearson.{0}'.format(mode), tags='sftp'):
                # Bound up front so cleanup can tell what was really opened; a
                # failed connection must not be masked by an unbound-name
                # error raised from the finally clause.
                transport = None
                client = None
                try:
                    transport = paramiko.Transport(
                        (settings.PEARSON['SFTP_HOSTNAME'], 22))
                    transport.connect(username=settings.PEARSON['SFTP_USERNAME'],
                                      password=settings.PEARSON['SFTP_PASSWORD'])
                    client = paramiko.SFTPClient.from_transport(transport)

                    if mode == 'export':
                        try:
                            client.chdir(files_to)
                        except IOError:
                            raise CommandError(
                                'SFTP destination path does not exist: {}'.
                                format(files_to))
                        for filename in os.listdir(files_from):
                            client.put(files_from + '/' + filename, filename)
                            if deleteAfterCopy:
                                os.remove(os.path.join(files_from, filename))
                    else:
                        try:
                            client.chdir(files_from)
                        except IOError:
                            raise CommandError(
                                'SFTP source path does not exist: {}'.format(
                                    files_from))
                        for filename in client.listdir('.'):
                            # skip subdirectories
                            if not S_ISDIR(client.stat(filename).st_mode):
                                client.get(filename, files_to + '/' + filename)
                                # delete files from sftp server once they are
                                # successfully pulled off:
                                if deleteAfterCopy:
                                    client.remove(filename)
                except:
                    dog_http_api.event('pearson {0}'.format(mode),
                                       'sftp uploading failed',
                                       alert_type='error')
                    raise
                finally:
                    # Close the transport even if closing the client fails.
                    try:
                        if client is not None:
                            client.close()
                    finally:
                        if transport is not None:
                            transport.close()
        def sftp(files_from, files_to, mode, deleteAfterCopy=False):
            """
            Copy files between a local directory and the Pearson SFTP server.

            With ``mode == 'export'`` every entry of the local directory
            `files_from` is uploaded into the remote directory `files_to`;
            with any other mode every non-directory entry of the remote
            directory `files_from` is downloaded into the local directory
            `files_to`.  When `deleteAfterCopy` is true the source copy of
            each file is removed once it has been transferred.

            Raises CommandError when the remote directory does not exist.
            Any failure emits a datadog error event and is re-raised.  The
            SFTP client and the transport are closed on the way out, but only
            if they were actually created.
            """
            with dog_stats_api.timer('pearson.{0}'.format(mode), tags='sftp'):
                # Bound up front so cleanup can tell what was really opened; a
                # failed connection must not be masked by an unbound-name
                # error raised from the finally clause.
                transport = None
                client = None
                try:
                    transport = paramiko.Transport((
                        settings.PEARSON['SFTP_HOSTNAME'], 22))
                    transport.connect(username=settings.PEARSON['SFTP_USERNAME'],
                                      password=settings.PEARSON['SFTP_PASSWORD'])
                    client = paramiko.SFTPClient.from_transport(transport)

                    if mode == 'export':
                        try:
                            client.chdir(files_to)
                        except IOError:
                            raise CommandError(
                                'SFTP destination path does not exist: {}'.format(files_to))
                        for filename in os.listdir(files_from):
                            client.put(files_from + '/' + filename, filename)
                            if deleteAfterCopy:
                                os.remove(os.path.join(files_from, filename))
                    else:
                        try:
                            client.chdir(files_from)
                        except IOError:
                            raise CommandError(
                                'SFTP source path does not exist: {}'.format(files_from))
                        for filename in client.listdir('.'):
                            # skip subdirectories
                            if not S_ISDIR(client.stat(filename).st_mode):
                                client.get(filename, files_to + '/' + filename)
                                # delete files from sftp server once they are
                                # successfully pulled off:
                                if deleteAfterCopy:
                                    client.remove(filename)
                except:
                    dog_http_api.event('pearson {0}'.format(mode),
                                       'sftp uploading failed',
                                       alert_type='error')
                    raise
                finally:
                    # Close the transport even if closing the client fails.
                    try:
                        if client is not None:
                            client.close()
                    finally:
                        if transport is not None:
                            transport.close()
Example #24
0
def send(event):
    """
    Send an event object to all the initialized backends and, when
    ``settings.ANALITICA_ACTIVE`` is set, also POST it as JSON to
    ``settings.ANALITICA_TRACK_URL``.

    The event is stamped with a ``time`` key (current epoch seconds) before
    the POST.  A non-200 answer is logged as an error, using the decoded JSON
    body when there is one and the raw response text otherwise.
    """
    dog_stats_api.increment('track.send.count')

    for name, backend in backends.iteritems():
        with dog_stats_api.timer('track.send.backend.{0}'.format(name)):
            backend.send(event)

    if not settings.ANALITICA_ACTIVE:
        return

    event['time'] = time.time()
    # NOTE(review): no timeout is set, so a stalled endpoint blocks the caller.
    r = requests.post(
        settings.ANALITICA_TRACK_URL,
        headers={'Authorization': settings.ANALITICA_TOKEN},
        json=event
    )
    if r.status_code != 200:
        # Error bodies are not guaranteed to be JSON; reporting the failure
        # must not itself raise.
        try:
            detail = r.json()
        except ValueError:
            detail = r.text
        log.error("Failed to post to the tracking backend with error {e}".format(e=detail))
Example #25
0
def generate_digest_content(user_ids, from_dt, to_dt):
    """
    Call the edX comments service notifications API for the given users and
    time window, and return ``Parser.parse`` applied to the decoded response.

    `user_ids` should be a sized collection of edX user ids (``len()`` is
    taken for logging); they are sent as one comma-separated string.
    `from_dt` and `to_dt` should be datetime.datetime objects representing
    the desired time window.

    Per the original contract, the parsed result produces (user_id, digest)
    tuples for users that have >0 discussion updates in the window, in
    undefined order, and may be empty (see notifier.digest.Digest for the
    digest structure) -- NOTE(review): confirm against Parser.parse.
    """

    # set up and execute the API call
    api_url = settings.CS_URL_BASE + '/api/v1/notifications'
    user_ids_string = ','.join(map(str, user_ids))
    # NOTE(review): '%z' renders as an empty string for naive datetimes.
    dt_format = '%Y-%m-%d %H:%M:%S%z'
    headers = {
        'X-Edx-Api-Key': settings.CS_API_KEY,
    }
    data = {
        'user_ids': user_ids_string,
        'from': from_dt.strftime(dt_format),
        'to': to_dt.strftime(dt_format)
    }

    with dog_stats_api.timer('notifier.comments_service.time'):
        logger.info('calling comments service to pull digests for %d user(s)',
                    len(user_ids))
        response = _http_post(api_url, headers=headers, data=data)
        # `Response.json` was a property before requests 1.0 and has been a
        # method since; accept both so the payload is always decoded.
        res = response.json
        if callable(res):
            res = res()

    return Parser.parse(res)
Example #26
0
def generate_digest_content(users_by_id, from_dt, to_dt):
    """
    Call the edX comments service notifications API for the given users and
    time window, and return ``process_cs_response`` applied to the decoded
    response and `users_by_id`.

    `users_by_id` should be a dict of {user_id: user} where user_id is an edX
    user id and user is the user dict returned by the edx notifier_api; its
    keys are sent sorted, as one comma-separated string.
    `from_dt` and `to_dt` should be datetime.datetime objects representing
    the desired time window.

    Per the original contract, the result produces (user_id, digest) tuples
    for users that have >0 discussion updates in the window, in undefined
    order, and may be empty (see notifier.digest.Digest for the digest
    structure) -- NOTE(review): confirm against process_cs_response.
    """
    # Assemble the request.
    endpoint = settings.CS_URL_BASE + '/api/v1/notifications'
    joined_ids = ','.join(str(user_id) for user_id in sorted(users_by_id.keys()))
    timestamp_format = '%Y-%m-%d %H:%M:%S%z'
    request_headers = {'X-Edx-Api-Key': settings.CS_API_KEY}
    payload = {
        'user_ids': joined_ids,
        'from': from_dt.strftime(timestamp_format),
        'to': to_dt.strftime(timestamp_format),
    }

    # Only the HTTP call (and its log line) is timed; decoding happens after.
    with dog_stats_api.timer('notifier.comments_service.time'):
        logger.info('calling comments service to pull digests for %d user(s)', len(users_by_id))
        response = _http_post(endpoint, headers=request_headers, data=payload)

    decoded = response.json()
    return process_cs_response(decoded, users_by_id)
Example #27
0
def get_digest_subscribers():
    """
    Generator function that calls the edX user API and yields a dict for each
    user opted in for digest notifications.

    The API is read one page at a time (page size taken from
    `settings.US_RESULT_PAGE_SIZE`) until a page reports `next` as None.

    Yields:
        dict: the `user` entry of each result, with its "url" key removed.
        Per the original documentation the remaining keys are "id", "name",
        and "email" (all strings).
    """
    api_url = settings.US_URL_BASE + "/user_api/v1/user_prefs/"
    params = {
        "key": DIGEST_NOTIFICATION_PREFERENCE_KEY,
        "page_size": settings.US_RESULT_PAGE_SIZE,
        "page": 1,
    }

    logger.info("calling user api for digest subscribers, url :%s", api_url)
    while True:
        with dog_stats_api.timer("notifier.get_digest_subscribers.time"):
            # Log only the query parameters.  The auth kwargs and request
            # headers presumably carry credentials (API key / basic auth), so
            # they must not be written to the application log.
            logger.info("params: %s", params)
            response = _http_get(api_url, params=params, headers=_headers(), **_auth())
            # `json` is a property on old `requests` releases and a method on
            # requests >= 1.0; accept both so the payload is always decoded.
            data = response.json
            if callable(data):
                data = data()
            logger.info("results : %s", data)
        for result in data["results"]:
            user = result["user"]
            # "url" is not used by callers; tolerate records that lack it.
            user.pop("url", None)
            yield user
        if data["next"] is None:
            break
        params["page"] += 1
Example #28
0
def send_course_email(entry_id, email_id, to_list, global_email_context,
                      subtask_status_dict):
    """
    Email one batch of recipients and record the outcome on the owning
    InstructorTask.

    Arguments:
      * `entry_id`: id of the InstructorTask object on which progress is recorded.
      * `email_id`: id of the CourseEmail model to be sent.
      * `to_list`: list of recipients, each a dict with the keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict of template values that are specific to this
        email but shared by every recipient.  'name' and 'email' are not part of
        it; they are supplied per recipient from `to_list`.
      * `subtask_status_dict`: dict form of the current subtask status, with keys:

        'task_id' : id of subtask, used to carry task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of retries for conditions that have no maximum
            retry count
        'retried_withmax' : number of retries for conditions that do have a
            maximum retry count
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

        On the first invocation most counters are zero; on a retry they carry
        the progress made so far.

    The actual sending is delegated to `_send_course_email`.  On success the
    new status is stored and returned as a dict (so Celery can serialize it to
    JSON).  Any failure is re-raised after being logged: a RetryTaskError is
    raised without touching the stored status, any other reported exception is
    raised after the status has been stored, and an unexpected exception marks
    every recipient in `to_list` as failed before propagating.
    """
    status = SubtaskStatus.from_dict(subtask_status_dict)
    task_id = status.task_id
    recipient_count = len(to_list)
    log.info(
        "Preparing to send email %s to %d recipients as subtask %s for instructor task %d: context = %s, status=%s",
        email_id,
        recipient_count,
        task_id,
        entry_id,
        global_email_context,
        status,
    )

    # Make sure this subtask really belongs to the InstructorTask entry and has
    # not already been completed.  A failure here raises and ends the subtask
    # immediately.  Duplicate subtasks can appear when the parent task ran
    # twice, or when Celery lost its broker connection and requeued work;
    # perform_delegate_email_batches() is meant to catch the parent-task case,
    # and this check is the fallback for it and for a re-run of this subtask.
    check_subtask_is_valid(entry_id, task_id, status)

    failure = None
    updated_status = None
    try:
        title = global_email_context['course_title']
        timer_tags = [_statsd_tag(title)]
        with dog_stats_api.timer('course_email.single_task.time.overall', tags=timer_tags):
            updated_status, failure = _send_course_email(
                entry_id, email_id, to_list, global_email_context, status,
            )
    except Exception:
        # Something went wrong that the sender itself did not anticipate.
        # Record the failure on the entry before letting the exception escape.
        log.exception("Send-email task %s for email %s: failed unexpectedly!",
                      task_id, email_id)
        # There is no way to know how far the send got, so every recipient is
        # counted as failed; that at least keeps the counters consistent.
        status.increment(failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, task_id, status)
        raise

    if failure is not None:
        if isinstance(failure, RetryTaskError):
            # Celery needs the RetryTaskError raised to schedule the retry.
            # Progress made before the retry condition is assumed to have been
            # stored already when the retry was requested, so only log here.
            log.warning("Send-email task %s for email %s: being retried",
                        task_id, email_id)
            raise failure  # pylint: disable=E0702

        log.error("Send-email task %s for email %s: failed: %s",
                  task_id, email_id, failure)
        update_subtask_status(entry_id, task_id, updated_status)
        raise failure  # pylint: disable=E0702

    # Success: persist the progress on the InstructorTask object.
    log.info("Send-email task %s for email %s: succeeded", task_id, email_id)
    update_subtask_status(entry_id, task_id, updated_status)

    # Hand the status back as a plain dict so Celery can serialize it to JSON.
    log.info("Send-email task %s for email %s: returning status %s",
             task_id, email_id, updated_status)
    return updated_status.to_dict()
Example #29
0
def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
        The list is consumed in place: each recipient is popped off the end once
        processed, so on exit it holds only the recipients not yet handled.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
      * `subtask_status` : dict containing values representing current status.  Keys are:

        'task_id' : id of subtask.  This is used to pass task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of times the subtask has been retried for conditions that
            should not have a maximum count applied
        'retried_withmax' : number of times the subtask has been retried for conditions that
            should have a maximum count applied
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.

    Returns a tuple of two values:
      * First value is a dict which represents current progress at the end of this call.  Keys are
        the same as for the input subtask_status.

      * Second value is an exception returned by the innards of the method, indicating a fatal error.
        In this case, the number of recipients that were not sent have already been added to the
        'failed' count above.  It is None on successful completion.

      On retryable errors the return value is whatever `_submit_for_retry` returns
      (presumably a tuple of the same shape -- confirm against that helper).

    Raises:
      CourseEmail.DoesNotExist: if no CourseEmail with id `email_id` exists.
    """
    # Get information from current task's request:
    task_id = subtask_status['task_id']

    # collect stats on progress:
    num_optout = 0
    num_sent = 0
    num_error = 0

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception("Task %s: could not find email id:%s to send.", task_id, email_id)
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if (subtask_status['retried_nomax'] + subtask_status['retried_withmax']) == 0:
        to_list, num_optout = _filter_optouts_from_recipients(to_list, course_email.course_id)

    course_title = global_email_context['course_title']
    subject = "[" + course_title + "] " + course_email.subject
    from_addr = _get_source_address(course_email.course_id, course_title)

    course_email_template = CourseEmailTemplate.get_template()

    # Bind the name before entering the try block: if get_connection() itself
    # raises, the `finally` clause must not trip over an unbound local and
    # replace the handled outcome with an UnboundLocalError.
    connection = None
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(
                subject,
                plaintext_msg,
                from_addr,
                [email],
                connection=connection
            )
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status['retried_nomax'] > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            try:
                log.debug('Email with id %s to be sent to %s', email_id, email)

                with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    # A bare raise keeps the original traceback intact.
                    raise
                else:
                    # This will fall through and not retry the message.
                    log.warning('Task %s: email with id %s not delivered to %s due to error %s', task_id, email_id, email, exc.smtp_error)
                    dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                    num_error += 1

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                log.warning('Task %s: email with id %s not delivered to %s due to error %s', task_id, email_id, email, exc)
                dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
                num_error += 1

            else:
                dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                num_sent += 1

            # Pop the user that was emailed off the end of the list only once they have
            # successfully been processed.  (That way, if there were a failure that
            # needed to be retried, the user is still on the list.)
            to_list.pop()

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_progress = increment_subtask_status(
            subtask_status,
            succeeded=num_sent,
            failed=num_error,
            skipped=num_optout,
            retried_nomax=1,
            state=RETRY
        )
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=True
        )

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_progress = increment_subtask_status(
            subtask_status,
            succeeded=num_sent,
            failed=num_error,
            skipped=num_optout,
            retried_withmax=1,
            state=RETRY
        )
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=False
        )

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception('Task %s: email with id %d caused send_course_email task to fail with "fatal" exception.  %d emails unsent.',
                      task_id, email_id, num_pending)
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_progress = increment_subtask_status(
            subtask_status,
            succeeded=num_sent,
            failed=(num_error + num_pending),
            skipped=num_optout,
            state=FAILURE
        )
        return subtask_progress, exc

    except Exception as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)])
        log.exception('Task %s: email with id %d caused send_course_email task to fail with unexpected exception.  Generating retry.',
                      task_id, email_id)
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_progress = increment_subtask_status(
            subtask_status,
            succeeded=num_sent,
            failed=num_error,
            skipped=num_optout,
            retried_withmax=1,
            state=RETRY
        )
        return _submit_for_retry(
            entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=False
        )

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_progress = increment_subtask_status(
            subtask_status,
            succeeded=num_sent,
            failed=num_error,
            skipped=num_optout,
            state=SUCCESS
        )
        # Successful completion is marked by an exception value of None.
        return subtask_progress, None
    finally:
        # Clean up at the end -- but only if a connection was actually obtained.
        if connection is not None:
            connection.close()
Example #30
0
def send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
    """
    Email one batch of recipients and record the outcome on the owning
    InstructorTask.

    Arguments:
      * `entry_id`: id of the InstructorTask object on which progress is recorded.
      * `email_id`: id of the CourseEmail model to be sent.
      * `to_list`: list of recipients, each a dict with the keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict of template values that are specific to this
        email but shared by every recipient.  'name' and 'email' are not part of
        it; they are supplied per recipient from `to_list`.
      * `subtask_status`: dict describing the current subtask status, with keys:

        'task_id' : id of subtask, used to carry task information across retries.
        'attempted' : number of attempts -- should equal succeeded plus failed
        'succeeded' : number that succeeded in processing
        'skipped' : number that were not processed.
        'failed' : number that failed during processing
        'retried_nomax' : number of retries for conditions that have no maximum
            retry count
        'retried_withmax' : number of retries for conditions that do have a
            maximum retry count
        'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS)

        On the first invocation most counters are zero; on a retry they carry
        the progress made so far.

    The actual sending is delegated to `_send_course_email`.  On success the
    new status is stored on the InstructorTask and returned.  Any failure is
    re-raised after being logged: a RetryTaskError is raised without touching
    the stored status, any other reported exception is raised after the status
    has been stored, and an unexpected exception marks every recipient in
    `to_list` as failed before propagating.
    """
    task_id = subtask_status['task_id']
    recipient_count = len(to_list)
    log.info(
        "Preparing to send email %s to %d recipients as subtask %s for instructor task %d: context = %s, status=%s",
        email_id,
        recipient_count,
        task_id,
        entry_id,
        global_email_context,
        subtask_status,
    )

    # Confirm that the InstructorTask entry actually knows about this subtask;
    # if it does not, the check raises and the subtask fails immediately.
    # Duplicate subtasks appear when the parent task has been run twice.
    # perform_delegate_email_batches() is expected to catch that, and this
    # check is the fallback.
    check_subtask_is_valid(entry_id, task_id)

    failure = None
    updated_status = None
    try:
        title = global_email_context['course_title']
        timer_tags = [_statsd_tag(title)]
        with dog_stats_api.timer('course_email.single_task.time.overall', tags=timer_tags):
            updated_status, failure = _send_course_email(
                entry_id, email_id, to_list, global_email_context, subtask_status,
            )
    except Exception:
        # Something went wrong that the sender itself did not anticipate.
        # Record the failure on the entry before letting the exception escape.
        log.exception("Send-email task %s: failed unexpectedly!", task_id)
        # There is no way to know how far the send got, so every recipient is
        # counted as failed; that at least keeps the counters consistent.
        updated_status = increment_subtask_status(subtask_status, failed=recipient_count, state=FAILURE)
        update_subtask_status(entry_id, task_id, updated_status)
        raise

    if failure is not None:
        if isinstance(failure, RetryTaskError):
            # Celery needs the RetryTaskError raised to schedule the retry.
            # Progress made before the retry condition is assumed to have been
            # stored already when the retry was requested, so only log here.
            log.warning("Send-email task %s: being retried", task_id)
            raise failure  # pylint: disable=E0702

        log.error("Send-email task %s: failed: %s", task_id, failure)
        update_subtask_status(entry_id, task_id, updated_status)
        raise failure  # pylint: disable=E0702

    # Success: persist the progress on the InstructorTask object.
    log.info("Send-email task %s: succeeded", task_id)
    update_subtask_status(entry_id, task_id, updated_status)

    log.info("Send-email task %s: returning status %s", task_id, updated_status)
    return updated_status
Example #31
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
    """
    Apply `update_fcn` to every matching StudentModule and report progress.

    The StudentModule rows visited are those for `course_id` whose
    `module_state_key` equals `task_input['problem_url']`.  When
    `task_input['student']` is present it narrows the rows to one student: a
    value containing "@" is looked up as an email address, anything else as a
    username (the lookup raises if no such user exists).  Without it, the
    modules of all students for the problem are visited.

    If `filter_fcn` is not None it is called with the query built so far and
    must return the (further filtered) query to use.

    `update_fcn` is called once per remaining StudentModule with two
    arguments: the module descriptor loaded for the module_state_key, and the
    StudentModule itself.  It must return one of UPDATE_STATUS_SUCCEEDED,
    UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; any other value raises
    UpdateProblemModuleStateError.  Logging of individual failures is left to
    `update_fcn`.

    The current Celery task's state is set to PROGRESS, with the progress dict
    as meta, once before the loop and again after each module.

    Returns the last progress dict, with the following keys:

          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'attempted': number of attempts made
          'succeeded': number of attempts that "succeeded"
          'skipped': number of attempts that "skipped"
          'failed': number of attempts that "failed"
          'total': number of possible updates to attempt
          'duration_ms': how long the task has (or had) been running.

    Exceptions are deliberately not caught here: this runs inside a task, and
    the next level up records the failure and the stack trace in the
    InstructorTask and the result object.
    """
    # Remember when the task started so durations can be reported.
    started_at = time()

    problem_url = task_input.get('problem_url')
    student_identifier = task_input.get('student')

    # Load the descriptor of the problem being updated.
    descriptor = modulestore().get_instance(course_id, problem_url)

    # Start from every student's module for that problem...
    candidates = StudentModule.objects.filter(course_id=course_id,
                                              module_state_key=problem_url)

    # ...and optionally narrow to a single student.  The user lookup is
    # allowed to raise when the student cannot be found.
    if student_identifier is not None:
        if "@" in student_identifier:
            student = User.objects.get(email=student_identifier)
        else:
            student = User.objects.get(username=student_identifier)
        if student is not None:
            candidates = candidates.filter(student_id=student.id)

    if filter_fcn is not None:
        candidates = filter_fcn(candidates)

    # Counters for the main loop.
    num_attempted = 0
    num_succeeded = 0
    num_skipped = 0
    num_failed = 0
    num_total = candidates.count()

    def snapshot_progress():
        """Build a dict describing the task's progress right now."""
        elapsed_ms = int((time() - started_at) * 1000)
        return {
            'action_name': action_name,
            'attempted': num_attempted,
            'succeeded': num_succeeded,
            'skipped': num_skipped,
            'failed': num_failed,
            'total': num_total,
            'duration_ms': elapsed_ms,
        }

    task_progress = snapshot_progress()
    _get_current_task().update_state(state=PROGRESS, meta=task_progress)

    for student_module in candidates:
        num_attempted += 1
        # No try/except on purpose: an error propagates, and the task ends up
        # marked as FAILED together with a stack trace.
        with dog_stats_api.timer('instructor_tasks.module.time.step', tags=['action:{name}'.format(name=action_name)]):
            outcome = update_fcn(descriptor, student_module)
            if outcome == UPDATE_STATUS_SUCCEEDED:
                # The update function did some work for this module.
                num_succeeded += 1
            elif outcome == UPDATE_STATUS_FAILED:
                num_failed += 1
            elif outcome == UPDATE_STATUS_SKIPPED:
                num_skipped += 1
            else:
                raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(outcome))

        # Publish the new counters after every module.
        task_progress = snapshot_progress()
        _get_current_task().update_state(state=PROGRESS, meta=task_progress)

    return task_progress
Example #32
0
def deploy(auto_migrate=False):
    """
    Deploys the cached packages to the specified hosts.
    Packages are installed while the server is out of the loadbalancer pool

    Steps, each timed under the 'fabric.deployment' metric with a `step:` tag:
      1. run the configured 'pre' commands;
      2. upload git.sh to /tmp/git.sh;
      3. for every package in `env.package_descriptors`: check out the
         requested revision in an existing repo, clone a missing one, or
         remove the repo when the revision is 'absent'; then install its
         requirements, gems and npm packages;
      4. run the configured 'post' commands;
      5. if the 'mitx' package is part of the deploy, run the migration check.

    Args:
        auto_migrate (bool): passed through to 'migrate_check.migrate_check'.

    Raises:
        Exception: if a package's repo root exists but contains no '.git'.
    """

    packages = env.package_descriptors

    # If these are not defined it means that the confirm
    # step was skipped, in this case we figure out pre and
    # post steps here
    if not hasattr(env, 'pre_post'):
        pkg_config = PackageInfo()
        env.pre_post = pkg_config.pre_post_actions([pkg.name
                                                   for pkg in packages])

    contains_content = any(pkg.name.startswith('content') for pkg in packages)
    contains_code = not all(pkg.name.startswith('content') for pkg in packages)

    base_tags = [
        'deploying_to_host:' + env.host,
    ] + instance_tags_for_current_host()

    if contains_content:
        base_tags.append('type:content')
    if contains_code:
        base_tags.append('type:code')

    package_tags = base_tags + ['package:' + pkg.name for pkg in packages]
    metric_name = 'fabric.deployment'

    # pre checkout commands
    # NOTE(review): these run with GIT_SSH pointing at /tmp/git.sh before that
    # file is uploaded below -- confirm no 'pre' command relies on it.
    with dog_stats_api.timer(metric_name, tags=package_tags +
                             ['step:pre_commands']):
        with prefix("export GIT_SSH=/tmp/git.sh"):
            for cmd in env.pre_post['pre']:
                noopable(sudo)(cmd)

    # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3.
    put(os.path.join(os.path.dirname(__file__), 'git.sh'),
        '/tmp/git.sh', mode=0o755, use_sudo=True)
    for pkg in packages:
        existing_repo = files.exists(pkg.repo_root, use_sudo=True)

        # The conditional expression binds more loosely than `+`, so it must
        # be parenthesised; otherwise a missing repo is tagged with the bare
        # string 'absent' instead of 'existance:absent'.  The tag key keeps
        # its historical spelling so existing metrics queries still match.
        repo_tags = base_tags + [
            'package:' + pkg.name,
            'existance:' + ('existing' if existing_repo else 'absent'),
        ]

        with dog_stats_api.timer(metric_name, tags=repo_tags + ['step:clone']):
            if existing_repo:
                if not files.exists(os.path.join(pkg.repo_root, '.git'),
                                    use_sudo=True):
                    raise Exception("Repo root not a git repo - {0}".format(
                        os.path.join(pkg.repo_root, '.git')))
                with cd(pkg.repo_root):
                    if pkg.revision == 'absent':
                        noopable(sudo)('rm -rf {0}'.format(pkg.repo_root))
                    else:
                        checkout(pkg.revision)
            else:
                with cd(os.path.dirname(pkg.repo_root)):
                    if pkg.revision != 'absent':
                        clone(pkg.repo_org, pkg.repo_name, pkg.name, pkg.revision)
            # A '~' in the package name separates it from a suffix that is
            # handed to the course XML update.
            if '~' in pkg.name:
                _update_course_xml(pkg, pkg.name.split('~')[1])

        with dog_stats_api.timer(metric_name, tags=repo_tags +
                                 ['step:requirements']):
            _install_requirements(pkg)
            _install_gemfile(pkg)
            _install_npm_package(pkg)

#        with dog_stats_api.timer(metric_name, tags=repo_tags + ['step:fact']):
#            # drop a file for puppet so it knows that
#            # code is installed for the service
#            with cd('/etc/facter/facts.d'):
#                pkg_config = PackageInfo()
#                if pkg.repo_name in pkg_config.service_repos:
#                    # facts can't have dashes so they are converted
#                    # to underscores
#                    noopable(sudo)(
#                        'echo "{0}_installed=true" > {0}_installed.txt'.format(
#                        pkg.repo_name.replace("-", "_")))

#    with dog_stats_api.timer(metric_name, tags=package_tags +
#                             ['step:pkg_version']):
#        pkg_version()

    with dog_stats_api.timer(metric_name, tags=package_tags +
                             ['step:post_commands']):
        # post checkout commands
        with prefix("export GIT_SSH=/tmp/git.sh"):
            for cmd in env.pre_post['post']:
                noopable(sudo)(cmd)

    if 'mitx' in [pkg.name for pkg in packages]:
        # do not slow down content deploys by checking
        # for migrations
        execute('migrate_check.migrate_check', auto_migrate)
Example #33
0
def deploy(auto_migrate=False):
    """
    Deploys the cached packages to the specified hosts.
    Packages are installed while the server is out of the loadbalancer pool

    The packages to deploy are read from ``env.package_descriptors``.  For
    each package the repository is cloned, checked out at ``pkg.revision``,
    or removed when the revision is the literal string 'absent'; afterwards
    its requirements, gemfile and npm package are installed.  The commands in
    ``env.pre_post['pre']`` run before and those in ``env.pre_post['post']``
    run after the per-package work.  Each step is timed under the
    'fabric.deployment' metric.

    Args:
        auto_migrate: passed through unchanged to the
            'migrate_check.migrate_check' task, which is only executed when
            a package named 'mitx' is part of this deploy.

    Raises:
        Exception: if a package's repo root exists but contains no '.git'.
    """

    packages = env.package_descriptors

    # If these are not defined it means that the confirm
    # step was skipped, in this case we figure out pre and
    # post steps here
    if not hasattr(env, 'pre_post'):
        pkg_config = PackageInfo()
        env.pre_post = pkg_config.pre_post_actions(
            [pkg.name for pkg in packages])

    contains_content = any(pkg.name.startswith('content') for pkg in packages)
    contains_code = not all(pkg.name.startswith('content') for pkg in packages)

    base_tags = [
        'deploying_to_host:' + env.host,
    ] + instance_tags_for_current_host()

    if contains_content:
        base_tags.append('type:content')
    if contains_code:
        base_tags.append('type:code')

    package_tags = base_tags + ['package:' + pkg.name for pkg in packages]
    metric_name = 'fabric.deployment'

    # pre checkout commands
    with dog_stats_api.timer(metric_name,
                             tags=package_tags + ['step:pre_commands']):
        with prefix("export GIT_SSH=/tmp/git.sh"):
            for cmd in env.pre_post['pre']:
                noopable(sudo)(cmd)

    # NOTE(review): git.sh is uploaded only after the pre commands have run
    # with GIT_SSH pointing at it -- confirm that no pre command needs it.
    # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3.
    put(os.path.join(os.path.dirname(__file__), 'git.sh'),
        '/tmp/git.sh',
        mode=0o755,
        use_sudo=True)
    for pkg in packages:
        existing_repo = files.exists(pkg.repo_root, use_sudo=True)

        # The conditional expression must be parenthesized: it binds more
        # loosely than '+', so without the parentheses a missing repo would
        # be tagged with a bare 'absent' instead of 'existance:absent'.
        # (The tag key spelling is kept as-is so existing metrics still match.)
        repo_tags = base_tags + [
            'package:' + pkg.name,
            'existance:' + ('existing' if existing_repo else 'absent'),
        ]

        with dog_stats_api.timer(metric_name, tags=repo_tags + ['step:clone']):
            if existing_repo:
                git_dir = os.path.join(pkg.repo_root, '.git')
                if not files.exists(git_dir, use_sudo=True):
                    raise Exception(
                        "Repo root not a git repo - {0}".format(git_dir))
                with cd(pkg.repo_root):
                    if pkg.revision == 'absent':
                        noopable(sudo)('rm -rf {0}'.format(pkg.repo_root))
                    else:
                        checkout(pkg.revision)
            else:
                with cd(os.path.dirname(pkg.repo_root)):
                    if pkg.revision != 'absent':
                        clone(pkg.repo_org, pkg.repo_name, pkg.name,
                              pkg.revision)
            if '~' in pkg.name:
                # the part of the name after the '~' is handed to the
                # course xml updater
                _update_course_xml(pkg, pkg.name.split('~')[1])

        with dog_stats_api.timer(metric_name,
                                 tags=repo_tags + ['step:requirements']):
            _install_requirements(pkg)
            _install_gemfile(pkg)
            _install_npm_package(pkg)


#        with dog_stats_api.timer(metric_name, tags=repo_tags + ['step:fact']):
#            # drop a file for puppet so it knows that
#            # code is installed for the service
#            with cd('/etc/facter/facts.d'):
#                pkg_config = PackageInfo()
#                if pkg.repo_name in pkg_config.service_repos:
#                    # facts can't have dashes so they are converted
#                    # to underscores
#                    noopable(sudo)(
#                        'echo "{0}_installed=true" > {0}_installed.txt'.format(
#                        pkg.repo_name.replace("-", "_")))

#    with dog_stats_api.timer(metric_name, tags=package_tags +
#                             ['step:pkg_version']):
#        pkg_version()

    with dog_stats_api.timer(metric_name,
                             tags=package_tags + ['step:post_commands']):
        # post checkout commands
        with prefix("export GIT_SSH=/tmp/git.sh"):
            for cmd in env.pre_post['post']:
                noopable(sudo)(cmd)

    if any(pkg.name == 'mitx' for pkg in packages):
        # do not slow down content deploys by checking
        # for migrations
        execute('migrate_check.migrate_check', auto_migrate)
Example #34
0
def update_problem_module_state(entry_id, update_fcn, action_name, filter_fcn,
                                xmodule_instance_args):
    """
    Performs generic update by visiting StudentModule instances with the update_fcn provided.

    The `entry_id` is the primary key for the InstructorTask entry representing the task.  This function
    updates the entry on success and failure of the _perform_module_state_update function it
    wraps.  It is setting the entry's value for task_state based on what Celery would set it to once
    the task returns to Celery:  FAILURE if an exception is encountered, and SUCCESS if it returns normally.
    Other arguments are pass-throughs to _perform_module_state_update, and documented there.

    If `xmodule_instance_args` is not None, it is modified in place:  the entry's task_id is stored
    under the 'task_id' key.

    If no exceptions are raised, a dict containing the task's result is returned, with the following keys:

          'attempted': number of attempts made
          'updated': number of attempts that "succeeded"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Before returning, this is also JSON-serialized and stored in the task_output column of the InstructorTask entry.

    If an exception is raised internally, it is caught and recorded in the InstructorTask entry.
    This is also a JSON-serialized dict, stored in the task_output column, containing the following keys:

           'exception':  type of exception object
           'message': error message from exception object
           'traceback': traceback information (truncated if necessary)

    Once the exception is caught, it is raised again and allowed to pass up to the
    task-running level, so that it can also set the failure modes and capture the error trace in the
    result object that Celery creates.

    """

    # get the InstructorTask to be updated.  If this fails, then let the exception return to Celery.
    # There's no point in catching it here.
    entry = InstructorTask.objects.get(pk=entry_id)

    # get inputs to use in this task from the entry:
    task_id = entry.task_id
    course_id = entry.course_id
    task_input = json.loads(entry.task_input)
    module_state_key = task_input.get('problem_url')
    student_ident = task_input.get('student')

    fmt = 'Starting to update problem modules as task "{task_id}": course "{course_id}" problem "{state_key}": nothing {action} yet'
    TASK_LOG.info(fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key, action=action_name))

    # add task_id to xmodule_instance_args, so that it can be output with tracking info:
    if xmodule_instance_args is not None:
        xmodule_instance_args['task_id'] = task_id

    # Now that we have an entry we can try to catch failures:
    task_progress = None
    try:
        # Check that the task_id submitted in the InstructorTask matches the current task
        # that is running.
        request_task_id = _get_current_task().request.id
        if task_id != request_task_id:
            fmt = 'Requested task "{task_id}" did not match actual task "{actual_id}"'
            message = fmt.format(task_id=task_id, actual_id=request_task_id)
            TASK_LOG.error(message)
            raise UpdateProblemModuleStateError(message)

        # Now do the work:
        with dog_stats_api.timer('instructor_tasks.module.time.overall', tags=['action:{name}'.format(name=action_name)]):
            task_progress = _perform_module_state_update(course_id, module_state_key, student_ident, update_fcn,
                                                         action_name, filter_fcn, xmodule_instance_args)
        # If we get here, we assume we've succeeded, so update the InstructorTask entry in anticipation.
        # But we do this within the try, in case creating the task_output causes an exception to be
        # raised.
        entry.task_output = InstructorTask.create_output_for_success(task_progress)
        entry.task_state = SUCCESS
        entry.save_now()

    except Exception as exception:
        # try to write out the failure to the entry before failing.
        # format_exc() formats the exception currently being handled; its only
        # positional parameter is a frame-count `limit`, so the traceback object
        # must not be passed to it.
        traceback_string = format_exc()
        TASK_LOG.warning("background task (%s) failed: %s %s", task_id, exception, traceback_string)
        entry.task_output = InstructorTask.create_output_for_failure(exception, traceback_string)
        entry.task_state = FAILURE
        entry.save_now()
        raise

    # log and exit, returning task_progress info as task result:
    fmt = 'Finishing task "{task_id}": course "{course_id}" problem "{state_key}": final: {progress}'
    TASK_LOG.info(fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key, progress=task_progress))
    return task_progress
Example #35
0
def update_problem_module_state(entry_id, update_fcn, action_name, filter_fcn,
                                xmodule_instance_args):
    """
    Performs generic update by visiting StudentModule instances with the update_fcn provided.

    The `entry_id` is the primary key for the InstructorTask entry representing the task.  This function
    updates the entry on success and failure of the _perform_module_state_update function it
    wraps.  It is setting the entry's value for task_state based on what Celery would set it to once
    the task returns to Celery:  FAILURE if an exception is encountered, and SUCCESS if it returns normally.
    Other arguments are pass-throughs to _perform_module_state_update, and documented there.

    If `xmodule_instance_args` is not None, it is modified in place:  the entry's task_id is stored
    under the 'task_id' key.

    If no exceptions are raised, a dict containing the task's result is returned, with the following keys:

          'attempted': number of attempts made
          'updated': number of attempts that "succeeded"
          'total': number of possible subtasks to attempt
          'action_name': user-visible verb to use in status messages.  Should be past-tense.
              Pass-through of input `action_name`.
          'duration_ms': how long the task has (or had) been running.

    Before returning, this is also JSON-serialized and stored in the task_output column of the InstructorTask entry.

    If an exception is raised internally, it is caught and recorded in the InstructorTask entry.
    This is also a JSON-serialized dict, stored in the task_output column, containing the following keys:

           'exception':  type of exception object
           'message': error message from exception object
           'traceback': traceback information (truncated if necessary)

    Once the exception is caught, it is raised again and allowed to pass up to the
    task-running level, so that it can also set the failure modes and capture the error trace in the
    result object that Celery creates.

    """

    # get the InstructorTask to be updated.  If this fails, then let the exception return to Celery.
    # There's no point in catching it here.
    entry = InstructorTask.objects.get(pk=entry_id)

    # get inputs to use in this task from the entry:
    task_id = entry.task_id
    course_id = entry.course_id
    task_input = json.loads(entry.task_input)
    module_state_key = task_input.get('problem_url')
    student_ident = task_input.get('student')

    fmt = 'Starting to update problem modules as task "{task_id}": course "{course_id}" problem "{state_key}": nothing {action} yet'
    TASK_LOG.info(
        fmt.format(task_id=task_id,
                   course_id=course_id,
                   state_key=module_state_key,
                   action=action_name))

    # add task_id to xmodule_instance_args, so that it can be output with tracking info:
    if xmodule_instance_args is not None:
        xmodule_instance_args['task_id'] = task_id

    # Now that we have an entry we can try to catch failures:
    task_progress = None
    try:
        # Check that the task_id submitted in the InstructorTask matches the current task
        # that is running.
        request_task_id = _get_current_task().request.id
        if task_id != request_task_id:
            fmt = 'Requested task "{task_id}" did not match actual task "{actual_id}"'
            message = fmt.format(task_id=task_id,
                                 actual_id=request_task_id)
            TASK_LOG.error(message)
            raise UpdateProblemModuleStateError(message)

        # Now do the work:
        with dog_stats_api.timer(
                'instructor_tasks.module.time.overall',
                tags=['action:{name}'.format(name=action_name)]):
            task_progress = _perform_module_state_update(
                course_id, module_state_key, student_ident, update_fcn,
                action_name, filter_fcn, xmodule_instance_args)
        # If we get here, we assume we've succeeded, so update the InstructorTask entry in anticipation.
        # But we do this within the try, in case creating the task_output causes an exception to be
        # raised.
        entry.task_output = InstructorTask.create_output_for_success(
            task_progress)
        entry.task_state = SUCCESS
        entry.save_now()

    except Exception as exception:
        # try to write out the failure to the entry before failing.
        # format_exc() formats the exception currently being handled; its only
        # positional parameter is a frame-count `limit`, so the traceback object
        # must not be passed to it.
        traceback_string = format_exc()
        TASK_LOG.warning("background task (%s) failed: %s %s", task_id,
                         exception, traceback_string)
        entry.task_output = InstructorTask.create_output_for_failure(
            exception, traceback_string)
        entry.task_state = FAILURE
        entry.save_now()
        raise

    # log and exit, returning task_progress info as task result:
    fmt = 'Finishing task "{task_id}": course "{course_id}" problem "{state_key}": final: {progress}'
    TASK_LOG.info(
        fmt.format(task_id=task_id,
                   course_id=course_id,
                   state_key=module_state_key,
                   progress=task_progress))
    return task_progress
Example #36
0
def _send_course_email(email_id, to_list, course_title, course_url, image_url,
                       throttle):
    """
    Performs the email sending task.

    Sends the CourseEmail identified by `email_id` to every recipient in
    `to_list` whose address is not in the Optout table for the email's course.
    Emails are sent multi-part, in both plain text and html.

    Inputs are:
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipient dicts; the keys read here are 'pk',
        'email' and 'profile__name'.
      * `course_title`, `course_url`, `image_url`: values placed in the
        template context; `course_title` is also used in the subject line,
        the from-address and the stats tag.
      * `throttle`: when true, sleep 0.2 seconds before every send.  The
        same sleep also happens whenever the current task is a retry.

    Returns the value of course_email_result(num_sent, num_error, num_optout).

    Raises CourseEmail.DoesNotExist if the email cannot be found.  SMTP data
    errors with a 4xx code, SMTP connect errors and server disconnects cause
    the task to be re-submitted through course_email.retry() with only the
    recipients that have not been processed yet.  Any other exception is
    logged and re-raised.
    """
    try:
        msg = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception("Could not find email id:%s to send.", email_id)
        raise

    # exclude optouts
    recipient_pks = [recipient['pk'] for recipient in to_list]
    optouts = set(
        Optout.objects.filter(course_id=msg.course_id, user__in=recipient_pks)
        .values_list('user__email', flat=True))
    num_optout = len(optouts)

    to_list = [
        recipient for recipient in to_list if recipient['email'] not in optouts
    ]

    subject = "[" + course_title + "] " + msg.subject

    course_title_no_quotes = re.sub(r'"', '', course_title)
    from_addr = '"{0}" Course Staff <{1}>'.format(
        course_title_no_quotes, settings.DEFAULT_BULK_FROM_EMAIL)

    course_email_template = CourseEmailTemplate.get_template()

    # Bound before the try so that the cleanup in `finally` is safe even when
    # get_connection() itself fails.
    connection = None
    try:
        connection = get_connection()
        connection.open()
        num_sent = 0
        num_error = 0

        # Define context values to use in all course emails:
        email_context = {
            'name': '',
            'email': '',
            'course_title': course_title,
            'course_url': course_url,
            'course_image_url': image_url,
            'account_settings_url':
            'https://{}{}'.format(settings.SITE_NAME, reverse('dashboard')),
            'platform_name': settings.PLATFORM_NAME,
        }

        while to_list:
            # Update context with user-specific values.  The recipient stays
            # at the end of to_list until it has been processed, so that on a
            # retry to_list holds exactly the recipients still to be emailed.
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(
                msg.text_message, email_context)
            html_msg = course_email_template.render_htmltext(
                msg.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(subject,
                                               plaintext_msg,
                                               from_addr, [email],
                                               connection=connection)
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we tried a few times and got the rate limiter
            if throttle or current_task.request.retries > 0:
                time.sleep(0.2)

            try:
                with dog_stats_api.timer(
                        'course_email.single_send.time.overall',
                        tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

                dog_stats_api.increment('course_email.sent',
                                        tags=[_statsd_tag(course_title)])

                log.info('Email with id %s sent to %s', email_id, email)
                num_sent += 1
            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task
                    raise

                # This will fall through and not retry the message, since it will be popped
                log.warning(
                    'Email with id %s not delivered to %s due to error %s',
                    email_id, email, exc.smtp_error)

                dog_stats_api.increment('course_email.error',
                                        tags=[_statsd_tag(course_title)])

                num_error += 1

            to_list.pop()

        return course_email_result(num_sent, num_error, num_optout)

    except (SMTPDataError, SMTPConnectError, SMTPServerDisconnected) as exc:
        # Error caught here cause the email to be retried.  The entire task is actually retried without popping the list
        # Reasoning is that all of these errors may be temporary condition.
        log.warning(
            'Email with id %s not delivered due to temporary error %s, retrying send to %d recipients',
            email_id, exc, len(to_list))
        # The keyword must be `args`: a misspelled keyword is not treated as
        # the positional arguments, and the retry would then run with the
        # original arguments (and the full, unpopped recipient list).
        raise course_email.retry(
            args=[
                email_id, to_list, course_title, course_url, image_url,
                current_task.request.retries > 0
            ],
            exc=exc,
            countdown=(2**current_task.request.retries) * 15)
    except Exception:
        log.exception(
            'Email with id %s caused course_email task to fail with uncaught exception. To list: %s',
            email_id, [i['email'] for i in to_list])
        raise
    finally:
        # Close the connection on every exit path (success, retry, failure).
        if connection is not None:
            connection.close()
Example #37
0
def _send_course_email(entry_id, email_id, to_list, global_email_context,
                       subtask_status):
    """
    Performs the email sending task.

    Sends an email to a list of recipients.

    Inputs are:
      * `entry_id`: id of the InstructorTask object to which progress should be recorded.
      * `email_id`: id of the CourseEmail model that is to be emailed.
      * `to_list`: list of recipients.  Each is represented as a dict with the following keys:
        - 'profile__name': full name of User.
        - 'email': email address of User.
        - 'pk': primary key of User model.
      * `global_email_context`: dict containing values that are unique for this email but the same
        for all recipients of this email.  This dict is to be used to fill in slots in email
        template.  It does not include 'name' and 'email', which will be provided by the to_list.
      * `subtask_status` : object of class SubtaskStatus representing current status.

    Sends to all addresses contained in to_list that are not also in the Optout table.
    Emails are sent multi-part, in both plain text and html.

    Returns a tuple of two values:
      * First value is a SubtaskStatus object which represents current progress at the end of this call.

      * Second value is an exception returned by the innards of the method, indicating a fatal error.
        In this case, the number of recipients that were not sent have already been added to the
        'failed' count above.

    When a retryable error occurs, the value returned is whatever _submit_for_retry() returns.

    Raises CourseEmail.DoesNotExist if no CourseEmail with `email_id` exists.
    """
    # Get information from current task's request:
    task_id = subtask_status.task_id

    try:
        course_email = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception("Task %s: could not find email id:%s to send.", task_id,
                      email_id)
        raise

    # Exclude optouts (if not a retry):
    # Note that we don't have to do the optout logic at all if this is a retry,
    # because we have presumably already performed the optout logic on the first
    # attempt.  Anyone on the to_list on a retry has already passed the filter
    # that existed at that time, and we don't need to keep checking for changes
    # in the Optout list.
    if subtask_status.get_retry_count() == 0:
        to_list, num_optout = _filter_optouts_from_recipients(
            to_list, course_email.course_id)
        subtask_status.increment(skipped=num_optout)

    course_title = global_email_context['course_title']
    subject = "[" + course_title + "] " + course_email.subject
    from_addr = _get_source_address(course_email.course_id, course_title)

    course_email_template = CourseEmailTemplate.get_template()

    # Bound before the try so that the cleanup in `finally` cannot fail with an
    # unbound local (and thereby mask the handler's result) when
    # get_connection() itself raises.
    connection = None
    try:
        connection = get_connection()
        connection.open()

        # Define context values to use in all course emails:
        email_context = {'name': '', 'email': ''}
        email_context.update(global_email_context)

        while to_list:
            # Update context with user-specific values from the user at the end of the list.
            # At the end of processing this user, they will be popped off of the to_list.
            # That way, the to_list will always contain the recipients remaining to be emailed.
            # This is convenient for retries, which will need to send to those who haven't
            # yet been emailed, but not send to those who have already been sent to.
            current_recipient = to_list[-1]
            email = current_recipient['email']
            email_context['email'] = email
            email_context['name'] = current_recipient['profile__name']

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(
                course_email.text_message, email_context)
            html_msg = course_email_template.render_htmltext(
                course_email.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(subject,
                                               plaintext_msg,
                                               from_addr, [email],
                                               connection=connection)
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we have gotten the rate limiter.  This is not very high-tech,
            # but if a task has been retried for rate-limiting reasons, then we sleep
            # for a period of time between all emails within this task.  Choice of
            # the value depends on the number of workers that might be sending email in
            # parallel, and what the SES throttle rate is.
            if subtask_status.retried_nomax > 0:
                sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS)

            try:
                log.debug('Email with id %s to be sent to %s', email_id, email)

                with dog_stats_api.timer(
                        'course_email.single_send.time.overall',
                        tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure.
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    raise

                # This will fall through and not retry the message.
                log.warning(
                    'Task %s: email with id %s not delivered to %s due to error %s',
                    task_id, email_id, email, exc.smtp_error)
                dog_stats_api.increment('course_email.error',
                                        tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            except SINGLE_EMAIL_FAILURE_ERRORS as exc:
                # This will fall through and not retry the message.
                log.warning(
                    'Task %s: email with id %s not delivered to %s due to error %s',
                    task_id, email_id, email, exc)
                dog_stats_api.increment('course_email.error',
                                        tags=[_statsd_tag(course_title)])
                subtask_status.increment(failed=1)

            else:
                dog_stats_api.increment('course_email.sent',
                                        tags=[_statsd_tag(course_title)])
                if settings.BULK_EMAIL_LOG_SENT_EMAILS:
                    log.info('Email with id %s sent to %s', email_id, email)
                else:
                    log.debug('Email with id %s sent to %s', email_id, email)
                subtask_status.increment(succeeded=1)

            # Pop the user that was emailed off the end of the list only once they have
            # successfully been processed.  (That way, if there were a failure that
            # needed to be retried, the user is still on the list.)
            to_list.pop()

    except INFINITE_RETRY_ERRORS as exc:
        dog_stats_api.increment('course_email.infinite_retry',
                                tags=[_statsd_tag(course_title)])
        # Increment the "retried_nomax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_nomax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=True)

    except LIMITED_RETRY_ERRORS as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # Errors caught are those that indicate a temporary condition that might succeed on retry.
        dog_stats_api.increment('course_email.limited_retry',
                                tags=[_statsd_tag(course_title)])
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=False)

    except BULK_EMAIL_FAILURE_ERRORS as exc:
        dog_stats_api.increment('course_email.error',
                                tags=[_statsd_tag(course_title)])
        num_pending = len(to_list)
        log.exception(
            'Task %s: email with id %s caused send_course_email task to fail with "fatal" exception.  %d emails unsent.',
            task_id, email_id, num_pending)
        # Update counters with progress to date, counting unsent emails as failures,
        # and set the state to FAILURE:
        subtask_status.increment(failed=num_pending, state=FAILURE)
        return subtask_status, exc

    except Exception as exc:
        # Errors caught here cause the email to be retried.  The entire task is actually retried
        # without popping the current recipient off of the existing list.
        # These are unexpected errors.  Since they might be due to a temporary condition that might
        # succeed on retry, we give them a retry.
        dog_stats_api.increment('course_email.limited_retry',
                                tags=[_statsd_tag(course_title)])
        log.exception(
            'Task %s: email with id %s caused send_course_email task to fail with unexpected exception.  Generating retry.',
            task_id, email_id)
        # Increment the "retried_withmax" counter, update other counters with progress to date,
        # and set the state to RETRY:
        subtask_status.increment(retried_withmax=1, state=RETRY)
        return _submit_for_retry(entry_id,
                                 email_id,
                                 to_list,
                                 global_email_context,
                                 exc,
                                 subtask_status,
                                 skip_retry_max=False)

    else:
        # All went well.  Update counters with progress to date,
        # and set the state to SUCCESS:
        subtask_status.increment(state=SUCCESS)
        # Successful completion is marked by an exception value of None.
        return subtask_status, None
    finally:
        # Clean up at the end, on every exit path.
        if connection is not None:
            connection.close()
Example #38
0
def _send_course_email(email_id, to_list, course_title, course_url, image_url, throttle):
    """
    Performs the email sending task.

    Looks up the CourseEmail with id `email_id`, drops recipients that have an
    Optout record for the email's course, and sends one message per remaining
    recipient over a single mail connection.  Recipients are taken from the end
    of `to_list` and popped once their send has been attempted, so at any point
    `to_list` holds only the recipients that have not been processed yet.

    Args:
        email_id: id of the CourseEmail to send.
        to_list: list of recipient dicts; the keys 'pk', 'email' and
            'profile__name' are read from each entry.
        course_title: used in the subject, the From header, the template
            context and the stats tags.
        course_url: placed in the template context as 'course_url'.
        image_url: placed in the template context as 'course_image_url'.
        throttle: when truthy (or when this task has already been retried),
            sleep 0.2 seconds before each send.

    Returns:
        The value of course_email_result(num_sent, num_error, num_optout).

    Raises:
        CourseEmail.DoesNotExist: if no email with `email_id` exists (logged first).
        The result of course_email.retry(...): when an SMTP connect/disconnect
            error or a 4xx SMTPDataError occurs; the retry is submitted with the
            recipients that are still unsent.
        Any other exception is logged and re-raised unchanged.
    """
    try:
        msg = CourseEmail.objects.get(id=email_id)
    except CourseEmail.DoesNotExist:
        log.exception("Could not find email id:{} to send.".format(email_id))
        raise

    # exclude optouts
    optouts = (Optout.objects.filter(course_id=msg.course_id,
                                     user__in=[i['pk'] for i in to_list])
                             .values_list('user__email', flat=True))

    optouts = set(optouts)
    num_optout = len(optouts)

    to_list = [recipient for recipient in to_list if recipient['email'] not in optouts]

    subject = "[" + course_title + "] " + msg.subject

    # Double quotes would break the quoted display name in the From header.
    course_title_no_quotes = re.sub(r'"', '', course_title)
    from_addr = '"{0}" Course Staff <{1}>'.format(course_title_no_quotes, settings.DEFAULT_BULK_FROM_EMAIL)

    course_email_template = CourseEmailTemplate.get_template()

    # Bound before the try so the cleanup in `finally` is safe even when
    # get_connection() itself raises.
    connection = None
    try:
        connection = get_connection()
        connection.open()
        num_sent = 0
        num_error = 0

        # Define context values to use in all course emails:
        email_context = {
            'name': '',
            'email': '',
            'course_title': course_title,
            'course_url': course_url,
            'course_image_url': image_url,
            'account_settings_url': 'https://{}{}'.format(settings.SITE_NAME, reverse('dashboard')),
            'platform_name': settings.PLATFORM_NAME,
        }

        while to_list:
            # Update context with user-specific values:
            email = to_list[-1]['email']
            email_context['email'] = email
            email_context['name'] = to_list[-1]['profile__name']

            # Construct message content using templates and context:
            plaintext_msg = course_email_template.render_plaintext(msg.text_message, email_context)
            html_msg = course_email_template.render_htmltext(msg.html_message, email_context)

            # Create email:
            email_msg = EmailMultiAlternatives(
                subject,
                plaintext_msg,
                from_addr,
                [email],
                connection=connection
            )
            email_msg.attach_alternative(html_msg, 'text/html')

            # Throttle if we tried a few times and got the rate limiter
            if throttle or current_task.request.retries > 0:
                time.sleep(0.2)

            try:
                with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]):
                    connection.send_messages([email_msg])

                dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)])

                log.info('Email with id %s sent to %s', email_id, email)
                num_sent += 1
            except SMTPDataError as exc:
                # According to SMTP spec, we'll retry error codes in the 4xx range.  5xx range indicates hard failure
                if 400 <= exc.smtp_code < 500:
                    # This will cause the outer handler to catch the exception and retry the entire task.
                    # A bare raise keeps the original traceback.
                    raise
                else:
                    # This will fall through and not retry the message, since it will be popped
                    log.warning('Email with id %s not delivered to %s due to error %s', email_id, email, exc.smtp_error)

                    dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)])

                    num_error += 1

            # Only reached when the send was attempted and not flagged for retry.
            to_list.pop()

        return course_email_result(num_sent, num_error, num_optout)

    except (SMTPDataError, SMTPConnectError, SMTPServerDisconnected) as exc:
        # Error caught here cause the email to be retried.  The entire task is actually retried without popping the list
        # Reasoning is that all of these errors may be temporary condition.
        log.warning('Email with id %d not delivered due to temporary error %s, retrying send to %d recipients',
                    email_id, exc, len(to_list))
        # Celery's Task.retry() expects `args`; passing the remaining recipients
        # keeps already-delivered addresses from being mailed again on retry.
        raise course_email.retry(
            args=[
                email_id,
                to_list,
                course_title,
                course_url,
                image_url,
                # Throttle the retried task if this was already a retry.
                current_task.request.retries > 0
            ],
            exc=exc,
            # Exponential back-off: 15s, 30s, 60s, ...
            countdown=(2 ** current_task.request.retries) * 15
        )
    except Exception:
        log.exception('Email with id %d caused course_email task to fail with uncaught exception. To list: %s',
                      email_id,
                      [i['email'] for i in to_list])
        raise
    finally:
        # Close the connection on every exit path (success, retry, failure).
        if connection is not None:
            connection.close()
Example #39
0
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id,
                                task_input, action_name):
    """
    Apply `update_fcn` to every StudentModule selected by the task input.

    The problem is identified by `task_input['problem_url']`, which is turned into a
    usage key via `course_id`.  StudentModule rows matching that course and usage key
    are selected.  If `task_input['student']` is present, the selection is narrowed to
    that one user: the identifier is looked up as an email address when it contains
    "@", otherwise as a username, and a failed lookup is allowed to raise.  If
    `filter_fcn` is not None it is called with the query and its return value is used
    as the query from then on.

    `update_fcn` is called once per selected StudentModule with two arguments: the
    module descriptor for the problem and the StudentModule itself.  It must return
    UPDATE_STATUS_SUCCEEDED, UPDATE_STATUS_FAILED or UPDATE_STATUS_SKIPPED; any other
    value raises UpdateProblemModuleStateError.

    Progress is reported to the current task (state PROGRESS) once before the loop and
    again after every StudentModule.  The returned value is the last progress dict
    reported, with the following keys:

          'action_name': pass-through of input `action_name`.
          'attempted': number of StudentModules visited
          'succeeded': number of updates reported as succeeded
          'skipped': number of updates reported as skipped
          'failed': number of updates reported as failed
          'total': number of StudentModules selected
          'duration_ms': milliseconds elapsed since this function started.

    No exceptions are caught here; anything raised by the lookups or by `update_fcn`
    propagates to the caller.  `_entry_id` is not used by this function.
    """
    # Remember when we started so each progress snapshot can report elapsed time.
    started_at = time()

    usage_key = course_id.make_usage_key_from_deprecated_string(
        task_input.get('problem_url'))
    student_identifier = task_input.get('student')

    # Resolve the problem descriptor first.
    module_descriptor = modulestore().get_item(usage_key)

    # Base selection: every StudentModule for this problem in this course.
    selection = StudentModule.objects.filter(
        course_id=course_id, module_state_key=usage_key)

    # Optionally restrict to one student; a missing user raises from .get().
    if student_identifier is not None:
        lookup_field = 'email' if "@" in student_identifier else 'username'
        student = User.objects.get(**{lookup_field: student_identifier})
        if student is not None:
            selection = selection.filter(student_id=student.id)

    if filter_fcn is not None:
        selection = filter_fcn(selection)

    # Running counters, read by the progress snapshot below.
    tally = {'attempted': 0, 'succeeded': 0, 'skipped': 0, 'failed': 0}
    total = selection.count()

    def get_task_progress():
        """Build a fresh dict describing the task's progress so far."""
        now = time()
        return {
            'action_name': action_name,
            'attempted': tally['attempted'],
            'succeeded': tally['succeeded'],
            'skipped': tally['skipped'],
            'failed': tally['failed'],
            'total': total,
            'duration_ms': int((now - started_at) * 1000),
        }

    # Report the initial (all-zero) state before doing any work.
    task_progress = get_task_progress()
    _get_current_task().update_state(state=PROGRESS, meta=task_progress)

    for student_module in selection:
        tally['attempted'] += 1
        # Deliberately no try/except: an error here propagates so the task is
        # marked as failed with a stack trace.
        with dog_stats_api.timer(
                'instructor_tasks.module.time.step',
                tags=[u'action:{name}'.format(name=action_name)]):
            outcome = update_fcn(module_descriptor, student_module)
            # Logging of failures is left to the update_fcn itself.
            if outcome == UPDATE_STATUS_SUCCEEDED:
                tally['succeeded'] += 1
            elif outcome == UPDATE_STATUS_FAILED:
                tally['failed'] += 1
            elif outcome == UPDATE_STATUS_SKIPPED:
                tally['skipped'] += 1
            else:
                raise UpdateProblemModuleStateError(
                    "Unexpected update_status returned: {}".format(
                        outcome))

        # Publish the updated counters after each module.
        task_progress = get_task_progress()
        _get_current_task().update_state(state=PROGRESS, meta=task_progress)

    return task_progress